// SPDX-License-Identifier: LGPL-2.1+
/*
* Copyright 2016 Tom aan de Wiel
* Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
*
* 8x8 Fast Walsh Hadamard Transform in sequency order based on the paper:
*
* A Recursive Algorithm for Sequency-Ordered Fast Walsh Transforms,
* R.D. Brown, 1977
*/
#include <linux/string.h>
#include <linux/kernel.h>
#include "codec-fwht.h"
#define OVERFLOW_BIT BIT(14)
/*
* Note: bit 0 of the header must always be 0. Otherwise it cannot
* be guaranteed that the magic 8 byte sequence (see below) can
* never occur in the rlc output.
*/
#define PFRAME_BIT BIT(15)
#define DUPS_MASK 0x1ffe
#define PBLOCK 0
#define IBLOCK 1
#define ALL_ZEROS 15
static const uint8_t zigzag[64] = {
0,
1, 8,
2, 9, 16,
3, 10, 17, 24,
4, 11, 18, 25, 32,
5, 12, 19, 26, 33, 40,
6, 13, 20, 27, 34, 41, 48,
7, 14, 21, 28, 35, 42, 49, 56,
15, 22, 29, 36, 43, 50, 57,
23, 30, 37, 44, 51, 58,
31, 38, 45, 52, 59,
39, 46, 53, 60,
47, 54, 61,
55, 62,
63,
};
/*
* noinline_for_stack to work around
* https://bugs.llvm.org/show_bug.cgi?id=38809
*/
static int noinline_for_stack
rlc(const s16 *in, __be16 *output, int blocktype)
{
s16 block[8 * 8];
s16 *wp = block;
int i = 0;
int x, y;
int ret = 0;
/* read in block from framebuffer */
int lastzero_run = 0;
int to_encode;
for (y = 0; y < 8; y++) {
for (x = 0; x < 8; x++) {
*wp = in[x + y * 8];
wp++;
}
}
/* keep track of amount of trailing zeros */
for (i = 63; i >= 0 && !block[zigzag[i]]; i--)
lastzero_run++;
*output++ = (blocktype == PBLOCK ? htons(PFRAME_BIT) : 0);
ret++;
to_encode = 8 * 8 - (lastzero_run > 14 ? lastzero_run : 0);
i = 0;
while (i < to_encode) {
int cnt = 0;
int tmp;
/* count leading zeros */
while ((tmp = block[zigzag[i]]) == 0 && cnt < 14) {
cnt++;
i++;
if (i == to_encode) {
cnt--;
break;
}
}
/* 4 bits for run, 12 for coefficient (quantization by 4) */
*output++ = htons((cnt | tmp << 4));
i++;
ret++;
}
if (lastzero_run > 14) {
*output = htons(ALL_ZEROS | 0);
ret++;
}
return ret;
}
/*
* This function will worst-case increase rlc_in by 65*2 bytes:
* one s16 value for the header and 8 * 8 coefficients of type s16.
*/
static noinline_for_stack u16
derlc(const __be16 **rlc_in, s16 *dwht_out, const __be16 *end_of_input)
{
/* header */
const __be16 *input = *rlc_in;
u16 stat;
int dec_count = 0;
s16 block[8 * 8 + 16];
s16 *wp = block;
int i;
if (input > end_of_input)
return OVERFLOW_BIT;
stat = ntohs(*input++);
/*
* Now de-compress, it expands one byte to up to 15 bytes
* (or fills the remainder of the 64 bytes with zeroes if it
* is the last byte to expand).
*
* So block has to be 8 * 8 + 16 bytes, the '+ 16' is to
* allow for overflow if the incoming data was malformed.
*/
while (dec_count < 8 * 8) {
s16 in;
int length;
int coeff;
if (input > end_of_input)
return OVERFLOW_BIT;
in = ntohs(*input++);
length = in & 0xf;
coeff = in >> 4;
/* fill remainder with zeros */
if (length == 15) {
for (i = 0; i < 64 - dec_count; i++)
*wp++ = 0;
break;
}
for (i = 0; i < length; i++)
*wp++ = 0;
*wp++ = coeff;
dec_count += length + 1;
}
wp = block;
for (i = 0; i < 64; i++) {
int pos = zigzag[i];
int y = pos / 8;
int x = pos % 8;
dwht_out[x + y * 8] = *wp++;
}
*rlc_in = input;
return stat;
}
static const int quant_table[] = {
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 3,
2, 2, 2, 2, 2, 2, 3, 6,
2, 2, 2, 2, 2, 3, 6, 6,
2, 2, 2, 2, 3, 6, 6, 6,
2, 2, 2, 3, 6, 6, 6, 6,
2, 2, 3, 6, 6, 6, 6, 8,
};
static const int quant_table_p[] = {
3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3,
3,