18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * 842 Software Compression
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Copyright (C) 2015 Dan Streetman, IBM Corp
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci * See 842.h for details of the 842 compressed format.
88c2ecf20Sopenharmony_ci */
98c2ecf20Sopenharmony_ci
108c2ecf20Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
118c2ecf20Sopenharmony_ci#define MODULE_NAME "842_compress"
128c2ecf20Sopenharmony_ci
138c2ecf20Sopenharmony_ci#include <linux/hashtable.h>
148c2ecf20Sopenharmony_ci
158c2ecf20Sopenharmony_ci#include "842.h"
168c2ecf20Sopenharmony_ci#include "842_debugfs.h"
178c2ecf20Sopenharmony_ci
/*
 * Size parameters handed to DECLARE_HASHTABLE() for the three lookup tables
 * embedded in struct sw842_param: one table each for 8-, 4- and 2-byte
 * values.  Each table gets (1 << BITS) buckets.
 */
#define SW842_HASHTABLE8_BITS	(10)
#define SW842_HASHTABLE4_BITS	(11)
#define SW842_HASHTABLE2_BITS	(10)
218c2ecf20Sopenharmony_ci
/* By default, we allow compressing input buffers of any length, but we must
 * use the non-standard "short data" template so the decompressor can correctly
 * reproduce the uncompressed data buffer at the right length.  However the
 * hardware 842 decompressor will not recognize the "short data" template, and
 * will fail to decompress any compressed buffer containing it (I have no idea
 * why anyone would want to use software to compress and hardware to decompress
 * but that's beside the point).  This parameter forces the compression
 * function to simply reject any input buffer that isn't a multiple of 8 bytes
 * long, instead of using the "short data" template, so that all compressed
 * buffers produced by this function will be decompressible by the 842 hardware
 * decompressor.  Unless you have a specific need for that, leave this disabled
 * so that any length buffer can be compressed.
 */
static bool sw842_strict;
module_param_named(strict, sw842_strict, bool, 0644);
378c2ecf20Sopenharmony_ci
/*
 * Table of the data/index templates the compressor may emit.
 *
 * Columns 0-3 are the per-slot operations (Ix = emit an index to a previously
 * seen x-byte value, Dx = emit x bytes of literal data, N0 = unused slot);
 * column 4 is the template opcode written to the output stream by
 * add_template().  The trailing comment on each row is the total size in bits
 * of that template's parameters.
 *
 * process_next() walks the rows in order and uses the first one whose index
 * operations can all be satisfied, so rows are listed from smallest to
 * largest parameter size; the final all-literal row is the fallback.
 */
static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */
	{ I8, N0, N0, N0, 0x19 }, /* 8 */
	{ I4, I4, N0, N0, 0x18 }, /* 18 */
	{ I4, I2, I2, N0, 0x17 }, /* 25 */
	{ I2, I2, I4, N0, 0x13 }, /* 25 */
	{ I2, I2, I2, I2, 0x12 }, /* 32 */
	{ I4, I2, D2, N0, 0x16 }, /* 33 */
	{ I4, D2, I2, N0, 0x15 }, /* 33 */
	{ I2, D2, I4, N0, 0x0e }, /* 33 */
	{ D2, I2, I4, N0, 0x09 }, /* 33 */
	{ I2, I2, I2, D2, 0x11 }, /* 40 */
	{ I2, I2, D2, I2, 0x10 }, /* 40 */
	{ I2, D2, I2, I2, 0x0d }, /* 40 */
	{ D2, I2, I2, I2, 0x08 }, /* 40 */
	{ I4, D4, N0, N0, 0x14 }, /* 41 */
	{ D4, I4, N0, N0, 0x04 }, /* 41 */
	{ I2, I2, D4, N0, 0x0f }, /* 48 */
	{ I2, D2, I2, D2, 0x0c }, /* 48 */
	{ I2, D4, I2, N0, 0x0b }, /* 48 */
	{ D2, I2, I2, D2, 0x07 }, /* 48 */
	{ D2, I2, D2, I2, 0x06 }, /* 48 */
	{ D4, I2, I2, N0, 0x03 }, /* 48 */
	{ I2, D2, D4, N0, 0x0a }, /* 56 */
	{ D2, I2, D4, N0, 0x05 }, /* 56 */
	{ D4, I2, D2, N0, 0x02 }, /* 56 */
	{ D4, D2, I2, N0, 0x01 }, /* 56 */
	{ D8, N0, N0, N0, 0x00 }, /* 64 */
};
668c2ecf20Sopenharmony_ci
/* One remembered 8-byte input value, linked into sw842_param.htable8. */
struct sw842_hlist_node8 {
	struct hlist_node node;	/* hash bucket linkage */
	u64 data;		/* the 8-byte value stored in this slot */
	u8 index;		/* this slot's position in sw842_param.node8[] */
};
728c2ecf20Sopenharmony_ci
/* One remembered 4-byte input value, linked into sw842_param.htable4. */
struct sw842_hlist_node4 {
	struct hlist_node node;	/* hash bucket linkage */
	u32 data;		/* the 4-byte value stored in this slot */
	u16 index;		/* this slot's position in sw842_param.node4[] */
};
788c2ecf20Sopenharmony_ci
/* One remembered 2-byte input value, linked into sw842_param.htable2. */
struct sw842_hlist_node2 {
	struct hlist_node node;	/* hash bucket linkage */
	u16 data;		/* the 2-byte value stored in this slot */
	u8 index;		/* this slot's position in sw842_param.node2[] */
};
848c2ecf20Sopenharmony_ci
/*
 * Sentinel values for the sw842_param.indexN[] lookup caches.  Any value
 * >= 0 is a real node index; these negative values mean "looked up, no
 * match" and "not looked up yet for the current block" respectively.
 */
#define INDEX_NOT_FOUND		(-1)
#define INDEX_NOT_CHECKED	(-2)
878c2ecf20Sopenharmony_ci
/*
 * Complete working state of one sw842_compress() call.  The caller-supplied
 * work memory is used directly as this structure.
 */
struct sw842_param {
	u8 *in;		/* current read position in the input buffer */
	u8 *instart;	/* start of the input buffer */
	u64 ilen;	/* input bytes not yet consumed */
	u8 *out;	/* output byte currently being filled */
	u64 olen;	/* output bytes still available, counted from @out */
	u8 bit;		/* number of bits already used in *out */
	/* the current 8-byte input block, viewed as 1x8, 2x4 and 4x2 bytes */
	u64 data8[1];
	u32 data4[2];
	u16 data2[4];
	/* cached hash lookups for the values above: node index or INDEX_* */
	int index8[1];
	int index4[2];
	int index2[4];
	/* hash tables keyed by previously seen 8-, 4- and 2-byte values */
	DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS);
	DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS);
	DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS);
	/* backing storage for the hash entries, one slot per possible index */
	struct sw842_hlist_node8 node8[1 << I8_BITS];
	struct sw842_hlist_node4 node4[1 << I4_BITS];
	struct sw842_hlist_node2 node2[1 << I2_BITS];
};
1088c2ecf20Sopenharmony_ci
/*
 * Read a big-endian value that is @b bits wide (16, 32 or 64) located @o bytes
 * past the current input position of @p.  The load makes no alignment
 * assumption and the result is returned in CPU byte order.
 */
#define get_input_data(p, o, b)						\
	be##b##_to_cpu(get_unaligned((__be##b *)&(p)->in[(o)]))
1118c2ecf20Sopenharmony_ci
/*
 * Reset hash table htable@b of @p to empty and put every slot of node@b[]
 * into its initial state: unlinked, data zero, index equal to the slot's own
 * position in the array.
 */
#define init_hashtable_nodes(p, b)	do {				\
	int _slot = 0;							\
	hash_init((p)->htable##b);					\
	while (_slot < ARRAY_SIZE((p)->node##b)) {			\
		INIT_HLIST_NODE(&(p)->node##b[_slot].node);		\
		(p)->node##b[_slot].data = 0;				\
		(p)->node##b[_slot].index = _slot;			\
		_slot++;						\
	}								\
} while (0)
1218c2ecf20Sopenharmony_ci
/*
 * Look up data@b[@n] of @p in htable@b and cache the outcome in index@b[@n]:
 * the matching node's index on a hit, INDEX_NOT_FOUND otherwise.
 *
 * Evaluates to true when a match was found.  @p and @n are evaluated more
 * than once, so they must be free of side effects.
 */
#define find_index(p, b, n)	({					\
	struct sw842_hlist_node##b *_n;					\
	(p)->index##b[n] = INDEX_NOT_FOUND;				\
	hash_for_each_possible((p)->htable##b, _n, node,		\
			       (p)->data##b[n]) {			\
		if ((p)->data##b[n] == _n->data) {			\
			(p)->index##b[n] = _n->index;			\
			break;						\
		}							\
	}								\
	(p)->index##b[n] >= 0;						\
})
1338c2ecf20Sopenharmony_ci
/*
 * Tell whether data@b[@n] of @p has a match in the hash table.  The hash
 * lookup is only performed the first time this is asked for the current
 * block; afterwards the answer cached in index@b[@n] is reused.
 */
#define check_index(p, b, n)	({				\
	int _hit;						\
	if ((p)->index##b[n] == INDEX_NOT_CHECKED)		\
		_hit = find_index(p, b, n);			\
	else							\
		_hit = (p)->index##b[n] >= 0;			\
	_hit;							\
})
1388c2ecf20Sopenharmony_ci
/*
 * Recycle slot (@i + @d) of node@b[]: unlink it from htable@b, store the
 * current data@b[@d] of @p in it and hash it back in under the new value.
 */
#define replace_hash(p, b, i, d)	do {				\
	struct sw842_hlist_node##b *_slot;				\
									\
	_slot = &(p)->node##b[(i)+(d)];					\
	hash_del(&_slot->node);						\
	_slot->data = (p)->data##b[d];					\
	pr_debug("add hash index%x %x pos %x data %lx\n", b,		\
		 (unsigned int)_slot->index,				\
		 (unsigned int)((p)->in - (p)->instart),		\
		 (unsigned long)_slot->data);				\
	hash_add((p)->htable##b, &_slot->node, _slot->data);		\
} while (0)
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_cistatic u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_cistatic int add_bits(struct sw842_param *p, u64 d, u8 n);
1538c2ecf20Sopenharmony_ci
1548c2ecf20Sopenharmony_cistatic int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s)
1558c2ecf20Sopenharmony_ci{
1568c2ecf20Sopenharmony_ci	int ret;
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_ci	if (n <= s)
1598c2ecf20Sopenharmony_ci		return -EINVAL;
1608c2ecf20Sopenharmony_ci
1618c2ecf20Sopenharmony_ci	ret = add_bits(p, d >> s, n - s);
1628c2ecf20Sopenharmony_ci	if (ret)
1638c2ecf20Sopenharmony_ci		return ret;
1648c2ecf20Sopenharmony_ci	return add_bits(p, d & GENMASK_ULL(s - 1, 0), s);
1658c2ecf20Sopenharmony_ci}
1668c2ecf20Sopenharmony_ci
1678c2ecf20Sopenharmony_cistatic int add_bits(struct sw842_param *p, u64 d, u8 n)
1688c2ecf20Sopenharmony_ci{
1698c2ecf20Sopenharmony_ci	int b = p->bit, bits = b + n, s = round_up(bits, 8) - bits;
1708c2ecf20Sopenharmony_ci	u64 o;
1718c2ecf20Sopenharmony_ci	u8 *out = p->out;
1728c2ecf20Sopenharmony_ci
1738c2ecf20Sopenharmony_ci	pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d);
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci	if (n > 64)
1768c2ecf20Sopenharmony_ci		return -EINVAL;
1778c2ecf20Sopenharmony_ci
1788c2ecf20Sopenharmony_ci	/* split this up if writing to > 8 bytes (i.e. n == 64 && p->bit > 0),
1798c2ecf20Sopenharmony_ci	 * or if we're at the end of the output buffer and would write past end
1808c2ecf20Sopenharmony_ci	 */
1818c2ecf20Sopenharmony_ci	if (bits > 64)
1828c2ecf20Sopenharmony_ci		return __split_add_bits(p, d, n, 32);
1838c2ecf20Sopenharmony_ci	else if (p->olen < 8 && bits > 32 && bits <= 56)
1848c2ecf20Sopenharmony_ci		return __split_add_bits(p, d, n, 16);
1858c2ecf20Sopenharmony_ci	else if (p->olen < 4 && bits > 16 && bits <= 24)
1868c2ecf20Sopenharmony_ci		return __split_add_bits(p, d, n, 8);
1878c2ecf20Sopenharmony_ci
1888c2ecf20Sopenharmony_ci	if (DIV_ROUND_UP(bits, 8) > p->olen)
1898c2ecf20Sopenharmony_ci		return -ENOSPC;
1908c2ecf20Sopenharmony_ci
1918c2ecf20Sopenharmony_ci	o = *out & bmask[b];
1928c2ecf20Sopenharmony_ci	d <<= s;
1938c2ecf20Sopenharmony_ci
1948c2ecf20Sopenharmony_ci	if (bits <= 8)
1958c2ecf20Sopenharmony_ci		*out = o | d;
1968c2ecf20Sopenharmony_ci	else if (bits <= 16)
1978c2ecf20Sopenharmony_ci		put_unaligned(cpu_to_be16(o << 8 | d), (__be16 *)out);
1988c2ecf20Sopenharmony_ci	else if (bits <= 24)
1998c2ecf20Sopenharmony_ci		put_unaligned(cpu_to_be32(o << 24 | d << 8), (__be32 *)out);
2008c2ecf20Sopenharmony_ci	else if (bits <= 32)
2018c2ecf20Sopenharmony_ci		put_unaligned(cpu_to_be32(o << 24 | d), (__be32 *)out);
2028c2ecf20Sopenharmony_ci	else if (bits <= 40)
2038c2ecf20Sopenharmony_ci		put_unaligned(cpu_to_be64(o << 56 | d << 24), (__be64 *)out);
2048c2ecf20Sopenharmony_ci	else if (bits <= 48)
2058c2ecf20Sopenharmony_ci		put_unaligned(cpu_to_be64(o << 56 | d << 16), (__be64 *)out);
2068c2ecf20Sopenharmony_ci	else if (bits <= 56)
2078c2ecf20Sopenharmony_ci		put_unaligned(cpu_to_be64(o << 56 | d << 8), (__be64 *)out);
2088c2ecf20Sopenharmony_ci	else
2098c2ecf20Sopenharmony_ci		put_unaligned(cpu_to_be64(o << 56 | d), (__be64 *)out);
2108c2ecf20Sopenharmony_ci
2118c2ecf20Sopenharmony_ci	p->bit += n;
2128c2ecf20Sopenharmony_ci
2138c2ecf20Sopenharmony_ci	if (p->bit > 7) {
2148c2ecf20Sopenharmony_ci		p->out += p->bit / 8;
2158c2ecf20Sopenharmony_ci		p->olen -= p->bit / 8;
2168c2ecf20Sopenharmony_ci		p->bit %= 8;
2178c2ecf20Sopenharmony_ci	}
2188c2ecf20Sopenharmony_ci
2198c2ecf20Sopenharmony_ci	return 0;
2208c2ecf20Sopenharmony_ci}
2218c2ecf20Sopenharmony_ci
2228c2ecf20Sopenharmony_cistatic int add_template(struct sw842_param *p, u8 c)
2238c2ecf20Sopenharmony_ci{
2248c2ecf20Sopenharmony_ci	int ret, i, b = 0;
2258c2ecf20Sopenharmony_ci	u8 *t = comp_ops[c];
2268c2ecf20Sopenharmony_ci	bool inv = false;
2278c2ecf20Sopenharmony_ci
2288c2ecf20Sopenharmony_ci	if (c >= OPS_MAX)
2298c2ecf20Sopenharmony_ci		return -EINVAL;
2308c2ecf20Sopenharmony_ci
2318c2ecf20Sopenharmony_ci	pr_debug("template %x\n", t[4]);
2328c2ecf20Sopenharmony_ci
2338c2ecf20Sopenharmony_ci	ret = add_bits(p, t[4], OP_BITS);
2348c2ecf20Sopenharmony_ci	if (ret)
2358c2ecf20Sopenharmony_ci		return ret;
2368c2ecf20Sopenharmony_ci
2378c2ecf20Sopenharmony_ci	for (i = 0; i < 4; i++) {
2388c2ecf20Sopenharmony_ci		pr_debug("op %x\n", t[i]);
2398c2ecf20Sopenharmony_ci
2408c2ecf20Sopenharmony_ci		switch (t[i] & OP_AMOUNT) {
2418c2ecf20Sopenharmony_ci		case OP_AMOUNT_8:
2428c2ecf20Sopenharmony_ci			if (b)
2438c2ecf20Sopenharmony_ci				inv = true;
2448c2ecf20Sopenharmony_ci			else if (t[i] & OP_ACTION_INDEX)
2458c2ecf20Sopenharmony_ci				ret = add_bits(p, p->index8[0], I8_BITS);
2468c2ecf20Sopenharmony_ci			else if (t[i] & OP_ACTION_DATA)
2478c2ecf20Sopenharmony_ci				ret = add_bits(p, p->data8[0], 64);
2488c2ecf20Sopenharmony_ci			else
2498c2ecf20Sopenharmony_ci				inv = true;
2508c2ecf20Sopenharmony_ci			break;
2518c2ecf20Sopenharmony_ci		case OP_AMOUNT_4:
2528c2ecf20Sopenharmony_ci			if (b == 2 && t[i] & OP_ACTION_DATA)
2538c2ecf20Sopenharmony_ci				ret = add_bits(p, get_input_data(p, 2, 32), 32);
2548c2ecf20Sopenharmony_ci			else if (b != 0 && b != 4)
2558c2ecf20Sopenharmony_ci				inv = true;
2568c2ecf20Sopenharmony_ci			else if (t[i] & OP_ACTION_INDEX)
2578c2ecf20Sopenharmony_ci				ret = add_bits(p, p->index4[b >> 2], I4_BITS);
2588c2ecf20Sopenharmony_ci			else if (t[i] & OP_ACTION_DATA)
2598c2ecf20Sopenharmony_ci				ret = add_bits(p, p->data4[b >> 2], 32);
2608c2ecf20Sopenharmony_ci			else
2618c2ecf20Sopenharmony_ci				inv = true;
2628c2ecf20Sopenharmony_ci			break;
2638c2ecf20Sopenharmony_ci		case OP_AMOUNT_2:
2648c2ecf20Sopenharmony_ci			if (b != 0 && b != 2 && b != 4 && b != 6)
2658c2ecf20Sopenharmony_ci				inv = true;
2668c2ecf20Sopenharmony_ci			if (t[i] & OP_ACTION_INDEX)
2678c2ecf20Sopenharmony_ci				ret = add_bits(p, p->index2[b >> 1], I2_BITS);
2688c2ecf20Sopenharmony_ci			else if (t[i] & OP_ACTION_DATA)
2698c2ecf20Sopenharmony_ci				ret = add_bits(p, p->data2[b >> 1], 16);
2708c2ecf20Sopenharmony_ci			else
2718c2ecf20Sopenharmony_ci				inv = true;
2728c2ecf20Sopenharmony_ci			break;
2738c2ecf20Sopenharmony_ci		case OP_AMOUNT_0:
2748c2ecf20Sopenharmony_ci			inv = (b != 8) || !(t[i] & OP_ACTION_NOOP);
2758c2ecf20Sopenharmony_ci			break;
2768c2ecf20Sopenharmony_ci		default:
2778c2ecf20Sopenharmony_ci			inv = true;
2788c2ecf20Sopenharmony_ci			break;
2798c2ecf20Sopenharmony_ci		}
2808c2ecf20Sopenharmony_ci
2818c2ecf20Sopenharmony_ci		if (ret)
2828c2ecf20Sopenharmony_ci			return ret;
2838c2ecf20Sopenharmony_ci
2848c2ecf20Sopenharmony_ci		if (inv) {
2858c2ecf20Sopenharmony_ci			pr_err("Invalid templ %x op %d : %x %x %x %x\n",
2868c2ecf20Sopenharmony_ci			       c, i, t[0], t[1], t[2], t[3]);
2878c2ecf20Sopenharmony_ci			return -EINVAL;
2888c2ecf20Sopenharmony_ci		}
2898c2ecf20Sopenharmony_ci
2908c2ecf20Sopenharmony_ci		b += t[i] & OP_AMOUNT;
2918c2ecf20Sopenharmony_ci	}
2928c2ecf20Sopenharmony_ci
2938c2ecf20Sopenharmony_ci	if (b != 8) {
2948c2ecf20Sopenharmony_ci		pr_err("Invalid template %x len %x : %x %x %x %x\n",
2958c2ecf20Sopenharmony_ci		       c, b, t[0], t[1], t[2], t[3]);
2968c2ecf20Sopenharmony_ci		return -EINVAL;
2978c2ecf20Sopenharmony_ci	}
2988c2ecf20Sopenharmony_ci
2998c2ecf20Sopenharmony_ci	if (sw842_template_counts)
3008c2ecf20Sopenharmony_ci		atomic_inc(&template_count[t[4]]);
3018c2ecf20Sopenharmony_ci
3028c2ecf20Sopenharmony_ci	return 0;
3038c2ecf20Sopenharmony_ci}
3048c2ecf20Sopenharmony_ci
3058c2ecf20Sopenharmony_cistatic int add_repeat_template(struct sw842_param *p, u8 r)
3068c2ecf20Sopenharmony_ci{
3078c2ecf20Sopenharmony_ci	int ret;
3088c2ecf20Sopenharmony_ci
3098c2ecf20Sopenharmony_ci	/* repeat param is 0-based */
3108c2ecf20Sopenharmony_ci	if (!r || --r > REPEAT_BITS_MAX)
3118c2ecf20Sopenharmony_ci		return -EINVAL;
3128c2ecf20Sopenharmony_ci
3138c2ecf20Sopenharmony_ci	ret = add_bits(p, OP_REPEAT, OP_BITS);
3148c2ecf20Sopenharmony_ci	if (ret)
3158c2ecf20Sopenharmony_ci		return ret;
3168c2ecf20Sopenharmony_ci
3178c2ecf20Sopenharmony_ci	ret = add_bits(p, r, REPEAT_BITS);
3188c2ecf20Sopenharmony_ci	if (ret)
3198c2ecf20Sopenharmony_ci		return ret;
3208c2ecf20Sopenharmony_ci
3218c2ecf20Sopenharmony_ci	if (sw842_template_counts)
3228c2ecf20Sopenharmony_ci		atomic_inc(&template_repeat_count);
3238c2ecf20Sopenharmony_ci
3248c2ecf20Sopenharmony_ci	return 0;
3258c2ecf20Sopenharmony_ci}
3268c2ecf20Sopenharmony_ci
3278c2ecf20Sopenharmony_cistatic int add_short_data_template(struct sw842_param *p, u8 b)
3288c2ecf20Sopenharmony_ci{
3298c2ecf20Sopenharmony_ci	int ret, i;
3308c2ecf20Sopenharmony_ci
3318c2ecf20Sopenharmony_ci	if (!b || b > SHORT_DATA_BITS_MAX)
3328c2ecf20Sopenharmony_ci		return -EINVAL;
3338c2ecf20Sopenharmony_ci
3348c2ecf20Sopenharmony_ci	ret = add_bits(p, OP_SHORT_DATA, OP_BITS);
3358c2ecf20Sopenharmony_ci	if (ret)
3368c2ecf20Sopenharmony_ci		return ret;
3378c2ecf20Sopenharmony_ci
3388c2ecf20Sopenharmony_ci	ret = add_bits(p, b, SHORT_DATA_BITS);
3398c2ecf20Sopenharmony_ci	if (ret)
3408c2ecf20Sopenharmony_ci		return ret;
3418c2ecf20Sopenharmony_ci
3428c2ecf20Sopenharmony_ci	for (i = 0; i < b; i++) {
3438c2ecf20Sopenharmony_ci		ret = add_bits(p, p->in[i], 8);
3448c2ecf20Sopenharmony_ci		if (ret)
3458c2ecf20Sopenharmony_ci			return ret;
3468c2ecf20Sopenharmony_ci	}
3478c2ecf20Sopenharmony_ci
3488c2ecf20Sopenharmony_ci	if (sw842_template_counts)
3498c2ecf20Sopenharmony_ci		atomic_inc(&template_short_data_count);
3508c2ecf20Sopenharmony_ci
3518c2ecf20Sopenharmony_ci	return 0;
3528c2ecf20Sopenharmony_ci}
3538c2ecf20Sopenharmony_ci
3548c2ecf20Sopenharmony_cistatic int add_zeros_template(struct sw842_param *p)
3558c2ecf20Sopenharmony_ci{
3568c2ecf20Sopenharmony_ci	int ret = add_bits(p, OP_ZEROS, OP_BITS);
3578c2ecf20Sopenharmony_ci
3588c2ecf20Sopenharmony_ci	if (ret)
3598c2ecf20Sopenharmony_ci		return ret;
3608c2ecf20Sopenharmony_ci
3618c2ecf20Sopenharmony_ci	if (sw842_template_counts)
3628c2ecf20Sopenharmony_ci		atomic_inc(&template_zeros_count);
3638c2ecf20Sopenharmony_ci
3648c2ecf20Sopenharmony_ci	return 0;
3658c2ecf20Sopenharmony_ci}
3668c2ecf20Sopenharmony_ci
3678c2ecf20Sopenharmony_cistatic int add_end_template(struct sw842_param *p)
3688c2ecf20Sopenharmony_ci{
3698c2ecf20Sopenharmony_ci	int ret = add_bits(p, OP_END, OP_BITS);
3708c2ecf20Sopenharmony_ci
3718c2ecf20Sopenharmony_ci	if (ret)
3728c2ecf20Sopenharmony_ci		return ret;
3738c2ecf20Sopenharmony_ci
3748c2ecf20Sopenharmony_ci	if (sw842_template_counts)
3758c2ecf20Sopenharmony_ci		atomic_inc(&template_end_count);
3768c2ecf20Sopenharmony_ci
3778c2ecf20Sopenharmony_ci	return 0;
3788c2ecf20Sopenharmony_ci}
3798c2ecf20Sopenharmony_ci
3808c2ecf20Sopenharmony_cistatic bool check_template(struct sw842_param *p, u8 c)
3818c2ecf20Sopenharmony_ci{
3828c2ecf20Sopenharmony_ci	u8 *t = comp_ops[c];
3838c2ecf20Sopenharmony_ci	int i, match, b = 0;
3848c2ecf20Sopenharmony_ci
3858c2ecf20Sopenharmony_ci	if (c >= OPS_MAX)
3868c2ecf20Sopenharmony_ci		return false;
3878c2ecf20Sopenharmony_ci
3888c2ecf20Sopenharmony_ci	for (i = 0; i < 4; i++) {
3898c2ecf20Sopenharmony_ci		if (t[i] & OP_ACTION_INDEX) {
3908c2ecf20Sopenharmony_ci			if (t[i] & OP_AMOUNT_2)
3918c2ecf20Sopenharmony_ci				match = check_index(p, 2, b >> 1);
3928c2ecf20Sopenharmony_ci			else if (t[i] & OP_AMOUNT_4)
3938c2ecf20Sopenharmony_ci				match = check_index(p, 4, b >> 2);
3948c2ecf20Sopenharmony_ci			else if (t[i] & OP_AMOUNT_8)
3958c2ecf20Sopenharmony_ci				match = check_index(p, 8, 0);
3968c2ecf20Sopenharmony_ci			else
3978c2ecf20Sopenharmony_ci				return false;
3988c2ecf20Sopenharmony_ci			if (!match)
3998c2ecf20Sopenharmony_ci				return false;
4008c2ecf20Sopenharmony_ci		}
4018c2ecf20Sopenharmony_ci
4028c2ecf20Sopenharmony_ci		b += t[i] & OP_AMOUNT;
4038c2ecf20Sopenharmony_ci	}
4048c2ecf20Sopenharmony_ci
4058c2ecf20Sopenharmony_ci	return true;
4068c2ecf20Sopenharmony_ci}
4078c2ecf20Sopenharmony_ci
4088c2ecf20Sopenharmony_cistatic void get_next_data(struct sw842_param *p)
4098c2ecf20Sopenharmony_ci{
4108c2ecf20Sopenharmony_ci	p->data8[0] = get_input_data(p, 0, 64);
4118c2ecf20Sopenharmony_ci	p->data4[0] = get_input_data(p, 0, 32);
4128c2ecf20Sopenharmony_ci	p->data4[1] = get_input_data(p, 4, 32);
4138c2ecf20Sopenharmony_ci	p->data2[0] = get_input_data(p, 0, 16);
4148c2ecf20Sopenharmony_ci	p->data2[1] = get_input_data(p, 2, 16);
4158c2ecf20Sopenharmony_ci	p->data2[2] = get_input_data(p, 4, 16);
4168c2ecf20Sopenharmony_ci	p->data2[3] = get_input_data(p, 6, 16);
4178c2ecf20Sopenharmony_ci}
4188c2ecf20Sopenharmony_ci
4198c2ecf20Sopenharmony_ci/* update the hashtable entries.
4208c2ecf20Sopenharmony_ci * only call this after finding/adding the current template
4218c2ecf20Sopenharmony_ci * the dataN fields for the current 8 byte block must be already updated
4228c2ecf20Sopenharmony_ci */
4238c2ecf20Sopenharmony_cistatic void update_hashtables(struct sw842_param *p)
4248c2ecf20Sopenharmony_ci{
4258c2ecf20Sopenharmony_ci	u64 pos = p->in - p->instart;
4268c2ecf20Sopenharmony_ci	u64 n8 = (pos >> 3) % (1 << I8_BITS);
4278c2ecf20Sopenharmony_ci	u64 n4 = (pos >> 2) % (1 << I4_BITS);
4288c2ecf20Sopenharmony_ci	u64 n2 = (pos >> 1) % (1 << I2_BITS);
4298c2ecf20Sopenharmony_ci
4308c2ecf20Sopenharmony_ci	replace_hash(p, 8, n8, 0);
4318c2ecf20Sopenharmony_ci	replace_hash(p, 4, n4, 0);
4328c2ecf20Sopenharmony_ci	replace_hash(p, 4, n4, 1);
4338c2ecf20Sopenharmony_ci	replace_hash(p, 2, n2, 0);
4348c2ecf20Sopenharmony_ci	replace_hash(p, 2, n2, 1);
4358c2ecf20Sopenharmony_ci	replace_hash(p, 2, n2, 2);
4368c2ecf20Sopenharmony_ci	replace_hash(p, 2, n2, 3);
4378c2ecf20Sopenharmony_ci}
4388c2ecf20Sopenharmony_ci
4398c2ecf20Sopenharmony_ci/* find the next template to use, and add it
4408c2ecf20Sopenharmony_ci * the p->dataN fields must already be set for the current 8 byte block
4418c2ecf20Sopenharmony_ci */
4428c2ecf20Sopenharmony_cistatic int process_next(struct sw842_param *p)
4438c2ecf20Sopenharmony_ci{
4448c2ecf20Sopenharmony_ci	int ret, i;
4458c2ecf20Sopenharmony_ci
4468c2ecf20Sopenharmony_ci	p->index8[0] = INDEX_NOT_CHECKED;
4478c2ecf20Sopenharmony_ci	p->index4[0] = INDEX_NOT_CHECKED;
4488c2ecf20Sopenharmony_ci	p->index4[1] = INDEX_NOT_CHECKED;
4498c2ecf20Sopenharmony_ci	p->index2[0] = INDEX_NOT_CHECKED;
4508c2ecf20Sopenharmony_ci	p->index2[1] = INDEX_NOT_CHECKED;
4518c2ecf20Sopenharmony_ci	p->index2[2] = INDEX_NOT_CHECKED;
4528c2ecf20Sopenharmony_ci	p->index2[3] = INDEX_NOT_CHECKED;
4538c2ecf20Sopenharmony_ci
4548c2ecf20Sopenharmony_ci	/* check up to OPS_MAX - 1; last op is our fallback */
4558c2ecf20Sopenharmony_ci	for (i = 0; i < OPS_MAX - 1; i++) {
4568c2ecf20Sopenharmony_ci		if (check_template(p, i))
4578c2ecf20Sopenharmony_ci			break;
4588c2ecf20Sopenharmony_ci	}
4598c2ecf20Sopenharmony_ci
4608c2ecf20Sopenharmony_ci	ret = add_template(p, i);
4618c2ecf20Sopenharmony_ci	if (ret)
4628c2ecf20Sopenharmony_ci		return ret;
4638c2ecf20Sopenharmony_ci
4648c2ecf20Sopenharmony_ci	return 0;
4658c2ecf20Sopenharmony_ci}
4668c2ecf20Sopenharmony_ci
4678c2ecf20Sopenharmony_ci/**
4688c2ecf20Sopenharmony_ci * sw842_compress
4698c2ecf20Sopenharmony_ci *
4708c2ecf20Sopenharmony_ci * Compress the uncompressed buffer of length @ilen at @in to the output buffer
4718c2ecf20Sopenharmony_ci * @out, using no more than @olen bytes, using the 842 compression format.
4728c2ecf20Sopenharmony_ci *
4738c2ecf20Sopenharmony_ci * Returns: 0 on success, error on failure.  The @olen parameter
4748c2ecf20Sopenharmony_ci * will contain the number of output bytes written on success, or
4758c2ecf20Sopenharmony_ci * 0 on error.
4768c2ecf20Sopenharmony_ci */
4778c2ecf20Sopenharmony_ciint sw842_compress(const u8 *in, unsigned int ilen,
4788c2ecf20Sopenharmony_ci		   u8 *out, unsigned int *olen, void *wmem)
4798c2ecf20Sopenharmony_ci{
4808c2ecf20Sopenharmony_ci	struct sw842_param *p = (struct sw842_param *)wmem;
4818c2ecf20Sopenharmony_ci	int ret;
4828c2ecf20Sopenharmony_ci	u64 last, next, pad, total;
4838c2ecf20Sopenharmony_ci	u8 repeat_count = 0;
4848c2ecf20Sopenharmony_ci	u32 crc;
4858c2ecf20Sopenharmony_ci
4868c2ecf20Sopenharmony_ci	BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS);
4878c2ecf20Sopenharmony_ci
4888c2ecf20Sopenharmony_ci	init_hashtable_nodes(p, 8);
4898c2ecf20Sopenharmony_ci	init_hashtable_nodes(p, 4);
4908c2ecf20Sopenharmony_ci	init_hashtable_nodes(p, 2);
4918c2ecf20Sopenharmony_ci
4928c2ecf20Sopenharmony_ci	p->in = (u8 *)in;
4938c2ecf20Sopenharmony_ci	p->instart = p->in;
4948c2ecf20Sopenharmony_ci	p->ilen = ilen;
4958c2ecf20Sopenharmony_ci	p->out = out;
4968c2ecf20Sopenharmony_ci	p->olen = *olen;
4978c2ecf20Sopenharmony_ci	p->bit = 0;
4988c2ecf20Sopenharmony_ci
4998c2ecf20Sopenharmony_ci	total = p->olen;
5008c2ecf20Sopenharmony_ci
5018c2ecf20Sopenharmony_ci	*olen = 0;
5028c2ecf20Sopenharmony_ci
5038c2ecf20Sopenharmony_ci	/* if using strict mode, we can only compress a multiple of 8 */
5048c2ecf20Sopenharmony_ci	if (sw842_strict && (ilen % 8)) {
5058c2ecf20Sopenharmony_ci		pr_err("Using strict mode, can't compress len %d\n", ilen);
5068c2ecf20Sopenharmony_ci		return -EINVAL;
5078c2ecf20Sopenharmony_ci	}
5088c2ecf20Sopenharmony_ci
5098c2ecf20Sopenharmony_ci	/* let's compress at least 8 bytes, mkay? */
5108c2ecf20Sopenharmony_ci	if (unlikely(ilen < 8))
5118c2ecf20Sopenharmony_ci		goto skip_comp;
5128c2ecf20Sopenharmony_ci
5138c2ecf20Sopenharmony_ci	/* make initial 'last' different so we don't match the first time */
5148c2ecf20Sopenharmony_ci	last = ~get_unaligned((u64 *)p->in);
5158c2ecf20Sopenharmony_ci
5168c2ecf20Sopenharmony_ci	while (p->ilen > 7) {
5178c2ecf20Sopenharmony_ci		next = get_unaligned((u64 *)p->in);
5188c2ecf20Sopenharmony_ci
5198c2ecf20Sopenharmony_ci		/* must get the next data, as we need to update the hashtable
5208c2ecf20Sopenharmony_ci		 * entries with the new data every time
5218c2ecf20Sopenharmony_ci		 */
5228c2ecf20Sopenharmony_ci		get_next_data(p);
5238c2ecf20Sopenharmony_ci
5248c2ecf20Sopenharmony_ci		/* we don't care about endianness in last or next;
5258c2ecf20Sopenharmony_ci		 * we're just comparing 8 bytes to another 8 bytes,
5268c2ecf20Sopenharmony_ci		 * they're both the same endianness
5278c2ecf20Sopenharmony_ci		 */
5288c2ecf20Sopenharmony_ci		if (next == last) {
5298c2ecf20Sopenharmony_ci			/* repeat count bits are 0-based, so we stop at +1 */
5308c2ecf20Sopenharmony_ci			if (++repeat_count <= REPEAT_BITS_MAX)
5318c2ecf20Sopenharmony_ci				goto repeat;
5328c2ecf20Sopenharmony_ci		}
5338c2ecf20Sopenharmony_ci		if (repeat_count) {
5348c2ecf20Sopenharmony_ci			ret = add_repeat_template(p, repeat_count);
5358c2ecf20Sopenharmony_ci			repeat_count = 0;
5368c2ecf20Sopenharmony_ci			if (next == last) /* reached max repeat bits */
5378c2ecf20Sopenharmony_ci				goto repeat;
5388c2ecf20Sopenharmony_ci		}
5398c2ecf20Sopenharmony_ci
5408c2ecf20Sopenharmony_ci		if (next == 0)
5418c2ecf20Sopenharmony_ci			ret = add_zeros_template(p);
5428c2ecf20Sopenharmony_ci		else
5438c2ecf20Sopenharmony_ci			ret = process_next(p);
5448c2ecf20Sopenharmony_ci
5458c2ecf20Sopenharmony_ci		if (ret)
5468c2ecf20Sopenharmony_ci			return ret;
5478c2ecf20Sopenharmony_ci
5488c2ecf20Sopenharmony_cirepeat:
5498c2ecf20Sopenharmony_ci		last = next;
5508c2ecf20Sopenharmony_ci		update_hashtables(p);
5518c2ecf20Sopenharmony_ci		p->in += 8;
5528c2ecf20Sopenharmony_ci		p->ilen -= 8;
5538c2ecf20Sopenharmony_ci	}
5548c2ecf20Sopenharmony_ci
5558c2ecf20Sopenharmony_ci	if (repeat_count) {
5568c2ecf20Sopenharmony_ci		ret = add_repeat_template(p, repeat_count);
5578c2ecf20Sopenharmony_ci		if (ret)
5588c2ecf20Sopenharmony_ci			return ret;
5598c2ecf20Sopenharmony_ci	}
5608c2ecf20Sopenharmony_ci
5618c2ecf20Sopenharmony_ciskip_comp:
5628c2ecf20Sopenharmony_ci	if (p->ilen > 0) {
5638c2ecf20Sopenharmony_ci		ret = add_short_data_template(p, p->ilen);
5648c2ecf20Sopenharmony_ci		if (ret)
5658c2ecf20Sopenharmony_ci			return ret;
5668c2ecf20Sopenharmony_ci
5678c2ecf20Sopenharmony_ci		p->in += p->ilen;
5688c2ecf20Sopenharmony_ci		p->ilen = 0;
5698c2ecf20Sopenharmony_ci	}
5708c2ecf20Sopenharmony_ci
5718c2ecf20Sopenharmony_ci	ret = add_end_template(p);
5728c2ecf20Sopenharmony_ci	if (ret)
5738c2ecf20Sopenharmony_ci		return ret;
5748c2ecf20Sopenharmony_ci
5758c2ecf20Sopenharmony_ci	/*
5768c2ecf20Sopenharmony_ci	 * crc(0:31) is appended to target data starting with the next
5778c2ecf20Sopenharmony_ci	 * bit after End of stream template.
5788c2ecf20Sopenharmony_ci	 * nx842 calculates CRC for data in big-endian format. So doing
5798c2ecf20Sopenharmony_ci	 * same here so that sw842 decompression can be used for both
5808c2ecf20Sopenharmony_ci	 * compressed data.
5818c2ecf20Sopenharmony_ci	 */
5828c2ecf20Sopenharmony_ci	crc = crc32_be(0, in, ilen);
5838c2ecf20Sopenharmony_ci	ret = add_bits(p, crc, CRC_BITS);
5848c2ecf20Sopenharmony_ci	if (ret)
5858c2ecf20Sopenharmony_ci		return ret;
5868c2ecf20Sopenharmony_ci
5878c2ecf20Sopenharmony_ci	if (p->bit) {
5888c2ecf20Sopenharmony_ci		p->out++;
5898c2ecf20Sopenharmony_ci		p->olen--;
5908c2ecf20Sopenharmony_ci		p->bit = 0;
5918c2ecf20Sopenharmony_ci	}
5928c2ecf20Sopenharmony_ci
5938c2ecf20Sopenharmony_ci	/* pad compressed length to multiple of 8 */
5948c2ecf20Sopenharmony_ci	pad = (8 - ((total - p->olen) % 8)) % 8;
5958c2ecf20Sopenharmony_ci	if (pad) {
5968c2ecf20Sopenharmony_ci		if (pad > p->olen) /* we were so close! */
5978c2ecf20Sopenharmony_ci			return -ENOSPC;
5988c2ecf20Sopenharmony_ci		memset(p->out, 0, pad);
5998c2ecf20Sopenharmony_ci		p->out += pad;
6008c2ecf20Sopenharmony_ci		p->olen -= pad;
6018c2ecf20Sopenharmony_ci	}
6028c2ecf20Sopenharmony_ci
6038c2ecf20Sopenharmony_ci	if (unlikely((total - p->olen) > UINT_MAX))
6048c2ecf20Sopenharmony_ci		return -ENOSPC;
6058c2ecf20Sopenharmony_ci
6068c2ecf20Sopenharmony_ci	*olen = total - p->olen;
6078c2ecf20Sopenharmony_ci
6088c2ecf20Sopenharmony_ci	return 0;
6098c2ecf20Sopenharmony_ci}
6108c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(sw842_compress);
6118c2ecf20Sopenharmony_ci
6128c2ecf20Sopenharmony_cistatic int __init sw842_init(void)
6138c2ecf20Sopenharmony_ci{
6148c2ecf20Sopenharmony_ci	if (sw842_template_counts)
6158c2ecf20Sopenharmony_ci		sw842_debugfs_create();
6168c2ecf20Sopenharmony_ci
6178c2ecf20Sopenharmony_ci	return 0;
6188c2ecf20Sopenharmony_ci}
6198c2ecf20Sopenharmony_cimodule_init(sw842_init);
6208c2ecf20Sopenharmony_ci
6218c2ecf20Sopenharmony_cistatic void __exit sw842_exit(void)
6228c2ecf20Sopenharmony_ci{
6238c2ecf20Sopenharmony_ci	if (sw842_template_counts)
6248c2ecf20Sopenharmony_ci		sw842_debugfs_remove();
6258c2ecf20Sopenharmony_ci}
6268c2ecf20Sopenharmony_cimodule_exit(sw842_exit);
6278c2ecf20Sopenharmony_ci
6288c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
6298c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("Software 842 Compressor");
6308c2ecf20Sopenharmony_ciMODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
631