162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci
/* P9 gunzip sample code for demonstrating the P9 NX hardware
 * interface.  Not intended for production use or for performance or
 * compression ratio measurements.  Note also that /dev/crypto/gzip,
 * VAS and skiboot support are required.
762306a36Sopenharmony_ci *
862306a36Sopenharmony_ci * Copyright 2020 IBM Corp.
962306a36Sopenharmony_ci *
1062306a36Sopenharmony_ci * Author: Bulent Abali <abali@us.ibm.com>
1162306a36Sopenharmony_ci *
1262306a36Sopenharmony_ci * https://github.com/libnxz/power-gzip for zlib api and other utils
1362306a36Sopenharmony_ci * Definitions of acronyms used here.  See
1462306a36Sopenharmony_ci * P9 NX Gzip Accelerator User's Manual for details:
1562306a36Sopenharmony_ci * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
1662306a36Sopenharmony_ci *
1762306a36Sopenharmony_ci * adler/crc: 32 bit checksums appended to stream tail
1862306a36Sopenharmony_ci * ce:       completion extension
1962306a36Sopenharmony_ci * cpb:      coprocessor parameter block (metadata)
2062306a36Sopenharmony_ci * crb:      coprocessor request block (command)
2162306a36Sopenharmony_ci * csb:      coprocessor status block (status)
2262306a36Sopenharmony_ci * dht:      dynamic huffman table
2362306a36Sopenharmony_ci * dde:      data descriptor element (address, length)
2462306a36Sopenharmony_ci * ddl:      list of ddes
2562306a36Sopenharmony_ci * dh/fh:    dynamic and fixed huffman types
2662306a36Sopenharmony_ci * fc:       coprocessor function code
2762306a36Sopenharmony_ci * histlen:  history/dictionary length
2862306a36Sopenharmony_ci * history:  sliding window of up to 32KB of data
2962306a36Sopenharmony_ci * lzcount:  Deflate LZ symbol counts
3062306a36Sopenharmony_ci * rembytecnt: remaining byte count
3162306a36Sopenharmony_ci * sfbt:     source final block type; last block's type during decomp
3262306a36Sopenharmony_ci * spbc:     source processed byte count
3362306a36Sopenharmony_ci * subc:     source unprocessed bit count
3462306a36Sopenharmony_ci * tebc:     target ending bit count; valid bits in the last byte
3562306a36Sopenharmony_ci * tpbc:     target processed byte count
3662306a36Sopenharmony_ci * vas:      virtual accelerator switch; the user mode interface
3762306a36Sopenharmony_ci */
3862306a36Sopenharmony_ci
3962306a36Sopenharmony_ci#define _ISOC11_SOURCE	// For aligned_alloc()
4062306a36Sopenharmony_ci#define _DEFAULT_SOURCE	// For endian.h
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_ci#include <stdio.h>
4362306a36Sopenharmony_ci#include <stdlib.h>
4462306a36Sopenharmony_ci#include <string.h>
4562306a36Sopenharmony_ci#include <unistd.h>
4662306a36Sopenharmony_ci#include <stdint.h>
4762306a36Sopenharmony_ci#include <sys/types.h>
4862306a36Sopenharmony_ci#include <sys/stat.h>
4962306a36Sopenharmony_ci#include <sys/time.h>
5062306a36Sopenharmony_ci#include <sys/fcntl.h>
5162306a36Sopenharmony_ci#include <sys/mman.h>
5262306a36Sopenharmony_ci#include <endian.h>
5362306a36Sopenharmony_ci#include <bits/endian.h>
5462306a36Sopenharmony_ci#include <sys/ioctl.h>
5562306a36Sopenharmony_ci#include <assert.h>
5662306a36Sopenharmony_ci#include <errno.h>
5762306a36Sopenharmony_ci#include <signal.h>
5862306a36Sopenharmony_ci#include "nxu.h"
5962306a36Sopenharmony_ci#include "nx.h"
6062306a36Sopenharmony_ci#include "crb.h"
6162306a36Sopenharmony_ci
/* NOTE(review): not referenced directly in this part of the file;
 * presumably the debug level consulted by the NXPRT() trace macro from
 * the nx headers -- confirm.
 */
int nx_dbg;
/* NOTE(review): not referenced directly in this part of the file;
 * presumably the log stream expected by the nx headers -- confirm.
 */
FILE *nx_gzip_log;
6462306a36Sopenharmony_ci
/* Smaller / larger of two values.  Arguments may be evaluated twice. */
#define NX_MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
#define NX_MAX(X, Y) (((X) > (Y)) ? (X) : (Y))

/* Fetch the next byte (or EOF) from the input stream X */
#define GETINPC(X) fgetc(X)
/* Size of the buffer that receives the gzip header FNAME field */
#define FNAME_MAX 1024

/*
 * Fifo queue management.  A fifo is described by the offset of its first
 * used byte (cur), the number of used bytes (used) and its capacity (len).
 * Both the used and the free region may wrap around the end of the buffer,
 * so each is reported as a "first" and a "last" part.
 */
#define fifo_used_bytes(used) (used)
#define fifo_free_bytes(used, len) ((len) - (used))

/* Non-zero when the used region ends at or before the end of the buffer */
#define fifo_used_fits(cur, used, len) (((cur) + (used)) <= (len))

/* Number of free bytes in the first and last parts */
#define fifo_free_first_bytes(cur, used, len) \
	(fifo_used_fits(cur, used, len) ? (len) - ((cur) + (used)) : 0)
#define fifo_free_last_bytes(cur, used, len) \
	(fifo_used_fits(cur, used, len) ? (cur) : (len) - (used))

/* Number of used bytes in the first and last parts */
#define fifo_used_first_bytes(cur, used, len) \
	(fifo_used_fits(cur, used, len) ? (used) : (len) - (cur))
#define fifo_used_last_bytes(cur, used, len) \
	(fifo_used_fits(cur, used, len) ? 0 : ((used) + (cur)) - (len))

/* Offsets at which the first and last free parts begin */
#define fifo_free_first_offset(cur, used) ((cur) + (used))
#define fifo_free_last_offset(cur, used, len) \
	fifo_used_last_bytes(cur, used, len)

/* Offsets at which the first and last used parts begin */
#define fifo_used_first_offset(cur) (cur)
#define fifo_used_last_offset(cur) (0)
9162306a36Sopenharmony_ci
/* Capacity of the input and output fifos: 16 MiB each */
const int fifo_in_len = 16 * 1024 * 1024;
const int fifo_out_len = 16 * 1024 * 1024;
/* 64 KiB; used as the page size when sizing and touching the fifos */
const int page_sz = 64 * 1024;
/* 128 bytes; fifo allocation alignment and fifo gap size */
const int line_sz = 128;
/* 32 KiB */
const int window_max = 32 * 1024;
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_ci/*
9962306a36Sopenharmony_ci * Adds an (address, len) pair to the list of ddes (ddl) and updates
10062306a36Sopenharmony_ci * the base dde.  ddl[0] is the only dde in a direct dde which
10162306a36Sopenharmony_ci * contains a single (addr,len) pair.  For more pairs, ddl[0] becomes
10262306a36Sopenharmony_ci * the indirect (base) dde that points to a list of direct ddes.
10362306a36Sopenharmony_ci * See Section 6.4 of the NX-gzip user manual for DDE description.
10462306a36Sopenharmony_ci * Addr=NULL, len=0 clears the ddl[0].  Returns the total number of
10562306a36Sopenharmony_ci * bytes in ddl.  Caller is responsible for allocting the array of
10662306a36Sopenharmony_ci * nx_dde_t *ddl.  If N addresses are required in the scatter-gather
10762306a36Sopenharmony_ci * list, the ddl array must have N+1 entries minimum.
10862306a36Sopenharmony_ci */
10962306a36Sopenharmony_cistatic inline uint32_t nx_append_dde(struct nx_dde_t *ddl, void *addr,
11062306a36Sopenharmony_ci					uint32_t len)
11162306a36Sopenharmony_ci{
11262306a36Sopenharmony_ci	uint32_t ddecnt;
11362306a36Sopenharmony_ci	uint32_t bytes;
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci	if (addr == NULL && len == 0) {
11662306a36Sopenharmony_ci		clearp_dde(ddl);
11762306a36Sopenharmony_ci		return 0;
11862306a36Sopenharmony_ci	}
11962306a36Sopenharmony_ci
12062306a36Sopenharmony_ci	NXPRT(fprintf(stderr, "%d: %s addr %p len %x\n", __LINE__, addr,
12162306a36Sopenharmony_ci			__func__, len));
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci	/* Number of ddes in the dde list ; == 0 when it is a direct dde */
12462306a36Sopenharmony_ci	ddecnt = getpnn(ddl, dde_count);
12562306a36Sopenharmony_ci	bytes = getp32(ddl, ddebc);
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_ci	if (ddecnt == 0 && bytes == 0) {
12862306a36Sopenharmony_ci		/* First dde is unused; make it a direct dde */
12962306a36Sopenharmony_ci		bytes = len;
13062306a36Sopenharmony_ci		putp32(ddl, ddebc, bytes);
13162306a36Sopenharmony_ci		putp64(ddl, ddead, (uint64_t) addr);
13262306a36Sopenharmony_ci	} else if (ddecnt == 0) {
13362306a36Sopenharmony_ci		/* Converting direct to indirect dde
13462306a36Sopenharmony_ci		 * ddl[0] becomes head dde of ddl
13562306a36Sopenharmony_ci		 * copy direct to indirect first.
13662306a36Sopenharmony_ci		 */
13762306a36Sopenharmony_ci		ddl[1] = ddl[0];
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_ci		/* Add the new dde next */
14062306a36Sopenharmony_ci		clear_dde(ddl[2]);
14162306a36Sopenharmony_ci		put32(ddl[2], ddebc, len);
14262306a36Sopenharmony_ci		put64(ddl[2], ddead, (uint64_t) addr);
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci		/* Ddl head points to 2 direct ddes */
14562306a36Sopenharmony_ci		ddecnt = 2;
14662306a36Sopenharmony_ci		putpnn(ddl, dde_count, ddecnt);
14762306a36Sopenharmony_ci		bytes = bytes + len;
14862306a36Sopenharmony_ci		putp32(ddl, ddebc, bytes);
14962306a36Sopenharmony_ci		/* Pointer to the first direct dde */
15062306a36Sopenharmony_ci		putp64(ddl, ddead, (uint64_t) &ddl[1]);
15162306a36Sopenharmony_ci	} else {
15262306a36Sopenharmony_ci		/* Append a dde to an existing indirect ddl */
15362306a36Sopenharmony_ci		++ddecnt;
15462306a36Sopenharmony_ci		clear_dde(ddl[ddecnt]);
15562306a36Sopenharmony_ci		put64(ddl[ddecnt], ddead, (uint64_t) addr);
15662306a36Sopenharmony_ci		put32(ddl[ddecnt], ddebc, len);
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_ci		putpnn(ddl, dde_count, ddecnt);
15962306a36Sopenharmony_ci		bytes = bytes + len;
16062306a36Sopenharmony_ci		putp32(ddl, ddebc, bytes); /* byte sum of all dde */
16162306a36Sopenharmony_ci	}
16262306a36Sopenharmony_ci	return bytes;
16362306a36Sopenharmony_ci}
16462306a36Sopenharmony_ci
16562306a36Sopenharmony_ci/*
16662306a36Sopenharmony_ci * Touch specified number of pages represented in number bytes
16762306a36Sopenharmony_ci * beginning from the first buffer in a dde list.
16862306a36Sopenharmony_ci * Do not touch the pages past buf_sz-th byte's page.
16962306a36Sopenharmony_ci *
17062306a36Sopenharmony_ci * Set buf_sz = 0 to touch all pages described by the ddep.
17162306a36Sopenharmony_ci */
17262306a36Sopenharmony_cistatic int nx_touch_pages_dde(struct nx_dde_t *ddep, long buf_sz, long page_sz,
17362306a36Sopenharmony_ci				int wr)
17462306a36Sopenharmony_ci{
17562306a36Sopenharmony_ci	uint32_t indirect_count;
17662306a36Sopenharmony_ci	uint32_t buf_len;
17762306a36Sopenharmony_ci	long total;
17862306a36Sopenharmony_ci	uint64_t buf_addr;
17962306a36Sopenharmony_ci	struct nx_dde_t *dde_list;
18062306a36Sopenharmony_ci	int i;
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_ci	assert(!!ddep);
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci	indirect_count = getpnn(ddep, dde_count);
18562306a36Sopenharmony_ci
18662306a36Sopenharmony_ci	NXPRT(fprintf(stderr, "%s dde_count %d request len ", __func__,
18762306a36Sopenharmony_ci			indirect_count));
18862306a36Sopenharmony_ci	NXPRT(fprintf(stderr, "0x%lx\n", buf_sz));
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_ci	if (indirect_count == 0) {
19162306a36Sopenharmony_ci		/* Direct dde */
19262306a36Sopenharmony_ci		buf_len = getp32(ddep, ddebc);
19362306a36Sopenharmony_ci		buf_addr = getp64(ddep, ddead);
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci		NXPRT(fprintf(stderr, "touch direct ddebc 0x%x ddead %p\n",
19662306a36Sopenharmony_ci				buf_len, (void *)buf_addr));
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci		if (buf_sz == 0)
19962306a36Sopenharmony_ci			nxu_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
20062306a36Sopenharmony_ci		else
20162306a36Sopenharmony_ci			nxu_touch_pages((void *)buf_addr, NX_MIN(buf_len,
20262306a36Sopenharmony_ci					buf_sz), page_sz, wr);
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_ci		return ERR_NX_OK;
20562306a36Sopenharmony_ci	}
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_ci	/* Indirect dde */
20862306a36Sopenharmony_ci	if (indirect_count > MAX_DDE_COUNT)
20962306a36Sopenharmony_ci		return ERR_NX_EXCESSIVE_DDE;
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_ci	/* First address of the list */
21262306a36Sopenharmony_ci	dde_list = (struct nx_dde_t *) getp64(ddep, ddead);
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_ci	if (buf_sz == 0)
21562306a36Sopenharmony_ci		buf_sz = getp32(ddep, ddebc);
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci	total = 0;
21862306a36Sopenharmony_ci	for (i = 0; i < indirect_count; i++) {
21962306a36Sopenharmony_ci		buf_len = get32(dde_list[i], ddebc);
22062306a36Sopenharmony_ci		buf_addr = get64(dde_list[i], ddead);
22162306a36Sopenharmony_ci		total += buf_len;
22262306a36Sopenharmony_ci
22362306a36Sopenharmony_ci		NXPRT(fprintf(stderr, "touch loop len 0x%x ddead %p total ",
22462306a36Sopenharmony_ci				buf_len, (void *)buf_addr));
22562306a36Sopenharmony_ci		NXPRT(fprintf(stderr, "0x%lx\n", total));
22662306a36Sopenharmony_ci
22762306a36Sopenharmony_ci		/* Touching fewer pages than encoded in the ddebc */
22862306a36Sopenharmony_ci		if (total > buf_sz) {
22962306a36Sopenharmony_ci			buf_len = NX_MIN(buf_len, total - buf_sz);
23062306a36Sopenharmony_ci			nxu_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
23162306a36Sopenharmony_ci			NXPRT(fprintf(stderr, "touch loop break len 0x%x ",
23262306a36Sopenharmony_ci				      buf_len));
23362306a36Sopenharmony_ci			NXPRT(fprintf(stderr, "ddead %p\n", (void *)buf_addr));
23462306a36Sopenharmony_ci			break;
23562306a36Sopenharmony_ci		}
23662306a36Sopenharmony_ci		nxu_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
23762306a36Sopenharmony_ci	}
23862306a36Sopenharmony_ci	return ERR_NX_OK;
23962306a36Sopenharmony_ci}
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci/*
24262306a36Sopenharmony_ci * Src and dst buffers are supplied in scatter gather lists.
24362306a36Sopenharmony_ci * NX function code and other parameters supplied in cmdp.
24462306a36Sopenharmony_ci */
24562306a36Sopenharmony_cistatic int nx_submit_job(struct nx_dde_t *src, struct nx_dde_t *dst,
24662306a36Sopenharmony_ci			 struct nx_gzip_crb_cpb_t *cmdp, void *handle)
24762306a36Sopenharmony_ci{
24862306a36Sopenharmony_ci	uint64_t csbaddr;
24962306a36Sopenharmony_ci
25062306a36Sopenharmony_ci	memset((void *)&cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
25162306a36Sopenharmony_ci
25262306a36Sopenharmony_ci	cmdp->crb.source_dde = *src;
25362306a36Sopenharmony_ci	cmdp->crb.target_dde = *dst;
25462306a36Sopenharmony_ci
25562306a36Sopenharmony_ci	/* Status, output byte count in tpbc */
25662306a36Sopenharmony_ci	csbaddr = ((uint64_t) &cmdp->crb.csb) & csb_address_mask;
25762306a36Sopenharmony_ci	put64(cmdp->crb, csb_address, csbaddr);
25862306a36Sopenharmony_ci
25962306a36Sopenharmony_ci	/* NX reports input bytes in spbc; cleared */
26062306a36Sopenharmony_ci	cmdp->cpb.out_spbc_comp_wrap = 0;
26162306a36Sopenharmony_ci	cmdp->cpb.out_spbc_comp_with_count = 0;
26262306a36Sopenharmony_ci	cmdp->cpb.out_spbc_decomp = 0;
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_ci	/* Clear output */
26562306a36Sopenharmony_ci	put32(cmdp->cpb, out_crc, INIT_CRC);
26662306a36Sopenharmony_ci	put32(cmdp->cpb, out_adler, INIT_ADLER);
26762306a36Sopenharmony_ci
26862306a36Sopenharmony_ci	/* Submit the crb, the job descriptor, to the accelerator. */
26962306a36Sopenharmony_ci	return nxu_submit_job(cmdp, handle);
27062306a36Sopenharmony_ci}
27162306a36Sopenharmony_ci
27262306a36Sopenharmony_ciint decompress_file(int argc, char **argv, void *devhandle)
27362306a36Sopenharmony_ci{
27462306a36Sopenharmony_ci	FILE *inpf = NULL;
27562306a36Sopenharmony_ci	FILE *outf = NULL;
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci	int c, expect, i, cc, rc = 0;
27862306a36Sopenharmony_ci	char gzfname[FNAME_MAX];
27962306a36Sopenharmony_ci
28062306a36Sopenharmony_ci	/* Queuing, file ops, byte counting */
28162306a36Sopenharmony_ci	char *fifo_in, *fifo_out;
28262306a36Sopenharmony_ci	int used_in, cur_in, used_out, cur_out, read_sz, n;
28362306a36Sopenharmony_ci	int first_free, last_free, first_used, last_used;
28462306a36Sopenharmony_ci	int first_offset, last_offset;
28562306a36Sopenharmony_ci	int write_sz, free_space, source_sz;
28662306a36Sopenharmony_ci	int source_sz_estimate, target_sz_estimate;
28762306a36Sopenharmony_ci	uint64_t last_comp_ratio = 0; /* 1000 max */
28862306a36Sopenharmony_ci	uint64_t total_out = 0;
28962306a36Sopenharmony_ci	int is_final, is_eof;
29062306a36Sopenharmony_ci
29162306a36Sopenharmony_ci	/* nx hardware */
29262306a36Sopenharmony_ci	int sfbt, subc, spbc, tpbc, nx_ce, fc, resuming = 0;
29362306a36Sopenharmony_ci	int history_len = 0;
29462306a36Sopenharmony_ci	struct nx_gzip_crb_cpb_t cmd, *cmdp;
29562306a36Sopenharmony_ci	struct nx_dde_t *ddl_in;
29662306a36Sopenharmony_ci	struct nx_dde_t dde_in[6] __aligned(128);
29762306a36Sopenharmony_ci	struct nx_dde_t *ddl_out;
29862306a36Sopenharmony_ci	struct nx_dde_t dde_out[6] __aligned(128);
29962306a36Sopenharmony_ci	int pgfault_retries;
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_ci	/* when using mmap'ed files */
30262306a36Sopenharmony_ci	off_t input_file_offset;
30362306a36Sopenharmony_ci
30462306a36Sopenharmony_ci	if (argc > 2) {
30562306a36Sopenharmony_ci		fprintf(stderr, "usage: %s <fname> or stdin\n", argv[0]);
30662306a36Sopenharmony_ci		fprintf(stderr, "    writes to stdout or <fname>.nx.gunzip\n");
30762306a36Sopenharmony_ci		return -1;
30862306a36Sopenharmony_ci	}
30962306a36Sopenharmony_ci
31062306a36Sopenharmony_ci	if (argc == 1) {
31162306a36Sopenharmony_ci		inpf = stdin;
31262306a36Sopenharmony_ci		outf = stdout;
31362306a36Sopenharmony_ci	} else if (argc == 2) {
31462306a36Sopenharmony_ci		char w[1024];
31562306a36Sopenharmony_ci		char *wp;
31662306a36Sopenharmony_ci
31762306a36Sopenharmony_ci		inpf = fopen(argv[1], "r");
31862306a36Sopenharmony_ci		if (inpf == NULL) {
31962306a36Sopenharmony_ci			perror(argv[1]);
32062306a36Sopenharmony_ci			return -1;
32162306a36Sopenharmony_ci		}
32262306a36Sopenharmony_ci
32362306a36Sopenharmony_ci		/* Make a new file name to write to.  Ignoring '.gz' */
32462306a36Sopenharmony_ci		wp = (NULL != (wp = strrchr(argv[1], '/'))) ? (wp+1) : argv[1];
32562306a36Sopenharmony_ci		strcpy(w, wp);
32662306a36Sopenharmony_ci		strcat(w, ".nx.gunzip");
32762306a36Sopenharmony_ci
32862306a36Sopenharmony_ci		outf = fopen(w, "w");
32962306a36Sopenharmony_ci		if (outf == NULL) {
33062306a36Sopenharmony_ci			perror(w);
33162306a36Sopenharmony_ci			return -1;
33262306a36Sopenharmony_ci		}
33362306a36Sopenharmony_ci	}
33462306a36Sopenharmony_ci
33562306a36Sopenharmony_ci	/* Decode the gzip header */
33662306a36Sopenharmony_ci	c = GETINPC(inpf); expect = 0x1f; /* ID1 */
33762306a36Sopenharmony_ci	if (c != expect)
33862306a36Sopenharmony_ci		goto err1;
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci	c = GETINPC(inpf); expect = 0x8b; /* ID2 */
34162306a36Sopenharmony_ci	if (c != expect)
34262306a36Sopenharmony_ci		goto err1;
34362306a36Sopenharmony_ci
34462306a36Sopenharmony_ci	c = GETINPC(inpf); expect = 0x08; /* CM */
34562306a36Sopenharmony_ci	if (c != expect)
34662306a36Sopenharmony_ci		goto err1;
34762306a36Sopenharmony_ci
34862306a36Sopenharmony_ci	int flg = GETINPC(inpf); /* FLG */
34962306a36Sopenharmony_ci
35062306a36Sopenharmony_ci	if (flg & 0xE0 || flg & 0x4 || flg == EOF)
35162306a36Sopenharmony_ci		goto err2;
35262306a36Sopenharmony_ci
35362306a36Sopenharmony_ci	fprintf(stderr, "gzHeader FLG %x\n", flg);
35462306a36Sopenharmony_ci
35562306a36Sopenharmony_ci	/* Read 6 bytes; ignoring the MTIME, XFL, OS fields in this
35662306a36Sopenharmony_ci	 * sample code.
35762306a36Sopenharmony_ci	 */
35862306a36Sopenharmony_ci	for (i = 0; i < 6; i++) {
35962306a36Sopenharmony_ci		char tmp[10];
36062306a36Sopenharmony_ci
36162306a36Sopenharmony_ci		tmp[i] = GETINPC(inpf);
36262306a36Sopenharmony_ci		if (tmp[i] == EOF)
36362306a36Sopenharmony_ci			goto err3;
36462306a36Sopenharmony_ci		fprintf(stderr, "%02x ", tmp[i]);
36562306a36Sopenharmony_ci		if (i == 5)
36662306a36Sopenharmony_ci			fprintf(stderr, "\n");
36762306a36Sopenharmony_ci	}
36862306a36Sopenharmony_ci	fprintf(stderr, "gzHeader MTIME, XFL, OS ignored\n");
36962306a36Sopenharmony_ci
37062306a36Sopenharmony_ci	/* FNAME */
37162306a36Sopenharmony_ci	if (flg & 0x8) {
37262306a36Sopenharmony_ci		int k = 0;
37362306a36Sopenharmony_ci
37462306a36Sopenharmony_ci		do {
37562306a36Sopenharmony_ci			c = GETINPC(inpf);
37662306a36Sopenharmony_ci			if (c == EOF || k >= FNAME_MAX)
37762306a36Sopenharmony_ci				goto err3;
37862306a36Sopenharmony_ci			gzfname[k++] = c;
37962306a36Sopenharmony_ci		} while (c);
38062306a36Sopenharmony_ci		fprintf(stderr, "gzHeader FNAME: %s\n", gzfname);
38162306a36Sopenharmony_ci	}
38262306a36Sopenharmony_ci
38362306a36Sopenharmony_ci	/* FHCRC */
38462306a36Sopenharmony_ci	if (flg & 0x2) {
38562306a36Sopenharmony_ci		c = GETINPC(inpf);
38662306a36Sopenharmony_ci		if (c == EOF)
38762306a36Sopenharmony_ci			goto err3;
38862306a36Sopenharmony_ci		c = GETINPC(inpf);
38962306a36Sopenharmony_ci		if (c == EOF)
39062306a36Sopenharmony_ci			goto err3;
39162306a36Sopenharmony_ci		fprintf(stderr, "gzHeader FHCRC: ignored\n");
39262306a36Sopenharmony_ci	}
39362306a36Sopenharmony_ci
39462306a36Sopenharmony_ci	used_in = cur_in = used_out = cur_out = 0;
39562306a36Sopenharmony_ci	is_final = is_eof = 0;
39662306a36Sopenharmony_ci
39762306a36Sopenharmony_ci	/* Allocate one page larger to prevent page faults due to NX
39862306a36Sopenharmony_ci	 * overfetching.
39962306a36Sopenharmony_ci	 * Either do this (char*)(uintptr_t)aligned_alloc or use
40062306a36Sopenharmony_ci	 * -std=c11 flag to make the int-to-pointer warning go away.
40162306a36Sopenharmony_ci	 */
40262306a36Sopenharmony_ci	assert((fifo_in  = (char *)(uintptr_t)aligned_alloc(line_sz,
40362306a36Sopenharmony_ci				   fifo_in_len + page_sz)) != NULL);
40462306a36Sopenharmony_ci	assert((fifo_out = (char *)(uintptr_t)aligned_alloc(line_sz,
40562306a36Sopenharmony_ci				   fifo_out_len + page_sz + line_sz)) != NULL);
40662306a36Sopenharmony_ci	/* Leave unused space due to history rounding rules */
40762306a36Sopenharmony_ci	fifo_out = fifo_out + line_sz;
40862306a36Sopenharmony_ci	nxu_touch_pages(fifo_out, fifo_out_len, page_sz, 1);
40962306a36Sopenharmony_ci
41062306a36Sopenharmony_ci	ddl_in  = &dde_in[0];
41162306a36Sopenharmony_ci	ddl_out = &dde_out[0];
41262306a36Sopenharmony_ci	cmdp = &cmd;
41362306a36Sopenharmony_ci	memset(&cmdp->crb, 0, sizeof(cmdp->crb));
41462306a36Sopenharmony_ci
41562306a36Sopenharmony_ciread_state:
41662306a36Sopenharmony_ci
41762306a36Sopenharmony_ci	/* Read from .gz file */
41862306a36Sopenharmony_ci
41962306a36Sopenharmony_ci	NXPRT(fprintf(stderr, "read_state:\n"));
42062306a36Sopenharmony_ci
42162306a36Sopenharmony_ci	if (is_eof != 0)
42262306a36Sopenharmony_ci		goto write_state;
42362306a36Sopenharmony_ci
	/* We read into fifo_in in two steps.  First: read into the free
	 * space that runs from offset cur_in + used_in to the end of the
	 * buffer.  Last: if the free space wraps around, read into the
	 * remaining free space, which ends just below offset cur_in.
	 */
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_ci	/* Reset fifo head to reduce unnecessary wrap arounds */
43062306a36Sopenharmony_ci	cur_in = (used_in == 0) ? 0 : cur_in;
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_ci	/* Free space total is reduced by a gap */
43362306a36Sopenharmony_ci	free_space = NX_MAX(0, fifo_free_bytes(used_in, fifo_in_len)
43462306a36Sopenharmony_ci			    - line_sz);
43562306a36Sopenharmony_ci
43662306a36Sopenharmony_ci	/* Free space may wrap around as first and last */
43762306a36Sopenharmony_ci	first_free = fifo_free_first_bytes(cur_in, used_in, fifo_in_len);
43862306a36Sopenharmony_ci	last_free  = fifo_free_last_bytes(cur_in, used_in, fifo_in_len);
43962306a36Sopenharmony_ci
44062306a36Sopenharmony_ci	/* Start offsets of the free memory */
44162306a36Sopenharmony_ci	first_offset = fifo_free_first_offset(cur_in, used_in);
44262306a36Sopenharmony_ci	last_offset  = fifo_free_last_offset(cur_in, used_in, fifo_in_len);
44362306a36Sopenharmony_ci
44462306a36Sopenharmony_ci	/* Reduce read_sz because of the line_sz gap */
44562306a36Sopenharmony_ci	read_sz = NX_MIN(free_space, first_free);
44662306a36Sopenharmony_ci	n = 0;
44762306a36Sopenharmony_ci	if (read_sz > 0) {
44862306a36Sopenharmony_ci		/* Read in to offset cur_in + used_in */
44962306a36Sopenharmony_ci		n = fread(fifo_in + first_offset, 1, read_sz, inpf);
45062306a36Sopenharmony_ci		used_in = used_in + n;
45162306a36Sopenharmony_ci		free_space = free_space - n;
45262306a36Sopenharmony_ci		assert(n <= read_sz);
45362306a36Sopenharmony_ci		if (n != read_sz) {
45462306a36Sopenharmony_ci			/* Either EOF or error; exit the read loop */
45562306a36Sopenharmony_ci			is_eof = 1;
45662306a36Sopenharmony_ci			goto write_state;
45762306a36Sopenharmony_ci		}
45862306a36Sopenharmony_ci	}
45962306a36Sopenharmony_ci
46062306a36Sopenharmony_ci	/* If free space wrapped around */
46162306a36Sopenharmony_ci	if (last_free > 0) {
46262306a36Sopenharmony_ci		/* Reduce read_sz because of the line_sz gap */
46362306a36Sopenharmony_ci		read_sz = NX_MIN(free_space, last_free);
46462306a36Sopenharmony_ci		n = 0;
46562306a36Sopenharmony_ci		if (read_sz > 0) {
46662306a36Sopenharmony_ci			n = fread(fifo_in + last_offset, 1, read_sz, inpf);
46762306a36Sopenharmony_ci			used_in = used_in + n;       /* Increase used space */
46862306a36Sopenharmony_ci			free_space = free_space - n; /* Decrease free space */
46962306a36Sopenharmony_ci			assert(n <= read_sz);
47062306a36Sopenharmony_ci			if (n != read_sz) {
47162306a36Sopenharmony_ci				/* Either EOF or error; exit the read loop */
47262306a36Sopenharmony_ci				is_eof = 1;
47362306a36Sopenharmony_ci				goto write_state;
47462306a36Sopenharmony_ci			}
47562306a36Sopenharmony_ci		}
47662306a36Sopenharmony_ci	}
47762306a36Sopenharmony_ci
47862306a36Sopenharmony_ci	/* At this point we have used_in bytes in fifo_in with the
47962306a36Sopenharmony_ci	 * data head starting at cur_in and possibly wrapping around.
48062306a36Sopenharmony_ci	 */
48162306a36Sopenharmony_ci
48262306a36Sopenharmony_ciwrite_state:
48362306a36Sopenharmony_ci
48462306a36Sopenharmony_ci	/* Write decompressed data to output file */
48562306a36Sopenharmony_ci
48662306a36Sopenharmony_ci	NXPRT(fprintf(stderr, "write_state:\n"));
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci	if (used_out == 0)
48962306a36Sopenharmony_ci		goto decomp_state;
49062306a36Sopenharmony_ci
	/* If fifo_out has data waiting, write it out to the file to
	 * make free target space for the accelerator.  The used bytes
	 * may be split between the first and last parts of fifo_out.
	 */
49562306a36Sopenharmony_ci
49662306a36Sopenharmony_ci	first_used = fifo_used_first_bytes(cur_out, used_out, fifo_out_len);
49762306a36Sopenharmony_ci	last_used  = fifo_used_last_bytes(cur_out, used_out, fifo_out_len);
49862306a36Sopenharmony_ci
49962306a36Sopenharmony_ci	write_sz = first_used;
50062306a36Sopenharmony_ci
50162306a36Sopenharmony_ci	n = 0;
50262306a36Sopenharmony_ci	if (write_sz > 0) {
50362306a36Sopenharmony_ci		n = fwrite(fifo_out + cur_out, 1, write_sz, outf);
50462306a36Sopenharmony_ci		used_out = used_out - n;
50562306a36Sopenharmony_ci		/* Move head of the fifo */
50662306a36Sopenharmony_ci		cur_out = (cur_out + n) % fifo_out_len;
50762306a36Sopenharmony_ci		assert(n <= write_sz);
50862306a36Sopenharmony_ci		if (n != write_sz) {
50962306a36Sopenharmony_ci			fprintf(stderr, "error: write\n");
51062306a36Sopenharmony_ci			rc = -1;
51162306a36Sopenharmony_ci			goto err5;
51262306a36Sopenharmony_ci		}
51362306a36Sopenharmony_ci	}
51462306a36Sopenharmony_ci
51562306a36Sopenharmony_ci	if (last_used > 0) { /* If more data available in the last part */
51662306a36Sopenharmony_ci		write_sz = last_used; /* Keep it here for later */
51762306a36Sopenharmony_ci		n = 0;
51862306a36Sopenharmony_ci		if (write_sz > 0) {
51962306a36Sopenharmony_ci			n = fwrite(fifo_out, 1, write_sz, outf);
52062306a36Sopenharmony_ci			used_out = used_out - n;
52162306a36Sopenharmony_ci			cur_out = (cur_out + n) % fifo_out_len;
52262306a36Sopenharmony_ci			assert(n <= write_sz);
52362306a36Sopenharmony_ci			if (n != write_sz) {
52462306a36Sopenharmony_ci				fprintf(stderr, "error: write\n");
52562306a36Sopenharmony_ci				rc = -1;
52662306a36Sopenharmony_ci				goto err5;
52762306a36Sopenharmony_ci			}
52862306a36Sopenharmony_ci		}
52962306a36Sopenharmony_ci	}
53062306a36Sopenharmony_ci
53162306a36Sopenharmony_cidecomp_state:
53262306a36Sopenharmony_ci
53362306a36Sopenharmony_ci	/* NX decompresses input data */
53462306a36Sopenharmony_ci
53562306a36Sopenharmony_ci	NXPRT(fprintf(stderr, "decomp_state:\n"));
53662306a36Sopenharmony_ci
53762306a36Sopenharmony_ci	if (is_final)
53862306a36Sopenharmony_ci		goto finish_state;
53962306a36Sopenharmony_ci
54062306a36Sopenharmony_ci	/* Address/len lists */
54162306a36Sopenharmony_ci	clearp_dde(ddl_in);
54262306a36Sopenharmony_ci	clearp_dde(ddl_out);
54362306a36Sopenharmony_ci
54462306a36Sopenharmony_ci	/* FC, CRC, HistLen, Table 6-6 */
54562306a36Sopenharmony_ci	if (resuming) {
54662306a36Sopenharmony_ci		/* Resuming a partially decompressed input.
54762306a36Sopenharmony_ci		 * The key to resume is supplying the 32KB
54862306a36Sopenharmony_ci		 * dictionary (history) to NX, which is basically
54962306a36Sopenharmony_ci		 * the last 32KB of output produced.
55062306a36Sopenharmony_ci		 */
55162306a36Sopenharmony_ci		fc = GZIP_FC_DECOMPRESS_RESUME;
55262306a36Sopenharmony_ci
55362306a36Sopenharmony_ci		cmdp->cpb.in_crc   = cmdp->cpb.out_crc;
55462306a36Sopenharmony_ci		cmdp->cpb.in_adler = cmdp->cpb.out_adler;
55562306a36Sopenharmony_ci
55662306a36Sopenharmony_ci		/* Round up the history size to quadword.  Section 2.10 */
55762306a36Sopenharmony_ci		history_len = (history_len + 15) / 16;
55862306a36Sopenharmony_ci		putnn(cmdp->cpb, in_histlen, history_len);
55962306a36Sopenharmony_ci		history_len = history_len * 16; /* bytes */
56062306a36Sopenharmony_ci
56162306a36Sopenharmony_ci		if (history_len > 0) {
56262306a36Sopenharmony_ci			/* Chain in the history buffer to the DDE list */
56362306a36Sopenharmony_ci			if (cur_out >= history_len) {
56462306a36Sopenharmony_ci				nx_append_dde(ddl_in, fifo_out
56562306a36Sopenharmony_ci					      + (cur_out - history_len),
56662306a36Sopenharmony_ci					      history_len);
56762306a36Sopenharmony_ci			} else {
56862306a36Sopenharmony_ci				nx_append_dde(ddl_in, fifo_out
56962306a36Sopenharmony_ci					      + ((fifo_out_len + cur_out)
57062306a36Sopenharmony_ci					      - history_len),
57162306a36Sopenharmony_ci					      history_len - cur_out);
57262306a36Sopenharmony_ci				/* Up to 32KB history wraps around fifo_out */
57362306a36Sopenharmony_ci				nx_append_dde(ddl_in, fifo_out, cur_out);
57462306a36Sopenharmony_ci			}
57562306a36Sopenharmony_ci
57662306a36Sopenharmony_ci		}
57762306a36Sopenharmony_ci	} else {
57862306a36Sopenharmony_ci		/* First decompress job */
57962306a36Sopenharmony_ci		fc = GZIP_FC_DECOMPRESS;
58062306a36Sopenharmony_ci
58162306a36Sopenharmony_ci		history_len = 0;
58262306a36Sopenharmony_ci		/* Writing 0 clears out subc as well */
58362306a36Sopenharmony_ci		cmdp->cpb.in_histlen = 0;
58462306a36Sopenharmony_ci		total_out = 0;
58562306a36Sopenharmony_ci
58662306a36Sopenharmony_ci		put32(cmdp->cpb, in_crc, INIT_CRC);
58762306a36Sopenharmony_ci		put32(cmdp->cpb, in_adler, INIT_ADLER);
58862306a36Sopenharmony_ci		put32(cmdp->cpb, out_crc, INIT_CRC);
58962306a36Sopenharmony_ci		put32(cmdp->cpb, out_adler, INIT_ADLER);
59062306a36Sopenharmony_ci
59162306a36Sopenharmony_ci		/* Assuming 10% compression ratio initially; use the
59262306a36Sopenharmony_ci		 * most recently measured compression ratio as a
59362306a36Sopenharmony_ci		 * heuristic to estimate the input and output
59462306a36Sopenharmony_ci		 * sizes.  If we give too much input, the target buffer
59562306a36Sopenharmony_ci		 * overflows and NX cycles are wasted, and then we
59662306a36Sopenharmony_ci		 * must retry with smaller input size.  1000 is 100%.
59762306a36Sopenharmony_ci		 */
59862306a36Sopenharmony_ci		last_comp_ratio = 100UL;
59962306a36Sopenharmony_ci	}
60062306a36Sopenharmony_ci	cmdp->crb.gzip_fc = 0;
60162306a36Sopenharmony_ci	putnn(cmdp->crb, gzip_fc, fc);
60262306a36Sopenharmony_ci
60362306a36Sopenharmony_ci	/*
60462306a36Sopenharmony_ci	 * NX source buffers
60562306a36Sopenharmony_ci	 */
60662306a36Sopenharmony_ci	first_used = fifo_used_first_bytes(cur_in, used_in, fifo_in_len);
60762306a36Sopenharmony_ci	last_used = fifo_used_last_bytes(cur_in, used_in, fifo_in_len);
60862306a36Sopenharmony_ci
60962306a36Sopenharmony_ci	if (first_used > 0)
61062306a36Sopenharmony_ci		nx_append_dde(ddl_in, fifo_in + cur_in, first_used);
61162306a36Sopenharmony_ci
61262306a36Sopenharmony_ci	if (last_used > 0)
61362306a36Sopenharmony_ci		nx_append_dde(ddl_in, fifo_in, last_used);
61462306a36Sopenharmony_ci
61562306a36Sopenharmony_ci	/*
61662306a36Sopenharmony_ci	 * NX target buffers
61762306a36Sopenharmony_ci	 */
61862306a36Sopenharmony_ci	first_free = fifo_free_first_bytes(cur_out, used_out, fifo_out_len);
61962306a36Sopenharmony_ci	last_free = fifo_free_last_bytes(cur_out, used_out, fifo_out_len);
62062306a36Sopenharmony_ci
62162306a36Sopenharmony_ci	/* Reduce output free space amount not to overwrite the history */
62262306a36Sopenharmony_ci	int target_max = NX_MAX(0, fifo_free_bytes(used_out, fifo_out_len)
62362306a36Sopenharmony_ci				- (1<<16));
62462306a36Sopenharmony_ci
62562306a36Sopenharmony_ci	NXPRT(fprintf(stderr, "target_max %d (0x%x)\n", target_max,
62662306a36Sopenharmony_ci		      target_max));
62762306a36Sopenharmony_ci
62862306a36Sopenharmony_ci	first_free = NX_MIN(target_max, first_free);
62962306a36Sopenharmony_ci	if (first_free > 0) {
63062306a36Sopenharmony_ci		first_offset = fifo_free_first_offset(cur_out, used_out);
63162306a36Sopenharmony_ci		nx_append_dde(ddl_out, fifo_out + first_offset, first_free);
63262306a36Sopenharmony_ci	}
63362306a36Sopenharmony_ci
63462306a36Sopenharmony_ci	if (last_free > 0) {
63562306a36Sopenharmony_ci		last_free = NX_MIN(target_max - first_free, last_free);
63662306a36Sopenharmony_ci		if (last_free > 0) {
63762306a36Sopenharmony_ci			last_offset = fifo_free_last_offset(cur_out, used_out,
63862306a36Sopenharmony_ci							    fifo_out_len);
63962306a36Sopenharmony_ci			nx_append_dde(ddl_out, fifo_out + last_offset,
64062306a36Sopenharmony_ci				      last_free);
64162306a36Sopenharmony_ci		}
64262306a36Sopenharmony_ci	}
64362306a36Sopenharmony_ci
64462306a36Sopenharmony_ci	/* Target buffer size is used to limit the source data size
64562306a36Sopenharmony_ci	 * based on previous measurements of compression ratio.
64662306a36Sopenharmony_ci	 */
64762306a36Sopenharmony_ci
64862306a36Sopenharmony_ci	/* source_sz includes history */
64962306a36Sopenharmony_ci	source_sz = getp32(ddl_in, ddebc);
65062306a36Sopenharmony_ci	assert(source_sz > history_len);
65162306a36Sopenharmony_ci	source_sz = source_sz - history_len;
65262306a36Sopenharmony_ci
65362306a36Sopenharmony_ci	/* Estimating how much source is needed to 3/4 fill a
65462306a36Sopenharmony_ci	 * target_max size target buffer.  If we overshoot, then NX
65562306a36Sopenharmony_ci	 * must repeat the job with smaller input and we waste
65662306a36Sopenharmony_ci	 * bandwidth.  If we undershoot then we use more NX calls than
65762306a36Sopenharmony_ci	 * necessary.
65862306a36Sopenharmony_ci	 */
65962306a36Sopenharmony_ci
66062306a36Sopenharmony_ci	source_sz_estimate = ((uint64_t)target_max * last_comp_ratio * 3UL)
66162306a36Sopenharmony_ci				/ 4000;
66262306a36Sopenharmony_ci
66362306a36Sopenharmony_ci	if (source_sz_estimate < source_sz) {
66462306a36Sopenharmony_ci		/* Target might be small, therefore limiting the
66562306a36Sopenharmony_ci		 * source data.
66662306a36Sopenharmony_ci		 */
66762306a36Sopenharmony_ci		source_sz = source_sz_estimate;
66862306a36Sopenharmony_ci		target_sz_estimate = target_max;
66962306a36Sopenharmony_ci	} else {
67062306a36Sopenharmony_ci		/* Source file might be small, therefore limiting target
67162306a36Sopenharmony_ci		 * touch pages to a smaller value to save processor cycles.
67262306a36Sopenharmony_ci		 */
67362306a36Sopenharmony_ci		target_sz_estimate = ((uint64_t)source_sz * 1000UL)
67462306a36Sopenharmony_ci					/ (last_comp_ratio + 1);
67562306a36Sopenharmony_ci		target_sz_estimate = NX_MIN(2 * target_sz_estimate,
67662306a36Sopenharmony_ci					    target_max);
67762306a36Sopenharmony_ci	}
67862306a36Sopenharmony_ci
67962306a36Sopenharmony_ci	source_sz = source_sz + history_len;
68062306a36Sopenharmony_ci
68162306a36Sopenharmony_ci	/* Some NX condition codes require submitting the NX job again.
68262306a36Sopenharmony_ci	 * Kernel doesn't handle NX page faults. Expects user code to
68362306a36Sopenharmony_ci	 * touch pages.
68462306a36Sopenharmony_ci	 */
68562306a36Sopenharmony_ci	pgfault_retries = NX_MAX_FAULTS;
68662306a36Sopenharmony_ci
68762306a36Sopenharmony_cirestart_nx:
68862306a36Sopenharmony_ci
68962306a36Sopenharmony_ci	putp32(ddl_in, ddebc, source_sz);
69062306a36Sopenharmony_ci
69162306a36Sopenharmony_ci	/* Fault in pages */
69262306a36Sopenharmony_ci	nxu_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), page_sz, 1);
69362306a36Sopenharmony_ci	nx_touch_pages_dde(ddl_in, 0, page_sz, 0);
69462306a36Sopenharmony_ci	nx_touch_pages_dde(ddl_out, target_sz_estimate, page_sz, 1);
69562306a36Sopenharmony_ci
69662306a36Sopenharmony_ci	/* Send job to NX */
69762306a36Sopenharmony_ci	cc = nx_submit_job(ddl_in, ddl_out, cmdp, devhandle);
69862306a36Sopenharmony_ci
69962306a36Sopenharmony_ci	switch (cc) {
70062306a36Sopenharmony_ci
70162306a36Sopenharmony_ci	case ERR_NX_AT_FAULT:
70262306a36Sopenharmony_ci
70362306a36Sopenharmony_ci		/* We touched the pages ahead of time.  In the most common case
70462306a36Sopenharmony_ci		 * we shouldn't be here.  But may be some pages were paged out.
70562306a36Sopenharmony_ci		 * Kernel should have placed the faulting address to fsaddr.
70662306a36Sopenharmony_ci		 */
70762306a36Sopenharmony_ci		NXPRT(fprintf(stderr, "ERR_NX_AT_FAULT %p\n",
70862306a36Sopenharmony_ci			      (void *)cmdp->crb.csb.fsaddr));
70962306a36Sopenharmony_ci
71062306a36Sopenharmony_ci		if (pgfault_retries == NX_MAX_FAULTS) {
71162306a36Sopenharmony_ci			/* Try once with exact number of pages */
71262306a36Sopenharmony_ci			--pgfault_retries;
71362306a36Sopenharmony_ci			goto restart_nx;
71462306a36Sopenharmony_ci		} else if (pgfault_retries > 0) {
71562306a36Sopenharmony_ci			/* If still faulting try fewer input pages
71662306a36Sopenharmony_ci			 * assuming memory outage
71762306a36Sopenharmony_ci			 */
71862306a36Sopenharmony_ci			if (source_sz > page_sz)
71962306a36Sopenharmony_ci				source_sz = NX_MAX(source_sz / 2, page_sz);
72062306a36Sopenharmony_ci			--pgfault_retries;
72162306a36Sopenharmony_ci			goto restart_nx;
72262306a36Sopenharmony_ci		} else {
72362306a36Sopenharmony_ci			fprintf(stderr, "cannot make progress; too many ");
72462306a36Sopenharmony_ci			fprintf(stderr, "page fault retries cc= %d\n", cc);
72562306a36Sopenharmony_ci			rc = -1;
72662306a36Sopenharmony_ci			goto err5;
72762306a36Sopenharmony_ci		}
72862306a36Sopenharmony_ci
72962306a36Sopenharmony_ci	case ERR_NX_DATA_LENGTH:
73062306a36Sopenharmony_ci
73162306a36Sopenharmony_ci		NXPRT(fprintf(stderr, "ERR_NX_DATA_LENGTH; "));
73262306a36Sopenharmony_ci		NXPRT(fprintf(stderr, "stream may have trailing data\n"));
73362306a36Sopenharmony_ci
73462306a36Sopenharmony_ci		/* Not an error in the most common case; it just says
73562306a36Sopenharmony_ci		 * there is trailing data that we must examine.
73662306a36Sopenharmony_ci		 *
73762306a36Sopenharmony_ci		 * CC=3 CE(1)=0 CE(0)=1 indicates partial completion
73862306a36Sopenharmony_ci		 * Fig.6-7 and Table 6-8.
73962306a36Sopenharmony_ci		 */
74062306a36Sopenharmony_ci		nx_ce = get_csb_ce_ms3b(cmdp->crb.csb);
74162306a36Sopenharmony_ci
74262306a36Sopenharmony_ci		if (!csb_ce_termination(nx_ce) &&
74362306a36Sopenharmony_ci		    csb_ce_partial_completion(nx_ce)) {
74462306a36Sopenharmony_ci			/* Check CPB for more information
74562306a36Sopenharmony_ci			 * spbc and tpbc are valid
74662306a36Sopenharmony_ci			 */
74762306a36Sopenharmony_ci			sfbt = getnn(cmdp->cpb, out_sfbt); /* Table 6-4 */
74862306a36Sopenharmony_ci			subc = getnn(cmdp->cpb, out_subc); /* Table 6-4 */
74962306a36Sopenharmony_ci			spbc = get32(cmdp->cpb, out_spbc_decomp);
75062306a36Sopenharmony_ci			tpbc = get32(cmdp->crb.csb, tpbc);
75162306a36Sopenharmony_ci			assert(target_max >= tpbc);
75262306a36Sopenharmony_ci
75362306a36Sopenharmony_ci			goto ok_cc3; /* not an error */
75462306a36Sopenharmony_ci		} else {
75562306a36Sopenharmony_ci			/* History length error when CE(1)=1 CE(0)=0. */
75662306a36Sopenharmony_ci			rc = -1;
75762306a36Sopenharmony_ci			fprintf(stderr, "history length error cc= %d\n", cc);
75862306a36Sopenharmony_ci			goto err5;
75962306a36Sopenharmony_ci		}
76062306a36Sopenharmony_ci
76162306a36Sopenharmony_ci	case ERR_NX_TARGET_SPACE:
76262306a36Sopenharmony_ci
76362306a36Sopenharmony_ci		/* Target buffer not large enough; retry smaller input
76462306a36Sopenharmony_ci		 * data; give at least 1 byte.  SPBC/TPBC are not valid.
76562306a36Sopenharmony_ci		 */
76662306a36Sopenharmony_ci		assert(source_sz > history_len);
76762306a36Sopenharmony_ci		source_sz = ((source_sz - history_len + 2) / 2) + history_len;
76862306a36Sopenharmony_ci		NXPRT(fprintf(stderr, "ERR_NX_TARGET_SPACE; retry with "));
76962306a36Sopenharmony_ci		NXPRT(fprintf(stderr, "smaller input data src %d hist %d\n",
77062306a36Sopenharmony_ci			      source_sz, history_len));
77162306a36Sopenharmony_ci		goto restart_nx;
77262306a36Sopenharmony_ci
77362306a36Sopenharmony_ci	case ERR_NX_OK:
77462306a36Sopenharmony_ci
77562306a36Sopenharmony_ci		/* This should not happen for gzip formatted data;
77662306a36Sopenharmony_ci		 * we need trailing crc and isize
77762306a36Sopenharmony_ci		 */
77862306a36Sopenharmony_ci		fprintf(stderr, "ERR_NX_OK\n");
77962306a36Sopenharmony_ci		spbc = get32(cmdp->cpb, out_spbc_decomp);
78062306a36Sopenharmony_ci		tpbc = get32(cmdp->crb.csb, tpbc);
78162306a36Sopenharmony_ci		assert(target_max >= tpbc);
78262306a36Sopenharmony_ci		assert(spbc >= history_len);
78362306a36Sopenharmony_ci		source_sz = spbc - history_len;
78462306a36Sopenharmony_ci		goto offsets_state;
78562306a36Sopenharmony_ci
78662306a36Sopenharmony_ci	default:
78762306a36Sopenharmony_ci		fprintf(stderr, "error: cc= %d\n", cc);
78862306a36Sopenharmony_ci		rc = -1;
78962306a36Sopenharmony_ci		goto err5;
79062306a36Sopenharmony_ci	}
79162306a36Sopenharmony_ci
79262306a36Sopenharmony_ciok_cc3:
79362306a36Sopenharmony_ci
79462306a36Sopenharmony_ci	NXPRT(fprintf(stderr, "cc3: sfbt: %x\n", sfbt));
79562306a36Sopenharmony_ci
79662306a36Sopenharmony_ci	assert(spbc > history_len);
79762306a36Sopenharmony_ci	source_sz = spbc - history_len;
79862306a36Sopenharmony_ci
79962306a36Sopenharmony_ci	/* Table 6-4: Source Final Block Type (SFBT) describes the
80062306a36Sopenharmony_ci	 * last processed deflate block and clues the software how to
80162306a36Sopenharmony_ci	 * resume the next job.  SUBC indicates how many input bits NX
80262306a36Sopenharmony_ci	 * consumed but did not process.  SPBC indicates how many
80362306a36Sopenharmony_ci	 * bytes of source were given to the accelerator including
80462306a36Sopenharmony_ci	 * history bytes.
80562306a36Sopenharmony_ci	 */
80662306a36Sopenharmony_ci
80762306a36Sopenharmony_ci	switch (sfbt) {
80862306a36Sopenharmony_ci		int dhtlen;
80962306a36Sopenharmony_ci
81062306a36Sopenharmony_ci	case 0x0: /* Deflate final EOB received */
81162306a36Sopenharmony_ci
81262306a36Sopenharmony_ci		/* Calculating the checksum start position. */
81362306a36Sopenharmony_ci
81462306a36Sopenharmony_ci		source_sz = source_sz - subc / 8;
81562306a36Sopenharmony_ci		is_final = 1;
81662306a36Sopenharmony_ci		break;
81762306a36Sopenharmony_ci
81862306a36Sopenharmony_ci		/* Resume decompression cases are below. Basically
81962306a36Sopenharmony_ci		 * indicates where NX has suspended and how to resume
82062306a36Sopenharmony_ci		 * the input stream.
82162306a36Sopenharmony_ci		 */
82262306a36Sopenharmony_ci
82362306a36Sopenharmony_ci	case 0x8: /* Within a literal block; use rembytecount */
82462306a36Sopenharmony_ci	case 0x9: /* Within a literal block; use rembytecount; bfinal=1 */
82562306a36Sopenharmony_ci
82662306a36Sopenharmony_ci		/* Supply the partially processed source byte again */
82762306a36Sopenharmony_ci		source_sz = source_sz - ((subc + 7) / 8);
82862306a36Sopenharmony_ci
82962306a36Sopenharmony_ci		/* SUBC LS 3bits: number of bits in the first source byte need
83062306a36Sopenharmony_ci		 * to be processed.
83162306a36Sopenharmony_ci		 * 000 means all 8 bits;  Table 6-3
83262306a36Sopenharmony_ci		 * Clear subc, histlen, sfbt, rembytecnt, dhtlen
83362306a36Sopenharmony_ci		 */
83462306a36Sopenharmony_ci		cmdp->cpb.in_subc = 0;
83562306a36Sopenharmony_ci		cmdp->cpb.in_sfbt = 0;
83662306a36Sopenharmony_ci		putnn(cmdp->cpb, in_subc, subc % 8);
83762306a36Sopenharmony_ci		putnn(cmdp->cpb, in_sfbt, sfbt);
83862306a36Sopenharmony_ci		putnn(cmdp->cpb, in_rembytecnt, getnn(cmdp->cpb,
83962306a36Sopenharmony_ci						      out_rembytecnt));
84062306a36Sopenharmony_ci		break;
84162306a36Sopenharmony_ci
84262306a36Sopenharmony_ci	case 0xA: /* Within a FH block; */
84362306a36Sopenharmony_ci	case 0xB: /* Within a FH block; bfinal=1 */
84462306a36Sopenharmony_ci
84562306a36Sopenharmony_ci		source_sz = source_sz - ((subc + 7) / 8);
84662306a36Sopenharmony_ci
84762306a36Sopenharmony_ci		/* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
84862306a36Sopenharmony_ci		cmdp->cpb.in_subc = 0;
84962306a36Sopenharmony_ci		cmdp->cpb.in_sfbt = 0;
85062306a36Sopenharmony_ci		putnn(cmdp->cpb, in_subc, subc % 8);
85162306a36Sopenharmony_ci		putnn(cmdp->cpb, in_sfbt, sfbt);
85262306a36Sopenharmony_ci		break;
85362306a36Sopenharmony_ci
85462306a36Sopenharmony_ci	case 0xC: /* Within a DH block; */
85562306a36Sopenharmony_ci	case 0xD: /* Within a DH block; bfinal=1 */
85662306a36Sopenharmony_ci
85762306a36Sopenharmony_ci		source_sz = source_sz - ((subc + 7) / 8);
85862306a36Sopenharmony_ci
85962306a36Sopenharmony_ci		/* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
86062306a36Sopenharmony_ci		cmdp->cpb.in_subc = 0;
86162306a36Sopenharmony_ci		cmdp->cpb.in_sfbt = 0;
86262306a36Sopenharmony_ci		putnn(cmdp->cpb, in_subc, subc % 8);
86362306a36Sopenharmony_ci		putnn(cmdp->cpb, in_sfbt, sfbt);
86462306a36Sopenharmony_ci
86562306a36Sopenharmony_ci		dhtlen = getnn(cmdp->cpb, out_dhtlen);
86662306a36Sopenharmony_ci		putnn(cmdp->cpb, in_dhtlen, dhtlen);
86762306a36Sopenharmony_ci		assert(dhtlen >= 42);
86862306a36Sopenharmony_ci
86962306a36Sopenharmony_ci		/* Round up to a qword */
87062306a36Sopenharmony_ci		dhtlen = (dhtlen + 127) / 128;
87162306a36Sopenharmony_ci
87262306a36Sopenharmony_ci		while (dhtlen > 0) { /* Copy dht from cpb.out to cpb.in */
87362306a36Sopenharmony_ci			--dhtlen;
87462306a36Sopenharmony_ci			cmdp->cpb.in_dht[dhtlen] = cmdp->cpb.out_dht[dhtlen];
87562306a36Sopenharmony_ci		}
87662306a36Sopenharmony_ci		break;
87762306a36Sopenharmony_ci
87862306a36Sopenharmony_ci	case 0xE: /* Within a block header; bfinal=0; */
87962306a36Sopenharmony_ci		     /* Also given if source data exactly ends (SUBC=0) with
88062306a36Sopenharmony_ci		      * EOB code with BFINAL=0.  Means the next byte will
88162306a36Sopenharmony_ci		      * contain a block header.
88262306a36Sopenharmony_ci		      */
88362306a36Sopenharmony_ci	case 0xF: /* within a block header with BFINAL=1. */
88462306a36Sopenharmony_ci
88562306a36Sopenharmony_ci		source_sz = source_sz - ((subc + 7) / 8);
88662306a36Sopenharmony_ci
88762306a36Sopenharmony_ci		/* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
88862306a36Sopenharmony_ci		cmdp->cpb.in_subc = 0;
88962306a36Sopenharmony_ci		cmdp->cpb.in_sfbt = 0;
89062306a36Sopenharmony_ci		putnn(cmdp->cpb, in_subc, subc % 8);
89162306a36Sopenharmony_ci		putnn(cmdp->cpb, in_sfbt, sfbt);
89262306a36Sopenharmony_ci
89362306a36Sopenharmony_ci		/* Engine did not process any data */
89462306a36Sopenharmony_ci		if (is_eof && (source_sz == 0))
89562306a36Sopenharmony_ci			is_final = 1;
89662306a36Sopenharmony_ci	}
89762306a36Sopenharmony_ci
89862306a36Sopenharmony_cioffsets_state:
89962306a36Sopenharmony_ci
90062306a36Sopenharmony_ci	/* Adjust the source and target buffer offsets and lengths  */
90162306a36Sopenharmony_ci
90262306a36Sopenharmony_ci	NXPRT(fprintf(stderr, "offsets_state:\n"));
90362306a36Sopenharmony_ci
90462306a36Sopenharmony_ci	/* Delete input data from fifo_in */
90562306a36Sopenharmony_ci	used_in = used_in - source_sz;
90662306a36Sopenharmony_ci	cur_in = (cur_in + source_sz) % fifo_in_len;
90762306a36Sopenharmony_ci	input_file_offset = input_file_offset + source_sz;
90862306a36Sopenharmony_ci
90962306a36Sopenharmony_ci	/* Add output data to fifo_out */
91062306a36Sopenharmony_ci	used_out = used_out + tpbc;
91162306a36Sopenharmony_ci
91262306a36Sopenharmony_ci	assert(used_out <= fifo_out_len);
91362306a36Sopenharmony_ci
91462306a36Sopenharmony_ci	total_out = total_out + tpbc;
91562306a36Sopenharmony_ci
91662306a36Sopenharmony_ci	/* Deflate history is 32KB max.  No need to supply more
91762306a36Sopenharmony_ci	 * than 32KB on a resume.
91862306a36Sopenharmony_ci	 */
91962306a36Sopenharmony_ci	history_len = (total_out > window_max) ? window_max : total_out;
92062306a36Sopenharmony_ci
92162306a36Sopenharmony_ci	/* To estimate expected expansion in the next NX job; 500 means 50%.
92262306a36Sopenharmony_ci	 * Deflate best case is around 1 to 1000.
92362306a36Sopenharmony_ci	 */
92462306a36Sopenharmony_ci	last_comp_ratio = (1000UL * ((uint64_t)source_sz + 1))
92562306a36Sopenharmony_ci			  / ((uint64_t)tpbc + 1);
92662306a36Sopenharmony_ci	last_comp_ratio = NX_MAX(NX_MIN(1000UL, last_comp_ratio), 1);
92762306a36Sopenharmony_ci	NXPRT(fprintf(stderr, "comp_ratio %ld source_sz %d spbc %d tpbc %d\n",
92862306a36Sopenharmony_ci		      last_comp_ratio, source_sz, spbc, tpbc));
92962306a36Sopenharmony_ci
93062306a36Sopenharmony_ci	resuming = 1;
93162306a36Sopenharmony_ci
93262306a36Sopenharmony_cifinish_state:
93362306a36Sopenharmony_ci
93462306a36Sopenharmony_ci	NXPRT(fprintf(stderr, "finish_state:\n"));
93562306a36Sopenharmony_ci
93662306a36Sopenharmony_ci	if (is_final) {
93762306a36Sopenharmony_ci		if (used_out)
93862306a36Sopenharmony_ci			goto write_state; /* More data to write out */
93962306a36Sopenharmony_ci		else if (used_in < 8) {
94062306a36Sopenharmony_ci			/* Need at least 8 more bytes containing gzip crc
94162306a36Sopenharmony_ci			 * and isize.
94262306a36Sopenharmony_ci			 */
94362306a36Sopenharmony_ci			rc = -1;
94462306a36Sopenharmony_ci			goto err4;
94562306a36Sopenharmony_ci		} else {
94662306a36Sopenharmony_ci			/* Compare checksums and exit */
94762306a36Sopenharmony_ci			int i;
94862306a36Sopenharmony_ci			unsigned char tail[8];
94962306a36Sopenharmony_ci			uint32_t cksum, isize;
95062306a36Sopenharmony_ci
95162306a36Sopenharmony_ci			for (i = 0; i < 8; i++)
95262306a36Sopenharmony_ci				tail[i] = fifo_in[(cur_in + i) % fifo_in_len];
95362306a36Sopenharmony_ci			fprintf(stderr, "computed checksum %08x isize %08x\n",
95462306a36Sopenharmony_ci				cmdp->cpb.out_crc, (uint32_t) (total_out
95562306a36Sopenharmony_ci				% (1ULL<<32)));
95662306a36Sopenharmony_ci			cksum = ((uint32_t) tail[0] | (uint32_t) tail[1]<<8
95762306a36Sopenharmony_ci				 | (uint32_t) tail[2]<<16
95862306a36Sopenharmony_ci				 | (uint32_t) tail[3]<<24);
95962306a36Sopenharmony_ci			isize = ((uint32_t) tail[4] | (uint32_t) tail[5]<<8
96062306a36Sopenharmony_ci				 | (uint32_t) tail[6]<<16
96162306a36Sopenharmony_ci				 | (uint32_t) tail[7]<<24);
96262306a36Sopenharmony_ci			fprintf(stderr, "stored   checksum %08x isize %08x\n",
96362306a36Sopenharmony_ci				cksum, isize);
96462306a36Sopenharmony_ci
96562306a36Sopenharmony_ci			if (cksum == cmdp->cpb.out_crc && isize == (uint32_t)
96662306a36Sopenharmony_ci			    (total_out % (1ULL<<32))) {
96762306a36Sopenharmony_ci				rc = 0;	goto ok1;
96862306a36Sopenharmony_ci			} else {
96962306a36Sopenharmony_ci				rc = -1; goto err4;
97062306a36Sopenharmony_ci			}
97162306a36Sopenharmony_ci		}
97262306a36Sopenharmony_ci	} else
97362306a36Sopenharmony_ci		goto read_state;
97462306a36Sopenharmony_ci
97562306a36Sopenharmony_ci	return -1;
97662306a36Sopenharmony_ci
97762306a36Sopenharmony_cierr1:
97862306a36Sopenharmony_ci	fprintf(stderr, "error: not a gzip file, expect %x, read %x\n",
97962306a36Sopenharmony_ci		expect, c);
98062306a36Sopenharmony_ci	return -1;
98162306a36Sopenharmony_ci
98262306a36Sopenharmony_cierr2:
98362306a36Sopenharmony_ci	fprintf(stderr, "error: the FLG byte is wrong or not being handled\n");
98462306a36Sopenharmony_ci	return -1;
98562306a36Sopenharmony_ci
98662306a36Sopenharmony_cierr3:
98762306a36Sopenharmony_ci	fprintf(stderr, "error: gzip header\n");
98862306a36Sopenharmony_ci	return -1;
98962306a36Sopenharmony_ci
99062306a36Sopenharmony_cierr4:
99162306a36Sopenharmony_ci	fprintf(stderr, "error: checksum missing or mismatch\n");
99262306a36Sopenharmony_ci
99362306a36Sopenharmony_cierr5:
99462306a36Sopenharmony_ciok1:
99562306a36Sopenharmony_ci	fprintf(stderr, "decomp is complete: fclose\n");
99662306a36Sopenharmony_ci	fclose(outf);
99762306a36Sopenharmony_ci
99862306a36Sopenharmony_ci	return rc;
99962306a36Sopenharmony_ci}
100062306a36Sopenharmony_ci
100162306a36Sopenharmony_ci
100262306a36Sopenharmony_ciint main(int argc, char **argv)
100362306a36Sopenharmony_ci{
100462306a36Sopenharmony_ci	int rc;
100562306a36Sopenharmony_ci	struct sigaction act;
100662306a36Sopenharmony_ci	void *handle;
100762306a36Sopenharmony_ci
100862306a36Sopenharmony_ci	nx_dbg = 0;
100962306a36Sopenharmony_ci	nx_gzip_log = NULL;
101062306a36Sopenharmony_ci	act.sa_handler = 0;
101162306a36Sopenharmony_ci	act.sa_sigaction = nxu_sigsegv_handler;
101262306a36Sopenharmony_ci	act.sa_flags = SA_SIGINFO;
101362306a36Sopenharmony_ci	act.sa_restorer = 0;
101462306a36Sopenharmony_ci	sigemptyset(&act.sa_mask);
101562306a36Sopenharmony_ci	sigaction(SIGSEGV, &act, NULL);
101662306a36Sopenharmony_ci
101762306a36Sopenharmony_ci	handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
101862306a36Sopenharmony_ci	if (!handle) {
101962306a36Sopenharmony_ci		fprintf(stderr, "Unable to init NX, errno %d\n", errno);
102062306a36Sopenharmony_ci		exit(-1);
102162306a36Sopenharmony_ci	}
102262306a36Sopenharmony_ci
102362306a36Sopenharmony_ci	rc = decompress_file(argc, argv, handle);
102462306a36Sopenharmony_ci
102562306a36Sopenharmony_ci	nx_function_end(handle);
102662306a36Sopenharmony_ci
102762306a36Sopenharmony_ci	return rc;
102862306a36Sopenharmony_ci}
1029