1// SPDX-License-Identifier: GPL-2.0-or-later
2
3/* P9 gzip sample code for demonstrating the P9 NX hardware interface.
 * Not intended for production use or for performance or compression
 * ratio measurements.  For simplicity of demonstration, this sample
 * code compresses into fixed Huffman blocks only (Deflate btype=1)
7 * and has very simple memory management.  Dynamic Huffman blocks
8 * (Deflate btype=2) are more involved as detailed in the user guide.
9 * Note also that /dev/crypto/gzip, VAS and skiboot support are
10 * required.
11 *
12 * Copyright 2020 IBM Corp.
13 *
14 * https://github.com/libnxz/power-gzip for zlib api and other utils
15 *
16 * Author: Bulent Abali <abali@us.ibm.com>
17 *
18 * Definitions of acronyms used here. See
19 * P9 NX Gzip Accelerator User's Manual for details:
20 * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
21 *
22 * adler/crc: 32 bit checksums appended to stream tail
23 * ce:       completion extension
24 * cpb:      coprocessor parameter block (metadata)
25 * crb:      coprocessor request block (command)
26 * csb:      coprocessor status block (status)
27 * dht:      dynamic huffman table
28 * dde:      data descriptor element (address, length)
29 * ddl:      list of ddes
30 * dh/fh:    dynamic and fixed huffman types
31 * fc:       coprocessor function code
32 * histlen:  history/dictionary length
33 * history:  sliding window of up to 32KB of data
34 * lzcount:  Deflate LZ symbol counts
35 * rembytecnt: remaining byte count
36 * sfbt:     source final block type; last block's type during decomp
37 * spbc:     source processed byte count
38 * subc:     source unprocessed bit count
39 * tebc:     target ending bit count; valid bits in the last byte
40 * tpbc:     target processed byte count
41 * vas:      virtual accelerator switch; the user mode interface
42 */
43
44#define _ISOC11_SOURCE	// For aligned_alloc()
45#define _DEFAULT_SOURCE	// For endian.h
46
47#include <stdio.h>
48#include <stdlib.h>
49#include <string.h>
50#include <unistd.h>
51#include <stdint.h>
52#include <sys/types.h>
53#include <sys/stat.h>
54#include <sys/time.h>
55#include <sys/fcntl.h>
56#include <sys/mman.h>
57#include <endian.h>
58#include <bits/endian.h>
59#include <sys/ioctl.h>
60#include <assert.h>
61#include <errno.h>
62#include <signal.h>
63#include "nxu.h"
64#include "nx.h"
65
66int nx_dbg;
67FILE *nx_gzip_log;
68
69#define NX_MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
70#define FNAME_MAX 1024
71#define FEXT ".nx.gz"
72
73/*
74 * LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure.
75 */
76static int compress_fht_sample(char *src, uint32_t srclen, char *dst,
77				uint32_t dstlen, int with_count,
78				struct nx_gzip_crb_cpb_t *cmdp, void *handle)
79{
80	uint32_t fc;
81
82	assert(!!cmdp);
83
84	put32(cmdp->crb, gzip_fc, 0);  /* clear */
85	fc = (with_count) ? GZIP_FC_COMPRESS_RESUME_FHT_COUNT :
86			    GZIP_FC_COMPRESS_RESUME_FHT;
87	putnn(cmdp->crb, gzip_fc, fc);
88	putnn(cmdp->cpb, in_histlen, 0); /* resuming with no history */
89	memset((void *) &cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
90
91	/* Section 6.6 programming notes; spbc may be in two different
92	 * places depending on FC.
93	 */
94	if (!with_count)
95		put32(cmdp->cpb, out_spbc_comp, 0);
96	else
97		put32(cmdp->cpb, out_spbc_comp_with_count, 0);
98
99	/* Figure 6-3 6-4; CSB location */
100	put64(cmdp->crb, csb_address, 0);
101	put64(cmdp->crb, csb_address,
102	      (uint64_t) &cmdp->crb.csb & csb_address_mask);
103
104	/* Source direct dde (scatter-gather list) */
105	clear_dde(cmdp->crb.source_dde);
106	putnn(cmdp->crb.source_dde, dde_count, 0);
107	put32(cmdp->crb.source_dde, ddebc, srclen);
108	put64(cmdp->crb.source_dde, ddead, (uint64_t) src);
109
110	/* Target direct dde (scatter-gather list) */
111	clear_dde(cmdp->crb.target_dde);
112	putnn(cmdp->crb.target_dde, dde_count, 0);
113	put32(cmdp->crb.target_dde, ddebc, dstlen);
114	put64(cmdp->crb.target_dde, ddead, (uint64_t) dst);
115
116	/* Submit the crb, the job descriptor, to the accelerator */
117	return nxu_submit_job(cmdp, handle);
118}
119
/*
 * Stores a blank gzip header (no filename, zero timestamp) at the
 * start of buf and returns how many bytes were stored.  buf must have
 * room for the 10 header bytes.
 * Gzip specification at https://tools.ietf.org/html/rfc1952
 */
int gzip_header_blank(char *buf)
{
	static const unsigned char blank_hdr[] = {
		0x1f,			/* ID1 */
		0x8b,			/* ID2 */
		0x08,			/* CM  */
		0x00,			/* FLG */
		0x00, 0x00, 0x00, 0x00,	/* MTIME */
		0x04,			/* XFL 4=fastest */
		0x03,			/* OS UNIX */
	};
	int n;

	for (n = 0; n < (int) sizeof(blank_hdr); n++)
		buf[n] = blank_hdr[n];

	return n;
}
142
/*
 * Read the whole of file fname into a newly malloc'ed buffer.
 *
 * On success returns 0, stores the buffer in *buf and the number of
 * bytes read in *bufsize; the caller must free(*buf).
 * On failure prints a diagnostic to stderr, returns nonzero, releases
 * everything acquired here and leaves *buf and *bufsize untouched.
 */
int read_alloc_input_file(char *fname, char **buf, size_t *bufsize)
{
	struct stat statbuf;
	FILE *fp;
	char *p;
	size_t want;
	size_t num_bytes;
	int rc = -1;

	if (stat(fname, &statbuf)) {
		perror(fname);
		return -1;
	}
	if (statbuf.st_size < 0) {
		fprintf(stderr, "%s: invalid file size\n", fname);
		return -1;
	}
	want = (size_t) statbuf.st_size;

	fp = fopen(fname, "rb");
	if (fp == NULL) {
		perror(fname);
		return -1;
	}

	/* malloc(0) is allowed to return NULL; always request at least
	 * one byte so that NULL unambiguously means out of memory and
	 * an empty file still yields a buffer the caller can free.
	 */
	p = malloc(want ? want : 1);
	if (p == NULL) {
		perror("malloc");
		goto out_close;
	}

	num_bytes = fread(p, 1, want, fp);
	if (ferror(fp)) {
		perror(fname);
		goto out_free;
	}
	if (num_bytes != want) {
		/* Not an I/O error, so errno carries no information */
		fprintf(stderr, "%s: short read\n", fname);
		goto out_free;
	}

	*buf = p;
	*bufsize = num_bytes;
	p = NULL;	/* ownership passed to the caller */
	rc = 0;

out_free:
	free(p);
out_close:
	fclose(fp);
	return rc;
}
170
/*
 * Create or truncate file fname and write bufsize bytes from buf to it.
 *
 * Returns 0 when all bytes were written and the file was closed
 * without error; otherwise prints a diagnostic to stderr and returns
 * nonzero.  The file is closed on every path.
 */
int write_output_file(char *fname, char *buf, size_t bufsize)
{
	FILE *fp;
	size_t num_bytes;
	int rc = 0;

	fp = fopen(fname, "wb");
	if (fp == NULL) {
		perror(fname);
		return -1;
	}

	num_bytes = fwrite(buf, 1, bufsize, fp);
	if (ferror(fp) || (num_bytes != bufsize)) {
		perror(fname);
		rc = -1;
	}

	/* fclose() flushes buffered data, so a failure here means the
	 * output is incomplete even if fwrite() reported success.
	 */
	if (fclose(fp) != 0 && rc == 0) {
		perror(fname);
		rc = -1;
	}

	return rc;
}
190
/*
 * Appends a Z_SYNC_FLUSH (see zlib.h): a 3-bit block header carrying
 * `final` as BFINAL followed by a zero length block (length 0x0000 and
 * its complement 0xFFFF).
 *
 * buf:   first byte after the data written so far
 * tebc:  number of valid bits in the last byte written; when nonzero
 *        the byte at buf[-1] is partially full and is completed in
 *        place, when zero the flush starts at buf[0]
 * final: only bit 0 is used
 *
 * Returns the number of bytes appended at and after buf (4 or 5).
 */
int append_sync_flush(char *buf, int tebc, int final)
{
	int nbits = tebc & 0x7;
	int total_bits;
	uint64_t word;

	if (tebc > 0) {
		/* Step back onto the partial byte; keep only its valid bits */
		buf--;
		*buf = *buf & (unsigned char) ((1<<tebc)-1);
	} else {
		*buf = 0;
	}

	/* BFINAL goes right above the bits already present */
	word = *buf | ((0x1ULL & final) << nbits);

	/* BFINAL plus BTYPE take 3 bits; the block header then ends
	 * in the first byte (8 bits) or spills into a second (16 bits).
	 */
	nbits += 3;
	total_bits = ((nbits <= 8) ? 8 : 16) + 32;

	/* Zero length block right after the header bytes */
	word |= (0xFFFF0000ULL) << (total_bits - 32);

	/* Store the assembled bits, least significant byte first */
	for (; total_bits > 0; total_bits -= 8) {
		*buf++ = (unsigned char) (word & 0xffULL);
		word >>= 8;
	}

	if ((tebc == 0) || (tebc > 5))
		return 5;
	return 4;
}
218
/*
 * Sets (bfinal nonzero) or clears (bfinal zero) the final deflate
 * block bit, which is bit 0 of the first byte at buf.  The other bits
 * of that byte are kept.  This call assumes the block beginning is
 * byte aligned.
 */
static void set_bfinal(void *buf, int bfinal)
{
	unsigned char *first = buf;

	*first = bfinal ? (*first | 0x01) : (*first & 0xfe);
}
232
233int compress_file(int argc, char **argv, void *handle)
234{
235	char *inbuf, *outbuf, *srcbuf, *dstbuf;
236	char outname[FNAME_MAX];
237	uint32_t srclen, dstlen;
238	uint32_t flushlen, chunk;
239	size_t inlen, outlen, dsttotlen, srctotlen;
240	uint32_t crc, spbc, tpbc, tebc;
241	int lzcounts = 0;
242	int cc;
243	int num_hdr_bytes;
244	struct nx_gzip_crb_cpb_t *cmdp;
245	uint32_t pagelen = 65536;
246	int fault_tries = NX_MAX_FAULTS;
247
248	cmdp = (void *)(uintptr_t)
249		aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t),
250			      sizeof(struct nx_gzip_crb_cpb_t));
251
252	if (argc != 2) {
253		fprintf(stderr, "usage: %s <fname>\n", argv[0]);
254		exit(-1);
255	}
256	if (read_alloc_input_file(argv[1], &inbuf, &inlen))
257		exit(-1);
258	fprintf(stderr, "file %s read, %ld bytes\n", argv[1], inlen);
259
260	/* Generous output buffer for header/trailer */
261	outlen = 2 * inlen + 1024;
262
263	assert(NULL != (outbuf = (char *)malloc(outlen)));
264	nxu_touch_pages(outbuf, outlen, pagelen, 1);
265
266	/* Compress piecemeal in smallish chunks */
267	chunk = 1<<22;
268
269	/* Write the gzip header to the stream */
270	num_hdr_bytes = gzip_header_blank(outbuf);
271	dstbuf    = outbuf + num_hdr_bytes;
272	outlen    = outlen - num_hdr_bytes;
273	dsttotlen = num_hdr_bytes;
274
275	srcbuf    = inbuf;
276	srctotlen = 0;
277
278	/* Init the CRB, the coprocessor request block */
279	memset(&cmdp->crb, 0, sizeof(cmdp->crb));
280
281	/* Initial gzip crc32 */
282	put32(cmdp->cpb, in_crc, 0);
283
284	while (inlen > 0) {
285
286		/* Submit chunk size source data per job */
287		srclen = NX_MIN(chunk, inlen);
288		/* Supply large target in case data expands */
289		dstlen = NX_MIN(2*srclen, outlen);
290
291		/* Page faults are handled by the user code */
292
293		/* Fault-in pages; an improved code wouldn't touch so
294		 * many pages but would try to estimate the
295		 * compression ratio and adjust both the src and dst
296		 * touch amounts.
297		 */
298		nxu_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), pagelen,
299				1);
300		nxu_touch_pages(srcbuf, srclen, pagelen, 0);
301		nxu_touch_pages(dstbuf, dstlen, pagelen, 1);
302
303		cc = compress_fht_sample(
304			srcbuf, srclen,
305			dstbuf, dstlen,
306			lzcounts, cmdp, handle);
307
308		if (cc != ERR_NX_OK && cc != ERR_NX_TPBC_GT_SPBC &&
309		    cc != ERR_NX_AT_FAULT) {
310			fprintf(stderr, "nx error: cc= %d\n", cc);
311			exit(-1);
312		}
313
314		/* Page faults are handled by the user code */
315		if (cc == ERR_NX_AT_FAULT) {
316			NXPRT(fprintf(stderr, "page fault: cc= %d, ", cc));
317			NXPRT(fprintf(stderr, "try= %d, fsa= %08llx\n",
318				  fault_tries,
319				  (unsigned long long) cmdp->crb.csb.fsaddr));
320			fault_tries--;
321			if (fault_tries > 0) {
322				continue;
323			} else {
324				fprintf(stderr, "error: cannot progress; ");
325				fprintf(stderr, "too many faults\n");
326				exit(-1);
327			};
328		}
329
330		fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */
331
332		inlen     = inlen - srclen;
333		srcbuf    = srcbuf + srclen;
334		srctotlen = srctotlen + srclen;
335
336		/* Two possible locations for spbc depending on the function
337		 * code.
338		 */
339		spbc = (!lzcounts) ? get32(cmdp->cpb, out_spbc_comp) :
340			get32(cmdp->cpb, out_spbc_comp_with_count);
341		assert(spbc == srclen);
342
343		/* Target byte count */
344		tpbc = get32(cmdp->crb.csb, tpbc);
345		/* Target ending bit count */
346		tebc = getnn(cmdp->cpb, out_tebc);
347		NXPRT(fprintf(stderr, "compressed chunk %d ", spbc));
348		NXPRT(fprintf(stderr, "to %d bytes, tebc= %d\n", tpbc, tebc));
349
350		if (inlen > 0) { /* More chunks to go */
351			set_bfinal(dstbuf, 0);
352			dstbuf    = dstbuf + tpbc;
353			dsttotlen = dsttotlen + tpbc;
354			outlen    = outlen - tpbc;
355			/* Round up to the next byte with a flush
356			 * block; do not set the BFINAqL bit.
357			 */
358			flushlen  = append_sync_flush(dstbuf, tebc, 0);
359			dsttotlen = dsttotlen + flushlen;
360			outlen    = outlen - flushlen;
361			dstbuf    = dstbuf + flushlen;
362			NXPRT(fprintf(stderr, "added sync_flush %d bytes\n",
363					flushlen));
364		} else {  /* Done */
365			/* Set the BFINAL bit of the last block per Deflate
366			 * specification.
367			 */
368			set_bfinal(dstbuf, 1);
369			dstbuf    = dstbuf + tpbc;
370			dsttotlen = dsttotlen + tpbc;
371			outlen    = outlen - tpbc;
372		}
373
374		/* Resuming crc32 for the next chunk */
375		crc = get32(cmdp->cpb, out_crc);
376		put32(cmdp->cpb, in_crc, crc);
377		crc = be32toh(crc);
378	}
379
380	/* Append crc32 and ISIZE to the end */
381	memcpy(dstbuf, &crc, 4);
382	memcpy(dstbuf+4, &srctotlen, 4);
383	dsttotlen = dsttotlen + 8;
384	outlen    = outlen - 8;
385
386	assert(FNAME_MAX > (strlen(argv[1]) + strlen(FEXT)));
387	strcpy(outname, argv[1]);
388	strcat(outname, FEXT);
389	if (write_output_file(outname, outbuf, dsttotlen)) {
390		fprintf(stderr, "write error: %s\n", outname);
391		exit(-1);
392	}
393
394	fprintf(stderr, "compressed %ld to %ld bytes total, ", srctotlen,
395		dsttotlen);
396	fprintf(stderr, "crc32 checksum = %08x\n", crc);
397
398	if (inbuf != NULL)
399		free(inbuf);
400
401	if (outbuf != NULL)
402		free(outbuf);
403
404	return 0;
405}
406
407int main(int argc, char **argv)
408{
409	int rc;
410	struct sigaction act;
411	void *handle;
412
413	nx_dbg = 0;
414	nx_gzip_log = NULL;
415	act.sa_handler = 0;
416	act.sa_sigaction = nxu_sigsegv_handler;
417	act.sa_flags = SA_SIGINFO;
418	act.sa_restorer = 0;
419	sigemptyset(&act.sa_mask);
420	sigaction(SIGSEGV, &act, NULL);
421
422	handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
423	if (!handle) {
424		fprintf(stderr, "Unable to init NX, errno %d\n", errno);
425		exit(-1);
426	}
427
428	rc = compress_file(argc, argv, handle);
429
430	nx_function_end(handle);
431
432	return rc;
433}
434