1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Driver for IBM PowerNV compression accelerator
4 *
5 * Copyright (C) 2015 Dan Streetman, IBM Corp
6 */
7
8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
10#include "nx-842.h"
11
12#include <linux/timer.h>
13
14#include <asm/prom.h>
15#include <asm/icswx.h>
16#include <asm/vas.h>
17#include <asm/reg.h>
18#include <asm/opal-api.h>
19#include <asm/opal.h>
20
21MODULE_LICENSE("GPL");
22MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
23MODULE_DESCRIPTION("H/W Compression driver for IBM PowerNV processors");
24MODULE_ALIAS_CRYPTO("842");
25MODULE_ALIAS_CRYPTO("842-nx");
26
/* Alignment the exec functions round the caller's work memory up to */
#define WORKMEM_ALIGN	(CRB_ALIGN)
/* Longest time wait_for_csb() polls for a valid CSB, in milliseconds */
#define CSB_WAIT_MAX	(5000)
/* How many times a failed VAS copy/paste is retried before giving up */
#define VAS_RETRIES	(10)
30
31struct nx842_workmem {
32	/* Below fields must be properly aligned */
33	struct coprocessor_request_block crb; /* CRB_ALIGN align */
34	struct data_descriptor_entry ddl_in[DDL_LEN_MAX]; /* DDE_ALIGN align */
35	struct data_descriptor_entry ddl_out[DDL_LEN_MAX]; /* DDE_ALIGN align */
36	/* Above fields must be properly aligned */
37
38	ktime_t start;
39
40	char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */
41} __packed __aligned(WORKMEM_ALIGN);
42
43struct nx_coproc {
44	unsigned int chip_id;
45	unsigned int ct;	/* Can be 842 or GZIP high/normal*/
46	unsigned int ci;	/* Coprocessor instance, used with icswx */
47	struct {
48		struct vas_window *rxwin;
49		int id;
50	} vas;
51	struct list_head list;
52};
53
54/*
55 * Send the request to NX engine on the chip for the corresponding CPU
56 * where the process is executing. Use with VAS function.
57 */
58static DEFINE_PER_CPU(struct vas_window *, cpu_txwin);
59
60/* no cpu hotplug on powernv, so this list never changes after init */
61static LIST_HEAD(nx_coprocs);
62static unsigned int nx842_ct;	/* used in icswx function */
63
/*
 * Coprocessor type values, using the same numbering as skiboot and the
 * NX workbook.
 */
#define NX_CT_GZIP	(2)	/* on P9 and later */
#define NX_CT_842	(3)

/*
 * Request submission backend, chosen once at module init: either
 * nx842_exec_icswx() or nx842_exec_vas().
 */
static int (*nx842_powernv_exec)(const unsigned char *in,
				 unsigned int inlen, unsigned char *out,
				 unsigned int *outlenp, void *workmem,
				 int fc);
74
75/**
76 * setup_indirect_dde - Setup an indirect DDE
77 *
78 * The DDE is setup with the the DDE count, byte count, and address of
79 * first direct DDE in the list.
80 */
81static void setup_indirect_dde(struct data_descriptor_entry *dde,
82			       struct data_descriptor_entry *ddl,
83			       unsigned int dde_count, unsigned int byte_count)
84{
85	dde->flags = 0;
86	dde->count = dde_count;
87	dde->index = 0;
88	dde->length = cpu_to_be32(byte_count);
89	dde->address = cpu_to_be64(nx842_get_pa(ddl));
90}
91
92/**
93 * setup_direct_dde - Setup single DDE from buffer
94 *
95 * The DDE is setup with the buffer and length.  The buffer must be properly
96 * aligned.  The used length is returned.
97 * Returns:
98 *   N    Successfully set up DDE with N bytes
99 */
100static unsigned int setup_direct_dde(struct data_descriptor_entry *dde,
101				     unsigned long pa, unsigned int len)
102{
103	unsigned int l = min_t(unsigned int, len, LEN_ON_PAGE(pa));
104
105	dde->flags = 0;
106	dde->count = 0;
107	dde->index = 0;
108	dde->length = cpu_to_be32(l);
109	dde->address = cpu_to_be64(pa);
110
111	return l;
112}
113
114/**
115 * setup_ddl - Setup DDL from buffer
116 *
117 * Returns:
118 *   0		Successfully set up DDL
119 */
120static int setup_ddl(struct data_descriptor_entry *dde,
121		     struct data_descriptor_entry *ddl,
122		     unsigned char *buf, unsigned int len,
123		     bool in)
124{
125	unsigned long pa = nx842_get_pa(buf);
126	int i, ret, total_len = len;
127
128	if (!IS_ALIGNED(pa, DDE_BUFFER_ALIGN)) {
129		pr_debug("%s buffer pa 0x%lx not 0x%x-byte aligned\n",
130			 in ? "input" : "output", pa, DDE_BUFFER_ALIGN);
131		return -EINVAL;
132	}
133
134	/* only need to check last mult; since buffer must be
135	 * DDE_BUFFER_ALIGN aligned, and that is a multiple of
136	 * DDE_BUFFER_SIZE_MULT, and pre-last page DDE buffers
137	 * are guaranteed a multiple of DDE_BUFFER_SIZE_MULT.
138	 */
139	if (len % DDE_BUFFER_LAST_MULT) {
140		pr_debug("%s buffer len 0x%x not a multiple of 0x%x\n",
141			 in ? "input" : "output", len, DDE_BUFFER_LAST_MULT);
142		if (in)
143			return -EINVAL;
144		len = round_down(len, DDE_BUFFER_LAST_MULT);
145	}
146
147	/* use a single direct DDE */
148	if (len <= LEN_ON_PAGE(pa)) {
149		ret = setup_direct_dde(dde, pa, len);
150		WARN_ON(ret < len);
151		return 0;
152	}
153
154	/* use the DDL */
155	for (i = 0; i < DDL_LEN_MAX && len > 0; i++) {
156		ret = setup_direct_dde(&ddl[i], pa, len);
157		buf += ret;
158		len -= ret;
159		pa = nx842_get_pa(buf);
160	}
161
162	if (len > 0) {
163		pr_debug("0x%x total %s bytes 0x%x too many for DDL.\n",
164			 total_len, in ? "input" : "output", len);
165		if (in)
166			return -EMSGSIZE;
167		total_len -= len;
168	}
169	setup_indirect_dde(dde, ddl, i, total_len);
170
171	return 0;
172}
173
/*
 * Log an error message followed by the CSB flags, cs, cc and ce bytes and
 * the (CPU-endian) count field.
 */
#define CSB_ERR(csb, msg, ...)						\
	pr_err("ERROR: " msg " : %02x %02x %02x %02x %08x\n",		\
	       ##__VA_ARGS__,						\
	       (csb)->flags, (csb)->cs, (csb)->cc, (csb)->ce,		\
	       be32_to_cpu((csb)->count))

/* Same as CSB_ERR(), with the CSB address field appended to the message */
#define CSB_ERR_ADDR(csb, msg, ...)					\
	CSB_ERR(csb, msg " at %lx", ##__VA_ARGS__,			\
		(unsigned long)be64_to_cpu((csb)->address))
183
184/**
185 * wait_for_csb
186 */
187static int wait_for_csb(struct nx842_workmem *wmem,
188			struct coprocessor_status_block *csb)
189{
190	ktime_t start = wmem->start, now = ktime_get();
191	ktime_t timeout = ktime_add_ms(start, CSB_WAIT_MAX);
192
193	while (!(READ_ONCE(csb->flags) & CSB_V)) {
194		cpu_relax();
195		now = ktime_get();
196		if (ktime_after(now, timeout))
197			break;
198	}
199
200	/* hw has updated csb and output buffer */
201	barrier();
202
203	/* check CSB flags */
204	if (!(csb->flags & CSB_V)) {
205		CSB_ERR(csb, "CSB still not valid after %ld us, giving up",
206			(long)ktime_us_delta(now, start));
207		return -ETIMEDOUT;
208	}
209	if (csb->flags & CSB_F) {
210		CSB_ERR(csb, "Invalid CSB format");
211		return -EPROTO;
212	}
213	if (csb->flags & CSB_CH) {
214		CSB_ERR(csb, "Invalid CSB chaining state");
215		return -EPROTO;
216	}
217
218	/* verify CSB completion sequence is 0 */
219	if (csb->cs) {
220		CSB_ERR(csb, "Invalid CSB completion sequence");
221		return -EPROTO;
222	}
223
224	/* check CSB Completion Code */
225	switch (csb->cc) {
226	/* no error */
227	case CSB_CC_SUCCESS:
228		break;
229	case CSB_CC_TPBC_GT_SPBC:
230		/* not an error, but the compressed data is
231		 * larger than the uncompressed data :(
232		 */
233		break;
234
235	/* input data errors */
236	case CSB_CC_OPERAND_OVERLAP:
237		/* input and output buffers overlap */
238		CSB_ERR(csb, "Operand Overlap error");
239		return -EINVAL;
240	case CSB_CC_INVALID_OPERAND:
241		CSB_ERR(csb, "Invalid operand");
242		return -EINVAL;
243	case CSB_CC_NOSPC:
244		/* output buffer too small */
245		return -ENOSPC;
246	case CSB_CC_ABORT:
247		CSB_ERR(csb, "Function aborted");
248		return -EINTR;
249	case CSB_CC_CRC_MISMATCH:
250		CSB_ERR(csb, "CRC mismatch");
251		return -EINVAL;
252	case CSB_CC_TEMPL_INVALID:
253		CSB_ERR(csb, "Compressed data template invalid");
254		return -EINVAL;
255	case CSB_CC_TEMPL_OVERFLOW:
256		CSB_ERR(csb, "Compressed data template shows data past end");
257		return -EINVAL;
258	case CSB_CC_EXCEED_BYTE_COUNT:	/* P9 or later */
259		/*
260		 * DDE byte count exceeds the limit specified in Maximum
261		 * byte count register.
262		 */
263		CSB_ERR(csb, "DDE byte count exceeds the limit");
264		return -EINVAL;
265
266	/* these should not happen */
267	case CSB_CC_INVALID_ALIGN:
268		/* setup_ddl should have detected this */
269		CSB_ERR_ADDR(csb, "Invalid alignment");
270		return -EINVAL;
271	case CSB_CC_DATA_LENGTH:
272		/* setup_ddl should have detected this */
273		CSB_ERR(csb, "Invalid data length");
274		return -EINVAL;
275	case CSB_CC_WR_TRANSLATION:
276	case CSB_CC_TRANSLATION:
277	case CSB_CC_TRANSLATION_DUP1:
278	case CSB_CC_TRANSLATION_DUP2:
279	case CSB_CC_TRANSLATION_DUP3:
280	case CSB_CC_TRANSLATION_DUP4:
281	case CSB_CC_TRANSLATION_DUP5:
282	case CSB_CC_TRANSLATION_DUP6:
283		/* should not happen, we use physical addrs */
284		CSB_ERR_ADDR(csb, "Translation error");
285		return -EPROTO;
286	case CSB_CC_WR_PROTECTION:
287	case CSB_CC_PROTECTION:
288	case CSB_CC_PROTECTION_DUP1:
289	case CSB_CC_PROTECTION_DUP2:
290	case CSB_CC_PROTECTION_DUP3:
291	case CSB_CC_PROTECTION_DUP4:
292	case CSB_CC_PROTECTION_DUP5:
293	case CSB_CC_PROTECTION_DUP6:
294		/* should not happen, we use physical addrs */
295		CSB_ERR_ADDR(csb, "Protection error");
296		return -EPROTO;
297	case CSB_CC_PRIVILEGE:
298		/* shouldn't happen, we're in HYP mode */
299		CSB_ERR(csb, "Insufficient Privilege error");
300		return -EPROTO;
301	case CSB_CC_EXCESSIVE_DDE:
302		/* shouldn't happen, setup_ddl doesn't use many dde's */
303		CSB_ERR(csb, "Too many DDEs in DDL");
304		return -EINVAL;
305	case CSB_CC_TRANSPORT:
306	case CSB_CC_INVALID_CRB:	/* P9 or later */
307		/* shouldn't happen, we setup CRB correctly */
308		CSB_ERR(csb, "Invalid CRB");
309		return -EINVAL;
310	case CSB_CC_INVALID_DDE:	/* P9 or later */
311		/*
312		 * shouldn't happen, setup_direct/indirect_dde creates
313		 * DDE right
314		 */
315		CSB_ERR(csb, "Invalid DDE");
316		return -EINVAL;
317	case CSB_CC_SEGMENTED_DDL:
318		/* shouldn't happen, setup_ddl creates DDL right */
319		CSB_ERR(csb, "Segmented DDL error");
320		return -EINVAL;
321	case CSB_CC_DDE_OVERFLOW:
322		/* shouldn't happen, setup_ddl creates DDL right */
323		CSB_ERR(csb, "DDE overflow error");
324		return -EINVAL;
325	case CSB_CC_SESSION:
326		/* should not happen with ICSWX */
327		CSB_ERR(csb, "Session violation error");
328		return -EPROTO;
329	case CSB_CC_CHAIN:
330		/* should not happen, we don't use chained CRBs */
331		CSB_ERR(csb, "Chained CRB error");
332		return -EPROTO;
333	case CSB_CC_SEQUENCE:
334		/* should not happen, we don't use chained CRBs */
335		CSB_ERR(csb, "CRB sequence number error");
336		return -EPROTO;
337	case CSB_CC_UNKNOWN_CODE:
338		CSB_ERR(csb, "Unknown subfunction code");
339		return -EPROTO;
340
341	/* hardware errors */
342	case CSB_CC_RD_EXTERNAL:
343	case CSB_CC_RD_EXTERNAL_DUP1:
344	case CSB_CC_RD_EXTERNAL_DUP2:
345	case CSB_CC_RD_EXTERNAL_DUP3:
346		CSB_ERR_ADDR(csb, "Read error outside coprocessor");
347		return -EPROTO;
348	case CSB_CC_WR_EXTERNAL:
349		CSB_ERR_ADDR(csb, "Write error outside coprocessor");
350		return -EPROTO;
351	case CSB_CC_INTERNAL:
352		CSB_ERR(csb, "Internal error in coprocessor");
353		return -EPROTO;
354	case CSB_CC_PROVISION:
355		CSB_ERR(csb, "Storage provision error");
356		return -EPROTO;
357	case CSB_CC_HW:
358		CSB_ERR(csb, "Correctable hardware error");
359		return -EPROTO;
360	case CSB_CC_HW_EXPIRED_TIMER:	/* P9 or later */
361		CSB_ERR(csb, "Job did not finish within allowed time");
362		return -EPROTO;
363
364	default:
365		CSB_ERR(csb, "Invalid CC %d", csb->cc);
366		return -EPROTO;
367	}
368
369	/* check Completion Extension state */
370	if (csb->ce & CSB_CE_TERMINATION) {
371		CSB_ERR(csb, "CSB request was terminated");
372		return -EPROTO;
373	}
374	if (csb->ce & CSB_CE_INCOMPLETE) {
375		CSB_ERR(csb, "CSB request not complete");
376		return -EPROTO;
377	}
378	if (!(csb->ce & CSB_CE_TPBC)) {
379		CSB_ERR(csb, "TPBC not provided, unknown target length");
380		return -EPROTO;
381	}
382
383	/* successful completion */
384	pr_debug_ratelimited("Processed %u bytes in %lu us\n",
385			     be32_to_cpu(csb->count),
386			     (unsigned long)ktime_us_delta(now, start));
387
388	return 0;
389}
390
391static int nx842_config_crb(const unsigned char *in, unsigned int inlen,
392			unsigned char *out, unsigned int outlen,
393			struct nx842_workmem *wmem)
394{
395	struct coprocessor_request_block *crb;
396	struct coprocessor_status_block *csb;
397	u64 csb_addr;
398	int ret;
399
400	crb = &wmem->crb;
401	csb = &crb->csb;
402
403	/* Clear any previous values */
404	memset(crb, 0, sizeof(*crb));
405
406	/* set up DDLs */
407	ret = setup_ddl(&crb->source, wmem->ddl_in,
408			(unsigned char *)in, inlen, true);
409	if (ret)
410		return ret;
411
412	ret = setup_ddl(&crb->target, wmem->ddl_out,
413			out, outlen, false);
414	if (ret)
415		return ret;
416
417	/* set up CRB's CSB addr */
418	csb_addr = nx842_get_pa(csb) & CRB_CSB_ADDRESS;
419	csb_addr |= CRB_CSB_AT; /* Addrs are phys */
420	crb->csb_addr = cpu_to_be64(csb_addr);
421
422	return 0;
423}
424
425/**
426 * nx842_exec_icswx - compress/decompress data using the 842 algorithm
427 *
428 * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems.
429 * This compresses or decompresses the provided input buffer into the provided
430 * output buffer.
431 *
432 * Upon return from this function @outlen contains the length of the
433 * output data.  If there is an error then @outlen will be 0 and an
434 * error will be specified by the return code from this function.
435 *
436 * The @workmem buffer should only be used by one function call at a time.
437 *
438 * @in: input buffer pointer
439 * @inlen: input buffer size
440 * @out: output buffer pointer
441 * @outlenp: output buffer size pointer
442 * @workmem: working memory buffer pointer, size determined by
443 *           nx842_powernv_driver.workmem_size
444 * @fc: function code, see CCW Function Codes in nx-842.h
445 *
446 * Returns:
447 *   0		Success, output of length @outlenp stored in the buffer at @out
448 *   -ENODEV	Hardware unavailable
449 *   -ENOSPC	Output buffer is to small
450 *   -EMSGSIZE	Input buffer too large
451 *   -EINVAL	buffer constraints do not fix nx842_constraints
452 *   -EPROTO	hardware error during operation
453 *   -ETIMEDOUT	hardware did not complete operation in reasonable time
454 *   -EINTR	operation was aborted
455 */
456static int nx842_exec_icswx(const unsigned char *in, unsigned int inlen,
457				  unsigned char *out, unsigned int *outlenp,
458				  void *workmem, int fc)
459{
460	struct coprocessor_request_block *crb;
461	struct coprocessor_status_block *csb;
462	struct nx842_workmem *wmem;
463	int ret;
464	u32 ccw;
465	unsigned int outlen = *outlenp;
466
467	wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN);
468
469	*outlenp = 0;
470
471	/* shoudn't happen, we don't load without a coproc */
472	if (!nx842_ct) {
473		pr_err_ratelimited("coprocessor CT is 0");
474		return -ENODEV;
475	}
476
477	ret = nx842_config_crb(in, inlen, out, outlen, wmem);
478	if (ret)
479		return ret;
480
481	crb = &wmem->crb;
482	csb = &crb->csb;
483
484	/* set up CCW */
485	ccw = 0;
486	ccw = SET_FIELD(CCW_CT, ccw, nx842_ct);
487	ccw = SET_FIELD(CCW_CI_842, ccw, 0); /* use 0 for hw auto-selection */
488	ccw = SET_FIELD(CCW_FC_842, ccw, fc);
489
490	wmem->start = ktime_get();
491
492	/* do ICSWX */
493	ret = icswx(cpu_to_be32(ccw), crb);
494
495	pr_debug_ratelimited("icswx CR %x ccw %x crb->ccw %x\n", ret,
496			     (unsigned int)ccw,
497			     (unsigned int)be32_to_cpu(crb->ccw));
498
499	/*
500	 * NX842 coprocessor sets 3rd bit in CR register with XER[S0].
501	 * XER[S0] is the integer summary overflow bit which is nothing
502	 * to do NX. Since this bit can be set with other return values,
503	 * mask this bit.
504	 */
505	ret &= ~ICSWX_XERS0;
506
507	switch (ret) {
508	case ICSWX_INITIATED:
509		ret = wait_for_csb(wmem, csb);
510		break;
511	case ICSWX_BUSY:
512		pr_debug_ratelimited("842 Coprocessor busy\n");
513		ret = -EBUSY;
514		break;
515	case ICSWX_REJECTED:
516		pr_err_ratelimited("ICSWX rejected\n");
517		ret = -EPROTO;
518		break;
519	}
520
521	if (!ret)
522		*outlenp = be32_to_cpu(csb->count);
523
524	return ret;
525}
526
527/**
528 * nx842_exec_vas - compress/decompress data using the 842 algorithm
529 *
530 * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems.
531 * This compresses or decompresses the provided input buffer into the provided
532 * output buffer.
533 *
534 * Upon return from this function @outlen contains the length of the
535 * output data.  If there is an error then @outlen will be 0 and an
536 * error will be specified by the return code from this function.
537 *
538 * The @workmem buffer should only be used by one function call at a time.
539 *
540 * @in: input buffer pointer
541 * @inlen: input buffer size
542 * @out: output buffer pointer
543 * @outlenp: output buffer size pointer
544 * @workmem: working memory buffer pointer, size determined by
545 *           nx842_powernv_driver.workmem_size
546 * @fc: function code, see CCW Function Codes in nx-842.h
547 *
548 * Returns:
549 *   0		Success, output of length @outlenp stored in the buffer
550 *		at @out
551 *   -ENODEV	Hardware unavailable
552 *   -ENOSPC	Output buffer is to small
553 *   -EMSGSIZE	Input buffer too large
554 *   -EINVAL	buffer constraints do not fix nx842_constraints
555 *   -EPROTO	hardware error during operation
556 *   -ETIMEDOUT	hardware did not complete operation in reasonable time
557 *   -EINTR	operation was aborted
558 */
559static int nx842_exec_vas(const unsigned char *in, unsigned int inlen,
560				  unsigned char *out, unsigned int *outlenp,
561				  void *workmem, int fc)
562{
563	struct coprocessor_request_block *crb;
564	struct coprocessor_status_block *csb;
565	struct nx842_workmem *wmem;
566	struct vas_window *txwin;
567	int ret, i = 0;
568	u32 ccw;
569	unsigned int outlen = *outlenp;
570
571	wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN);
572
573	*outlenp = 0;
574
575	crb = &wmem->crb;
576	csb = &crb->csb;
577
578	ret = nx842_config_crb(in, inlen, out, outlen, wmem);
579	if (ret)
580		return ret;
581
582	ccw = 0;
583	ccw = SET_FIELD(CCW_FC_842, ccw, fc);
584	crb->ccw = cpu_to_be32(ccw);
585
586	do {
587		wmem->start = ktime_get();
588		preempt_disable();
589		txwin = this_cpu_read(cpu_txwin);
590
591		/*
592		 * VAS copy CRB into L2 cache. Refer <asm/vas.h>.
593		 * @crb and @offset.
594		 */
595		vas_copy_crb(crb, 0);
596
597		/*
598		 * VAS paste previously copied CRB to NX.
599		 * @txwin, @offset and @last (must be true).
600		 */
601		ret = vas_paste_crb(txwin, 0, 1);
602		preempt_enable();
603		/*
604		 * Retry copy/paste function for VAS failures.
605		 */
606	} while (ret && (i++ < VAS_RETRIES));
607
608	if (ret) {
609		pr_err_ratelimited("VAS copy/paste failed\n");
610		return ret;
611	}
612
613	ret = wait_for_csb(wmem, csb);
614	if (!ret)
615		*outlenp = be32_to_cpu(csb->count);
616
617	return ret;
618}
619
620/**
621 * nx842_powernv_compress - Compress data using the 842 algorithm
622 *
623 * Compression provided by the NX842 coprocessor on IBM PowerNV systems.
624 * The input buffer is compressed and the result is stored in the
625 * provided output buffer.
626 *
627 * Upon return from this function @outlen contains the length of the
628 * compressed data.  If there is an error then @outlen will be 0 and an
629 * error will be specified by the return code from this function.
630 *
631 * @in: input buffer pointer
632 * @inlen: input buffer size
633 * @out: output buffer pointer
634 * @outlenp: output buffer size pointer
635 * @workmem: working memory buffer pointer, size determined by
636 *           nx842_powernv_driver.workmem_size
637 *
638 * Returns: see @nx842_powernv_exec()
639 */
640static int nx842_powernv_compress(const unsigned char *in, unsigned int inlen,
641				  unsigned char *out, unsigned int *outlenp,
642				  void *wmem)
643{
644	return nx842_powernv_exec(in, inlen, out, outlenp,
645				      wmem, CCW_FC_842_COMP_CRC);
646}
647
648/**
649 * nx842_powernv_decompress - Decompress data using the 842 algorithm
650 *
651 * Decompression provided by the NX842 coprocessor on IBM PowerNV systems.
652 * The input buffer is decompressed and the result is stored in the
653 * provided output buffer.
654 *
655 * Upon return from this function @outlen contains the length of the
656 * decompressed data.  If there is an error then @outlen will be 0 and an
657 * error will be specified by the return code from this function.
658 *
659 * @in: input buffer pointer
660 * @inlen: input buffer size
661 * @out: output buffer pointer
662 * @outlenp: output buffer size pointer
663 * @workmem: working memory buffer pointer, size determined by
664 *           nx842_powernv_driver.workmem_size
665 *
666 * Returns: see @nx842_powernv_exec()
667 */
668static int nx842_powernv_decompress(const unsigned char *in, unsigned int inlen,
669				    unsigned char *out, unsigned int *outlenp,
670				    void *wmem)
671{
672	return nx842_powernv_exec(in, inlen, out, outlenp,
673				      wmem, CCW_FC_842_DECOMP_CRC);
674}
675
676static inline void nx_add_coprocs_list(struct nx_coproc *coproc,
677					int chipid)
678{
679	coproc->chip_id = chipid;
680	INIT_LIST_HEAD(&coproc->list);
681	list_add(&coproc->list, &nx_coprocs);
682}
683
684static struct vas_window *nx_alloc_txwin(struct nx_coproc *coproc)
685{
686	struct vas_window *txwin = NULL;
687	struct vas_tx_win_attr txattr;
688
689	/*
690	 * Kernel requests will be high priority. So open send
691	 * windows only for high priority RxFIFO entries.
692	 */
693	vas_init_tx_win_attr(&txattr, coproc->ct);
694	txattr.lpid = 0;	/* lpid is 0 for kernel requests */
695
696	/*
697	 * Open a VAS send window which is used to send request to NX.
698	 */
699	txwin = vas_tx_win_open(coproc->vas.id, coproc->ct, &txattr);
700	if (IS_ERR(txwin))
701		pr_err("ibm,nx-842: Can not open TX window: %ld\n",
702				PTR_ERR(txwin));
703
704	return txwin;
705}
706
707/*
708 * Identify chip ID for each CPU, open send wndow for the corresponding NX
709 * engine and save txwin in percpu cpu_txwin.
710 * cpu_txwin is used in copy/paste operation for each compression /
711 * decompression request.
712 */
713static int nx_open_percpu_txwins(void)
714{
715	struct nx_coproc *coproc, *n;
716	unsigned int i, chip_id;
717
718	for_each_possible_cpu(i) {
719		struct vas_window *txwin = NULL;
720
721		chip_id = cpu_to_chip_id(i);
722
723		list_for_each_entry_safe(coproc, n, &nx_coprocs, list) {
724			/*
725			 * Kernel requests use only high priority FIFOs. So
726			 * open send windows for these FIFOs.
727			 * GZIP is not supported in kernel right now.
728			 */
729
730			if (coproc->ct != VAS_COP_TYPE_842_HIPRI)
731				continue;
732
733			if (coproc->chip_id == chip_id) {
734				txwin = nx_alloc_txwin(coproc);
735				if (IS_ERR(txwin))
736					return PTR_ERR(txwin);
737
738				per_cpu(cpu_txwin, i) = txwin;
739				break;
740			}
741		}
742
743		if (!per_cpu(cpu_txwin, i)) {
744			/* shouldn't happen, Each chip will have NX engine */
745			pr_err("NX engine is not available for CPU %d\n", i);
746			return -EINVAL;
747		}
748	}
749
750	return 0;
751}
752
753static int __init nx_set_ct(struct nx_coproc *coproc, const char *priority,
754				int high, int normal)
755{
756	if (!strcmp(priority, "High"))
757		coproc->ct = high;
758	else if (!strcmp(priority, "Normal"))
759		coproc->ct = normal;
760	else {
761		pr_err("Invalid RxFIFO priority value\n");
762		return -EINVAL;
763	}
764
765	return 0;
766}
767
768static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id,
769					int vasid, int type, int *ct)
770{
771	struct vas_window *rxwin = NULL;
772	struct vas_rx_win_attr rxattr;
773	u32 lpid, pid, tid, fifo_size;
774	struct nx_coproc *coproc;
775	u64 rx_fifo;
776	const char *priority;
777	int ret;
778
779	ret = of_property_read_u64(dn, "rx-fifo-address", &rx_fifo);
780	if (ret) {
781		pr_err("Missing rx-fifo-address property\n");
782		return ret;
783	}
784
785	ret = of_property_read_u32(dn, "rx-fifo-size", &fifo_size);
786	if (ret) {
787		pr_err("Missing rx-fifo-size property\n");
788		return ret;
789	}
790
791	ret = of_property_read_u32(dn, "lpid", &lpid);
792	if (ret) {
793		pr_err("Missing lpid property\n");
794		return ret;
795	}
796
797	ret = of_property_read_u32(dn, "pid", &pid);
798	if (ret) {
799		pr_err("Missing pid property\n");
800		return ret;
801	}
802
803	ret = of_property_read_u32(dn, "tid", &tid);
804	if (ret) {
805		pr_err("Missing tid property\n");
806		return ret;
807	}
808
809	ret = of_property_read_string(dn, "priority", &priority);
810	if (ret) {
811		pr_err("Missing priority property\n");
812		return ret;
813	}
814
815	coproc = kzalloc(sizeof(*coproc), GFP_KERNEL);
816	if (!coproc)
817		return -ENOMEM;
818
819	if (type == NX_CT_842)
820		ret = nx_set_ct(coproc, priority, VAS_COP_TYPE_842_HIPRI,
821			VAS_COP_TYPE_842);
822	else if (type == NX_CT_GZIP)
823		ret = nx_set_ct(coproc, priority, VAS_COP_TYPE_GZIP_HIPRI,
824				VAS_COP_TYPE_GZIP);
825
826	if (ret)
827		goto err_out;
828
829	vas_init_rx_win_attr(&rxattr, coproc->ct);
830	rxattr.rx_fifo = rx_fifo;
831	rxattr.rx_fifo_size = fifo_size;
832	rxattr.lnotify_lpid = lpid;
833	rxattr.lnotify_pid = pid;
834	rxattr.lnotify_tid = tid;
835	/*
836	 * Maximum RX window credits can not be more than #CRBs in
837	 * RxFIFO. Otherwise, can get checkstop if RxFIFO overruns.
838	 */
839	rxattr.wcreds_max = fifo_size / CRB_SIZE;
840
841	/*
842	 * Open a VAS receice window which is used to configure RxFIFO
843	 * for NX.
844	 */
845	rxwin = vas_rx_win_open(vasid, coproc->ct, &rxattr);
846	if (IS_ERR(rxwin)) {
847		ret = PTR_ERR(rxwin);
848		pr_err("setting RxFIFO with VAS failed: %d\n",
849			ret);
850		goto err_out;
851	}
852
853	coproc->vas.rxwin = rxwin;
854	coproc->vas.id = vasid;
855	nx_add_coprocs_list(coproc, chip_id);
856
857	/*
858	 * (lpid, pid, tid) combination has to be unique for each
859	 * coprocessor instance in the system. So to make it
860	 * unique, skiboot uses coprocessor type such as 842 or
861	 * GZIP for pid and provides this value to kernel in pid
862	 * device-tree property.
863	 */
864	*ct = pid;
865
866	return 0;
867
868err_out:
869	kfree(coproc);
870	return ret;
871}
872
873static int __init nx_coproc_init(int chip_id, int ct_842, int ct_gzip)
874{
875	int ret = 0;
876
877	if (opal_check_token(OPAL_NX_COPROC_INIT)) {
878		ret = opal_nx_coproc_init(chip_id, ct_842);
879
880		if (!ret)
881			ret = opal_nx_coproc_init(chip_id, ct_gzip);
882
883		if (ret) {
884			ret = opal_error_code(ret);
885			pr_err("Failed to initialize NX for chip(%d): %d\n",
886				chip_id, ret);
887		}
888	} else
889		pr_warn("Firmware doesn't support NX initialization\n");
890
891	return ret;
892}
893
894static int __init find_nx_device_tree(struct device_node *dn, int chip_id,
895					int vasid, int type, char *devname,
896					int *ct)
897{
898	int ret = 0;
899
900	if (of_device_is_compatible(dn, devname)) {
901		ret  = vas_cfg_coproc_info(dn, chip_id, vasid, type, ct);
902		if (ret)
903			of_node_put(dn);
904	}
905
906	return ret;
907}
908
909static int __init nx_powernv_probe_vas(struct device_node *pn)
910{
911	int chip_id, vasid, ret = 0;
912	int ct_842 = 0, ct_gzip = 0;
913	struct device_node *dn;
914
915	chip_id = of_get_ibm_chip_id(pn);
916	if (chip_id < 0) {
917		pr_err("ibm,chip-id missing\n");
918		return -EINVAL;
919	}
920
921	vasid = chip_to_vas_id(chip_id);
922	if (vasid < 0) {
923		pr_err("Unable to map chip_id %d to vasid\n", chip_id);
924		return -EINVAL;
925	}
926
927	for_each_child_of_node(pn, dn) {
928		ret = find_nx_device_tree(dn, chip_id, vasid, NX_CT_842,
929					"ibm,p9-nx-842", &ct_842);
930
931		if (!ret)
932			ret = find_nx_device_tree(dn, chip_id, vasid,
933				NX_CT_GZIP, "ibm,p9-nx-gzip", &ct_gzip);
934
935		if (ret) {
936			of_node_put(dn);
937			return ret;
938		}
939	}
940
941	if (!ct_842 || !ct_gzip) {
942		pr_err("NX FIFO nodes are missing\n");
943		return -EINVAL;
944	}
945
946	/*
947	 * Initialize NX instance for both high and normal priority FIFOs.
948	 */
949	ret = nx_coproc_init(chip_id, ct_842, ct_gzip);
950
951	return ret;
952}
953
954static int __init nx842_powernv_probe(struct device_node *dn)
955{
956	struct nx_coproc *coproc;
957	unsigned int ct, ci;
958	int chip_id;
959
960	chip_id = of_get_ibm_chip_id(dn);
961	if (chip_id < 0) {
962		pr_err("ibm,chip-id missing\n");
963		return -EINVAL;
964	}
965
966	if (of_property_read_u32(dn, "ibm,842-coprocessor-type", &ct)) {
967		pr_err("ibm,842-coprocessor-type missing\n");
968		return -EINVAL;
969	}
970
971	if (of_property_read_u32(dn, "ibm,842-coprocessor-instance", &ci)) {
972		pr_err("ibm,842-coprocessor-instance missing\n");
973		return -EINVAL;
974	}
975
976	coproc = kzalloc(sizeof(*coproc), GFP_KERNEL);
977	if (!coproc)
978		return -ENOMEM;
979
980	coproc->ct = ct;
981	coproc->ci = ci;
982	nx_add_coprocs_list(coproc, chip_id);
983
984	pr_info("coprocessor found on chip %d, CT %d CI %d\n", chip_id, ct, ci);
985
986	if (!nx842_ct)
987		nx842_ct = ct;
988	else if (nx842_ct != ct)
989		pr_err("NX842 chip %d, CT %d != first found CT %d\n",
990		       chip_id, ct, nx842_ct);
991
992	return 0;
993}
994
995static void nx_delete_coprocs(void)
996{
997	struct nx_coproc *coproc, *n;
998	struct vas_window *txwin;
999	int i;
1000
1001	/*
1002	 * close percpu txwins that are opened for the corresponding coproc.
1003	 */
1004	for_each_possible_cpu(i) {
1005		txwin = per_cpu(cpu_txwin, i);
1006		if (txwin)
1007			vas_win_close(txwin);
1008
1009		per_cpu(cpu_txwin, i) = NULL;
1010	}
1011
1012	list_for_each_entry_safe(coproc, n, &nx_coprocs, list) {
1013		if (coproc->vas.rxwin)
1014			vas_win_close(coproc->vas.rxwin);
1015
1016		list_del(&coproc->list);
1017		kfree(coproc);
1018	}
1019}
1020
1021static struct nx842_constraints nx842_powernv_constraints = {
1022	.alignment =	DDE_BUFFER_ALIGN,
1023	.multiple =	DDE_BUFFER_LAST_MULT,
1024	.minimum =	DDE_BUFFER_LAST_MULT,
1025	.maximum =	(DDL_LEN_MAX - 1) * PAGE_SIZE,
1026};
1027
1028static struct nx842_driver nx842_powernv_driver = {
1029	.name =		KBUILD_MODNAME,
1030	.owner =	THIS_MODULE,
1031	.workmem_size =	sizeof(struct nx842_workmem),
1032	.constraints =	&nx842_powernv_constraints,
1033	.compress =	nx842_powernv_compress,
1034	.decompress =	nx842_powernv_decompress,
1035};
1036
1037static int nx842_powernv_crypto_init(struct crypto_tfm *tfm)
1038{
1039	return nx842_crypto_init(tfm, &nx842_powernv_driver);
1040}
1041
1042static struct crypto_alg nx842_powernv_alg = {
1043	.cra_name		= "842",
1044	.cra_driver_name	= "842-nx",
1045	.cra_priority		= 300,
1046	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
1047	.cra_ctxsize		= sizeof(struct nx842_crypto_ctx),
1048	.cra_module		= THIS_MODULE,
1049	.cra_init		= nx842_powernv_crypto_init,
1050	.cra_exit		= nx842_crypto_exit,
1051	.cra_u			= { .compress = {
1052	.coa_compress		= nx842_crypto_compress,
1053	.coa_decompress		= nx842_crypto_decompress } }
1054};
1055
1056static __init int nx_compress_powernv_init(void)
1057{
1058	struct device_node *dn;
1059	int ret;
1060
1061	/* verify workmem size/align restrictions */
1062	BUILD_BUG_ON(WORKMEM_ALIGN % CRB_ALIGN);
1063	BUILD_BUG_ON(CRB_ALIGN % DDE_ALIGN);
1064	BUILD_BUG_ON(CRB_SIZE % DDE_ALIGN);
1065	/* verify buffer size/align restrictions */
1066	BUILD_BUG_ON(PAGE_SIZE % DDE_BUFFER_ALIGN);
1067	BUILD_BUG_ON(DDE_BUFFER_ALIGN % DDE_BUFFER_SIZE_MULT);
1068	BUILD_BUG_ON(DDE_BUFFER_SIZE_MULT % DDE_BUFFER_LAST_MULT);
1069
1070	for_each_compatible_node(dn, NULL, "ibm,power9-nx") {
1071		ret = nx_powernv_probe_vas(dn);
1072		if (ret) {
1073			nx_delete_coprocs();
1074			of_node_put(dn);
1075			return ret;
1076		}
1077	}
1078
1079	if (list_empty(&nx_coprocs)) {
1080		for_each_compatible_node(dn, NULL, "ibm,power-nx")
1081			nx842_powernv_probe(dn);
1082
1083		if (!nx842_ct)
1084			return -ENODEV;
1085
1086		nx842_powernv_exec = nx842_exec_icswx;
1087	} else {
1088		/*
1089		 * Register VAS user space API for NX GZIP so
1090		 * that user space can use GZIP engine.
1091		 * Using high FIFO priority for kernel requests and
1092		 * normal FIFO priority is assigned for userspace.
1093		 * 842 compression is supported only in kernel.
1094		 */
1095		ret = vas_register_coproc_api(THIS_MODULE, VAS_COP_TYPE_GZIP,
1096						"nx-gzip");
1097
1098		/*
1099		 * GZIP is not supported in kernel right now.
1100		 * So open tx windows only for 842.
1101		 */
1102		if (!ret)
1103			ret = nx_open_percpu_txwins();
1104
1105		if (ret) {
1106			nx_delete_coprocs();
1107			return ret;
1108		}
1109
1110		nx842_powernv_exec = nx842_exec_vas;
1111	}
1112
1113	ret = crypto_register_alg(&nx842_powernv_alg);
1114	if (ret) {
1115		nx_delete_coprocs();
1116		return ret;
1117	}
1118
1119	return 0;
1120}
1121module_init(nx_compress_powernv_init);
1122
1123static void __exit nx_compress_powernv_exit(void)
1124{
1125	/*
1126	 * GZIP engine is supported only in power9 or later and nx842_ct
1127	 * is used on power8 (icswx).
1128	 * VAS API for NX GZIP is registered during init for user space
1129	 * use. So delete this API use for GZIP engine.
1130	 */
1131	if (!nx842_ct)
1132		vas_unregister_coproc_api();
1133
1134	crypto_unregister_alg(&nx842_powernv_alg);
1135
1136	nx_delete_coprocs();
1137}
1138module_exit(nx_compress_powernv_exit);
1139