// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2016-2018 Netronome Systems, Inc. */
362306a36Sopenharmony_ci
462306a36Sopenharmony_ci#define pr_fmt(fmt)	"NFP net bpf: " fmt
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <linux/bug.h>
762306a36Sopenharmony_ci#include <linux/bpf.h>
862306a36Sopenharmony_ci#include <linux/filter.h>
962306a36Sopenharmony_ci#include <linux/kernel.h>
1062306a36Sopenharmony_ci#include <linux/pkt_cls.h>
1162306a36Sopenharmony_ci#include <linux/reciprocal_div.h>
1262306a36Sopenharmony_ci#include <linux/unistd.h>
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_ci#include "main.h"
1562306a36Sopenharmony_ci#include "../nfp_asm.h"
1662306a36Sopenharmony_ci#include "../nfp_net_ctrl.h"
1762306a36Sopenharmony_ci
/* --- NFP prog --- */
/* The multi-entry "for each" macros below provide a pos pointer plus next<n>
 * pointers. It's safe to modify the next pointers (but not pos).
 */
/* Walk the instruction list with a window of two consecutive entries, @pos and
 * @next. The walk stops as soon as either of them would be the list head
 * (&(nfp_prog)->insns) itself.
 */
#define nfp_for_each_insn_walk2(nfp_prog, pos, next)			\
	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
	     next = list_next_entry(pos, l);				\
	     &pos->l != &(nfp_prog)->insns &&				\
	     &next->l != &(nfp_prog)->insns;				\
	     pos = nfp_meta_next(pos),					\
	     next = nfp_meta_next(pos))
2962306a36Sopenharmony_ci
/* Walk the instruction list with a window of three consecutive entries, @pos,
 * @next and @next2. The walk stops as soon as any of them would be the list
 * head (&(nfp_prog)->insns) itself.
 */
#define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2)		\
	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
	     next = list_next_entry(pos, l),				\
	     next2 = list_next_entry(next, l);				\
	     &pos->l != &(nfp_prog)->insns &&				\
	     &next->l != &(nfp_prog)->insns &&				\
	     &next2->l != &(nfp_prog)->insns;				\
	     pos = nfp_meta_next(pos),					\
	     next = nfp_meta_next(pos),					\
	     next2 = nfp_meta_next(next))
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_cistatic bool
4262306a36Sopenharmony_cinfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
4362306a36Sopenharmony_ci{
4462306a36Sopenharmony_ci	return meta->l.prev != &nfp_prog->insns;
4562306a36Sopenharmony_ci}
4662306a36Sopenharmony_ci
4762306a36Sopenharmony_cistatic void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn)
4862306a36Sopenharmony_ci{
4962306a36Sopenharmony_ci	if (nfp_prog->__prog_alloc_len / sizeof(u64) == nfp_prog->prog_len) {
5062306a36Sopenharmony_ci		pr_warn("instruction limit reached (%u NFP instructions)\n",
5162306a36Sopenharmony_ci			nfp_prog->prog_len);
5262306a36Sopenharmony_ci		nfp_prog->error = -ENOSPC;
5362306a36Sopenharmony_ci		return;
5462306a36Sopenharmony_ci	}
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_ci	nfp_prog->prog[nfp_prog->prog_len] = insn;
5762306a36Sopenharmony_ci	nfp_prog->prog_len++;
5862306a36Sopenharmony_ci}
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_cistatic unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog)
6162306a36Sopenharmony_ci{
6262306a36Sopenharmony_ci	return nfp_prog->prog_len;
6362306a36Sopenharmony_ci}
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_cistatic bool
6662306a36Sopenharmony_cinfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off)
6762306a36Sopenharmony_ci{
6862306a36Sopenharmony_ci	/* If there is a recorded error we may have dropped instructions;
6962306a36Sopenharmony_ci	 * that doesn't have to be due to translator bug, and the translation
7062306a36Sopenharmony_ci	 * will fail anyway, so just return OK.
7162306a36Sopenharmony_ci	 */
7262306a36Sopenharmony_ci	if (nfp_prog->error)
7362306a36Sopenharmony_ci		return true;
7462306a36Sopenharmony_ci	return !WARN_ON_ONCE(nfp_prog_current_offset(nfp_prog) != off);
7562306a36Sopenharmony_ci}
7662306a36Sopenharmony_ci
/* --- Emitters --- */
7862306a36Sopenharmony_cistatic void
7962306a36Sopenharmony_ci__emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
8062306a36Sopenharmony_ci	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, enum cmd_ctx_swap ctx,
8162306a36Sopenharmony_ci	   bool indir)
8262306a36Sopenharmony_ci{
8362306a36Sopenharmony_ci	u64 insn;
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci	insn =	FIELD_PREP(OP_CMD_A_SRC, areg) |
8662306a36Sopenharmony_ci		FIELD_PREP(OP_CMD_CTX, ctx) |
8762306a36Sopenharmony_ci		FIELD_PREP(OP_CMD_B_SRC, breg) |
8862306a36Sopenharmony_ci		FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) |
8962306a36Sopenharmony_ci		FIELD_PREP(OP_CMD_XFER, xfer) |
9062306a36Sopenharmony_ci		FIELD_PREP(OP_CMD_CNT, size) |
9162306a36Sopenharmony_ci		FIELD_PREP(OP_CMD_SIG, ctx != CMD_CTX_NO_SWAP) |
9262306a36Sopenharmony_ci		FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) |
9362306a36Sopenharmony_ci		FIELD_PREP(OP_CMD_INDIR, indir) |
9462306a36Sopenharmony_ci		FIELD_PREP(OP_CMD_MODE, mode);
9562306a36Sopenharmony_ci
9662306a36Sopenharmony_ci	nfp_prog_push(nfp_prog, insn);
9762306a36Sopenharmony_ci}
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_cistatic void
10062306a36Sopenharmony_ciemit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
10162306a36Sopenharmony_ci	     swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx, bool indir)
10262306a36Sopenharmony_ci{
10362306a36Sopenharmony_ci	struct nfp_insn_re_regs reg;
10462306a36Sopenharmony_ci	int err;
10562306a36Sopenharmony_ci
10662306a36Sopenharmony_ci	err = swreg_to_restricted(reg_none(), lreg, rreg, &reg, false);
10762306a36Sopenharmony_ci	if (err) {
10862306a36Sopenharmony_ci		nfp_prog->error = err;
10962306a36Sopenharmony_ci		return;
11062306a36Sopenharmony_ci	}
11162306a36Sopenharmony_ci	if (reg.swap) {
11262306a36Sopenharmony_ci		pr_err("cmd can't swap arguments\n");
11362306a36Sopenharmony_ci		nfp_prog->error = -EFAULT;
11462306a36Sopenharmony_ci		return;
11562306a36Sopenharmony_ci	}
11662306a36Sopenharmony_ci	if (reg.dst_lmextn || reg.src_lmextn) {
11762306a36Sopenharmony_ci		pr_err("cmd can't use LMextn\n");
11862306a36Sopenharmony_ci		nfp_prog->error = -EFAULT;
11962306a36Sopenharmony_ci		return;
12062306a36Sopenharmony_ci	}
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_ci	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, ctx,
12362306a36Sopenharmony_ci		   indir);
12462306a36Sopenharmony_ci}
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_cistatic void
12762306a36Sopenharmony_ciemit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
12862306a36Sopenharmony_ci	 swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
12962306a36Sopenharmony_ci{
13062306a36Sopenharmony_ci	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, false);
13162306a36Sopenharmony_ci}
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_cistatic void
13462306a36Sopenharmony_ciemit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
13562306a36Sopenharmony_ci	       swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
13662306a36Sopenharmony_ci{
13762306a36Sopenharmony_ci	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, true);
13862306a36Sopenharmony_ci}
13962306a36Sopenharmony_ci
14062306a36Sopenharmony_cistatic void
14162306a36Sopenharmony_ci__emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip,
14262306a36Sopenharmony_ci	  enum br_ctx_signal_state css, u16 addr, u8 defer)
14362306a36Sopenharmony_ci{
14462306a36Sopenharmony_ci	u16 addr_lo, addr_hi;
14562306a36Sopenharmony_ci	u64 insn;
14662306a36Sopenharmony_ci
14762306a36Sopenharmony_ci	addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
14862306a36Sopenharmony_ci	addr_hi = addr != addr_lo;
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_ci	insn = OP_BR_BASE |
15162306a36Sopenharmony_ci		FIELD_PREP(OP_BR_MASK, mask) |
15262306a36Sopenharmony_ci		FIELD_PREP(OP_BR_EV_PIP, ev_pip) |
15362306a36Sopenharmony_ci		FIELD_PREP(OP_BR_CSS, css) |
15462306a36Sopenharmony_ci		FIELD_PREP(OP_BR_DEFBR, defer) |
15562306a36Sopenharmony_ci		FIELD_PREP(OP_BR_ADDR_LO, addr_lo) |
15662306a36Sopenharmony_ci		FIELD_PREP(OP_BR_ADDR_HI, addr_hi);
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_ci	nfp_prog_push(nfp_prog, insn);
15962306a36Sopenharmony_ci}
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_cistatic void
16262306a36Sopenharmony_ciemit_br_relo(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer,
16362306a36Sopenharmony_ci	     enum nfp_relo_type relo)
16462306a36Sopenharmony_ci{
16562306a36Sopenharmony_ci	if (mask == BR_UNC && defer > 2) {
16662306a36Sopenharmony_ci		pr_err("BUG: branch defer out of bounds %d\n", defer);
16762306a36Sopenharmony_ci		nfp_prog->error = -EFAULT;
16862306a36Sopenharmony_ci		return;
16962306a36Sopenharmony_ci	}
17062306a36Sopenharmony_ci
17162306a36Sopenharmony_ci	__emit_br(nfp_prog, mask,
17262306a36Sopenharmony_ci		  mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND,
17362306a36Sopenharmony_ci		  BR_CSS_NONE, addr, defer);
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_ci	nfp_prog->prog[nfp_prog->prog_len - 1] |=
17662306a36Sopenharmony_ci		FIELD_PREP(OP_RELO_TYPE, relo);
17762306a36Sopenharmony_ci}
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_cistatic void
18062306a36Sopenharmony_ciemit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
18162306a36Sopenharmony_ci{
18262306a36Sopenharmony_ci	emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL);
18362306a36Sopenharmony_ci}
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_cistatic void
18662306a36Sopenharmony_ci__emit_br_bit(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 addr, u8 defer,
18762306a36Sopenharmony_ci	      bool set, bool src_lmextn)
18862306a36Sopenharmony_ci{
18962306a36Sopenharmony_ci	u16 addr_lo, addr_hi;
19062306a36Sopenharmony_ci	u64 insn;
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_ci	addr_lo = addr & (OP_BR_BIT_ADDR_LO >> __bf_shf(OP_BR_BIT_ADDR_LO));
19362306a36Sopenharmony_ci	addr_hi = addr != addr_lo;
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci	insn = OP_BR_BIT_BASE |
19662306a36Sopenharmony_ci		FIELD_PREP(OP_BR_BIT_A_SRC, areg) |
19762306a36Sopenharmony_ci		FIELD_PREP(OP_BR_BIT_B_SRC, breg) |
19862306a36Sopenharmony_ci		FIELD_PREP(OP_BR_BIT_BV, set) |
19962306a36Sopenharmony_ci		FIELD_PREP(OP_BR_BIT_DEFBR, defer) |
20062306a36Sopenharmony_ci		FIELD_PREP(OP_BR_BIT_ADDR_LO, addr_lo) |
20162306a36Sopenharmony_ci		FIELD_PREP(OP_BR_BIT_ADDR_HI, addr_hi) |
20262306a36Sopenharmony_ci		FIELD_PREP(OP_BR_BIT_SRC_LMEXTN, src_lmextn);
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_ci	nfp_prog_push(nfp_prog, insn);
20562306a36Sopenharmony_ci}
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_cistatic void
20862306a36Sopenharmony_ciemit_br_bit_relo(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr,
20962306a36Sopenharmony_ci		 u8 defer, bool set, enum nfp_relo_type relo)
21062306a36Sopenharmony_ci{
21162306a36Sopenharmony_ci	struct nfp_insn_re_regs reg;
21262306a36Sopenharmony_ci	int err;
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_ci	/* NOTE: The bit to test is specified as an rotation amount, such that
21562306a36Sopenharmony_ci	 *	 the bit to test will be placed on the MSB of the result when
21662306a36Sopenharmony_ci	 *	 doing a rotate right. For bit X, we need right rotate X + 1.
21762306a36Sopenharmony_ci	 */
21862306a36Sopenharmony_ci	bit += 1;
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_ci	err = swreg_to_restricted(reg_none(), src, reg_imm(bit), &reg, false);
22162306a36Sopenharmony_ci	if (err) {
22262306a36Sopenharmony_ci		nfp_prog->error = err;
22362306a36Sopenharmony_ci		return;
22462306a36Sopenharmony_ci	}
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_ci	__emit_br_bit(nfp_prog, reg.areg, reg.breg, addr, defer, set,
22762306a36Sopenharmony_ci		      reg.src_lmextn);
22862306a36Sopenharmony_ci
22962306a36Sopenharmony_ci	nfp_prog->prog[nfp_prog->prog_len - 1] |=
23062306a36Sopenharmony_ci		FIELD_PREP(OP_RELO_TYPE, relo);
23162306a36Sopenharmony_ci}
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_cistatic void
23462306a36Sopenharmony_ciemit_br_bset(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, u8 defer)
23562306a36Sopenharmony_ci{
23662306a36Sopenharmony_ci	emit_br_bit_relo(nfp_prog, src, bit, addr, defer, true, RELO_BR_REL);
23762306a36Sopenharmony_ci}
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_cistatic void
24062306a36Sopenharmony_ci__emit_br_alu(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
24162306a36Sopenharmony_ci	      u8 defer, bool dst_lmextn, bool src_lmextn)
24262306a36Sopenharmony_ci{
24362306a36Sopenharmony_ci	u64 insn;
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_ci	insn = OP_BR_ALU_BASE |
24662306a36Sopenharmony_ci		FIELD_PREP(OP_BR_ALU_A_SRC, areg) |
24762306a36Sopenharmony_ci		FIELD_PREP(OP_BR_ALU_B_SRC, breg) |
24862306a36Sopenharmony_ci		FIELD_PREP(OP_BR_ALU_DEFBR, defer) |
24962306a36Sopenharmony_ci		FIELD_PREP(OP_BR_ALU_IMM_HI, imm_hi) |
25062306a36Sopenharmony_ci		FIELD_PREP(OP_BR_ALU_SRC_LMEXTN, src_lmextn) |
25162306a36Sopenharmony_ci		FIELD_PREP(OP_BR_ALU_DST_LMEXTN, dst_lmextn);
25262306a36Sopenharmony_ci
25362306a36Sopenharmony_ci	nfp_prog_push(nfp_prog, insn);
25462306a36Sopenharmony_ci}
25562306a36Sopenharmony_ci
25662306a36Sopenharmony_cistatic void emit_rtn(struct nfp_prog *nfp_prog, swreg base, u8 defer)
25762306a36Sopenharmony_ci{
25862306a36Sopenharmony_ci	struct nfp_insn_ur_regs reg;
25962306a36Sopenharmony_ci	int err;
26062306a36Sopenharmony_ci
26162306a36Sopenharmony_ci	err = swreg_to_unrestricted(reg_none(), base, reg_imm(0), &reg);
26262306a36Sopenharmony_ci	if (err) {
26362306a36Sopenharmony_ci		nfp_prog->error = err;
26462306a36Sopenharmony_ci		return;
26562306a36Sopenharmony_ci	}
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ci	__emit_br_alu(nfp_prog, reg.areg, reg.breg, 0, defer, reg.dst_lmextn,
26862306a36Sopenharmony_ci		      reg.src_lmextn);
26962306a36Sopenharmony_ci}
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_cistatic void
27262306a36Sopenharmony_ci__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
27362306a36Sopenharmony_ci	     enum immed_width width, bool invert,
27462306a36Sopenharmony_ci	     enum immed_shift shift, bool wr_both,
27562306a36Sopenharmony_ci	     bool dst_lmextn, bool src_lmextn)
27662306a36Sopenharmony_ci{
27762306a36Sopenharmony_ci	u64 insn;
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_ci	insn = OP_IMMED_BASE |
28062306a36Sopenharmony_ci		FIELD_PREP(OP_IMMED_A_SRC, areg) |
28162306a36Sopenharmony_ci		FIELD_PREP(OP_IMMED_B_SRC, breg) |
28262306a36Sopenharmony_ci		FIELD_PREP(OP_IMMED_IMM, imm_hi) |
28362306a36Sopenharmony_ci		FIELD_PREP(OP_IMMED_WIDTH, width) |
28462306a36Sopenharmony_ci		FIELD_PREP(OP_IMMED_INV, invert) |
28562306a36Sopenharmony_ci		FIELD_PREP(OP_IMMED_SHIFT, shift) |
28662306a36Sopenharmony_ci		FIELD_PREP(OP_IMMED_WR_AB, wr_both) |
28762306a36Sopenharmony_ci		FIELD_PREP(OP_IMMED_SRC_LMEXTN, src_lmextn) |
28862306a36Sopenharmony_ci		FIELD_PREP(OP_IMMED_DST_LMEXTN, dst_lmextn);
28962306a36Sopenharmony_ci
29062306a36Sopenharmony_ci	nfp_prog_push(nfp_prog, insn);
29162306a36Sopenharmony_ci}
29262306a36Sopenharmony_ci
29362306a36Sopenharmony_cistatic void
29462306a36Sopenharmony_ciemit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm,
29562306a36Sopenharmony_ci	   enum immed_width width, bool invert, enum immed_shift shift)
29662306a36Sopenharmony_ci{
29762306a36Sopenharmony_ci	struct nfp_insn_ur_regs reg;
29862306a36Sopenharmony_ci	int err;
29962306a36Sopenharmony_ci
30062306a36Sopenharmony_ci	if (swreg_type(dst) == NN_REG_IMM) {
30162306a36Sopenharmony_ci		nfp_prog->error = -EFAULT;
30262306a36Sopenharmony_ci		return;
30362306a36Sopenharmony_ci	}
30462306a36Sopenharmony_ci
30562306a36Sopenharmony_ci	err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), &reg);
30662306a36Sopenharmony_ci	if (err) {
30762306a36Sopenharmony_ci		nfp_prog->error = err;
30862306a36Sopenharmony_ci		return;
30962306a36Sopenharmony_ci	}
31062306a36Sopenharmony_ci
31162306a36Sopenharmony_ci	/* Use reg.dst when destination is No-Dest. */
31262306a36Sopenharmony_ci	__emit_immed(nfp_prog,
31362306a36Sopenharmony_ci		     swreg_type(dst) == NN_REG_NONE ? reg.dst : reg.areg,
31462306a36Sopenharmony_ci		     reg.breg, imm >> 8, width, invert, shift,
31562306a36Sopenharmony_ci		     reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
31662306a36Sopenharmony_ci}
31762306a36Sopenharmony_ci
31862306a36Sopenharmony_cistatic void
31962306a36Sopenharmony_ci__emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
32062306a36Sopenharmony_ci	   enum shf_sc sc, u8 shift,
32162306a36Sopenharmony_ci	   u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both,
32262306a36Sopenharmony_ci	   bool dst_lmextn, bool src_lmextn)
32362306a36Sopenharmony_ci{
32462306a36Sopenharmony_ci	u64 insn;
32562306a36Sopenharmony_ci
32662306a36Sopenharmony_ci	if (!FIELD_FIT(OP_SHF_SHIFT, shift)) {
32762306a36Sopenharmony_ci		nfp_prog->error = -EFAULT;
32862306a36Sopenharmony_ci		return;
32962306a36Sopenharmony_ci	}
33062306a36Sopenharmony_ci
33162306a36Sopenharmony_ci	/* NFP shift instruction has something special. If shift direction is
33262306a36Sopenharmony_ci	 * left then shift amount of 1 to 31 is specified as 32 minus the amount
33362306a36Sopenharmony_ci	 * to shift.
33462306a36Sopenharmony_ci	 *
33562306a36Sopenharmony_ci	 * But no need to do this for indirect shift which has shift amount be
33662306a36Sopenharmony_ci	 * 0. Even after we do this subtraction, shift amount 0 will be turned
33762306a36Sopenharmony_ci	 * into 32 which will eventually be encoded the same as 0 because only
33862306a36Sopenharmony_ci	 * low 5 bits are encoded, but shift amount be 32 will fail the
33962306a36Sopenharmony_ci	 * FIELD_PREP check done later on shift mask (0x1f), due to 32 is out of
34062306a36Sopenharmony_ci	 * mask range.
34162306a36Sopenharmony_ci	 */
34262306a36Sopenharmony_ci	if (sc == SHF_SC_L_SHF && shift)
34362306a36Sopenharmony_ci		shift = 32 - shift;
34462306a36Sopenharmony_ci
34562306a36Sopenharmony_ci	insn = OP_SHF_BASE |
34662306a36Sopenharmony_ci		FIELD_PREP(OP_SHF_A_SRC, areg) |
34762306a36Sopenharmony_ci		FIELD_PREP(OP_SHF_SC, sc) |
34862306a36Sopenharmony_ci		FIELD_PREP(OP_SHF_B_SRC, breg) |
34962306a36Sopenharmony_ci		FIELD_PREP(OP_SHF_I8, i8) |
35062306a36Sopenharmony_ci		FIELD_PREP(OP_SHF_SW, sw) |
35162306a36Sopenharmony_ci		FIELD_PREP(OP_SHF_DST, dst) |
35262306a36Sopenharmony_ci		FIELD_PREP(OP_SHF_SHIFT, shift) |
35362306a36Sopenharmony_ci		FIELD_PREP(OP_SHF_OP, op) |
35462306a36Sopenharmony_ci		FIELD_PREP(OP_SHF_DST_AB, dst_ab) |
35562306a36Sopenharmony_ci		FIELD_PREP(OP_SHF_WR_AB, wr_both) |
35662306a36Sopenharmony_ci		FIELD_PREP(OP_SHF_SRC_LMEXTN, src_lmextn) |
35762306a36Sopenharmony_ci		FIELD_PREP(OP_SHF_DST_LMEXTN, dst_lmextn);
35862306a36Sopenharmony_ci
35962306a36Sopenharmony_ci	nfp_prog_push(nfp_prog, insn);
36062306a36Sopenharmony_ci}
36162306a36Sopenharmony_ci
36262306a36Sopenharmony_cistatic void
36362306a36Sopenharmony_ciemit_shf(struct nfp_prog *nfp_prog, swreg dst,
36462306a36Sopenharmony_ci	 swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc, u8 shift)
36562306a36Sopenharmony_ci{
36662306a36Sopenharmony_ci	struct nfp_insn_re_regs reg;
36762306a36Sopenharmony_ci	int err;
36862306a36Sopenharmony_ci
36962306a36Sopenharmony_ci	err = swreg_to_restricted(dst, lreg, rreg, &reg, true);
37062306a36Sopenharmony_ci	if (err) {
37162306a36Sopenharmony_ci		nfp_prog->error = err;
37262306a36Sopenharmony_ci		return;
37362306a36Sopenharmony_ci	}
37462306a36Sopenharmony_ci
37562306a36Sopenharmony_ci	__emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift,
37662306a36Sopenharmony_ci		   reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both,
37762306a36Sopenharmony_ci		   reg.dst_lmextn, reg.src_lmextn);
37862306a36Sopenharmony_ci}
37962306a36Sopenharmony_ci
38062306a36Sopenharmony_cistatic void
38162306a36Sopenharmony_ciemit_shf_indir(struct nfp_prog *nfp_prog, swreg dst,
38262306a36Sopenharmony_ci	       swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc)
38362306a36Sopenharmony_ci{
38462306a36Sopenharmony_ci	if (sc == SHF_SC_R_ROT) {
38562306a36Sopenharmony_ci		pr_err("indirect shift is not allowed on rotation\n");
38662306a36Sopenharmony_ci		nfp_prog->error = -EFAULT;
38762306a36Sopenharmony_ci		return;
38862306a36Sopenharmony_ci	}
38962306a36Sopenharmony_ci
39062306a36Sopenharmony_ci	emit_shf(nfp_prog, dst, lreg, op, rreg, sc, 0);
39162306a36Sopenharmony_ci}
39262306a36Sopenharmony_ci
39362306a36Sopenharmony_cistatic void
39462306a36Sopenharmony_ci__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
39562306a36Sopenharmony_ci	   u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both,
39662306a36Sopenharmony_ci	   bool dst_lmextn, bool src_lmextn)
39762306a36Sopenharmony_ci{
39862306a36Sopenharmony_ci	u64 insn;
39962306a36Sopenharmony_ci
40062306a36Sopenharmony_ci	insn = OP_ALU_BASE |
40162306a36Sopenharmony_ci		FIELD_PREP(OP_ALU_A_SRC, areg) |
40262306a36Sopenharmony_ci		FIELD_PREP(OP_ALU_B_SRC, breg) |
40362306a36Sopenharmony_ci		FIELD_PREP(OP_ALU_DST, dst) |
40462306a36Sopenharmony_ci		FIELD_PREP(OP_ALU_SW, swap) |
40562306a36Sopenharmony_ci		FIELD_PREP(OP_ALU_OP, op) |
40662306a36Sopenharmony_ci		FIELD_PREP(OP_ALU_DST_AB, dst_ab) |
40762306a36Sopenharmony_ci		FIELD_PREP(OP_ALU_WR_AB, wr_both) |
40862306a36Sopenharmony_ci		FIELD_PREP(OP_ALU_SRC_LMEXTN, src_lmextn) |
40962306a36Sopenharmony_ci		FIELD_PREP(OP_ALU_DST_LMEXTN, dst_lmextn);
41062306a36Sopenharmony_ci
41162306a36Sopenharmony_ci	nfp_prog_push(nfp_prog, insn);
41262306a36Sopenharmony_ci}
41362306a36Sopenharmony_ci
41462306a36Sopenharmony_cistatic void
41562306a36Sopenharmony_ciemit_alu(struct nfp_prog *nfp_prog, swreg dst,
41662306a36Sopenharmony_ci	 swreg lreg, enum alu_op op, swreg rreg)
41762306a36Sopenharmony_ci{
41862306a36Sopenharmony_ci	struct nfp_insn_ur_regs reg;
41962306a36Sopenharmony_ci	int err;
42062306a36Sopenharmony_ci
42162306a36Sopenharmony_ci	err = swreg_to_unrestricted(dst, lreg, rreg, &reg);
42262306a36Sopenharmony_ci	if (err) {
42362306a36Sopenharmony_ci		nfp_prog->error = err;
42462306a36Sopenharmony_ci		return;
42562306a36Sopenharmony_ci	}
42662306a36Sopenharmony_ci
42762306a36Sopenharmony_ci	__emit_alu(nfp_prog, reg.dst, reg.dst_ab,
42862306a36Sopenharmony_ci		   reg.areg, op, reg.breg, reg.swap, reg.wr_both,
42962306a36Sopenharmony_ci		   reg.dst_lmextn, reg.src_lmextn);
43062306a36Sopenharmony_ci}
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_cistatic void
43362306a36Sopenharmony_ci__emit_mul(struct nfp_prog *nfp_prog, enum alu_dst_ab dst_ab, u16 areg,
43462306a36Sopenharmony_ci	   enum mul_type type, enum mul_step step, u16 breg, bool swap,
43562306a36Sopenharmony_ci	   bool wr_both, bool dst_lmextn, bool src_lmextn)
43662306a36Sopenharmony_ci{
43762306a36Sopenharmony_ci	u64 insn;
43862306a36Sopenharmony_ci
43962306a36Sopenharmony_ci	insn = OP_MUL_BASE |
44062306a36Sopenharmony_ci		FIELD_PREP(OP_MUL_A_SRC, areg) |
44162306a36Sopenharmony_ci		FIELD_PREP(OP_MUL_B_SRC, breg) |
44262306a36Sopenharmony_ci		FIELD_PREP(OP_MUL_STEP, step) |
44362306a36Sopenharmony_ci		FIELD_PREP(OP_MUL_DST_AB, dst_ab) |
44462306a36Sopenharmony_ci		FIELD_PREP(OP_MUL_SW, swap) |
44562306a36Sopenharmony_ci		FIELD_PREP(OP_MUL_TYPE, type) |
44662306a36Sopenharmony_ci		FIELD_PREP(OP_MUL_WR_AB, wr_both) |
44762306a36Sopenharmony_ci		FIELD_PREP(OP_MUL_SRC_LMEXTN, src_lmextn) |
44862306a36Sopenharmony_ci		FIELD_PREP(OP_MUL_DST_LMEXTN, dst_lmextn);
44962306a36Sopenharmony_ci
45062306a36Sopenharmony_ci	nfp_prog_push(nfp_prog, insn);
45162306a36Sopenharmony_ci}
45262306a36Sopenharmony_ci
45362306a36Sopenharmony_cistatic void
45462306a36Sopenharmony_ciemit_mul(struct nfp_prog *nfp_prog, swreg lreg, enum mul_type type,
45562306a36Sopenharmony_ci	 enum mul_step step, swreg rreg)
45662306a36Sopenharmony_ci{
45762306a36Sopenharmony_ci	struct nfp_insn_ur_regs reg;
45862306a36Sopenharmony_ci	u16 areg;
45962306a36Sopenharmony_ci	int err;
46062306a36Sopenharmony_ci
46162306a36Sopenharmony_ci	if (type == MUL_TYPE_START && step != MUL_STEP_NONE) {
46262306a36Sopenharmony_ci		nfp_prog->error = -EINVAL;
46362306a36Sopenharmony_ci		return;
46462306a36Sopenharmony_ci	}
46562306a36Sopenharmony_ci
46662306a36Sopenharmony_ci	if (step == MUL_LAST || step == MUL_LAST_2) {
46762306a36Sopenharmony_ci		/* When type is step and step Number is LAST or LAST2, left
46862306a36Sopenharmony_ci		 * source is used as destination.
46962306a36Sopenharmony_ci		 */
47062306a36Sopenharmony_ci		err = swreg_to_unrestricted(lreg, reg_none(), rreg, &reg);
47162306a36Sopenharmony_ci		areg = reg.dst;
47262306a36Sopenharmony_ci	} else {
47362306a36Sopenharmony_ci		err = swreg_to_unrestricted(reg_none(), lreg, rreg, &reg);
47462306a36Sopenharmony_ci		areg = reg.areg;
47562306a36Sopenharmony_ci	}
47662306a36Sopenharmony_ci
47762306a36Sopenharmony_ci	if (err) {
47862306a36Sopenharmony_ci		nfp_prog->error = err;
47962306a36Sopenharmony_ci		return;
48062306a36Sopenharmony_ci	}
48162306a36Sopenharmony_ci
48262306a36Sopenharmony_ci	__emit_mul(nfp_prog, reg.dst_ab, areg, type, step, reg.breg, reg.swap,
48362306a36Sopenharmony_ci		   reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
48462306a36Sopenharmony_ci}
48562306a36Sopenharmony_ci
48662306a36Sopenharmony_cistatic void
48762306a36Sopenharmony_ci__emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
48862306a36Sopenharmony_ci		u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8,
48962306a36Sopenharmony_ci		bool zero, bool swap, bool wr_both,
49062306a36Sopenharmony_ci		bool dst_lmextn, bool src_lmextn)
49162306a36Sopenharmony_ci{
49262306a36Sopenharmony_ci	u64 insn;
49362306a36Sopenharmony_ci
49462306a36Sopenharmony_ci	insn = OP_LDF_BASE |
49562306a36Sopenharmony_ci		FIELD_PREP(OP_LDF_A_SRC, areg) |
49662306a36Sopenharmony_ci		FIELD_PREP(OP_LDF_SC, sc) |
49762306a36Sopenharmony_ci		FIELD_PREP(OP_LDF_B_SRC, breg) |
49862306a36Sopenharmony_ci		FIELD_PREP(OP_LDF_I8, imm8) |
49962306a36Sopenharmony_ci		FIELD_PREP(OP_LDF_SW, swap) |
50062306a36Sopenharmony_ci		FIELD_PREP(OP_LDF_ZF, zero) |
50162306a36Sopenharmony_ci		FIELD_PREP(OP_LDF_BMASK, bmask) |
50262306a36Sopenharmony_ci		FIELD_PREP(OP_LDF_SHF, shift) |
50362306a36Sopenharmony_ci		FIELD_PREP(OP_LDF_WR_AB, wr_both) |
50462306a36Sopenharmony_ci		FIELD_PREP(OP_LDF_SRC_LMEXTN, src_lmextn) |
50562306a36Sopenharmony_ci		FIELD_PREP(OP_LDF_DST_LMEXTN, dst_lmextn);
50662306a36Sopenharmony_ci
50762306a36Sopenharmony_ci	nfp_prog_push(nfp_prog, insn);
50862306a36Sopenharmony_ci}
50962306a36Sopenharmony_ci
51062306a36Sopenharmony_cistatic void
51162306a36Sopenharmony_ciemit_ld_field_any(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
51262306a36Sopenharmony_ci		  enum shf_sc sc, u8 shift, bool zero)
51362306a36Sopenharmony_ci{
51462306a36Sopenharmony_ci	struct nfp_insn_re_regs reg;
51562306a36Sopenharmony_ci	int err;
51662306a36Sopenharmony_ci
51762306a36Sopenharmony_ci	/* Note: ld_field is special as it uses one of the src regs as dst */
51862306a36Sopenharmony_ci	err = swreg_to_restricted(dst, dst, src, &reg, true);
51962306a36Sopenharmony_ci	if (err) {
52062306a36Sopenharmony_ci		nfp_prog->error = err;
52162306a36Sopenharmony_ci		return;
52262306a36Sopenharmony_ci	}
52362306a36Sopenharmony_ci
52462306a36Sopenharmony_ci	__emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift,
52562306a36Sopenharmony_ci			reg.i8, zero, reg.swap, reg.wr_both,
52662306a36Sopenharmony_ci			reg.dst_lmextn, reg.src_lmextn);
52762306a36Sopenharmony_ci}
52862306a36Sopenharmony_ci
52962306a36Sopenharmony_cistatic void
53062306a36Sopenharmony_ciemit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
53162306a36Sopenharmony_ci	      enum shf_sc sc, u8 shift)
53262306a36Sopenharmony_ci{
53362306a36Sopenharmony_ci	emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false);
53462306a36Sopenharmony_ci}
53562306a36Sopenharmony_ci
53662306a36Sopenharmony_cistatic void
53762306a36Sopenharmony_ci__emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr,
53862306a36Sopenharmony_ci	    bool dst_lmextn, bool src_lmextn)
53962306a36Sopenharmony_ci{
54062306a36Sopenharmony_ci	u64 insn;
54162306a36Sopenharmony_ci
54262306a36Sopenharmony_ci	insn = OP_LCSR_BASE |
54362306a36Sopenharmony_ci		FIELD_PREP(OP_LCSR_A_SRC, areg) |
54462306a36Sopenharmony_ci		FIELD_PREP(OP_LCSR_B_SRC, breg) |
54562306a36Sopenharmony_ci		FIELD_PREP(OP_LCSR_WRITE, wr) |
54662306a36Sopenharmony_ci		FIELD_PREP(OP_LCSR_ADDR, addr / 4) |
54762306a36Sopenharmony_ci		FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) |
54862306a36Sopenharmony_ci		FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn);
54962306a36Sopenharmony_ci
55062306a36Sopenharmony_ci	nfp_prog_push(nfp_prog, insn);
55162306a36Sopenharmony_ci}
55262306a36Sopenharmony_ci
55362306a36Sopenharmony_cistatic void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr)
55462306a36Sopenharmony_ci{
55562306a36Sopenharmony_ci	struct nfp_insn_ur_regs reg;
55662306a36Sopenharmony_ci	int err;
55762306a36Sopenharmony_ci
55862306a36Sopenharmony_ci	/* This instruction takes immeds instead of reg_none() for the ignored
55962306a36Sopenharmony_ci	 * operand, but we can't encode 2 immeds in one instr with our normal
56062306a36Sopenharmony_ci	 * swreg infra so if param is an immed, we encode as reg_none() and
56162306a36Sopenharmony_ci	 * copy the immed to both operands.
56262306a36Sopenharmony_ci	 */
56362306a36Sopenharmony_ci	if (swreg_type(src) == NN_REG_IMM) {
56462306a36Sopenharmony_ci		err = swreg_to_unrestricted(reg_none(), src, reg_none(), &reg);
56562306a36Sopenharmony_ci		reg.breg = reg.areg;
56662306a36Sopenharmony_ci	} else {
56762306a36Sopenharmony_ci		err = swreg_to_unrestricted(reg_none(), src, reg_imm(0), &reg);
56862306a36Sopenharmony_ci	}
56962306a36Sopenharmony_ci	if (err) {
57062306a36Sopenharmony_ci		nfp_prog->error = err;
57162306a36Sopenharmony_ci		return;
57262306a36Sopenharmony_ci	}
57362306a36Sopenharmony_ci
57462306a36Sopenharmony_ci	__emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr,
57562306a36Sopenharmony_ci		    false, reg.src_lmextn);
57662306a36Sopenharmony_ci}
57762306a36Sopenharmony_ci
57862306a36Sopenharmony_ci/* CSR value is read in following immed[gpr, 0] */
57962306a36Sopenharmony_cistatic void __emit_csr_rd(struct nfp_prog *nfp_prog, u16 addr)
58062306a36Sopenharmony_ci{
58162306a36Sopenharmony_ci	__emit_lcsr(nfp_prog, 0, 0, false, addr, false, false);
58262306a36Sopenharmony_ci}
58362306a36Sopenharmony_ci
58462306a36Sopenharmony_cistatic void emit_nop(struct nfp_prog *nfp_prog)
58562306a36Sopenharmony_ci{
58662306a36Sopenharmony_ci	__emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0);
58762306a36Sopenharmony_ci}
58862306a36Sopenharmony_ci
/* --- Wrappers --- */
59062306a36Sopenharmony_cistatic bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift)
59162306a36Sopenharmony_ci{
59262306a36Sopenharmony_ci	if (!(imm & 0xffff0000)) {
59362306a36Sopenharmony_ci		*val = imm;
59462306a36Sopenharmony_ci		*shift = IMMED_SHIFT_0B;
59562306a36Sopenharmony_ci	} else if (!(imm & 0xff0000ff)) {
59662306a36Sopenharmony_ci		*val = imm >> 8;
59762306a36Sopenharmony_ci		*shift = IMMED_SHIFT_1B;
59862306a36Sopenharmony_ci	} else if (!(imm & 0x0000ffff)) {
59962306a36Sopenharmony_ci		*val = imm >> 16;
60062306a36Sopenharmony_ci		*shift = IMMED_SHIFT_2B;
60162306a36Sopenharmony_ci	} else {
60262306a36Sopenharmony_ci		return false;
60362306a36Sopenharmony_ci	}
60462306a36Sopenharmony_ci
60562306a36Sopenharmony_ci	return true;
60662306a36Sopenharmony_ci}
60762306a36Sopenharmony_ci
60862306a36Sopenharmony_cistatic void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm)
60962306a36Sopenharmony_ci{
61062306a36Sopenharmony_ci	enum immed_shift shift;
61162306a36Sopenharmony_ci	u16 val;
61262306a36Sopenharmony_ci
61362306a36Sopenharmony_ci	if (pack_immed(imm, &val, &shift)) {
61462306a36Sopenharmony_ci		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift);
61562306a36Sopenharmony_ci	} else if (pack_immed(~imm, &val, &shift)) {
61662306a36Sopenharmony_ci		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift);
61762306a36Sopenharmony_ci	} else {
61862306a36Sopenharmony_ci		emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL,
61962306a36Sopenharmony_ci			   false, IMMED_SHIFT_0B);
62062306a36Sopenharmony_ci		emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD,
62162306a36Sopenharmony_ci			   false, IMMED_SHIFT_2B);
62262306a36Sopenharmony_ci	}
62362306a36Sopenharmony_ci}
62462306a36Sopenharmony_ci
62562306a36Sopenharmony_cistatic void
62662306a36Sopenharmony_ciwrp_zext(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst)
62762306a36Sopenharmony_ci{
62862306a36Sopenharmony_ci	if (meta->flags & FLAG_INSN_DO_ZEXT)
62962306a36Sopenharmony_ci		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
63062306a36Sopenharmony_ci}
63162306a36Sopenharmony_ci
63262306a36Sopenharmony_cistatic void
63362306a36Sopenharmony_ciwrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm,
63462306a36Sopenharmony_ci	       enum nfp_relo_type relo)
63562306a36Sopenharmony_ci{
63662306a36Sopenharmony_ci	if (imm > 0xffff) {
63762306a36Sopenharmony_ci		pr_err("relocation of a large immediate!\n");
63862306a36Sopenharmony_ci		nfp_prog->error = -EFAULT;
63962306a36Sopenharmony_ci		return;
64062306a36Sopenharmony_ci	}
64162306a36Sopenharmony_ci	emit_immed(nfp_prog, dst, imm, IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
64262306a36Sopenharmony_ci
64362306a36Sopenharmony_ci	nfp_prog->prog[nfp_prog->prog_len - 1] |=
64462306a36Sopenharmony_ci		FIELD_PREP(OP_RELO_TYPE, relo);
64562306a36Sopenharmony_ci}
64662306a36Sopenharmony_ci
64762306a36Sopenharmony_ci/* ur_load_imm_any() - encode immediate or use tmp register (unrestricted)
64862306a36Sopenharmony_ci * If the @imm is small enough encode it directly in operand and return
64962306a36Sopenharmony_ci * otherwise load @imm to a spare register and return its encoding.
65062306a36Sopenharmony_ci */
65162306a36Sopenharmony_cistatic swreg ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
65262306a36Sopenharmony_ci{
65362306a36Sopenharmony_ci	if (FIELD_FIT(UR_REG_IMM_MAX, imm))
65462306a36Sopenharmony_ci		return reg_imm(imm);
65562306a36Sopenharmony_ci
65662306a36Sopenharmony_ci	wrp_immed(nfp_prog, tmp_reg, imm);
65762306a36Sopenharmony_ci	return tmp_reg;
65862306a36Sopenharmony_ci}
65962306a36Sopenharmony_ci
66062306a36Sopenharmony_ci/* re_load_imm_any() - encode immediate or use tmp register (restricted)
66162306a36Sopenharmony_ci * If the @imm is small enough encode it directly in operand and return
66262306a36Sopenharmony_ci * otherwise load @imm to a spare register and return its encoding.
66362306a36Sopenharmony_ci */
66462306a36Sopenharmony_cistatic swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
66562306a36Sopenharmony_ci{
66662306a36Sopenharmony_ci	if (FIELD_FIT(RE_REG_IMM_MAX, imm))
66762306a36Sopenharmony_ci		return reg_imm(imm);
66862306a36Sopenharmony_ci
66962306a36Sopenharmony_ci	wrp_immed(nfp_prog, tmp_reg, imm);
67062306a36Sopenharmony_ci	return tmp_reg;
67162306a36Sopenharmony_ci}
67262306a36Sopenharmony_ci
/* Emit @count consecutive no-op instructions. */
static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count)
{
	unsigned int i;

	for (i = 0; i < count; i++)
		emit_nop(nfp_prog);
}
67862306a36Sopenharmony_ci
67962306a36Sopenharmony_cistatic void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src)
68062306a36Sopenharmony_ci{
68162306a36Sopenharmony_ci	emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src);
68262306a36Sopenharmony_ci}
68362306a36Sopenharmony_ci
68462306a36Sopenharmony_cistatic void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src)
68562306a36Sopenharmony_ci{
68662306a36Sopenharmony_ci	wrp_mov(nfp_prog, reg_both(dst), reg_b(src));
68762306a36Sopenharmony_ci}
68862306a36Sopenharmony_ci
68962306a36Sopenharmony_ci/* wrp_reg_subpart() - load @field_len bytes from @offset of @src, write the
69062306a36Sopenharmony_ci * result to @dst from low end.
69162306a36Sopenharmony_ci */
69262306a36Sopenharmony_cistatic void
69362306a36Sopenharmony_ciwrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len,
69462306a36Sopenharmony_ci		u8 offset)
69562306a36Sopenharmony_ci{
69662306a36Sopenharmony_ci	enum shf_sc sc = offset ? SHF_SC_R_SHF : SHF_SC_NONE;
69762306a36Sopenharmony_ci	u8 mask = (1 << field_len) - 1;
69862306a36Sopenharmony_ci
69962306a36Sopenharmony_ci	emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true);
70062306a36Sopenharmony_ci}
70162306a36Sopenharmony_ci
70262306a36Sopenharmony_ci/* wrp_reg_or_subpart() - load @field_len bytes from low end of @src, or the
70362306a36Sopenharmony_ci * result to @dst from offset, there is no change on the other bits of @dst.
70462306a36Sopenharmony_ci */
70562306a36Sopenharmony_cistatic void
70662306a36Sopenharmony_ciwrp_reg_or_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src,
70762306a36Sopenharmony_ci		   u8 field_len, u8 offset)
70862306a36Sopenharmony_ci{
70962306a36Sopenharmony_ci	enum shf_sc sc = offset ? SHF_SC_L_SHF : SHF_SC_NONE;
71062306a36Sopenharmony_ci	u8 mask = ((1 << field_len) - 1) << offset;
71162306a36Sopenharmony_ci
71262306a36Sopenharmony_ci	emit_ld_field(nfp_prog, dst, mask, src, sc, 32 - offset * 8);
71362306a36Sopenharmony_ci}
71462306a36Sopenharmony_ci
71562306a36Sopenharmony_cistatic void
71662306a36Sopenharmony_ciaddr40_offset(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
71762306a36Sopenharmony_ci	      swreg *rega, swreg *regb)
71862306a36Sopenharmony_ci{
71962306a36Sopenharmony_ci	if (offset == reg_imm(0)) {
72062306a36Sopenharmony_ci		*rega = reg_a(src_gpr);
72162306a36Sopenharmony_ci		*regb = reg_b(src_gpr + 1);
72262306a36Sopenharmony_ci		return;
72362306a36Sopenharmony_ci	}
72462306a36Sopenharmony_ci
72562306a36Sopenharmony_ci	emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(src_gpr), ALU_OP_ADD, offset);
72662306a36Sopenharmony_ci	emit_alu(nfp_prog, imm_b(nfp_prog), reg_b(src_gpr + 1), ALU_OP_ADD_C,
72762306a36Sopenharmony_ci		 reg_imm(0));
72862306a36Sopenharmony_ci	*rega = imm_a(nfp_prog);
72962306a36Sopenharmony_ci	*regb = imm_b(nfp_prog);
73062306a36Sopenharmony_ci}
73162306a36Sopenharmony_ci
73262306a36Sopenharmony_ci/* NFP has Command Push Pull bus which supports bluk memory operations. */
73362306a36Sopenharmony_cistatic int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
73462306a36Sopenharmony_ci{
73562306a36Sopenharmony_ci	bool descending_seq = meta->ldst_gather_len < 0;
73662306a36Sopenharmony_ci	s16 len = abs(meta->ldst_gather_len);
73762306a36Sopenharmony_ci	swreg src_base, off;
73862306a36Sopenharmony_ci	bool src_40bit_addr;
73962306a36Sopenharmony_ci	unsigned int i;
74062306a36Sopenharmony_ci	u8 xfer_num;
74162306a36Sopenharmony_ci
74262306a36Sopenharmony_ci	off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
74362306a36Sopenharmony_ci	src_40bit_addr = meta->ptr.type == PTR_TO_MAP_VALUE;
74462306a36Sopenharmony_ci	src_base = reg_a(meta->insn.src_reg * 2);
74562306a36Sopenharmony_ci	xfer_num = round_up(len, 4) / 4;
74662306a36Sopenharmony_ci
74762306a36Sopenharmony_ci	if (src_40bit_addr)
74862306a36Sopenharmony_ci		addr40_offset(nfp_prog, meta->insn.src_reg * 2, off, &src_base,
74962306a36Sopenharmony_ci			      &off);
75062306a36Sopenharmony_ci
75162306a36Sopenharmony_ci	/* Setup PREV_ALU fields to override memory read length. */
75262306a36Sopenharmony_ci	if (len > 32)
75362306a36Sopenharmony_ci		wrp_immed(nfp_prog, reg_none(),
75462306a36Sopenharmony_ci			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
75562306a36Sopenharmony_ci
75662306a36Sopenharmony_ci	/* Memory read from source addr into transfer-in registers. */
75762306a36Sopenharmony_ci	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP,
75862306a36Sopenharmony_ci		     src_40bit_addr ? CMD_MODE_40b_BA : CMD_MODE_32b, 0,
75962306a36Sopenharmony_ci		     src_base, off, xfer_num - 1, CMD_CTX_SWAP, len > 32);
76062306a36Sopenharmony_ci
76162306a36Sopenharmony_ci	/* Move from transfer-in to transfer-out. */
76262306a36Sopenharmony_ci	for (i = 0; i < xfer_num; i++)
76362306a36Sopenharmony_ci		wrp_mov(nfp_prog, reg_xfer(i), reg_xfer(i));
76462306a36Sopenharmony_ci
76562306a36Sopenharmony_ci	off = re_load_imm_any(nfp_prog, meta->paired_st->off, imm_b(nfp_prog));
76662306a36Sopenharmony_ci
76762306a36Sopenharmony_ci	if (len <= 8) {
76862306a36Sopenharmony_ci		/* Use single direct_ref write8. */
76962306a36Sopenharmony_ci		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
77062306a36Sopenharmony_ci			 reg_a(meta->paired_st->dst_reg * 2), off, len - 1,
77162306a36Sopenharmony_ci			 CMD_CTX_SWAP);
77262306a36Sopenharmony_ci	} else if (len <= 32 && IS_ALIGNED(len, 4)) {
77362306a36Sopenharmony_ci		/* Use single direct_ref write32. */
77462306a36Sopenharmony_ci		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
77562306a36Sopenharmony_ci			 reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1,
77662306a36Sopenharmony_ci			 CMD_CTX_SWAP);
77762306a36Sopenharmony_ci	} else if (len <= 32) {
77862306a36Sopenharmony_ci		/* Use single indirect_ref write8. */
77962306a36Sopenharmony_ci		wrp_immed(nfp_prog, reg_none(),
78062306a36Sopenharmony_ci			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1));
78162306a36Sopenharmony_ci		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
78262306a36Sopenharmony_ci			       reg_a(meta->paired_st->dst_reg * 2), off,
78362306a36Sopenharmony_ci			       len - 1, CMD_CTX_SWAP);
78462306a36Sopenharmony_ci	} else if (IS_ALIGNED(len, 4)) {
78562306a36Sopenharmony_ci		/* Use single indirect_ref write32. */
78662306a36Sopenharmony_ci		wrp_immed(nfp_prog, reg_none(),
78762306a36Sopenharmony_ci			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
78862306a36Sopenharmony_ci		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
78962306a36Sopenharmony_ci			       reg_a(meta->paired_st->dst_reg * 2), off,
79062306a36Sopenharmony_ci			       xfer_num - 1, CMD_CTX_SWAP);
79162306a36Sopenharmony_ci	} else if (len <= 40) {
79262306a36Sopenharmony_ci		/* Use one direct_ref write32 to write the first 32-bytes, then
79362306a36Sopenharmony_ci		 * another direct_ref write8 to write the remaining bytes.
79462306a36Sopenharmony_ci		 */
79562306a36Sopenharmony_ci		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
79662306a36Sopenharmony_ci			 reg_a(meta->paired_st->dst_reg * 2), off, 7,
79762306a36Sopenharmony_ci			 CMD_CTX_SWAP);
79862306a36Sopenharmony_ci
79962306a36Sopenharmony_ci		off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32,
80062306a36Sopenharmony_ci				      imm_b(nfp_prog));
80162306a36Sopenharmony_ci		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8,
80262306a36Sopenharmony_ci			 reg_a(meta->paired_st->dst_reg * 2), off, len - 33,
80362306a36Sopenharmony_ci			 CMD_CTX_SWAP);
80462306a36Sopenharmony_ci	} else {
80562306a36Sopenharmony_ci		/* Use one indirect_ref write32 to write 4-bytes aligned length,
80662306a36Sopenharmony_ci		 * then another direct_ref write8 to write the remaining bytes.
80762306a36Sopenharmony_ci		 */
80862306a36Sopenharmony_ci		u8 new_off;
80962306a36Sopenharmony_ci
81062306a36Sopenharmony_ci		wrp_immed(nfp_prog, reg_none(),
81162306a36Sopenharmony_ci			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2));
81262306a36Sopenharmony_ci		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
81362306a36Sopenharmony_ci			       reg_a(meta->paired_st->dst_reg * 2), off,
81462306a36Sopenharmony_ci			       xfer_num - 2, CMD_CTX_SWAP);
81562306a36Sopenharmony_ci		new_off = meta->paired_st->off + (xfer_num - 1) * 4;
81662306a36Sopenharmony_ci		off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog));
81762306a36Sopenharmony_ci		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b,
81862306a36Sopenharmony_ci			 xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off,
81962306a36Sopenharmony_ci			 (len & 0x3) - 1, CMD_CTX_SWAP);
82062306a36Sopenharmony_ci	}
82162306a36Sopenharmony_ci
82262306a36Sopenharmony_ci	/* TODO: The following extra load is to make sure data flow be identical
82362306a36Sopenharmony_ci	 *  before and after we do memory copy optimization.
82462306a36Sopenharmony_ci	 *
82562306a36Sopenharmony_ci	 *  The load destination register is not guaranteed to be dead, so we
82662306a36Sopenharmony_ci	 *  need to make sure it is loaded with the value the same as before
82762306a36Sopenharmony_ci	 *  this transformation.
82862306a36Sopenharmony_ci	 *
82962306a36Sopenharmony_ci	 *  These extra loads could be removed once we have accurate register
83062306a36Sopenharmony_ci	 *  usage information.
83162306a36Sopenharmony_ci	 */
83262306a36Sopenharmony_ci	if (descending_seq)
83362306a36Sopenharmony_ci		xfer_num = 0;
83462306a36Sopenharmony_ci	else if (BPF_SIZE(meta->insn.code) != BPF_DW)
83562306a36Sopenharmony_ci		xfer_num = xfer_num - 1;
83662306a36Sopenharmony_ci	else
83762306a36Sopenharmony_ci		xfer_num = xfer_num - 2;
83862306a36Sopenharmony_ci
83962306a36Sopenharmony_ci	switch (BPF_SIZE(meta->insn.code)) {
84062306a36Sopenharmony_ci	case BPF_B:
84162306a36Sopenharmony_ci		wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
84262306a36Sopenharmony_ci				reg_xfer(xfer_num), 1,
84362306a36Sopenharmony_ci				IS_ALIGNED(len, 4) ? 3 : (len & 3) - 1);
84462306a36Sopenharmony_ci		break;
84562306a36Sopenharmony_ci	case BPF_H:
84662306a36Sopenharmony_ci		wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
84762306a36Sopenharmony_ci				reg_xfer(xfer_num), 2, (len & 3) ^ 2);
84862306a36Sopenharmony_ci		break;
84962306a36Sopenharmony_ci	case BPF_W:
85062306a36Sopenharmony_ci		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
85162306a36Sopenharmony_ci			reg_xfer(0));
85262306a36Sopenharmony_ci		break;
85362306a36Sopenharmony_ci	case BPF_DW:
85462306a36Sopenharmony_ci		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
85562306a36Sopenharmony_ci			reg_xfer(xfer_num));
85662306a36Sopenharmony_ci		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1),
85762306a36Sopenharmony_ci			reg_xfer(xfer_num + 1));
85862306a36Sopenharmony_ci		break;
85962306a36Sopenharmony_ci	}
86062306a36Sopenharmony_ci
86162306a36Sopenharmony_ci	if (BPF_SIZE(meta->insn.code) != BPF_DW)
86262306a36Sopenharmony_ci		wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
86362306a36Sopenharmony_ci
86462306a36Sopenharmony_ci	return 0;
86562306a36Sopenharmony_ci}
86662306a36Sopenharmony_ci
86762306a36Sopenharmony_cistatic int
86862306a36Sopenharmony_cidata_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, swreg offset,
86962306a36Sopenharmony_ci	u8 dst_gpr, int size)
87062306a36Sopenharmony_ci{
87162306a36Sopenharmony_ci	unsigned int i;
87262306a36Sopenharmony_ci	u16 shift, sz;
87362306a36Sopenharmony_ci
87462306a36Sopenharmony_ci	/* We load the value from the address indicated in @offset and then
87562306a36Sopenharmony_ci	 * shift out the data we don't need.  Note: this is big endian!
87662306a36Sopenharmony_ci	 */
87762306a36Sopenharmony_ci	sz = max(size, 4);
87862306a36Sopenharmony_ci	shift = size < 4 ? 4 - size : 0;
87962306a36Sopenharmony_ci
88062306a36Sopenharmony_ci	emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
88162306a36Sopenharmony_ci		 pptr_reg(nfp_prog), offset, sz - 1, CMD_CTX_SWAP);
88262306a36Sopenharmony_ci
88362306a36Sopenharmony_ci	i = 0;
88462306a36Sopenharmony_ci	if (shift)
88562306a36Sopenharmony_ci		emit_shf(nfp_prog, reg_both(dst_gpr), reg_none(), SHF_OP_NONE,
88662306a36Sopenharmony_ci			 reg_xfer(0), SHF_SC_R_SHF, shift * 8);
88762306a36Sopenharmony_ci	else
88862306a36Sopenharmony_ci		for (; i * 4 < size; i++)
88962306a36Sopenharmony_ci			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
89062306a36Sopenharmony_ci
89162306a36Sopenharmony_ci	if (i < 2)
89262306a36Sopenharmony_ci		wrp_zext(nfp_prog, meta, dst_gpr);
89362306a36Sopenharmony_ci
89462306a36Sopenharmony_ci	return 0;
89562306a36Sopenharmony_ci}
89662306a36Sopenharmony_ci
89762306a36Sopenharmony_cistatic int
89862306a36Sopenharmony_cidata_ld_host_order(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
89962306a36Sopenharmony_ci		   u8 dst_gpr, swreg lreg, swreg rreg, int size,
90062306a36Sopenharmony_ci		   enum cmd_mode mode)
90162306a36Sopenharmony_ci{
90262306a36Sopenharmony_ci	unsigned int i;
90362306a36Sopenharmony_ci	u8 mask, sz;
90462306a36Sopenharmony_ci
90562306a36Sopenharmony_ci	/* We load the value from the address indicated in rreg + lreg and then
90662306a36Sopenharmony_ci	 * mask out the data we don't need.  Note: this is little endian!
90762306a36Sopenharmony_ci	 */
90862306a36Sopenharmony_ci	sz = max(size, 4);
90962306a36Sopenharmony_ci	mask = size < 4 ? GENMASK(size - 1, 0) : 0;
91062306a36Sopenharmony_ci
91162306a36Sopenharmony_ci	emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, mode, 0,
91262306a36Sopenharmony_ci		 lreg, rreg, sz / 4 - 1, CMD_CTX_SWAP);
91362306a36Sopenharmony_ci
91462306a36Sopenharmony_ci	i = 0;
91562306a36Sopenharmony_ci	if (mask)
91662306a36Sopenharmony_ci		emit_ld_field_any(nfp_prog, reg_both(dst_gpr), mask,
91762306a36Sopenharmony_ci				  reg_xfer(0), SHF_SC_NONE, 0, true);
91862306a36Sopenharmony_ci	else
91962306a36Sopenharmony_ci		for (; i * 4 < size; i++)
92062306a36Sopenharmony_ci			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
92162306a36Sopenharmony_ci
92262306a36Sopenharmony_ci	if (i < 2)
92362306a36Sopenharmony_ci		wrp_zext(nfp_prog, meta, dst_gpr);
92462306a36Sopenharmony_ci
92562306a36Sopenharmony_ci	return 0;
92662306a36Sopenharmony_ci}
92762306a36Sopenharmony_ci
92862306a36Sopenharmony_cistatic int
92962306a36Sopenharmony_cidata_ld_host_order_addr32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
93062306a36Sopenharmony_ci			  u8 src_gpr, swreg offset, u8 dst_gpr, u8 size)
93162306a36Sopenharmony_ci{
93262306a36Sopenharmony_ci	return data_ld_host_order(nfp_prog, meta, dst_gpr, reg_a(src_gpr),
93362306a36Sopenharmony_ci				  offset, size, CMD_MODE_32b);
93462306a36Sopenharmony_ci}
93562306a36Sopenharmony_ci
93662306a36Sopenharmony_cistatic int
93762306a36Sopenharmony_cidata_ld_host_order_addr40(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
93862306a36Sopenharmony_ci			  u8 src_gpr, swreg offset, u8 dst_gpr, u8 size)
93962306a36Sopenharmony_ci{
94062306a36Sopenharmony_ci	swreg rega, regb;
94162306a36Sopenharmony_ci
94262306a36Sopenharmony_ci	addr40_offset(nfp_prog, src_gpr, offset, &rega, &regb);
94362306a36Sopenharmony_ci
94462306a36Sopenharmony_ci	return data_ld_host_order(nfp_prog, meta, dst_gpr, rega, regb,
94562306a36Sopenharmony_ci				  size, CMD_MODE_40b_BA);
94662306a36Sopenharmony_ci}
94762306a36Sopenharmony_ci
94862306a36Sopenharmony_cistatic int
94962306a36Sopenharmony_ciconstruct_data_ind_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
95062306a36Sopenharmony_ci		      u16 offset, u16 src, u8 size)
95162306a36Sopenharmony_ci{
95262306a36Sopenharmony_ci	swreg tmp_reg;
95362306a36Sopenharmony_ci
95462306a36Sopenharmony_ci	/* Calculate the true offset (src_reg + imm) */
95562306a36Sopenharmony_ci	tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
95662306a36Sopenharmony_ci	emit_alu(nfp_prog, imm_both(nfp_prog), reg_a(src), ALU_OP_ADD, tmp_reg);
95762306a36Sopenharmony_ci
95862306a36Sopenharmony_ci	/* Check packet length (size guaranteed to fit b/c it's u8) */
95962306a36Sopenharmony_ci	emit_alu(nfp_prog, imm_a(nfp_prog),
96062306a36Sopenharmony_ci		 imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size));
96162306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(),
96262306a36Sopenharmony_ci		 plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog));
96362306a36Sopenharmony_ci	emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);
96462306a36Sopenharmony_ci
96562306a36Sopenharmony_ci	/* Load data */
96662306a36Sopenharmony_ci	return data_ld(nfp_prog, meta, imm_b(nfp_prog), 0, size);
96762306a36Sopenharmony_ci}
96862306a36Sopenharmony_ci
96962306a36Sopenharmony_cistatic int
97062306a36Sopenharmony_ciconstruct_data_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
97162306a36Sopenharmony_ci		  u16 offset, u8 size)
97262306a36Sopenharmony_ci{
97362306a36Sopenharmony_ci	swreg tmp_reg;
97462306a36Sopenharmony_ci
97562306a36Sopenharmony_ci	/* Check packet length */
97662306a36Sopenharmony_ci	tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog));
97762306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg);
97862306a36Sopenharmony_ci	emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);
97962306a36Sopenharmony_ci
98062306a36Sopenharmony_ci	/* Load data */
98162306a36Sopenharmony_ci	tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
98262306a36Sopenharmony_ci	return data_ld(nfp_prog, meta, tmp_reg, 0, size);
98362306a36Sopenharmony_ci}
98462306a36Sopenharmony_ci
98562306a36Sopenharmony_cistatic int
98662306a36Sopenharmony_cidata_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
98762306a36Sopenharmony_ci		    u8 src_gpr, u8 size)
98862306a36Sopenharmony_ci{
98962306a36Sopenharmony_ci	unsigned int i;
99062306a36Sopenharmony_ci
99162306a36Sopenharmony_ci	for (i = 0; i * 4 < size; i++)
99262306a36Sopenharmony_ci		wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i));
99362306a36Sopenharmony_ci
99462306a36Sopenharmony_ci	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
99562306a36Sopenharmony_ci		 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);
99662306a36Sopenharmony_ci
99762306a36Sopenharmony_ci	return 0;
99862306a36Sopenharmony_ci}
99962306a36Sopenharmony_ci
100062306a36Sopenharmony_cistatic int
100162306a36Sopenharmony_cidata_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
100262306a36Sopenharmony_ci		   u64 imm, u8 size)
100362306a36Sopenharmony_ci{
100462306a36Sopenharmony_ci	wrp_immed(nfp_prog, reg_xfer(0), imm);
100562306a36Sopenharmony_ci	if (size == 8)
100662306a36Sopenharmony_ci		wrp_immed(nfp_prog, reg_xfer(1), imm >> 32);
100762306a36Sopenharmony_ci
100862306a36Sopenharmony_ci	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
100962306a36Sopenharmony_ci		 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);
101062306a36Sopenharmony_ci
101162306a36Sopenharmony_ci	return 0;
101262306a36Sopenharmony_ci}
101362306a36Sopenharmony_ci
/* lmem_step - callback emitting one step of a local memory (LMEM) access
 * @nfp_prog:	program being translated
 * @gpr:	GPR taking part in this step
 * @gpr_byte:	byte position within @gpr
 * @off:	byte offset into local memory
 * @size:	number of bytes handled by this step
 * @first:	set for the first step of an access
 * @new_gpr:	set when this step starts on a new GPR
 * @last:	set for the last step of an access
 * @lm3:	use LM index 3 instead of 0
 * @needs_inc:	the LM pointer has to be incremented between words
 *
 * wrp_lmem_load() and wrp_lmem_store() have this signature.
 *
 * Return: 0 on success, negative errno otherwise.
 */
typedef int
(*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off,
	     unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
	     bool needs_inc);
101862306a36Sopenharmony_ci
/* wrp_lmem_load() - emit one step of a load from local memory into a GPR
 * @nfp_prog:	program being translated
 * @dst:	destination GPR
 * @dst_byte:	byte position inside @dst where the data is placed
 * @off:	byte offset of the data in local memory
 * @size:	number of bytes to move; must fit both in @dst from @dst_byte
 *		and in the LMEM word from @off % 4
 * @first:	this is the first step of the access
 * @new_gpr:	this step starts a new destination GPR
 * @last:	this is the last step of the access
 * @lm3:	use LM index 3 instead of 0
 * @needs_inc:	the LM pointer has to be incremented between words
 *
 * Return: 0 on success, -EOPNOTSUPP when the step cannot be encoded.
 */
static int
wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off,
	      unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
	      bool needs_inc)
{
	/* Increment only when moving on to a new GPR and more steps follow. */
	bool should_inc = needs_inc && new_gpr && !last;
	u32 idx, src_byte;
	enum shf_sc sc;
	swreg reg;
	int shf;
	u8 mask;

	/* A step may not cross a GPR or an LMEM word boundary. */
	if (WARN_ON_ONCE(dst_byte + size > 4 || off % 4 + size > 4))
		return -EOPNOTSUPP;

	/* Index of the 32-bit LMEM word containing @off. */
	idx = off / 4;

	/* Move the entire word */
	if (size == 4) {
		wrp_mov(nfp_prog, reg_both(dst),
			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx));
		return 0;
	}

	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
		return -EOPNOTSUPP;

	src_byte = off % 4;

	/* One mask bit per byte, positioned at the destination byte. */
	mask = (1 << size) - 1;
	mask <<= dst_byte;

	if (WARN_ON_ONCE(mask > 0xf))
		return -EOPNOTSUPP;

	/* Shift direction and amount follow from the distance between the
	 * source and destination byte positions; for the left shift case
	 * the amount passed on is 32 minus the distance in bits.
	 */
	shf = abs(src_byte - dst_byte) * 8;
	if (src_byte == dst_byte) {
		sc = SHF_SC_NONE;
	} else if (src_byte < dst_byte) {
		shf = 32 - shf;
		sc = SHF_SC_L_SHF;
	} else {
		sc = SHF_SC_R_SHF;
	}

	/* ld_field can address fewer indexes, if offset too large do RMW.
	 * Because we RMW twice we waste 2 cycles on unaligned 8 byte writes.
	 */
	if (idx <= RE_REG_LM_IDX_MAX) {
		reg = reg_lm(lm3 ? 3 : 0, idx);
	} else {
		reg = imm_a(nfp_prog);
		/* If it's not the first part of the load and we start a new GPR
		 * that means we are loading a second part of the LMEM word into
		 * a new GPR.  IOW we've already looked that LMEM word and
		 * therefore it has been loaded into imm_a().
		 */
		if (first || !new_gpr)
			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
	}

	emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr);

	/* Move from reg_lm_inc(3) with no destination, emitted only for the
	 * increment.
	 */
	if (should_inc)
		wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));

	return 0;
}
108762306a36Sopenharmony_ci
/* wrp_lmem_store() - emit one step of a store from a GPR into local memory
 * @nfp_prog:	program being translated
 * @src:	source GPR
 * @src_byte:	byte position inside @src where the data starts
 * @off:	byte offset of the destination in local memory
 * @size:	number of bytes to move; must fit both in @src from @src_byte
 *		and in the LMEM word from @off % 4
 * @first:	this is the first step of the access
 * @new_gpr:	this step starts a new source GPR
 * @last:	this is the last step of the access
 * @lm3:	use LM index 3 instead of 0
 * @needs_inc:	the LM pointer has to be incremented between words
 *
 * Return: 0 on success, -EOPNOTSUPP when the step cannot be encoded.
 */
static int
wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off,
	       unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
	       bool needs_inc)
{
	/* Increment only when moving on to a new GPR and more steps follow. */
	bool should_inc = needs_inc && new_gpr && !last;
	u32 idx, dst_byte;
	enum shf_sc sc;
	swreg reg;
	int shf;
	u8 mask;

	/* A step may not cross a GPR or an LMEM word boundary. */
	if (WARN_ON_ONCE(src_byte + size > 4 || off % 4 + size > 4))
		return -EOPNOTSUPP;

	/* Index of the 32-bit LMEM word containing @off. */
	idx = off / 4;

	/* Move the entire word */
	if (size == 4) {
		wrp_mov(nfp_prog,
			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx),
			reg_b(src));
		return 0;
	}

	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
		return -EOPNOTSUPP;

	dst_byte = off % 4;

	/* One mask bit per byte, positioned at the destination byte. */
	mask = (1 << size) - 1;
	mask <<= dst_byte;

	if (WARN_ON_ONCE(mask > 0xf))
		return -EOPNOTSUPP;

	/* Shift direction and amount follow from the distance between the
	 * source and destination byte positions; for the left shift case
	 * the amount passed on is 32 minus the distance in bits.
	 */
	shf = abs(src_byte - dst_byte) * 8;
	if (src_byte == dst_byte) {
		sc = SHF_SC_NONE;
	} else if (src_byte < dst_byte) {
		shf = 32 - shf;
		sc = SHF_SC_L_SHF;
	} else {
		sc = SHF_SC_R_SHF;
	}

	/* ld_field can address fewer indexes, if offset too large do RMW.
	 * Because we RMW twice we waste 2 cycles on unaligned 8 byte writes.
	 */
	if (idx <= RE_REG_LM_IDX_MAX) {
		reg = reg_lm(lm3 ? 3 : 0, idx);
	} else {
		reg = imm_a(nfp_prog);
		/* Only first and last LMEM locations are going to need RMW,
		 * the middle location will be overwritten fully.
		 */
		if (first || last)
			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
	}

	emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf);

	if (new_gpr || last) {
		/* Write back the word that was modified in imm_a(). */
		if (idx > RE_REG_LM_IDX_MAX)
			wrp_mov(nfp_prog, reg_lm(0, idx), reg);
		/* Move from reg_lm_inc(3) with no destination, emitted only
		 * for the increment.
		 */
		if (should_inc)
			wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));
	}

	return 0;
}
115962306a36Sopenharmony_ci
116062306a36Sopenharmony_cistatic int
116162306a36Sopenharmony_cimem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
116262306a36Sopenharmony_ci	     unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr,
116362306a36Sopenharmony_ci	     bool clr_gpr, lmem_step step)
116462306a36Sopenharmony_ci{
116562306a36Sopenharmony_ci	s32 off = nfp_prog->stack_frame_depth + meta->insn.off + ptr_off;
116662306a36Sopenharmony_ci	bool first = true, narrow_ld, last;
116762306a36Sopenharmony_ci	bool needs_inc = false;
116862306a36Sopenharmony_ci	swreg stack_off_reg;
116962306a36Sopenharmony_ci	u8 prev_gpr = 255;
117062306a36Sopenharmony_ci	u32 gpr_byte = 0;
117162306a36Sopenharmony_ci	bool lm3 = true;
117262306a36Sopenharmony_ci	int ret;
117362306a36Sopenharmony_ci
117462306a36Sopenharmony_ci	if (meta->ptr_not_const ||
117562306a36Sopenharmony_ci	    meta->flags & FLAG_INSN_PTR_CALLER_STACK_FRAME) {
117662306a36Sopenharmony_ci		/* Use of the last encountered ptr_off is OK, they all have
117762306a36Sopenharmony_ci		 * the same alignment.  Depend on low bits of value being
117862306a36Sopenharmony_ci		 * discarded when written to LMaddr register.
117962306a36Sopenharmony_ci		 */
118062306a36Sopenharmony_ci		stack_off_reg = ur_load_imm_any(nfp_prog, meta->insn.off,
118162306a36Sopenharmony_ci						stack_imm(nfp_prog));
118262306a36Sopenharmony_ci
118362306a36Sopenharmony_ci		emit_alu(nfp_prog, imm_b(nfp_prog),
118462306a36Sopenharmony_ci			 reg_a(ptr_gpr), ALU_OP_ADD, stack_off_reg);
118562306a36Sopenharmony_ci
118662306a36Sopenharmony_ci		needs_inc = true;
118762306a36Sopenharmony_ci	} else if (off + size <= 64) {
118862306a36Sopenharmony_ci		/* We can reach bottom 64B with LMaddr0 */
118962306a36Sopenharmony_ci		lm3 = false;
119062306a36Sopenharmony_ci	} else if (round_down(off, 32) == round_down(off + size - 1, 32)) {
119162306a36Sopenharmony_ci		/* We have to set up a new pointer.  If we know the offset
119262306a36Sopenharmony_ci		 * and the entire access falls into a single 32 byte aligned
119362306a36Sopenharmony_ci		 * window we won't have to increment the LM pointer.
119462306a36Sopenharmony_ci		 * The 32 byte alignment is imporant because offset is ORed in
119562306a36Sopenharmony_ci		 * not added when doing *l$indexN[off].
119662306a36Sopenharmony_ci		 */
119762306a36Sopenharmony_ci		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 32),
119862306a36Sopenharmony_ci						stack_imm(nfp_prog));
119962306a36Sopenharmony_ci		emit_alu(nfp_prog, imm_b(nfp_prog),
120062306a36Sopenharmony_ci			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);
120162306a36Sopenharmony_ci
120262306a36Sopenharmony_ci		off %= 32;
120362306a36Sopenharmony_ci	} else {
120462306a36Sopenharmony_ci		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 4),
120562306a36Sopenharmony_ci						stack_imm(nfp_prog));
120662306a36Sopenharmony_ci
120762306a36Sopenharmony_ci		emit_alu(nfp_prog, imm_b(nfp_prog),
120862306a36Sopenharmony_ci			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);
120962306a36Sopenharmony_ci
121062306a36Sopenharmony_ci		needs_inc = true;
121162306a36Sopenharmony_ci	}
121262306a36Sopenharmony_ci
121362306a36Sopenharmony_ci	narrow_ld = clr_gpr && size < 8;
121462306a36Sopenharmony_ci
121562306a36Sopenharmony_ci	if (lm3) {
121662306a36Sopenharmony_ci		unsigned int nop_cnt;
121762306a36Sopenharmony_ci
121862306a36Sopenharmony_ci		emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3);
121962306a36Sopenharmony_ci		/* For size < 4 one slot will be filled by zeroing of upper,
122062306a36Sopenharmony_ci		 * but be careful, that zeroing could be eliminated by zext
122162306a36Sopenharmony_ci		 * optimization.
122262306a36Sopenharmony_ci		 */
122362306a36Sopenharmony_ci		nop_cnt = narrow_ld && meta->flags & FLAG_INSN_DO_ZEXT ? 2 : 3;
122462306a36Sopenharmony_ci		wrp_nops(nfp_prog, nop_cnt);
122562306a36Sopenharmony_ci	}
122662306a36Sopenharmony_ci
122762306a36Sopenharmony_ci	if (narrow_ld)
122862306a36Sopenharmony_ci		wrp_zext(nfp_prog, meta, gpr);
122962306a36Sopenharmony_ci
123062306a36Sopenharmony_ci	while (size) {
123162306a36Sopenharmony_ci		u32 slice_end;
123262306a36Sopenharmony_ci		u8 slice_size;
123362306a36Sopenharmony_ci
123462306a36Sopenharmony_ci		slice_size = min(size, 4 - gpr_byte);
123562306a36Sopenharmony_ci		slice_end = min(off + slice_size, round_up(off + 1, 4));
123662306a36Sopenharmony_ci		slice_size = slice_end - off;
123762306a36Sopenharmony_ci
123862306a36Sopenharmony_ci		last = slice_size == size;
123962306a36Sopenharmony_ci
124062306a36Sopenharmony_ci		if (needs_inc)
124162306a36Sopenharmony_ci			off %= 4;
124262306a36Sopenharmony_ci
124362306a36Sopenharmony_ci		ret = step(nfp_prog, gpr, gpr_byte, off, slice_size,
124462306a36Sopenharmony_ci			   first, gpr != prev_gpr, last, lm3, needs_inc);
124562306a36Sopenharmony_ci		if (ret)
124662306a36Sopenharmony_ci			return ret;
124762306a36Sopenharmony_ci
124862306a36Sopenharmony_ci		prev_gpr = gpr;
124962306a36Sopenharmony_ci		first = false;
125062306a36Sopenharmony_ci
125162306a36Sopenharmony_ci		gpr_byte += slice_size;
125262306a36Sopenharmony_ci		if (gpr_byte >= 4) {
125362306a36Sopenharmony_ci			gpr_byte -= 4;
125462306a36Sopenharmony_ci			gpr++;
125562306a36Sopenharmony_ci		}
125662306a36Sopenharmony_ci
125762306a36Sopenharmony_ci		size -= slice_size;
125862306a36Sopenharmony_ci		off += slice_size;
125962306a36Sopenharmony_ci	}
126062306a36Sopenharmony_ci
126162306a36Sopenharmony_ci	return 0;
126262306a36Sopenharmony_ci}
126362306a36Sopenharmony_ci
126462306a36Sopenharmony_cistatic void
126562306a36Sopenharmony_ciwrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm)
126662306a36Sopenharmony_ci{
126762306a36Sopenharmony_ci	swreg tmp_reg;
126862306a36Sopenharmony_ci
126962306a36Sopenharmony_ci	if (alu_op == ALU_OP_AND) {
127062306a36Sopenharmony_ci		if (!imm)
127162306a36Sopenharmony_ci			wrp_immed(nfp_prog, reg_both(dst), 0);
127262306a36Sopenharmony_ci		if (!imm || !~imm)
127362306a36Sopenharmony_ci			return;
127462306a36Sopenharmony_ci	}
127562306a36Sopenharmony_ci	if (alu_op == ALU_OP_OR) {
127662306a36Sopenharmony_ci		if (!~imm)
127762306a36Sopenharmony_ci			wrp_immed(nfp_prog, reg_both(dst), ~0U);
127862306a36Sopenharmony_ci		if (!imm || !~imm)
127962306a36Sopenharmony_ci			return;
128062306a36Sopenharmony_ci	}
128162306a36Sopenharmony_ci	if (alu_op == ALU_OP_XOR) {
128262306a36Sopenharmony_ci		if (!~imm)
128362306a36Sopenharmony_ci			emit_alu(nfp_prog, reg_both(dst), reg_none(),
128462306a36Sopenharmony_ci				 ALU_OP_NOT, reg_b(dst));
128562306a36Sopenharmony_ci		if (!imm || !~imm)
128662306a36Sopenharmony_ci			return;
128762306a36Sopenharmony_ci	}
128862306a36Sopenharmony_ci
128962306a36Sopenharmony_ci	tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
129062306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg);
129162306a36Sopenharmony_ci}
129262306a36Sopenharmony_ci
129362306a36Sopenharmony_cistatic int
129462306a36Sopenharmony_ciwrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
129562306a36Sopenharmony_ci	      enum alu_op alu_op, bool skip)
129662306a36Sopenharmony_ci{
129762306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
129862306a36Sopenharmony_ci	u64 imm = insn->imm; /* sign extend */
129962306a36Sopenharmony_ci
130062306a36Sopenharmony_ci	if (skip) {
130162306a36Sopenharmony_ci		meta->flags |= FLAG_INSN_SKIP_NOOP;
130262306a36Sopenharmony_ci		return 0;
130362306a36Sopenharmony_ci	}
130462306a36Sopenharmony_ci
130562306a36Sopenharmony_ci	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U);
130662306a36Sopenharmony_ci	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32);
130762306a36Sopenharmony_ci
130862306a36Sopenharmony_ci	return 0;
130962306a36Sopenharmony_ci}
131062306a36Sopenharmony_ci
131162306a36Sopenharmony_cistatic int
131262306a36Sopenharmony_ciwrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
131362306a36Sopenharmony_ci	      enum alu_op alu_op)
131462306a36Sopenharmony_ci{
131562306a36Sopenharmony_ci	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
131662306a36Sopenharmony_ci
131762306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
131862306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_both(dst + 1),
131962306a36Sopenharmony_ci		 reg_a(dst + 1), alu_op, reg_b(src + 1));
132062306a36Sopenharmony_ci
132162306a36Sopenharmony_ci	return 0;
132262306a36Sopenharmony_ci}
132362306a36Sopenharmony_ci
132462306a36Sopenharmony_cistatic int
132562306a36Sopenharmony_ciwrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
132662306a36Sopenharmony_ci	      enum alu_op alu_op)
132762306a36Sopenharmony_ci{
132862306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
132962306a36Sopenharmony_ci	u8 dst = insn->dst_reg * 2;
133062306a36Sopenharmony_ci
133162306a36Sopenharmony_ci	wrp_alu_imm(nfp_prog, dst, alu_op, insn->imm);
133262306a36Sopenharmony_ci	wrp_zext(nfp_prog, meta, dst);
133362306a36Sopenharmony_ci
133462306a36Sopenharmony_ci	return 0;
133562306a36Sopenharmony_ci}
133662306a36Sopenharmony_ci
133762306a36Sopenharmony_cistatic int
133862306a36Sopenharmony_ciwrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
133962306a36Sopenharmony_ci	      enum alu_op alu_op)
134062306a36Sopenharmony_ci{
134162306a36Sopenharmony_ci	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
134262306a36Sopenharmony_ci
134362306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
134462306a36Sopenharmony_ci	wrp_zext(nfp_prog, meta, dst);
134562306a36Sopenharmony_ci
134662306a36Sopenharmony_ci	return 0;
134762306a36Sopenharmony_ci}
134862306a36Sopenharmony_ci
134962306a36Sopenharmony_cistatic void
135062306a36Sopenharmony_ciwrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src,
135162306a36Sopenharmony_ci		 enum br_mask br_mask, u16 off)
135262306a36Sopenharmony_ci{
135362306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src));
135462306a36Sopenharmony_ci	emit_br(nfp_prog, br_mask, off, 0);
135562306a36Sopenharmony_ci}
135662306a36Sopenharmony_ci
135762306a36Sopenharmony_cistatic int
135862306a36Sopenharmony_ciwrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
135962306a36Sopenharmony_ci	     enum alu_op alu_op, enum br_mask br_mask)
136062306a36Sopenharmony_ci{
136162306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
136262306a36Sopenharmony_ci
136362306a36Sopenharmony_ci	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op,
136462306a36Sopenharmony_ci			 insn->src_reg * 2, br_mask, insn->off);
136562306a36Sopenharmony_ci	if (is_mbpf_jmp64(meta))
136662306a36Sopenharmony_ci		wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
136762306a36Sopenharmony_ci				 insn->src_reg * 2 + 1, br_mask, insn->off);
136862306a36Sopenharmony_ci
136962306a36Sopenharmony_ci	return 0;
137062306a36Sopenharmony_ci}
137162306a36Sopenharmony_ci
137262306a36Sopenharmony_cistatic const struct jmp_code_map {
137362306a36Sopenharmony_ci	enum br_mask br_mask;
137462306a36Sopenharmony_ci	bool swap;
137562306a36Sopenharmony_ci} jmp_code_map[] = {
137662306a36Sopenharmony_ci	[BPF_JGT >> 4]	= { BR_BLO, true },
137762306a36Sopenharmony_ci	[BPF_JGE >> 4]	= { BR_BHS, false },
137862306a36Sopenharmony_ci	[BPF_JLT >> 4]	= { BR_BLO, false },
137962306a36Sopenharmony_ci	[BPF_JLE >> 4]	= { BR_BHS, true },
138062306a36Sopenharmony_ci	[BPF_JSGT >> 4]	= { BR_BLT, true },
138162306a36Sopenharmony_ci	[BPF_JSGE >> 4]	= { BR_BGE, false },
138262306a36Sopenharmony_ci	[BPF_JSLT >> 4]	= { BR_BLT, false },
138362306a36Sopenharmony_ci	[BPF_JSLE >> 4]	= { BR_BGE, true },
138462306a36Sopenharmony_ci};
138562306a36Sopenharmony_ci
138662306a36Sopenharmony_cistatic const struct jmp_code_map *nfp_jmp_code_get(struct nfp_insn_meta *meta)
138762306a36Sopenharmony_ci{
138862306a36Sopenharmony_ci	unsigned int op;
138962306a36Sopenharmony_ci
139062306a36Sopenharmony_ci	op = BPF_OP(meta->insn.code) >> 4;
139162306a36Sopenharmony_ci	/* br_mask of 0 is BR_BEQ which we don't use in jump code table */
139262306a36Sopenharmony_ci	if (WARN_ONCE(op >= ARRAY_SIZE(jmp_code_map) ||
139362306a36Sopenharmony_ci		      !jmp_code_map[op].br_mask,
139462306a36Sopenharmony_ci		      "no code found for jump instruction"))
139562306a36Sopenharmony_ci		return NULL;
139662306a36Sopenharmony_ci
139762306a36Sopenharmony_ci	return &jmp_code_map[op];
139862306a36Sopenharmony_ci}
139962306a36Sopenharmony_ci
140062306a36Sopenharmony_cistatic int cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
140162306a36Sopenharmony_ci{
140262306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
140362306a36Sopenharmony_ci	u64 imm = insn->imm; /* sign extend */
140462306a36Sopenharmony_ci	const struct jmp_code_map *code;
140562306a36Sopenharmony_ci	enum alu_op alu_op, carry_op;
140662306a36Sopenharmony_ci	u8 reg = insn->dst_reg * 2;
140762306a36Sopenharmony_ci	swreg tmp_reg;
140862306a36Sopenharmony_ci
140962306a36Sopenharmony_ci	code = nfp_jmp_code_get(meta);
141062306a36Sopenharmony_ci	if (!code)
141162306a36Sopenharmony_ci		return -EINVAL;
141262306a36Sopenharmony_ci
141362306a36Sopenharmony_ci	alu_op = meta->jump_neg_op ? ALU_OP_ADD : ALU_OP_SUB;
141462306a36Sopenharmony_ci	carry_op = meta->jump_neg_op ? ALU_OP_ADD_C : ALU_OP_SUB_C;
141562306a36Sopenharmony_ci
141662306a36Sopenharmony_ci	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
141762306a36Sopenharmony_ci	if (!code->swap)
141862306a36Sopenharmony_ci		emit_alu(nfp_prog, reg_none(), reg_a(reg), alu_op, tmp_reg);
141962306a36Sopenharmony_ci	else
142062306a36Sopenharmony_ci		emit_alu(nfp_prog, reg_none(), tmp_reg, alu_op, reg_a(reg));
142162306a36Sopenharmony_ci
142262306a36Sopenharmony_ci	if (is_mbpf_jmp64(meta)) {
142362306a36Sopenharmony_ci		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
142462306a36Sopenharmony_ci		if (!code->swap)
142562306a36Sopenharmony_ci			emit_alu(nfp_prog, reg_none(),
142662306a36Sopenharmony_ci				 reg_a(reg + 1), carry_op, tmp_reg);
142762306a36Sopenharmony_ci		else
142862306a36Sopenharmony_ci			emit_alu(nfp_prog, reg_none(),
142962306a36Sopenharmony_ci				 tmp_reg, carry_op, reg_a(reg + 1));
143062306a36Sopenharmony_ci	}
143162306a36Sopenharmony_ci
143262306a36Sopenharmony_ci	emit_br(nfp_prog, code->br_mask, insn->off, 0);
143362306a36Sopenharmony_ci
143462306a36Sopenharmony_ci	return 0;
143562306a36Sopenharmony_ci}
143662306a36Sopenharmony_ci
143762306a36Sopenharmony_cistatic int cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
143862306a36Sopenharmony_ci{
143962306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
144062306a36Sopenharmony_ci	const struct jmp_code_map *code;
144162306a36Sopenharmony_ci	u8 areg, breg;
144262306a36Sopenharmony_ci
144362306a36Sopenharmony_ci	code = nfp_jmp_code_get(meta);
144462306a36Sopenharmony_ci	if (!code)
144562306a36Sopenharmony_ci		return -EINVAL;
144662306a36Sopenharmony_ci
144762306a36Sopenharmony_ci	areg = insn->dst_reg * 2;
144862306a36Sopenharmony_ci	breg = insn->src_reg * 2;
144962306a36Sopenharmony_ci
145062306a36Sopenharmony_ci	if (code->swap) {
145162306a36Sopenharmony_ci		areg ^= breg;
145262306a36Sopenharmony_ci		breg ^= areg;
145362306a36Sopenharmony_ci		areg ^= breg;
145462306a36Sopenharmony_ci	}
145562306a36Sopenharmony_ci
145662306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
145762306a36Sopenharmony_ci	if (is_mbpf_jmp64(meta))
145862306a36Sopenharmony_ci		emit_alu(nfp_prog, reg_none(),
145962306a36Sopenharmony_ci			 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
146062306a36Sopenharmony_ci	emit_br(nfp_prog, code->br_mask, insn->off, 0);
146162306a36Sopenharmony_ci
146262306a36Sopenharmony_ci	return 0;
146362306a36Sopenharmony_ci}
146462306a36Sopenharmony_ci
146562306a36Sopenharmony_cistatic void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out)
146662306a36Sopenharmony_ci{
146762306a36Sopenharmony_ci	emit_ld_field(nfp_prog, reg_both(gpr_out), 0xf, reg_in,
146862306a36Sopenharmony_ci		      SHF_SC_R_ROT, 8);
146962306a36Sopenharmony_ci	emit_ld_field(nfp_prog, reg_both(gpr_out), 0x5, reg_a(gpr_out),
147062306a36Sopenharmony_ci		      SHF_SC_R_ROT, 16);
147162306a36Sopenharmony_ci}
147262306a36Sopenharmony_ci
147362306a36Sopenharmony_cistatic void
147462306a36Sopenharmony_ciwrp_mul_u32(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg,
147562306a36Sopenharmony_ci	    swreg rreg, bool gen_high_half)
147662306a36Sopenharmony_ci{
147762306a36Sopenharmony_ci	emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg);
147862306a36Sopenharmony_ci	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_1, rreg);
147962306a36Sopenharmony_ci	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_2, rreg);
148062306a36Sopenharmony_ci	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_3, rreg);
148162306a36Sopenharmony_ci	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_4, rreg);
148262306a36Sopenharmony_ci	emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_32x32, MUL_LAST, reg_none());
148362306a36Sopenharmony_ci	if (gen_high_half)
148462306a36Sopenharmony_ci		emit_mul(nfp_prog, dst_hi, MUL_TYPE_STEP_32x32, MUL_LAST_2,
148562306a36Sopenharmony_ci			 reg_none());
148662306a36Sopenharmony_ci	else
148762306a36Sopenharmony_ci		wrp_immed(nfp_prog, dst_hi, 0);
148862306a36Sopenharmony_ci}
148962306a36Sopenharmony_ci
149062306a36Sopenharmony_cistatic void
149162306a36Sopenharmony_ciwrp_mul_u16(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg,
149262306a36Sopenharmony_ci	    swreg rreg)
149362306a36Sopenharmony_ci{
149462306a36Sopenharmony_ci	emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg);
149562306a36Sopenharmony_ci	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_1, rreg);
149662306a36Sopenharmony_ci	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_2, rreg);
149762306a36Sopenharmony_ci	emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_16x16, MUL_LAST, reg_none());
149862306a36Sopenharmony_ci}
149962306a36Sopenharmony_ci
150062306a36Sopenharmony_cistatic int
150162306a36Sopenharmony_ciwrp_mul(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
150262306a36Sopenharmony_ci	bool gen_high_half, bool ropnd_from_reg)
150362306a36Sopenharmony_ci{
150462306a36Sopenharmony_ci	swreg multiplier, multiplicand, dst_hi, dst_lo;
150562306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
150662306a36Sopenharmony_ci	u32 lopnd_max, ropnd_max;
150762306a36Sopenharmony_ci	u8 dst_reg;
150862306a36Sopenharmony_ci
150962306a36Sopenharmony_ci	dst_reg = insn->dst_reg;
151062306a36Sopenharmony_ci	multiplicand = reg_a(dst_reg * 2);
151162306a36Sopenharmony_ci	dst_hi = reg_both(dst_reg * 2 + 1);
151262306a36Sopenharmony_ci	dst_lo = reg_both(dst_reg * 2);
151362306a36Sopenharmony_ci	lopnd_max = meta->umax_dst;
151462306a36Sopenharmony_ci	if (ropnd_from_reg) {
151562306a36Sopenharmony_ci		multiplier = reg_b(insn->src_reg * 2);
151662306a36Sopenharmony_ci		ropnd_max = meta->umax_src;
151762306a36Sopenharmony_ci	} else {
151862306a36Sopenharmony_ci		u32 imm = insn->imm;
151962306a36Sopenharmony_ci
152062306a36Sopenharmony_ci		multiplier = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
152162306a36Sopenharmony_ci		ropnd_max = imm;
152262306a36Sopenharmony_ci	}
152362306a36Sopenharmony_ci	if (lopnd_max > U16_MAX || ropnd_max > U16_MAX)
152462306a36Sopenharmony_ci		wrp_mul_u32(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier,
152562306a36Sopenharmony_ci			    gen_high_half);
152662306a36Sopenharmony_ci	else
152762306a36Sopenharmony_ci		wrp_mul_u16(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier);
152862306a36Sopenharmony_ci
152962306a36Sopenharmony_ci	return 0;
153062306a36Sopenharmony_ci}
153162306a36Sopenharmony_ci
153262306a36Sopenharmony_cistatic int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm)
153362306a36Sopenharmony_ci{
153462306a36Sopenharmony_ci	swreg dst_both = reg_both(dst), dst_a = reg_a(dst), dst_b = reg_a(dst);
153562306a36Sopenharmony_ci	struct reciprocal_value_adv rvalue;
153662306a36Sopenharmony_ci	u8 pre_shift, exp;
153762306a36Sopenharmony_ci	swreg magic;
153862306a36Sopenharmony_ci
153962306a36Sopenharmony_ci	if (imm > U32_MAX) {
154062306a36Sopenharmony_ci		wrp_immed(nfp_prog, dst_both, 0);
154162306a36Sopenharmony_ci		return 0;
154262306a36Sopenharmony_ci	}
154362306a36Sopenharmony_ci
154462306a36Sopenharmony_ci	/* NOTE: because we are using "reciprocal_value_adv" which doesn't
154562306a36Sopenharmony_ci	 * support "divisor > (1u << 31)", we need to JIT separate NFP sequence
154662306a36Sopenharmony_ci	 * to handle such case which actually equals to the result of unsigned
154762306a36Sopenharmony_ci	 * comparison "dst >= imm" which could be calculated using the following
154862306a36Sopenharmony_ci	 * NFP sequence:
154962306a36Sopenharmony_ci	 *
155062306a36Sopenharmony_ci	 *  alu[--, dst, -, imm]
155162306a36Sopenharmony_ci	 *  immed[imm, 0]
155262306a36Sopenharmony_ci	 *  alu[dst, imm, +carry, 0]
155362306a36Sopenharmony_ci	 *
155462306a36Sopenharmony_ci	 */
155562306a36Sopenharmony_ci	if (imm > 1U << 31) {
155662306a36Sopenharmony_ci		swreg tmp_b = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
155762306a36Sopenharmony_ci
155862306a36Sopenharmony_ci		emit_alu(nfp_prog, reg_none(), dst_a, ALU_OP_SUB, tmp_b);
155962306a36Sopenharmony_ci		wrp_immed(nfp_prog, imm_a(nfp_prog), 0);
156062306a36Sopenharmony_ci		emit_alu(nfp_prog, dst_both, imm_a(nfp_prog), ALU_OP_ADD_C,
156162306a36Sopenharmony_ci			 reg_imm(0));
156262306a36Sopenharmony_ci		return 0;
156362306a36Sopenharmony_ci	}
156462306a36Sopenharmony_ci
156562306a36Sopenharmony_ci	rvalue = reciprocal_value_adv(imm, 32);
156662306a36Sopenharmony_ci	exp = rvalue.exp;
156762306a36Sopenharmony_ci	if (rvalue.is_wide_m && !(imm & 1)) {
156862306a36Sopenharmony_ci		pre_shift = fls(imm & -imm) - 1;
156962306a36Sopenharmony_ci		rvalue = reciprocal_value_adv(imm >> pre_shift, 32 - pre_shift);
157062306a36Sopenharmony_ci	} else {
157162306a36Sopenharmony_ci		pre_shift = 0;
157262306a36Sopenharmony_ci	}
157362306a36Sopenharmony_ci	magic = ur_load_imm_any(nfp_prog, rvalue.m, imm_b(nfp_prog));
157462306a36Sopenharmony_ci	if (imm == 1U << exp) {
157562306a36Sopenharmony_ci		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
157662306a36Sopenharmony_ci			 SHF_SC_R_SHF, exp);
157762306a36Sopenharmony_ci	} else if (rvalue.is_wide_m) {
157862306a36Sopenharmony_ci		wrp_mul_u32(nfp_prog, imm_both(nfp_prog), reg_none(), dst_a,
157962306a36Sopenharmony_ci			    magic, true);
158062306a36Sopenharmony_ci		emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB,
158162306a36Sopenharmony_ci			 imm_b(nfp_prog));
158262306a36Sopenharmony_ci		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
158362306a36Sopenharmony_ci			 SHF_SC_R_SHF, 1);
158462306a36Sopenharmony_ci		emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD,
158562306a36Sopenharmony_ci			 imm_b(nfp_prog));
158662306a36Sopenharmony_ci		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
158762306a36Sopenharmony_ci			 SHF_SC_R_SHF, rvalue.sh - 1);
158862306a36Sopenharmony_ci	} else {
158962306a36Sopenharmony_ci		if (pre_shift)
159062306a36Sopenharmony_ci			emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
159162306a36Sopenharmony_ci				 dst_b, SHF_SC_R_SHF, pre_shift);
159262306a36Sopenharmony_ci		wrp_mul_u32(nfp_prog, dst_both, reg_none(), dst_a, magic, true);
159362306a36Sopenharmony_ci		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
159462306a36Sopenharmony_ci			 dst_b, SHF_SC_R_SHF, rvalue.sh);
159562306a36Sopenharmony_ci	}
159662306a36Sopenharmony_ci
159762306a36Sopenharmony_ci	return 0;
159862306a36Sopenharmony_ci}
159962306a36Sopenharmony_ci
160062306a36Sopenharmony_cistatic int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
160162306a36Sopenharmony_ci{
160262306a36Sopenharmony_ci	swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog);
160362306a36Sopenharmony_ci	struct nfp_bpf_cap_adjust_head *adjust_head;
160462306a36Sopenharmony_ci	u32 ret_einval, end;
160562306a36Sopenharmony_ci
160662306a36Sopenharmony_ci	adjust_head = &nfp_prog->bpf->adjust_head;
160762306a36Sopenharmony_ci
160862306a36Sopenharmony_ci	/* Optimized version - 5 vs 14 cycles */
160962306a36Sopenharmony_ci	if (nfp_prog->adjust_head_location != UINT_MAX) {
161062306a36Sopenharmony_ci		if (WARN_ON_ONCE(nfp_prog->adjust_head_location != meta->n))
161162306a36Sopenharmony_ci			return -EINVAL;
161262306a36Sopenharmony_ci
161362306a36Sopenharmony_ci		emit_alu(nfp_prog, pptr_reg(nfp_prog),
161462306a36Sopenharmony_ci			 reg_a(2 * 2), ALU_OP_ADD, pptr_reg(nfp_prog));
161562306a36Sopenharmony_ci		emit_alu(nfp_prog, plen_reg(nfp_prog),
161662306a36Sopenharmony_ci			 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
161762306a36Sopenharmony_ci		emit_alu(nfp_prog, pv_len(nfp_prog),
161862306a36Sopenharmony_ci			 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
161962306a36Sopenharmony_ci
162062306a36Sopenharmony_ci		wrp_immed(nfp_prog, reg_both(0), 0);
162162306a36Sopenharmony_ci		wrp_immed(nfp_prog, reg_both(1), 0);
162262306a36Sopenharmony_ci
162362306a36Sopenharmony_ci		/* TODO: when adjust head is guaranteed to succeed we can
162462306a36Sopenharmony_ci		 * also eliminate the following if (r0 == 0) branch.
162562306a36Sopenharmony_ci		 */
162662306a36Sopenharmony_ci
162762306a36Sopenharmony_ci		return 0;
162862306a36Sopenharmony_ci	}
162962306a36Sopenharmony_ci
163062306a36Sopenharmony_ci	ret_einval = nfp_prog_current_offset(nfp_prog) + 14;
163162306a36Sopenharmony_ci	end = ret_einval + 2;
163262306a36Sopenharmony_ci
163362306a36Sopenharmony_ci	/* We need to use a temp because offset is just a part of the pkt ptr */
163462306a36Sopenharmony_ci	emit_alu(nfp_prog, tmp,
163562306a36Sopenharmony_ci		 reg_a(2 * 2), ALU_OP_ADD_2B, pptr_reg(nfp_prog));
163662306a36Sopenharmony_ci
163762306a36Sopenharmony_ci	/* Validate result will fit within FW datapath constraints */
163862306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(),
163962306a36Sopenharmony_ci		 tmp, ALU_OP_SUB, reg_imm(adjust_head->off_min));
164062306a36Sopenharmony_ci	emit_br(nfp_prog, BR_BLO, ret_einval, 0);
164162306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(),
164262306a36Sopenharmony_ci		 reg_imm(adjust_head->off_max), ALU_OP_SUB, tmp);
164362306a36Sopenharmony_ci	emit_br(nfp_prog, BR_BLO, ret_einval, 0);
164462306a36Sopenharmony_ci
164562306a36Sopenharmony_ci	/* Validate the length is at least ETH_HLEN */
164662306a36Sopenharmony_ci	emit_alu(nfp_prog, tmp_len,
164762306a36Sopenharmony_ci		 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
164862306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(),
164962306a36Sopenharmony_ci		 tmp_len, ALU_OP_SUB, reg_imm(ETH_HLEN));
165062306a36Sopenharmony_ci	emit_br(nfp_prog, BR_BMI, ret_einval, 0);
165162306a36Sopenharmony_ci
165262306a36Sopenharmony_ci	/* Load the ret code */
165362306a36Sopenharmony_ci	wrp_immed(nfp_prog, reg_both(0), 0);
165462306a36Sopenharmony_ci	wrp_immed(nfp_prog, reg_both(1), 0);
165562306a36Sopenharmony_ci
165662306a36Sopenharmony_ci	/* Modify the packet metadata */
165762306a36Sopenharmony_ci	emit_ld_field(nfp_prog, pptr_reg(nfp_prog), 0x3, tmp, SHF_SC_NONE, 0);
165862306a36Sopenharmony_ci
165962306a36Sopenharmony_ci	/* Skip over the -EINVAL ret code (defer 2) */
166062306a36Sopenharmony_ci	emit_br(nfp_prog, BR_UNC, end, 2);
166162306a36Sopenharmony_ci
166262306a36Sopenharmony_ci	emit_alu(nfp_prog, plen_reg(nfp_prog),
166362306a36Sopenharmony_ci		 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
166462306a36Sopenharmony_ci	emit_alu(nfp_prog, pv_len(nfp_prog),
166562306a36Sopenharmony_ci		 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
166662306a36Sopenharmony_ci
166762306a36Sopenharmony_ci	/* return -EINVAL target */
166862306a36Sopenharmony_ci	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
166962306a36Sopenharmony_ci		return -EINVAL;
167062306a36Sopenharmony_ci
167162306a36Sopenharmony_ci	wrp_immed(nfp_prog, reg_both(0), -22);
167262306a36Sopenharmony_ci	wrp_immed(nfp_prog, reg_both(1), ~0);
167362306a36Sopenharmony_ci
167462306a36Sopenharmony_ci	if (!nfp_prog_confirm_current_offset(nfp_prog, end))
167562306a36Sopenharmony_ci		return -EINVAL;
167662306a36Sopenharmony_ci
167762306a36Sopenharmony_ci	return 0;
167862306a36Sopenharmony_ci}
167962306a36Sopenharmony_ci
168062306a36Sopenharmony_cistatic int adjust_tail(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
168162306a36Sopenharmony_ci{
168262306a36Sopenharmony_ci	u32 ret_einval, end;
168362306a36Sopenharmony_ci	swreg plen, delta;
168462306a36Sopenharmony_ci
168562306a36Sopenharmony_ci	BUILD_BUG_ON(plen_reg(nfp_prog) != reg_b(STATIC_REG_PKT_LEN));
168662306a36Sopenharmony_ci
168762306a36Sopenharmony_ci	plen = imm_a(nfp_prog);
168862306a36Sopenharmony_ci	delta = reg_a(2 * 2);
168962306a36Sopenharmony_ci
169062306a36Sopenharmony_ci	ret_einval = nfp_prog_current_offset(nfp_prog) + 9;
169162306a36Sopenharmony_ci	end = nfp_prog_current_offset(nfp_prog) + 11;
169262306a36Sopenharmony_ci
169362306a36Sopenharmony_ci	/* Calculate resulting length */
169462306a36Sopenharmony_ci	emit_alu(nfp_prog, plen, plen_reg(nfp_prog), ALU_OP_ADD, delta);
169562306a36Sopenharmony_ci	/* delta == 0 is not allowed by the kernel, add must overflow to make
169662306a36Sopenharmony_ci	 * length smaller.
169762306a36Sopenharmony_ci	 */
169862306a36Sopenharmony_ci	emit_br(nfp_prog, BR_BCC, ret_einval, 0);
169962306a36Sopenharmony_ci
170062306a36Sopenharmony_ci	/* if (new_len < 14) then -EINVAL */
170162306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(), plen, ALU_OP_SUB, reg_imm(ETH_HLEN));
170262306a36Sopenharmony_ci	emit_br(nfp_prog, BR_BMI, ret_einval, 0);
170362306a36Sopenharmony_ci
170462306a36Sopenharmony_ci	emit_alu(nfp_prog, plen_reg(nfp_prog),
170562306a36Sopenharmony_ci		 plen_reg(nfp_prog), ALU_OP_ADD, delta);
170662306a36Sopenharmony_ci	emit_alu(nfp_prog, pv_len(nfp_prog),
170762306a36Sopenharmony_ci		 pv_len(nfp_prog), ALU_OP_ADD, delta);
170862306a36Sopenharmony_ci
170962306a36Sopenharmony_ci	emit_br(nfp_prog, BR_UNC, end, 2);
171062306a36Sopenharmony_ci	wrp_immed(nfp_prog, reg_both(0), 0);
171162306a36Sopenharmony_ci	wrp_immed(nfp_prog, reg_both(1), 0);
171262306a36Sopenharmony_ci
171362306a36Sopenharmony_ci	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
171462306a36Sopenharmony_ci		return -EINVAL;
171562306a36Sopenharmony_ci
171662306a36Sopenharmony_ci	wrp_immed(nfp_prog, reg_both(0), -22);
171762306a36Sopenharmony_ci	wrp_immed(nfp_prog, reg_both(1), ~0);
171862306a36Sopenharmony_ci
171962306a36Sopenharmony_ci	if (!nfp_prog_confirm_current_offset(nfp_prog, end))
172062306a36Sopenharmony_ci		return -EINVAL;
172162306a36Sopenharmony_ci
172262306a36Sopenharmony_ci	return 0;
172362306a36Sopenharmony_ci}
172462306a36Sopenharmony_ci
172562306a36Sopenharmony_cistatic int
172662306a36Sopenharmony_cimap_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
172762306a36Sopenharmony_ci{
172862306a36Sopenharmony_ci	bool load_lm_ptr;
172962306a36Sopenharmony_ci	u32 ret_tgt;
173062306a36Sopenharmony_ci	s64 lm_off;
173162306a36Sopenharmony_ci
173262306a36Sopenharmony_ci	/* We only have to reload LM0 if the key is not at start of stack */
173362306a36Sopenharmony_ci	lm_off = nfp_prog->stack_frame_depth;
173462306a36Sopenharmony_ci	lm_off += meta->arg2.reg.var_off.value + meta->arg2.reg.off;
173562306a36Sopenharmony_ci	load_lm_ptr = meta->arg2.var_off || lm_off;
173662306a36Sopenharmony_ci
173762306a36Sopenharmony_ci	/* Set LM0 to start of key */
173862306a36Sopenharmony_ci	if (load_lm_ptr)
173962306a36Sopenharmony_ci		emit_csr_wr(nfp_prog, reg_b(2 * 2), NFP_CSR_ACT_LM_ADDR0);
174062306a36Sopenharmony_ci	if (meta->func_id == BPF_FUNC_map_update_elem)
174162306a36Sopenharmony_ci		emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2);
174262306a36Sopenharmony_ci
174362306a36Sopenharmony_ci	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
174462306a36Sopenharmony_ci		     2, RELO_BR_HELPER);
174562306a36Sopenharmony_ci	ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
174662306a36Sopenharmony_ci
174762306a36Sopenharmony_ci	/* Load map ID into A0 */
174862306a36Sopenharmony_ci	wrp_mov(nfp_prog, reg_a(0), reg_a(2));
174962306a36Sopenharmony_ci
175062306a36Sopenharmony_ci	/* Load the return address into B0 */
175162306a36Sopenharmony_ci	wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
175262306a36Sopenharmony_ci
175362306a36Sopenharmony_ci	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
175462306a36Sopenharmony_ci		return -EINVAL;
175562306a36Sopenharmony_ci
175662306a36Sopenharmony_ci	/* Reset the LM0 pointer */
175762306a36Sopenharmony_ci	if (!load_lm_ptr)
175862306a36Sopenharmony_ci		return 0;
175962306a36Sopenharmony_ci
176062306a36Sopenharmony_ci	emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0);
176162306a36Sopenharmony_ci	wrp_nops(nfp_prog, 3);
176262306a36Sopenharmony_ci
176362306a36Sopenharmony_ci	return 0;
176462306a36Sopenharmony_ci}
176562306a36Sopenharmony_ci
176662306a36Sopenharmony_cistatic int
176762306a36Sopenharmony_cinfp_get_prandom_u32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
176862306a36Sopenharmony_ci{
176962306a36Sopenharmony_ci	__emit_csr_rd(nfp_prog, NFP_CSR_PSEUDO_RND_NUM);
177062306a36Sopenharmony_ci	/* CSR value is read in following immed[gpr, 0] */
177162306a36Sopenharmony_ci	emit_immed(nfp_prog, reg_both(0), 0,
177262306a36Sopenharmony_ci		   IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
177362306a36Sopenharmony_ci	emit_immed(nfp_prog, reg_both(1), 0,
177462306a36Sopenharmony_ci		   IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
177562306a36Sopenharmony_ci	return 0;
177662306a36Sopenharmony_ci}
177762306a36Sopenharmony_ci
177862306a36Sopenharmony_cistatic int
177962306a36Sopenharmony_cinfp_perf_event_output(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
178062306a36Sopenharmony_ci{
178162306a36Sopenharmony_ci	swreg ptr_type;
178262306a36Sopenharmony_ci	u32 ret_tgt;
178362306a36Sopenharmony_ci
178462306a36Sopenharmony_ci	ptr_type = ur_load_imm_any(nfp_prog, meta->arg1.type, imm_a(nfp_prog));
178562306a36Sopenharmony_ci
178662306a36Sopenharmony_ci	ret_tgt = nfp_prog_current_offset(nfp_prog) + 3;
178762306a36Sopenharmony_ci
178862306a36Sopenharmony_ci	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
178962306a36Sopenharmony_ci		     2, RELO_BR_HELPER);
179062306a36Sopenharmony_ci
179162306a36Sopenharmony_ci	/* Load ptr type into A1 */
179262306a36Sopenharmony_ci	wrp_mov(nfp_prog, reg_a(1), ptr_type);
179362306a36Sopenharmony_ci
179462306a36Sopenharmony_ci	/* Load the return address into B0 */
179562306a36Sopenharmony_ci	wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
179662306a36Sopenharmony_ci
179762306a36Sopenharmony_ci	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
179862306a36Sopenharmony_ci		return -EINVAL;
179962306a36Sopenharmony_ci
180062306a36Sopenharmony_ci	return 0;
180162306a36Sopenharmony_ci}
180262306a36Sopenharmony_ci
180362306a36Sopenharmony_cistatic int
180462306a36Sopenharmony_cinfp_queue_select(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
180562306a36Sopenharmony_ci{
180662306a36Sopenharmony_ci	u32 jmp_tgt;
180762306a36Sopenharmony_ci
180862306a36Sopenharmony_ci	jmp_tgt = nfp_prog_current_offset(nfp_prog) + 5;
180962306a36Sopenharmony_ci
181062306a36Sopenharmony_ci	/* Make sure the queue id fits into FW field */
181162306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(), reg_a(meta->insn.src_reg * 2),
181262306a36Sopenharmony_ci		 ALU_OP_AND_NOT_B, reg_imm(0xff));
181362306a36Sopenharmony_ci	emit_br(nfp_prog, BR_BEQ, jmp_tgt, 2);
181462306a36Sopenharmony_ci
181562306a36Sopenharmony_ci	/* Set the 'queue selected' bit and the queue value */
181662306a36Sopenharmony_ci	emit_shf(nfp_prog, pv_qsel_set(nfp_prog),
181762306a36Sopenharmony_ci		 pv_qsel_set(nfp_prog), SHF_OP_OR, reg_imm(1),
181862306a36Sopenharmony_ci		 SHF_SC_L_SHF, PKT_VEL_QSEL_SET_BIT);
181962306a36Sopenharmony_ci	emit_ld_field(nfp_prog,
182062306a36Sopenharmony_ci		      pv_qsel_val(nfp_prog), 0x1, reg_b(meta->insn.src_reg * 2),
182162306a36Sopenharmony_ci		      SHF_SC_NONE, 0);
182262306a36Sopenharmony_ci	/* Delay slots end here, we will jump over next instruction if queue
182362306a36Sopenharmony_ci	 * value fits into the field.
182462306a36Sopenharmony_ci	 */
182562306a36Sopenharmony_ci	emit_ld_field(nfp_prog,
182662306a36Sopenharmony_ci		      pv_qsel_val(nfp_prog), 0x1, reg_imm(NFP_NET_RXR_MAX),
182762306a36Sopenharmony_ci		      SHF_SC_NONE, 0);
182862306a36Sopenharmony_ci
182962306a36Sopenharmony_ci	if (!nfp_prog_confirm_current_offset(nfp_prog, jmp_tgt))
183062306a36Sopenharmony_ci		return -EINVAL;
183162306a36Sopenharmony_ci
183262306a36Sopenharmony_ci	return 0;
183362306a36Sopenharmony_ci}
183462306a36Sopenharmony_ci
183562306a36Sopenharmony_ci/* --- Callbacks --- */
183662306a36Sopenharmony_cistatic int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
183762306a36Sopenharmony_ci{
183862306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
183962306a36Sopenharmony_ci	u8 dst = insn->dst_reg * 2;
184062306a36Sopenharmony_ci	u8 src = insn->src_reg * 2;
184162306a36Sopenharmony_ci
184262306a36Sopenharmony_ci	if (insn->src_reg == BPF_REG_10) {
184362306a36Sopenharmony_ci		swreg stack_depth_reg;
184462306a36Sopenharmony_ci
184562306a36Sopenharmony_ci		stack_depth_reg = ur_load_imm_any(nfp_prog,
184662306a36Sopenharmony_ci						  nfp_prog->stack_frame_depth,
184762306a36Sopenharmony_ci						  stack_imm(nfp_prog));
184862306a36Sopenharmony_ci		emit_alu(nfp_prog, reg_both(dst), stack_reg(nfp_prog),
184962306a36Sopenharmony_ci			 ALU_OP_ADD, stack_depth_reg);
185062306a36Sopenharmony_ci		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
185162306a36Sopenharmony_ci	} else {
185262306a36Sopenharmony_ci		wrp_reg_mov(nfp_prog, dst, src);
185362306a36Sopenharmony_ci		wrp_reg_mov(nfp_prog, dst + 1, src + 1);
185462306a36Sopenharmony_ci	}
185562306a36Sopenharmony_ci
185662306a36Sopenharmony_ci	return 0;
185762306a36Sopenharmony_ci}
185862306a36Sopenharmony_ci
185962306a36Sopenharmony_cistatic int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
186062306a36Sopenharmony_ci{
186162306a36Sopenharmony_ci	u64 imm = meta->insn.imm; /* sign extend */
186262306a36Sopenharmony_ci
186362306a36Sopenharmony_ci	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U);
186462306a36Sopenharmony_ci	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32);
186562306a36Sopenharmony_ci
186662306a36Sopenharmony_ci	return 0;
186762306a36Sopenharmony_ci}
186862306a36Sopenharmony_ci
186962306a36Sopenharmony_cistatic int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
187062306a36Sopenharmony_ci{
187162306a36Sopenharmony_ci	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR);
187262306a36Sopenharmony_ci}
187362306a36Sopenharmony_ci
187462306a36Sopenharmony_cistatic int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
187562306a36Sopenharmony_ci{
187662306a36Sopenharmony_ci	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm);
187762306a36Sopenharmony_ci}
187862306a36Sopenharmony_ci
187962306a36Sopenharmony_cistatic int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
188062306a36Sopenharmony_ci{
188162306a36Sopenharmony_ci	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND);
188262306a36Sopenharmony_ci}
188362306a36Sopenharmony_ci
188462306a36Sopenharmony_cistatic int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
188562306a36Sopenharmony_ci{
188662306a36Sopenharmony_ci	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
188762306a36Sopenharmony_ci}
188862306a36Sopenharmony_ci
188962306a36Sopenharmony_cistatic int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
189062306a36Sopenharmony_ci{
189162306a36Sopenharmony_ci	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR);
189262306a36Sopenharmony_ci}
189362306a36Sopenharmony_ci
189462306a36Sopenharmony_cistatic int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
189562306a36Sopenharmony_ci{
189662306a36Sopenharmony_ci	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
189762306a36Sopenharmony_ci}
189862306a36Sopenharmony_ci
189962306a36Sopenharmony_cistatic int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
190062306a36Sopenharmony_ci{
190162306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
190262306a36Sopenharmony_ci
190362306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
190462306a36Sopenharmony_ci		 reg_a(insn->dst_reg * 2), ALU_OP_ADD,
190562306a36Sopenharmony_ci		 reg_b(insn->src_reg * 2));
190662306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
190762306a36Sopenharmony_ci		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C,
190862306a36Sopenharmony_ci		 reg_b(insn->src_reg * 2 + 1));
190962306a36Sopenharmony_ci
191062306a36Sopenharmony_ci	return 0;
191162306a36Sopenharmony_ci}
191262306a36Sopenharmony_ci
191362306a36Sopenharmony_cistatic int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
191462306a36Sopenharmony_ci{
191562306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
191662306a36Sopenharmony_ci	u64 imm = insn->imm; /* sign extend */
191762306a36Sopenharmony_ci
191862306a36Sopenharmony_ci	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U);
191962306a36Sopenharmony_ci	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32);
192062306a36Sopenharmony_ci
192162306a36Sopenharmony_ci	return 0;
192262306a36Sopenharmony_ci}
192362306a36Sopenharmony_ci
192462306a36Sopenharmony_cistatic int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
192562306a36Sopenharmony_ci{
192662306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
192762306a36Sopenharmony_ci
192862306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
192962306a36Sopenharmony_ci		 reg_a(insn->dst_reg * 2), ALU_OP_SUB,
193062306a36Sopenharmony_ci		 reg_b(insn->src_reg * 2));
193162306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
193262306a36Sopenharmony_ci		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C,
193362306a36Sopenharmony_ci		 reg_b(insn->src_reg * 2 + 1));
193462306a36Sopenharmony_ci
193562306a36Sopenharmony_ci	return 0;
193662306a36Sopenharmony_ci}
193762306a36Sopenharmony_ci
193862306a36Sopenharmony_cistatic int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
193962306a36Sopenharmony_ci{
194062306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
194162306a36Sopenharmony_ci	u64 imm = insn->imm; /* sign extend */
194262306a36Sopenharmony_ci
194362306a36Sopenharmony_ci	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U);
194462306a36Sopenharmony_ci	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32);
194562306a36Sopenharmony_ci
194662306a36Sopenharmony_ci	return 0;
194762306a36Sopenharmony_ci}
194862306a36Sopenharmony_ci
194962306a36Sopenharmony_cistatic int mul_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
195062306a36Sopenharmony_ci{
195162306a36Sopenharmony_ci	return wrp_mul(nfp_prog, meta, true, true);
195262306a36Sopenharmony_ci}
195362306a36Sopenharmony_ci
195462306a36Sopenharmony_cistatic int mul_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
195562306a36Sopenharmony_ci{
195662306a36Sopenharmony_ci	return wrp_mul(nfp_prog, meta, true, false);
195762306a36Sopenharmony_ci}
195862306a36Sopenharmony_ci
195962306a36Sopenharmony_cistatic int div_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
196062306a36Sopenharmony_ci{
196162306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
196262306a36Sopenharmony_ci
196362306a36Sopenharmony_ci	return wrp_div_imm(nfp_prog, insn->dst_reg * 2, insn->imm);
196462306a36Sopenharmony_ci}
196562306a36Sopenharmony_ci
196662306a36Sopenharmony_cistatic int div_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
196762306a36Sopenharmony_ci{
196862306a36Sopenharmony_ci	/* NOTE: verifier hook has rejected cases for which verifier doesn't
196962306a36Sopenharmony_ci	 * know whether the source operand is constant or not.
197062306a36Sopenharmony_ci	 */
197162306a36Sopenharmony_ci	return wrp_div_imm(nfp_prog, meta->insn.dst_reg * 2, meta->umin_src);
197262306a36Sopenharmony_ci}
197362306a36Sopenharmony_ci
197462306a36Sopenharmony_cistatic int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
197562306a36Sopenharmony_ci{
197662306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
197762306a36Sopenharmony_ci
197862306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), reg_imm(0),
197962306a36Sopenharmony_ci		 ALU_OP_SUB, reg_b(insn->dst_reg * 2));
198062306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), reg_imm(0),
198162306a36Sopenharmony_ci		 ALU_OP_SUB_C, reg_b(insn->dst_reg * 2 + 1));
198262306a36Sopenharmony_ci
198362306a36Sopenharmony_ci	return 0;
198462306a36Sopenharmony_ci}
198562306a36Sopenharmony_ci
198662306a36Sopenharmony_ci/* Pseudo code:
198762306a36Sopenharmony_ci *   if shift_amt >= 32
198862306a36Sopenharmony_ci *     dst_high = dst_low << shift_amt[4:0]
198962306a36Sopenharmony_ci *     dst_low = 0;
199062306a36Sopenharmony_ci *   else
199162306a36Sopenharmony_ci *     dst_high = (dst_high, dst_low) >> (32 - shift_amt)
199262306a36Sopenharmony_ci *     dst_low = dst_low << shift_amt
199362306a36Sopenharmony_ci *
199462306a36Sopenharmony_ci * The indirect shift will use the same logic at runtime.
199562306a36Sopenharmony_ci */
199662306a36Sopenharmony_cistatic int __shl_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
199762306a36Sopenharmony_ci{
199862306a36Sopenharmony_ci	if (!shift_amt)
199962306a36Sopenharmony_ci		return 0;
200062306a36Sopenharmony_ci
200162306a36Sopenharmony_ci	if (shift_amt < 32) {
200262306a36Sopenharmony_ci		emit_shf(nfp_prog, reg_both(dst + 1), reg_a(dst + 1),
200362306a36Sopenharmony_ci			 SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF,
200462306a36Sopenharmony_ci			 32 - shift_amt);
200562306a36Sopenharmony_ci		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
200662306a36Sopenharmony_ci			 reg_b(dst), SHF_SC_L_SHF, shift_amt);
200762306a36Sopenharmony_ci	} else if (shift_amt == 32) {
200862306a36Sopenharmony_ci		wrp_reg_mov(nfp_prog, dst + 1, dst);
200962306a36Sopenharmony_ci		wrp_immed(nfp_prog, reg_both(dst), 0);
201062306a36Sopenharmony_ci	} else if (shift_amt > 32) {
201162306a36Sopenharmony_ci		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
201262306a36Sopenharmony_ci			 reg_b(dst), SHF_SC_L_SHF, shift_amt - 32);
201362306a36Sopenharmony_ci		wrp_immed(nfp_prog, reg_both(dst), 0);
201462306a36Sopenharmony_ci	}
201562306a36Sopenharmony_ci
201662306a36Sopenharmony_ci	return 0;
201762306a36Sopenharmony_ci}
201862306a36Sopenharmony_ci
201962306a36Sopenharmony_cistatic int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
202062306a36Sopenharmony_ci{
202162306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
202262306a36Sopenharmony_ci	u8 dst = insn->dst_reg * 2;
202362306a36Sopenharmony_ci
202462306a36Sopenharmony_ci	return __shl_imm64(nfp_prog, dst, insn->imm);
202562306a36Sopenharmony_ci}
202662306a36Sopenharmony_ci
202762306a36Sopenharmony_cistatic void shl_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
202862306a36Sopenharmony_ci{
202962306a36Sopenharmony_ci	emit_alu(nfp_prog, imm_both(nfp_prog), reg_imm(32), ALU_OP_SUB,
203062306a36Sopenharmony_ci		 reg_b(src));
203162306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR, reg_imm(0));
203262306a36Sopenharmony_ci	emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), SHF_OP_NONE,
203362306a36Sopenharmony_ci		       reg_b(dst), SHF_SC_R_DSHF);
203462306a36Sopenharmony_ci}
203562306a36Sopenharmony_ci
203662306a36Sopenharmony_ci/* NOTE: for indirect left shift, HIGH part should be calculated first. */
203762306a36Sopenharmony_cistatic void shl_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
203862306a36Sopenharmony_ci{
203962306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
204062306a36Sopenharmony_ci	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
204162306a36Sopenharmony_ci		       reg_b(dst), SHF_SC_L_SHF);
204262306a36Sopenharmony_ci}
204362306a36Sopenharmony_ci
204462306a36Sopenharmony_cistatic void shl_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
204562306a36Sopenharmony_ci{
204662306a36Sopenharmony_ci	shl_reg64_lt32_high(nfp_prog, dst, src);
204762306a36Sopenharmony_ci	shl_reg64_lt32_low(nfp_prog, dst, src);
204862306a36Sopenharmony_ci}
204962306a36Sopenharmony_ci
205062306a36Sopenharmony_cistatic void shl_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
205162306a36Sopenharmony_ci{
205262306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
205362306a36Sopenharmony_ci	emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
205462306a36Sopenharmony_ci		       reg_b(dst), SHF_SC_L_SHF);
205562306a36Sopenharmony_ci	wrp_immed(nfp_prog, reg_both(dst), 0);
205662306a36Sopenharmony_ci}
205762306a36Sopenharmony_ci
205862306a36Sopenharmony_cistatic int shl_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
205962306a36Sopenharmony_ci{
206062306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
206162306a36Sopenharmony_ci	u64 umin, umax;
206262306a36Sopenharmony_ci	u8 dst, src;
206362306a36Sopenharmony_ci
206462306a36Sopenharmony_ci	dst = insn->dst_reg * 2;
206562306a36Sopenharmony_ci	umin = meta->umin_src;
206662306a36Sopenharmony_ci	umax = meta->umax_src;
206762306a36Sopenharmony_ci	if (umin == umax)
206862306a36Sopenharmony_ci		return __shl_imm64(nfp_prog, dst, umin);
206962306a36Sopenharmony_ci
207062306a36Sopenharmony_ci	src = insn->src_reg * 2;
207162306a36Sopenharmony_ci	if (umax < 32) {
207262306a36Sopenharmony_ci		shl_reg64_lt32(nfp_prog, dst, src);
207362306a36Sopenharmony_ci	} else if (umin >= 32) {
207462306a36Sopenharmony_ci		shl_reg64_ge32(nfp_prog, dst, src);
207562306a36Sopenharmony_ci	} else {
207662306a36Sopenharmony_ci		/* Generate different instruction sequences depending on runtime
207762306a36Sopenharmony_ci		 * value of shift amount.
207862306a36Sopenharmony_ci		 */
207962306a36Sopenharmony_ci		u16 label_ge32, label_end;
208062306a36Sopenharmony_ci
208162306a36Sopenharmony_ci		label_ge32 = nfp_prog_current_offset(nfp_prog) + 7;
208262306a36Sopenharmony_ci		emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
208362306a36Sopenharmony_ci
208462306a36Sopenharmony_ci		shl_reg64_lt32_high(nfp_prog, dst, src);
208562306a36Sopenharmony_ci		label_end = nfp_prog_current_offset(nfp_prog) + 6;
208662306a36Sopenharmony_ci		emit_br(nfp_prog, BR_UNC, label_end, 2);
208762306a36Sopenharmony_ci		/* shl_reg64_lt32_low packed in delay slot. */
208862306a36Sopenharmony_ci		shl_reg64_lt32_low(nfp_prog, dst, src);
208962306a36Sopenharmony_ci
209062306a36Sopenharmony_ci		if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
209162306a36Sopenharmony_ci			return -EINVAL;
209262306a36Sopenharmony_ci		shl_reg64_ge32(nfp_prog, dst, src);
209362306a36Sopenharmony_ci
209462306a36Sopenharmony_ci		if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
209562306a36Sopenharmony_ci			return -EINVAL;
209662306a36Sopenharmony_ci	}
209762306a36Sopenharmony_ci
209862306a36Sopenharmony_ci	return 0;
209962306a36Sopenharmony_ci}
210062306a36Sopenharmony_ci
210162306a36Sopenharmony_ci/* Pseudo code:
210262306a36Sopenharmony_ci *   if shift_amt >= 32
210362306a36Sopenharmony_ci *     dst_high = 0;
210462306a36Sopenharmony_ci *     dst_low = dst_high >> shift_amt[4:0]
210562306a36Sopenharmony_ci *   else
210662306a36Sopenharmony_ci *     dst_high = dst_high >> shift_amt
210762306a36Sopenharmony_ci *     dst_low = (dst_high, dst_low) >> shift_amt
210862306a36Sopenharmony_ci *
210962306a36Sopenharmony_ci * The indirect shift will use the same logic at runtime.
211062306a36Sopenharmony_ci */
211162306a36Sopenharmony_cistatic int __shr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
211262306a36Sopenharmony_ci{
211362306a36Sopenharmony_ci	if (!shift_amt)
211462306a36Sopenharmony_ci		return 0;
211562306a36Sopenharmony_ci
211662306a36Sopenharmony_ci	if (shift_amt < 32) {
211762306a36Sopenharmony_ci		emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
211862306a36Sopenharmony_ci			 reg_b(dst), SHF_SC_R_DSHF, shift_amt);
211962306a36Sopenharmony_ci		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
212062306a36Sopenharmony_ci			 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
212162306a36Sopenharmony_ci	} else if (shift_amt == 32) {
212262306a36Sopenharmony_ci		wrp_reg_mov(nfp_prog, dst, dst + 1);
212362306a36Sopenharmony_ci		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
212462306a36Sopenharmony_ci	} else if (shift_amt > 32) {
212562306a36Sopenharmony_ci		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
212662306a36Sopenharmony_ci			 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
212762306a36Sopenharmony_ci		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
212862306a36Sopenharmony_ci	}
212962306a36Sopenharmony_ci
213062306a36Sopenharmony_ci	return 0;
213162306a36Sopenharmony_ci}
213262306a36Sopenharmony_ci
213362306a36Sopenharmony_cistatic int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
213462306a36Sopenharmony_ci{
213562306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
213662306a36Sopenharmony_ci	u8 dst = insn->dst_reg * 2;
213762306a36Sopenharmony_ci
213862306a36Sopenharmony_ci	return __shr_imm64(nfp_prog, dst, insn->imm);
213962306a36Sopenharmony_ci}
214062306a36Sopenharmony_ci
214162306a36Sopenharmony_ci/* NOTE: for indirect right shift, LOW part should be calculated first. */
214262306a36Sopenharmony_cistatic void shr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
214362306a36Sopenharmony_ci{
214462306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
214562306a36Sopenharmony_ci	emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
214662306a36Sopenharmony_ci		       reg_b(dst + 1), SHF_SC_R_SHF);
214762306a36Sopenharmony_ci}
214862306a36Sopenharmony_ci
214962306a36Sopenharmony_cistatic void shr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
215062306a36Sopenharmony_ci{
215162306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
215262306a36Sopenharmony_ci	emit_shf_indir(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
215362306a36Sopenharmony_ci		       reg_b(dst), SHF_SC_R_DSHF);
215462306a36Sopenharmony_ci}
215562306a36Sopenharmony_ci
215662306a36Sopenharmony_cistatic void shr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
215762306a36Sopenharmony_ci{
215862306a36Sopenharmony_ci	shr_reg64_lt32_low(nfp_prog, dst, src);
215962306a36Sopenharmony_ci	shr_reg64_lt32_high(nfp_prog, dst, src);
216062306a36Sopenharmony_ci}
216162306a36Sopenharmony_ci
216262306a36Sopenharmony_cistatic void shr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
216362306a36Sopenharmony_ci{
216462306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
216562306a36Sopenharmony_ci	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
216662306a36Sopenharmony_ci		       reg_b(dst + 1), SHF_SC_R_SHF);
216762306a36Sopenharmony_ci	wrp_immed(nfp_prog, reg_both(dst + 1), 0);
216862306a36Sopenharmony_ci}
216962306a36Sopenharmony_ci
217062306a36Sopenharmony_cistatic int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
217162306a36Sopenharmony_ci{
217262306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
217362306a36Sopenharmony_ci	u64 umin, umax;
217462306a36Sopenharmony_ci	u8 dst, src;
217562306a36Sopenharmony_ci
217662306a36Sopenharmony_ci	dst = insn->dst_reg * 2;
217762306a36Sopenharmony_ci	umin = meta->umin_src;
217862306a36Sopenharmony_ci	umax = meta->umax_src;
217962306a36Sopenharmony_ci	if (umin == umax)
218062306a36Sopenharmony_ci		return __shr_imm64(nfp_prog, dst, umin);
218162306a36Sopenharmony_ci
218262306a36Sopenharmony_ci	src = insn->src_reg * 2;
218362306a36Sopenharmony_ci	if (umax < 32) {
218462306a36Sopenharmony_ci		shr_reg64_lt32(nfp_prog, dst, src);
218562306a36Sopenharmony_ci	} else if (umin >= 32) {
218662306a36Sopenharmony_ci		shr_reg64_ge32(nfp_prog, dst, src);
218762306a36Sopenharmony_ci	} else {
218862306a36Sopenharmony_ci		/* Generate different instruction sequences depending on runtime
218962306a36Sopenharmony_ci		 * value of shift amount.
219062306a36Sopenharmony_ci		 */
219162306a36Sopenharmony_ci		u16 label_ge32, label_end;
219262306a36Sopenharmony_ci
219362306a36Sopenharmony_ci		label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
219462306a36Sopenharmony_ci		emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
219562306a36Sopenharmony_ci		shr_reg64_lt32_low(nfp_prog, dst, src);
219662306a36Sopenharmony_ci		label_end = nfp_prog_current_offset(nfp_prog) + 6;
219762306a36Sopenharmony_ci		emit_br(nfp_prog, BR_UNC, label_end, 2);
219862306a36Sopenharmony_ci		/* shr_reg64_lt32_high packed in delay slot. */
219962306a36Sopenharmony_ci		shr_reg64_lt32_high(nfp_prog, dst, src);
220062306a36Sopenharmony_ci
220162306a36Sopenharmony_ci		if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
220262306a36Sopenharmony_ci			return -EINVAL;
220362306a36Sopenharmony_ci		shr_reg64_ge32(nfp_prog, dst, src);
220462306a36Sopenharmony_ci
220562306a36Sopenharmony_ci		if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
220662306a36Sopenharmony_ci			return -EINVAL;
220762306a36Sopenharmony_ci	}
220862306a36Sopenharmony_ci
220962306a36Sopenharmony_ci	return 0;
221062306a36Sopenharmony_ci}
221162306a36Sopenharmony_ci
221262306a36Sopenharmony_ci/* Code logic is the same as __shr_imm64 except ashr requires signedness bit
221362306a36Sopenharmony_ci * told through PREV_ALU result.
221462306a36Sopenharmony_ci */
221562306a36Sopenharmony_cistatic int __ashr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
221662306a36Sopenharmony_ci{
221762306a36Sopenharmony_ci	if (!shift_amt)
221862306a36Sopenharmony_ci		return 0;
221962306a36Sopenharmony_ci
222062306a36Sopenharmony_ci	if (shift_amt < 32) {
222162306a36Sopenharmony_ci		emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
222262306a36Sopenharmony_ci			 reg_b(dst), SHF_SC_R_DSHF, shift_amt);
222362306a36Sopenharmony_ci		/* Set signedness bit. */
222462306a36Sopenharmony_ci		emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
222562306a36Sopenharmony_ci			 reg_imm(0));
222662306a36Sopenharmony_ci		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
222762306a36Sopenharmony_ci			 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
222862306a36Sopenharmony_ci	} else if (shift_amt == 32) {
222962306a36Sopenharmony_ci		/* NOTE: this also helps setting signedness bit. */
223062306a36Sopenharmony_ci		wrp_reg_mov(nfp_prog, dst, dst + 1);
223162306a36Sopenharmony_ci		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
223262306a36Sopenharmony_ci			 reg_b(dst + 1), SHF_SC_R_SHF, 31);
223362306a36Sopenharmony_ci	} else if (shift_amt > 32) {
223462306a36Sopenharmony_ci		emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
223562306a36Sopenharmony_ci			 reg_imm(0));
223662306a36Sopenharmony_ci		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
223762306a36Sopenharmony_ci			 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
223862306a36Sopenharmony_ci		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
223962306a36Sopenharmony_ci			 reg_b(dst + 1), SHF_SC_R_SHF, 31);
224062306a36Sopenharmony_ci	}
224162306a36Sopenharmony_ci
224262306a36Sopenharmony_ci	return 0;
224362306a36Sopenharmony_ci}
224462306a36Sopenharmony_ci
224562306a36Sopenharmony_cistatic int ashr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
224662306a36Sopenharmony_ci{
224762306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
224862306a36Sopenharmony_ci	u8 dst = insn->dst_reg * 2;
224962306a36Sopenharmony_ci
225062306a36Sopenharmony_ci	return __ashr_imm64(nfp_prog, dst, insn->imm);
225162306a36Sopenharmony_ci}
225262306a36Sopenharmony_ci
225362306a36Sopenharmony_cistatic void ashr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
225462306a36Sopenharmony_ci{
225562306a36Sopenharmony_ci	/* NOTE: the first insn will set both indirect shift amount (source A)
225662306a36Sopenharmony_ci	 * and signedness bit (MSB of result).
225762306a36Sopenharmony_ci	 */
225862306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
225962306a36Sopenharmony_ci	emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
226062306a36Sopenharmony_ci		       reg_b(dst + 1), SHF_SC_R_SHF);
226162306a36Sopenharmony_ci}
226262306a36Sopenharmony_ci
226362306a36Sopenharmony_cistatic void ashr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
226462306a36Sopenharmony_ci{
226562306a36Sopenharmony_ci	/* NOTE: it is the same as logic shift because we don't need to shift in
226662306a36Sopenharmony_ci	 * signedness bit when the shift amount is less than 32.
226762306a36Sopenharmony_ci	 */
226862306a36Sopenharmony_ci	return shr_reg64_lt32_low(nfp_prog, dst, src);
226962306a36Sopenharmony_ci}
227062306a36Sopenharmony_ci
227162306a36Sopenharmony_cistatic void ashr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
227262306a36Sopenharmony_ci{
227362306a36Sopenharmony_ci	ashr_reg64_lt32_low(nfp_prog, dst, src);
227462306a36Sopenharmony_ci	ashr_reg64_lt32_high(nfp_prog, dst, src);
227562306a36Sopenharmony_ci}
227662306a36Sopenharmony_ci
227762306a36Sopenharmony_cistatic void ashr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
227862306a36Sopenharmony_ci{
227962306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
228062306a36Sopenharmony_ci	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
228162306a36Sopenharmony_ci		       reg_b(dst + 1), SHF_SC_R_SHF);
228262306a36Sopenharmony_ci	emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
228362306a36Sopenharmony_ci		 reg_b(dst + 1), SHF_SC_R_SHF, 31);
228462306a36Sopenharmony_ci}
228562306a36Sopenharmony_ci
228662306a36Sopenharmony_ci/* Like ashr_imm64, but need to use indirect shift. */
228762306a36Sopenharmony_cistatic int ashr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
228862306a36Sopenharmony_ci{
228962306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
229062306a36Sopenharmony_ci	u64 umin, umax;
229162306a36Sopenharmony_ci	u8 dst, src;
229262306a36Sopenharmony_ci
229362306a36Sopenharmony_ci	dst = insn->dst_reg * 2;
229462306a36Sopenharmony_ci	umin = meta->umin_src;
229562306a36Sopenharmony_ci	umax = meta->umax_src;
229662306a36Sopenharmony_ci	if (umin == umax)
229762306a36Sopenharmony_ci		return __ashr_imm64(nfp_prog, dst, umin);
229862306a36Sopenharmony_ci
229962306a36Sopenharmony_ci	src = insn->src_reg * 2;
230062306a36Sopenharmony_ci	if (umax < 32) {
230162306a36Sopenharmony_ci		ashr_reg64_lt32(nfp_prog, dst, src);
230262306a36Sopenharmony_ci	} else if (umin >= 32) {
230362306a36Sopenharmony_ci		ashr_reg64_ge32(nfp_prog, dst, src);
230462306a36Sopenharmony_ci	} else {
230562306a36Sopenharmony_ci		u16 label_ge32, label_end;
230662306a36Sopenharmony_ci
230762306a36Sopenharmony_ci		label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
230862306a36Sopenharmony_ci		emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
230962306a36Sopenharmony_ci		ashr_reg64_lt32_low(nfp_prog, dst, src);
231062306a36Sopenharmony_ci		label_end = nfp_prog_current_offset(nfp_prog) + 6;
231162306a36Sopenharmony_ci		emit_br(nfp_prog, BR_UNC, label_end, 2);
231262306a36Sopenharmony_ci		/* ashr_reg64_lt32_high packed in delay slot. */
231362306a36Sopenharmony_ci		ashr_reg64_lt32_high(nfp_prog, dst, src);
231462306a36Sopenharmony_ci
231562306a36Sopenharmony_ci		if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
231662306a36Sopenharmony_ci			return -EINVAL;
231762306a36Sopenharmony_ci		ashr_reg64_ge32(nfp_prog, dst, src);
231862306a36Sopenharmony_ci
231962306a36Sopenharmony_ci		if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
232062306a36Sopenharmony_ci			return -EINVAL;
232162306a36Sopenharmony_ci	}
232262306a36Sopenharmony_ci
232362306a36Sopenharmony_ci	return 0;
232462306a36Sopenharmony_ci}
232562306a36Sopenharmony_ci
232662306a36Sopenharmony_cistatic int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
232762306a36Sopenharmony_ci{
232862306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
232962306a36Sopenharmony_ci
233062306a36Sopenharmony_ci	wrp_reg_mov(nfp_prog, insn->dst_reg * 2,  insn->src_reg * 2);
233162306a36Sopenharmony_ci	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
233262306a36Sopenharmony_ci
233362306a36Sopenharmony_ci	return 0;
233462306a36Sopenharmony_ci}
233562306a36Sopenharmony_ci
233662306a36Sopenharmony_cistatic int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
233762306a36Sopenharmony_ci{
233862306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
233962306a36Sopenharmony_ci
234062306a36Sopenharmony_ci	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm);
234162306a36Sopenharmony_ci	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
234262306a36Sopenharmony_ci
234362306a36Sopenharmony_ci	return 0;
234462306a36Sopenharmony_ci}
234562306a36Sopenharmony_ci
234662306a36Sopenharmony_cistatic int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
234762306a36Sopenharmony_ci{
234862306a36Sopenharmony_ci	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR);
234962306a36Sopenharmony_ci}
235062306a36Sopenharmony_ci
235162306a36Sopenharmony_cistatic int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
235262306a36Sopenharmony_ci{
235362306a36Sopenharmony_ci	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR);
235462306a36Sopenharmony_ci}
235562306a36Sopenharmony_ci
235662306a36Sopenharmony_cistatic int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
235762306a36Sopenharmony_ci{
235862306a36Sopenharmony_ci	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND);
235962306a36Sopenharmony_ci}
236062306a36Sopenharmony_ci
236162306a36Sopenharmony_cistatic int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
236262306a36Sopenharmony_ci{
236362306a36Sopenharmony_ci	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND);
236462306a36Sopenharmony_ci}
236562306a36Sopenharmony_ci
236662306a36Sopenharmony_cistatic int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
236762306a36Sopenharmony_ci{
236862306a36Sopenharmony_ci	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR);
236962306a36Sopenharmony_ci}
237062306a36Sopenharmony_ci
237162306a36Sopenharmony_cistatic int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
237262306a36Sopenharmony_ci{
237362306a36Sopenharmony_ci	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR);
237462306a36Sopenharmony_ci}
237562306a36Sopenharmony_ci
237662306a36Sopenharmony_cistatic int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
237762306a36Sopenharmony_ci{
237862306a36Sopenharmony_ci	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD);
237962306a36Sopenharmony_ci}
238062306a36Sopenharmony_ci
238162306a36Sopenharmony_cistatic int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
238262306a36Sopenharmony_ci{
238362306a36Sopenharmony_ci	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD);
238462306a36Sopenharmony_ci}
238562306a36Sopenharmony_ci
238662306a36Sopenharmony_cistatic int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
238762306a36Sopenharmony_ci{
238862306a36Sopenharmony_ci	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB);
238962306a36Sopenharmony_ci}
239062306a36Sopenharmony_ci
239162306a36Sopenharmony_cistatic int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
239262306a36Sopenharmony_ci{
239362306a36Sopenharmony_ci	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB);
239462306a36Sopenharmony_ci}
239562306a36Sopenharmony_ci
/* Multiply, register-operand form; delegates to wrp_mul() with the flag
 * pair (false, true).
 * NOTE(review): the meaning of the two flags is defined by wrp_mul(),
 * which is not visible here - confirm before changing them.
 */
static int mul_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	int err;

	err = wrp_mul(nfp_prog, meta, false, true);
	return err;
}
240062306a36Sopenharmony_ci
/* Multiply, immediate-operand form; delegates to wrp_mul() with the flag
 * pair (false, false).
 * NOTE(review): the meaning of the two flags is defined by wrp_mul(),
 * which is not visible here - confirm before changing them.
 */
static int mul_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	int err;

	err = wrp_mul(nfp_prog, meta, false, false);
	return err;
}
240562306a36Sopenharmony_ci
/* Divide, register-operand form; forwards unchanged to div_reg64(). */
static int div_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	int err;

	err = div_reg64(nfp_prog, meta);
	return err;
}
241062306a36Sopenharmony_ci
/* Divide, immediate-operand form; forwards unchanged to div_imm64(). */
static int div_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	int err;

	err = div_imm64(nfp_prog, meta);
	return err;
}
241562306a36Sopenharmony_ci
241662306a36Sopenharmony_cistatic int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
241762306a36Sopenharmony_ci{
241862306a36Sopenharmony_ci	u8 dst = meta->insn.dst_reg * 2;
241962306a36Sopenharmony_ci
242062306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst));
242162306a36Sopenharmony_ci	wrp_zext(nfp_prog, meta, dst);
242262306a36Sopenharmony_ci
242362306a36Sopenharmony_ci	return 0;
242462306a36Sopenharmony_ci}
242562306a36Sopenharmony_ci
242662306a36Sopenharmony_cistatic int
242762306a36Sopenharmony_ci__ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst,
242862306a36Sopenharmony_ci	   u8 shift_amt)
242962306a36Sopenharmony_ci{
243062306a36Sopenharmony_ci	if (shift_amt) {
243162306a36Sopenharmony_ci		/* Set signedness bit (MSB of result). */
243262306a36Sopenharmony_ci		emit_alu(nfp_prog, reg_none(), reg_a(dst), ALU_OP_OR,
243362306a36Sopenharmony_ci			 reg_imm(0));
243462306a36Sopenharmony_ci		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
243562306a36Sopenharmony_ci			 reg_b(dst), SHF_SC_R_SHF, shift_amt);
243662306a36Sopenharmony_ci	}
243762306a36Sopenharmony_ci	wrp_zext(nfp_prog, meta, dst);
243862306a36Sopenharmony_ci
243962306a36Sopenharmony_ci	return 0;
244062306a36Sopenharmony_ci}
244162306a36Sopenharmony_ci
244262306a36Sopenharmony_cistatic int ashr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
244362306a36Sopenharmony_ci{
244462306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
244562306a36Sopenharmony_ci	u64 umin, umax;
244662306a36Sopenharmony_ci	u8 dst, src;
244762306a36Sopenharmony_ci
244862306a36Sopenharmony_ci	dst = insn->dst_reg * 2;
244962306a36Sopenharmony_ci	umin = meta->umin_src;
245062306a36Sopenharmony_ci	umax = meta->umax_src;
245162306a36Sopenharmony_ci	if (umin == umax)
245262306a36Sopenharmony_ci		return __ashr_imm(nfp_prog, meta, dst, umin);
245362306a36Sopenharmony_ci
245462306a36Sopenharmony_ci	src = insn->src_reg * 2;
245562306a36Sopenharmony_ci	/* NOTE: the first insn will set both indirect shift amount (source A)
245662306a36Sopenharmony_ci	 * and signedness bit (MSB of result).
245762306a36Sopenharmony_ci	 */
245862306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst));
245962306a36Sopenharmony_ci	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
246062306a36Sopenharmony_ci		       reg_b(dst), SHF_SC_R_SHF);
246162306a36Sopenharmony_ci	wrp_zext(nfp_prog, meta, dst);
246262306a36Sopenharmony_ci
246362306a36Sopenharmony_ci	return 0;
246462306a36Sopenharmony_ci}
246562306a36Sopenharmony_ci
246662306a36Sopenharmony_cistatic int ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
246762306a36Sopenharmony_ci{
246862306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
246962306a36Sopenharmony_ci	u8 dst = insn->dst_reg * 2;
247062306a36Sopenharmony_ci
247162306a36Sopenharmony_ci	return __ashr_imm(nfp_prog, meta, dst, insn->imm);
247262306a36Sopenharmony_ci}
247362306a36Sopenharmony_ci
247462306a36Sopenharmony_cistatic int
247562306a36Sopenharmony_ci__shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst,
247662306a36Sopenharmony_ci	  u8 shift_amt)
247762306a36Sopenharmony_ci{
247862306a36Sopenharmony_ci	if (shift_amt)
247962306a36Sopenharmony_ci		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
248062306a36Sopenharmony_ci			 reg_b(dst), SHF_SC_R_SHF, shift_amt);
248162306a36Sopenharmony_ci	wrp_zext(nfp_prog, meta, dst);
248262306a36Sopenharmony_ci	return 0;
248362306a36Sopenharmony_ci}
248462306a36Sopenharmony_ci
248562306a36Sopenharmony_cistatic int shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
248662306a36Sopenharmony_ci{
248762306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
248862306a36Sopenharmony_ci	u8 dst = insn->dst_reg * 2;
248962306a36Sopenharmony_ci
249062306a36Sopenharmony_ci	return __shr_imm(nfp_prog, meta, dst, insn->imm);
249162306a36Sopenharmony_ci}
249262306a36Sopenharmony_ci
249362306a36Sopenharmony_cistatic int shr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
249462306a36Sopenharmony_ci{
249562306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
249662306a36Sopenharmony_ci	u64 umin, umax;
249762306a36Sopenharmony_ci	u8 dst, src;
249862306a36Sopenharmony_ci
249962306a36Sopenharmony_ci	dst = insn->dst_reg * 2;
250062306a36Sopenharmony_ci	umin = meta->umin_src;
250162306a36Sopenharmony_ci	umax = meta->umax_src;
250262306a36Sopenharmony_ci	if (umin == umax)
250362306a36Sopenharmony_ci		return __shr_imm(nfp_prog, meta, dst, umin);
250462306a36Sopenharmony_ci
250562306a36Sopenharmony_ci	src = insn->src_reg * 2;
250662306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
250762306a36Sopenharmony_ci	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
250862306a36Sopenharmony_ci		       reg_b(dst), SHF_SC_R_SHF);
250962306a36Sopenharmony_ci	wrp_zext(nfp_prog, meta, dst);
251062306a36Sopenharmony_ci	return 0;
251162306a36Sopenharmony_ci}
251262306a36Sopenharmony_ci
251362306a36Sopenharmony_cistatic int
251462306a36Sopenharmony_ci__shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst,
251562306a36Sopenharmony_ci	  u8 shift_amt)
251662306a36Sopenharmony_ci{
251762306a36Sopenharmony_ci	if (shift_amt)
251862306a36Sopenharmony_ci		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
251962306a36Sopenharmony_ci			 reg_b(dst), SHF_SC_L_SHF, shift_amt);
252062306a36Sopenharmony_ci	wrp_zext(nfp_prog, meta, dst);
252162306a36Sopenharmony_ci	return 0;
252262306a36Sopenharmony_ci}
252362306a36Sopenharmony_ci
252462306a36Sopenharmony_cistatic int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
252562306a36Sopenharmony_ci{
252662306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
252762306a36Sopenharmony_ci	u8 dst = insn->dst_reg * 2;
252862306a36Sopenharmony_ci
252962306a36Sopenharmony_ci	return __shl_imm(nfp_prog, meta, dst, insn->imm);
253062306a36Sopenharmony_ci}
253162306a36Sopenharmony_ci
253262306a36Sopenharmony_cistatic int shl_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
253362306a36Sopenharmony_ci{
253462306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
253562306a36Sopenharmony_ci	u64 umin, umax;
253662306a36Sopenharmony_ci	u8 dst, src;
253762306a36Sopenharmony_ci
253862306a36Sopenharmony_ci	dst = insn->dst_reg * 2;
253962306a36Sopenharmony_ci	umin = meta->umin_src;
254062306a36Sopenharmony_ci	umax = meta->umax_src;
254162306a36Sopenharmony_ci	if (umin == umax)
254262306a36Sopenharmony_ci		return __shl_imm(nfp_prog, meta, dst, umin);
254362306a36Sopenharmony_ci
254462306a36Sopenharmony_ci	src = insn->src_reg * 2;
254562306a36Sopenharmony_ci	shl_reg64_lt32_low(nfp_prog, dst, src);
254662306a36Sopenharmony_ci	wrp_zext(nfp_prog, meta, dst);
254762306a36Sopenharmony_ci	return 0;
254862306a36Sopenharmony_ci}
254962306a36Sopenharmony_ci
255062306a36Sopenharmony_cistatic int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
255162306a36Sopenharmony_ci{
255262306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
255362306a36Sopenharmony_ci	u8 gpr = insn->dst_reg * 2;
255462306a36Sopenharmony_ci
255562306a36Sopenharmony_ci	switch (insn->imm) {
255662306a36Sopenharmony_ci	case 16:
255762306a36Sopenharmony_ci		emit_ld_field(nfp_prog, reg_both(gpr), 0x9, reg_b(gpr),
255862306a36Sopenharmony_ci			      SHF_SC_R_ROT, 8);
255962306a36Sopenharmony_ci		emit_ld_field(nfp_prog, reg_both(gpr), 0xe, reg_a(gpr),
256062306a36Sopenharmony_ci			      SHF_SC_R_SHF, 16);
256162306a36Sopenharmony_ci
256262306a36Sopenharmony_ci		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
256362306a36Sopenharmony_ci		break;
256462306a36Sopenharmony_ci	case 32:
256562306a36Sopenharmony_ci		wrp_end32(nfp_prog, reg_a(gpr), gpr);
256662306a36Sopenharmony_ci		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
256762306a36Sopenharmony_ci		break;
256862306a36Sopenharmony_ci	case 64:
256962306a36Sopenharmony_ci		wrp_mov(nfp_prog, imm_a(nfp_prog), reg_b(gpr + 1));
257062306a36Sopenharmony_ci
257162306a36Sopenharmony_ci		wrp_end32(nfp_prog, reg_a(gpr), gpr + 1);
257262306a36Sopenharmony_ci		wrp_end32(nfp_prog, imm_a(nfp_prog), gpr);
257362306a36Sopenharmony_ci		break;
257462306a36Sopenharmony_ci	}
257562306a36Sopenharmony_ci
257662306a36Sopenharmony_ci	return 0;
257762306a36Sopenharmony_ci}
257862306a36Sopenharmony_ci
257962306a36Sopenharmony_cistatic int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
258062306a36Sopenharmony_ci{
258162306a36Sopenharmony_ci	struct nfp_insn_meta *prev = nfp_meta_prev(meta);
258262306a36Sopenharmony_ci	u32 imm_lo, imm_hi;
258362306a36Sopenharmony_ci	u8 dst;
258462306a36Sopenharmony_ci
258562306a36Sopenharmony_ci	dst = prev->insn.dst_reg * 2;
258662306a36Sopenharmony_ci	imm_lo = prev->insn.imm;
258762306a36Sopenharmony_ci	imm_hi = meta->insn.imm;
258862306a36Sopenharmony_ci
258962306a36Sopenharmony_ci	wrp_immed(nfp_prog, reg_both(dst), imm_lo);
259062306a36Sopenharmony_ci
259162306a36Sopenharmony_ci	/* mov is always 1 insn, load imm may be two, so try to use mov */
259262306a36Sopenharmony_ci	if (imm_hi == imm_lo)
259362306a36Sopenharmony_ci		wrp_mov(nfp_prog, reg_both(dst + 1), reg_a(dst));
259462306a36Sopenharmony_ci	else
259562306a36Sopenharmony_ci		wrp_immed(nfp_prog, reg_both(dst + 1), imm_hi);
259662306a36Sopenharmony_ci
259762306a36Sopenharmony_ci	return 0;
259862306a36Sopenharmony_ci}
259962306a36Sopenharmony_ci
260062306a36Sopenharmony_cistatic int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
260162306a36Sopenharmony_ci{
260262306a36Sopenharmony_ci	meta->double_cb = imm_ld8_part2;
260362306a36Sopenharmony_ci	return 0;
260462306a36Sopenharmony_ci}
260562306a36Sopenharmony_ci
260662306a36Sopenharmony_cistatic int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
260762306a36Sopenharmony_ci{
260862306a36Sopenharmony_ci	return construct_data_ld(nfp_prog, meta, meta->insn.imm, 1);
260962306a36Sopenharmony_ci}
261062306a36Sopenharmony_ci
261162306a36Sopenharmony_cistatic int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
261262306a36Sopenharmony_ci{
261362306a36Sopenharmony_ci	return construct_data_ld(nfp_prog, meta, meta->insn.imm, 2);
261462306a36Sopenharmony_ci}
261562306a36Sopenharmony_ci
261662306a36Sopenharmony_cistatic int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
261762306a36Sopenharmony_ci{
261862306a36Sopenharmony_ci	return construct_data_ld(nfp_prog, meta, meta->insn.imm, 4);
261962306a36Sopenharmony_ci}
262062306a36Sopenharmony_ci
262162306a36Sopenharmony_cistatic int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
262262306a36Sopenharmony_ci{
262362306a36Sopenharmony_ci	return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm,
262462306a36Sopenharmony_ci				     meta->insn.src_reg * 2, 1);
262562306a36Sopenharmony_ci}
262662306a36Sopenharmony_ci
262762306a36Sopenharmony_cistatic int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
262862306a36Sopenharmony_ci{
262962306a36Sopenharmony_ci	return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm,
263062306a36Sopenharmony_ci				     meta->insn.src_reg * 2, 2);
263162306a36Sopenharmony_ci}
263262306a36Sopenharmony_ci
263362306a36Sopenharmony_cistatic int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
263462306a36Sopenharmony_ci{
263562306a36Sopenharmony_ci	return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm,
263662306a36Sopenharmony_ci				     meta->insn.src_reg * 2, 4);
263762306a36Sopenharmony_ci}
263862306a36Sopenharmony_ci
263962306a36Sopenharmony_cistatic int
264062306a36Sopenharmony_cimem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
264162306a36Sopenharmony_ci	      unsigned int size, unsigned int ptr_off)
264262306a36Sopenharmony_ci{
264362306a36Sopenharmony_ci	return mem_op_stack(nfp_prog, meta, size, ptr_off,
264462306a36Sopenharmony_ci			    meta->insn.dst_reg * 2, meta->insn.src_reg * 2,
264562306a36Sopenharmony_ci			    true, wrp_lmem_load);
264662306a36Sopenharmony_ci}
264762306a36Sopenharmony_ci
264862306a36Sopenharmony_cistatic int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
264962306a36Sopenharmony_ci		       u8 size)
265062306a36Sopenharmony_ci{
265162306a36Sopenharmony_ci	swreg dst = reg_both(meta->insn.dst_reg * 2);
265262306a36Sopenharmony_ci
265362306a36Sopenharmony_ci	switch (meta->insn.off) {
265462306a36Sopenharmony_ci	case offsetof(struct __sk_buff, len):
265562306a36Sopenharmony_ci		if (size != sizeof_field(struct __sk_buff, len))
265662306a36Sopenharmony_ci			return -EOPNOTSUPP;
265762306a36Sopenharmony_ci		wrp_mov(nfp_prog, dst, plen_reg(nfp_prog));
265862306a36Sopenharmony_ci		break;
265962306a36Sopenharmony_ci	case offsetof(struct __sk_buff, data):
266062306a36Sopenharmony_ci		if (size != sizeof_field(struct __sk_buff, data))
266162306a36Sopenharmony_ci			return -EOPNOTSUPP;
266262306a36Sopenharmony_ci		wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
266362306a36Sopenharmony_ci		break;
266462306a36Sopenharmony_ci	case offsetof(struct __sk_buff, data_end):
266562306a36Sopenharmony_ci		if (size != sizeof_field(struct __sk_buff, data_end))
266662306a36Sopenharmony_ci			return -EOPNOTSUPP;
266762306a36Sopenharmony_ci		emit_alu(nfp_prog, dst,
266862306a36Sopenharmony_ci			 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
266962306a36Sopenharmony_ci		break;
267062306a36Sopenharmony_ci	default:
267162306a36Sopenharmony_ci		return -EOPNOTSUPP;
267262306a36Sopenharmony_ci	}
267362306a36Sopenharmony_ci
267462306a36Sopenharmony_ci	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
267562306a36Sopenharmony_ci
267662306a36Sopenharmony_ci	return 0;
267762306a36Sopenharmony_ci}
267862306a36Sopenharmony_ci
267962306a36Sopenharmony_cistatic int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
268062306a36Sopenharmony_ci		       u8 size)
268162306a36Sopenharmony_ci{
268262306a36Sopenharmony_ci	swreg dst = reg_both(meta->insn.dst_reg * 2);
268362306a36Sopenharmony_ci
268462306a36Sopenharmony_ci	switch (meta->insn.off) {
268562306a36Sopenharmony_ci	case offsetof(struct xdp_md, data):
268662306a36Sopenharmony_ci		if (size != sizeof_field(struct xdp_md, data))
268762306a36Sopenharmony_ci			return -EOPNOTSUPP;
268862306a36Sopenharmony_ci		wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
268962306a36Sopenharmony_ci		break;
269062306a36Sopenharmony_ci	case offsetof(struct xdp_md, data_end):
269162306a36Sopenharmony_ci		if (size != sizeof_field(struct xdp_md, data_end))
269262306a36Sopenharmony_ci			return -EOPNOTSUPP;
269362306a36Sopenharmony_ci		emit_alu(nfp_prog, dst,
269462306a36Sopenharmony_ci			 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
269562306a36Sopenharmony_ci		break;
269662306a36Sopenharmony_ci	default:
269762306a36Sopenharmony_ci		return -EOPNOTSUPP;
269862306a36Sopenharmony_ci	}
269962306a36Sopenharmony_ci
270062306a36Sopenharmony_ci	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
270162306a36Sopenharmony_ci
270262306a36Sopenharmony_ci	return 0;
270362306a36Sopenharmony_ci}
270462306a36Sopenharmony_ci
270562306a36Sopenharmony_cistatic int
270662306a36Sopenharmony_cimem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
270762306a36Sopenharmony_ci	     unsigned int size)
270862306a36Sopenharmony_ci{
270962306a36Sopenharmony_ci	swreg tmp_reg;
271062306a36Sopenharmony_ci
271162306a36Sopenharmony_ci	tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
271262306a36Sopenharmony_ci
271362306a36Sopenharmony_ci	return data_ld_host_order_addr32(nfp_prog, meta, meta->insn.src_reg * 2,
271462306a36Sopenharmony_ci					 tmp_reg, meta->insn.dst_reg * 2, size);
271562306a36Sopenharmony_ci}
271662306a36Sopenharmony_ci
271762306a36Sopenharmony_cistatic int
271862306a36Sopenharmony_cimem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
271962306a36Sopenharmony_ci	     unsigned int size)
272062306a36Sopenharmony_ci{
272162306a36Sopenharmony_ci	swreg tmp_reg;
272262306a36Sopenharmony_ci
272362306a36Sopenharmony_ci	tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
272462306a36Sopenharmony_ci
272562306a36Sopenharmony_ci	return data_ld_host_order_addr40(nfp_prog, meta, meta->insn.src_reg * 2,
272662306a36Sopenharmony_ci					 tmp_reg, meta->insn.dst_reg * 2, size);
272762306a36Sopenharmony_ci}
272862306a36Sopenharmony_ci
272962306a36Sopenharmony_cistatic void
273062306a36Sopenharmony_cimem_ldx_data_init_pktcache(struct nfp_prog *nfp_prog,
273162306a36Sopenharmony_ci			   struct nfp_insn_meta *meta)
273262306a36Sopenharmony_ci{
273362306a36Sopenharmony_ci	s16 range_start = meta->pkt_cache.range_start;
273462306a36Sopenharmony_ci	s16 range_end = meta->pkt_cache.range_end;
273562306a36Sopenharmony_ci	swreg src_base, off;
273662306a36Sopenharmony_ci	u8 xfer_num, len;
273762306a36Sopenharmony_ci	bool indir;
273862306a36Sopenharmony_ci
273962306a36Sopenharmony_ci	off = re_load_imm_any(nfp_prog, range_start, imm_b(nfp_prog));
274062306a36Sopenharmony_ci	src_base = reg_a(meta->insn.src_reg * 2);
274162306a36Sopenharmony_ci	len = range_end - range_start;
274262306a36Sopenharmony_ci	xfer_num = round_up(len, REG_WIDTH) / REG_WIDTH;
274362306a36Sopenharmony_ci
274462306a36Sopenharmony_ci	indir = len > 8 * REG_WIDTH;
274562306a36Sopenharmony_ci	/* Setup PREV_ALU for indirect mode. */
274662306a36Sopenharmony_ci	if (indir)
274762306a36Sopenharmony_ci		wrp_immed(nfp_prog, reg_none(),
274862306a36Sopenharmony_ci			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
274962306a36Sopenharmony_ci
275062306a36Sopenharmony_ci	/* Cache memory into transfer-in registers. */
275162306a36Sopenharmony_ci	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base,
275262306a36Sopenharmony_ci		     off, xfer_num - 1, CMD_CTX_SWAP, indir);
275362306a36Sopenharmony_ci}
275462306a36Sopenharmony_ci
275562306a36Sopenharmony_cistatic int
275662306a36Sopenharmony_cimem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog,
275762306a36Sopenharmony_ci				     struct nfp_insn_meta *meta,
275862306a36Sopenharmony_ci				     unsigned int size)
275962306a36Sopenharmony_ci{
276062306a36Sopenharmony_ci	s16 range_start = meta->pkt_cache.range_start;
276162306a36Sopenharmony_ci	s16 insn_off = meta->insn.off - range_start;
276262306a36Sopenharmony_ci	swreg dst_lo, dst_hi, src_lo, src_mid;
276362306a36Sopenharmony_ci	u8 dst_gpr = meta->insn.dst_reg * 2;
276462306a36Sopenharmony_ci	u8 len_lo = size, len_mid = 0;
276562306a36Sopenharmony_ci	u8 idx = insn_off / REG_WIDTH;
276662306a36Sopenharmony_ci	u8 off = insn_off % REG_WIDTH;
276762306a36Sopenharmony_ci
276862306a36Sopenharmony_ci	dst_hi = reg_both(dst_gpr + 1);
276962306a36Sopenharmony_ci	dst_lo = reg_both(dst_gpr);
277062306a36Sopenharmony_ci	src_lo = reg_xfer(idx);
277162306a36Sopenharmony_ci
277262306a36Sopenharmony_ci	/* The read length could involve as many as three registers. */
277362306a36Sopenharmony_ci	if (size > REG_WIDTH - off) {
277462306a36Sopenharmony_ci		/* Calculate the part in the second register. */
277562306a36Sopenharmony_ci		len_lo = REG_WIDTH - off;
277662306a36Sopenharmony_ci		len_mid = size - len_lo;
277762306a36Sopenharmony_ci
277862306a36Sopenharmony_ci		/* Calculate the part in the third register. */
277962306a36Sopenharmony_ci		if (size > 2 * REG_WIDTH - off)
278062306a36Sopenharmony_ci			len_mid = REG_WIDTH;
278162306a36Sopenharmony_ci	}
278262306a36Sopenharmony_ci
278362306a36Sopenharmony_ci	wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off);
278462306a36Sopenharmony_ci
278562306a36Sopenharmony_ci	if (!len_mid) {
278662306a36Sopenharmony_ci		wrp_zext(nfp_prog, meta, dst_gpr);
278762306a36Sopenharmony_ci		return 0;
278862306a36Sopenharmony_ci	}
278962306a36Sopenharmony_ci
279062306a36Sopenharmony_ci	src_mid = reg_xfer(idx + 1);
279162306a36Sopenharmony_ci
279262306a36Sopenharmony_ci	if (size <= REG_WIDTH) {
279362306a36Sopenharmony_ci		wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo);
279462306a36Sopenharmony_ci		wrp_zext(nfp_prog, meta, dst_gpr);
279562306a36Sopenharmony_ci	} else {
279662306a36Sopenharmony_ci		swreg src_hi = reg_xfer(idx + 2);
279762306a36Sopenharmony_ci
279862306a36Sopenharmony_ci		wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid,
279962306a36Sopenharmony_ci				   REG_WIDTH - len_lo, len_lo);
280062306a36Sopenharmony_ci		wrp_reg_subpart(nfp_prog, dst_hi, src_mid, len_lo,
280162306a36Sopenharmony_ci				REG_WIDTH - len_lo);
280262306a36Sopenharmony_ci		wrp_reg_or_subpart(nfp_prog, dst_hi, src_hi, REG_WIDTH - len_lo,
280362306a36Sopenharmony_ci				   len_lo);
280462306a36Sopenharmony_ci	}
280562306a36Sopenharmony_ci
280662306a36Sopenharmony_ci	return 0;
280762306a36Sopenharmony_ci}
280862306a36Sopenharmony_ci
280962306a36Sopenharmony_cistatic int
281062306a36Sopenharmony_cimem_ldx_data_from_pktcache_aligned(struct nfp_prog *nfp_prog,
281162306a36Sopenharmony_ci				   struct nfp_insn_meta *meta,
281262306a36Sopenharmony_ci				   unsigned int size)
281362306a36Sopenharmony_ci{
281462306a36Sopenharmony_ci	swreg dst_lo, dst_hi, src_lo;
281562306a36Sopenharmony_ci	u8 dst_gpr, idx;
281662306a36Sopenharmony_ci
281762306a36Sopenharmony_ci	idx = (meta->insn.off - meta->pkt_cache.range_start) / REG_WIDTH;
281862306a36Sopenharmony_ci	dst_gpr = meta->insn.dst_reg * 2;
281962306a36Sopenharmony_ci	dst_hi = reg_both(dst_gpr + 1);
282062306a36Sopenharmony_ci	dst_lo = reg_both(dst_gpr);
282162306a36Sopenharmony_ci	src_lo = reg_xfer(idx);
282262306a36Sopenharmony_ci
282362306a36Sopenharmony_ci	if (size < REG_WIDTH) {
282462306a36Sopenharmony_ci		wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0);
282562306a36Sopenharmony_ci		wrp_zext(nfp_prog, meta, dst_gpr);
282662306a36Sopenharmony_ci	} else if (size == REG_WIDTH) {
282762306a36Sopenharmony_ci		wrp_mov(nfp_prog, dst_lo, src_lo);
282862306a36Sopenharmony_ci		wrp_zext(nfp_prog, meta, dst_gpr);
282962306a36Sopenharmony_ci	} else {
283062306a36Sopenharmony_ci		swreg src_hi = reg_xfer(idx + 1);
283162306a36Sopenharmony_ci
283262306a36Sopenharmony_ci		wrp_mov(nfp_prog, dst_lo, src_lo);
283362306a36Sopenharmony_ci		wrp_mov(nfp_prog, dst_hi, src_hi);
283462306a36Sopenharmony_ci	}
283562306a36Sopenharmony_ci
283662306a36Sopenharmony_ci	return 0;
283762306a36Sopenharmony_ci}
283862306a36Sopenharmony_ci
283962306a36Sopenharmony_cistatic int
284062306a36Sopenharmony_cimem_ldx_data_from_pktcache(struct nfp_prog *nfp_prog,
284162306a36Sopenharmony_ci			   struct nfp_insn_meta *meta, unsigned int size)
284262306a36Sopenharmony_ci{
284362306a36Sopenharmony_ci	u8 off = meta->insn.off - meta->pkt_cache.range_start;
284462306a36Sopenharmony_ci
284562306a36Sopenharmony_ci	if (IS_ALIGNED(off, REG_WIDTH))
284662306a36Sopenharmony_ci		return mem_ldx_data_from_pktcache_aligned(nfp_prog, meta, size);
284762306a36Sopenharmony_ci
284862306a36Sopenharmony_ci	return mem_ldx_data_from_pktcache_unaligned(nfp_prog, meta, size);
284962306a36Sopenharmony_ci}
285062306a36Sopenharmony_ci
285162306a36Sopenharmony_cistatic int
285262306a36Sopenharmony_cimem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
285362306a36Sopenharmony_ci	unsigned int size)
285462306a36Sopenharmony_ci{
285562306a36Sopenharmony_ci	if (meta->ldst_gather_len)
285662306a36Sopenharmony_ci		return nfp_cpp_memcpy(nfp_prog, meta);
285762306a36Sopenharmony_ci
285862306a36Sopenharmony_ci	if (meta->ptr.type == PTR_TO_CTX) {
285962306a36Sopenharmony_ci		if (nfp_prog->type == BPF_PROG_TYPE_XDP)
286062306a36Sopenharmony_ci			return mem_ldx_xdp(nfp_prog, meta, size);
286162306a36Sopenharmony_ci		else
286262306a36Sopenharmony_ci			return mem_ldx_skb(nfp_prog, meta, size);
286362306a36Sopenharmony_ci	}
286462306a36Sopenharmony_ci
286562306a36Sopenharmony_ci	if (meta->ptr.type == PTR_TO_PACKET) {
286662306a36Sopenharmony_ci		if (meta->pkt_cache.range_end) {
286762306a36Sopenharmony_ci			if (meta->pkt_cache.do_init)
286862306a36Sopenharmony_ci				mem_ldx_data_init_pktcache(nfp_prog, meta);
286962306a36Sopenharmony_ci
287062306a36Sopenharmony_ci			return mem_ldx_data_from_pktcache(nfp_prog, meta, size);
287162306a36Sopenharmony_ci		} else {
287262306a36Sopenharmony_ci			return mem_ldx_data(nfp_prog, meta, size);
287362306a36Sopenharmony_ci		}
287462306a36Sopenharmony_ci	}
287562306a36Sopenharmony_ci
287662306a36Sopenharmony_ci	if (meta->ptr.type == PTR_TO_STACK)
287762306a36Sopenharmony_ci		return mem_ldx_stack(nfp_prog, meta, size,
287862306a36Sopenharmony_ci				     meta->ptr.off + meta->ptr.var_off.value);
287962306a36Sopenharmony_ci
288062306a36Sopenharmony_ci	if (meta->ptr.type == PTR_TO_MAP_VALUE)
288162306a36Sopenharmony_ci		return mem_ldx_emem(nfp_prog, meta, size);
288262306a36Sopenharmony_ci
288362306a36Sopenharmony_ci	return -EOPNOTSUPP;
288462306a36Sopenharmony_ci}
288562306a36Sopenharmony_ci
288662306a36Sopenharmony_cistatic int mem_ldx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
288762306a36Sopenharmony_ci{
288862306a36Sopenharmony_ci	return mem_ldx(nfp_prog, meta, 1);
288962306a36Sopenharmony_ci}
289062306a36Sopenharmony_ci
289162306a36Sopenharmony_cistatic int mem_ldx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
289262306a36Sopenharmony_ci{
289362306a36Sopenharmony_ci	return mem_ldx(nfp_prog, meta, 2);
289462306a36Sopenharmony_ci}
289562306a36Sopenharmony_ci
289662306a36Sopenharmony_cistatic int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
289762306a36Sopenharmony_ci{
289862306a36Sopenharmony_ci	return mem_ldx(nfp_prog, meta, 4);
289962306a36Sopenharmony_ci}
290062306a36Sopenharmony_ci
290162306a36Sopenharmony_cistatic int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
290262306a36Sopenharmony_ci{
290362306a36Sopenharmony_ci	return mem_ldx(nfp_prog, meta, 8);
290462306a36Sopenharmony_ci}
290562306a36Sopenharmony_ci
290662306a36Sopenharmony_cistatic int
290762306a36Sopenharmony_cimem_st_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
290862306a36Sopenharmony_ci	    unsigned int size)
290962306a36Sopenharmony_ci{
291062306a36Sopenharmony_ci	u64 imm = meta->insn.imm; /* sign extend */
291162306a36Sopenharmony_ci	swreg off_reg;
291262306a36Sopenharmony_ci
291362306a36Sopenharmony_ci	off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
291462306a36Sopenharmony_ci
291562306a36Sopenharmony_ci	return data_st_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
291662306a36Sopenharmony_ci				  imm, size);
291762306a36Sopenharmony_ci}
291862306a36Sopenharmony_ci
291962306a36Sopenharmony_cistatic int mem_st(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
292062306a36Sopenharmony_ci		  unsigned int size)
292162306a36Sopenharmony_ci{
292262306a36Sopenharmony_ci	if (meta->ptr.type == PTR_TO_PACKET)
292362306a36Sopenharmony_ci		return mem_st_data(nfp_prog, meta, size);
292462306a36Sopenharmony_ci
292562306a36Sopenharmony_ci	return -EOPNOTSUPP;
292662306a36Sopenharmony_ci}
292762306a36Sopenharmony_ci
292862306a36Sopenharmony_cistatic int mem_st1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
292962306a36Sopenharmony_ci{
293062306a36Sopenharmony_ci	return mem_st(nfp_prog, meta, 1);
293162306a36Sopenharmony_ci}
293262306a36Sopenharmony_ci
293362306a36Sopenharmony_cistatic int mem_st2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
293462306a36Sopenharmony_ci{
293562306a36Sopenharmony_ci	return mem_st(nfp_prog, meta, 2);
293662306a36Sopenharmony_ci}
293762306a36Sopenharmony_ci
293862306a36Sopenharmony_cistatic int mem_st4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
293962306a36Sopenharmony_ci{
294062306a36Sopenharmony_ci	return mem_st(nfp_prog, meta, 4);
294162306a36Sopenharmony_ci}
294262306a36Sopenharmony_ci
294362306a36Sopenharmony_cistatic int mem_st8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
294462306a36Sopenharmony_ci{
294562306a36Sopenharmony_ci	return mem_st(nfp_prog, meta, 8);
294662306a36Sopenharmony_ci}
294762306a36Sopenharmony_ci
294862306a36Sopenharmony_cistatic int
294962306a36Sopenharmony_cimem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
295062306a36Sopenharmony_ci	     unsigned int size)
295162306a36Sopenharmony_ci{
295262306a36Sopenharmony_ci	swreg off_reg;
295362306a36Sopenharmony_ci
295462306a36Sopenharmony_ci	off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
295562306a36Sopenharmony_ci
295662306a36Sopenharmony_ci	return data_stx_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
295762306a36Sopenharmony_ci				   meta->insn.src_reg * 2, size);
295862306a36Sopenharmony_ci}
295962306a36Sopenharmony_ci
296062306a36Sopenharmony_cistatic int
296162306a36Sopenharmony_cimem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
296262306a36Sopenharmony_ci	      unsigned int size, unsigned int ptr_off)
296362306a36Sopenharmony_ci{
296462306a36Sopenharmony_ci	return mem_op_stack(nfp_prog, meta, size, ptr_off,
296562306a36Sopenharmony_ci			    meta->insn.src_reg * 2, meta->insn.dst_reg * 2,
296662306a36Sopenharmony_ci			    false, wrp_lmem_store);
296762306a36Sopenharmony_ci}
296862306a36Sopenharmony_ci
296962306a36Sopenharmony_cistatic int mem_stx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
297062306a36Sopenharmony_ci{
297162306a36Sopenharmony_ci	switch (meta->insn.off) {
297262306a36Sopenharmony_ci	case offsetof(struct xdp_md, rx_queue_index):
297362306a36Sopenharmony_ci		return nfp_queue_select(nfp_prog, meta);
297462306a36Sopenharmony_ci	}
297562306a36Sopenharmony_ci
297662306a36Sopenharmony_ci	WARN_ON_ONCE(1); /* verifier should have rejected bad accesses */
297762306a36Sopenharmony_ci	return -EOPNOTSUPP;
297862306a36Sopenharmony_ci}
297962306a36Sopenharmony_ci
298062306a36Sopenharmony_cistatic int
298162306a36Sopenharmony_cimem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
298262306a36Sopenharmony_ci	unsigned int size)
298362306a36Sopenharmony_ci{
298462306a36Sopenharmony_ci	if (meta->ptr.type == PTR_TO_PACKET)
298562306a36Sopenharmony_ci		return mem_stx_data(nfp_prog, meta, size);
298662306a36Sopenharmony_ci
298762306a36Sopenharmony_ci	if (meta->ptr.type == PTR_TO_STACK)
298862306a36Sopenharmony_ci		return mem_stx_stack(nfp_prog, meta, size,
298962306a36Sopenharmony_ci				     meta->ptr.off + meta->ptr.var_off.value);
299062306a36Sopenharmony_ci
299162306a36Sopenharmony_ci	return -EOPNOTSUPP;
299262306a36Sopenharmony_ci}
299362306a36Sopenharmony_ci
/* BPF_STX | BPF_MEM | BPF_B: one byte wide store, see mem_stx(). */
static int mem_stx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const unsigned int size = 1;

	return mem_stx(nfp_prog, meta, size);
}
299862306a36Sopenharmony_ci
/* BPF_STX | BPF_MEM | BPF_H: two byte wide store, see mem_stx(). */
static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const unsigned int size = 2;

	return mem_stx(nfp_prog, meta, size);
}
300362306a36Sopenharmony_ci
300462306a36Sopenharmony_cistatic int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
300562306a36Sopenharmony_ci{
300662306a36Sopenharmony_ci	if (meta->ptr.type == PTR_TO_CTX)
300762306a36Sopenharmony_ci		if (nfp_prog->type == BPF_PROG_TYPE_XDP)
300862306a36Sopenharmony_ci			return mem_stx_xdp(nfp_prog, meta);
300962306a36Sopenharmony_ci	return mem_stx(nfp_prog, meta, 4);
301062306a36Sopenharmony_ci}
301162306a36Sopenharmony_ci
/* BPF_STX | BPF_MEM | BPF_DW: eight byte wide store, see mem_stx(). */
static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const unsigned int size = 8;

	return mem_stx(nfp_prog, meta, size);
}
301662306a36Sopenharmony_ci
301762306a36Sopenharmony_cistatic int
301862306a36Sopenharmony_cimem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64)
301962306a36Sopenharmony_ci{
302062306a36Sopenharmony_ci	u8 dst_gpr = meta->insn.dst_reg * 2;
302162306a36Sopenharmony_ci	u8 src_gpr = meta->insn.src_reg * 2;
302262306a36Sopenharmony_ci	unsigned int full_add, out;
302362306a36Sopenharmony_ci	swreg addra, addrb, off;
302462306a36Sopenharmony_ci
302562306a36Sopenharmony_ci	off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
302662306a36Sopenharmony_ci
302762306a36Sopenharmony_ci	/* We can fit 16 bits into command immediate, if we know the immediate
302862306a36Sopenharmony_ci	 * is guaranteed to either always or never fit into 16 bit we only
302962306a36Sopenharmony_ci	 * generate code to handle that particular case, otherwise generate
303062306a36Sopenharmony_ci	 * code for both.
303162306a36Sopenharmony_ci	 */
303262306a36Sopenharmony_ci	out = nfp_prog_current_offset(nfp_prog);
303362306a36Sopenharmony_ci	full_add = nfp_prog_current_offset(nfp_prog);
303462306a36Sopenharmony_ci
303562306a36Sopenharmony_ci	if (meta->insn.off) {
303662306a36Sopenharmony_ci		out += 2;
303762306a36Sopenharmony_ci		full_add += 2;
303862306a36Sopenharmony_ci	}
303962306a36Sopenharmony_ci	if (meta->xadd_maybe_16bit) {
304062306a36Sopenharmony_ci		out += 3;
304162306a36Sopenharmony_ci		full_add += 3;
304262306a36Sopenharmony_ci	}
304362306a36Sopenharmony_ci	if (meta->xadd_over_16bit)
304462306a36Sopenharmony_ci		out += 2 + is64;
304562306a36Sopenharmony_ci	if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
304662306a36Sopenharmony_ci		out += 5;
304762306a36Sopenharmony_ci		full_add += 5;
304862306a36Sopenharmony_ci	}
304962306a36Sopenharmony_ci
305062306a36Sopenharmony_ci	/* Generate the branch for choosing add_imm vs add */
305162306a36Sopenharmony_ci	if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
305262306a36Sopenharmony_ci		swreg max_imm = imm_a(nfp_prog);
305362306a36Sopenharmony_ci
305462306a36Sopenharmony_ci		wrp_immed(nfp_prog, max_imm, 0xffff);
305562306a36Sopenharmony_ci		emit_alu(nfp_prog, reg_none(),
305662306a36Sopenharmony_ci			 max_imm, ALU_OP_SUB, reg_b(src_gpr));
305762306a36Sopenharmony_ci		emit_alu(nfp_prog, reg_none(),
305862306a36Sopenharmony_ci			 reg_imm(0), ALU_OP_SUB_C, reg_b(src_gpr + 1));
305962306a36Sopenharmony_ci		emit_br(nfp_prog, BR_BLO, full_add, meta->insn.off ? 2 : 0);
306062306a36Sopenharmony_ci		/* defer for add */
306162306a36Sopenharmony_ci	}
306262306a36Sopenharmony_ci
306362306a36Sopenharmony_ci	/* If insn has an offset add to the address */
306462306a36Sopenharmony_ci	if (!meta->insn.off) {
306562306a36Sopenharmony_ci		addra = reg_a(dst_gpr);
306662306a36Sopenharmony_ci		addrb = reg_b(dst_gpr + 1);
306762306a36Sopenharmony_ci	} else {
306862306a36Sopenharmony_ci		emit_alu(nfp_prog, imma_a(nfp_prog),
306962306a36Sopenharmony_ci			 reg_a(dst_gpr), ALU_OP_ADD, off);
307062306a36Sopenharmony_ci		emit_alu(nfp_prog, imma_b(nfp_prog),
307162306a36Sopenharmony_ci			 reg_a(dst_gpr + 1), ALU_OP_ADD_C, reg_imm(0));
307262306a36Sopenharmony_ci		addra = imma_a(nfp_prog);
307362306a36Sopenharmony_ci		addrb = imma_b(nfp_prog);
307462306a36Sopenharmony_ci	}
307562306a36Sopenharmony_ci
307662306a36Sopenharmony_ci	/* Generate the add_imm if 16 bits are possible */
307762306a36Sopenharmony_ci	if (meta->xadd_maybe_16bit) {
307862306a36Sopenharmony_ci		swreg prev_alu = imm_a(nfp_prog);
307962306a36Sopenharmony_ci
308062306a36Sopenharmony_ci		wrp_immed(nfp_prog, prev_alu,
308162306a36Sopenharmony_ci			  FIELD_PREP(CMD_OVE_DATA, 2) |
308262306a36Sopenharmony_ci			  CMD_OVE_LEN |
308362306a36Sopenharmony_ci			  FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2));
308462306a36Sopenharmony_ci		wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2);
308562306a36Sopenharmony_ci		emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0,
308662306a36Sopenharmony_ci			       addra, addrb, 0, CMD_CTX_NO_SWAP);
308762306a36Sopenharmony_ci
308862306a36Sopenharmony_ci		if (meta->xadd_over_16bit)
308962306a36Sopenharmony_ci			emit_br(nfp_prog, BR_UNC, out, 0);
309062306a36Sopenharmony_ci	}
309162306a36Sopenharmony_ci
309262306a36Sopenharmony_ci	if (!nfp_prog_confirm_current_offset(nfp_prog, full_add))
309362306a36Sopenharmony_ci		return -EINVAL;
309462306a36Sopenharmony_ci
309562306a36Sopenharmony_ci	/* Generate the add if 16 bits are not guaranteed */
309662306a36Sopenharmony_ci	if (meta->xadd_over_16bit) {
309762306a36Sopenharmony_ci		emit_cmd(nfp_prog, CMD_TGT_ADD, CMD_MODE_40b_BA, 0,
309862306a36Sopenharmony_ci			 addra, addrb, is64 << 2,
309962306a36Sopenharmony_ci			 is64 ? CMD_CTX_SWAP_DEFER2 : CMD_CTX_SWAP_DEFER1);
310062306a36Sopenharmony_ci
310162306a36Sopenharmony_ci		wrp_mov(nfp_prog, reg_xfer(0), reg_a(src_gpr));
310262306a36Sopenharmony_ci		if (is64)
310362306a36Sopenharmony_ci			wrp_mov(nfp_prog, reg_xfer(1), reg_a(src_gpr + 1));
310462306a36Sopenharmony_ci	}
310562306a36Sopenharmony_ci
310662306a36Sopenharmony_ci	if (!nfp_prog_confirm_current_offset(nfp_prog, out))
310762306a36Sopenharmony_ci		return -EINVAL;
310862306a36Sopenharmony_ci
310962306a36Sopenharmony_ci	return 0;
311062306a36Sopenharmony_ci}
311162306a36Sopenharmony_ci
311262306a36Sopenharmony_cistatic int mem_atomic4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
311362306a36Sopenharmony_ci{
311462306a36Sopenharmony_ci	if (meta->insn.imm != BPF_ADD)
311562306a36Sopenharmony_ci		return -EOPNOTSUPP;
311662306a36Sopenharmony_ci
311762306a36Sopenharmony_ci	return mem_xadd(nfp_prog, meta, false);
311862306a36Sopenharmony_ci}
311962306a36Sopenharmony_ci
312062306a36Sopenharmony_cistatic int mem_atomic8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
312162306a36Sopenharmony_ci{
312262306a36Sopenharmony_ci	if (meta->insn.imm != BPF_ADD)
312362306a36Sopenharmony_ci		return -EOPNOTSUPP;
312462306a36Sopenharmony_ci
312562306a36Sopenharmony_ci	return mem_xadd(nfp_prog, meta, true);
312662306a36Sopenharmony_ci}
312762306a36Sopenharmony_ci
312862306a36Sopenharmony_cistatic int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
312962306a36Sopenharmony_ci{
313062306a36Sopenharmony_ci	emit_br(nfp_prog, BR_UNC, meta->insn.off, 0);
313162306a36Sopenharmony_ci
313262306a36Sopenharmony_ci	return 0;
313362306a36Sopenharmony_ci}
313462306a36Sopenharmony_ci
313562306a36Sopenharmony_cistatic int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
313662306a36Sopenharmony_ci{
313762306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
313862306a36Sopenharmony_ci	u64 imm = insn->imm; /* sign extend */
313962306a36Sopenharmony_ci	swreg or1, or2, tmp_reg;
314062306a36Sopenharmony_ci
314162306a36Sopenharmony_ci	or1 = reg_a(insn->dst_reg * 2);
314262306a36Sopenharmony_ci	or2 = reg_b(insn->dst_reg * 2 + 1);
314362306a36Sopenharmony_ci
314462306a36Sopenharmony_ci	if (imm & ~0U) {
314562306a36Sopenharmony_ci		tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
314662306a36Sopenharmony_ci		emit_alu(nfp_prog, imm_a(nfp_prog),
314762306a36Sopenharmony_ci			 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
314862306a36Sopenharmony_ci		or1 = imm_a(nfp_prog);
314962306a36Sopenharmony_ci	}
315062306a36Sopenharmony_ci
315162306a36Sopenharmony_ci	if (imm >> 32) {
315262306a36Sopenharmony_ci		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
315362306a36Sopenharmony_ci		emit_alu(nfp_prog, imm_b(nfp_prog),
315462306a36Sopenharmony_ci			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
315562306a36Sopenharmony_ci		or2 = imm_b(nfp_prog);
315662306a36Sopenharmony_ci	}
315762306a36Sopenharmony_ci
315862306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2);
315962306a36Sopenharmony_ci	emit_br(nfp_prog, BR_BEQ, insn->off, 0);
316062306a36Sopenharmony_ci
316162306a36Sopenharmony_ci	return 0;
316262306a36Sopenharmony_ci}
316362306a36Sopenharmony_ci
316462306a36Sopenharmony_cistatic int jeq32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
316562306a36Sopenharmony_ci{
316662306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
316762306a36Sopenharmony_ci	swreg tmp_reg;
316862306a36Sopenharmony_ci
316962306a36Sopenharmony_ci	tmp_reg = ur_load_imm_any(nfp_prog, insn->imm, imm_b(nfp_prog));
317062306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(),
317162306a36Sopenharmony_ci		 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
317262306a36Sopenharmony_ci	emit_br(nfp_prog, BR_BEQ, insn->off, 0);
317362306a36Sopenharmony_ci
317462306a36Sopenharmony_ci	return 0;
317562306a36Sopenharmony_ci}
317662306a36Sopenharmony_ci
317762306a36Sopenharmony_cistatic int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
317862306a36Sopenharmony_ci{
317962306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
318062306a36Sopenharmony_ci	u64 imm = insn->imm; /* sign extend */
318162306a36Sopenharmony_ci	u8 dst_gpr = insn->dst_reg * 2;
318262306a36Sopenharmony_ci	swreg tmp_reg;
318362306a36Sopenharmony_ci
318462306a36Sopenharmony_ci	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
318562306a36Sopenharmony_ci	emit_alu(nfp_prog, imm_b(nfp_prog),
318662306a36Sopenharmony_ci		 reg_a(dst_gpr), ALU_OP_AND, tmp_reg);
318762306a36Sopenharmony_ci	/* Upper word of the mask can only be 0 or ~0 from sign extension,
318862306a36Sopenharmony_ci	 * so either ignore it or OR the whole thing in.
318962306a36Sopenharmony_ci	 */
319062306a36Sopenharmony_ci	if (is_mbpf_jmp64(meta) && imm >> 32) {
319162306a36Sopenharmony_ci		emit_alu(nfp_prog, reg_none(),
319262306a36Sopenharmony_ci			 reg_a(dst_gpr + 1), ALU_OP_OR, imm_b(nfp_prog));
319362306a36Sopenharmony_ci	}
319462306a36Sopenharmony_ci	emit_br(nfp_prog, BR_BNE, insn->off, 0);
319562306a36Sopenharmony_ci
319662306a36Sopenharmony_ci	return 0;
319762306a36Sopenharmony_ci}
319862306a36Sopenharmony_ci
319962306a36Sopenharmony_cistatic int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
320062306a36Sopenharmony_ci{
320162306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
320262306a36Sopenharmony_ci	u64 imm = insn->imm; /* sign extend */
320362306a36Sopenharmony_ci	bool is_jmp32 = is_mbpf_jmp32(meta);
320462306a36Sopenharmony_ci	swreg tmp_reg;
320562306a36Sopenharmony_ci
320662306a36Sopenharmony_ci	if (!imm) {
320762306a36Sopenharmony_ci		if (is_jmp32)
320862306a36Sopenharmony_ci			emit_alu(nfp_prog, reg_none(), reg_none(), ALU_OP_NONE,
320962306a36Sopenharmony_ci				 reg_b(insn->dst_reg * 2));
321062306a36Sopenharmony_ci		else
321162306a36Sopenharmony_ci			emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
321262306a36Sopenharmony_ci				 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
321362306a36Sopenharmony_ci		emit_br(nfp_prog, BR_BNE, insn->off, 0);
321462306a36Sopenharmony_ci		return 0;
321562306a36Sopenharmony_ci	}
321662306a36Sopenharmony_ci
321762306a36Sopenharmony_ci	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
321862306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(),
321962306a36Sopenharmony_ci		 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
322062306a36Sopenharmony_ci	emit_br(nfp_prog, BR_BNE, insn->off, 0);
322162306a36Sopenharmony_ci
322262306a36Sopenharmony_ci	if (is_jmp32)
322362306a36Sopenharmony_ci		return 0;
322462306a36Sopenharmony_ci
322562306a36Sopenharmony_ci	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
322662306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(),
322762306a36Sopenharmony_ci		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
322862306a36Sopenharmony_ci	emit_br(nfp_prog, BR_BNE, insn->off, 0);
322962306a36Sopenharmony_ci
323062306a36Sopenharmony_ci	return 0;
323162306a36Sopenharmony_ci}
323262306a36Sopenharmony_ci
323362306a36Sopenharmony_cistatic int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
323462306a36Sopenharmony_ci{
323562306a36Sopenharmony_ci	const struct bpf_insn *insn = &meta->insn;
323662306a36Sopenharmony_ci
323762306a36Sopenharmony_ci	emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2),
323862306a36Sopenharmony_ci		 ALU_OP_XOR, reg_b(insn->src_reg * 2));
323962306a36Sopenharmony_ci	if (is_mbpf_jmp64(meta)) {
324062306a36Sopenharmony_ci		emit_alu(nfp_prog, imm_b(nfp_prog),
324162306a36Sopenharmony_ci			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR,
324262306a36Sopenharmony_ci			 reg_b(insn->src_reg * 2 + 1));
324362306a36Sopenharmony_ci		emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR,
324462306a36Sopenharmony_ci			 imm_b(nfp_prog));
324562306a36Sopenharmony_ci	}
324662306a36Sopenharmony_ci	emit_br(nfp_prog, BR_BEQ, insn->off, 0);
324762306a36Sopenharmony_ci
324862306a36Sopenharmony_ci	return 0;
324962306a36Sopenharmony_ci}
325062306a36Sopenharmony_ci
325162306a36Sopenharmony_cistatic int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
325262306a36Sopenharmony_ci{
325362306a36Sopenharmony_ci	return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE);
325462306a36Sopenharmony_ci}
325562306a36Sopenharmony_ci
325662306a36Sopenharmony_cistatic int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
325762306a36Sopenharmony_ci{
325862306a36Sopenharmony_ci	return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE);
325962306a36Sopenharmony_ci}
326062306a36Sopenharmony_ci
326162306a36Sopenharmony_cistatic int
326262306a36Sopenharmony_cibpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
326362306a36Sopenharmony_ci{
326462306a36Sopenharmony_ci	u32 ret_tgt, stack_depth, offset_br;
326562306a36Sopenharmony_ci	swreg tmp_reg;
326662306a36Sopenharmony_ci
326762306a36Sopenharmony_ci	stack_depth = round_up(nfp_prog->stack_frame_depth, STACK_FRAME_ALIGN);
326862306a36Sopenharmony_ci	/* Space for saving the return address is accounted for by the callee,
326962306a36Sopenharmony_ci	 * so stack_depth can be zero for the main function.
327062306a36Sopenharmony_ci	 */
327162306a36Sopenharmony_ci	if (stack_depth) {
327262306a36Sopenharmony_ci		tmp_reg = ur_load_imm_any(nfp_prog, stack_depth,
327362306a36Sopenharmony_ci					  stack_imm(nfp_prog));
327462306a36Sopenharmony_ci		emit_alu(nfp_prog, stack_reg(nfp_prog),
327562306a36Sopenharmony_ci			 stack_reg(nfp_prog), ALU_OP_ADD, tmp_reg);
327662306a36Sopenharmony_ci		emit_csr_wr(nfp_prog, stack_reg(nfp_prog),
327762306a36Sopenharmony_ci			    NFP_CSR_ACT_LM_ADDR0);
327862306a36Sopenharmony_ci	}
327962306a36Sopenharmony_ci
328062306a36Sopenharmony_ci	/* Two cases for jumping to the callee:
328162306a36Sopenharmony_ci	 *
328262306a36Sopenharmony_ci	 * - If callee uses and needs to save R6~R9 then:
328362306a36Sopenharmony_ci	 *     1. Put the start offset of the callee into imm_b(). This will
328462306a36Sopenharmony_ci	 *        require a fixup step, as we do not necessarily know this
328562306a36Sopenharmony_ci	 *        address yet.
328662306a36Sopenharmony_ci	 *     2. Put the return address from the callee to the caller into
328762306a36Sopenharmony_ci	 *        register ret_reg().
328862306a36Sopenharmony_ci	 *     3. (After defer slots are consumed) Jump to the subroutine that
328962306a36Sopenharmony_ci	 *        pushes the registers to the stack.
329062306a36Sopenharmony_ci	 *   The subroutine acts as a trampoline, and returns to the address in
329162306a36Sopenharmony_ci	 *   imm_b(), i.e. jumps to the callee.
329262306a36Sopenharmony_ci	 *
329362306a36Sopenharmony_ci	 * - If callee does not need to save R6~R9 then just load return
329462306a36Sopenharmony_ci	 *   address to the caller in ret_reg(), and jump to the callee
329562306a36Sopenharmony_ci	 *   directly.
329662306a36Sopenharmony_ci	 *
329762306a36Sopenharmony_ci	 * Using ret_reg() to pass the return address to the callee is set here
329862306a36Sopenharmony_ci	 * as a convention. The callee can then push this address onto its
329962306a36Sopenharmony_ci	 * stack frame in its prologue. The advantages of passing the return
330062306a36Sopenharmony_ci	 * address through ret_reg(), instead of pushing it to the stack right
330162306a36Sopenharmony_ci	 * here, are the following:
330262306a36Sopenharmony_ci	 * - It looks cleaner.
330362306a36Sopenharmony_ci	 * - If the called function is called multiple time, we get a lower
330462306a36Sopenharmony_ci	 *   program size.
330562306a36Sopenharmony_ci	 * - We save two no-op instructions that should be added just before
330662306a36Sopenharmony_ci	 *   the emit_br() when stack depth is not null otherwise.
330762306a36Sopenharmony_ci	 * - If we ever find a register to hold the return address during whole
330862306a36Sopenharmony_ci	 *   execution of the callee, we will not have to push the return
330962306a36Sopenharmony_ci	 *   address to the stack for leaf functions.
331062306a36Sopenharmony_ci	 */
331162306a36Sopenharmony_ci	if (!meta->jmp_dst) {
331262306a36Sopenharmony_ci		pr_err("BUG: BPF-to-BPF call has no destination recorded\n");
331362306a36Sopenharmony_ci		return -ELOOP;
331462306a36Sopenharmony_ci	}
331562306a36Sopenharmony_ci	if (nfp_prog->subprog[meta->jmp_dst->subprog_idx].needs_reg_push) {
331662306a36Sopenharmony_ci		ret_tgt = nfp_prog_current_offset(nfp_prog) + 3;
331762306a36Sopenharmony_ci		emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2,
331862306a36Sopenharmony_ci			     RELO_BR_GO_CALL_PUSH_REGS);
331962306a36Sopenharmony_ci		offset_br = nfp_prog_current_offset(nfp_prog);
332062306a36Sopenharmony_ci		wrp_immed_relo(nfp_prog, imm_b(nfp_prog), 0, RELO_IMMED_REL);
332162306a36Sopenharmony_ci	} else {
332262306a36Sopenharmony_ci		ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
332362306a36Sopenharmony_ci		emit_br(nfp_prog, BR_UNC, meta->insn.imm, 1);
332462306a36Sopenharmony_ci		offset_br = nfp_prog_current_offset(nfp_prog);
332562306a36Sopenharmony_ci	}
332662306a36Sopenharmony_ci	wrp_immed_relo(nfp_prog, ret_reg(nfp_prog), ret_tgt, RELO_IMMED_REL);
332762306a36Sopenharmony_ci
332862306a36Sopenharmony_ci	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
332962306a36Sopenharmony_ci		return -EINVAL;
333062306a36Sopenharmony_ci
333162306a36Sopenharmony_ci	if (stack_depth) {
333262306a36Sopenharmony_ci		tmp_reg = ur_load_imm_any(nfp_prog, stack_depth,
333362306a36Sopenharmony_ci					  stack_imm(nfp_prog));
333462306a36Sopenharmony_ci		emit_alu(nfp_prog, stack_reg(nfp_prog),
333562306a36Sopenharmony_ci			 stack_reg(nfp_prog), ALU_OP_SUB, tmp_reg);
333662306a36Sopenharmony_ci		emit_csr_wr(nfp_prog, stack_reg(nfp_prog),
333762306a36Sopenharmony_ci			    NFP_CSR_ACT_LM_ADDR0);
333862306a36Sopenharmony_ci		wrp_nops(nfp_prog, 3);
333962306a36Sopenharmony_ci	}
334062306a36Sopenharmony_ci
334162306a36Sopenharmony_ci	meta->num_insns_after_br = nfp_prog_current_offset(nfp_prog);
334262306a36Sopenharmony_ci	meta->num_insns_after_br -= offset_br;
334362306a36Sopenharmony_ci
334462306a36Sopenharmony_ci	return 0;
334562306a36Sopenharmony_ci}
334662306a36Sopenharmony_ci
334762306a36Sopenharmony_cistatic int helper_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
334862306a36Sopenharmony_ci{
334962306a36Sopenharmony_ci	switch (meta->insn.imm) {
335062306a36Sopenharmony_ci	case BPF_FUNC_xdp_adjust_head:
335162306a36Sopenharmony_ci		return adjust_head(nfp_prog, meta);
335262306a36Sopenharmony_ci	case BPF_FUNC_xdp_adjust_tail:
335362306a36Sopenharmony_ci		return adjust_tail(nfp_prog, meta);
335462306a36Sopenharmony_ci	case BPF_FUNC_map_lookup_elem:
335562306a36Sopenharmony_ci	case BPF_FUNC_map_update_elem:
335662306a36Sopenharmony_ci	case BPF_FUNC_map_delete_elem:
335762306a36Sopenharmony_ci		return map_call_stack_common(nfp_prog, meta);
335862306a36Sopenharmony_ci	case BPF_FUNC_get_prandom_u32:
335962306a36Sopenharmony_ci		return nfp_get_prandom_u32(nfp_prog, meta);
336062306a36Sopenharmony_ci	case BPF_FUNC_perf_event_output:
336162306a36Sopenharmony_ci		return nfp_perf_event_output(nfp_prog, meta);
336262306a36Sopenharmony_ci	default:
336362306a36Sopenharmony_ci		WARN_ONCE(1, "verifier allowed unsupported function\n");
336462306a36Sopenharmony_ci		return -EOPNOTSUPP;
336562306a36Sopenharmony_ci	}
336662306a36Sopenharmony_ci}
336762306a36Sopenharmony_ci
/* BPF_CALL: pseudo calls are BPF-to-BPF calls, anything else is a call
 * to a helper.
 */
static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	if (!is_mbpf_pseudo_call(meta))
		return helper_call(nfp_prog, meta);

	return bpf_to_bpf_call(nfp_prog, meta);
}
337562306a36Sopenharmony_ci
337662306a36Sopenharmony_cistatic bool nfp_is_main_function(struct nfp_insn_meta *meta)
337762306a36Sopenharmony_ci{
337862306a36Sopenharmony_ci	return meta->subprog_idx == 0;
337962306a36Sopenharmony_ci}
338062306a36Sopenharmony_ci
338162306a36Sopenharmony_cistatic int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
338262306a36Sopenharmony_ci{
338362306a36Sopenharmony_ci	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 0, RELO_BR_GO_OUT);
338462306a36Sopenharmony_ci
338562306a36Sopenharmony_ci	return 0;
338662306a36Sopenharmony_ci}
338762306a36Sopenharmony_ci
338862306a36Sopenharmony_cistatic int
338962306a36Sopenharmony_cinfp_subprog_epilogue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
339062306a36Sopenharmony_ci{
339162306a36Sopenharmony_ci	if (nfp_prog->subprog[meta->subprog_idx].needs_reg_push) {
339262306a36Sopenharmony_ci		/* Pop R6~R9 to the stack via related subroutine.
339362306a36Sopenharmony_ci		 * We loaded the return address to the caller into ret_reg().
339462306a36Sopenharmony_ci		 * This means that the subroutine does not come back here, we
339562306a36Sopenharmony_ci		 * make it jump back to the subprogram caller directly!
339662306a36Sopenharmony_ci		 */
339762306a36Sopenharmony_ci		emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 1,
339862306a36Sopenharmony_ci			     RELO_BR_GO_CALL_POP_REGS);
339962306a36Sopenharmony_ci		/* Pop return address from the stack. */
340062306a36Sopenharmony_ci		wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0));
340162306a36Sopenharmony_ci	} else {
340262306a36Sopenharmony_ci		/* Pop return address from the stack. */
340362306a36Sopenharmony_ci		wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0));
340462306a36Sopenharmony_ci		/* Jump back to caller if no callee-saved registers were used
340562306a36Sopenharmony_ci		 * by the subprogram.
340662306a36Sopenharmony_ci		 */
340762306a36Sopenharmony_ci		emit_rtn(nfp_prog, ret_reg(nfp_prog), 0);
340862306a36Sopenharmony_ci	}
340962306a36Sopenharmony_ci
341062306a36Sopenharmony_ci	return 0;
341162306a36Sopenharmony_ci}
341262306a36Sopenharmony_ci
/* BPF_EXIT: a subprogram returns to its caller, the main function
 * leaves the program.
 */
static int jmp_exit(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	if (!nfp_is_main_function(meta))
		return nfp_subprog_epilogue(nfp_prog, meta);

	return goto_out(nfp_prog, meta);
}
342062306a36Sopenharmony_ci
342162306a36Sopenharmony_cistatic const instr_cb_t instr_cb[256] = {
342262306a36Sopenharmony_ci	[BPF_ALU64 | BPF_MOV | BPF_X] =	mov_reg64,
342362306a36Sopenharmony_ci	[BPF_ALU64 | BPF_MOV | BPF_K] =	mov_imm64,
342462306a36Sopenharmony_ci	[BPF_ALU64 | BPF_XOR | BPF_X] =	xor_reg64,
342562306a36Sopenharmony_ci	[BPF_ALU64 | BPF_XOR | BPF_K] =	xor_imm64,
342662306a36Sopenharmony_ci	[BPF_ALU64 | BPF_AND | BPF_X] =	and_reg64,
342762306a36Sopenharmony_ci	[BPF_ALU64 | BPF_AND | BPF_K] =	and_imm64,
342862306a36Sopenharmony_ci	[BPF_ALU64 | BPF_OR | BPF_X] =	or_reg64,
342962306a36Sopenharmony_ci	[BPF_ALU64 | BPF_OR | BPF_K] =	or_imm64,
343062306a36Sopenharmony_ci	[BPF_ALU64 | BPF_ADD | BPF_X] =	add_reg64,
343162306a36Sopenharmony_ci	[BPF_ALU64 | BPF_ADD | BPF_K] =	add_imm64,
343262306a36Sopenharmony_ci	[BPF_ALU64 | BPF_SUB | BPF_X] =	sub_reg64,
343362306a36Sopenharmony_ci	[BPF_ALU64 | BPF_SUB | BPF_K] =	sub_imm64,
343462306a36Sopenharmony_ci	[BPF_ALU64 | BPF_MUL | BPF_X] =	mul_reg64,
343562306a36Sopenharmony_ci	[BPF_ALU64 | BPF_MUL | BPF_K] =	mul_imm64,
343662306a36Sopenharmony_ci	[BPF_ALU64 | BPF_DIV | BPF_X] =	div_reg64,
343762306a36Sopenharmony_ci	[BPF_ALU64 | BPF_DIV | BPF_K] =	div_imm64,
343862306a36Sopenharmony_ci	[BPF_ALU64 | BPF_NEG] =		neg_reg64,
343962306a36Sopenharmony_ci	[BPF_ALU64 | BPF_LSH | BPF_X] =	shl_reg64,
344062306a36Sopenharmony_ci	[BPF_ALU64 | BPF_LSH | BPF_K] =	shl_imm64,
344162306a36Sopenharmony_ci	[BPF_ALU64 | BPF_RSH | BPF_X] =	shr_reg64,
344262306a36Sopenharmony_ci	[BPF_ALU64 | BPF_RSH | BPF_K] =	shr_imm64,
344362306a36Sopenharmony_ci	[BPF_ALU64 | BPF_ARSH | BPF_X] = ashr_reg64,
344462306a36Sopenharmony_ci	[BPF_ALU64 | BPF_ARSH | BPF_K] = ashr_imm64,
344562306a36Sopenharmony_ci	[BPF_ALU | BPF_MOV | BPF_X] =	mov_reg,
344662306a36Sopenharmony_ci	[BPF_ALU | BPF_MOV | BPF_K] =	mov_imm,
344762306a36Sopenharmony_ci	[BPF_ALU | BPF_XOR | BPF_X] =	xor_reg,
344862306a36Sopenharmony_ci	[BPF_ALU | BPF_XOR | BPF_K] =	xor_imm,
344962306a36Sopenharmony_ci	[BPF_ALU | BPF_AND | BPF_X] =	and_reg,
345062306a36Sopenharmony_ci	[BPF_ALU | BPF_AND | BPF_K] =	and_imm,
345162306a36Sopenharmony_ci	[BPF_ALU | BPF_OR | BPF_X] =	or_reg,
345262306a36Sopenharmony_ci	[BPF_ALU | BPF_OR | BPF_K] =	or_imm,
345362306a36Sopenharmony_ci	[BPF_ALU | BPF_ADD | BPF_X] =	add_reg,
345462306a36Sopenharmony_ci	[BPF_ALU | BPF_ADD | BPF_K] =	add_imm,
345562306a36Sopenharmony_ci	[BPF_ALU | BPF_SUB | BPF_X] =	sub_reg,
345662306a36Sopenharmony_ci	[BPF_ALU | BPF_SUB | BPF_K] =	sub_imm,
345762306a36Sopenharmony_ci	[BPF_ALU | BPF_MUL | BPF_X] =	mul_reg,
345862306a36Sopenharmony_ci	[BPF_ALU | BPF_MUL | BPF_K] =	mul_imm,
345962306a36Sopenharmony_ci	[BPF_ALU | BPF_DIV | BPF_X] =	div_reg,
346062306a36Sopenharmony_ci	[BPF_ALU | BPF_DIV | BPF_K] =	div_imm,
346162306a36Sopenharmony_ci	[BPF_ALU | BPF_NEG] =		neg_reg,
346262306a36Sopenharmony_ci	[BPF_ALU | BPF_LSH | BPF_X] =	shl_reg,
346362306a36Sopenharmony_ci	[BPF_ALU | BPF_LSH | BPF_K] =	shl_imm,
346462306a36Sopenharmony_ci	[BPF_ALU | BPF_RSH | BPF_X] =	shr_reg,
346562306a36Sopenharmony_ci	[BPF_ALU | BPF_RSH | BPF_K] =	shr_imm,
346662306a36Sopenharmony_ci	[BPF_ALU | BPF_ARSH | BPF_X] =	ashr_reg,
346762306a36Sopenharmony_ci	[BPF_ALU | BPF_ARSH | BPF_K] =	ashr_imm,
346862306a36Sopenharmony_ci	[BPF_ALU | BPF_END | BPF_X] =	end_reg32,
346962306a36Sopenharmony_ci	[BPF_LD | BPF_IMM | BPF_DW] =	imm_ld8,
347062306a36Sopenharmony_ci	[BPF_LD | BPF_ABS | BPF_B] =	data_ld1,
347162306a36Sopenharmony_ci	[BPF_LD | BPF_ABS | BPF_H] =	data_ld2,
347262306a36Sopenharmony_ci	[BPF_LD | BPF_ABS | BPF_W] =	data_ld4,
347362306a36Sopenharmony_ci	[BPF_LD | BPF_IND | BPF_B] =	data_ind_ld1,
347462306a36Sopenharmony_ci	[BPF_LD | BPF_IND | BPF_H] =	data_ind_ld2,
347562306a36Sopenharmony_ci	[BPF_LD | BPF_IND | BPF_W] =	data_ind_ld4,
347662306a36Sopenharmony_ci	[BPF_LDX | BPF_MEM | BPF_B] =	mem_ldx1,
347762306a36Sopenharmony_ci	[BPF_LDX | BPF_MEM | BPF_H] =	mem_ldx2,
347862306a36Sopenharmony_ci	[BPF_LDX | BPF_MEM | BPF_W] =	mem_ldx4,
347962306a36Sopenharmony_ci	[BPF_LDX | BPF_MEM | BPF_DW] =	mem_ldx8,
348062306a36Sopenharmony_ci	[BPF_STX | BPF_MEM | BPF_B] =	mem_stx1,
348162306a36Sopenharmony_ci	[BPF_STX | BPF_MEM | BPF_H] =	mem_stx2,
348262306a36Sopenharmony_ci	[BPF_STX | BPF_MEM | BPF_W] =	mem_stx4,
348362306a36Sopenharmony_ci	[BPF_STX | BPF_MEM | BPF_DW] =	mem_stx8,
348462306a36Sopenharmony_ci	[BPF_STX | BPF_ATOMIC | BPF_W] =	mem_atomic4,
348562306a36Sopenharmony_ci	[BPF_STX | BPF_ATOMIC | BPF_DW] =	mem_atomic8,
348662306a36Sopenharmony_ci	[BPF_ST | BPF_MEM | BPF_B] =	mem_st1,
348762306a36Sopenharmony_ci	[BPF_ST | BPF_MEM | BPF_H] =	mem_st2,
348862306a36Sopenharmony_ci	[BPF_ST | BPF_MEM | BPF_W] =	mem_st4,
348962306a36Sopenharmony_ci	[BPF_ST | BPF_MEM | BPF_DW] =	mem_st8,
349062306a36Sopenharmony_ci	[BPF_JMP | BPF_JA | BPF_K] =	jump,
349162306a36Sopenharmony_ci	[BPF_JMP | BPF_JEQ | BPF_K] =	jeq_imm,
349262306a36Sopenharmony_ci	[BPF_JMP | BPF_JGT | BPF_K] =	cmp_imm,
349362306a36Sopenharmony_ci	[BPF_JMP | BPF_JGE | BPF_K] =	cmp_imm,
349462306a36Sopenharmony_ci	[BPF_JMP | BPF_JLT | BPF_K] =	cmp_imm,
349562306a36Sopenharmony_ci	[BPF_JMP | BPF_JLE | BPF_K] =	cmp_imm,
349662306a36Sopenharmony_ci	[BPF_JMP | BPF_JSGT | BPF_K] =  cmp_imm,
349762306a36Sopenharmony_ci	[BPF_JMP | BPF_JSGE | BPF_K] =  cmp_imm,
349862306a36Sopenharmony_ci	[BPF_JMP | BPF_JSLT | BPF_K] =  cmp_imm,
349962306a36Sopenharmony_ci	[BPF_JMP | BPF_JSLE | BPF_K] =  cmp_imm,
350062306a36Sopenharmony_ci	[BPF_JMP | BPF_JSET | BPF_K] =	jset_imm,
350162306a36Sopenharmony_ci	[BPF_JMP | BPF_JNE | BPF_K] =	jne_imm,
350262306a36Sopenharmony_ci	[BPF_JMP | BPF_JEQ | BPF_X] =	jeq_reg,
350362306a36Sopenharmony_ci	[BPF_JMP | BPF_JGT | BPF_X] =	cmp_reg,
350462306a36Sopenharmony_ci	[BPF_JMP | BPF_JGE | BPF_X] =	cmp_reg,
350562306a36Sopenharmony_ci	[BPF_JMP | BPF_JLT | BPF_X] =	cmp_reg,
350662306a36Sopenharmony_ci	[BPF_JMP | BPF_JLE | BPF_X] =	cmp_reg,
350762306a36Sopenharmony_ci	[BPF_JMP | BPF_JSGT | BPF_X] =  cmp_reg,
350862306a36Sopenharmony_ci	[BPF_JMP | BPF_JSGE | BPF_X] =  cmp_reg,
350962306a36Sopenharmony_ci	[BPF_JMP | BPF_JSLT | BPF_X] =  cmp_reg,
351062306a36Sopenharmony_ci	[BPF_JMP | BPF_JSLE | BPF_X] =  cmp_reg,
351162306a36Sopenharmony_ci	[BPF_JMP | BPF_JSET | BPF_X] =	jset_reg,
351262306a36Sopenharmony_ci	[BPF_JMP | BPF_JNE | BPF_X] =	jne_reg,
351362306a36Sopenharmony_ci	[BPF_JMP32 | BPF_JEQ | BPF_K] =	jeq32_imm,
351462306a36Sopenharmony_ci	[BPF_JMP32 | BPF_JGT | BPF_K] =	cmp_imm,
351562306a36Sopenharmony_ci	[BPF_JMP32 | BPF_JGE | BPF_K] =	cmp_imm,
351662306a36Sopenharmony_ci	[BPF_JMP32 | BPF_JLT | BPF_K] =	cmp_imm,
351762306a36Sopenharmony_ci	[BPF_JMP32 | BPF_JLE | BPF_K] =	cmp_imm,
351862306a36Sopenharmony_ci	[BPF_JMP32 | BPF_JSGT | BPF_K] =cmp_imm,
351962306a36Sopenharmony_ci	[BPF_JMP32 | BPF_JSGE | BPF_K] =cmp_imm,
352062306a36Sopenharmony_ci	[BPF_JMP32 | BPF_JSLT | BPF_K] =cmp_imm,
352162306a36Sopenharmony_ci	[BPF_JMP32 | BPF_JSLE | BPF_K] =cmp_imm,
352262306a36Sopenharmony_ci	[BPF_JMP32 | BPF_JSET | BPF_K] =jset_imm,
352362306a36Sopenharmony_ci	[BPF_JMP32 | BPF_JNE | BPF_K] =	jne_imm,
352462306a36Sopenharmony_ci	[BPF_JMP32 | BPF_JEQ | BPF_X] =	jeq_reg,
352562306a36Sopenharmony_ci	[BPF_JMP32 | BPF_JGT | BPF_X] =	cmp_reg,
352662306a36Sopenharmony_ci	[BPF_JMP32 | BPF_JGE | BPF_X] =	cmp_reg,
352762306a36Sopenharmony_ci	[BPF_JMP32 | BPF_JLT | BPF_X] =	cmp_reg,
352862306a36Sopenharmony_ci	[BPF_JMP32 | BPF_JLE | BPF_X] =	cmp_reg,
352962306a36Sopenharmony_ci	[BPF_JMP32 | BPF_JSGT | BPF_X] =cmp_reg,
353062306a36Sopenharmony_ci	[BPF_JMP32 | BPF_JSGE | BPF_X] =cmp_reg,
353162306a36Sopenharmony_ci	[BPF_JMP32 | BPF_JSLT | BPF_X] =cmp_reg,
353262306a36Sopenharmony_ci	[BPF_JMP32 | BPF_JSLE | BPF_X] =cmp_reg,
353362306a36Sopenharmony_ci	[BPF_JMP32 | BPF_JSET | BPF_X] =jset_reg,
353462306a36Sopenharmony_ci	[BPF_JMP32 | BPF_JNE | BPF_X] =	jne_reg,
353562306a36Sopenharmony_ci	[BPF_JMP | BPF_CALL] =		call,
353662306a36Sopenharmony_ci	[BPF_JMP | BPF_EXIT] =		jmp_exit,
353762306a36Sopenharmony_ci};
353862306a36Sopenharmony_ci
353962306a36Sopenharmony_ci/* --- Assembler logic --- */
354062306a36Sopenharmony_cistatic int
354162306a36Sopenharmony_cinfp_fixup_immed_relo(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
354262306a36Sopenharmony_ci		     struct nfp_insn_meta *jmp_dst, u32 br_idx)
354362306a36Sopenharmony_ci{
354462306a36Sopenharmony_ci	if (immed_get_value(nfp_prog->prog[br_idx + 1])) {
354562306a36Sopenharmony_ci		pr_err("BUG: failed to fix up callee register saving\n");
354662306a36Sopenharmony_ci		return -EINVAL;
354762306a36Sopenharmony_ci	}
354862306a36Sopenharmony_ci
354962306a36Sopenharmony_ci	immed_set_value(&nfp_prog->prog[br_idx + 1], jmp_dst->off);
355062306a36Sopenharmony_ci
355162306a36Sopenharmony_ci	return 0;
355262306a36Sopenharmony_ci}
355362306a36Sopenharmony_ci
355462306a36Sopenharmony_cistatic int nfp_fixup_branches(struct nfp_prog *nfp_prog)
355562306a36Sopenharmony_ci{
355662306a36Sopenharmony_ci	struct nfp_insn_meta *meta, *jmp_dst;
355762306a36Sopenharmony_ci	u32 idx, br_idx;
355862306a36Sopenharmony_ci	int err;
355962306a36Sopenharmony_ci
356062306a36Sopenharmony_ci	list_for_each_entry(meta, &nfp_prog->insns, l) {
356162306a36Sopenharmony_ci		if (meta->flags & FLAG_INSN_SKIP_MASK)
356262306a36Sopenharmony_ci			continue;
356362306a36Sopenharmony_ci		if (!is_mbpf_jmp(meta))
356462306a36Sopenharmony_ci			continue;
356562306a36Sopenharmony_ci		if (meta->insn.code == (BPF_JMP | BPF_EXIT) &&
356662306a36Sopenharmony_ci		    !nfp_is_main_function(meta))
356762306a36Sopenharmony_ci			continue;
356862306a36Sopenharmony_ci		if (is_mbpf_helper_call(meta))
356962306a36Sopenharmony_ci			continue;
357062306a36Sopenharmony_ci
357162306a36Sopenharmony_ci		if (list_is_last(&meta->l, &nfp_prog->insns))
357262306a36Sopenharmony_ci			br_idx = nfp_prog->last_bpf_off;
357362306a36Sopenharmony_ci		else
357462306a36Sopenharmony_ci			br_idx = list_next_entry(meta, l)->off - 1;
357562306a36Sopenharmony_ci
357662306a36Sopenharmony_ci		/* For BPF-to-BPF function call, a stack adjustment sequence is
357762306a36Sopenharmony_ci		 * generated after the return instruction. Therefore, we must
357862306a36Sopenharmony_ci		 * withdraw the length of this sequence to have br_idx pointing
357962306a36Sopenharmony_ci		 * to where the "branch" NFP instruction is expected to be.
358062306a36Sopenharmony_ci		 */
358162306a36Sopenharmony_ci		if (is_mbpf_pseudo_call(meta))
358262306a36Sopenharmony_ci			br_idx -= meta->num_insns_after_br;
358362306a36Sopenharmony_ci
358462306a36Sopenharmony_ci		if (!nfp_is_br(nfp_prog->prog[br_idx])) {
358562306a36Sopenharmony_ci			pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n",
358662306a36Sopenharmony_ci			       br_idx, meta->insn.code, nfp_prog->prog[br_idx]);
358762306a36Sopenharmony_ci			return -ELOOP;
358862306a36Sopenharmony_ci		}
358962306a36Sopenharmony_ci
359062306a36Sopenharmony_ci		if (meta->insn.code == (BPF_JMP | BPF_EXIT))
359162306a36Sopenharmony_ci			continue;
359262306a36Sopenharmony_ci
359362306a36Sopenharmony_ci		/* Leave special branches for later */
359462306a36Sopenharmony_ci		if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) !=
359562306a36Sopenharmony_ci		    RELO_BR_REL && !is_mbpf_pseudo_call(meta))
359662306a36Sopenharmony_ci			continue;
359762306a36Sopenharmony_ci
359862306a36Sopenharmony_ci		if (!meta->jmp_dst) {
359962306a36Sopenharmony_ci			pr_err("Non-exit jump doesn't have destination info recorded!!\n");
360062306a36Sopenharmony_ci			return -ELOOP;
360162306a36Sopenharmony_ci		}
360262306a36Sopenharmony_ci
360362306a36Sopenharmony_ci		jmp_dst = meta->jmp_dst;
360462306a36Sopenharmony_ci
360562306a36Sopenharmony_ci		if (jmp_dst->flags & FLAG_INSN_SKIP_PREC_DEPENDENT) {
360662306a36Sopenharmony_ci			pr_err("Branch landing on removed instruction!!\n");
360762306a36Sopenharmony_ci			return -ELOOP;
360862306a36Sopenharmony_ci		}
360962306a36Sopenharmony_ci
361062306a36Sopenharmony_ci		if (is_mbpf_pseudo_call(meta) &&
361162306a36Sopenharmony_ci		    nfp_prog->subprog[jmp_dst->subprog_idx].needs_reg_push) {
361262306a36Sopenharmony_ci			err = nfp_fixup_immed_relo(nfp_prog, meta,
361362306a36Sopenharmony_ci						   jmp_dst, br_idx);
361462306a36Sopenharmony_ci			if (err)
361562306a36Sopenharmony_ci				return err;
361662306a36Sopenharmony_ci		}
361762306a36Sopenharmony_ci
361862306a36Sopenharmony_ci		if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) !=
361962306a36Sopenharmony_ci		    RELO_BR_REL)
362062306a36Sopenharmony_ci			continue;
362162306a36Sopenharmony_ci
362262306a36Sopenharmony_ci		for (idx = meta->off; idx <= br_idx; idx++) {
362362306a36Sopenharmony_ci			if (!nfp_is_br(nfp_prog->prog[idx]))
362462306a36Sopenharmony_ci				continue;
362562306a36Sopenharmony_ci			br_set_offset(&nfp_prog->prog[idx], jmp_dst->off);
362662306a36Sopenharmony_ci		}
362762306a36Sopenharmony_ci	}
362862306a36Sopenharmony_ci
362962306a36Sopenharmony_ci	return 0;
363062306a36Sopenharmony_ci}
363162306a36Sopenharmony_ci
363262306a36Sopenharmony_cistatic void nfp_intro(struct nfp_prog *nfp_prog)
363362306a36Sopenharmony_ci{
363462306a36Sopenharmony_ci	wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0));
363562306a36Sopenharmony_ci	emit_alu(nfp_prog, plen_reg(nfp_prog),
363662306a36Sopenharmony_ci		 plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog));
363762306a36Sopenharmony_ci}
363862306a36Sopenharmony_ci
/* Emit the entry sequence of a subprogram: the return address held in
 * ret_reg() is saved into the stack at reg_lm(0, 0).  @meta is not used.
 */
static void nfp_subprog_prologue(struct nfp_prog *nfp_prog,
				 struct nfp_insn_meta *meta)
{
	wrp_mov(nfp_prog, reg_lm(0, 0), ret_reg(nfp_prog));
}
364562306a36Sopenharmony_ci
364662306a36Sopenharmony_cistatic void
364762306a36Sopenharmony_cinfp_start_subprog(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
364862306a36Sopenharmony_ci{
364962306a36Sopenharmony_ci	unsigned int depth = nfp_prog->subprog[meta->subprog_idx].stack_depth;
365062306a36Sopenharmony_ci
365162306a36Sopenharmony_ci	nfp_prog->stack_frame_depth = round_up(depth, 4);
365262306a36Sopenharmony_ci	nfp_subprog_prologue(nfp_prog, meta);
365362306a36Sopenharmony_ci}
365462306a36Sopenharmony_ci
365562306a36Sopenharmony_cibool nfp_is_subprog_start(struct nfp_insn_meta *meta)
365662306a36Sopenharmony_ci{
365762306a36Sopenharmony_ci	return meta->flags & FLAG_INSN_IS_SUBPROG_START;
365862306a36Sopenharmony_ci}
365962306a36Sopenharmony_ci
366062306a36Sopenharmony_cistatic void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
366162306a36Sopenharmony_ci{
366262306a36Sopenharmony_ci	/* TC direct-action mode:
366362306a36Sopenharmony_ci	 *   0,1   ok        NOT SUPPORTED[1]
366462306a36Sopenharmony_ci	 *   2   drop  0x22 -> drop,  count as stat1
366562306a36Sopenharmony_ci	 *   4,5 nuke  0x02 -> drop
366662306a36Sopenharmony_ci	 *   7  redir  0x44 -> redir, count as stat2
366762306a36Sopenharmony_ci	 *   * unspec  0x11 -> pass,  count as stat0
366862306a36Sopenharmony_ci	 *
366962306a36Sopenharmony_ci	 * [1] We can't support OK and RECLASSIFY because we can't tell TC
367062306a36Sopenharmony_ci	 *     the exact decision made.  We are forced to support UNSPEC
367162306a36Sopenharmony_ci	 *     to handle aborts so that's the only one we handle for passing
367262306a36Sopenharmony_ci	 *     packets up the stack.
367362306a36Sopenharmony_ci	 */
367462306a36Sopenharmony_ci	/* Target for aborts */
367562306a36Sopenharmony_ci	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
367662306a36Sopenharmony_ci
367762306a36Sopenharmony_ci	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
367862306a36Sopenharmony_ci
367962306a36Sopenharmony_ci	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
368062306a36Sopenharmony_ci	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);
368162306a36Sopenharmony_ci
368262306a36Sopenharmony_ci	/* Target for normal exits */
368362306a36Sopenharmony_ci	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
368462306a36Sopenharmony_ci
368562306a36Sopenharmony_ci	/* if R0 > 7 jump to abort */
368662306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0));
368762306a36Sopenharmony_ci	emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
368862306a36Sopenharmony_ci	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
368962306a36Sopenharmony_ci
369062306a36Sopenharmony_ci	wrp_immed(nfp_prog, reg_b(2), 0x41221211);
369162306a36Sopenharmony_ci	wrp_immed(nfp_prog, reg_b(3), 0x41001211);
369262306a36Sopenharmony_ci
369362306a36Sopenharmony_ci	emit_shf(nfp_prog, reg_a(1),
369462306a36Sopenharmony_ci		 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2);
369562306a36Sopenharmony_ci
369662306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
369762306a36Sopenharmony_ci	emit_shf(nfp_prog, reg_a(2),
369862306a36Sopenharmony_ci		 reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
369962306a36Sopenharmony_ci
370062306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
370162306a36Sopenharmony_ci	emit_shf(nfp_prog, reg_b(2),
370262306a36Sopenharmony_ci		 reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0);
370362306a36Sopenharmony_ci
370462306a36Sopenharmony_ci	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
370562306a36Sopenharmony_ci
370662306a36Sopenharmony_ci	emit_shf(nfp_prog, reg_b(2),
370762306a36Sopenharmony_ci		 reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4);
370862306a36Sopenharmony_ci	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
370962306a36Sopenharmony_ci}
371062306a36Sopenharmony_ci
371162306a36Sopenharmony_cistatic void nfp_outro_xdp(struct nfp_prog *nfp_prog)
371262306a36Sopenharmony_ci{
371362306a36Sopenharmony_ci	/* XDP return codes:
371462306a36Sopenharmony_ci	 *   0 aborted  0x82 -> drop,  count as stat3
371562306a36Sopenharmony_ci	 *   1    drop  0x22 -> drop,  count as stat1
371662306a36Sopenharmony_ci	 *   2    pass  0x11 -> pass,  count as stat0
371762306a36Sopenharmony_ci	 *   3      tx  0x44 -> redir, count as stat2
371862306a36Sopenharmony_ci	 *   * unknown  0x82 -> drop,  count as stat3
371962306a36Sopenharmony_ci	 */
372062306a36Sopenharmony_ci	/* Target for aborts */
372162306a36Sopenharmony_ci	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
372262306a36Sopenharmony_ci
372362306a36Sopenharmony_ci	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
372462306a36Sopenharmony_ci
372562306a36Sopenharmony_ci	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
372662306a36Sopenharmony_ci	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16);
372762306a36Sopenharmony_ci
372862306a36Sopenharmony_ci	/* Target for normal exits */
372962306a36Sopenharmony_ci	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
373062306a36Sopenharmony_ci
373162306a36Sopenharmony_ci	/* if R0 > 3 jump to abort */
373262306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0));
373362306a36Sopenharmony_ci	emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
373462306a36Sopenharmony_ci
373562306a36Sopenharmony_ci	wrp_immed(nfp_prog, reg_b(2), 0x44112282);
373662306a36Sopenharmony_ci
373762306a36Sopenharmony_ci	emit_shf(nfp_prog, reg_a(1),
373862306a36Sopenharmony_ci		 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3);
373962306a36Sopenharmony_ci
374062306a36Sopenharmony_ci	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
374162306a36Sopenharmony_ci	emit_shf(nfp_prog, reg_b(2),
374262306a36Sopenharmony_ci		 reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
374362306a36Sopenharmony_ci
374462306a36Sopenharmony_ci	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
374562306a36Sopenharmony_ci
374662306a36Sopenharmony_ci	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
374762306a36Sopenharmony_ci	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
374862306a36Sopenharmony_ci}
374962306a36Sopenharmony_ci
375062306a36Sopenharmony_cistatic bool nfp_prog_needs_callee_reg_save(struct nfp_prog *nfp_prog)
375162306a36Sopenharmony_ci{
375262306a36Sopenharmony_ci	unsigned int idx;
375362306a36Sopenharmony_ci
375462306a36Sopenharmony_ci	for (idx = 1; idx < nfp_prog->subprog_cnt; idx++)
375562306a36Sopenharmony_ci		if (nfp_prog->subprog[idx].needs_reg_push)
375662306a36Sopenharmony_ci			return true;
375762306a36Sopenharmony_ci
375862306a36Sopenharmony_ci	return false;
375962306a36Sopenharmony_ci}
376062306a36Sopenharmony_ci
376162306a36Sopenharmony_cistatic void nfp_push_callee_registers(struct nfp_prog *nfp_prog)
376262306a36Sopenharmony_ci{
376362306a36Sopenharmony_ci	u8 reg;
376462306a36Sopenharmony_ci
376562306a36Sopenharmony_ci	/* Subroutine: Save all callee saved registers (R6 ~ R9).
376662306a36Sopenharmony_ci	 * imm_b() holds the return address.
376762306a36Sopenharmony_ci	 */
376862306a36Sopenharmony_ci	nfp_prog->tgt_call_push_regs = nfp_prog_current_offset(nfp_prog);
376962306a36Sopenharmony_ci	for (reg = BPF_REG_6; reg <= BPF_REG_9; reg++) {
377062306a36Sopenharmony_ci		u8 adj = (reg - BPF_REG_0) * 2;
377162306a36Sopenharmony_ci		u8 idx = (reg - BPF_REG_6) * 2;
377262306a36Sopenharmony_ci
377362306a36Sopenharmony_ci		/* The first slot in the stack frame is used to push the return
377462306a36Sopenharmony_ci		 * address in bpf_to_bpf_call(), start just after.
377562306a36Sopenharmony_ci		 */
377662306a36Sopenharmony_ci		wrp_mov(nfp_prog, reg_lm(0, 1 + idx), reg_b(adj));
377762306a36Sopenharmony_ci
377862306a36Sopenharmony_ci		if (reg == BPF_REG_8)
377962306a36Sopenharmony_ci			/* Prepare to jump back, last 3 insns use defer slots */
378062306a36Sopenharmony_ci			emit_rtn(nfp_prog, imm_b(nfp_prog), 3);
378162306a36Sopenharmony_ci
378262306a36Sopenharmony_ci		wrp_mov(nfp_prog, reg_lm(0, 1 + idx + 1), reg_b(adj + 1));
378362306a36Sopenharmony_ci	}
378462306a36Sopenharmony_ci}
378562306a36Sopenharmony_ci
378662306a36Sopenharmony_cistatic void nfp_pop_callee_registers(struct nfp_prog *nfp_prog)
378762306a36Sopenharmony_ci{
378862306a36Sopenharmony_ci	u8 reg;
378962306a36Sopenharmony_ci
379062306a36Sopenharmony_ci	/* Subroutine: Restore all callee saved registers (R6 ~ R9).
379162306a36Sopenharmony_ci	 * ret_reg() holds the return address.
379262306a36Sopenharmony_ci	 */
379362306a36Sopenharmony_ci	nfp_prog->tgt_call_pop_regs = nfp_prog_current_offset(nfp_prog);
379462306a36Sopenharmony_ci	for (reg = BPF_REG_6; reg <= BPF_REG_9; reg++) {
379562306a36Sopenharmony_ci		u8 adj = (reg - BPF_REG_0) * 2;
379662306a36Sopenharmony_ci		u8 idx = (reg - BPF_REG_6) * 2;
379762306a36Sopenharmony_ci
379862306a36Sopenharmony_ci		/* The first slot in the stack frame holds the return address,
379962306a36Sopenharmony_ci		 * start popping just after that.
380062306a36Sopenharmony_ci		 */
380162306a36Sopenharmony_ci		wrp_mov(nfp_prog, reg_both(adj), reg_lm(0, 1 + idx));
380262306a36Sopenharmony_ci
380362306a36Sopenharmony_ci		if (reg == BPF_REG_8)
380462306a36Sopenharmony_ci			/* Prepare to jump back, last 3 insns use defer slots */
380562306a36Sopenharmony_ci			emit_rtn(nfp_prog, ret_reg(nfp_prog), 3);
380662306a36Sopenharmony_ci
380762306a36Sopenharmony_ci		wrp_mov(nfp_prog, reg_both(adj + 1), reg_lm(0, 1 + idx + 1));
380862306a36Sopenharmony_ci	}
380962306a36Sopenharmony_ci}
381062306a36Sopenharmony_ci
381162306a36Sopenharmony_cistatic void nfp_outro(struct nfp_prog *nfp_prog)
381262306a36Sopenharmony_ci{
381362306a36Sopenharmony_ci	switch (nfp_prog->type) {
381462306a36Sopenharmony_ci	case BPF_PROG_TYPE_SCHED_CLS:
381562306a36Sopenharmony_ci		nfp_outro_tc_da(nfp_prog);
381662306a36Sopenharmony_ci		break;
381762306a36Sopenharmony_ci	case BPF_PROG_TYPE_XDP:
381862306a36Sopenharmony_ci		nfp_outro_xdp(nfp_prog);
381962306a36Sopenharmony_ci		break;
382062306a36Sopenharmony_ci	default:
382162306a36Sopenharmony_ci		WARN_ON(1);
382262306a36Sopenharmony_ci	}
382362306a36Sopenharmony_ci
382462306a36Sopenharmony_ci	if (!nfp_prog_needs_callee_reg_save(nfp_prog))
382562306a36Sopenharmony_ci		return;
382662306a36Sopenharmony_ci
382762306a36Sopenharmony_ci	nfp_push_callee_registers(nfp_prog);
382862306a36Sopenharmony_ci	nfp_pop_callee_registers(nfp_prog);
382962306a36Sopenharmony_ci}
383062306a36Sopenharmony_ci
383162306a36Sopenharmony_cistatic int nfp_translate(struct nfp_prog *nfp_prog)
383262306a36Sopenharmony_ci{
383362306a36Sopenharmony_ci	struct nfp_insn_meta *meta;
383462306a36Sopenharmony_ci	unsigned int depth;
383562306a36Sopenharmony_ci	int err;
383662306a36Sopenharmony_ci
383762306a36Sopenharmony_ci	depth = nfp_prog->subprog[0].stack_depth;
383862306a36Sopenharmony_ci	nfp_prog->stack_frame_depth = round_up(depth, 4);
383962306a36Sopenharmony_ci
384062306a36Sopenharmony_ci	nfp_intro(nfp_prog);
384162306a36Sopenharmony_ci	if (nfp_prog->error)
384262306a36Sopenharmony_ci		return nfp_prog->error;
384362306a36Sopenharmony_ci
384462306a36Sopenharmony_ci	list_for_each_entry(meta, &nfp_prog->insns, l) {
384562306a36Sopenharmony_ci		instr_cb_t cb = instr_cb[meta->insn.code];
384662306a36Sopenharmony_ci
384762306a36Sopenharmony_ci		meta->off = nfp_prog_current_offset(nfp_prog);
384862306a36Sopenharmony_ci
384962306a36Sopenharmony_ci		if (nfp_is_subprog_start(meta)) {
385062306a36Sopenharmony_ci			nfp_start_subprog(nfp_prog, meta);
385162306a36Sopenharmony_ci			if (nfp_prog->error)
385262306a36Sopenharmony_ci				return nfp_prog->error;
385362306a36Sopenharmony_ci		}
385462306a36Sopenharmony_ci
385562306a36Sopenharmony_ci		if (meta->flags & FLAG_INSN_SKIP_MASK) {
385662306a36Sopenharmony_ci			nfp_prog->n_translated++;
385762306a36Sopenharmony_ci			continue;
385862306a36Sopenharmony_ci		}
385962306a36Sopenharmony_ci
386062306a36Sopenharmony_ci		if (nfp_meta_has_prev(nfp_prog, meta) &&
386162306a36Sopenharmony_ci		    nfp_meta_prev(meta)->double_cb)
386262306a36Sopenharmony_ci			cb = nfp_meta_prev(meta)->double_cb;
386362306a36Sopenharmony_ci		if (!cb)
386462306a36Sopenharmony_ci			return -ENOENT;
386562306a36Sopenharmony_ci		err = cb(nfp_prog, meta);
386662306a36Sopenharmony_ci		if (err)
386762306a36Sopenharmony_ci			return err;
386862306a36Sopenharmony_ci		if (nfp_prog->error)
386962306a36Sopenharmony_ci			return nfp_prog->error;
387062306a36Sopenharmony_ci
387162306a36Sopenharmony_ci		nfp_prog->n_translated++;
387262306a36Sopenharmony_ci	}
387362306a36Sopenharmony_ci
387462306a36Sopenharmony_ci	nfp_prog->last_bpf_off = nfp_prog_current_offset(nfp_prog) - 1;
387562306a36Sopenharmony_ci
387662306a36Sopenharmony_ci	nfp_outro(nfp_prog);
387762306a36Sopenharmony_ci	if (nfp_prog->error)
387862306a36Sopenharmony_ci		return nfp_prog->error;
387962306a36Sopenharmony_ci
388062306a36Sopenharmony_ci	wrp_nops(nfp_prog, NFP_USTORE_PREFETCH_WINDOW);
388162306a36Sopenharmony_ci	if (nfp_prog->error)
388262306a36Sopenharmony_ci		return nfp_prog->error;
388362306a36Sopenharmony_ci
388462306a36Sopenharmony_ci	return nfp_fixup_branches(nfp_prog);
388562306a36Sopenharmony_ci}
388662306a36Sopenharmony_ci
388762306a36Sopenharmony_ci/* --- Optimizations --- */
388862306a36Sopenharmony_cistatic void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
388962306a36Sopenharmony_ci{
389062306a36Sopenharmony_ci	struct nfp_insn_meta *meta;
389162306a36Sopenharmony_ci
389262306a36Sopenharmony_ci	list_for_each_entry(meta, &nfp_prog->insns, l) {
389362306a36Sopenharmony_ci		struct bpf_insn insn = meta->insn;
389462306a36Sopenharmony_ci
389562306a36Sopenharmony_ci		/* Programs converted from cBPF start with register xoring */
389662306a36Sopenharmony_ci		if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) &&
389762306a36Sopenharmony_ci		    insn.src_reg == insn.dst_reg)
389862306a36Sopenharmony_ci			continue;
389962306a36Sopenharmony_ci
390062306a36Sopenharmony_ci		/* Programs start with R6 = R1 but we ignore the skb pointer */
390162306a36Sopenharmony_ci		if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) &&
390262306a36Sopenharmony_ci		    insn.src_reg == 1 && insn.dst_reg == 6)
390362306a36Sopenharmony_ci			meta->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
390462306a36Sopenharmony_ci
390562306a36Sopenharmony_ci		/* Return as soon as something doesn't match */
390662306a36Sopenharmony_ci		if (!(meta->flags & FLAG_INSN_SKIP_MASK))
390762306a36Sopenharmony_ci			return;
390862306a36Sopenharmony_ci	}
390962306a36Sopenharmony_ci}
391062306a36Sopenharmony_ci
391162306a36Sopenharmony_ci/* abs(insn.imm) will fit better into unrestricted reg immediate -
391262306a36Sopenharmony_ci * convert add/sub of a negative number into a sub/add of a positive one.
391362306a36Sopenharmony_ci */
391462306a36Sopenharmony_cistatic void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog)
391562306a36Sopenharmony_ci{
391662306a36Sopenharmony_ci	struct nfp_insn_meta *meta;
391762306a36Sopenharmony_ci
391862306a36Sopenharmony_ci	list_for_each_entry(meta, &nfp_prog->insns, l) {
391962306a36Sopenharmony_ci		struct bpf_insn insn = meta->insn;
392062306a36Sopenharmony_ci
392162306a36Sopenharmony_ci		if (meta->flags & FLAG_INSN_SKIP_MASK)
392262306a36Sopenharmony_ci			continue;
392362306a36Sopenharmony_ci
392462306a36Sopenharmony_ci		if (!is_mbpf_alu(meta) && !is_mbpf_jmp(meta))
392562306a36Sopenharmony_ci			continue;
392662306a36Sopenharmony_ci		if (BPF_SRC(insn.code) != BPF_K)
392762306a36Sopenharmony_ci			continue;
392862306a36Sopenharmony_ci		if (insn.imm >= 0)
392962306a36Sopenharmony_ci			continue;
393062306a36Sopenharmony_ci
393162306a36Sopenharmony_ci		if (is_mbpf_jmp(meta)) {
393262306a36Sopenharmony_ci			switch (BPF_OP(insn.code)) {
393362306a36Sopenharmony_ci			case BPF_JGE:
393462306a36Sopenharmony_ci			case BPF_JSGE:
393562306a36Sopenharmony_ci			case BPF_JLT:
393662306a36Sopenharmony_ci			case BPF_JSLT:
393762306a36Sopenharmony_ci				meta->jump_neg_op = true;
393862306a36Sopenharmony_ci				break;
393962306a36Sopenharmony_ci			default:
394062306a36Sopenharmony_ci				continue;
394162306a36Sopenharmony_ci			}
394262306a36Sopenharmony_ci		} else {
394362306a36Sopenharmony_ci			if (BPF_OP(insn.code) == BPF_ADD)
394462306a36Sopenharmony_ci				insn.code = BPF_CLASS(insn.code) | BPF_SUB;
394562306a36Sopenharmony_ci			else if (BPF_OP(insn.code) == BPF_SUB)
394662306a36Sopenharmony_ci				insn.code = BPF_CLASS(insn.code) | BPF_ADD;
394762306a36Sopenharmony_ci			else
394862306a36Sopenharmony_ci				continue;
394962306a36Sopenharmony_ci
395062306a36Sopenharmony_ci			meta->insn.code = insn.code | BPF_K;
395162306a36Sopenharmony_ci		}
395262306a36Sopenharmony_ci
395362306a36Sopenharmony_ci		meta->insn.imm = -insn.imm;
395462306a36Sopenharmony_ci	}
395562306a36Sopenharmony_ci}
395662306a36Sopenharmony_ci
395762306a36Sopenharmony_ci/* Remove masking after load since our load guarantees this is not needed */
395862306a36Sopenharmony_cistatic void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
395962306a36Sopenharmony_ci{
396062306a36Sopenharmony_ci	struct nfp_insn_meta *meta1, *meta2;
396162306a36Sopenharmony_ci	static const s32 exp_mask[] = {
396262306a36Sopenharmony_ci		[BPF_B] = 0x000000ffU,
396362306a36Sopenharmony_ci		[BPF_H] = 0x0000ffffU,
396462306a36Sopenharmony_ci		[BPF_W] = 0xffffffffU,
396562306a36Sopenharmony_ci	};
396662306a36Sopenharmony_ci
396762306a36Sopenharmony_ci	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
396862306a36Sopenharmony_ci		struct bpf_insn insn, next;
396962306a36Sopenharmony_ci
397062306a36Sopenharmony_ci		insn = meta1->insn;
397162306a36Sopenharmony_ci		next = meta2->insn;
397262306a36Sopenharmony_ci
397362306a36Sopenharmony_ci		if (BPF_CLASS(insn.code) != BPF_LD)
397462306a36Sopenharmony_ci			continue;
397562306a36Sopenharmony_ci		if (BPF_MODE(insn.code) != BPF_ABS &&
397662306a36Sopenharmony_ci		    BPF_MODE(insn.code) != BPF_IND)
397762306a36Sopenharmony_ci			continue;
397862306a36Sopenharmony_ci
397962306a36Sopenharmony_ci		if (next.code != (BPF_ALU64 | BPF_AND | BPF_K))
398062306a36Sopenharmony_ci			continue;
398162306a36Sopenharmony_ci
398262306a36Sopenharmony_ci		if (!exp_mask[BPF_SIZE(insn.code)])
398362306a36Sopenharmony_ci			continue;
398462306a36Sopenharmony_ci		if (exp_mask[BPF_SIZE(insn.code)] != next.imm)
398562306a36Sopenharmony_ci			continue;
398662306a36Sopenharmony_ci
398762306a36Sopenharmony_ci		if (next.src_reg || next.dst_reg)
398862306a36Sopenharmony_ci			continue;
398962306a36Sopenharmony_ci
399062306a36Sopenharmony_ci		if (meta2->flags & FLAG_INSN_IS_JUMP_DST)
399162306a36Sopenharmony_ci			continue;
399262306a36Sopenharmony_ci
399362306a36Sopenharmony_ci		meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
399462306a36Sopenharmony_ci	}
399562306a36Sopenharmony_ci}
399662306a36Sopenharmony_ci
399762306a36Sopenharmony_cistatic void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog)
399862306a36Sopenharmony_ci{
399962306a36Sopenharmony_ci	struct nfp_insn_meta *meta1, *meta2, *meta3;
400062306a36Sopenharmony_ci
400162306a36Sopenharmony_ci	nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) {
400262306a36Sopenharmony_ci		struct bpf_insn insn, next1, next2;
400362306a36Sopenharmony_ci
400462306a36Sopenharmony_ci		insn = meta1->insn;
400562306a36Sopenharmony_ci		next1 = meta2->insn;
400662306a36Sopenharmony_ci		next2 = meta3->insn;
400762306a36Sopenharmony_ci
400862306a36Sopenharmony_ci		if (BPF_CLASS(insn.code) != BPF_LD)
400962306a36Sopenharmony_ci			continue;
401062306a36Sopenharmony_ci		if (BPF_MODE(insn.code) != BPF_ABS &&
401162306a36Sopenharmony_ci		    BPF_MODE(insn.code) != BPF_IND)
401262306a36Sopenharmony_ci			continue;
401362306a36Sopenharmony_ci		if (BPF_SIZE(insn.code) != BPF_W)
401462306a36Sopenharmony_ci			continue;
401562306a36Sopenharmony_ci
401662306a36Sopenharmony_ci		if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) &&
401762306a36Sopenharmony_ci		      next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) &&
401862306a36Sopenharmony_ci		    !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) &&
401962306a36Sopenharmony_ci		      next2.code == (BPF_LSH | BPF_K | BPF_ALU64)))
402062306a36Sopenharmony_ci			continue;
402162306a36Sopenharmony_ci
402262306a36Sopenharmony_ci		if (next1.src_reg || next1.dst_reg ||
402362306a36Sopenharmony_ci		    next2.src_reg || next2.dst_reg)
402462306a36Sopenharmony_ci			continue;
402562306a36Sopenharmony_ci
402662306a36Sopenharmony_ci		if (next1.imm != 0x20 || next2.imm != 0x20)
402762306a36Sopenharmony_ci			continue;
402862306a36Sopenharmony_ci
402962306a36Sopenharmony_ci		if (meta2->flags & FLAG_INSN_IS_JUMP_DST ||
403062306a36Sopenharmony_ci		    meta3->flags & FLAG_INSN_IS_JUMP_DST)
403162306a36Sopenharmony_ci			continue;
403262306a36Sopenharmony_ci
403362306a36Sopenharmony_ci		meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
403462306a36Sopenharmony_ci		meta3->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
403562306a36Sopenharmony_ci	}
403662306a36Sopenharmony_ci}
403762306a36Sopenharmony_ci
403862306a36Sopenharmony_ci/* load/store pair that forms memory copy sould look like the following:
403962306a36Sopenharmony_ci *
404062306a36Sopenharmony_ci *   ld_width R, [addr_src + offset_src]
404162306a36Sopenharmony_ci *   st_width [addr_dest + offset_dest], R
404262306a36Sopenharmony_ci *
404362306a36Sopenharmony_ci * The destination register of load and source register of store should
404462306a36Sopenharmony_ci * be the same, load and store should also perform at the same width.
404562306a36Sopenharmony_ci * If either of addr_src or addr_dest is stack pointer, we don't do the
404662306a36Sopenharmony_ci * CPP optimization as stack is modelled by registers on NFP.
404762306a36Sopenharmony_ci */
404862306a36Sopenharmony_cistatic bool
404962306a36Sopenharmony_cicurr_pair_is_memcpy(struct nfp_insn_meta *ld_meta,
405062306a36Sopenharmony_ci		    struct nfp_insn_meta *st_meta)
405162306a36Sopenharmony_ci{
405262306a36Sopenharmony_ci	struct bpf_insn *ld = &ld_meta->insn;
405362306a36Sopenharmony_ci	struct bpf_insn *st = &st_meta->insn;
405462306a36Sopenharmony_ci
405562306a36Sopenharmony_ci	if (!is_mbpf_load(ld_meta) || !is_mbpf_store(st_meta))
405662306a36Sopenharmony_ci		return false;
405762306a36Sopenharmony_ci
405862306a36Sopenharmony_ci	if (ld_meta->ptr.type != PTR_TO_PACKET &&
405962306a36Sopenharmony_ci	    ld_meta->ptr.type != PTR_TO_MAP_VALUE)
406062306a36Sopenharmony_ci		return false;
406162306a36Sopenharmony_ci
406262306a36Sopenharmony_ci	if (st_meta->ptr.type != PTR_TO_PACKET)
406362306a36Sopenharmony_ci		return false;
406462306a36Sopenharmony_ci
406562306a36Sopenharmony_ci	if (BPF_SIZE(ld->code) != BPF_SIZE(st->code))
406662306a36Sopenharmony_ci		return false;
406762306a36Sopenharmony_ci
406862306a36Sopenharmony_ci	if (ld->dst_reg != st->src_reg)
406962306a36Sopenharmony_ci		return false;
407062306a36Sopenharmony_ci
407162306a36Sopenharmony_ci	/* There is jump to the store insn in this pair. */
407262306a36Sopenharmony_ci	if (st_meta->flags & FLAG_INSN_IS_JUMP_DST)
407362306a36Sopenharmony_ci		return false;
407462306a36Sopenharmony_ci
407562306a36Sopenharmony_ci	return true;
407662306a36Sopenharmony_ci}
407762306a36Sopenharmony_ci
407862306a36Sopenharmony_ci/* Currently, we only support chaining load/store pairs if:
407962306a36Sopenharmony_ci *
408062306a36Sopenharmony_ci *  - Their address base registers are the same.
408162306a36Sopenharmony_ci *  - Their address offsets are in the same order.
408262306a36Sopenharmony_ci *  - They operate at the same memory width.
408362306a36Sopenharmony_ci *  - There is no jump into the middle of them.
408462306a36Sopenharmony_ci */
408562306a36Sopenharmony_cistatic bool
408662306a36Sopenharmony_cicurr_pair_chain_with_previous(struct nfp_insn_meta *ld_meta,
408762306a36Sopenharmony_ci			      struct nfp_insn_meta *st_meta,
408862306a36Sopenharmony_ci			      struct bpf_insn *prev_ld,
408962306a36Sopenharmony_ci			      struct bpf_insn *prev_st)
409062306a36Sopenharmony_ci{
409162306a36Sopenharmony_ci	u8 prev_size, curr_size, prev_ld_base, prev_st_base, prev_ld_dst;
409262306a36Sopenharmony_ci	struct bpf_insn *ld = &ld_meta->insn;
409362306a36Sopenharmony_ci	struct bpf_insn *st = &st_meta->insn;
409462306a36Sopenharmony_ci	s16 prev_ld_off, prev_st_off;
409562306a36Sopenharmony_ci
409662306a36Sopenharmony_ci	/* This pair is the start pair. */
409762306a36Sopenharmony_ci	if (!prev_ld)
409862306a36Sopenharmony_ci		return true;
409962306a36Sopenharmony_ci
410062306a36Sopenharmony_ci	prev_size = BPF_LDST_BYTES(prev_ld);
410162306a36Sopenharmony_ci	curr_size = BPF_LDST_BYTES(ld);
410262306a36Sopenharmony_ci	prev_ld_base = prev_ld->src_reg;
410362306a36Sopenharmony_ci	prev_st_base = prev_st->dst_reg;
410462306a36Sopenharmony_ci	prev_ld_dst = prev_ld->dst_reg;
410562306a36Sopenharmony_ci	prev_ld_off = prev_ld->off;
410662306a36Sopenharmony_ci	prev_st_off = prev_st->off;
410762306a36Sopenharmony_ci
410862306a36Sopenharmony_ci	if (ld->dst_reg != prev_ld_dst)
410962306a36Sopenharmony_ci		return false;
411062306a36Sopenharmony_ci
411162306a36Sopenharmony_ci	if (ld->src_reg != prev_ld_base || st->dst_reg != prev_st_base)
411262306a36Sopenharmony_ci		return false;
411362306a36Sopenharmony_ci
411462306a36Sopenharmony_ci	if (curr_size != prev_size)
411562306a36Sopenharmony_ci		return false;
411662306a36Sopenharmony_ci
411762306a36Sopenharmony_ci	/* There is jump to the head of this pair. */
411862306a36Sopenharmony_ci	if (ld_meta->flags & FLAG_INSN_IS_JUMP_DST)
411962306a36Sopenharmony_ci		return false;
412062306a36Sopenharmony_ci
412162306a36Sopenharmony_ci	/* Both in ascending order. */
412262306a36Sopenharmony_ci	if (prev_ld_off + prev_size == ld->off &&
412362306a36Sopenharmony_ci	    prev_st_off + prev_size == st->off)
412462306a36Sopenharmony_ci		return true;
412562306a36Sopenharmony_ci
412662306a36Sopenharmony_ci	/* Both in descending order. */
412762306a36Sopenharmony_ci	if (ld->off + curr_size == prev_ld_off &&
412862306a36Sopenharmony_ci	    st->off + curr_size == prev_st_off)
412962306a36Sopenharmony_ci		return true;
413062306a36Sopenharmony_ci
413162306a36Sopenharmony_ci	return false;
413262306a36Sopenharmony_ci}
413362306a36Sopenharmony_ci
413462306a36Sopenharmony_ci/* Return TRUE if cross memory access happens. Cross memory access means
413562306a36Sopenharmony_ci * store area is overlapping with load area that a later load might load
413662306a36Sopenharmony_ci * the value from previous store, for this case we can't treat the sequence
413762306a36Sopenharmony_ci * as an memory copy.
413862306a36Sopenharmony_ci */
413962306a36Sopenharmony_cistatic bool
414062306a36Sopenharmony_cicross_mem_access(struct bpf_insn *ld, struct nfp_insn_meta *head_ld_meta,
414162306a36Sopenharmony_ci		 struct nfp_insn_meta *head_st_meta)
414262306a36Sopenharmony_ci{
414362306a36Sopenharmony_ci	s16 head_ld_off, head_st_off, ld_off;
414462306a36Sopenharmony_ci
414562306a36Sopenharmony_ci	/* Different pointer types does not overlap. */
414662306a36Sopenharmony_ci	if (head_ld_meta->ptr.type != head_st_meta->ptr.type)
414762306a36Sopenharmony_ci		return false;
414862306a36Sopenharmony_ci
414962306a36Sopenharmony_ci	/* load and store are both PTR_TO_PACKET, check ID info.  */
415062306a36Sopenharmony_ci	if (head_ld_meta->ptr.id != head_st_meta->ptr.id)
415162306a36Sopenharmony_ci		return true;
415262306a36Sopenharmony_ci
415362306a36Sopenharmony_ci	/* Canonicalize the offsets. Turn all of them against the original
415462306a36Sopenharmony_ci	 * base register.
415562306a36Sopenharmony_ci	 */
415662306a36Sopenharmony_ci	head_ld_off = head_ld_meta->insn.off + head_ld_meta->ptr.off;
415762306a36Sopenharmony_ci	head_st_off = head_st_meta->insn.off + head_st_meta->ptr.off;
415862306a36Sopenharmony_ci	ld_off = ld->off + head_ld_meta->ptr.off;
415962306a36Sopenharmony_ci
416062306a36Sopenharmony_ci	/* Ascending order cross. */
416162306a36Sopenharmony_ci	if (ld_off > head_ld_off &&
416262306a36Sopenharmony_ci	    head_ld_off < head_st_off && ld_off >= head_st_off)
416362306a36Sopenharmony_ci		return true;
416462306a36Sopenharmony_ci
416562306a36Sopenharmony_ci	/* Descending order cross. */
416662306a36Sopenharmony_ci	if (ld_off < head_ld_off &&
416762306a36Sopenharmony_ci	    head_ld_off > head_st_off && ld_off <= head_st_off)
416862306a36Sopenharmony_ci		return true;
416962306a36Sopenharmony_ci
417062306a36Sopenharmony_ci	return false;
417162306a36Sopenharmony_ci}
417262306a36Sopenharmony_ci
417362306a36Sopenharmony_ci/* This pass try to identify the following instructoin sequences.
417462306a36Sopenharmony_ci *
417562306a36Sopenharmony_ci *   load R, [regA + offA]
417662306a36Sopenharmony_ci *   store [regB + offB], R
417762306a36Sopenharmony_ci *   load R, [regA + offA + const_imm_A]
417862306a36Sopenharmony_ci *   store [regB + offB + const_imm_A], R
417962306a36Sopenharmony_ci *   load R, [regA + offA + 2 * const_imm_A]
418062306a36Sopenharmony_ci *   store [regB + offB + 2 * const_imm_A], R
418162306a36Sopenharmony_ci *   ...
418262306a36Sopenharmony_ci *
418362306a36Sopenharmony_ci * Above sequence is typically generated by compiler when lowering
418462306a36Sopenharmony_ci * memcpy. NFP prefer using CPP instructions to accelerate it.
418562306a36Sopenharmony_ci */
418662306a36Sopenharmony_cistatic void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog)
418762306a36Sopenharmony_ci{
418862306a36Sopenharmony_ci	struct nfp_insn_meta *head_ld_meta = NULL;
418962306a36Sopenharmony_ci	struct nfp_insn_meta *head_st_meta = NULL;
419062306a36Sopenharmony_ci	struct nfp_insn_meta *meta1, *meta2;
419162306a36Sopenharmony_ci	struct bpf_insn *prev_ld = NULL;
419262306a36Sopenharmony_ci	struct bpf_insn *prev_st = NULL;
419362306a36Sopenharmony_ci	u8 count = 0;
419462306a36Sopenharmony_ci
419562306a36Sopenharmony_ci	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
419662306a36Sopenharmony_ci		struct bpf_insn *ld = &meta1->insn;
419762306a36Sopenharmony_ci		struct bpf_insn *st = &meta2->insn;
419862306a36Sopenharmony_ci
419962306a36Sopenharmony_ci		/* Reset record status if any of the following if true:
420062306a36Sopenharmony_ci		 *   - The current insn pair is not load/store.
420162306a36Sopenharmony_ci		 *   - The load/store pair doesn't chain with previous one.
420262306a36Sopenharmony_ci		 *   - The chained load/store pair crossed with previous pair.
420362306a36Sopenharmony_ci		 *   - The chained load/store pair has a total size of memory
420462306a36Sopenharmony_ci		 *     copy beyond 128 bytes which is the maximum length a
420562306a36Sopenharmony_ci		 *     single NFP CPP command can transfer.
420662306a36Sopenharmony_ci		 */
420762306a36Sopenharmony_ci		if (!curr_pair_is_memcpy(meta1, meta2) ||
420862306a36Sopenharmony_ci		    !curr_pair_chain_with_previous(meta1, meta2, prev_ld,
420962306a36Sopenharmony_ci						   prev_st) ||
421062306a36Sopenharmony_ci		    (head_ld_meta && (cross_mem_access(ld, head_ld_meta,
421162306a36Sopenharmony_ci						       head_st_meta) ||
421262306a36Sopenharmony_ci				      head_ld_meta->ldst_gather_len >= 128))) {
421362306a36Sopenharmony_ci			if (!count)
421462306a36Sopenharmony_ci				continue;
421562306a36Sopenharmony_ci
421662306a36Sopenharmony_ci			if (count > 1) {
421762306a36Sopenharmony_ci				s16 prev_ld_off = prev_ld->off;
421862306a36Sopenharmony_ci				s16 prev_st_off = prev_st->off;
421962306a36Sopenharmony_ci				s16 head_ld_off = head_ld_meta->insn.off;
422062306a36Sopenharmony_ci
422162306a36Sopenharmony_ci				if (prev_ld_off < head_ld_off) {
422262306a36Sopenharmony_ci					head_ld_meta->insn.off = prev_ld_off;
422362306a36Sopenharmony_ci					head_st_meta->insn.off = prev_st_off;
422462306a36Sopenharmony_ci					head_ld_meta->ldst_gather_len =
422562306a36Sopenharmony_ci						-head_ld_meta->ldst_gather_len;
422662306a36Sopenharmony_ci				}
422762306a36Sopenharmony_ci
422862306a36Sopenharmony_ci				head_ld_meta->paired_st = &head_st_meta->insn;
422962306a36Sopenharmony_ci				head_st_meta->flags |=
423062306a36Sopenharmony_ci					FLAG_INSN_SKIP_PREC_DEPENDENT;
423162306a36Sopenharmony_ci			} else {
423262306a36Sopenharmony_ci				head_ld_meta->ldst_gather_len = 0;
423362306a36Sopenharmony_ci			}
423462306a36Sopenharmony_ci
423562306a36Sopenharmony_ci			/* If the chain is ended by an load/store pair then this
423662306a36Sopenharmony_ci			 * could serve as the new head of the next chain.
423762306a36Sopenharmony_ci			 */
423862306a36Sopenharmony_ci			if (curr_pair_is_memcpy(meta1, meta2)) {
423962306a36Sopenharmony_ci				head_ld_meta = meta1;
424062306a36Sopenharmony_ci				head_st_meta = meta2;
424162306a36Sopenharmony_ci				head_ld_meta->ldst_gather_len =
424262306a36Sopenharmony_ci					BPF_LDST_BYTES(ld);
424362306a36Sopenharmony_ci				meta1 = nfp_meta_next(meta1);
424462306a36Sopenharmony_ci				meta2 = nfp_meta_next(meta2);
424562306a36Sopenharmony_ci				prev_ld = ld;
424662306a36Sopenharmony_ci				prev_st = st;
424762306a36Sopenharmony_ci				count = 1;
424862306a36Sopenharmony_ci			} else {
424962306a36Sopenharmony_ci				head_ld_meta = NULL;
425062306a36Sopenharmony_ci				head_st_meta = NULL;
425162306a36Sopenharmony_ci				prev_ld = NULL;
425262306a36Sopenharmony_ci				prev_st = NULL;
425362306a36Sopenharmony_ci				count = 0;
425462306a36Sopenharmony_ci			}
425562306a36Sopenharmony_ci
425662306a36Sopenharmony_ci			continue;
425762306a36Sopenharmony_ci		}
425862306a36Sopenharmony_ci
425962306a36Sopenharmony_ci		if (!head_ld_meta) {
426062306a36Sopenharmony_ci			head_ld_meta = meta1;
426162306a36Sopenharmony_ci			head_st_meta = meta2;
426262306a36Sopenharmony_ci		} else {
426362306a36Sopenharmony_ci			meta1->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
426462306a36Sopenharmony_ci			meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
426562306a36Sopenharmony_ci		}
426662306a36Sopenharmony_ci
426762306a36Sopenharmony_ci		head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld);
426862306a36Sopenharmony_ci		meta1 = nfp_meta_next(meta1);
426962306a36Sopenharmony_ci		meta2 = nfp_meta_next(meta2);
427062306a36Sopenharmony_ci		prev_ld = ld;
427162306a36Sopenharmony_ci		prev_st = st;
427262306a36Sopenharmony_ci		count++;
427362306a36Sopenharmony_ci	}
427462306a36Sopenharmony_ci}
427562306a36Sopenharmony_ci
427662306a36Sopenharmony_cistatic void nfp_bpf_opt_pkt_cache(struct nfp_prog *nfp_prog)
427762306a36Sopenharmony_ci{
427862306a36Sopenharmony_ci	struct nfp_insn_meta *meta, *range_node = NULL;
427962306a36Sopenharmony_ci	s16 range_start = 0, range_end = 0;
428062306a36Sopenharmony_ci	bool cache_avail = false;
428162306a36Sopenharmony_ci	struct bpf_insn *insn;
428262306a36Sopenharmony_ci	s32 range_ptr_off = 0;
428362306a36Sopenharmony_ci	u32 range_ptr_id = 0;
428462306a36Sopenharmony_ci
428562306a36Sopenharmony_ci	list_for_each_entry(meta, &nfp_prog->insns, l) {
428662306a36Sopenharmony_ci		if (meta->flags & FLAG_INSN_IS_JUMP_DST)
428762306a36Sopenharmony_ci			cache_avail = false;
428862306a36Sopenharmony_ci
428962306a36Sopenharmony_ci		if (meta->flags & FLAG_INSN_SKIP_MASK)
429062306a36Sopenharmony_ci			continue;
429162306a36Sopenharmony_ci
429262306a36Sopenharmony_ci		insn = &meta->insn;
429362306a36Sopenharmony_ci
429462306a36Sopenharmony_ci		if (is_mbpf_store_pkt(meta) ||
429562306a36Sopenharmony_ci		    insn->code == (BPF_JMP | BPF_CALL) ||
429662306a36Sopenharmony_ci		    is_mbpf_classic_store_pkt(meta) ||
429762306a36Sopenharmony_ci		    is_mbpf_classic_load(meta)) {
429862306a36Sopenharmony_ci			cache_avail = false;
429962306a36Sopenharmony_ci			continue;
430062306a36Sopenharmony_ci		}
430162306a36Sopenharmony_ci
430262306a36Sopenharmony_ci		if (!is_mbpf_load(meta))
430362306a36Sopenharmony_ci			continue;
430462306a36Sopenharmony_ci
430562306a36Sopenharmony_ci		if (meta->ptr.type != PTR_TO_PACKET || meta->ldst_gather_len) {
430662306a36Sopenharmony_ci			cache_avail = false;
430762306a36Sopenharmony_ci			continue;
430862306a36Sopenharmony_ci		}
430962306a36Sopenharmony_ci
431062306a36Sopenharmony_ci		if (!cache_avail) {
431162306a36Sopenharmony_ci			cache_avail = true;
431262306a36Sopenharmony_ci			if (range_node)
431362306a36Sopenharmony_ci				goto end_current_then_start_new;
431462306a36Sopenharmony_ci			goto start_new;
431562306a36Sopenharmony_ci		}
431662306a36Sopenharmony_ci
431762306a36Sopenharmony_ci		/* Check ID to make sure two reads share the same
431862306a36Sopenharmony_ci		 * variable offset against PTR_TO_PACKET, and check OFF
431962306a36Sopenharmony_ci		 * to make sure they also share the same constant
432062306a36Sopenharmony_ci		 * offset.
432162306a36Sopenharmony_ci		 *
432262306a36Sopenharmony_ci		 * OFFs don't really need to be the same, because they
432362306a36Sopenharmony_ci		 * are the constant offsets against PTR_TO_PACKET, so
432462306a36Sopenharmony_ci		 * for different OFFs, we could canonicalize them to
432562306a36Sopenharmony_ci		 * offsets against original packet pointer. We don't
432662306a36Sopenharmony_ci		 * support this.
432762306a36Sopenharmony_ci		 */
432862306a36Sopenharmony_ci		if (meta->ptr.id == range_ptr_id &&
432962306a36Sopenharmony_ci		    meta->ptr.off == range_ptr_off) {
433062306a36Sopenharmony_ci			s16 new_start = range_start;
433162306a36Sopenharmony_ci			s16 end, off = insn->off;
433262306a36Sopenharmony_ci			s16 new_end = range_end;
433362306a36Sopenharmony_ci			bool changed = false;
433462306a36Sopenharmony_ci
433562306a36Sopenharmony_ci			if (off < range_start) {
433662306a36Sopenharmony_ci				new_start = off;
433762306a36Sopenharmony_ci				changed = true;
433862306a36Sopenharmony_ci			}
433962306a36Sopenharmony_ci
434062306a36Sopenharmony_ci			end = off + BPF_LDST_BYTES(insn);
434162306a36Sopenharmony_ci			if (end > range_end) {
434262306a36Sopenharmony_ci				new_end = end;
434362306a36Sopenharmony_ci				changed = true;
434462306a36Sopenharmony_ci			}
434562306a36Sopenharmony_ci
434662306a36Sopenharmony_ci			if (!changed)
434762306a36Sopenharmony_ci				continue;
434862306a36Sopenharmony_ci
434962306a36Sopenharmony_ci			if (new_end - new_start <= 64) {
435062306a36Sopenharmony_ci				/* Install new range. */
435162306a36Sopenharmony_ci				range_start = new_start;
435262306a36Sopenharmony_ci				range_end = new_end;
435362306a36Sopenharmony_ci				continue;
435462306a36Sopenharmony_ci			}
435562306a36Sopenharmony_ci		}
435662306a36Sopenharmony_ci
435762306a36Sopenharmony_ciend_current_then_start_new:
435862306a36Sopenharmony_ci		range_node->pkt_cache.range_start = range_start;
435962306a36Sopenharmony_ci		range_node->pkt_cache.range_end = range_end;
436062306a36Sopenharmony_cistart_new:
436162306a36Sopenharmony_ci		range_node = meta;
436262306a36Sopenharmony_ci		range_node->pkt_cache.do_init = true;
436362306a36Sopenharmony_ci		range_ptr_id = range_node->ptr.id;
436462306a36Sopenharmony_ci		range_ptr_off = range_node->ptr.off;
436562306a36Sopenharmony_ci		range_start = insn->off;
436662306a36Sopenharmony_ci		range_end = insn->off + BPF_LDST_BYTES(insn);
436762306a36Sopenharmony_ci	}
436862306a36Sopenharmony_ci
436962306a36Sopenharmony_ci	if (range_node) {
437062306a36Sopenharmony_ci		range_node->pkt_cache.range_start = range_start;
437162306a36Sopenharmony_ci		range_node->pkt_cache.range_end = range_end;
437262306a36Sopenharmony_ci	}
437362306a36Sopenharmony_ci
437462306a36Sopenharmony_ci	list_for_each_entry(meta, &nfp_prog->insns, l) {
437562306a36Sopenharmony_ci		if (meta->flags & FLAG_INSN_SKIP_MASK)
437662306a36Sopenharmony_ci			continue;
437762306a36Sopenharmony_ci
437862306a36Sopenharmony_ci		if (is_mbpf_load_pkt(meta) && !meta->ldst_gather_len) {
437962306a36Sopenharmony_ci			if (meta->pkt_cache.do_init) {
438062306a36Sopenharmony_ci				range_start = meta->pkt_cache.range_start;
438162306a36Sopenharmony_ci				range_end = meta->pkt_cache.range_end;
438262306a36Sopenharmony_ci			} else {
438362306a36Sopenharmony_ci				meta->pkt_cache.range_start = range_start;
438462306a36Sopenharmony_ci				meta->pkt_cache.range_end = range_end;
438562306a36Sopenharmony_ci			}
438662306a36Sopenharmony_ci		}
438762306a36Sopenharmony_ci	}
438862306a36Sopenharmony_ci}
438962306a36Sopenharmony_ci
/* Run the optimization passes over the program, in a fixed order.
 *
 * Always returns 0.
 */
static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
{
	nfp_bpf_opt_reg_init(nfp_prog);

	nfp_bpf_opt_neg_add_sub(nfp_prog);
	nfp_bpf_opt_ld_mask(nfp_prog);
	nfp_bpf_opt_ld_shift(nfp_prog);

	/* The packet cache pass looks at ldst_gather_len, which the gather
	 * pass fills in, so gathering has to run first.
	 */
	nfp_bpf_opt_ldst_gather(nfp_prog);
	nfp_bpf_opt_pkt_cache(nfp_prog);

	return 0;
}
440262306a36Sopenharmony_ci
440362306a36Sopenharmony_cistatic int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog)
440462306a36Sopenharmony_ci{
440562306a36Sopenharmony_ci	struct nfp_insn_meta *meta1, *meta2;
440662306a36Sopenharmony_ci	struct nfp_bpf_map *nfp_map;
440762306a36Sopenharmony_ci	struct bpf_map *map;
440862306a36Sopenharmony_ci	u32 id;
440962306a36Sopenharmony_ci
441062306a36Sopenharmony_ci	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
441162306a36Sopenharmony_ci		if (meta1->flags & FLAG_INSN_SKIP_MASK ||
441262306a36Sopenharmony_ci		    meta2->flags & FLAG_INSN_SKIP_MASK)
441362306a36Sopenharmony_ci			continue;
441462306a36Sopenharmony_ci
441562306a36Sopenharmony_ci		if (meta1->insn.code != (BPF_LD | BPF_IMM | BPF_DW) ||
441662306a36Sopenharmony_ci		    meta1->insn.src_reg != BPF_PSEUDO_MAP_FD)
441762306a36Sopenharmony_ci			continue;
441862306a36Sopenharmony_ci
441962306a36Sopenharmony_ci		map = (void *)(unsigned long)((u32)meta1->insn.imm |
442062306a36Sopenharmony_ci					      (u64)meta2->insn.imm << 32);
442162306a36Sopenharmony_ci		if (bpf_map_offload_neutral(map)) {
442262306a36Sopenharmony_ci			id = map->id;
442362306a36Sopenharmony_ci		} else {
442462306a36Sopenharmony_ci			nfp_map = map_to_offmap(map)->dev_priv;
442562306a36Sopenharmony_ci			id = nfp_map->tid;
442662306a36Sopenharmony_ci		}
442762306a36Sopenharmony_ci
442862306a36Sopenharmony_ci		meta1->insn.imm = id;
442962306a36Sopenharmony_ci		meta2->insn.imm = 0;
443062306a36Sopenharmony_ci	}
443162306a36Sopenharmony_ci
443262306a36Sopenharmony_ci	return 0;
443362306a36Sopenharmony_ci}
443462306a36Sopenharmony_ci
443562306a36Sopenharmony_cistatic int nfp_bpf_ustore_calc(u64 *prog, unsigned int len)
443662306a36Sopenharmony_ci{
443762306a36Sopenharmony_ci	__le64 *ustore = (__force __le64 *)prog;
443862306a36Sopenharmony_ci	int i;
443962306a36Sopenharmony_ci
444062306a36Sopenharmony_ci	for (i = 0; i < len; i++) {
444162306a36Sopenharmony_ci		int err;
444262306a36Sopenharmony_ci
444362306a36Sopenharmony_ci		err = nfp_ustore_check_valid_no_ecc(prog[i]);
444462306a36Sopenharmony_ci		if (err)
444562306a36Sopenharmony_ci			return err;
444662306a36Sopenharmony_ci
444762306a36Sopenharmony_ci		ustore[i] = cpu_to_le64(nfp_ustore_calc_ecc_insn(prog[i]));
444862306a36Sopenharmony_ci	}
444962306a36Sopenharmony_ci
445062306a36Sopenharmony_ci	return 0;
445162306a36Sopenharmony_ci}
445262306a36Sopenharmony_ci
445362306a36Sopenharmony_cistatic void nfp_bpf_prog_trim(struct nfp_prog *nfp_prog)
445462306a36Sopenharmony_ci{
445562306a36Sopenharmony_ci	void *prog;
445662306a36Sopenharmony_ci
445762306a36Sopenharmony_ci	prog = kvmalloc_array(nfp_prog->prog_len, sizeof(u64), GFP_KERNEL);
445862306a36Sopenharmony_ci	if (!prog)
445962306a36Sopenharmony_ci		return;
446062306a36Sopenharmony_ci
446162306a36Sopenharmony_ci	nfp_prog->__prog_alloc_len = nfp_prog->prog_len * sizeof(u64);
446262306a36Sopenharmony_ci	memcpy(prog, nfp_prog->prog, nfp_prog->__prog_alloc_len);
446362306a36Sopenharmony_ci	kvfree(nfp_prog->prog);
446462306a36Sopenharmony_ci	nfp_prog->prog = prog;
446562306a36Sopenharmony_ci}
446662306a36Sopenharmony_ci
446762306a36Sopenharmony_ciint nfp_bpf_jit(struct nfp_prog *nfp_prog)
446862306a36Sopenharmony_ci{
446962306a36Sopenharmony_ci	int ret;
447062306a36Sopenharmony_ci
447162306a36Sopenharmony_ci	ret = nfp_bpf_replace_map_ptrs(nfp_prog);
447262306a36Sopenharmony_ci	if (ret)
447362306a36Sopenharmony_ci		return ret;
447462306a36Sopenharmony_ci
447562306a36Sopenharmony_ci	ret = nfp_bpf_optimize(nfp_prog);
447662306a36Sopenharmony_ci	if (ret)
447762306a36Sopenharmony_ci		return ret;
447862306a36Sopenharmony_ci
447962306a36Sopenharmony_ci	ret = nfp_translate(nfp_prog);
448062306a36Sopenharmony_ci	if (ret) {
448162306a36Sopenharmony_ci		pr_err("Translation failed with error %d (translated: %u)\n",
448262306a36Sopenharmony_ci		       ret, nfp_prog->n_translated);
448362306a36Sopenharmony_ci		return -EINVAL;
448462306a36Sopenharmony_ci	}
448562306a36Sopenharmony_ci
448662306a36Sopenharmony_ci	nfp_bpf_prog_trim(nfp_prog);
448762306a36Sopenharmony_ci
448862306a36Sopenharmony_ci	return ret;
448962306a36Sopenharmony_ci}
449062306a36Sopenharmony_ci
449162306a36Sopenharmony_civoid nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog)
449262306a36Sopenharmony_ci{
449362306a36Sopenharmony_ci	struct nfp_insn_meta *meta;
449462306a36Sopenharmony_ci
449562306a36Sopenharmony_ci	/* Another pass to record jump information. */
449662306a36Sopenharmony_ci	list_for_each_entry(meta, &nfp_prog->insns, l) {
449762306a36Sopenharmony_ci		struct nfp_insn_meta *dst_meta;
449862306a36Sopenharmony_ci		u64 code = meta->insn.code;
449962306a36Sopenharmony_ci		unsigned int dst_idx;
450062306a36Sopenharmony_ci		bool pseudo_call;
450162306a36Sopenharmony_ci
450262306a36Sopenharmony_ci		if (!is_mbpf_jmp(meta))
450362306a36Sopenharmony_ci			continue;
450462306a36Sopenharmony_ci		if (BPF_OP(code) == BPF_EXIT)
450562306a36Sopenharmony_ci			continue;
450662306a36Sopenharmony_ci		if (is_mbpf_helper_call(meta))
450762306a36Sopenharmony_ci			continue;
450862306a36Sopenharmony_ci
450962306a36Sopenharmony_ci		/* If opcode is BPF_CALL at this point, this can only be a
451062306a36Sopenharmony_ci		 * BPF-to-BPF call (a.k.a pseudo call).
451162306a36Sopenharmony_ci		 */
451262306a36Sopenharmony_ci		pseudo_call = BPF_OP(code) == BPF_CALL;
451362306a36Sopenharmony_ci
451462306a36Sopenharmony_ci		if (pseudo_call)
451562306a36Sopenharmony_ci			dst_idx = meta->n + 1 + meta->insn.imm;
451662306a36Sopenharmony_ci		else
451762306a36Sopenharmony_ci			dst_idx = meta->n + 1 + meta->insn.off;
451862306a36Sopenharmony_ci
451962306a36Sopenharmony_ci		dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_idx);
452062306a36Sopenharmony_ci
452162306a36Sopenharmony_ci		if (pseudo_call)
452262306a36Sopenharmony_ci			dst_meta->flags |= FLAG_INSN_IS_SUBPROG_START;
452362306a36Sopenharmony_ci
452462306a36Sopenharmony_ci		dst_meta->flags |= FLAG_INSN_IS_JUMP_DST;
452562306a36Sopenharmony_ci		meta->jmp_dst = dst_meta;
452662306a36Sopenharmony_ci	}
452762306a36Sopenharmony_ci}
452862306a36Sopenharmony_ci
452962306a36Sopenharmony_cibool nfp_bpf_supported_opcode(u8 code)
453062306a36Sopenharmony_ci{
453162306a36Sopenharmony_ci	return !!instr_cb[code];
453262306a36Sopenharmony_ci}
453362306a36Sopenharmony_ci
453462306a36Sopenharmony_civoid *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
453562306a36Sopenharmony_ci{
453662306a36Sopenharmony_ci	unsigned int i;
453762306a36Sopenharmony_ci	u64 *prog;
453862306a36Sopenharmony_ci	int err;
453962306a36Sopenharmony_ci
454062306a36Sopenharmony_ci	prog = kmemdup(nfp_prog->prog, nfp_prog->prog_len * sizeof(u64),
454162306a36Sopenharmony_ci		       GFP_KERNEL);
454262306a36Sopenharmony_ci	if (!prog)
454362306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
454462306a36Sopenharmony_ci
454562306a36Sopenharmony_ci	for (i = 0; i < nfp_prog->prog_len; i++) {
454662306a36Sopenharmony_ci		enum nfp_relo_type special;
454762306a36Sopenharmony_ci		u32 val;
454862306a36Sopenharmony_ci		u16 off;
454962306a36Sopenharmony_ci
455062306a36Sopenharmony_ci		special = FIELD_GET(OP_RELO_TYPE, prog[i]);
455162306a36Sopenharmony_ci		switch (special) {
455262306a36Sopenharmony_ci		case RELO_NONE:
455362306a36Sopenharmony_ci			continue;
455462306a36Sopenharmony_ci		case RELO_BR_REL:
455562306a36Sopenharmony_ci			br_add_offset(&prog[i], bv->start_off);
455662306a36Sopenharmony_ci			break;
455762306a36Sopenharmony_ci		case RELO_BR_GO_OUT:
455862306a36Sopenharmony_ci			br_set_offset(&prog[i],
455962306a36Sopenharmony_ci				      nfp_prog->tgt_out + bv->start_off);
456062306a36Sopenharmony_ci			break;
456162306a36Sopenharmony_ci		case RELO_BR_GO_ABORT:
456262306a36Sopenharmony_ci			br_set_offset(&prog[i],
456362306a36Sopenharmony_ci				      nfp_prog->tgt_abort + bv->start_off);
456462306a36Sopenharmony_ci			break;
456562306a36Sopenharmony_ci		case RELO_BR_GO_CALL_PUSH_REGS:
456662306a36Sopenharmony_ci			if (!nfp_prog->tgt_call_push_regs) {
456762306a36Sopenharmony_ci				pr_err("BUG: failed to detect subprogram registers needs\n");
456862306a36Sopenharmony_ci				err = -EINVAL;
456962306a36Sopenharmony_ci				goto err_free_prog;
457062306a36Sopenharmony_ci			}
457162306a36Sopenharmony_ci			off = nfp_prog->tgt_call_push_regs + bv->start_off;
457262306a36Sopenharmony_ci			br_set_offset(&prog[i], off);
457362306a36Sopenharmony_ci			break;
457462306a36Sopenharmony_ci		case RELO_BR_GO_CALL_POP_REGS:
457562306a36Sopenharmony_ci			if (!nfp_prog->tgt_call_pop_regs) {
457662306a36Sopenharmony_ci				pr_err("BUG: failed to detect subprogram registers needs\n");
457762306a36Sopenharmony_ci				err = -EINVAL;
457862306a36Sopenharmony_ci				goto err_free_prog;
457962306a36Sopenharmony_ci			}
458062306a36Sopenharmony_ci			off = nfp_prog->tgt_call_pop_regs + bv->start_off;
458162306a36Sopenharmony_ci			br_set_offset(&prog[i], off);
458262306a36Sopenharmony_ci			break;
458362306a36Sopenharmony_ci		case RELO_BR_NEXT_PKT:
458462306a36Sopenharmony_ci			br_set_offset(&prog[i], bv->tgt_done);
458562306a36Sopenharmony_ci			break;
458662306a36Sopenharmony_ci		case RELO_BR_HELPER:
458762306a36Sopenharmony_ci			val = br_get_offset(prog[i]);
458862306a36Sopenharmony_ci			val -= BR_OFF_RELO;
458962306a36Sopenharmony_ci			switch (val) {
459062306a36Sopenharmony_ci			case BPF_FUNC_map_lookup_elem:
459162306a36Sopenharmony_ci				val = nfp_prog->bpf->helpers.map_lookup;
459262306a36Sopenharmony_ci				break;
459362306a36Sopenharmony_ci			case BPF_FUNC_map_update_elem:
459462306a36Sopenharmony_ci				val = nfp_prog->bpf->helpers.map_update;
459562306a36Sopenharmony_ci				break;
459662306a36Sopenharmony_ci			case BPF_FUNC_map_delete_elem:
459762306a36Sopenharmony_ci				val = nfp_prog->bpf->helpers.map_delete;
459862306a36Sopenharmony_ci				break;
459962306a36Sopenharmony_ci			case BPF_FUNC_perf_event_output:
460062306a36Sopenharmony_ci				val = nfp_prog->bpf->helpers.perf_event_output;
460162306a36Sopenharmony_ci				break;
460262306a36Sopenharmony_ci			default:
460362306a36Sopenharmony_ci				pr_err("relocation of unknown helper %d\n",
460462306a36Sopenharmony_ci				       val);
460562306a36Sopenharmony_ci				err = -EINVAL;
460662306a36Sopenharmony_ci				goto err_free_prog;
460762306a36Sopenharmony_ci			}
460862306a36Sopenharmony_ci			br_set_offset(&prog[i], val);
460962306a36Sopenharmony_ci			break;
461062306a36Sopenharmony_ci		case RELO_IMMED_REL:
461162306a36Sopenharmony_ci			immed_add_value(&prog[i], bv->start_off);
461262306a36Sopenharmony_ci			break;
461362306a36Sopenharmony_ci		}
461462306a36Sopenharmony_ci
461562306a36Sopenharmony_ci		prog[i] &= ~OP_RELO_TYPE;
461662306a36Sopenharmony_ci	}
461762306a36Sopenharmony_ci
461862306a36Sopenharmony_ci	err = nfp_bpf_ustore_calc(prog, nfp_prog->prog_len);
461962306a36Sopenharmony_ci	if (err)
462062306a36Sopenharmony_ci		goto err_free_prog;
462162306a36Sopenharmony_ci
462262306a36Sopenharmony_ci	return prog;
462362306a36Sopenharmony_ci
462462306a36Sopenharmony_cierr_free_prog:
462562306a36Sopenharmony_ci	kfree(prog);
462662306a36Sopenharmony_ci	return ERR_PTR(err);
462762306a36Sopenharmony_ci}
4628