// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe Over Fabrics Target Passthrough command implementation.
 *
 * Copyright (c) 2017-2018 Western Digital Corporation or its
 * affiliates.
 * Copyright (c) 2019-2020, Eideticom Inc.
 *
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>

#include "../host/nvme.h"
#include "nvmet.h"

MODULE_IMPORT_NS(NVME_TARGET_PASSTHRU);

/*
 * xarray to maintain one passthru subsystem per nvme controller.
 */
static DEFINE_XARRAY(passthru_subsystems);

void nvmet_passthrough_override_cap(struct nvmet_ctrl *ctrl)
{
	/*
	 * Multiple command set support can only be declared if the underlying
	 * controller actually supports it.
	 */
	if (!nvme_multi_css(ctrl->subsys->passthru_ctrl))
		ctrl->cap &= ~(1ULL << 43);
}

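/*
 * When clear_ids is set, rewrite the Namespace Identification Descriptor
 * list returned by the passthru controller so that only the CSI descriptor
 * (if present) is reported back to the host.
 */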
static u16 nvmet_passthru_override_id_descs(struct nvmet_req *req)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	u16 status = NVME_SC_SUCCESS;
	int pos, len;
	bool csi_seen = false;
	void *data;
	u8 csi;

	if (!ctrl->subsys->clear_ids)
		return status;

	data = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
	if (!data)
		return NVME_SC_INTERNAL;

	status = nvmet_copy_from_sgl(req, 0, data, NVME_IDENTIFY_DATA_SIZE);
	if (status)
		goto out_free;

	for (pos = 0; pos < NVME_IDENTIFY_DATA_SIZE; pos += len) {
		struct nvme_ns_id_desc *cur = data + pos;

		if (cur->nidl == 0)
			break;
		if (cur->nidt == NVME_NIDT_CSI) {
			memcpy(&csi, cur + 1, NVME_NIDT_CSI_LEN);
			csi_seen = true;
			break;
		}
		len = sizeof(struct nvme_ns_id_desc) + cur->nidl;
	}

	memset(data, 0, NVME_IDENTIFY_DATA_SIZE);
	if (csi_seen) {
		struct nvme_ns_id_desc *cur = data;

		cur->nidt = NVME_NIDT_CSI;
		cur->nidl = NVME_NIDT_CSI_LEN;
		memcpy(cur + 1, &csi, NVME_NIDT_CSI_LEN);
	}
	status = nvmet_copy_to_sgl(req, 0, data, NVME_IDENTIFY_DATA_SIZE);
out_free:
	kfree(data);
	return status;
}

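/*
 * Fix up the Identify Controller data returned by the passthru controller
 * so that it reflects the fabrics target: clamp mdts to the passthru
 * driver's segment and bio limits, mask the subsystem NQN, and override
 * the fabrics specific fields (ioccsz, iorcsz, msdbd, sgls, etc.).
 */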
static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvme_ctrl *pctrl = ctrl->subsys->passthru_ctrl;
	u16 status = NVME_SC_SUCCESS;
	struct nvme_id_ctrl *id;
	unsigned int max_hw_sectors;
	int page_shift;

	id = kzalloc(sizeof(*id), GFP_KERNEL);
	if (!id)
		return NVME_SC_INTERNAL;

	status = nvmet_copy_from_sgl(req, 0, id, sizeof(*id));
	if (status)
		goto out_free;

	id->cntlid = cpu_to_le16(ctrl->cntlid);
	id->ver = cpu_to_le32(ctrl->subsys->ver);

	/*
	 * The passthru NVMe driver may have a limit on the number of segments
	 * which depends on the host's memory fragmentation. To solve this,
	 * cap mdts so that a transfer needs no more pages than the segment
	 * limit allows.
	 */
	max_hw_sectors = min_not_zero(pctrl->max_segments << PAGE_SECTORS_SHIFT,
				      pctrl->max_hw_sectors);

	/*
	 * nvmet_passthru_map_sg is limited to using a single bio, so limit
	 * the mdts based on BIO_MAX_VECS as well.
	 */
	max_hw_sectors = min_not_zero(BIO_MAX_VECS << PAGE_SECTORS_SHIFT,
				      max_hw_sectors);

	page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;

	id->mdts = ilog2(max_hw_sectors) + 9 - page_shift;

	id->acl = 3;
	/*
	 * We export the fabrics controller's aerl limit; update this when
	 * passthru-based aerl support is added.
	 */
	id->aerl = NVMET_ASYNC_EVENTS - 1;

	/* emulate kas as most PCIe ctrls don't have support for kas */
	id->kas = cpu_to_le16(NVMET_KAS);

	/* don't support host memory buffer */
	id->hmpre = 0;
	id->hmmin = 0;

	id->sqes = min_t(__u8, ((0x6 << 4) | 0x6), id->sqes);
	id->cqes = min_t(__u8, ((0x4 << 4) | 0x4), id->cqes);
	id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);

	/* don't support fused commands */
	id->fuses = 0;

	id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */
	if (ctrl->ops->flags & NVMF_KEYED_SGLS)
		id->sgls |= cpu_to_le32(1 << 2);
	if (req->port->inline_data_size)
		id->sgls |= cpu_to_le32(1 << 20);

	/*
	 * When a passthru controller is set up using the nvme-loop transport,
	 * it will export the passthru ctrl subsysnqn (PCIe NVMe ctrl) and will
	 * fail in nvme/host/core.c in the nvme_init_subsystem()->nvme_active_ctrl()
	 * code path with a duplicate ctrl subsysnqn. In order to prevent that we
	 * mask the passthru-ctrl subsysnqn with the target ctrl subsysnqn.
	 */
	memcpy(id->subnqn, ctrl->subsysnqn, sizeof(id->subnqn));

	/* use fabric id-ctrl values */
	id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) +
				req->port->inline_data_size) / 16);
	id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16);

	id->msdbd = ctrl->ops->msdbd;

	/* Support multipath connections with fabrics */
	id->cmic |= 1 << 1;

	/* Disable reservations, see nvmet_parse_passthru_io_cmd() */
	id->oncs &= cpu_to_le16(~NVME_CTRL_ONCS_RESERVATIONS);

	status = nvmet_copy_to_sgl(req, 0, id, sizeof(struct nvme_id_ctrl));

out_free:
	kfree(id);
	return status;
}

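/*
 * Fix up the Identify Namespace data: clear any LBA formats that carry
 * metadata (the target does not support it) and, when clear_ids is set,
 * zero the NGUID and EUI64 identifiers.
 */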
static u16 nvmet_passthru_override_id_ns(struct nvmet_req *req)
{
	u16 status = NVME_SC_SUCCESS;
	struct nvme_id_ns *id;
	int i;

	id = kzalloc(sizeof(*id), GFP_KERNEL);
	if (!id)
		return NVME_SC_INTERNAL;

	status = nvmet_copy_from_sgl(req, 0, id, sizeof(struct nvme_id_ns));
	if (status)
		goto out_free;

	for (i = 0; i < (id->nlbaf + 1); i++)
		if (id->lbaf[i].ms)
			memset(&id->lbaf[i], 0, sizeof(id->lbaf[i]));

	id->flbas = id->flbas & ~(1 << 4);

	/*
	 * Presently the NVMe-oF target code does not support sending
	 * metadata, so we must disable it here. This should be updated
	 * once the target starts supporting metadata.
	 */
	id->mc = 0;

	if (req->sq->ctrl->subsys->clear_ids) {
		memset(id->nguid, 0, NVME_NIDT_NGUID_LEN);
		memset(id->eui64, 0, NVME_NIDT_EUI64_LEN);
	}

	status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));

out_free:
	kfree(id);
	return status;
}

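/*
 * Work item used for commands with non-trivial effects: execute the request
 * synchronously, apply any Identify overrides, complete the nvmet request
 * and let nvme_passthru_end() handle the command effects.
 */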
static void nvmet_passthru_execute_cmd_work(struct work_struct *w)
{
	struct nvmet_req *req = container_of(w, struct nvmet_req, p.work);
	struct request *rq = req->p.rq;
	struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl;
	struct nvme_ns *ns = rq->q->queuedata;
	u32 effects;
	int status;

	effects = nvme_passthru_start(ctrl, ns, req->cmd->common.opcode);
	status = nvme_execute_rq(rq, false);
	if (status == NVME_SC_SUCCESS &&
	    req->cmd->common.opcode == nvme_admin_identify) {
		switch (req->cmd->identify.cns) {
		case NVME_ID_CNS_CTRL:
			nvmet_passthru_override_id_ctrl(req);
			break;
		case NVME_ID_CNS_NS:
			nvmet_passthru_override_id_ns(req);
			break;
		case NVME_ID_CNS_NS_DESC_LIST:
			nvmet_passthru_override_id_descs(req);
			break;
		}
	} else if (status < 0)
		status = NVME_SC_INTERNAL;

	req->cqe->result = nvme_req(rq)->result;
	nvmet_req_complete(req, status);
	blk_mq_free_request(rq);

	if (effects)
		nvme_passthru_end(ctrl, ns, effects, req->cmd, status);
}

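/* end_io handler for requests executed directly, without the workqueue */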
static enum rq_end_io_ret nvmet_passthru_req_done(struct request *rq,
						  blk_status_t blk_status)
{
	struct nvmet_req *req = rq->end_io_data;

	req->cqe->result = nvme_req(rq)->result;
	nvmet_req_complete(req, nvme_req(rq)->status);
	blk_mq_free_request(rq);
	return RQ_END_IO_NONE;
}

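/*
 * Map the nvmet request's scatterlist onto a single bio (using the inline
 * bvec when possible) and attach it to the passthru block request.
 */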
static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
{
	struct scatterlist *sg;
	struct bio *bio;
	int i;

	if (req->sg_cnt > BIO_MAX_VECS)
		return -EINVAL;

	if (nvmet_use_inline_bvec(req)) {
		bio = &req->p.inline_bio;
		bio_init(bio, NULL, req->inline_bvec,
			 ARRAY_SIZE(req->inline_bvec), req_op(rq));
	} else {
		bio = bio_alloc(NULL, bio_max_segs(req->sg_cnt), req_op(rq),
				GFP_KERNEL);
		bio->bi_end_io = bio_put;
	}

	for_each_sg(req->sg, sg, req->sg_cnt, i) {
		if (bio_add_pc_page(rq->q, bio, sg_page(sg), sg->length,
				    sg->offset) < sg->length) {
			nvmet_req_bio_put(req, bio);
			return -EINVAL;
		}
	}

	blk_rq_bio_prep(rq, bio, req->sg_cnt);

	return 0;
}

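/*
 * Build and issue the passthru request: resolve the namespace for I/O
 * queues, allocate a block request on the appropriate queue, map the data
 * and execute the command either directly or via the workqueue depending
 * on the command effects.
 */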
static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
{
	struct nvme_ctrl *ctrl = nvmet_req_subsys(req)->passthru_ctrl;
	struct request_queue *q = ctrl->admin_q;
	struct nvme_ns *ns = NULL;
	struct request *rq = NULL;
	unsigned int timeout;
	u32 effects;
	u16 status;
	int ret;

	if (likely(req->sq->qid != 0)) {
		u32 nsid = le32_to_cpu(req->cmd->common.nsid);

		ns = nvme_find_get_ns(ctrl, nsid);
		if (unlikely(!ns)) {
			pr_err("failed to get passthru ns nsid:%u\n", nsid);
			status = NVME_SC_INVALID_NS | NVME_SC_DNR;
			goto out;
		}

		q = ns->queue;
		timeout = nvmet_req_subsys(req)->io_timeout;
	} else {
		timeout = nvmet_req_subsys(req)->admin_timeout;
	}

	rq = blk_mq_alloc_request(q, nvme_req_op(req->cmd), 0);
	if (IS_ERR(rq)) {
		status = NVME_SC_INTERNAL;
		goto out_put_ns;
	}
	nvme_init_request(rq, req->cmd);

	if (timeout)
		rq->timeout = timeout;

	if (req->sg_cnt) {
		ret = nvmet_passthru_map_sg(req, rq);
		if (unlikely(ret)) {
			status = NVME_SC_INTERNAL;
			goto out_put_req;
		}
	}

	/*
	 * If a command needs post-execution fixups, or there are any
	 * non-trivial effects, make sure to execute the command synchronously
	 * in a workqueue so that nvme_passthru_end gets called.
	 */
	effects = nvme_command_effects(ctrl, ns, req->cmd->common.opcode);
	if (req->p.use_workqueue ||
	    (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))) {
		INIT_WORK(&req->p.work, nvmet_passthru_execute_cmd_work);
		req->p.rq = rq;
		queue_work(nvmet_wq, &req->p.work);
	} else {
		rq->end_io = nvmet_passthru_req_done;
		rq->end_io_data = req;
		blk_execute_rq_nowait(rq, false);
	}

	if (ns)
		nvme_put_ns(ns);

	return;

out_put_req:
	blk_mq_free_request(rq);
out_put_ns:
	if (ns)
		nvme_put_ns(ns);
out:
	nvmet_req_complete(req, status);
}

/*
 * We need to emulate the set host behaviour feature: the behaviour
 * requested by the target's host must match the behaviour already
 * requested by the device's host; fail otherwise.
 */
static void nvmet_passthru_set_host_behaviour(struct nvmet_req *req)
{
	struct nvme_ctrl *ctrl = nvmet_req_subsys(req)->passthru_ctrl;
	struct nvme_feat_host_behavior *host;
	u16 status = NVME_SC_INTERNAL;
	int ret;

	host = kzalloc(sizeof(*host) * 2, GFP_KERNEL);
	if (!host)
		goto out_complete_req;

	ret = nvme_get_features(ctrl, NVME_FEAT_HOST_BEHAVIOR, 0,
				host, sizeof(*host), NULL);
	if (ret)
		goto out_free_host;

	status = nvmet_copy_from_sgl(req, 0, &host[1], sizeof(*host));
	if (status)
		goto out_free_host;

	if (memcmp(&host[0], &host[1], sizeof(host[0]))) {
		pr_warn("target host has requested different behaviour from the local host\n");
		status = NVME_SC_INTERNAL;
	}

out_free_host:
	kfree(host);
out_complete_req:
	nvmet_req_complete(req, status);
}

static u16 nvmet_setup_passthru_command(struct nvmet_req *req)
{
	req->p.use_workqueue = false;
	req->execute = nvmet_passthru_execute_cmd;
	return NVME_SC_SUCCESS;
}

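/*
 * Parse an I/O command for a passthru subsystem: reservation commands are
 * rejected, everything else is passed straight through.
 */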
u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req)
{
	/* Reject any commands with non-SGL flags set (i.e. fused commands) */
	if (req->cmd->common.flags & ~NVME_CMD_SGL_ALL)
		return NVME_SC_INVALID_FIELD;

	switch (req->cmd->common.opcode) {
	case nvme_cmd_resv_register:
	case nvme_cmd_resv_report:
	case nvme_cmd_resv_acquire:
	case nvme_cmd_resv_release:
		/*
		 * Reservations cannot be supported properly because the
		 * underlying device has no way of differentiating different
		 * hosts that connect via fabrics. This could potentially be
		 * emulated in the future if regular targets grow support for
		 * this feature.
		 */
		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
	}

	return nvmet_setup_passthru_command(req);
}

/*
 * Only features that are emulated or specifically allowed in the list are
 * passed down to the controller. This function implements the allow list for
 * both get and set features.
 */
static u16 nvmet_passthru_get_set_features(struct nvmet_req *req)
{
	switch (le32_to_cpu(req->cmd->features.fid)) {
	case NVME_FEAT_ARBITRATION:
	case NVME_FEAT_POWER_MGMT:
	case NVME_FEAT_LBA_RANGE:
	case NVME_FEAT_TEMP_THRESH:
	case NVME_FEAT_ERR_RECOVERY:
	case NVME_FEAT_VOLATILE_WC:
	case NVME_FEAT_WRITE_ATOMIC:
	case NVME_FEAT_AUTO_PST:
	case NVME_FEAT_TIMESTAMP:
	case NVME_FEAT_HCTM:
	case NVME_FEAT_NOPSC:
	case NVME_FEAT_RRL:
	case NVME_FEAT_PLM_CONFIG:
	case NVME_FEAT_PLM_WINDOW:
	case NVME_FEAT_HOST_BEHAVIOR:
	case NVME_FEAT_SANITIZE:
	case NVME_FEAT_VENDOR_START ... NVME_FEAT_VENDOR_END:
		return nvmet_setup_passthru_command(req);

	case NVME_FEAT_ASYNC_EVENT:
		/* There is no support for forwarding ASYNC events */
	case NVME_FEAT_IRQ_COALESCE:
	case NVME_FEAT_IRQ_CONFIG:
		/* The IRQ settings will not apply to the target controller */
	case NVME_FEAT_HOST_MEM_BUF:
		/*
		 * Any HMB that's set will not be passed through and will
		 * not work as expected
		 */
	case NVME_FEAT_SW_PROGRESS:
		/*
		 * The Pre-Boot Software Load Count doesn't make much
		 * sense for a target to export
		 */
	case NVME_FEAT_RESV_MASK:
	case NVME_FEAT_RESV_PERSIST:
		/* No reservations, see nvmet_parse_passthru_io_cmd() */
	default:
		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
	}
}

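/*
 * Parse an admin command for a passthru subsystem: vendor specific commands
 * are passed through, fabrics-emulated commands (async events, keep alive,
 * selected features) are handled locally, Identify is passed through with
 * post-execution fixups, and anything else is rejected.
 */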
u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
{
	/* Reject any commands with non-SGL flags set (i.e. fused commands) */
	if (req->cmd->common.flags & ~NVME_CMD_SGL_ALL)
		return NVME_SC_INVALID_FIELD;

	/*
	 * Passthru all vendor specific commands
	 */
	if (req->cmd->common.opcode >= nvme_admin_vendor_start)
		return nvmet_setup_passthru_command(req);

	switch (req->cmd->common.opcode) {
	case nvme_admin_async_event:
		req->execute = nvmet_execute_async_event;
		return NVME_SC_SUCCESS;
	case nvme_admin_keep_alive:
		/*
		 * Most PCIe ctrls don't support the keep alive cmd, so route
		 * keep alive to the non-passthru mode. Change this code once
		 * PCIe ctrls with keep alive support become available.
		 */
		req->execute = nvmet_execute_keep_alive;
		return NVME_SC_SUCCESS;
	case nvme_admin_set_features:
		switch (le32_to_cpu(req->cmd->features.fid)) {
		case NVME_FEAT_ASYNC_EVENT:
		case NVME_FEAT_KATO:
		case NVME_FEAT_NUM_QUEUES:
		case NVME_FEAT_HOST_ID:
			req->execute = nvmet_execute_set_features;
			return NVME_SC_SUCCESS;
		case NVME_FEAT_HOST_BEHAVIOR:
			req->execute = nvmet_passthru_set_host_behaviour;
			return NVME_SC_SUCCESS;
		default:
			return nvmet_passthru_get_set_features(req);
		}
		break;
	case nvme_admin_get_features:
		switch (le32_to_cpu(req->cmd->features.fid)) {
		case NVME_FEAT_ASYNC_EVENT:
		case NVME_FEAT_KATO:
		case NVME_FEAT_NUM_QUEUES:
		case NVME_FEAT_HOST_ID:
			req->execute = nvmet_execute_get_features;
			return NVME_SC_SUCCESS;
		default:
			return nvmet_passthru_get_set_features(req);
		}
		break;
	case nvme_admin_identify:
		switch (req->cmd->identify.cns) {
		case NVME_ID_CNS_CTRL:
			req->execute = nvmet_passthru_execute_cmd;
			req->p.use_workqueue = true;
			return NVME_SC_SUCCESS;
		case NVME_ID_CNS_CS_CTRL:
			switch (req->cmd->identify.csi) {
			case NVME_CSI_ZNS:
				req->execute = nvmet_passthru_execute_cmd;
				req->p.use_workqueue = true;
				return NVME_SC_SUCCESS;
			}
			return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
		case NVME_ID_CNS_NS:
			req->execute = nvmet_passthru_execute_cmd;
			req->p.use_workqueue = true;
			return NVME_SC_SUCCESS;
		case NVME_ID_CNS_CS_NS:
			switch (req->cmd->identify.csi) {
			case NVME_CSI_ZNS:
				req->execute = nvmet_passthru_execute_cmd;
				req->p.use_workqueue = true;
				return NVME_SC_SUCCESS;
			}
			return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
		default:
			return nvmet_setup_passthru_command(req);
		}
	case nvme_admin_get_log_page:
		return nvmet_setup_passthru_command(req);
	default:
		/* Reject commands not in the allowlist above */
		return nvmet_report_invalid_opcode(req);
	}
}

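/*
 * Bind the subsystem to the NVMe controller named by passthru_ctrl_path.
 * Only one passthru subsystem may be registered per controller, and a
 * passthru subsystem cannot also contain regular namespaces.
 */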
int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys)
{
	struct nvme_ctrl *ctrl;
	struct file *file;
	int ret = -EINVAL;
	void *old;

	mutex_lock(&subsys->lock);
	if (!subsys->passthru_ctrl_path)
		goto out_unlock;
	if (subsys->passthru_ctrl)
		goto out_unlock;

	if (subsys->nr_namespaces) {
		pr_info("cannot enable both passthru and regular namespaces for a single subsystem");
		goto out_unlock;
	}

	file = filp_open(subsys->passthru_ctrl_path, O_RDWR, 0);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
		goto out_unlock;
	}

	ctrl = nvme_ctrl_from_file(file);
	if (!ctrl) {
		pr_err("failed to open nvme controller %s\n",
		       subsys->passthru_ctrl_path);

		goto out_put_file;
	}

	old = xa_cmpxchg(&passthru_subsystems, ctrl->cntlid, NULL,
			 subsys, GFP_KERNEL);
	if (xa_is_err(old)) {
		ret = xa_err(old);
		goto out_put_file;
	}

	if (old)
		goto out_put_file;

	subsys->passthru_ctrl = ctrl;
	subsys->ver = ctrl->vs;

	if (subsys->ver < NVME_VS(1, 2, 1)) {
		pr_warn("nvme controller version is too old: %llu.%llu.%llu, advertising 1.2.1\n",
			NVME_MAJOR(subsys->ver), NVME_MINOR(subsys->ver),
			NVME_TERTIARY(subsys->ver));
		subsys->ver = NVME_VS(1, 2, 1);
	}
	nvme_get_ctrl(ctrl);
	__module_get(subsys->passthru_ctrl->ops->module);
	ret = 0;

out_put_file:
	filp_close(file, NULL);
out_unlock:
	mutex_unlock(&subsys->lock);
	return ret;
}

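/* Caller must hold subsys->lock */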
static void __nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys)
{
	if (subsys->passthru_ctrl) {
		xa_erase(&passthru_subsystems, subsys->passthru_ctrl->cntlid);
		module_put(subsys->passthru_ctrl->ops->module);
		nvme_put_ctrl(subsys->passthru_ctrl);
	}
	subsys->passthru_ctrl = NULL;
	subsys->ver = NVMET_DEFAULT_VS;
}

void nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys)
{
	mutex_lock(&subsys->lock);
	__nvmet_passthru_ctrl_disable(subsys);
	mutex_unlock(&subsys->lock);
}

void nvmet_passthru_subsys_free(struct nvmet_subsys *subsys)
{
	mutex_lock(&subsys->lock);
	__nvmet_passthru_ctrl_disable(subsys);
	mutex_unlock(&subsys->lock);
	kfree(subsys->passthru_ctrl_path);
}