xref: /kernel/linux/linux-6.6/drivers/nvme/host/zns.c (revision 62306a36)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 Western Digital Corporation or its affiliates.
 */

#include <linux/blkdev.h>
#include <linux/vmalloc.h>
#include "nvme.h"

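/*
 * Propagate the namespace's zone geometry to the block layer: the zone
 * size becomes the queue's chunk_sectors, the controller's zone-append
 * limit caps REQ_OP_ZONE_APPEND bios, and the block layer then rescans
 * all zones on the disk.
 */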
int nvme_revalidate_zones(struct nvme_ns *ns)
{
	struct request_queue *q = ns->queue;

	blk_queue_chunk_sectors(q, ns->zsze);
	blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);

	return blk_revalidate_disk_zones(ns->disk, NULL);
}

static int nvme_set_max_append(struct nvme_ctrl *ctrl)
{
	struct nvme_command c = { };
	struct nvme_id_ctrl_zns *id;
	int status;

	id = kzalloc(sizeof(*id), GFP_KERNEL);
	if (!id)
		return -ENOMEM;

	c.identify.opcode = nvme_admin_identify;
	c.identify.cns = NVME_ID_CNS_CS_CTRL;
	c.identify.csi = NVME_CSI_ZNS;

	status = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
	if (status) {
		kfree(id);
		return status;
	}

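	/*
	 * ZASL is a power of two in units of the minimum memory page
	 * size (4K at the spec minimum, i.e. CAP.MPSMIN == 0), so
	 * zasl + 3 converts it to 512-byte sectors.  A ZASL of zero
	 * means zone append is only bounded by MDTS, which is already
	 * reflected in max_hw_sectors.
	 */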
	if (id->zasl)
		ctrl->max_zone_append = 1 << (id->zasl + 3);
	else
		ctrl->max_zone_append = ctrl->max_hw_sectors;
	kfree(id);
	return 0;
}

int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
{
	struct nvme_effects_log *log = ns->head->effects;
	struct request_queue *q = ns->queue;
	struct nvme_command c = { };
	struct nvme_id_ns_zns *id;
	int status;

	/* Driver requires zone append support */
	if ((le32_to_cpu(log->iocs[nvme_cmd_zone_append]) &
			NVME_CMD_EFFECTS_CSUPP)) {
		if (test_and_clear_bit(NVME_NS_FORCE_RO, &ns->flags))
			dev_warn(ns->ctrl->device,
				 "Zone Append supported for zoned namespace:%u. Removing read-only mode\n",
				 ns->head->ns_id);
	} else {
		set_bit(NVME_NS_FORCE_RO, &ns->flags);
		dev_warn(ns->ctrl->device,
			 "Zone Append not supported for zoned namespace:%u. Forcing to read-only mode\n",
			 ns->head->ns_id);
	}

	/* Lazily query controller append limit for the first zoned namespace */
	if (!ns->ctrl->max_zone_append) {
		status = nvme_set_max_append(ns->ctrl);
		if (status)
			return status;
	}

	id = kzalloc(sizeof(*id), GFP_KERNEL);
	if (!id)
		return -ENOMEM;

	c.identify.opcode = nvme_admin_identify;
	c.identify.nsid = cpu_to_le32(ns->head->ns_id);
	c.identify.cns = NVME_ID_CNS_CS_NS;
	c.identify.csi = NVME_CSI_ZNS;

	status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, id, sizeof(*id));
	if (status)
		goto free_data;

	/*
	 * We currently do not handle devices requiring any of the zoned
	 * operation characteristics.
	 */
	if (id->zoc) {
		dev_warn(ns->ctrl->device,
			"zone operations:%x not supported for namespace:%u\n",
			le16_to_cpu(id->zoc), ns->head->ns_id);
		status = -ENODEV;
		goto free_data;
	}

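	/*
	 * The zone size is reported in namespace LBAs; convert it to
	 * 512-byte sectors.  The block layer only supports power-of-two
	 * zone sizes, so anything else is rejected here.
	 */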
	ns->zsze = nvme_lba_to_sect(ns, le64_to_cpu(id->lbafe[lbaf].zsze));
	if (!is_power_of_2(ns->zsze)) {
		dev_warn(ns->ctrl->device,
			"invalid zone size:%llu for namespace:%u\n",
			ns->zsze, ns->head->ns_id);
		status = -ENODEV;
		goto free_data;
	}

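	/*
	 * Register the namespace as host-managed zoned.  MOR and MAR are
	 * 0-based values, hence the +1; a reported all-ones value (no
	 * limit) wraps to 0, which the block layer likewise treats as
	 * unlimited.
	 */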
	disk_set_zoned(ns->disk, BLK_ZONED_HM);
	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
	disk_set_max_open_zones(ns->disk, le32_to_cpu(id->mor) + 1);
	disk_set_max_active_zones(ns->disk, le32_to_cpu(id->mar) + 1);
free_data:
	kfree(id);
	return status;
}

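/*
 * Allocate a buffer for one Zone Management Receive report.  The size is
 * capped by what the device can transfer in a single command
 * (max_hw_sectors and max_segments); if a large allocation fails, retry
 * with half the size until the buffer cannot hold even one descriptor.
 */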
static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
					  unsigned int nr_zones, size_t *buflen)
{
	struct request_queue *q = ns->disk->queue;
	size_t bufsize;
	void *buf;

	const size_t min_bufsize = sizeof(struct nvme_zone_report) +
				   sizeof(struct nvme_zone_descriptor);

	nr_zones = min_t(unsigned int, nr_zones,
			 get_capacity(ns->disk) >> ilog2(ns->zsze));

	bufsize = sizeof(struct nvme_zone_report) +
		nr_zones * sizeof(struct nvme_zone_descriptor);
	bufsize = min_t(size_t, bufsize,
			queue_max_hw_sectors(q) << SECTOR_SHIFT);
	bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);

	while (bufsize >= min_bufsize) {
		buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
		if (buf) {
			*buflen = bufsize;
			return buf;
		}
		bufsize >>= 1;
	}
	return NULL;
}

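/*
 * Convert one NVMe zone descriptor to a struct blk_zone and hand it to
 * the report_zones_cb.  The low nibble of ZT encodes the zone type and
 * the high nibble of ZS the zone state; sequential-write-required is
 * the only zone type defined for ZNS namespaces.
 */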
static int nvme_zone_parse_entry(struct nvme_ns *ns,
				 struct nvme_zone_descriptor *entry,
				 unsigned int idx, report_zones_cb cb,
				 void *data)
{
	struct blk_zone zone = { };

	if ((entry->zt & 0xf) != NVME_ZONE_TYPE_SEQWRITE_REQ) {
		dev_err(ns->ctrl->device, "invalid zone type %#x\n",
				entry->zt);
		return -EINVAL;
	}

	zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
	zone.cond = entry->zs >> 4;
	zone.len = ns->zsze;
	zone.capacity = nvme_lba_to_sect(ns, le64_to_cpu(entry->zcap));
	zone.start = nvme_lba_to_sect(ns, le64_to_cpu(entry->zslba));
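	/*
	 * A full zone has no valid write pointer; report it as pointing
	 * at the end of the zone instead.
	 */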
	if (zone.cond == BLK_ZONE_COND_FULL)
		zone.wp = zone.start + zone.len;
	else
		zone.wp = nvme_lba_to_sect(ns, le64_to_cpu(entry->wp));

	return cb(&zone, idx, data);
}

int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
		unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct nvme_zone_report *report;
	struct nvme_command c = { };
	int ret, zone_idx = 0;
	unsigned int nz, i;
	size_t buflen;

	if (ns->head->ids.csi != NVME_CSI_ZNS)
		return -EINVAL;

	report = nvme_zns_alloc_report_buffer(ns, nr_zones, &buflen);
	if (!report)
		return -ENOMEM;

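	/*
	 * Zone Management Receive, reporting all zones from SLBA upward.
	 * NUMD is a 0-based dword count, and the partial-report bit makes
	 * the returned nr_zones count only the descriptors that fit in
	 * this buffer rather than every matching zone on the namespace.
	 */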
	c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
	c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
	c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
	c.zmr.zra = NVME_ZRA_ZONE_REPORT;
	c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
	c.zmr.pr = NVME_REPORT_ZONE_PARTIAL;

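	/*
	 * Round the start sector down to a zone boundary; this relies on
	 * the power-of-two zone size validated in nvme_update_zone_info().
	 */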
	sector &= ~(ns->zsze - 1);
	while (zone_idx < nr_zones && sector < get_capacity(ns->disk)) {
		memset(report, 0, buflen);

		c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns, sector));
		ret = nvme_submit_sync_cmd(ns->queue, &c, report, buflen);
		if (ret) {
			if (ret > 0)
				ret = -EIO;
			goto out_free;
		}

		nz = min((unsigned int)le64_to_cpu(report->nr_zones), nr_zones);
		if (!nz)
			break;

		for (i = 0; i < nz && zone_idx < nr_zones; i++) {
			ret = nvme_zone_parse_entry(ns, &report->entries[i],
						    zone_idx, cb, data);
			if (ret)
				goto out_free;
			zone_idx++;
		}

		sector += ns->zsze * nz;
	}

	if (zone_idx > 0)
		ret = zone_idx;
	else
		ret = -EINVAL;
out_free:
	kvfree(report);
	return ret;
}
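
/*
 * nvme_ns_report_zones() is called from the driver's ->report_zones()
 * handler; the caller supplies a report_zones_cb that is invoked once
 * per zone.  A minimal sketch of such a callback, hypothetical and for
 * illustration only:
 *
 *	static int nvme_count_open_cb(struct blk_zone *zone,
 *				      unsigned int idx, void *data)
 *	{
 *		unsigned int *nr_open = data;
 *
 *		if (zone->cond == BLK_ZONE_COND_EXP_OPEN)
 *			(*nr_open)++;
 *		return 0;
 *	}
 */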
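
/*
 * Translate a REQ_OP_ZONE_* request into an NVMe Zone Management Send
 * command.  The zone start sector maps to SLBA, and RESET_ALL sets the
 * Select All bit so the action applies to every zone on the namespace.
 */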
blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
		struct nvme_command *c, enum nvme_zone_mgmt_action action)
{
	memset(c, 0, sizeof(*c));

	c->zms.opcode = nvme_cmd_zone_mgmt_send;
	c->zms.nsid = cpu_to_le32(ns->head->ns_id);
	c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
	c->zms.zsa = action;

	if (req_op(req) == REQ_OP_ZONE_RESET_ALL)
		c->zms.select_all = 1;

	return BLK_STS_OK;
}