// SPDX-License-Identifier: GPL-2.0
/*
 * Block driver for s390 storage class memory.
 *
 * Copyright IBM Corp. 2012
 * Author(s): Sebastian Ott <sebott@linux.vnet.ibm.com>
 */

#define KMSG_COMPONENT "scm_block"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/genhd.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/io.h>
#include <asm/eadm.h>
#include "scm_blk.h"

debug_info_t *scm_debug;
static int scm_major;
static mempool_t *aidaw_pool;
static DEFINE_SPINLOCK(list_lock);
static LIST_HEAD(inactive_requests);
static unsigned int nr_requests = 64;
static unsigned int nr_requests_per_io = 8;
static atomic_t nr_devices = ATOMIC_INIT(0);
module_param(nr_requests, uint, S_IRUGO);
MODULE_PARM_DESC(nr_requests, "Number of parallel requests.");

module_param(nr_requests_per_io, uint, S_IRUGO);
MODULE_PARM_DESC(nr_requests_per_io, "Number of requests per IO.");

MODULE_DESCRIPTION("Block driver for s390 storage class memory.");
MODULE_LICENSE("GPL");
MODULE_ALIAS("scm:scmdev*");

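/*
 * Free a single scm_request together with its AOB page and its
 * array of struct request pointers.
 */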
static void __scm_free_rq(struct scm_request *scmrq)
{
	struct aob_rq_header *aobrq = to_aobrq(scmrq);

	free_page((unsigned long) scmrq->aob);
	kfree(scmrq->request);
	kfree(aobrq);
}

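/* Free all requests on the inactive list and destroy the aidaw mempool. */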
static void scm_free_rqs(void)
{
	struct list_head *iter, *safe;
	struct scm_request *scmrq;

	spin_lock_irq(&list_lock);
	list_for_each_safe(iter, safe, &inactive_requests) {
		scmrq = list_entry(iter, struct scm_request, list);
		list_del(&scmrq->list);
		__scm_free_rq(scmrq);
	}
	spin_unlock_irq(&list_lock);

	mempool_destroy(aidaw_pool);
}

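/*
 * Allocate one scm_request: the aob_rq_header with the embedded
 * scm_request, a zeroed DMA page for the AOB and the per-msb array
 * of request pointers, then add it to the inactive list.
 */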
static int __scm_alloc_rq(void)
{
	struct aob_rq_header *aobrq;
	struct scm_request *scmrq;

	aobrq = kzalloc(sizeof(*aobrq) + sizeof(*scmrq), GFP_KERNEL);
	if (!aobrq)
		return -ENOMEM;

	scmrq = (void *) aobrq->data;
	scmrq->aob = (void *) get_zeroed_page(GFP_DMA);
	if (!scmrq->aob)
		goto free;

	scmrq->request = kcalloc(nr_requests_per_io, sizeof(scmrq->request[0]),
				 GFP_KERNEL);
	if (!scmrq->request)
		goto free;

	INIT_LIST_HEAD(&scmrq->list);
	spin_lock_irq(&list_lock);
	list_add(&scmrq->list, &inactive_requests);
	spin_unlock_irq(&list_lock);

	return 0;
free:
	__scm_free_rq(scmrq);
	return -ENOMEM;
}

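/* Preallocate nrqs requests and the page pool used for aidaw lists. */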
static int scm_alloc_rqs(unsigned int nrqs)
{
	int ret = 0;

	aidaw_pool = mempool_create_page_pool(max(nrqs/8, 1U), 0);
	if (!aidaw_pool)
		return -ENOMEM;

	while (nrqs-- && !ret)
		ret = __scm_alloc_rq();

	return ret;
}

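/* Take a preallocated request off the inactive list, or return NULL. */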
static struct scm_request *scm_request_fetch(void)
{
	struct scm_request *scmrq = NULL;

	spin_lock_irq(&list_lock);
	if (list_empty(&inactive_requests))
		goto out;
	scmrq = list_first_entry(&inactive_requests, struct scm_request, list);
	list_del(&scmrq->list);
out:
	spin_unlock_irq(&list_lock);
	return scmrq;
}

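/*
 * Return a request to the inactive list. Aidaw pages are given back
 * to the mempool only when the msb used indirect addressing and the
 * aidaw list starts at a page boundary; page-aligned lists are the
 * ones that came from the pool, as opposed to aidaws placed in the
 * unused tail of the AOB page itself.
 */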
static void scm_request_done(struct scm_request *scmrq)
{
	unsigned long flags;
	struct msb *msb;
	u64 aidaw;
	int i;

	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
		msb = &scmrq->aob->msb[i];
		aidaw = (u64)phys_to_virt(msb->data_addr);

		if ((msb->flags & MSB_FLAG_IDA) && aidaw &&
		    IS_ALIGNED(aidaw, PAGE_SIZE))
			mempool_free(virt_to_page(aidaw), aidaw_pool);
	}

	spin_lock_irqsave(&list_lock, flags);
	list_add(&scmrq->list, &inactive_requests);
	spin_unlock_irqrestore(&list_lock, flags);
}

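/* Reject writes while the device is in the write-prohibited state. */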
static bool scm_permit_request(struct scm_blk_dev *bdev, struct request *req)
{
	return rq_data_dir(req) != WRITE || bdev->state != SCM_WR_PROHIBIT;
}

static inline struct aidaw *scm_aidaw_alloc(void)
{
	struct page *page = mempool_alloc(aidaw_pool, GFP_ATOMIC);

	return page ? page_address(page) : NULL;
}

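/*
 * Payload bytes addressable from *aidaw up to the end of its page:
 * each remaining aidaw entry describes one 4K block, hence
 * (entries left in the page) * PAGE_SIZE.
 */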
static inline unsigned long scm_aidaw_bytes(struct aidaw *aidaw)
{
	unsigned long _aidaw = (unsigned long) aidaw;
	unsigned long bytes = ALIGN(_aidaw, PAGE_SIZE) - _aidaw;

	return (bytes / sizeof(*aidaw)) * PAGE_SIZE;
}

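/*
 * Get an aidaw list able to describe at least @bytes of payload:
 * reuse the tail of the current page when it is big enough, otherwise
 * allocate and zero a fresh page from the mempool.
 */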
struct aidaw *scm_aidaw_fetch(struct scm_request *scmrq, unsigned int bytes)
{
	struct aidaw *aidaw;

	if (scm_aidaw_bytes(scmrq->next_aidaw) >= bytes)
		return scmrq->next_aidaw;

	aidaw = scm_aidaw_alloc();
	if (aidaw)
		memset(aidaw, 0, PAGE_SIZE);
	return aidaw;
}

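/*
 * Fill in the next msb of the AOB for the given request: set block
 * size, start address and operation code, and build the aidaw list
 * with one entry per 4K segment of the request's bio vector.
 */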
static int scm_request_prepare(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	struct scm_device *scmdev = bdev->gendisk->private_data;
	int pos = scmrq->aob->request.msb_count;
	struct msb *msb = &scmrq->aob->msb[pos];
	struct request *req = scmrq->request[pos];
	struct req_iterator iter;
	struct aidaw *aidaw;
	struct bio_vec bv;

	aidaw = scm_aidaw_fetch(scmrq, blk_rq_bytes(req));
	if (!aidaw)
		return -ENOMEM;

	msb->bs = MSB_BS_4K;
	scmrq->aob->request.msb_count++;
	msb->scm_addr = scmdev->address + ((u64) blk_rq_pos(req) << 9);
	msb->oc = (rq_data_dir(req) == READ) ? MSB_OC_READ : MSB_OC_WRITE;
	msb->flags |= MSB_FLAG_IDA;
	msb->data_addr = (u64)virt_to_phys(aidaw);

	rq_for_each_segment(bv, req, iter) {
		WARN_ON(bv.bv_offset);
		msb->blk_count += bv.bv_len >> 12;
		aidaw->data_addr = virt_to_phys(page_address(bv.bv_page));
		aidaw++;
	}

	scmrq->next_aidaw = aidaw;
	return 0;
}

static inline void scm_request_set(struct scm_request *scmrq,
				   struct request *req)
{
	scmrq->request[scmrq->aob->request.msb_count] = req;
}

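/*
 * Reset a fetched request for reuse: clear the request array and the
 * AOB, set up the command header and point next_aidaw right behind
 * the last msb we may use, into the unused tail of the AOB page.
 */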
static inline void scm_request_init(struct scm_blk_dev *bdev,
				    struct scm_request *scmrq)
{
	struct aob_rq_header *aobrq = to_aobrq(scmrq);
	struct aob *aob = scmrq->aob;

	memset(scmrq->request, 0,
	       nr_requests_per_io * sizeof(scmrq->request[0]));
	memset(aob, 0, sizeof(*aob));
	aobrq->scmdev = bdev->scmdev;
	aob->request.cmd_code = ARQB_CMD_MOVE;
	aob->request.data = (u64) aobrq;
	scmrq->bdev = bdev;
	scmrq->retries = 4;
	scmrq->error = BLK_STS_OK;
	/* We don't use all msbs - place aidaws at the end of the aob page. */
	scmrq->next_aidaw = (void *) &aob->msb[nr_requests_per_io];
}

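/* Hand all block layer requests bundled in scmrq back for requeueing. */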
static void scm_request_requeue(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	int i;

	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++)
		blk_mq_requeue_request(scmrq->request[i], false);

	atomic_dec(&bdev->queued_reqs);
	scm_request_done(scmrq);
	blk_mq_kick_requeue_list(bdev->rq);
}

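/*
 * Complete all block layer requests bundled in scmrq, propagating the
 * per-scmrq error status through each request's pdu.
 */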
static void scm_request_finish(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	blk_status_t *error;
	int i;

	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
		error = blk_mq_rq_to_pdu(scmrq->request[i]);
		*error = scmrq->error;
		if (likely(!blk_should_fake_timeout(scmrq->request[i]->q)))
			blk_mq_complete_request(scmrq->request[i]);
	}

	atomic_dec(&bdev->queued_reqs);
	scm_request_done(scmrq);
}

static void scm_request_start(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;

	atomic_inc(&bdev->queued_reqs);
	if (eadm_start_aob(scmrq->aob)) {
		SCM_LOG(5, "no subchannel");
		scm_request_requeue(scmrq);
	}
}

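/*
 * Per hardware queue context: the scm_request currently being filled
 * with block layer requests, protected against concurrent ->queue_rq
 * calls by the lock.
 */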
struct scm_queue {
	struct scm_request *scmrq;
	spinlock_t lock;
};

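/*
 * ->queue_rq: gather up to nr_requests_per_io block layer requests
 * into one scm_request and start the AOB once the batch is full or
 * the block layer signals the last request of the run.
 */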
static blk_status_t scm_blk_request(struct blk_mq_hw_ctx *hctx,
			   const struct blk_mq_queue_data *qd)
{
	struct scm_device *scmdev = hctx->queue->queuedata;
	struct scm_blk_dev *bdev = dev_get_drvdata(&scmdev->dev);
	struct scm_queue *sq = hctx->driver_data;
	struct request *req = qd->rq;
	struct scm_request *scmrq;

	spin_lock(&sq->lock);
	if (!scm_permit_request(bdev, req)) {
		spin_unlock(&sq->lock);
		return BLK_STS_RESOURCE;
	}

	scmrq = sq->scmrq;
	if (!scmrq) {
		scmrq = scm_request_fetch();
		if (!scmrq) {
			SCM_LOG(5, "no request");
			spin_unlock(&sq->lock);
			return BLK_STS_RESOURCE;
		}
		scm_request_init(bdev, scmrq);
		sq->scmrq = scmrq;
	}
	scm_request_set(scmrq, req);

	if (scm_request_prepare(scmrq)) {
		SCM_LOG(5, "aidaw alloc failed");
		scm_request_set(scmrq, NULL);

		if (scmrq->aob->request.msb_count)
			scm_request_start(scmrq);

		sq->scmrq = NULL;
		spin_unlock(&sq->lock);
		return BLK_STS_RESOURCE;
	}
	blk_mq_start_request(req);

	if (qd->last || scmrq->aob->request.msb_count == nr_requests_per_io) {
		scm_request_start(scmrq);
		sq->scmrq = NULL;
	}
	spin_unlock(&sq->lock);
	return BLK_STS_OK;
}

static int scm_blk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
			     unsigned int idx)
{
	struct scm_queue *qd = kzalloc(sizeof(*qd), GFP_KERNEL);

	if (!qd)
		return -ENOMEM;

	spin_lock_init(&qd->lock);
	hctx->driver_data = qd;

	return 0;
}

static void scm_blk_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int idx)
{
	struct scm_queue *qd = hctx->driver_data;

	WARN_ON(qd->scmrq);
	kfree(hctx->driver_data);
	hctx->driver_data = NULL;
}

static void __scmrq_log_error(struct scm_request *scmrq)
{
	struct aob *aob = scmrq->aob;

	if (scmrq->error == BLK_STS_TIMEOUT)
		SCM_LOG(1, "Request timeout");
	else {
		SCM_LOG(1, "Request error");
		SCM_LOG_HEX(1, &aob->response, sizeof(aob->response));
	}
	if (scmrq->retries)
		SCM_LOG(1, "Retry request");
	else
		pr_err("An I/O operation to SCM failed with rc=%d\n",
		       scmrq->error);
}

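/*
 * Retry path: for I/O errors the response block is valid and is
 * inspected first. A write-prohibit condition flips the device state
 * and requeues; everything else is restarted directly.
 */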
static void scm_blk_handle_error(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	unsigned long flags;

	if (scmrq->error != BLK_STS_IOERR)
		goto restart;

	/* For -EIO the response block is valid. */
	switch (scmrq->aob->response.eqc) {
	case EQC_WR_PROHIBIT:
		spin_lock_irqsave(&bdev->lock, flags);
		if (bdev->state != SCM_WR_PROHIBIT)
			pr_info("%lx: Write access to the SCM increment is suspended\n",
				(unsigned long) bdev->scmdev->address);
		bdev->state = SCM_WR_PROHIBIT;
		spin_unlock_irqrestore(&bdev->lock, flags);
		goto requeue;
	default:
		break;
	}

restart:
	if (!eadm_start_aob(scmrq->aob))
		return;

requeue:
	scm_request_requeue(scmrq);
}

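/*
 * Completion callback, invoked by the eadm layer when an AOB finishes:
 * retry failed requests a limited number of times, then complete.
 */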
void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error)
{
	struct scm_request *scmrq = data;

	scmrq->error = error;
	if (error) {
		__scmrq_log_error(scmrq);
		if (scmrq->retries-- > 0) {
			scm_blk_handle_error(scmrq);
			return;
		}
	}

	scm_request_finish(scmrq);
}

static void scm_blk_request_done(struct request *req)
{
	blk_status_t *error = blk_mq_rq_to_pdu(req);

	blk_mq_end_request(req, *error);
}

static const struct block_device_operations scm_blk_devops = {
	.owner = THIS_MODULE,
};

static const struct blk_mq_ops scm_mq_ops = {
	.queue_rq = scm_blk_request,
	.complete = scm_blk_request_done,
	.init_hctx = scm_blk_init_hctx,
	.exit_hctx = scm_blk_exit_hctx,
};

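/*
 * Set up the blk-mq tag set, request queue and gendisk for one SCM
 * device. Disk names follow the scheme scma..scmz, scmaa..scmzz.
 */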
int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
{
	unsigned int devindex, nr_max_blk;
	struct request_queue *rq;
	int len, ret;

	devindex = atomic_inc_return(&nr_devices) - 1;
	/* scma..scmz + scmaa..scmzz */
	if (devindex > 701) {
		ret = -ENODEV;
		goto out;
	}

	bdev->scmdev = scmdev;
	bdev->state = SCM_OPER;
	spin_lock_init(&bdev->lock);
	atomic_set(&bdev->queued_reqs, 0);

	bdev->tag_set.ops = &scm_mq_ops;
	bdev->tag_set.cmd_size = sizeof(blk_status_t);
	bdev->tag_set.nr_hw_queues = nr_requests;
	bdev->tag_set.queue_depth = nr_requests_per_io * nr_requests;
	bdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	bdev->tag_set.numa_node = NUMA_NO_NODE;

	ret = blk_mq_alloc_tag_set(&bdev->tag_set);
	if (ret)
		goto out;

	rq = blk_mq_init_queue(&bdev->tag_set);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		goto out_tag;
	}
	bdev->rq = rq;
	nr_max_blk = min(scmdev->nr_max_block,
			 (unsigned int) (PAGE_SIZE / sizeof(struct aidaw)));

	blk_queue_logical_block_size(rq, 1 << 12);
	blk_queue_max_hw_sectors(rq, nr_max_blk << 3); /* 8 * 512 = blk_size */
	blk_queue_max_segments(rq, nr_max_blk);
	blk_queue_flag_set(QUEUE_FLAG_NONROT, rq);
	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, rq);

	bdev->gendisk = alloc_disk(SCM_NR_PARTS);
	if (!bdev->gendisk) {
		ret = -ENOMEM;
		goto out_queue;
	}
	rq->queuedata = scmdev;
	bdev->gendisk->private_data = scmdev;
	bdev->gendisk->fops = &scm_blk_devops;
	bdev->gendisk->queue = rq;
	bdev->gendisk->major = scm_major;
	bdev->gendisk->first_minor = devindex * SCM_NR_PARTS;

	len = snprintf(bdev->gendisk->disk_name, DISK_NAME_LEN, "scm");
	if (devindex > 25) {
		len += snprintf(bdev->gendisk->disk_name + len,
				DISK_NAME_LEN - len, "%c",
				'a' + (devindex / 26) - 1);
		devindex = devindex % 26;
	}
	snprintf(bdev->gendisk->disk_name + len, DISK_NAME_LEN - len, "%c",
		 'a' + devindex);

	/* 512 byte sectors */
	set_capacity(bdev->gendisk, scmdev->size >> 9);
	device_add_disk(&scmdev->dev, bdev->gendisk, NULL);
	return 0;

out_queue:
	blk_cleanup_queue(rq);
out_tag:
	blk_mq_free_tag_set(&bdev->tag_set);
out:
	atomic_dec(&nr_devices);
	return ret;
}

void scm_blk_dev_cleanup(struct scm_blk_dev *bdev)
{
	del_gendisk(bdev->gendisk);
	blk_cleanup_queue(bdev->gendisk->queue);
	blk_mq_free_tag_set(&bdev->tag_set);
	put_disk(bdev->gendisk);
}

void scm_blk_set_available(struct scm_blk_dev *bdev)
{
	unsigned long flags;

	spin_lock_irqsave(&bdev->lock, flags);
	if (bdev->state == SCM_WR_PROHIBIT)
		pr_info("%lx: Write access to the SCM increment is restored\n",
			(unsigned long) bdev->scmdev->address);
	bdev->state = SCM_OPER;
	spin_unlock_irqrestore(&bdev->lock, flags);
}

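/*
 * Sanity check the module parameters; the upper bound of 64 matches
 * the maximum number of msbs usable per AOB (presumably an
 * architectural limit, not derivable from this file alone).
 */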
static bool __init scm_blk_params_valid(void)
{
	if (!nr_requests_per_io || nr_requests_per_io > 64)
		return false;

	return true;
}

static int __init scm_blk_init(void)
{
	int ret = -EINVAL;

	if (!scm_blk_params_valid())
		goto out;

	ret = register_blkdev(0, "scm");
	if (ret < 0)
		goto out;

	scm_major = ret;
	ret = scm_alloc_rqs(nr_requests);
	if (ret)
		goto out_free;

	scm_debug = debug_register("scm_log", 16, 1, 16);
	if (!scm_debug) {
		ret = -ENOMEM;
		goto out_free;
	}

	debug_register_view(scm_debug, &debug_hex_ascii_view);
	debug_set_level(scm_debug, 2);

	ret = scm_drv_init();
	if (ret)
		goto out_dbf;

	return ret;

out_dbf:
	debug_unregister(scm_debug);
out_free:
	scm_free_rqs();
	unregister_blkdev(scm_major, "scm");
out:
	return ret;
}
module_init(scm_blk_init);

static void __exit scm_blk_cleanup(void)
{
	scm_drv_cleanup();
	debug_unregister(scm_debug);
	scm_free_rqs();
	unregister_blkdev(scm_major, "scm");
}
module_exit(scm_blk_cleanup);