/* Copyright (c) 2013 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * aoeblk.c
 * block device routines
 */

#include <linux/kernel.h>
#include <linux/hdreg.h>
#include <linux/blk-mq.h>
#include <linux/backing-dev.h>
#include <linux/fs.h>
#include <linux/ioctl.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/netdevice.h>
#include <linux/mutex.h>
#include <linux/export.h>
#include <linux/moduleparam.h>
#include <linux/debugfs.h>
#include <scsi/sg.h>
#include "aoe.h"

static DEFINE_MUTEX(aoeblk_mutex);
static struct kmem_cache *buf_pool_cache;
static struct dentry *aoe_debugfs_dir;

/* GPFS needs a larger value than the default. */
static int aoe_maxsectors;
module_param(aoe_maxsectors, int, 0644);
MODULE_PARM_DESC(aoe_maxsectors,
	"When nonzero, set the maximum number of sectors per I/O request");
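/*
 * Illustrative usage (the value below is only an example):
 *   modprobe aoe aoe_maxsectors=1024
 * The parameter is also writable at runtime via
 * /sys/module/aoe/parameters/aoe_maxsectors.
 */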

static ssize_t aoedisk_show_state(struct device *dev,
				  struct device_attribute *attr, char *page)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct aoedev *d = disk->private_data;

	return sysfs_emit(page, "%s%s\n",
			(d->flags & DEVFL_UP) ? "up" : "down",
			(d->flags & DEVFL_KICKME) ? ",kickme" :
			(d->nopen && !(d->flags & DEVFL_UP)) ? ",closewait" : "");
	/* I'd rather see nopen exported so we can ditch closewait */
}
static ssize_t aoedisk_show_mac(struct device *dev,
				struct device_attribute *attr, char *page)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct aoedev *d = disk->private_data;
	struct aoetgt *t = d->targets[0];

	if (t == NULL)
		return sysfs_emit(page, "none\n");
	return sysfs_emit(page, "%pm\n", t->addr);
}
static ssize_t aoedisk_show_netif(struct device *dev,
				  struct device_attribute *attr, char *page)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct aoedev *d = disk->private_data;
	struct net_device *nds[8], **nd, **nnd, **ne;
	struct aoetgt **t, **te;
	struct aoeif *ifp, *e;
	char *p;

	memset(nds, 0, sizeof nds);
	nd = nds;
	ne = nd + ARRAY_SIZE(nds);
	t = d->targets;
	te = t + d->ntargets;
	for (; t < te && *t; t++) {
		ifp = (*t)->ifs;
		e = ifp + NAOEIFS;
		for (; ifp < e && ifp->nd; ifp++) {
			for (nnd = nds; nnd < nd; nnd++)
				if (*nnd == ifp->nd)
					break;
			if (nnd == nd && nd != ne)
				*nd++ = ifp->nd;
		}
	}

	ne = nd;
	nd = nds;
	if (*nd == NULL)
		return sysfs_emit(page, "none\n");
	for (p = page; nd < ne; nd++)
		p += scnprintf(p, PAGE_SIZE - (p-page), "%s%s",
			p == page ? "" : ",", (*nd)->name);
	p += scnprintf(p, PAGE_SIZE - (p-page), "\n");
	return p-page;
}
/* firmware version */
static ssize_t aoedisk_show_fwver(struct device *dev,
				  struct device_attribute *attr, char *page)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct aoedev *d = disk->private_data;

	return sysfs_emit(page, "0x%04x\n", (unsigned int) d->fw_ver);
}
static ssize_t aoedisk_show_payload(struct device *dev,
				    struct device_attribute *attr, char *page)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct aoedev *d = disk->private_data;

	return sysfs_emit(page, "%lu\n", d->maxbcnt);
}

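/*
 * debugfs "show" handler: dump the round-trip-time estimates and the
 * per-target / per-interface counters while holding the device lock.
 */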
static int aoe_debugfs_show(struct seq_file *s, void *ignored)
{
	struct aoedev *d;
	struct aoetgt **t, **te;
	struct aoeif *ifp, *ife;
	unsigned long flags;
	char c;

	d = s->private;
	seq_printf(s, "rttavg: %d rttdev: %d\n",
		d->rttavg >> RTTSCALE,
		d->rttdev >> RTTDSCALE);
	seq_printf(s, "nskbpool: %d\n", skb_queue_len(&d->skbpool));
	seq_printf(s, "kicked: %ld\n", d->kicked);
	seq_printf(s, "maxbcnt: %ld\n", d->maxbcnt);
	seq_printf(s, "ref: %ld\n", d->ref);

	spin_lock_irqsave(&d->lock, flags);
	t = d->targets;
	te = t + d->ntargets;
	for (; t < te && *t; t++) {
		c = '\t';
		seq_printf(s, "falloc: %ld\n", (*t)->falloc);
		seq_printf(s, "ffree: %p\n",
			list_empty(&(*t)->ffree) ? NULL : (*t)->ffree.next);
		seq_printf(s, "%pm:%d:%d:%d\n", (*t)->addr, (*t)->nout,
			(*t)->maxout, (*t)->nframes);
		seq_printf(s, "\tssthresh:%d\n", (*t)->ssthresh);
		seq_printf(s, "\ttaint:%d\n", (*t)->taint);
		seq_printf(s, "\tr:%d\n", (*t)->rpkts);
		seq_printf(s, "\tw:%d\n", (*t)->wpkts);
		ifp = (*t)->ifs;
		ife = ifp + ARRAY_SIZE((*t)->ifs);
		/* test the bound before dereferencing ifp->nd */
		for (; ifp < ife && ifp->nd; ifp++) {
			seq_printf(s, "%c%s", c, ifp->nd->name);
			c = ',';
		}
		seq_puts(s, "\n");
	}
	spin_unlock_irqrestore(&d->lock, flags);

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(aoe_debugfs);

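/*
 * sysfs attributes for the disk.  The group is handed to device_add_disk()
 * below so the files are created together with the disk node.
 */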
static DEVICE_ATTR(state, 0444, aoedisk_show_state, NULL);
static DEVICE_ATTR(mac, 0444, aoedisk_show_mac, NULL);
static DEVICE_ATTR(netif, 0444, aoedisk_show_netif, NULL);
static struct device_attribute dev_attr_firmware_version = {
	.attr = { .name = "firmware-version", .mode = 0444 },
	.show = aoedisk_show_fwver,
};
static DEVICE_ATTR(payload, 0444, aoedisk_show_payload, NULL);

static struct attribute *aoe_attrs[] = {
	&dev_attr_state.attr,
	&dev_attr_mac.attr,
	&dev_attr_netif.attr,
	&dev_attr_firmware_version.attr,
	&dev_attr_payload.attr,
	NULL,
};

static const struct attribute_group aoe_attr_group = {
	.attrs = aoe_attrs,
};

static const struct attribute_group *aoe_attr_groups[] = {
	&aoe_attr_group,
	NULL,
};

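/*
 * Create a per-device debugfs file named after the gendisk, with the
 * "etherd/" prefix stripped so the name is a single path component.
 */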
static void
aoedisk_add_debugfs(struct aoedev *d)
{
	char *p;

	if (aoe_debugfs_dir == NULL)
		return;
	p = strchr(d->gd->disk_name, '/');
	if (p == NULL)
		p = d->gd->disk_name;
	else
		p++;
	BUG_ON(*p == '\0');
	d->debugfs = debugfs_create_file(p, 0444, aoe_debugfs_dir, d,
					 &aoe_debugfs_fops);
}
void
aoedisk_rm_debugfs(struct aoedev *d)
{
	debugfs_remove(d->debugfs);
	d->debugfs = NULL;
}

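/*
 * Count an opener only while the device is up and not being torn down.
 * The flags are rechecked under d->lock (with aoeblk_mutex held) because
 * they can change between the unlocked early test and the nopen increment.
 */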
static int
aoeblk_open(struct gendisk *disk, blk_mode_t mode)
{
	struct aoedev *d = disk->private_data;
	ulong flags;

	if (!virt_addr_valid(d)) {
		pr_crit("aoe: invalid device pointer in %s\n",
			__func__);
		WARN_ON(1);
		return -ENODEV;
	}
	if (!(d->flags & DEVFL_UP) || d->flags & DEVFL_TKILL)
		return -ENODEV;

	mutex_lock(&aoeblk_mutex);
	spin_lock_irqsave(&d->lock, flags);
	if (d->flags & DEVFL_UP && !(d->flags & DEVFL_TKILL)) {
		d->nopen++;
		spin_unlock_irqrestore(&d->lock, flags);
		mutex_unlock(&aoeblk_mutex);
		return 0;
	}
	spin_unlock_irqrestore(&d->lock, flags);
	mutex_unlock(&aoeblk_mutex);
	return -ENODEV;
}

static void
aoeblk_release(struct gendisk *disk)
{
	struct aoedev *d = disk->private_data;
	ulong flags;

	spin_lock_irqsave(&d->lock, flags);

	if (--d->nopen == 0) {
		spin_unlock_irqrestore(&d->lock, flags);
		aoecmd_cfg(d->aoemajor, d->aoeminor);
		return;
	}
	spin_unlock_irqrestore(&d->lock, flags);
}

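/*
 * blk-mq ->queue_rq: a request is appended to d->rq_list under the device
 * lock and serviced asynchronously by aoecmd_work(); if the device is not
 * up, the request is started and failed immediately with BLK_STS_IOERR.
 */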
static blk_status_t aoeblk_queue_rq(struct blk_mq_hw_ctx *hctx,
				    const struct blk_mq_queue_data *bd)
{
	struct aoedev *d = hctx->queue->queuedata;

	spin_lock_irq(&d->lock);

	if ((d->flags & DEVFL_UP) == 0) {
		pr_info_ratelimited("aoe: device %ld.%d is not up\n",
			d->aoemajor, d->aoeminor);
		spin_unlock_irq(&d->lock);
		blk_mq_start_request(bd->rq);
		return BLK_STS_IOERR;
	}

	list_add_tail(&bd->rq->queuelist, &d->rq_list);
	aoecmd_work(d);
	spin_unlock_irq(&d->lock);
	return BLK_STS_OK;
}

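/* Report the CHS geometry cached in the aoedev. */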
static int
aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
	struct aoedev *d = bdev->bd_disk->private_data;

	if ((d->flags & DEVFL_UP) == 0) {
		printk(KERN_ERR "aoe: disk not up\n");
		return -ENODEV;
	}

	geo->cylinders = d->geo.cylinders;
	geo->heads = d->geo.heads;
	geo->sectors = d->geo.sectors;
	return 0;
}

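/* Only HDIO_GET_IDENTITY is handled; anything else returns -ENOTTY. */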
static int
aoeblk_ioctl(struct block_device *bdev, blk_mode_t mode, uint cmd, ulong arg)
{
	struct aoedev *d;

	if (!arg)
		return -EINVAL;

	d = bdev->bd_disk->private_data;
	if ((d->flags & DEVFL_UP) == 0) {
		pr_err("aoe: disk not up\n");
		return -ENODEV;
	}

	if (cmd == HDIO_GET_IDENTITY) {
		if (!copy_to_user((void __user *) arg, &d->ident,
			sizeof(d->ident)))
			return 0;
		return -EFAULT;
	}

	/* udev calls scsi_id, which uses SG_IO, resulting in noise */
	if (cmd != SG_IO)
		pr_info("aoe: unknown ioctl 0x%x\n", cmd);

	return -ENOTTY;
}

static const struct block_device_operations aoe_bdops = {
	.open = aoeblk_open,
	.release = aoeblk_release,
	.ioctl = aoeblk_ioctl,
	.compat_ioctl = blkdev_compat_ptr_ioctl,
	.getgeo = aoeblk_getgeo,
	.owner = THIS_MODULE,
};

static const struct blk_mq_ops aoeblk_mq_ops = {
	.queue_rq	= aoeblk_queue_rq,
};

/* blk_mq_alloc_disk and add_disk can sleep */
void
aoeblk_gdalloc(void *vp)
{
	struct aoedev *d = vp;
	struct gendisk *gd;
	mempool_t *mp;
	struct blk_mq_tag_set *set;
	sector_t ssize;
	ulong flags;
	int late = 0;
	int err;

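	/*
	 * Only one gdalloc worker may proceed at a time: DEVFL_GD_NOW marks
	 * an allocation in flight, and a concurrent or cancelled request
	 * simply bails out below as "late".
	 */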
	spin_lock_irqsave(&d->lock, flags);
	if (d->flags & DEVFL_GDALLOC
	&& !(d->flags & DEVFL_TKILL)
	&& !(d->flags & DEVFL_GD_NOW))
		d->flags |= DEVFL_GD_NOW;
	else
		late = 1;
	spin_unlock_irqrestore(&d->lock, flags);
	if (late)
		return;

	mp = mempool_create(MIN_BUFS, mempool_alloc_slab, mempool_free_slab,
		buf_pool_cache);
	if (mp == NULL) {
		printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%d\n",
			d->aoemajor, d->aoeminor);
		goto err;
	}

	set = &d->tag_set;
	set->ops = &aoeblk_mq_ops;
	set->cmd_size = sizeof(struct aoe_req);
	set->nr_hw_queues = 1;
	set->queue_depth = 128;
	set->numa_node = NUMA_NO_NODE;
	set->flags = BLK_MQ_F_SHOULD_MERGE;
	err = blk_mq_alloc_tag_set(set);
	if (err) {
		pr_err("aoe: cannot allocate tag set for %ld.%d\n",
			d->aoemajor, d->aoeminor);
		goto err_mempool;
	}

	gd = blk_mq_alloc_disk(set, d);
	if (IS_ERR(gd)) {
		pr_err("aoe: cannot allocate block queue for %ld.%d\n",
			d->aoemajor, d->aoeminor);
		goto err_tagset;
	}

	spin_lock_irqsave(&d->lock, flags);
	WARN_ON(!(d->flags & DEVFL_GD_NOW));
	WARN_ON(!(d->flags & DEVFL_GDALLOC));
	WARN_ON(d->flags & DEVFL_TKILL);
	WARN_ON(d->gd);
	WARN_ON(d->flags & DEVFL_UP);
	blk_queue_max_hw_sectors(gd->queue, BLK_DEF_MAX_SECTORS);
	blk_queue_io_opt(gd->queue, SZ_2M);
	d->bufpool = mp;
	d->blkq = gd->queue;
	d->gd = gd;
	if (aoe_maxsectors)
		blk_queue_max_hw_sectors(gd->queue, aoe_maxsectors);
	gd->major = AOE_MAJOR;
	gd->first_minor = d->sysminor;
	gd->minors = AOE_PARTITIONS;
	gd->fops = &aoe_bdops;
	gd->private_data = d;
	ssize = d->ssize;
	snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
		d->aoemajor, d->aoeminor);

	d->flags &= ~DEVFL_GDALLOC;
	d->flags |= DEVFL_UP;

	spin_unlock_irqrestore(&d->lock, flags);

	set_capacity(gd, ssize);

	err = device_add_disk(NULL, gd, aoe_attr_groups);
	if (err)
		goto out_disk_cleanup;
	aoedisk_add_debugfs(d);

	spin_lock_irqsave(&d->lock, flags);
	WARN_ON(!(d->flags & DEVFL_GD_NOW));
	d->flags &= ~DEVFL_GD_NOW;
	spin_unlock_irqrestore(&d->lock, flags);
	return;

out_disk_cleanup:
	put_disk(gd);
err_tagset:
	blk_mq_free_tag_set(set);
err_mempool:
	mempool_destroy(mp);
err:
	spin_lock_irqsave(&d->lock, flags);
	d->flags &= ~DEVFL_GD_NOW;
	queue_work(aoe_wq, &d->work);
	spin_unlock_irqrestore(&d->lock, flags);
}

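/* Module teardown: remove the debugfs tree and free the buf slab cache. */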
void
aoeblk_exit(void)
{
	debugfs_remove_recursive(aoe_debugfs_dir);
	aoe_debugfs_dir = NULL;
	kmem_cache_destroy(buf_pool_cache);
}

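/* Module init: create the buf slab cache and the "aoe" debugfs directory. */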
int __init
aoeblk_init(void)
{
	buf_pool_cache = kmem_cache_create("aoe_bufs",
					   sizeof(struct buf),
					   0, 0, NULL);
	if (buf_pool_cache == NULL)
		return -ENOMEM;
	aoe_debugfs_dir = debugfs_create_dir("aoe", NULL);
	return 0;
}