/* xref: /kernel/linux/linux-5.10/drivers/dax/bus.c (revision 8c2ecf20) */
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2017-2018 Intel Corporation. All rights reserved. */
#include <linux/memremap.h>
#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/dax.h>
#include <linux/io.h>
#include "dax-private.h"
#include "bus.h"

static struct class *dax_class;

static DEFINE_MUTEX(dax_bus_lock);

#define DAX_NAME_LEN 30
struct dax_id {
	struct list_head list;
	char dev_name[DAX_NAME_LEN];
};

static int dax_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
{
	/*
	 * We only ever expect to handle device-dax instances, i.e. the
	 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
	 */
	return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0);
}

static struct dax_device_driver *to_dax_drv(struct device_driver *drv)
{
	return container_of(drv, struct dax_device_driver, drv);
}

static struct dax_id *__dax_match_id(struct dax_device_driver *dax_drv,
		const char *dev_name)
{
	struct dax_id *dax_id;

	lockdep_assert_held(&dax_bus_lock);

	list_for_each_entry(dax_id, &dax_drv->ids, list)
		if (sysfs_streq(dax_id->dev_name, dev_name))
			return dax_id;
	return NULL;
}

static int dax_match_id(struct dax_device_driver *dax_drv, struct device *dev)
{
	int match;

	mutex_lock(&dax_bus_lock);
	match = !!__dax_match_id(dax_drv, dev_name(dev));
	mutex_unlock(&dax_bus_lock);

	return match;
}

enum id_action {
	ID_REMOVE,
	ID_ADD,
};

static ssize_t do_id_store(struct device_driver *drv, const char *buf,
		size_t count, enum id_action action)
{
	struct dax_device_driver *dax_drv = to_dax_drv(drv);
	unsigned int region_id, id;
	char devname[DAX_NAME_LEN];
	struct dax_id *dax_id;
	ssize_t rc = count;
	int fields;

	fields = sscanf(buf, "dax%d.%d", &region_id, &id);
	if (fields != 2)
		return -EINVAL;
	sprintf(devname, "dax%d.%d", region_id, id);
	if (!sysfs_streq(buf, devname))
		return -EINVAL;

	mutex_lock(&dax_bus_lock);
	dax_id = __dax_match_id(dax_drv, buf);
	if (!dax_id) {
		if (action == ID_ADD) {
			dax_id = kzalloc(sizeof(*dax_id), GFP_KERNEL);
			if (dax_id) {
				strncpy(dax_id->dev_name, buf, DAX_NAME_LEN);
				list_add(&dax_id->list, &dax_drv->ids);
			} else
				rc = -ENOMEM;
		} else
			/* nothing to remove */;
	} else if (action == ID_REMOVE) {
		list_del(&dax_id->list);
		kfree(dax_id);
	} else
		/* dax_id already added */;
	mutex_unlock(&dax_bus_lock);

	if (rc < 0)
		return rc;
	if (action == ID_ADD)
		rc = driver_attach(drv);
	if (rc)
		return rc;
	return count;
}

static ssize_t new_id_store(struct device_driver *drv, const char *buf,
		size_t count)
{
	return do_id_store(drv, buf, count, ID_ADD);
}
static DRIVER_ATTR_WO(new_id);

static ssize_t remove_id_store(struct device_driver *drv, const char *buf,
		size_t count)
{
	return do_id_store(drv, buf, count, ID_REMOVE);
}
static DRIVER_ATTR_WO(remove_id);
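
/*
 * Illustrative usage (not part of the driver): new_id/remove_id take a
 * device name in "dax%d.%d" form and add it to / remove it from the
 * driver's id list. A typical reassignment, e.g. handing dax0.0 from
 * device_dax to the kmem driver, looks like (device names are
 * examples):
 *
 *	# echo dax0.0 > /sys/bus/dax/drivers/device_dax/unbind
 *	# echo dax0.0 > /sys/bus/dax/drivers/kmem/new_id
 *
 * Writing to new_id also triggers driver_attach(), so the device binds
 * immediately if it is otherwise eligible.
 */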

static struct attribute *dax_drv_attrs[] = {
	&driver_attr_new_id.attr,
	&driver_attr_remove_id.attr,
	NULL,
};
ATTRIBUTE_GROUPS(dax_drv);

static int dax_bus_match(struct device *dev, struct device_driver *drv);

static bool is_static(struct dax_region *dax_region)
{
	return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
}

static u64 dev_dax_size(struct dev_dax *dev_dax)
{
	u64 size = 0;
	int i;

	device_lock_assert(&dev_dax->dev);

	for (i = 0; i < dev_dax->nr_range; i++)
		size += range_len(&dev_dax->ranges[i].range);

	return size;
}

static int dax_bus_probe(struct device *dev)
{
	struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	int rc;

	if (dev_dax_size(dev_dax) == 0 || dev_dax->id < 0)
		return -ENXIO;

	rc = dax_drv->probe(dev_dax);

	if (rc || is_static(dax_region))
		return rc;

	/*
	 * Track new seed creation only after successful probe of the
	 * previous seed.
	 */
	if (dax_region->seed == dev)
		dax_region->seed = NULL;

	return 0;
}

static int dax_bus_remove(struct device *dev)
{
	struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
	struct dev_dax *dev_dax = to_dev_dax(dev);

	return dax_drv->remove(dev_dax);
}

static struct bus_type dax_bus_type = {
	.name = "dax",
	.uevent = dax_bus_uevent,
	.match = dax_bus_match,
	.probe = dax_bus_probe,
	.remove = dax_bus_remove,
	.drv_groups = dax_drv_groups,
};

static int dax_bus_match(struct device *dev, struct device_driver *drv)
{
	struct dax_device_driver *dax_drv = to_dax_drv(drv);

	/*
	 * All but the 'device-dax' driver, which has 'match_always'
	 * set, require an exact id match.
	 */
	if (dax_drv->match_always)
		return 1;

	return dax_match_id(dax_drv, dev);
}

/*
 * Rely on the fact that drvdata is set before the attributes are
 * registered, and that the attributes are unregistered before drvdata
 * is cleared; together these guarantee that drvdata is always valid in
 * these attribute handlers.
 */
static ssize_t id_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);

	return sprintf(buf, "%d\n", dax_region->id);
}
static DEVICE_ATTR_RO(id);

static ssize_t region_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);

	return sprintf(buf, "%llu\n", (unsigned long long)
			resource_size(&dax_region->res));
}
static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
		region_size_show, NULL);

static ssize_t region_align_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);

	return sprintf(buf, "%u\n", dax_region->align);
}
static struct device_attribute dev_attr_region_align =
		__ATTR(align, 0400, region_align_show, NULL);

#define for_each_dax_region_resource(dax_region, res) \
	for (res = (dax_region)->res.child; res; res = res->sibling)

static unsigned long long dax_region_avail_size(struct dax_region *dax_region)
{
	resource_size_t size = resource_size(&dax_region->res);
	struct resource *res;

	device_lock_assert(dax_region->dev);

	for_each_dax_region_resource(dax_region, res)
		size -= resource_size(res);
	return size;
}

static ssize_t available_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	unsigned long long size;

	device_lock(dev);
	size = dax_region_avail_size(dax_region);
	device_unlock(dev);

	return sprintf(buf, "%llu\n", size);
}
static DEVICE_ATTR_RO(available_size);
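
/*
 * Worked example (values are hypothetical): for a 4 GiB dynamic region
 * whose resource tree holds one 1 GiB child allocation,
 * dax_region_avail_size() returns 4 GiB - 1 GiB = 3221225472 bytes,
 * which is what a read of the attribute reports:
 *
 *	# cat .../dax_region/available_size
 *	3221225472
 *
 * The region attributes live in the "dax_region" group on the region's
 * parent device; the path prefix depends on the parent bus.
 */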

static ssize_t seed_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	struct device *seed;
	ssize_t rc;

	if (is_static(dax_region))
		return -EINVAL;

	device_lock(dev);
	seed = dax_region->seed;
	rc = sprintf(buf, "%s\n", seed ? dev_name(seed) : "");
	device_unlock(dev);

	return rc;
}
static DEVICE_ATTR_RO(seed);

static ssize_t create_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	struct device *youngest;
	ssize_t rc;

	if (is_static(dax_region))
		return -EINVAL;

	device_lock(dev);
	youngest = dax_region->youngest;
	rc = sprintf(buf, "%s\n", youngest ? dev_name(youngest) : "");
	device_unlock(dev);

	return rc;
}

static ssize_t create_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	unsigned long long avail;
	ssize_t rc;
	int val;

	if (is_static(dax_region))
		return -EINVAL;

	rc = kstrtoint(buf, 0, &val);
	if (rc)
		return rc;
	if (val != 1)
		return -EINVAL;

	device_lock(dev);
	avail = dax_region_avail_size(dax_region);
	if (avail == 0)
		rc = -ENOSPC;
	else {
		struct dev_dax_data data = {
			.dax_region = dax_region,
			.size = 0,
			.id = -1,
		};
		struct dev_dax *dev_dax = devm_create_dev_dax(&data);

		if (IS_ERR(dev_dax))
			rc = PTR_ERR(dev_dax);
		else {
			/*
			 * To support crafting multiple new devices
			 * simultaneously, multiple seeds can be created,
			 * but only the first seed that has not yet been
			 * successfully bound is tracked as the region
			 * seed.
			 */
			if (!dax_region->seed)
				dax_region->seed = &dev_dax->dev;
			dax_region->youngest = &dev_dax->dev;
			rc = len;
		}
	}
	device_unlock(dev);

	return rc;
}
static DEVICE_ATTR_RW(create);
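
/*
 * Illustrative usage (not part of the driver): writing "1" to create
 * allocates a new 0-sized, dynamically-id'd seed device; reading the
 * attribute back reports the youngest device created so far (names are
 * examples):
 *
 *	# echo 1 > .../dax_region/create
 *	# cat .../dax_region/create
 *	dax0.1
 *
 * The new device stays a seed until it is given a size (via its 'size'
 * attribute) and successfully bound to a driver.
 */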

void kill_dev_dax(struct dev_dax *dev_dax)
{
	struct dax_device *dax_dev = dev_dax->dax_dev;
	struct inode *inode = dax_inode(dax_dev);

	kill_dax(dax_dev);
	unmap_mapping_range(inode->i_mapping, 0, 0, 1);
}
EXPORT_SYMBOL_GPL(kill_dev_dax);

static void trim_dev_dax_range(struct dev_dax *dev_dax)
{
	int i = dev_dax->nr_range - 1;
	struct range *range = &dev_dax->ranges[i].range;
	struct dax_region *dax_region = dev_dax->region;

	device_lock_assert(dax_region->dev);
	dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i,
		(unsigned long long)range->start,
		(unsigned long long)range->end);

	__release_region(&dax_region->res, range->start, range_len(range));
	if (--dev_dax->nr_range == 0) {
		kfree(dev_dax->ranges);
		dev_dax->ranges = NULL;
	}
}

static void free_dev_dax_ranges(struct dev_dax *dev_dax)
{
	while (dev_dax->nr_range)
		trim_dev_dax_range(dev_dax);
}

static void unregister_dev_dax(void *dev)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	dev_dbg(dev, "%s\n", __func__);

	kill_dev_dax(dev_dax);
	device_del(dev);
	free_dev_dax_ranges(dev_dax);
	put_device(dev);
}

static void dax_region_free(struct kref *kref)
{
	struct dax_region *dax_region;

	dax_region = container_of(kref, struct dax_region, kref);
	kfree(dax_region);
}

void dax_region_put(struct dax_region *dax_region)
{
	kref_put(&dax_region->kref, dax_region_free);
}
EXPORT_SYMBOL_GPL(dax_region_put);

/* a return value >= 0 indicates this invocation invalidated the id */
static int __free_dev_dax_id(struct dev_dax *dev_dax)
{
	struct device *dev = &dev_dax->dev;
	struct dax_region *dax_region;
	int rc = dev_dax->id;

	device_lock_assert(dev);

	if (!dev_dax->dyn_id || dev_dax->id < 0)
		return -1;
	dax_region = dev_dax->region;
	ida_free(&dax_region->ida, dev_dax->id);
	dax_region_put(dax_region);
	dev_dax->id = -1;
	return rc;
}

static int free_dev_dax_id(struct dev_dax *dev_dax)
{
	struct device *dev = &dev_dax->dev;
	int rc;

	device_lock(dev);
	rc = __free_dev_dax_id(dev_dax);
	device_unlock(dev);
	return rc;
}

static int alloc_dev_dax_id(struct dev_dax *dev_dax)
{
	struct dax_region *dax_region = dev_dax->region;
	int id;

	id = ida_alloc(&dax_region->ida, GFP_KERNEL);
	if (id < 0)
		return id;
	kref_get(&dax_region->kref);
	dev_dax->dyn_id = true;
	dev_dax->id = id;
	return id;
}

static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	struct dev_dax *dev_dax;
	struct device *victim;
	bool do_del = false;
	int rc;

	if (is_static(dax_region))
		return -EINVAL;

	victim = device_find_child_by_name(dax_region->dev, buf);
	if (!victim)
		return -ENXIO;

	device_lock(dev);
	device_lock(victim);
	dev_dax = to_dev_dax(victim);
	if (victim->driver || dev_dax_size(dev_dax))
		rc = -EBUSY;
	else {
		/*
		 * Invalidate the device so it does not become active
		 * again, but always preserve device-id-0 so that
		 * /sys/bus/dax/ is guaranteed to be populated while any
		 * dax_region is registered.
		 */
		if (dev_dax->id > 0) {
			do_del = __free_dev_dax_id(dev_dax) >= 0;
			rc = len;
			if (dax_region->seed == victim)
				dax_region->seed = NULL;
			if (dax_region->youngest == victim)
				dax_region->youngest = NULL;
		} else
			rc = -EBUSY;
	}
	device_unlock(victim);

	/* won the race to invalidate the device, clean it up */
	if (do_del)
		devm_release_action(dev, unregister_dev_dax, victim);
	device_unlock(dev);
	put_device(victim);

	return rc;
}
static DEVICE_ATTR_WO(delete);
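
/*
 * Illustrative usage (not part of the driver): a device can be deleted
 * only while it is unbound and 0-sized, and device-id-0 is always
 * preserved (names are examples):
 *
 *	# echo dax0.1 > .../dax_region/delete
 *
 * Attempting the same on a bound, sized, or id-0 device fails with
 * -EBUSY.
 */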

static umode_t dax_region_visible(struct kobject *kobj, struct attribute *a,
		int n)
{
	struct device *dev = container_of(kobj, struct device, kobj);
	struct dax_region *dax_region = dev_get_drvdata(dev);

	if (is_static(dax_region))
		if (a == &dev_attr_available_size.attr
				|| a == &dev_attr_create.attr
				|| a == &dev_attr_seed.attr
				|| a == &dev_attr_delete.attr)
			return 0;
	return a->mode;
}

static struct attribute *dax_region_attributes[] = {
	&dev_attr_available_size.attr,
	&dev_attr_region_size.attr,
	&dev_attr_region_align.attr,
	&dev_attr_create.attr,
	&dev_attr_seed.attr,
	&dev_attr_delete.attr,
	&dev_attr_id.attr,
	NULL,
};

static const struct attribute_group dax_region_attribute_group = {
	.name = "dax_region",
	.attrs = dax_region_attributes,
	.is_visible = dax_region_visible,
};

static const struct attribute_group *dax_region_attribute_groups[] = {
	&dax_region_attribute_group,
	NULL,
};

static void dax_region_unregister(void *region)
{
	struct dax_region *dax_region = region;

	sysfs_remove_groups(&dax_region->dev->kobj,
			dax_region_attribute_groups);
	dax_region_put(dax_region);
}

struct dax_region *alloc_dax_region(struct device *parent, int region_id,
		struct range *range, int target_node, unsigned int align,
		unsigned long flags)
{
	struct dax_region *dax_region;

	/*
	 * The DAX core assumes that it can store its private data in
	 * parent->driver_data. This WARN is a reminder / safeguard for
	 * developers of device-dax drivers.
	 */
	if (dev_get_drvdata(parent)) {
		dev_WARN(parent, "dax core failed to setup private data\n");
		return NULL;
	}

	if (!IS_ALIGNED(range->start, align)
			|| !IS_ALIGNED(range_len(range), align))
		return NULL;

	dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
	if (!dax_region)
		return NULL;

	dev_set_drvdata(parent, dax_region);
	kref_init(&dax_region->kref);
	dax_region->id = region_id;
	dax_region->align = align;
	dax_region->dev = parent;
	dax_region->target_node = target_node;
	ida_init(&dax_region->ida);
	dax_region->res = (struct resource) {
		.start = range->start,
		.end = range->end,
		.flags = IORESOURCE_MEM | flags,
	};

	if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
		kfree(dax_region);
		return NULL;
	}

	kref_get(&dax_region->kref);
	if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region))
		return NULL;
	return dax_region;
}
EXPORT_SYMBOL_GPL(alloc_dax_region);
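
/*
 * Illustrative sketch (not part of the driver) of how a hypothetical
 * platform driver could register a static region and a whole-region
 * device, mirroring the shape of in-tree callers like dax_pmem; the
 * addresses and ids below are made up:
 *
 *	struct range range = {
 *		.start = 0x100000000ULL,
 *		.end = 0x1ffffffffULL,
 *	};
 *	struct dax_region *dax_region;
 *	struct dev_dax_data data;
 *
 *	dax_region = alloc_dax_region(dev, 0, &range, NUMA_NO_NODE,
 *			SZ_2M, IORESOURCE_DAX_STATIC);
 *	if (!dax_region)
 *		return -ENOMEM;
 *	data = (struct dev_dax_data) {
 *		.dax_region = dax_region,
 *		.id = 0,
 *		.size = range_len(&range),
 *		.subsys = DEV_DAX_BUS,
 *	};
 *	return PTR_ERR_OR_ZERO(devm_create_dev_dax(&data));
 */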

static void dax_mapping_release(struct device *dev)
{
	struct dax_mapping *mapping = to_dax_mapping(dev);
	struct device *parent = dev->parent;
	struct dev_dax *dev_dax = to_dev_dax(parent);

	ida_free(&dev_dax->ida, mapping->id);
	kfree(mapping);
	put_device(parent);
}

static void unregister_dax_mapping(void *data)
{
	struct device *dev = data;
	struct dax_mapping *mapping = to_dax_mapping(dev);
	struct dev_dax *dev_dax = to_dev_dax(dev->parent);
	struct dax_region *dax_region = dev_dax->region;

	dev_dbg(dev, "%s\n", __func__);

	device_lock_assert(dax_region->dev);

	dev_dax->ranges[mapping->range_id].mapping = NULL;
	mapping->range_id = -1;

	device_del(dev);
	put_device(dev);
}

static struct dev_dax_range *get_dax_range(struct device *dev)
{
	struct dax_mapping *mapping = to_dax_mapping(dev);
	struct dev_dax *dev_dax = to_dev_dax(dev->parent);
	struct dax_region *dax_region = dev_dax->region;

	device_lock(dax_region->dev);
	if (mapping->range_id < 0) {
		device_unlock(dax_region->dev);
		return NULL;
	}

	return &dev_dax->ranges[mapping->range_id];
}

static void put_dax_range(struct dev_dax_range *dax_range)
{
	struct dax_mapping *mapping = dax_range->mapping;
	struct dev_dax *dev_dax = to_dev_dax(mapping->dev.parent);
	struct dax_region *dax_region = dev_dax->region;

	device_unlock(dax_region->dev);
}

static ssize_t start_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax_range *dax_range;
	ssize_t rc;

	dax_range = get_dax_range(dev);
	if (!dax_range)
		return -ENXIO;
	rc = sprintf(buf, "%#llx\n", dax_range->range.start);
	put_dax_range(dax_range);

	return rc;
}
static DEVICE_ATTR(start, 0400, start_show, NULL);

static ssize_t end_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax_range *dax_range;
	ssize_t rc;

	dax_range = get_dax_range(dev);
	if (!dax_range)
		return -ENXIO;
	rc = sprintf(buf, "%#llx\n", dax_range->range.end);
	put_dax_range(dax_range);

	return rc;
}
static DEVICE_ATTR(end, 0400, end_show, NULL);

static ssize_t pgoff_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax_range *dax_range;
	ssize_t rc;

	dax_range = get_dax_range(dev);
	if (!dax_range)
		return -ENXIO;
	rc = sprintf(buf, "%#lx\n", dax_range->pgoff);
	put_dax_range(dax_range);

	return rc;
}
static DEVICE_ATTR(page_offset, 0400, pgoff_show, NULL);

static struct attribute *dax_mapping_attributes[] = {
	&dev_attr_start.attr,
	&dev_attr_end.attr,
	&dev_attr_page_offset.attr,
	NULL,
};

static const struct attribute_group dax_mapping_attribute_group = {
	.attrs = dax_mapping_attributes,
};

static const struct attribute_group *dax_mapping_attribute_groups[] = {
	&dax_mapping_attribute_group,
	NULL,
};

static struct device_type dax_mapping_type = {
	.release = dax_mapping_release,
	.groups = dax_mapping_attribute_groups,
};

static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id)
{
	struct dax_region *dax_region = dev_dax->region;
	struct dax_mapping *mapping;
	struct device *dev;
	int rc;

	device_lock_assert(dax_region->dev);

	if (dev_WARN_ONCE(&dev_dax->dev, !dax_region->dev->driver,
				"region disabled\n"))
		return -ENXIO;

	mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
	if (!mapping)
		return -ENOMEM;
	mapping->range_id = range_id;
	mapping->id = ida_alloc(&dev_dax->ida, GFP_KERNEL);
	if (mapping->id < 0) {
		kfree(mapping);
		return -ENOMEM;
	}
	dev_dax->ranges[range_id].mapping = mapping;
	dev = &mapping->dev;
	device_initialize(dev);
	dev->parent = &dev_dax->dev;
	get_device(dev->parent);
	dev->type = &dax_mapping_type;
	dev_set_name(dev, "mapping%d", mapping->id);
	rc = device_add(dev);
	if (rc) {
		put_device(dev);
		return rc;
	}

	rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_mapping,
			dev);
	if (rc)
		return rc;
	return 0;
}

static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start,
		resource_size_t size)
{
	struct dax_region *dax_region = dev_dax->region;
	struct resource *res = &dax_region->res;
	struct device *dev = &dev_dax->dev;
	struct dev_dax_range *ranges;
	unsigned long pgoff = 0;
	struct resource *alloc;
	int i, rc;

	device_lock_assert(dax_region->dev);

	/* handle the seed alloc special case */
	if (!size) {
		if (dev_WARN_ONCE(dev, dev_dax->nr_range,
					"0-size allocation must be first\n"))
			return -EBUSY;
		/* nr_range == 0 is elsewhere special cased as 0-size device */
		return 0;
	}

	ranges = krealloc(dev_dax->ranges, sizeof(*ranges)
			* (dev_dax->nr_range + 1), GFP_KERNEL);
	if (!ranges)
		return -ENOMEM;

	alloc = __request_region(res, start, size, dev_name(dev), 0);
	if (!alloc) {
		/*
		 * If this was an empty set of ranges nothing else
		 * will release @ranges, so do it now.
		 */
		if (!dev_dax->nr_range) {
			kfree(ranges);
			ranges = NULL;
		}
		dev_dax->ranges = ranges;
		return -ENOMEM;
	}

	for (i = 0; i < dev_dax->nr_range; i++)
		pgoff += PHYS_PFN(range_len(&ranges[i].range));
	dev_dax->ranges = ranges;
	ranges[dev_dax->nr_range++] = (struct dev_dax_range) {
		.pgoff = pgoff,
		.range = {
			.start = alloc->start,
			.end = alloc->end,
		},
	};

	dev_dbg(dev, "alloc range[%d]: %pa:%pa\n", dev_dax->nr_range - 1,
			&alloc->start, &alloc->end);
	/*
	 * A dev_dax instance must be registered before mapping device
	 * children can be added. Defer to devm_create_dev_dax() to add
	 * the initial mapping device.
	 */
	if (!device_is_registered(&dev_dax->dev))
		return 0;

	rc = devm_register_dax_mapping(dev_dax, dev_dax->nr_range - 1);
	if (rc)
		trim_dev_dax_range(dev_dax);

	return rc;
}

static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, resource_size_t size)
{
	int last_range = dev_dax->nr_range - 1;
	struct dev_dax_range *dax_range = &dev_dax->ranges[last_range];
	struct dax_region *dax_region = dev_dax->region;
	bool is_shrink = resource_size(res) > size;
	struct range *range = &dax_range->range;
	struct device *dev = &dev_dax->dev;
	int rc;

	device_lock_assert(dax_region->dev);

	if (dev_WARN_ONCE(dev, !size, "deletion is handled by dev_dax_shrink\n"))
		return -EINVAL;

	rc = adjust_resource(res, range->start, size);
	if (rc)
		return rc;

	*range = (struct range) {
		.start = range->start,
		.end = range->start + size - 1,
	};

	dev_dbg(dev, "%s range[%d]: %#llx:%#llx\n", is_shrink ? "shrink" : "extend",
			last_range, (unsigned long long) range->start,
			(unsigned long long) range->end);

	return 0;
}

static ssize_t size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	unsigned long long size;

	device_lock(dev);
	size = dev_dax_size(dev_dax);
	device_unlock(dev);

	return sprintf(buf, "%llu\n", size);
}

static bool alloc_is_aligned(struct dev_dax *dev_dax, resource_size_t size)
{
	/*
	 * The minimum mapping granularity for a device instance is a
	 * single subsection, unless the arch says otherwise.
	 */
	return IS_ALIGNED(size, max_t(unsigned long, dev_dax->align, memremap_compat_align()));
}
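
/*
 * Example of the check above (assuming memremap_compat_align() returns
 * its typical value of SUBSECTION_SIZE, 2 MiB on x86): with
 * dev_dax->align == 4096 the effective granularity is still 2 MiB, so
 * a 6 MiB allocation passes while a 1 MiB allocation is rejected; with
 * dev_dax->align == 1 GiB, only 1 GiB multiples pass.
 */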

static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size)
{
	resource_size_t to_shrink = dev_dax_size(dev_dax) - size;
	struct dax_region *dax_region = dev_dax->region;
	struct device *dev = &dev_dax->dev;
	int i;

	for (i = dev_dax->nr_range - 1; i >= 0; i--) {
		struct range *range = &dev_dax->ranges[i].range;
		struct dax_mapping *mapping = dev_dax->ranges[i].mapping;
		struct resource *adjust = NULL, *res;
		resource_size_t shrink;

		shrink = min_t(u64, to_shrink, range_len(range));
		if (shrink >= range_len(range)) {
			devm_release_action(dax_region->dev,
					unregister_dax_mapping, &mapping->dev);
			trim_dev_dax_range(dev_dax);
			to_shrink -= shrink;
			if (!to_shrink)
				break;
			continue;
		}

		for_each_dax_region_resource(dax_region, res)
			if (strcmp(res->name, dev_name(dev)) == 0
					&& res->start == range->start) {
				adjust = res;
				break;
			}

		if (dev_WARN_ONCE(dev, !adjust || i != dev_dax->nr_range - 1,
					"failed to find matching resource\n"))
			return -ENXIO;
		return adjust_dev_dax_range(dev_dax, adjust, range_len(range)
				- shrink);
	}
	return 0;
}

/*
 * Only allow adjustments that preserve the relative pgoff of existing
 * allocations. I.e. the dev_dax->ranges array is ordered by increasing pgoff.
 */
static bool adjust_ok(struct dev_dax *dev_dax, struct resource *res)
{
	struct dev_dax_range *last;
	int i;

	if (dev_dax->nr_range == 0)
		return false;
	if (strcmp(res->name, dev_name(&dev_dax->dev)) != 0)
		return false;
	last = &dev_dax->ranges[dev_dax->nr_range - 1];
	if (last->range.start != res->start || last->range.end != res->end)
		return false;
	for (i = 0; i < dev_dax->nr_range - 1; i++) {
		struct dev_dax_range *dax_range = &dev_dax->ranges[i];

		if (dax_range->pgoff > last->pgoff)
			return false;
	}

	return true;
}

static ssize_t dev_dax_resize(struct dax_region *dax_region,
		struct dev_dax *dev_dax, resource_size_t size)
{
	resource_size_t avail = dax_region_avail_size(dax_region), to_alloc;
	resource_size_t dev_size = dev_dax_size(dev_dax);
	struct resource *region_res = &dax_region->res;
	struct device *dev = &dev_dax->dev;
	struct resource *res, *first;
	resource_size_t alloc = 0;
	int rc;

	if (dev->driver)
		return -EBUSY;
	if (size == dev_size)
		return 0;
	if (size > dev_size && size - dev_size > avail)
		return -ENOSPC;
	if (size < dev_size)
		return dev_dax_shrink(dev_dax, size);

	to_alloc = size - dev_size;
	if (dev_WARN_ONCE(dev, !alloc_is_aligned(dev_dax, to_alloc),
			"resize of %pa misaligned\n", &to_alloc))
		return -ENXIO;

	/*
	 * Expand the device into the unused portion of the region. This
	 * may involve adjusting the end of an existing resource, or
	 * allocating a new resource.
	 */
retry:
	first = region_res->child;
	if (!first)
		return alloc_dev_dax_range(dev_dax, dax_region->res.start, to_alloc);

	rc = -ENOSPC;
	for (res = first; res; res = res->sibling) {
		struct resource *next = res->sibling;

		/* space at the beginning of the region */
		if (res == first && res->start > dax_region->res.start) {
			alloc = min(res->start - dax_region->res.start, to_alloc);
			rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, alloc);
			break;
		}

		alloc = 0;
		/* space between allocations */
		if (next && next->start > res->end + 1)
			alloc = min(next->start - (res->end + 1), to_alloc);

		/* space at the end of the region */
		if (!alloc && !next && res->end < region_res->end)
			alloc = min(region_res->end - res->end, to_alloc);

		if (!alloc)
			continue;

		if (adjust_ok(dev_dax, res)) {
			rc = adjust_dev_dax_range(dev_dax, res, resource_size(res) + alloc);
			break;
		}
		rc = alloc_dev_dax_range(dev_dax, res->end + 1, alloc);
		break;
	}
	if (rc)
		return rc;
	to_alloc -= alloc;
	if (to_alloc)
		goto retry;
	return 0;
}

static ssize_t size_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	ssize_t rc;
	unsigned long long val;
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;

	rc = kstrtoull(buf, 0, &val);
	if (rc)
		return rc;

	if (!alloc_is_aligned(dev_dax, val)) {
		dev_dbg(dev, "%s: size: %lld misaligned\n", __func__, val);
		return -EINVAL;
	}

	device_lock(dax_region->dev);
	if (!dax_region->dev->driver) {
		device_unlock(dax_region->dev);
		return -ENXIO;
	}
	device_lock(dev);
	rc = dev_dax_resize(dax_region, dev_dax, val);
	device_unlock(dev);
	device_unlock(dax_region->dev);

	return rc == 0 ? len : rc;
}
static DEVICE_ATTR_RW(size);
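
/*
 * Illustrative usage (not part of the driver): on a dynamic region the
 * 'size' attribute grows or shrinks a device's allocation while it is
 * unbound; the value must satisfy alloc_is_aligned() (names and values
 * are examples):
 *
 *	# echo 2147483648 > /sys/bus/dax/devices/dax0.1/size
 *
 * Writing 0 releases all of the device's ranges back to the region.
 * On static regions the attribute is read-only.
 */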

static ssize_t range_parse(const char *opt, size_t len, struct range *range)
{
	unsigned long long addr = 0;
	char *start, *end, *str;
	ssize_t rc = -EINVAL;

	str = kstrdup(opt, GFP_KERNEL);
	if (!str)
		return rc;

	end = str;
	start = strsep(&end, "-");
	if (!start || !end)
		goto err;

	rc = kstrtoull(start, 16, &addr);
	if (rc)
		goto err;
	range->start = addr;

	rc = kstrtoull(end, 16, &addr);
	if (rc)
		goto err;
	range->end = addr;

err:
	kfree(str);
	return rc;
}

static ssize_t mapping_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	size_t to_alloc;
	struct range r;
	ssize_t rc;

	rc = range_parse(buf, len, &r);
	if (rc)
		return rc;

	rc = -ENXIO;
	device_lock(dax_region->dev);
	if (!dax_region->dev->driver) {
		device_unlock(dax_region->dev);
		return rc;
	}
	device_lock(dev);

	to_alloc = range_len(&r);
	if (alloc_is_aligned(dev_dax, to_alloc))
		rc = alloc_dev_dax_range(dev_dax, r.start, to_alloc);
	device_unlock(dev);
	device_unlock(dax_region->dev);

	return rc == 0 ? len : rc;
}
static DEVICE_ATTR_WO(mapping);
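
/*
 * Illustrative usage (not part of the driver): 'mapping' requests a
 * specific allocation, which lets tooling such as daxctl reconstruct a
 * device's exact set of ranges. The input is an inclusive hexadecimal
 * "<start>-<end>" range (values are examples):
 *
 *	# echo 0x200000000-0x23fffffff > /sys/bus/dax/devices/dax0.1/mapping
 *
 * The range length must satisfy alloc_is_aligned(), and the attribute
 * is only visible on dynamic regions.
 */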

static ssize_t align_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	return sprintf(buf, "%d\n", dev_dax->align);
}

static ssize_t dev_dax_validate_align(struct dev_dax *dev_dax)
{
	resource_size_t dev_size = dev_dax_size(dev_dax);
	struct device *dev = &dev_dax->dev;
	int i;

	if (dev_size > 0 && !alloc_is_aligned(dev_dax, dev_size)) {
		dev_dbg(dev, "%s: align %u invalid for size %pa\n",
			__func__, dev_dax->align, &dev_size);
		return -EINVAL;
	}

	for (i = 0; i < dev_dax->nr_range; i++) {
		size_t len = range_len(&dev_dax->ranges[i].range);

		if (!alloc_is_aligned(dev_dax, len)) {
			dev_dbg(dev, "%s: align %u invalid for range %d\n",
				__func__, dev_dax->align, i);
			return -EINVAL;
		}
	}

	return 0;
}

static ssize_t align_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	unsigned long val, align_save;
	ssize_t rc;

	rc = kstrtoul(buf, 0, &val);
	if (rc)
		return -ENXIO;

	if (!dax_align_valid(val))
		return -EINVAL;

	device_lock(dax_region->dev);
	if (!dax_region->dev->driver) {
		device_unlock(dax_region->dev);
		return -ENXIO;
	}

	device_lock(dev);
	if (dev->driver) {
		rc = -EBUSY;
		goto out_unlock;
	}

	align_save = dev_dax->align;
	dev_dax->align = val;
	rc = dev_dax_validate_align(dev_dax);
	if (rc)
		dev_dax->align = align_save;
out_unlock:
	device_unlock(dev);
	device_unlock(dax_region->dev);
	return rc == 0 ? len : rc;
}
static DEVICE_ATTR_RW(align);
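
/*
 * Illustrative usage (not part of the driver): 'align' accepts a valid
 * mapping size for the architecture (e.g. 4096, 2097152, or 1073741824
 * on x86) and can only be changed while the device is unbound:
 *
 *	# echo 2097152 > /sys/bus/dax/devices/dax0.1/align
 *
 * The write is rejected if any existing allocation would become
 * misaligned under the new value.
 */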

static int dev_dax_target_node(struct dev_dax *dev_dax)
{
	struct dax_region *dax_region = dev_dax->region;

	return dax_region->target_node;
}

static ssize_t target_node_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	return sprintf(buf, "%d\n", dev_dax_target_node(dev_dax));
}
static DEVICE_ATTR_RO(target_node);

static ssize_t resource_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	unsigned long long start;

	if (dev_dax->nr_range < 1)
		start = dax_region->res.start;
	else
		start = dev_dax->ranges[0].range.start;

	return sprintf(buf, "%#llx\n", start);
}
static DEVICE_ATTR(resource, 0400, resource_show, NULL);

static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	/*
	 * We only ever expect to handle device-dax instances, i.e. the
	 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
	 */
	return sprintf(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0);
}
static DEVICE_ATTR_RO(modalias);

static ssize_t numa_node_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", dev_to_node(dev));
}
static DEVICE_ATTR_RO(numa_node);

static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
{
	struct device *dev = container_of(kobj, struct device, kobj);
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;

	if (a == &dev_attr_target_node.attr && dev_dax_target_node(dev_dax) < 0)
		return 0;
	if (a == &dev_attr_numa_node.attr && !IS_ENABLED(CONFIG_NUMA))
		return 0;
	if (a == &dev_attr_mapping.attr && is_static(dax_region))
		return 0;
	if ((a == &dev_attr_align.attr ||
	     a == &dev_attr_size.attr) && is_static(dax_region))
		return 0444;
	return a->mode;
}

static struct attribute *dev_dax_attributes[] = {
	&dev_attr_modalias.attr,
	&dev_attr_size.attr,
	&dev_attr_mapping.attr,
	&dev_attr_target_node.attr,
	&dev_attr_align.attr,
	&dev_attr_resource.attr,
	&dev_attr_numa_node.attr,
	NULL,
};

static const struct attribute_group dev_dax_attribute_group = {
	.attrs = dev_dax_attributes,
	.is_visible = dev_dax_visible,
};

static const struct attribute_group *dax_attribute_groups[] = {
	&dev_dax_attribute_group,
	NULL,
};

static void dev_dax_release(struct device *dev)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_device *dax_dev = dev_dax->dax_dev;

	put_dax(dax_dev);
	free_dev_dax_id(dev_dax);
	kfree(dev_dax->pgmap);
	kfree(dev_dax);
}

static const struct device_type dev_dax_type = {
	.release = dev_dax_release,
	.groups = dax_attribute_groups,
};

struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
{
	struct dax_region *dax_region = data->dax_region;
	struct device *parent = dax_region->dev;
	struct dax_device *dax_dev;
	struct dev_dax *dev_dax;
	struct inode *inode;
	struct device *dev;
	int rc;

	dev_dax = kzalloc(sizeof(*dev_dax), GFP_KERNEL);
	if (!dev_dax)
		return ERR_PTR(-ENOMEM);

	dev_dax->region = dax_region;
	if (is_static(dax_region)) {
		if (dev_WARN_ONCE(parent, data->id < 0,
				"dynamic id specified to static region\n")) {
			rc = -EINVAL;
			goto err_id;
		}

		dev_dax->id = data->id;
	} else {
		if (dev_WARN_ONCE(parent, data->id >= 0,
				"static id specified to dynamic region\n")) {
			rc = -EINVAL;
			goto err_id;
		}

		rc = alloc_dev_dax_id(dev_dax);
		if (rc < 0)
			goto err_id;
	}

	dev = &dev_dax->dev;
	device_initialize(dev);
	dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id);

	rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, data->size);
	if (rc)
		goto err_range;

	if (data->pgmap) {
		dev_WARN_ONCE(parent, !is_static(dax_region),
			"custom dev_pagemap requires a static dax_region\n");

		dev_dax->pgmap = kmemdup(data->pgmap,
				sizeof(struct dev_pagemap), GFP_KERNEL);
		if (!dev_dax->pgmap) {
			rc = -ENOMEM;
			goto err_pgmap;
		}
	}

	/*
	 * No 'host' or dax_operations since there is no access to this
	 * device outside of mmap of the resulting character device.
	 */
	dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC);
	if (IS_ERR(dax_dev)) {
		rc = PTR_ERR(dax_dev);
		goto err_alloc_dax;
	}

	/* a device_dax instance is dead while the driver is not attached */
	kill_dax(dax_dev);

	dev_dax->dax_dev = dax_dev;
	dev_dax->target_node = dax_region->target_node;
	dev_dax->align = dax_region->align;
	ida_init(&dev_dax->ida);

	inode = dax_inode(dax_dev);
	dev->devt = inode->i_rdev;
	if (data->subsys == DEV_DAX_BUS)
		dev->bus = &dax_bus_type;
	else
		dev->class = dax_class;
	dev->parent = parent;
	dev->type = &dev_dax_type;

	rc = device_add(dev);
	if (rc) {
		kill_dev_dax(dev_dax);
		put_device(dev);
		return ERR_PTR(rc);
	}

	rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev);
	if (rc)
		return ERR_PTR(rc);

	/* register mapping device for the initial allocation range */
	if (dev_dax->nr_range && range_len(&dev_dax->ranges[0].range)) {
		rc = devm_register_dax_mapping(dev_dax, 0);
		if (rc)
			return ERR_PTR(rc);
	}

	return dev_dax;

err_alloc_dax:
	kfree(dev_dax->pgmap);
err_pgmap:
	free_dev_dax_ranges(dev_dax);
err_range:
	free_dev_dax_id(dev_dax);
err_id:
	kfree(dev_dax);

	return ERR_PTR(rc);
}
EXPORT_SYMBOL_GPL(devm_create_dev_dax);

static int match_always_count;

int __dax_driver_register(struct dax_device_driver *dax_drv,
		struct module *module, const char *mod_name)
{
	struct device_driver *drv = &dax_drv->drv;
	int rc = 0;

	INIT_LIST_HEAD(&dax_drv->ids);
	drv->owner = module;
	drv->name = mod_name;
	drv->mod_name = mod_name;
	drv->bus = &dax_bus_type;

	/* there can only be one default driver */
	mutex_lock(&dax_bus_lock);
	match_always_count += dax_drv->match_always;
	if (match_always_count > 1) {
		match_always_count--;
		WARN_ON(1);
		rc = -EINVAL;
	}
	mutex_unlock(&dax_bus_lock);
	if (rc)
		return rc;
	return driver_register(drv);
}
EXPORT_SYMBOL_GPL(__dax_driver_register);
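
/*
 * Illustrative sketch (not part of the driver): dax drivers typically
 * register through the dax_driver_register() wrapper in bus.h, which
 * supplies THIS_MODULE and KBUILD_MODNAME to the function above; the
 * names below are made up:
 *
 *	static struct dax_device_driver example_dax_driver = {
 *		.probe = example_probe,
 *		.remove = example_remove,
 *	};
 *
 *	static int __init example_init(void)
 *	{
 *		return dax_driver_register(&example_dax_driver);
 *	}
 *	module_init(example_init);
 */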

void dax_driver_unregister(struct dax_device_driver *dax_drv)
{
	struct device_driver *drv = &dax_drv->drv;
	struct dax_id *dax_id, *_id;

	mutex_lock(&dax_bus_lock);
	match_always_count -= dax_drv->match_always;
	list_for_each_entry_safe(dax_id, _id, &dax_drv->ids, list) {
		list_del(&dax_id->list);
		kfree(dax_id);
	}
	mutex_unlock(&dax_bus_lock);
	driver_unregister(drv);
}
EXPORT_SYMBOL_GPL(dax_driver_unregister);

int __init dax_bus_init(void)
{
	int rc;

	if (IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)) {
		dax_class = class_create(THIS_MODULE, "dax");
		if (IS_ERR(dax_class))
			return PTR_ERR(dax_class);
	}

	rc = bus_register(&dax_bus_type);
	if (rc)
		class_destroy(dax_class);
	return rc;
}

void __exit dax_bus_exit(void)
{
	bus_unregister(&dax_bus_type);
	class_destroy(dax_class);
}