// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2017-2018 Intel Corporation. All rights reserved. */
#include <linux/memremap.h>
#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/dax.h>
#include <linux/io.h>
#include "dax-private.h"
#include "bus.h"

static struct class *dax_class;

static DEFINE_MUTEX(dax_bus_lock);

#define DAX_NAME_LEN 30
struct dax_id {
	struct list_head list;
	char dev_name[DAX_NAME_LEN];
};

static int dax_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
{
	/*
	 * We only ever expect to handle device-dax instances, i.e. the
	 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
	 */
	return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0);
}

static struct dax_device_driver *to_dax_drv(struct device_driver *drv)
{
	return container_of(drv, struct dax_device_driver, drv);
}

static struct dax_id *__dax_match_id(struct dax_device_driver *dax_drv,
		const char *dev_name)
{
	struct dax_id *dax_id;

	lockdep_assert_held(&dax_bus_lock);

	list_for_each_entry(dax_id, &dax_drv->ids, list)
		if (sysfs_streq(dax_id->dev_name, dev_name))
			return dax_id;
	return NULL;
}

static int dax_match_id(struct dax_device_driver *dax_drv, struct device *dev)
{
	int match;

	mutex_lock(&dax_bus_lock);
	match = !!__dax_match_id(dax_drv, dev_name(dev));
	mutex_unlock(&dax_bus_lock);

	return match;
}

enum id_action {
	ID_REMOVE,
	ID_ADD,
};

static ssize_t do_id_store(struct device_driver *drv, const char *buf,
		size_t count, enum id_action action)
{
	struct dax_device_driver *dax_drv = to_dax_drv(drv);
	unsigned int region_id, id;
	char devname[DAX_NAME_LEN];
	struct dax_id *dax_id;
	ssize_t rc = count;
	int fields;

	fields = sscanf(buf, "dax%d.%d", &region_id, &id);
	if (fields != 2)
		return -EINVAL;
	sprintf(devname, "dax%d.%d", region_id, id);
	if (!sysfs_streq(buf, devname))
		return -EINVAL;

	mutex_lock(&dax_bus_lock);
	dax_id = __dax_match_id(dax_drv, buf);
	if (!dax_id) {
		if (action == ID_ADD) {
			dax_id = kzalloc(sizeof(*dax_id), GFP_KERNEL);
			if (dax_id) {
				strncpy(dax_id->dev_name, buf, DAX_NAME_LEN);
				list_add(&dax_id->list, &dax_drv->ids);
			} else
				rc = -ENOMEM;
		} else
			/* nothing to remove */;
	} else if (action == ID_REMOVE) {
		list_del(&dax_id->list);
		kfree(dax_id);
	} else
		/* dax_id already added */;
	mutex_unlock(&dax_bus_lock);

	if (rc < 0)
		return rc;
	if (action == ID_ADD)
		rc = driver_attach(drv);
	if (rc)
		return rc;
	return count;
}

static ssize_t new_id_store(struct device_driver *drv, const char *buf,
		size_t count)
{
	return do_id_store(drv, buf, count, ID_ADD);
}
static DRIVER_ATTR_WO(new_id);

static ssize_t remove_id_store(struct device_driver *drv, const char *buf,
		size_t count)
{
	return do_id_store(drv, buf, count, ID_REMOVE);
}
static DRIVER_ATTR_WO(remove_id);

static struct attribute *dax_drv_attrs[] = {
	&driver_attr_new_id.attr,
	&driver_attr_remove_id.attr,
	NULL,
};
ATTRIBUTE_GROUPS(dax_drv);

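/*
 * Illustrative usage of the new_id/remove_id driver attributes defined
 * above (device and driver names are examples): dynamically route
 * dax0.0 away from the default device_dax driver to dax_kmem:
 *
 *	# echo dax0.0 > /sys/bus/dax/drivers/device_dax/remove_id
 *	# echo dax0.0 > /sys/bus/dax/drivers/kmem/new_id
 */
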
static int dax_bus_match(struct device *dev, struct device_driver *drv);

static bool is_static(struct dax_region *dax_region)
{
	return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
}

static u64 dev_dax_size(struct dev_dax *dev_dax)
{
	u64 size = 0;
	int i;

	device_lock_assert(&dev_dax->dev);

	for (i = 0; i < dev_dax->nr_range; i++)
		size += range_len(&dev_dax->ranges[i].range);

	return size;
}

static int dax_bus_probe(struct device *dev)
{
	struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	int rc;

	if (dev_dax_size(dev_dax) == 0 || dev_dax->id < 0)
		return -ENXIO;

	rc = dax_drv->probe(dev_dax);

	if (rc || is_static(dax_region))
		return rc;

	/*
	 * Track new seed creation only after successful probe of the
	 * previous seed.
	 */
	if (dax_region->seed == dev)
		dax_region->seed = NULL;

	return 0;
}

static int dax_bus_remove(struct device *dev)
{
	struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
	struct dev_dax *dev_dax = to_dev_dax(dev);

	return dax_drv->remove(dev_dax);
}

static struct bus_type dax_bus_type = {
	.name = "dax",
	.uevent = dax_bus_uevent,
	.match = dax_bus_match,
	.probe = dax_bus_probe,
	.remove = dax_bus_remove,
	.drv_groups = dax_drv_groups,
};

static int dax_bus_match(struct device *dev, struct device_driver *drv)
{
	struct dax_device_driver *dax_drv = to_dax_drv(drv);

	/*
	 * All but the 'device-dax' driver, which has 'match_always'
	 * set, require an exact id match.
	 */
	if (dax_drv->match_always)
		return 1;

	return dax_match_id(dax_drv, dev);
}

/*
 * Rely on the fact that drvdata is set before the attributes are
 * registered, and that the attributes are unregistered before drvdata
 * is cleared to assume that drvdata is always valid.
 */
static ssize_t id_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);

	return sprintf(buf, "%d\n", dax_region->id);
}
static DEVICE_ATTR_RO(id);

static ssize_t region_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);

	return sprintf(buf, "%llu\n", (unsigned long long)
			resource_size(&dax_region->res));
}
static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
		region_size_show, NULL);

static ssize_t region_align_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);

	return sprintf(buf, "%u\n", dax_region->align);
}
static struct device_attribute dev_attr_region_align =
		__ATTR(align, 0400, region_align_show, NULL);

#define for_each_dax_region_resource(dax_region, res) \
	for (res = (dax_region)->res.child; res; res = res->sibling)

static unsigned long long dax_region_avail_size(struct dax_region *dax_region)
{
	resource_size_t size = resource_size(&dax_region->res);
	struct resource *res;

	device_lock_assert(dax_region->dev);

	for_each_dax_region_resource(dax_region, res)
		size -= resource_size(res);
	return size;
}

static ssize_t available_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	unsigned long long size;

	device_lock(dev);
	size = dax_region_avail_size(dax_region);
	device_unlock(dev);

	return sprintf(buf, "%llu\n", size);
}
static DEVICE_ATTR_RO(available_size);

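/*
 * The attributes above (plus seed/create/delete below) are published
 * in a "dax_region" group on the region's parent device. Illustrative
 * layout, assuming a hypothetical platform device hosting the region:
 *
 *	/sys/devices/platform/hmem.0/dax_region/{id,size,align,
 *		available_size,seed,create,delete}
 */
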
static ssize_t seed_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	struct device *seed;
	ssize_t rc;

	if (is_static(dax_region))
		return -EINVAL;

	device_lock(dev);
	seed = dax_region->seed;
	rc = sprintf(buf, "%s\n", seed ? dev_name(seed) : "");
	device_unlock(dev);

	return rc;
}
static DEVICE_ATTR_RO(seed);

static ssize_t create_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	struct device *youngest;
	ssize_t rc;

	if (is_static(dax_region))
		return -EINVAL;

	device_lock(dev);
	youngest = dax_region->youngest;
	rc = sprintf(buf, "%s\n", youngest ? dev_name(youngest) : "");
	device_unlock(dev);

	return rc;
}

static ssize_t create_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	unsigned long long avail;
	ssize_t rc;
	int val;

	if (is_static(dax_region))
		return -EINVAL;

	rc = kstrtoint(buf, 0, &val);
	if (rc)
		return rc;
	if (val != 1)
		return -EINVAL;

	device_lock(dev);
	avail = dax_region_avail_size(dax_region);
	if (avail == 0)
		rc = -ENOSPC;
	else {
		struct dev_dax_data data = {
			.dax_region = dax_region,
			.size = 0,
			.id = -1,
		};
		struct dev_dax *dev_dax = devm_create_dev_dax(&data);

		if (IS_ERR(dev_dax))
			rc = PTR_ERR(dev_dax);
		else {
			/*
			 * In support of crafting multiple new devices
			 * simultaneously, multiple seeds can be created,
			 * but only the first one that has not been
			 * successfully bound is tracked as the region
			 * seed.
			 */
			if (!dax_region->seed)
				dax_region->seed = &dev_dax->dev;
			dax_region->youngest = &dev_dax->dev;
			rc = len;
		}
	}
	device_unlock(dev);

	return rc;
}
static DEVICE_ATTR_RW(create);

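/*
 * Illustrative flow for carving a new device out of a dynamic region
 * (paths and names are examples; 'delete' is defined further below).
 * Reading 'create' back reports the youngest seed device:
 *
 *	# echo 1 > /sys/devices/platform/hmem.0/dax_region/create
 *	# cat /sys/devices/platform/hmem.0/dax_region/create
 *	dax0.1
 *	# echo dax0.1 > /sys/devices/platform/hmem.0/dax_region/delete
 */
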
void kill_dev_dax(struct dev_dax *dev_dax)
{
	struct dax_device *dax_dev = dev_dax->dax_dev;
	struct inode *inode = dax_inode(dax_dev);

	kill_dax(dax_dev);
	unmap_mapping_range(inode->i_mapping, 0, 0, 1);
}
EXPORT_SYMBOL_GPL(kill_dev_dax);

static void trim_dev_dax_range(struct dev_dax *dev_dax)
{
	int i = dev_dax->nr_range - 1;
	struct range *range = &dev_dax->ranges[i].range;
	struct dax_region *dax_region = dev_dax->region;

	device_lock_assert(dax_region->dev);
	dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i,
		(unsigned long long)range->start,
		(unsigned long long)range->end);

	__release_region(&dax_region->res, range->start, range_len(range));
	if (--dev_dax->nr_range == 0) {
		kfree(dev_dax->ranges);
		dev_dax->ranges = NULL;
	}
}

static void free_dev_dax_ranges(struct dev_dax *dev_dax)
{
	while (dev_dax->nr_range)
		trim_dev_dax_range(dev_dax);
}

static void unregister_dev_dax(void *dev)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	dev_dbg(dev, "%s\n", __func__);

	kill_dev_dax(dev_dax);
	device_del(dev);
	free_dev_dax_ranges(dev_dax);
	put_device(dev);
}

static void dax_region_free(struct kref *kref)
{
	struct dax_region *dax_region;

	dax_region = container_of(kref, struct dax_region, kref);
	kfree(dax_region);
}

void dax_region_put(struct dax_region *dax_region)
{
	kref_put(&dax_region->kref, dax_region_free);
}
EXPORT_SYMBOL_GPL(dax_region_put);

/* a return value >= 0 indicates this invocation invalidated the id */
static int __free_dev_dax_id(struct dev_dax *dev_dax)
{
	struct device *dev = &dev_dax->dev;
	struct dax_region *dax_region;
	int rc = dev_dax->id;

	device_lock_assert(dev);

	if (!dev_dax->dyn_id || dev_dax->id < 0)
		return -1;
	dax_region = dev_dax->region;
	ida_free(&dax_region->ida, dev_dax->id);
	dax_region_put(dax_region);
	dev_dax->id = -1;
	return rc;
}

static int free_dev_dax_id(struct dev_dax *dev_dax)
{
	struct device *dev = &dev_dax->dev;
	int rc;

	device_lock(dev);
	rc = __free_dev_dax_id(dev_dax);
	device_unlock(dev);
	return rc;
}

static int alloc_dev_dax_id(struct dev_dax *dev_dax)
{
	struct dax_region *dax_region = dev_dax->region;
	int id;

	id = ida_alloc(&dax_region->ida, GFP_KERNEL);
	if (id < 0)
		return id;
	kref_get(&dax_region->kref);
	dev_dax->dyn_id = true;
	dev_dax->id = id;
	return id;
}

static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	struct dev_dax *dev_dax;
	struct device *victim;
	bool do_del = false;
	int rc;

	if (is_static(dax_region))
		return -EINVAL;

	victim = device_find_child_by_name(dax_region->dev, buf);
	if (!victim)
		return -ENXIO;

	device_lock(dev);
	device_lock(victim);
	dev_dax = to_dev_dax(victim);
	if (victim->driver || dev_dax_size(dev_dax))
		rc = -EBUSY;
	else {
		/*
		 * Invalidate the device so it does not become active
		 * again, but always preserve device-id-0 so that
		 * /sys/bus/dax/ is guaranteed to be populated while any
		 * dax_region is registered.
		 */
		if (dev_dax->id > 0) {
			do_del = __free_dev_dax_id(dev_dax) >= 0;
			rc = len;
			if (dax_region->seed == victim)
				dax_region->seed = NULL;
			if (dax_region->youngest == victim)
				dax_region->youngest = NULL;
		} else
			rc = -EBUSY;
	}
	device_unlock(victim);

	/* won the race to invalidate the device, clean it up */
	if (do_del)
		devm_release_action(dev, unregister_dev_dax, victim);
	device_unlock(dev);
	put_device(victim);

	return rc;
}
static DEVICE_ATTR_WO(delete);

static umode_t dax_region_visible(struct kobject *kobj, struct attribute *a,
		int n)
{
	struct device *dev = container_of(kobj, struct device, kobj);
	struct dax_region *dax_region = dev_get_drvdata(dev);

	if (is_static(dax_region))
		if (a == &dev_attr_available_size.attr
				|| a == &dev_attr_create.attr
				|| a == &dev_attr_seed.attr
				|| a == &dev_attr_delete.attr)
			return 0;
	return a->mode;
}

static struct attribute *dax_region_attributes[] = {
	&dev_attr_available_size.attr,
	&dev_attr_region_size.attr,
	&dev_attr_region_align.attr,
	&dev_attr_create.attr,
	&dev_attr_seed.attr,
	&dev_attr_delete.attr,
	&dev_attr_id.attr,
	NULL,
};

static const struct attribute_group dax_region_attribute_group = {
	.name = "dax_region",
	.attrs = dax_region_attributes,
	.is_visible = dax_region_visible,
};

static const struct attribute_group *dax_region_attribute_groups[] = {
	&dax_region_attribute_group,
	NULL,
};

static void dax_region_unregister(void *region)
{
	struct dax_region *dax_region = region;

	sysfs_remove_groups(&dax_region->dev->kobj,
			dax_region_attribute_groups);
	dax_region_put(dax_region);
}

struct dax_region *alloc_dax_region(struct device *parent, int region_id,
		struct range *range, int target_node, unsigned int align,
		unsigned long flags)
{
	struct dax_region *dax_region;

	/*
	 * The DAX core assumes that it can store its private data in
	 * parent->driver_data. This WARN is a reminder / safeguard for
	 * developers of device-dax drivers.
	 */
	if (dev_get_drvdata(parent)) {
		dev_WARN(parent, "dax core failed to setup private data\n");
		return NULL;
	}

	if (!IS_ALIGNED(range->start, align)
			|| !IS_ALIGNED(range_len(range), align))
		return NULL;

	dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
	if (!dax_region)
		return NULL;

	dev_set_drvdata(parent, dax_region);
	kref_init(&dax_region->kref);
	dax_region->id = region_id;
	dax_region->align = align;
	dax_region->dev = parent;
	dax_region->target_node = target_node;
	ida_init(&dax_region->ida);
	dax_region->res = (struct resource) {
		.start = range->start,
		.end = range->end,
		.flags = IORESOURCE_MEM | flags,
	};

	if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
		kfree(dax_region);
		return NULL;
	}

	kref_get(&dax_region->kref);
	if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region))
		return NULL;
	return dax_region;
}
EXPORT_SYMBOL_GPL(alloc_dax_region);

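/*
 * A minimal sketch of how a region driver ties the two exported entry
 * points together in its probe path (names are hypothetical and
 * region_idle is an illustrative policy flag; compare the hmem and
 * pmem drivers under drivers/dax/):
 *
 *	static int example_region_probe(struct platform_device *pdev)
 *	{
 *		struct range range = { .start = start, .end = end };
 *		struct dax_region *dax_region;
 *		struct dev_dax_data data;
 *
 *		dax_region = alloc_dax_region(&pdev->dev, pdev->id, &range,
 *				dev_to_node(&pdev->dev), PMD_SIZE, 0);
 *		if (!dax_region)
 *			return -ENOMEM;
 *
 *		data = (struct dev_dax_data) {
 *			.dax_region = dax_region,
 *			.id = -1,
 *			.size = region_idle ? 0 : range_len(&range),
 *		};
 *
 *		return PTR_ERR_OR_ZERO(devm_create_dev_dax(&data));
 *	}
 */
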
static void dax_mapping_release(struct device *dev)
{
	struct dax_mapping *mapping = to_dax_mapping(dev);
	struct device *parent = dev->parent;
	struct dev_dax *dev_dax = to_dev_dax(parent);

	ida_free(&dev_dax->ida, mapping->id);
	kfree(mapping);
	put_device(parent);
}

static void unregister_dax_mapping(void *data)
{
	struct device *dev = data;
	struct dax_mapping *mapping = to_dax_mapping(dev);
	struct dev_dax *dev_dax = to_dev_dax(dev->parent);
	struct dax_region *dax_region = dev_dax->region;

	dev_dbg(dev, "%s\n", __func__);

	device_lock_assert(dax_region->dev);

	dev_dax->ranges[mapping->range_id].mapping = NULL;
	mapping->range_id = -1;

	device_del(dev);
	put_device(dev);
}

static struct dev_dax_range *get_dax_range(struct device *dev)
{
	struct dax_mapping *mapping = to_dax_mapping(dev);
	struct dev_dax *dev_dax = to_dev_dax(dev->parent);
	struct dax_region *dax_region = dev_dax->region;

	device_lock(dax_region->dev);
	if (mapping->range_id < 0) {
		device_unlock(dax_region->dev);
		return NULL;
	}

	return &dev_dax->ranges[mapping->range_id];
}

static void put_dax_range(struct dev_dax_range *dax_range)
{
	struct dax_mapping *mapping = dax_range->mapping;
	struct dev_dax *dev_dax = to_dev_dax(mapping->dev.parent);
	struct dax_region *dax_region = dev_dax->region;

	device_unlock(dax_region->dev);
}

static ssize_t start_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax_range *dax_range;
	ssize_t rc;

	dax_range = get_dax_range(dev);
	if (!dax_range)
		return -ENXIO;
	rc = sprintf(buf, "%#llx\n", dax_range->range.start);
	put_dax_range(dax_range);

	return rc;
}
static DEVICE_ATTR(start, 0400, start_show, NULL);

static ssize_t end_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax_range *dax_range;
	ssize_t rc;

	dax_range = get_dax_range(dev);
	if (!dax_range)
		return -ENXIO;
	rc = sprintf(buf, "%#llx\n", dax_range->range.end);
	put_dax_range(dax_range);

	return rc;
}
static DEVICE_ATTR(end, 0400, end_show, NULL);

static ssize_t pgoff_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax_range *dax_range;
	ssize_t rc;

	dax_range = get_dax_range(dev);
	if (!dax_range)
		return -ENXIO;
	rc = sprintf(buf, "%#lx\n", dax_range->pgoff);
	put_dax_range(dax_range);

	return rc;
}
static DEVICE_ATTR(page_offset, 0400, pgoff_show, NULL);

static struct attribute *dax_mapping_attributes[] = {
	&dev_attr_start.attr,
	&dev_attr_end.attr,
	&dev_attr_page_offset.attr,
	NULL,
};

static const struct attribute_group dax_mapping_attribute_group = {
	.attrs = dax_mapping_attributes,
};

static const struct attribute_group *dax_mapping_attribute_groups[] = {
	&dax_mapping_attribute_group,
	NULL,
};

static struct device_type dax_mapping_type = {
	.release = dax_mapping_release,
	.groups = dax_mapping_attribute_groups,
};

static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id)
{
	struct dax_region *dax_region = dev_dax->region;
	struct dax_mapping *mapping;
	struct device *dev;
	int rc;

	device_lock_assert(dax_region->dev);

	if (dev_WARN_ONCE(&dev_dax->dev, !dax_region->dev->driver,
				"region disabled\n"))
		return -ENXIO;

	mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
	if (!mapping)
		return -ENOMEM;
	mapping->range_id = range_id;
	mapping->id = ida_alloc(&dev_dax->ida, GFP_KERNEL);
	if (mapping->id < 0) {
		kfree(mapping);
		return -ENOMEM;
	}
	dev_dax->ranges[range_id].mapping = mapping;
	dev = &mapping->dev;
	device_initialize(dev);
	dev->parent = &dev_dax->dev;
	get_device(dev->parent);
	dev->type = &dax_mapping_type;
	dev_set_name(dev, "mapping%d", mapping->id);
	rc = device_add(dev);
	if (rc) {
		put_device(dev);
		return rc;
	}

	rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_mapping,
			dev);
	if (rc)
		return rc;
	return 0;
}

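/*
 * Each range registered above surfaces as a "mappingX" child of the
 * dev_dax instance, carrying the attributes defined earlier, e.g.
 * (illustrative path):
 *
 *	/sys/bus/dax/devices/dax0.0/mapping0/{start,end,page_offset}
 */
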
static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start,
		resource_size_t size)
{
	struct dax_region *dax_region = dev_dax->region;
	struct resource *res = &dax_region->res;
	struct device *dev = &dev_dax->dev;
	struct dev_dax_range *ranges;
	unsigned long pgoff = 0;
	struct resource *alloc;
	int i, rc;

	device_lock_assert(dax_region->dev);

	/* handle the seed alloc special case */
	if (!size) {
		if (dev_WARN_ONCE(dev, dev_dax->nr_range,
					"0-size allocation must be first\n"))
			return -EBUSY;
		/* nr_range == 0 is elsewhere special cased as 0-size device */
		return 0;
	}

	ranges = krealloc(dev_dax->ranges, sizeof(*ranges)
			* (dev_dax->nr_range + 1), GFP_KERNEL);
	if (!ranges)
		return -ENOMEM;

	alloc = __request_region(res, start, size, dev_name(dev), 0);
	if (!alloc) {
		/*
		 * If this was an empty set of ranges nothing else
		 * will release @ranges, so do it now.
		 */
		if (!dev_dax->nr_range) {
			kfree(ranges);
			ranges = NULL;
		}
		dev_dax->ranges = ranges;
		return -ENOMEM;
	}

	for (i = 0; i < dev_dax->nr_range; i++)
		pgoff += PHYS_PFN(range_len(&ranges[i].range));
	dev_dax->ranges = ranges;
	ranges[dev_dax->nr_range++] = (struct dev_dax_range) {
		.pgoff = pgoff,
		.range = {
			.start = alloc->start,
			.end = alloc->end,
		},
	};

	dev_dbg(dev, "alloc range[%d]: %pa:%pa\n", dev_dax->nr_range - 1,
			&alloc->start, &alloc->end);
	/*
	 * A dev_dax instance must be registered before mapping device
	 * children can be added. Defer to devm_create_dev_dax() to add
	 * the initial mapping device.
	 */
	if (!device_is_registered(&dev_dax->dev))
		return 0;

	rc = devm_register_dax_mapping(dev_dax, dev_dax->nr_range - 1);
	if (rc)
		trim_dev_dax_range(dev_dax);

	return rc;
}

static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, resource_size_t size)
{
	int last_range = dev_dax->nr_range - 1;
	struct dev_dax_range *dax_range = &dev_dax->ranges[last_range];
	struct dax_region *dax_region = dev_dax->region;
	bool is_shrink = resource_size(res) > size;
	struct range *range = &dax_range->range;
	struct device *dev = &dev_dax->dev;
	int rc;

	device_lock_assert(dax_region->dev);

	if (dev_WARN_ONCE(dev, !size, "deletion is handled by dev_dax_shrink\n"))
		return -EINVAL;

	rc = adjust_resource(res, range->start, size);
	if (rc)
		return rc;

	*range = (struct range) {
		.start = range->start,
		.end = range->start + size - 1,
	};

	dev_dbg(dev, "%s range[%d]: %#llx:%#llx\n", is_shrink ? "shrink" : "extend",
			last_range, (unsigned long long) range->start,
			(unsigned long long) range->end);

	return 0;
}

static ssize_t size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	unsigned long long size;

	device_lock(dev);
	size = dev_dax_size(dev_dax);
	device_unlock(dev);

	return sprintf(buf, "%llu\n", size);
}

static bool alloc_is_aligned(struct dev_dax *dev_dax, resource_size_t size)
{
	/*
	 * The minimum mapping granularity for a device instance is a
	 * single subsection, unless the arch says otherwise.
	 */
	return IS_ALIGNED(size, max_t(unsigned long, dev_dax->align, memremap_compat_align()));
}

static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size)
{
	resource_size_t to_shrink = dev_dax_size(dev_dax) - size;
	struct dax_region *dax_region = dev_dax->region;
	struct device *dev = &dev_dax->dev;
	int i;

	for (i = dev_dax->nr_range - 1; i >= 0; i--) {
		struct range *range = &dev_dax->ranges[i].range;
		struct dax_mapping *mapping = dev_dax->ranges[i].mapping;
		struct resource *adjust = NULL, *res;
		resource_size_t shrink;

		shrink = min_t(u64, to_shrink, range_len(range));
		if (shrink >= range_len(range)) {
			devm_release_action(dax_region->dev,
					unregister_dax_mapping, &mapping->dev);
			trim_dev_dax_range(dev_dax);
			to_shrink -= shrink;
			if (!to_shrink)
				break;
			continue;
		}

		for_each_dax_region_resource(dax_region, res)
			if (strcmp(res->name, dev_name(dev)) == 0
					&& res->start == range->start) {
				adjust = res;
				break;
			}

		if (dev_WARN_ONCE(dev, !adjust || i != dev_dax->nr_range - 1,
					"failed to find matching resource\n"))
			return -ENXIO;
		return adjust_dev_dax_range(dev_dax, adjust, range_len(range)
				- shrink);
	}
	return 0;
}

/*
 * Only allow adjustments that preserve the relative pgoff of existing
 * allocations. I.e. the dev_dax->ranges array is ordered by increasing pgoff.
 */
static bool adjust_ok(struct dev_dax *dev_dax, struct resource *res)
{
	struct dev_dax_range *last;
	int i;

	if (dev_dax->nr_range == 0)
		return false;
	if (strcmp(res->name, dev_name(&dev_dax->dev)) != 0)
		return false;
	last = &dev_dax->ranges[dev_dax->nr_range - 1];
	if (last->range.start != res->start || last->range.end != res->end)
		return false;
	for (i = 0; i < dev_dax->nr_range - 1; i++) {
		struct dev_dax_range *dax_range = &dev_dax->ranges[i];

		if (dax_range->pgoff > last->pgoff)
			return false;
	}

	return true;
}

static ssize_t dev_dax_resize(struct dax_region *dax_region,
		struct dev_dax *dev_dax, resource_size_t size)
{
	resource_size_t avail = dax_region_avail_size(dax_region), to_alloc;
	resource_size_t dev_size = dev_dax_size(dev_dax);
	struct resource *region_res = &dax_region->res;
	struct device *dev = &dev_dax->dev;
	struct resource *res, *first;
	resource_size_t alloc = 0;
	int rc;

	if (dev->driver)
		return -EBUSY;
	if (size == dev_size)
		return 0;
	if (size > dev_size && size - dev_size > avail)
		return -ENOSPC;
	if (size < dev_size)
		return dev_dax_shrink(dev_dax, size);

	to_alloc = size - dev_size;
	if (dev_WARN_ONCE(dev, !alloc_is_aligned(dev_dax, to_alloc),
			"resize of %pa misaligned\n", &to_alloc))
		return -ENXIO;

	/*
	 * Expand the device into the unused portion of the region. This
	 * may involve adjusting the end of an existing resource, or
	 * allocating a new resource.
	 */
retry:
	first = region_res->child;
	if (!first)
		return alloc_dev_dax_range(dev_dax, dax_region->res.start, to_alloc);

	rc = -ENOSPC;
	for (res = first; res; res = res->sibling) {
		struct resource *next = res->sibling;

		/* space at the beginning of the region */
		if (res == first && res->start > dax_region->res.start) {
			alloc = min(res->start - dax_region->res.start, to_alloc);
			rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, alloc);
			break;
		}

		alloc = 0;
		/* space between allocations */
		if (next && next->start > res->end + 1)
			alloc = min(next->start - (res->end + 1), to_alloc);

		/* space at the end of the region */
		if (!alloc && !next && res->end < region_res->end)
			alloc = min(region_res->end - res->end, to_alloc);

		if (!alloc)
			continue;

		if (adjust_ok(dev_dax, res)) {
			rc = adjust_dev_dax_range(dev_dax, res, resource_size(res) + alloc);
			break;
		}
		rc = alloc_dev_dax_range(dev_dax, res->end + 1, alloc);
		break;
	}
	if (rc)
		return rc;
	to_alloc -= alloc;
	if (to_alloc)
		goto retry;
	return 0;
}

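/*
 * Worked example for the retry loop above (layout is illustrative):
 * given a region spanning 0x0-0x3fffffff where dax0.0 owns
 * 0x0-0xfffffff and dax0.1 owns 0x20000000-0x2fffffff, growing dax0.0
 * by 512M first extends dax0.0's existing resource into the 256M gap
 * up to 0x1fffffff (adjust_ok() passes), then retries and allocates a
 * second 256M range for dax0.0 at 0x30000000-0x3fffffff.
 */
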
static ssize_t size_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	ssize_t rc;
	unsigned long long val;
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;

	rc = kstrtoull(buf, 0, &val);
	if (rc)
		return rc;

	if (!alloc_is_aligned(dev_dax, val)) {
		dev_dbg(dev, "%s: size: %llu misaligned\n", __func__, val);
		return -EINVAL;
	}

	device_lock(dax_region->dev);
	if (!dax_region->dev->driver) {
		device_unlock(dax_region->dev);
		return -ENXIO;
	}
	device_lock(dev);
	rc = dev_dax_resize(dax_region, dev_dax, val);
	device_unlock(dev);
	device_unlock(dax_region->dev);

	return rc == 0 ? len : rc;
}
static DEVICE_ATTR_RW(size);

static ssize_t range_parse(const char *opt, size_t len, struct range *range)
{
	unsigned long long addr = 0;
	char *start, *end, *str;
	ssize_t rc = -EINVAL;

	str = kstrdup(opt, GFP_KERNEL);
	if (!str)
		return rc;

	end = str;
	start = strsep(&end, "-");
	if (!start || !end)
		goto err;

	rc = kstrtoull(start, 16, &addr);
	if (rc)
		goto err;
	range->start = addr;

	rc = kstrtoull(end, 16, &addr);
	if (rc)
		goto err;
	range->end = addr;

err:
	kfree(str);
	return rc;
}

static ssize_t mapping_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	size_t to_alloc;
	struct range r;
	ssize_t rc;

	rc = range_parse(buf, len, &r);
	if (rc)
		return rc;

	rc = -ENXIO;
	device_lock(dax_region->dev);
	if (!dax_region->dev->driver) {
		device_unlock(dax_region->dev);
		return rc;
	}
	device_lock(dev);

	to_alloc = range_len(&r);
	if (alloc_is_aligned(dev_dax, to_alloc))
		rc = alloc_dev_dax_range(dev_dax, r.start, to_alloc);
	device_unlock(dev);
	device_unlock(dax_region->dev);

	return rc == 0 ? len : rc;
}
static DEVICE_ATTR_WO(mapping);

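/*
 * Illustrative resize flow (values are examples; the device must be
 * unbound and the region driver attached):
 *
 *	# echo 0x40000000 > /sys/bus/dax/devices/dax0.0/size
 *
 * A specific physical range can be restored, e.g. after reboot, via
 * the 'mapping' attribute, which takes hex "<start>-<end>" inclusive:
 *
 *	# echo 0x200000000-0x23fffffff > /sys/bus/dax/devices/dax0.0/mapping
 */
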
static ssize_t align_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	return sprintf(buf, "%d\n", dev_dax->align);
}

static ssize_t dev_dax_validate_align(struct dev_dax *dev_dax)
{
	resource_size_t dev_size = dev_dax_size(dev_dax);
	struct device *dev = &dev_dax->dev;
	int i;

	if (dev_size > 0 && !alloc_is_aligned(dev_dax, dev_size)) {
		dev_dbg(dev, "%s: align %u invalid for size %pa\n",
			__func__, dev_dax->align, &dev_size);
		return -EINVAL;
	}

	for (i = 0; i < dev_dax->nr_range; i++) {
		size_t len = range_len(&dev_dax->ranges[i].range);

		if (!alloc_is_aligned(dev_dax, len)) {
			dev_dbg(dev, "%s: align %u invalid for range %d\n",
				__func__, dev_dax->align, i);
			return -EINVAL;
		}
	}

	return 0;
}

static ssize_t align_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	unsigned long val, align_save;
	ssize_t rc;

	rc = kstrtoul(buf, 0, &val);
	if (rc)
		return -ENXIO;

	if (!dax_align_valid(val))
		return -EINVAL;

	device_lock(dax_region->dev);
	if (!dax_region->dev->driver) {
		device_unlock(dax_region->dev);
		return -ENXIO;
	}

	device_lock(dev);
	if (dev->driver) {
		rc = -EBUSY;
		goto out_unlock;
	}

	align_save = dev_dax->align;
	dev_dax->align = val;
	rc = dev_dax_validate_align(dev_dax);
	if (rc)
		dev_dax->align = align_save;
out_unlock:
	device_unlock(dev);
	device_unlock(dax_region->dev);
	return rc == 0 ? len : rc;
}
static DEVICE_ATTR_RW(align);

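/*
 * Illustrative use of the 'align' attribute; the value must be a
 * supported mapping size (e.g. 4096, 0x200000, or 0x40000000 on x86)
 * and the device must be unbound:
 *
 *	# echo 0x40000000 > /sys/bus/dax/devices/dax0.0/align
 */
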
static int dev_dax_target_node(struct dev_dax *dev_dax)
{
	struct dax_region *dax_region = dev_dax->region;

	return dax_region->target_node;
}

static ssize_t target_node_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	return sprintf(buf, "%d\n", dev_dax_target_node(dev_dax));
}
static DEVICE_ATTR_RO(target_node);

static ssize_t resource_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	unsigned long long start;

	if (dev_dax->nr_range < 1)
		start = dax_region->res.start;
	else
		start = dev_dax->ranges[0].range.start;

	return sprintf(buf, "%#llx\n", start);
}
static DEVICE_ATTR(resource, 0400, resource_show, NULL);

static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	/*
	 * We only ever expect to handle device-dax instances, i.e. the
	 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
	 */
	return sprintf(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0);
}
static DEVICE_ATTR_RO(modalias);

static ssize_t numa_node_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", dev_to_node(dev));
}
static DEVICE_ATTR_RO(numa_node);

static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
{
	struct device *dev = container_of(kobj, struct device, kobj);
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;

	if (a == &dev_attr_target_node.attr && dev_dax_target_node(dev_dax) < 0)
		return 0;
	if (a == &dev_attr_numa_node.attr && !IS_ENABLED(CONFIG_NUMA))
		return 0;
	if (a == &dev_attr_mapping.attr && is_static(dax_region))
		return 0;
	if ((a == &dev_attr_align.attr ||
	     a == &dev_attr_size.attr) && is_static(dax_region))
		return 0444;
	return a->mode;
}

static struct attribute *dev_dax_attributes[] = {
	&dev_attr_modalias.attr,
	&dev_attr_size.attr,
	&dev_attr_mapping.attr,
	&dev_attr_target_node.attr,
	&dev_attr_align.attr,
	&dev_attr_resource.attr,
	&dev_attr_numa_node.attr,
	NULL,
};

static const struct attribute_group dev_dax_attribute_group = {
	.attrs = dev_dax_attributes,
	.is_visible = dev_dax_visible,
};

static const struct attribute_group *dax_attribute_groups[] = {
	&dev_dax_attribute_group,
	NULL,
};

static void dev_dax_release(struct device *dev)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_device *dax_dev = dev_dax->dax_dev;

	put_dax(dax_dev);
	free_dev_dax_id(dev_dax);
	kfree(dev_dax->pgmap);
	kfree(dev_dax);
}

static const struct device_type dev_dax_type = {
	.release = dev_dax_release,
	.groups = dax_attribute_groups,
};

struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
{
	struct dax_region *dax_region = data->dax_region;
	struct device *parent = dax_region->dev;
	struct dax_device *dax_dev;
	struct dev_dax *dev_dax;
	struct inode *inode;
	struct device *dev;
	int rc;

	dev_dax = kzalloc(sizeof(*dev_dax), GFP_KERNEL);
	if (!dev_dax)
		return ERR_PTR(-ENOMEM);

	dev_dax->region = dax_region;
	if (is_static(dax_region)) {
		if (dev_WARN_ONCE(parent, data->id < 0,
				"dynamic id specified to static region\n")) {
			rc = -EINVAL;
			goto err_id;
		}

		dev_dax->id = data->id;
	} else {
		if (dev_WARN_ONCE(parent, data->id >= 0,
				"static id specified to dynamic region\n")) {
			rc = -EINVAL;
			goto err_id;
		}

		rc = alloc_dev_dax_id(dev_dax);
		if (rc < 0)
			goto err_id;
	}

	dev = &dev_dax->dev;
	device_initialize(dev);
	dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id);

	rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, data->size);
	if (rc)
		goto err_range;

	if (data->pgmap) {
		dev_WARN_ONCE(parent, !is_static(dax_region),
			"custom dev_pagemap requires a static dax_region\n");

		dev_dax->pgmap = kmemdup(data->pgmap,
				sizeof(struct dev_pagemap), GFP_KERNEL);
		if (!dev_dax->pgmap) {
			rc = -ENOMEM;
			goto err_pgmap;
		}
	}

	/*
	 * No 'host' or dax_operations since there is no access to this
	 * device outside of mmap of the resulting character device.
	 */
	dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC);
	if (IS_ERR(dax_dev)) {
		rc = PTR_ERR(dax_dev);
		goto err_alloc_dax;
	}

	/* a device_dax instance is dead while the driver is not attached */
	kill_dax(dax_dev);

	dev_dax->dax_dev = dax_dev;
	dev_dax->target_node = dax_region->target_node;
	dev_dax->align = dax_region->align;
	ida_init(&dev_dax->ida);

	inode = dax_inode(dax_dev);
	dev->devt = inode->i_rdev;
	if (data->subsys == DEV_DAX_BUS)
		dev->bus = &dax_bus_type;
	else
		dev->class = dax_class;
	dev->parent = parent;
	dev->type = &dev_dax_type;

	rc = device_add(dev);
	if (rc) {
		kill_dev_dax(dev_dax);
		put_device(dev);
		return ERR_PTR(rc);
	}

	rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev);
	if (rc)
		return ERR_PTR(rc);

	/* register mapping device for the initial allocation range */
	if (dev_dax->nr_range && range_len(&dev_dax->ranges[0].range)) {
		rc = devm_register_dax_mapping(dev_dax, 0);
		if (rc)
			return ERR_PTR(rc);
	}

	return dev_dax;

err_alloc_dax:
	kfree(dev_dax->pgmap);
err_pgmap:
	free_dev_dax_ranges(dev_dax);
err_range:
	free_dev_dax_id(dev_dax);
err_id:
	kfree(dev_dax);

	return ERR_PTR(rc);
}
EXPORT_SYMBOL_GPL(devm_create_dev_dax);

static int match_always_count;

int __dax_driver_register(struct dax_device_driver *dax_drv,
		struct module *module, const char *mod_name)
{
	struct device_driver *drv = &dax_drv->drv;
	int rc = 0;

	INIT_LIST_HEAD(&dax_drv->ids);
	drv->owner = module;
	drv->name = mod_name;
	drv->mod_name = mod_name;
	drv->bus = &dax_bus_type;

	/* there can only be one default driver */
	mutex_lock(&dax_bus_lock);
	match_always_count += dax_drv->match_always;
	if (match_always_count > 1) {
		match_always_count--;
		WARN_ON(1);
		rc = -EINVAL;
	}
	mutex_unlock(&dax_bus_lock);
	if (rc)
		return rc;
	return driver_register(drv);
}
EXPORT_SYMBOL_GPL(__dax_driver_register);

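/*
 * A minimal sketch of a dax driver registering against this bus
 * (hypothetical names; compare drivers/dax/kmem.c, and note that
 * bus.h wraps this entry point in dax_driver_register() to supply
 * THIS_MODULE and KBUILD_MODNAME):
 *
 *	static struct dax_device_driver example_dax_driver = {
 *		.probe = example_probe,
 *		.remove = example_remove,
 *	};
 *
 *	static int __init example_init(void)
 *	{
 *		return dax_driver_register(&example_dax_driver);
 *	}
 *	module_init(example_init);
 *
 *	static void __exit example_exit(void)
 *	{
 *		dax_driver_unregister(&example_dax_driver);
 *	}
 *	module_exit(example_exit);
 *	MODULE_ALIAS_DAX_DEVICE(0);
 */
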
void dax_driver_unregister(struct dax_device_driver *dax_drv)
{
	struct device_driver *drv = &dax_drv->drv;
	struct dax_id *dax_id, *_id;

	mutex_lock(&dax_bus_lock);
	match_always_count -= dax_drv->match_always;
	list_for_each_entry_safe(dax_id, _id, &dax_drv->ids, list) {
		list_del(&dax_id->list);
		kfree(dax_id);
	}
	mutex_unlock(&dax_bus_lock);
	driver_unregister(drv);
}
EXPORT_SYMBOL_GPL(dax_driver_unregister);

int __init dax_bus_init(void)
{
	int rc;

	if (IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)) {
		dax_class = class_create(THIS_MODULE, "dax");
		if (IS_ERR(dax_class))
			return PTR_ERR(dax_class);
	}

	rc = bus_register(&dax_bus_type);
	if (rc)
		class_destroy(dax_class);
	return rc;
}

void __exit dax_bus_exit(void)
{
	bus_unregister(&dax_bus_type);
	class_destroy(dax_class);
}