1// SPDX-License-Identifier: GPL-2.0 2/* 3 * linux/ipc/shm.c 4 * Copyright (C) 1992, 1993 Krishna Balasubramanian 5 * Many improvements/fixes by Bruno Haible. 6 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994. 7 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli. 8 * 9 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com> 10 * BIGMEM support, Andrea Arcangeli <andrea@suse.de> 11 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr> 12 * HIGHMEM support, Ingo Molnar <mingo@redhat.com> 13 * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com> 14 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com> 15 * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com> 16 * 17 * support for audit of ipc object properties and permission changes 18 * Dustin Kirkland <dustin.kirkland@us.ibm.com> 19 * 20 * namespaces support 21 * OpenVZ, SWsoft Inc. 22 * Pavel Emelianov <xemul@openvz.org> 23 * 24 * Better ipc lock (kern_ipc_perm.lock) handling 25 * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013. 26 */ 27 28#include <linux/slab.h> 29#include <linux/mm.h> 30#include <linux/hugetlb.h> 31#include <linux/shm.h> 32#include <linux/init.h> 33#include <linux/file.h> 34#include <linux/mman.h> 35#include <linux/shmem_fs.h> 36#include <linux/security.h> 37#include <linux/syscalls.h> 38#include <linux/audit.h> 39#include <linux/capability.h> 40#include <linux/ptrace.h> 41#include <linux/seq_file.h> 42#include <linux/rwsem.h> 43#include <linux/nsproxy.h> 44#include <linux/mount.h> 45#include <linux/ipc_namespace.h> 46#include <linux/rhashtable.h> 47 48#include <linux/uaccess.h> 49 50#include "util.h" 51 52struct shmid_kernel { 53 struct kern_ipc_perm shm_perm; 54 struct file *shm_file; 55 unsigned long shm_nattch; 56 unsigned long shm_segsz; 57 time64_t shm_atim; 58 time64_t shm_dtim; 59 time64_t shm_ctim; 60 struct pid *shm_cprid; 61 struct pid *shm_lprid; 62 struct user_struct *mlock_user; 63 64 /* The task created the shm object. NULL if the task is dead. */ 65 struct task_struct *shm_creator; 66 struct list_head shm_clist; /* list by creator */ 67 struct ipc_namespace *ns; 68} __randomize_layout; 69 70/* shm_mode upper byte flags */ 71#define SHM_DEST 01000 /* segment will be destroyed on last detach */ 72#define SHM_LOCKED 02000 /* segment will not be swapped */ 73 74struct shm_file_data { 75 int id; 76 struct ipc_namespace *ns; 77 struct file *file; 78 const struct vm_operations_struct *vm_ops; 79}; 80 81#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data)) 82 83static const struct file_operations shm_file_operations; 84static const struct vm_operations_struct shm_vm_ops; 85 86#define shm_ids(ns) ((ns)->ids[IPC_SHM_IDS]) 87 88#define shm_unlock(shp) ipc_unlock(&(shp)->shm_perm) 89 90static int newseg(struct ipc_namespace *, struct ipc_params *); 91static void shm_open(struct vm_area_struct *vma); 92static void shm_close(struct vm_area_struct *vma); 93static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp); 94#ifdef CONFIG_PROC_FS 95static int sysvipc_shm_proc_show(struct seq_file *s, void *it); 96#endif 97 98void shm_init_ns(struct ipc_namespace *ns) 99{ 100 ns->shm_ctlmax = SHMMAX; 101 ns->shm_ctlall = SHMALL; 102 ns->shm_ctlmni = SHMMNI; 103 ns->shm_rmid_forced = 0; 104 ns->shm_tot = 0; 105 ipc_init_ids(&shm_ids(ns)); 106} 107 108/* 109 * Called with shm_ids.rwsem (writer) and the shp structure locked. 110 * Only shm_ids.rwsem remains locked on exit. 111 */ 112static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 113{ 114 struct shmid_kernel *shp; 115 116 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 117 WARN_ON(ns != shp->ns); 118 119 if (shp->shm_nattch) { 120 shp->shm_perm.mode |= SHM_DEST; 121 /* Do not find it any more */ 122 ipc_set_key_private(&shm_ids(ns), &shp->shm_perm); 123 shm_unlock(shp); 124 } else { 125 shm_destroy(ns, shp); 126 } 127} 128 129#ifdef CONFIG_IPC_NS 130void shm_exit_ns(struct ipc_namespace *ns) 131{ 132 free_ipcs(ns, &shm_ids(ns), do_shm_rmid); 133 idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr); 134 rhashtable_destroy(&ns->ids[IPC_SHM_IDS].key_ht); 135} 136#endif 137 138static int __init ipc_ns_init(void) 139{ 140 shm_init_ns(&init_ipc_ns); 141 return 0; 142} 143 144pure_initcall(ipc_ns_init); 145 146void __init shm_init(void) 147{ 148 ipc_init_proc_interface("sysvipc/shm", 149#if BITS_PER_LONG <= 32 150 " key shmid perms size cpid lpid nattch uid gid cuid cgid " 151 "atime dtime ctime rss swap\n", 152#else 153 " key shmid perms size cpid lpid nattch uid gid cuid " 154 "cgid atime dtime ctime rss swap\n", 155#endif 156 IPC_SHM_IDS, sysvipc_shm_proc_show); 157} 158 159static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id) 160{ 161 struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&shm_ids(ns), id); 162 163 if (IS_ERR(ipcp)) { 164 return ERR_CAST(ipcp); 165 } 166 167 return container_of(ipcp, struct shmid_kernel, shm_perm); 168} 169 170static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id) 171{ 172 struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id); 173 174 if (IS_ERR(ipcp)) { 175 return ERR_CAST(ipcp); 176 } 177 178 return container_of(ipcp, struct shmid_kernel, shm_perm); 179} 180 181/* 182 * shm_lock_(check_) routines are called in the paths where the rwsem 183 * is not necessarily held. 184 */ 185static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) 186{ 187 struct kern_ipc_perm *ipcp; 188 189 rcu_read_lock(); 190 ipcp = ipc_obtain_object_idr(&shm_ids(ns), id); 191 if (IS_ERR(ipcp)) { 192 goto err; 193 } 194 195 ipc_lock_object(ipcp); 196 /* 197 * ipc_rmid() may have already freed the ID while ipc_lock_object() 198 * was spinning: here verify that the structure is still valid. 199 * Upon races with RMID, return -EIDRM, thus indicating that 200 * the ID points to a removed identifier. 201 */ 202 if (ipc_valid_object(ipcp)) { 203 /* return a locked ipc object upon success */ 204 return container_of(ipcp, struct shmid_kernel, shm_perm); 205 } 206 207 ipc_unlock_object(ipcp); 208 ipcp = ERR_PTR(-EIDRM); 209err: 210 rcu_read_unlock(); 211 /* 212 * Callers of shm_lock() must validate the status of the returned ipc 213 * object pointer and error out as appropriate. 214 */ 215 return ERR_CAST(ipcp); 216} 217 218static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp) 219{ 220 rcu_read_lock(); 221 ipc_lock_object(&ipcp->shm_perm); 222} 223 224static void shm_rcu_free(struct rcu_head *head) 225{ 226 struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm, rcu); 227 struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel, shm_perm); 228 security_shm_free(&shp->shm_perm); 229 kvfree(shp); 230} 231 232/* 233 * It has to be called with shp locked. 234 * It must be called before ipc_rmid() 235 */ 236static inline void shm_clist_rm(struct shmid_kernel *shp) 237{ 238 struct task_struct *creator; 239 240 /* ensure that shm_creator does not disappear */ 241 rcu_read_lock(); 242 243 /* 244 * A concurrent exit_shm may do a list_del_init() as well. 245 * Just do nothing if exit_shm already did the work 246 */ 247 if (!list_empty(&shp->shm_clist)) { 248 /* 249 * shp->shm_creator is guaranteed to be valid *only* 250 * if shp->shm_clist is not empty. 251 */ 252 creator = shp->shm_creator; 253 254 task_lock(creator); 255 /* 256 * list_del_init() is a nop if the entry was already removed 257 * from the list. 258 */ 259 list_del_init(&shp->shm_clist); 260 task_unlock(creator); 261 } 262 rcu_read_unlock(); 263} 264 265static inline void shm_rmid(struct shmid_kernel *s) 266{ 267 shm_clist_rm(s); 268 ipc_rmid(&shm_ids(s->ns), &s->shm_perm); 269} 270 271static int __shm_open(struct vm_area_struct *vma) 272{ 273 struct file *file = vma->vm_file; 274 struct shm_file_data *sfd = shm_file_data(file); 275 struct shmid_kernel *shp; 276 277 shp = shm_lock(sfd->ns, sfd->id); 278 if (IS_ERR(shp)) { 279 return PTR_ERR(shp); 280 } 281 282 if (shp->shm_file != sfd->file) { 283 /* ID was reused */ 284 shm_unlock(shp); 285 return -EINVAL; 286 } 287 288 shp->shm_atim = ktime_get_real_seconds(); 289 ipc_update_pid(&shp->shm_lprid, task_tgid(current)); 290 shp->shm_nattch++; 291 shm_unlock(shp); 292 return 0; 293} 294 295/* This is called by fork, once for every shm attach. */ 296static void shm_open(struct vm_area_struct *vma) 297{ 298 int err = __shm_open(vma); 299 /* 300 * We raced in the idr lookup or with shm_destroy(). 301 * Either way, the ID is busted. 302 */ 303 WARN_ON_ONCE(err); 304} 305 306/* 307 * shm_destroy - free the struct shmid_kernel 308 * 309 * @ns: namespace 310 * @shp: struct to free 311 * 312 * It has to be called with shp and shm_ids.rwsem (writer) locked, 313 * but returns with shp unlocked and freed. 314 */ 315static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) 316{ 317 struct file *shm_file; 318 319 shm_file = shp->shm_file; 320 shp->shm_file = NULL; 321 ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; 322 shm_rmid(shp); 323 shm_unlock(shp); 324 if (!is_file_hugepages(shm_file)) { 325 shmem_lock(shm_file, 0, shp->mlock_user); 326 } else if (shp->mlock_user) { 327 user_shm_unlock(i_size_read(file_inode(shm_file)), shp->mlock_user); 328 } 329 fput(shm_file); 330 ipc_update_pid(&shp->shm_cprid, NULL); 331 ipc_update_pid(&shp->shm_lprid, NULL); 332 ipc_rcu_putref(&shp->shm_perm, shm_rcu_free); 333} 334 335/* 336 * shm_may_destroy - identifies whether shm segment should be destroyed now 337 * 338 * Returns true if and only if there are no active users of the segment and 339 * one of the following is true: 340 * 341 * 1) shmctl(id, IPC_RMID, NULL) was called for this shp 342 * 343 * 2) sysctl kernel.shm_rmid_forced is set to 1. 344 */ 345static bool shm_may_destroy(struct shmid_kernel *shp) 346{ 347 return (shp->shm_nattch == 0) && 348 (shp->ns->shm_rmid_forced || 349 (shp->shm_perm.mode & SHM_DEST)); 350} 351 352/* 353 * remove the attach descriptor vma. 354 * free memory for segment if it is marked destroyed. 355 * The descriptor has already been removed from the current->mm->mmap list 356 * and will later be kfree()d. 357 */ 358static void shm_close(struct vm_area_struct *vma) 359{ 360 struct file *file = vma->vm_file; 361 struct shm_file_data *sfd = shm_file_data(file); 362 struct shmid_kernel *shp; 363 struct ipc_namespace *ns = sfd->ns; 364 365 down_write(&shm_ids(ns).rwsem); 366 /* remove from the list of attaches of the shm segment */ 367 shp = shm_lock(ns, sfd->id); 368 /* 369 * We raced in the idr lookup or with shm_destroy(). 370 * Either way, the ID is busted. 371 */ 372 if (WARN_ON_ONCE(IS_ERR(shp))) { 373 goto done; /* no-op */ 374 } 375 376 ipc_update_pid(&shp->shm_lprid, task_tgid(current)); 377 shp->shm_dtim = ktime_get_real_seconds(); 378 shp->shm_nattch--; 379 if (shm_may_destroy(shp)) { 380 shm_destroy(ns, shp); 381 } else { 382 shm_unlock(shp); 383 } 384done: 385 up_write(&shm_ids(ns).rwsem); 386} 387 388/* Called with ns->shm_ids(ns).rwsem locked */ 389static int shm_try_destroy_orphaned(int id, void *p, void *data) 390{ 391 struct ipc_namespace *ns = data; 392 struct kern_ipc_perm *ipcp = p; 393 struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm); 394 395 /* 396 * We want to destroy segments without users and with already 397 * exit'ed originating process. 398 * 399 * As shp->* are changed under rwsem, it's safe to skip shp locking. 400 */ 401 if (!list_empty(&shp->shm_clist)) { 402 return 0; 403 } 404 405 if (shm_may_destroy(shp)) { 406 shm_lock_by_ptr(shp); 407 shm_destroy(ns, shp); 408 } 409 return 0; 410} 411 412void shm_destroy_orphaned(struct ipc_namespace *ns) 413{ 414 down_write(&shm_ids(ns).rwsem); 415 if (shm_ids(ns).in_use) { 416 idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); 417 } 418 up_write(&shm_ids(ns).rwsem); 419} 420 421/* Locking assumes this will only be called with task == current */ 422void exit_shm(struct task_struct *task) 423{ 424 for (;;) { 425 struct shmid_kernel *shp; 426 struct ipc_namespace *ns; 427 428 task_lock(task); 429 430 if (list_empty(&task->sysvshm.shm_clist)) { 431 task_unlock(task); 432 break; 433 } 434 435 shp = list_first_entry(&task->sysvshm.shm_clist, struct shmid_kernel, 436 shm_clist); 437 438 /* 439 * 1) Get pointer to the ipc namespace. It is worth to say 440 * that this pointer is guaranteed to be valid because 441 * shp lifetime is always shorter than namespace lifetime 442 * in which shp lives. 443 * We taken task_lock it means that shp won't be freed. 444 */ 445 ns = shp->ns; 446 447 /* 448 * 2) If kernel.shm_rmid_forced is not set then only keep track of 449 * which shmids are orphaned, so that a later set of the sysctl 450 * can clean them up. 451 */ 452 if (!ns->shm_rmid_forced) 453 goto unlink_continue; 454 455 /* 456 * 3) get a reference to the namespace. 457 * The refcount could be already 0. If it is 0, then 458 * the shm objects will be free by free_ipc_work(). 459 */ 460 ns = get_ipc_ns_not_zero(ns); 461 if (!ns) { 462unlink_continue: 463 list_del_init(&shp->shm_clist); 464 task_unlock(task); 465 continue; 466 } 467 468 /* 469 * 4) get a reference to shp. 470 * This cannot fail: shm_clist_rm() is called before 471 * ipc_rmid(), thus the refcount cannot be 0. 472 */ 473 WARN_ON(!ipc_rcu_getref(&shp->shm_perm)); 474 475 /* 476 * 5) unlink the shm segment from the list of segments 477 * created by current. 478 * This must be done last. After unlinking, 479 * only the refcounts obtained above prevent IPC_RMID 480 * from destroying the segment or the namespace. 481 */ 482 list_del_init(&shp->shm_clist); 483 484 task_unlock(task); 485 486 /* 487 * 6) we have all references 488 * Thus lock & if needed destroy shp. 489 */ 490 down_write(&shm_ids(ns).rwsem); 491 shm_lock_by_ptr(shp); 492 /* 493 * rcu_read_lock was implicitly taken in shm_lock_by_ptr, it's 494 * safe to call ipc_rcu_putref here 495 */ 496 ipc_rcu_putref(&shp->shm_perm, shm_rcu_free); 497 498 if (ipc_valid_object(&shp->shm_perm)) { 499 if (shm_may_destroy(shp)) 500 shm_destroy(ns, shp); 501 else 502 shm_unlock(shp); 503 } else { 504 /* 505 * Someone else deleted the shp from namespace 506 * idr/kht while we have waited. 507 * Just unlock and continue. 508 */ 509 shm_unlock(shp); 510 } 511 512 up_write(&shm_ids(ns).rwsem); 513 put_ipc_ns(ns); /* paired with get_ipc_ns_not_zero */ 514 } 515} 516 517static vm_fault_t shm_fault(struct vm_fault *vmf) 518{ 519 struct file *file = vmf->vma->vm_file; 520 struct shm_file_data *sfd = shm_file_data(file); 521 522 return sfd->vm_ops->fault(vmf); 523} 524 525static int shm_split(struct vm_area_struct *vma, unsigned long addr) 526{ 527 struct file *file = vma->vm_file; 528 struct shm_file_data *sfd = shm_file_data(file); 529 530 if (sfd->vm_ops->split) { 531 return sfd->vm_ops->split(vma, addr); 532 } 533 534 return 0; 535} 536 537static unsigned long shm_pagesize(struct vm_area_struct *vma) 538{ 539 struct file *file = vma->vm_file; 540 struct shm_file_data *sfd = shm_file_data(file); 541 542 if (sfd->vm_ops->pagesize) { 543 return sfd->vm_ops->pagesize(vma); 544 } 545 546 return PAGE_SIZE; 547} 548 549#ifdef CONFIG_NUMA 550static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new) 551{ 552 struct file *file = vma->vm_file; 553 struct shm_file_data *sfd = shm_file_data(file); 554 int err = 0; 555 556 if (sfd->vm_ops->set_policy) { 557 err = sfd->vm_ops->set_policy(vma, new); 558 } 559 return err; 560} 561 562static struct mempolicy *shm_get_policy(struct vm_area_struct *vma, unsigned long addr) 563{ 564 struct file *file = vma->vm_file; 565 struct shm_file_data *sfd = shm_file_data(file); 566 struct mempolicy *pol = NULL; 567 568 if (sfd->vm_ops->get_policy) { 569 pol = sfd->vm_ops->get_policy(vma, addr); 570 } else if (vma->vm_policy) { 571 pol = vma->vm_policy; 572 } 573 574 return pol; 575} 576#endif 577 578static int shm_mmap(struct file *file, struct vm_area_struct *vma) 579{ 580 struct shm_file_data *sfd = shm_file_data(file); 581 int ret; 582 583 /* 584 * In case of remap_file_pages() emulation, the file can represent an 585 * IPC ID that was removed, and possibly even reused by another shm 586 * segment already. Propagate this case as an error to caller. 587 */ 588 ret = __shm_open(vma); 589 if (ret) { 590 return ret; 591 } 592 593 ret = call_mmap(sfd->file, vma); 594 if (ret) { 595 shm_close(vma); 596 return ret; 597 } 598 sfd->vm_ops = vma->vm_ops; 599#ifdef CONFIG_MMU 600 WARN_ON(!sfd->vm_ops->fault); 601#endif 602 vma->vm_ops = &shm_vm_ops; 603 return 0; 604} 605 606static int shm_release(struct inode *ino, struct file *file) 607{ 608 struct shm_file_data *sfd = shm_file_data(file); 609 610 put_ipc_ns(sfd->ns); 611 fput(sfd->file); 612 shm_file_data(file) = NULL; 613 kfree(sfd); 614 return 0; 615} 616 617static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync) 618{ 619 struct shm_file_data *sfd = shm_file_data(file); 620 621 if (!sfd->file->f_op->fsync) { 622 return -EINVAL; 623 } 624 return sfd->file->f_op->fsync(sfd->file, start, end, datasync); 625} 626 627static long shm_fallocate(struct file *file, int mode, loff_t offset, loff_t len) 628{ 629 struct shm_file_data *sfd = shm_file_data(file); 630 631 if (!sfd->file->f_op->fallocate) { 632 return -EOPNOTSUPP; 633 } 634 return sfd->file->f_op->fallocate(file, mode, offset, len); 635} 636 637static unsigned long shm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, 638 unsigned long pgoff, unsigned long flags) 639{ 640 struct shm_file_data *sfd = shm_file_data(file); 641 642 return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len, pgoff, flags); 643} 644 645static const struct file_operations shm_file_operations = { 646 .mmap = shm_mmap, 647 .fsync = shm_fsync, 648 .release = shm_release, 649 .get_unmapped_area = shm_get_unmapped_area, 650 .llseek = noop_llseek, 651 .fallocate = shm_fallocate, 652}; 653 654/* 655 * shm_file_operations_huge is now identical to shm_file_operations, 656 * but we keep it distinct for the sake of is_file_shm_hugepages(). 657 */ 658static const struct file_operations shm_file_operations_huge = { 659 .mmap = shm_mmap, 660 .fsync = shm_fsync, 661 .release = shm_release, 662 .get_unmapped_area = shm_get_unmapped_area, 663 .llseek = noop_llseek, 664 .fallocate = shm_fallocate, 665}; 666 667bool is_file_shm_hugepages(struct file *file) 668{ 669 return file->f_op == &shm_file_operations_huge; 670} 671 672static const struct vm_operations_struct shm_vm_ops = { 673 .open = shm_open, /* callback for a new vm-area open */ 674 .close = shm_close, /* callback for when the vm-area is released */ 675 .fault = shm_fault, 676 .split = shm_split, 677 .pagesize = shm_pagesize, 678#if defined(CONFIG_NUMA) 679 .set_policy = shm_set_policy, 680 .get_policy = shm_get_policy, 681#endif 682}; 683 684/** 685 * newseg - Create a new shared memory segment 686 * @ns: namespace 687 * @params: ptr to the structure that contains key, size and shmflg 688 * 689 * Called with shm_ids.rwsem held as a writer. 690 */ 691static int newseg(struct ipc_namespace *ns, struct ipc_params *params) 692{ 693 key_t key = params->key; 694 int shmflg = params->flg; 695 size_t size = params->u.size; 696 int error; 697 struct shmid_kernel *shp; 698 size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; 699 struct file *file; 700 char name[13]; 701 vm_flags_t acctflag = 0; 702 703 if (size < SHMMIN || size > ns->shm_ctlmax) { 704 return -EINVAL; 705 } 706 707 if ((numpages << PAGE_SHIFT) < size) { 708 return -ENOSPC; 709 } 710 711 if (ns->shm_tot + numpages < ns->shm_tot || ns->shm_tot + numpages > ns->shm_ctlall) { 712 return -ENOSPC; 713 } 714 715 shp = kvmalloc(sizeof(*shp), GFP_KERNEL); 716 if (unlikely(!shp)) { 717 return -ENOMEM; 718 } 719 720 shp->shm_perm.key = key; 721 shp->shm_perm.mode = (shmflg & S_IRWXUGO); 722 shp->mlock_user = NULL; 723 724 shp->shm_perm.security = NULL; 725 error = security_shm_alloc(&shp->shm_perm); 726 if (error) { 727 kvfree(shp); 728 return error; 729 } 730 731 (void)sprintf(name, "SYSV%08x", key); 732 if (shmflg & SHM_HUGETLB) { 733 struct hstate *hs; 734 size_t hugesize; 735 736 hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK); 737 if (!hs) { 738 error = -EINVAL; 739 goto no_file; 740 } 741 hugesize = ALIGN(size, huge_page_size(hs)); 742 743 /* hugetlb_file_setup applies strict accounting */ 744 if (shmflg & SHM_NORESERVE) { 745 acctflag = VM_NORESERVE; 746 } 747 file = hugetlb_file_setup(name, hugesize, acctflag, &shp->mlock_user, HUGETLB_SHMFS_INODE, 748 (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK); 749 } else { 750 /* 751 * Do not allow no accounting for OVERCOMMIT_NEVER, even 752 * if it's asked for. 753 */ 754 if ((shmflg & SHM_NORESERVE) && sysctl_overcommit_memory != OVERCOMMIT_NEVER) { 755 acctflag = VM_NORESERVE; 756 } 757 file = shmem_kernel_file_setup(name, size, acctflag); 758 } 759 error = PTR_ERR(file); 760 if (IS_ERR(file)) { 761 goto no_file; 762 } 763 764 shp->shm_cprid = get_pid(task_tgid(current)); 765 shp->shm_lprid = NULL; 766 shp->shm_atim = shp->shm_dtim = 0; 767 shp->shm_ctim = ktime_get_real_seconds(); 768 shp->shm_segsz = size; 769 shp->shm_nattch = 0; 770 shp->shm_file = file; 771 shp->shm_creator = current; 772 773 /* ipc_addid() locks shp upon success. */ 774 error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); 775 if (error < 0) { 776 goto no_id; 777 } 778 shp->ns = ns; 779 780 task_lock(current); 781 list_add(&shp->shm_clist, ¤t->sysvshm.shm_clist); 782 task_unlock(current); 783 784 /* 785 * shmid gets reported as "inode#" in /proc/pid/maps. 786 * proc-ps tools use this. Changing this will break them. 787 */ 788 file_inode(file)->i_ino = shp->shm_perm.id; 789 790 ns->shm_tot += numpages; 791 error = shp->shm_perm.id; 792 793 ipc_unlock_object(&shp->shm_perm); 794 rcu_read_unlock(); 795 return error; 796 797no_id: 798 ipc_update_pid(&shp->shm_cprid, NULL); 799 ipc_update_pid(&shp->shm_lprid, NULL); 800 if (is_file_hugepages(file) && shp->mlock_user) { 801 user_shm_unlock(size, shp->mlock_user); 802 } 803 fput(file); 804 ipc_rcu_putref(&shp->shm_perm, shm_rcu_free); 805 return error; 806no_file: 807 call_rcu(&shp->shm_perm.rcu, shm_rcu_free); 808 return error; 809} 810 811/* 812 * Called with shm_ids.rwsem and ipcp locked. 813 */ 814static int shm_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params) 815{ 816 struct shmid_kernel *shp; 817 818 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 819 if (shp->shm_segsz < params->u.size) { 820 return -EINVAL; 821 } 822 823 return 0; 824} 825 826long ksys_shmget(key_t key, size_t size, int shmflg) 827{ 828 struct ipc_namespace *ns; 829 static const struct ipc_ops shm_ops = { 830 .getnew = newseg, 831 .associate = security_shm_associate, 832 .more_checks = shm_more_checks, 833 }; 834 struct ipc_params shm_params; 835 836 ns = current->nsproxy->ipc_ns; 837 838 shm_params.key = key; 839 shm_params.flg = shmflg; 840 shm_params.u.size = size; 841 842 return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params); 843} 844 845SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg) 846{ 847 return ksys_shmget(key, size, shmflg); 848} 849 850static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version) 851{ 852 switch (version) { 853 case IPC_64: 854 return copy_to_user(buf, in, sizeof(*in)); 855 case IPC_OLD: { 856 struct shmid_ds out; 857 858 memset(&out, 0, sizeof(out)); 859 ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm); 860 out.shm_segsz = in->shm_segsz; 861 out.shm_atime = in->shm_atime; 862 out.shm_dtime = in->shm_dtime; 863 out.shm_ctime = in->shm_ctime; 864 out.shm_cpid = in->shm_cpid; 865 out.shm_lpid = in->shm_lpid; 866 out.shm_nattch = in->shm_nattch; 867 868 return copy_to_user(buf, &out, sizeof(out)); 869 } 870 default: 871 return -EINVAL; 872 } 873} 874 875static inline unsigned long copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version) 876{ 877 switch (version) { 878 case IPC_64: 879 if (copy_from_user(out, buf, sizeof(*out))) { 880 return -EFAULT; 881 } 882 return 0; 883 case IPC_OLD: { 884 struct shmid_ds tbuf_old; 885 886 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) { 887 return -EFAULT; 888 } 889 890 out->shm_perm.uid = tbuf_old.shm_perm.uid; 891 out->shm_perm.gid = tbuf_old.shm_perm.gid; 892 out->shm_perm.mode = tbuf_old.shm_perm.mode; 893 894 return 0; 895 } 896 default: 897 return -EINVAL; 898 } 899} 900 901static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version) 902{ 903 switch (version) { 904 case IPC_64: 905 return copy_to_user(buf, in, sizeof(*in)); 906 case IPC_OLD: { 907 struct shminfo out; 908 909 if (in->shmmax > INT_MAX) { 910 out.shmmax = INT_MAX; 911 } else { 912 out.shmmax = (int)in->shmmax; 913 } 914 915 out.shmmin = in->shmmin; 916 out.shmmni = in->shmmni; 917 out.shmseg = in->shmseg; 918 out.shmall = in->shmall; 919 920 return copy_to_user(buf, &out, sizeof(out)); 921 } 922 default: 923 return -EINVAL; 924 } 925} 926 927/* 928 * Calculate and add used RSS and swap pages of a shm. 929 * Called with shm_ids.rwsem held as a reader 930 */ 931static void shm_add_rss_swap(struct shmid_kernel *shp, unsigned long *rss_add, unsigned long *swp_add) 932{ 933 struct inode *inode; 934 935 inode = file_inode(shp->shm_file); 936 937 if (is_file_hugepages(shp->shm_file)) { 938 struct address_space *mapping = inode->i_mapping; 939 struct hstate *h = hstate_file(shp->shm_file); 940 *rss_add += pages_per_huge_page(h) * mapping->nrpages; 941 } else { 942#ifdef CONFIG_SHMEM 943 struct shmem_inode_info *info = SHMEM_I(inode); 944 945 spin_lock_irq(&info->lock); 946 *rss_add += inode->i_mapping->nrpages; 947 *swp_add += info->swapped; 948 spin_unlock_irq(&info->lock); 949#else 950 *rss_add += inode->i_mapping->nrpages; 951#endif 952 } 953} 954 955/* 956 * Called with shm_ids.rwsem held as a reader 957 */ 958static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, unsigned long *swp) 959{ 960 int next_id; 961 int total, in_use; 962 963 *rss = 0; 964 *swp = 0; 965 966 in_use = shm_ids(ns).in_use; 967 968 for (total = 0, next_id = 0; total < in_use; next_id++) { 969 struct kern_ipc_perm *ipc; 970 struct shmid_kernel *shp; 971 972 ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id); 973 if (ipc == NULL) { 974 continue; 975 } 976 shp = container_of(ipc, struct shmid_kernel, shm_perm); 977 978 shm_add_rss_swap(shp, rss, swp); 979 980 total++; 981 } 982} 983 984/* 985 * This function handles some shmctl commands which require the rwsem 986 * to be held in write mode. 987 * NOTE: no locks must be held, the rwsem is taken inside this function. 988 */ 989static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, struct shmid64_ds *shmid64) 990{ 991 struct kern_ipc_perm *ipcp; 992 struct shmid_kernel *shp; 993 int err; 994 995 down_write(&shm_ids(ns).rwsem); 996 rcu_read_lock(); 997 998 ipcp = ipcctl_obtain_check(ns, &shm_ids(ns), shmid, cmd, &shmid64->shm_perm, 0); 999 if (IS_ERR(ipcp)) { 1000 err = PTR_ERR(ipcp); 1001 goto out_unlock1; 1002 } 1003 1004 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 1005 1006 err = security_shm_shmctl(&shp->shm_perm, cmd); 1007 if (err) { 1008 goto out_unlock1; 1009 } 1010 1011 switch (cmd) { 1012 case IPC_RMID: 1013 ipc_lock_object(&shp->shm_perm); 1014 /* do_shm_rmid unlocks the ipc object and rcu */ 1015 do_shm_rmid(ns, ipcp); 1016 goto out_up; 1017 case IPC_SET: 1018 ipc_lock_object(&shp->shm_perm); 1019 err = ipc_update_perm(&shmid64->shm_perm, ipcp); 1020 if (err) { 1021 goto out_unlock0; 1022 } 1023 shp->shm_ctim = ktime_get_real_seconds(); 1024 break; 1025 default: 1026 err = -EINVAL; 1027 goto out_unlock1; 1028 } 1029 1030out_unlock0: 1031 ipc_unlock_object(&shp->shm_perm); 1032out_unlock1: 1033 rcu_read_unlock(); 1034out_up: 1035 up_write(&shm_ids(ns).rwsem); 1036 return err; 1037} 1038 1039static int shmctl_ipc_info(struct ipc_namespace *ns, struct shminfo64 *shminfo) 1040{ 1041 int err = security_shm_shmctl(NULL, IPC_INFO); 1042 if (!err) { 1043 memset(shminfo, 0, sizeof(*shminfo)); 1044 shminfo->shmmni = shminfo->shmseg = ns->shm_ctlmni; 1045 shminfo->shmmax = ns->shm_ctlmax; 1046 shminfo->shmall = ns->shm_ctlall; 1047 shminfo->shmmin = SHMMIN; 1048 down_read(&shm_ids(ns).rwsem); 1049 err = ipc_get_maxidx(&shm_ids(ns)); 1050 up_read(&shm_ids(ns).rwsem); 1051 if (err < 0) { 1052 err = 0; 1053 } 1054 } 1055 return err; 1056} 1057 1058static int shmctl_shm_info(struct ipc_namespace *ns, struct shm_info *shm_info) 1059{ 1060 int err = security_shm_shmctl(NULL, SHM_INFO); 1061 if (!err) { 1062 memset(shm_info, 0, sizeof(*shm_info)); 1063 down_read(&shm_ids(ns).rwsem); 1064 shm_info->used_ids = shm_ids(ns).in_use; 1065 shm_get_stat(ns, &shm_info->shm_rss, &shm_info->shm_swp); 1066 shm_info->shm_tot = ns->shm_tot; 1067 shm_info->swap_attempts = 0; 1068 shm_info->swap_successes = 0; 1069 err = ipc_get_maxidx(&shm_ids(ns)); 1070 up_read(&shm_ids(ns).rwsem); 1071 if (err < 0) { 1072 err = 0; 1073 } 1074 } 1075 return err; 1076} 1077 1078static int shmctl_stat(struct ipc_namespace *ns, int shmid, int cmd, struct shmid64_ds *tbuf) 1079{ 1080 struct shmid_kernel *shp; 1081 int err; 1082 1083 memset(tbuf, 0, sizeof(*tbuf)); 1084 1085 rcu_read_lock(); 1086 if (cmd == SHM_STAT || cmd == SHM_STAT_ANY) { 1087 shp = shm_obtain_object(ns, shmid); 1088 if (IS_ERR(shp)) { 1089 err = PTR_ERR(shp); 1090 goto out_unlock; 1091 } 1092 } else { /* IPC_STAT */ 1093 shp = shm_obtain_object_check(ns, shmid); 1094 if (IS_ERR(shp)) { 1095 err = PTR_ERR(shp); 1096 goto out_unlock; 1097 } 1098 } 1099 1100 /* 1101 * Semantically SHM_STAT_ANY ought to be identical to 1102 * that functionality provided by the /proc/sysvipc/ 1103 * interface. As such, only audit these calls and 1104 * do not do traditional S_IRUGO permission checks on 1105 * the ipc object. 1106 */ 1107 if (cmd == SHM_STAT_ANY) { 1108 audit_ipc_obj(&shp->shm_perm); 1109 } else { 1110 err = -EACCES; 1111 if (ipcperms(ns, &shp->shm_perm, S_IRUGO)) { 1112 goto out_unlock; 1113 } 1114 } 1115 1116 err = security_shm_shmctl(&shp->shm_perm, cmd); 1117 if (err) { 1118 goto out_unlock; 1119 } 1120 1121 ipc_lock_object(&shp->shm_perm); 1122 1123 if (!ipc_valid_object(&shp->shm_perm)) { 1124 ipc_unlock_object(&shp->shm_perm); 1125 err = -EIDRM; 1126 goto out_unlock; 1127 } 1128 1129 kernel_to_ipc64_perm(&shp->shm_perm, &tbuf->shm_perm); 1130 tbuf->shm_segsz = shp->shm_segsz; 1131 tbuf->shm_atime = shp->shm_atim; 1132 tbuf->shm_dtime = shp->shm_dtim; 1133 tbuf->shm_ctime = shp->shm_ctim; 1134#ifndef CONFIG_64BIT 1135 tbuf->shm_atime_high = shp->shm_atim >> 0x20; 1136 tbuf->shm_dtime_high = shp->shm_dtim >> 0x20; 1137 tbuf->shm_ctime_high = shp->shm_ctim >> 0x20; 1138#endif 1139 tbuf->shm_cpid = pid_vnr(shp->shm_cprid); 1140 tbuf->shm_lpid = pid_vnr(shp->shm_lprid); 1141 tbuf->shm_nattch = shp->shm_nattch; 1142 1143 if (cmd == IPC_STAT) { 1144 /* 1145 * As defined in SUS: 1146 * Return 0 on success 1147 */ 1148 err = 0; 1149 } else { 1150 /* 1151 * SHM_STAT and SHM_STAT_ANY (both Linux specific) 1152 * Return the full id, including the sequence number 1153 */ 1154 err = shp->shm_perm.id; 1155 } 1156 1157 ipc_unlock_object(&shp->shm_perm); 1158out_unlock: 1159 rcu_read_unlock(); 1160 return err; 1161} 1162 1163static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd) 1164{ 1165 struct shmid_kernel *shp; 1166 struct file *shm_file; 1167 int err; 1168 1169 rcu_read_lock(); 1170 shp = shm_obtain_object_check(ns, shmid); 1171 if (IS_ERR(shp)) { 1172 err = PTR_ERR(shp); 1173 goto out_unlock1; 1174 } 1175 1176 audit_ipc_obj(&(shp->shm_perm)); 1177 err = security_shm_shmctl(&shp->shm_perm, cmd); 1178 if (err) { 1179 goto out_unlock1; 1180 } 1181 1182 ipc_lock_object(&shp->shm_perm); 1183 1184 /* check if shm_destroy() is tearing down shp */ 1185 if (!ipc_valid_object(&shp->shm_perm)) { 1186 err = -EIDRM; 1187 goto out_unlock0; 1188 } 1189 1190 if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) { 1191 kuid_t euid = current_euid(); 1192 if (!uid_eq(euid, shp->shm_perm.uid) && !uid_eq(euid, shp->shm_perm.cuid)) { 1193 err = -EPERM; 1194 goto out_unlock0; 1195 } 1196 if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) { 1197 err = -EPERM; 1198 goto out_unlock0; 1199 } 1200 } 1201 1202 shm_file = shp->shm_file; 1203 if (is_file_hugepages(shm_file)) { 1204 goto out_unlock0; 1205 } 1206 1207 if (cmd == SHM_LOCK) { 1208 struct user_struct *user = current_user(); 1209 1210 err = shmem_lock(shm_file, 1, user); 1211 if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) { 1212 shp->shm_perm.mode |= SHM_LOCKED; 1213 shp->mlock_user = user; 1214 } 1215 goto out_unlock0; 1216 } 1217 1218 /* SHM_UNLOCK */ 1219 if (!(shp->shm_perm.mode & SHM_LOCKED)) { 1220 goto out_unlock0; 1221 } 1222 shmem_lock(shm_file, 0, shp->mlock_user); 1223 shp->shm_perm.mode &= ~SHM_LOCKED; 1224 shp->mlock_user = NULL; 1225 get_file(shm_file); 1226 ipc_unlock_object(&shp->shm_perm); 1227 rcu_read_unlock(); 1228 shmem_unlock_mapping(shm_file->f_mapping); 1229 1230 fput(shm_file); 1231 return err; 1232 1233out_unlock0: 1234 ipc_unlock_object(&shp->shm_perm); 1235out_unlock1: 1236 rcu_read_unlock(); 1237 return err; 1238} 1239 1240static long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf, int version) 1241{ 1242 int err; 1243 struct ipc_namespace *ns; 1244 struct shmid64_ds sem64; 1245 1246 if (cmd < 0 || shmid < 0) { 1247 return -EINVAL; 1248 } 1249 1250 ns = current->nsproxy->ipc_ns; 1251 1252 switch (cmd) { 1253 case IPC_INFO: { 1254 struct shminfo64 shminfo; 1255 err = shmctl_ipc_info(ns, &shminfo); 1256 if (err < 0) { 1257 return err; 1258 } 1259 if (copy_shminfo_to_user(buf, &shminfo, version)) { 1260 err = -EFAULT; 1261 } 1262 return err; 1263 } 1264 case SHM_INFO: { 1265 struct shm_info shm_info; 1266 err = shmctl_shm_info(ns, &shm_info); 1267 if (err < 0) { 1268 return err; 1269 } 1270 if (copy_to_user(buf, &shm_info, sizeof(shm_info))) { 1271 err = -EFAULT; 1272 } 1273 return err; 1274 } 1275 case SHM_STAT: 1276 case SHM_STAT_ANY: 1277 case IPC_STAT: { 1278 err = shmctl_stat(ns, shmid, cmd, &sem64); 1279 if (err < 0) { 1280 return err; 1281 } 1282 if (copy_shmid_to_user(buf, &sem64, version)) { 1283 err = -EFAULT; 1284 } 1285 return err; 1286 } 1287 case IPC_SET: 1288 if (copy_shmid_from_user(&sem64, buf, version)) { 1289 return -EFAULT; 1290 } 1291 fallthrough; 1292 case IPC_RMID: 1293 return shmctl_down(ns, shmid, cmd, &sem64); 1294 case SHM_LOCK: 1295 case SHM_UNLOCK: 1296 return shmctl_do_lock(ns, shmid, cmd); 1297 default: 1298 return -EINVAL; 1299 } 1300} 1301 1302SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) 1303{ 1304 return ksys_shmctl(shmid, cmd, buf, IPC_64); 1305} 1306 1307#ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION 1308long ksys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) 1309{ 1310 int version = ipc_parse_version(&cmd); 1311 1312 return ksys_shmctl(shmid, cmd, buf, version); 1313} 1314 1315SYSCALL_DEFINE3(old_shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) 1316{ 1317 return ksys_old_shmctl(shmid, cmd, buf); 1318} 1319#endif 1320 1321#ifdef CONFIG_COMPAT 1322 1323struct compat_shmid_ds { 1324 struct compat_ipc_perm shm_perm; 1325 int shm_segsz; 1326 old_time32_t shm_atime; 1327 old_time32_t shm_dtime; 1328 old_time32_t shm_ctime; 1329 compat_ipc_pid_t shm_cpid; 1330 compat_ipc_pid_t shm_lpid; 1331 unsigned short shm_nattch; 1332 unsigned short shm_unused; 1333 compat_uptr_t shm_unused2; 1334 compat_uptr_t shm_unused3; 1335}; 1336 1337struct compat_shminfo64 { 1338 compat_ulong_t shmmax; 1339 compat_ulong_t shmmin; 1340 compat_ulong_t shmmni; 1341 compat_ulong_t shmseg; 1342 compat_ulong_t shmall; 1343 compat_ulong_t __unused1; 1344 compat_ulong_t __unused2; 1345 compat_ulong_t __unused3; 1346 compat_ulong_t __unused4; 1347}; 1348 1349struct compat_shm_info { 1350 compat_int_t used_ids; 1351 compat_ulong_t shm_tot, shm_rss, shm_swp; 1352 compat_ulong_t swap_attempts, swap_successes; 1353}; 1354 1355static int copy_compat_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version) 1356{ 1357 if (in->shmmax > INT_MAX) { 1358 in->shmmax = INT_MAX; 1359 } 1360 if (version == IPC_64) { 1361 struct compat_shminfo64 info; 1362 memset(&info, 0, sizeof(info)); 1363 info.shmmax = in->shmmax; 1364 info.shmmin = in->shmmin; 1365 info.shmmni = in->shmmni; 1366 info.shmseg = in->shmseg; 1367 info.shmall = in->shmall; 1368 return copy_to_user(buf, &info, sizeof(info)); 1369 } else { 1370 struct shminfo info; 1371 memset(&info, 0, sizeof(info)); 1372 info.shmmax = in->shmmax; 1373 info.shmmin = in->shmmin; 1374 info.shmmni = in->shmmni; 1375 info.shmseg = in->shmseg; 1376 info.shmall = in->shmall; 1377 return copy_to_user(buf, &info, sizeof(info)); 1378 } 1379} 1380 1381static int put_compat_shm_info(struct shm_info *ip, struct compat_shm_info __user *uip) 1382{ 1383 struct compat_shm_info info; 1384 1385 memset(&info, 0, sizeof(info)); 1386 info.used_ids = ip->used_ids; 1387 info.shm_tot = ip->shm_tot; 1388 info.shm_rss = ip->shm_rss; 1389 info.shm_swp = ip->shm_swp; 1390 info.swap_attempts = ip->swap_attempts; 1391 info.swap_successes = ip->swap_successes; 1392 return copy_to_user(uip, &info, sizeof(info)); 1393} 1394 1395static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version) 1396{ 1397 if (version == IPC_64) { 1398 struct compat_shmid64_ds v; 1399 memset(&v, 0, sizeof(v)); 1400 to_compat_ipc64_perm(&v.shm_perm, &in->shm_perm); 1401 v.shm_atime = lower_32_bits(in->shm_atime); 1402 v.shm_atime_high = upper_32_bits(in->shm_atime); 1403 v.shm_dtime = lower_32_bits(in->shm_dtime); 1404 v.shm_dtime_high = upper_32_bits(in->shm_dtime); 1405 v.shm_ctime = lower_32_bits(in->shm_ctime); 1406 v.shm_ctime_high = upper_32_bits(in->shm_ctime); 1407 v.shm_segsz = in->shm_segsz; 1408 v.shm_nattch = in->shm_nattch; 1409 v.shm_cpid = in->shm_cpid; 1410 v.shm_lpid = in->shm_lpid; 1411 return copy_to_user(buf, &v, sizeof(v)); 1412 } else { 1413 struct compat_shmid_ds v; 1414 memset(&v, 0, sizeof(v)); 1415 to_compat_ipc_perm(&v.shm_perm, &in->shm_perm); 1416 v.shm_perm.key = in->shm_perm.key; 1417 v.shm_atime = in->shm_atime; 1418 v.shm_dtime = in->shm_dtime; 1419 v.shm_ctime = in->shm_ctime; 1420 v.shm_segsz = in->shm_segsz; 1421 v.shm_nattch = in->shm_nattch; 1422 v.shm_cpid = in->shm_cpid; 1423 v.shm_lpid = in->shm_lpid; 1424 return copy_to_user(buf, &v, sizeof(v)); 1425 } 1426} 1427 1428static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version) 1429{ 1430 memset(out, 0, sizeof(*out)); 1431 if (version == IPC_64) { 1432 struct compat_shmid64_ds __user *p = buf; 1433 return get_compat_ipc64_perm(&out->shm_perm, &p->shm_perm); 1434 } else { 1435 struct compat_shmid_ds __user *p = buf; 1436 return get_compat_ipc_perm(&out->shm_perm, &p->shm_perm); 1437 } 1438} 1439 1440static long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr, int version) 1441{ 1442 struct ipc_namespace *ns; 1443 struct shmid64_ds sem64; 1444 int err; 1445 1446 ns = current->nsproxy->ipc_ns; 1447 1448 if (cmd < 0 || shmid < 0) { 1449 return -EINVAL; 1450 } 1451 1452 switch (cmd) { 1453 case IPC_INFO: { 1454 struct shminfo64 shminfo; 1455 err = shmctl_ipc_info(ns, &shminfo); 1456 if (err < 0) { 1457 return err; 1458 } 1459 if (copy_compat_shminfo_to_user(uptr, &shminfo, version)) { 1460 err = -EFAULT; 1461 } 1462 return err; 1463 } 1464 case SHM_INFO: { 1465 struct shm_info shm_info; 1466 err = shmctl_shm_info(ns, &shm_info); 1467 if (err < 0) { 1468 return err; 1469 } 1470 if (put_compat_shm_info(&shm_info, uptr)) { 1471 err = -EFAULT; 1472 } 1473 return err; 1474 } 1475 case IPC_STAT: 1476 case SHM_STAT_ANY: 1477 case SHM_STAT: 1478 err = shmctl_stat(ns, shmid, cmd, &sem64); 1479 if (err < 0) { 1480 return err; 1481 } 1482 if (copy_compat_shmid_to_user(uptr, &sem64, version)) { 1483 err = -EFAULT; 1484 } 1485 return err; 1486 1487 case IPC_SET: 1488 if (copy_compat_shmid_from_user(&sem64, uptr, version)) { 1489 return -EFAULT; 1490 } 1491 fallthrough; 1492 case IPC_RMID: 1493 return shmctl_down(ns, shmid, cmd, &sem64); 1494 case SHM_LOCK: 1495 case SHM_UNLOCK: 1496 return shmctl_do_lock(ns, shmid, cmd); 1497 default: 1498 return -EINVAL; 1499 } 1500 return err; 1501} 1502 1503COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr) 1504{ 1505 return compat_ksys_shmctl(shmid, cmd, uptr, IPC_64); 1506} 1507 1508#ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION 1509long compat_ksys_old_shmctl(int shmid, int cmd, void __user *uptr) 1510{ 1511 int version = compat_ipc_parse_version(&cmd); 1512 1513 return compat_ksys_shmctl(shmid, cmd, uptr, version); 1514} 1515 1516COMPAT_SYSCALL_DEFINE3(old_shmctl, int, shmid, int, cmd, void __user *, uptr) 1517{ 1518 return compat_ksys_old_shmctl(shmid, cmd, uptr); 1519} 1520#endif 1521#endif 1522 1523/* 1524 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists. 1525 * 1526 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The 1527 * "raddr" thing points to kernel space, and there has to be a wrapper around 1528 * this. 1529 */ 1530long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, unsigned long shmlba) 1531{ 1532 struct shmid_kernel *shp; 1533 unsigned long addr = (unsigned long)shmaddr; 1534 unsigned long size; 1535 struct file *file, *base; 1536 int err; 1537 unsigned long flags = MAP_SHARED; 1538 unsigned long prot; 1539 int acc_mode; 1540 struct ipc_namespace *ns; 1541 struct shm_file_data *sfd; 1542 int f_flags; 1543 unsigned long populate = 0; 1544 1545 err = -EINVAL; 1546 if (shmid < 0) { 1547 goto out; 1548 } 1549 1550 if (addr) { 1551 if (addr & (shmlba - 1)) { 1552 if (shmflg & SHM_RND) { 1553 addr &= ~(shmlba - 1); /* round down */ 1554 1555 /* 1556 * Ensure that the round-down is non-nil 1557 * when remapping. This can happen for 1558 * cases when addr < shmlba. 1559 */ 1560 if (!addr && (shmflg & SHM_REMAP)) { 1561 goto out; 1562 } 1563 } else 1564#ifndef __ARCH_FORCE_SHMLBA 1565 if (addr & ~PAGE_MASK) 1566#endif 1567 goto out; 1568 } 1569 1570 flags |= MAP_FIXED; 1571 } else if ((shmflg & SHM_REMAP)) { 1572 goto out; 1573 } 1574 1575 if (shmflg & SHM_RDONLY) { 1576 prot = PROT_READ; 1577 acc_mode = S_IRUGO; 1578 f_flags = O_RDONLY; 1579 } else { 1580 prot = PROT_READ | PROT_WRITE; 1581 acc_mode = S_IRUGO | S_IWUGO; 1582 f_flags = O_RDWR; 1583 } 1584 if (shmflg & SHM_EXEC) { 1585 prot |= PROT_EXEC; 1586 acc_mode |= S_IXUGO; 1587 } 1588 1589 /* 1590 * We cannot rely on the fs check since SYSV IPC does have an 1591 * additional creator id... 1592 */ 1593 ns = current->nsproxy->ipc_ns; 1594 rcu_read_lock(); 1595 shp = shm_obtain_object_check(ns, shmid); 1596 if (IS_ERR(shp)) { 1597 err = PTR_ERR(shp); 1598 goto out_unlock; 1599 } 1600 1601 err = -EACCES; 1602 if (ipcperms(ns, &shp->shm_perm, acc_mode)) { 1603 goto out_unlock; 1604 } 1605 1606 err = security_shm_shmat(&shp->shm_perm, shmaddr, shmflg); 1607 if (err) { 1608 goto out_unlock; 1609 } 1610 1611 ipc_lock_object(&shp->shm_perm); 1612 1613 /* check if shm_destroy() is tearing down shp */ 1614 if (!ipc_valid_object(&shp->shm_perm)) { 1615 ipc_unlock_object(&shp->shm_perm); 1616 err = -EIDRM; 1617 goto out_unlock; 1618 } 1619 1620 /* 1621 * We need to take a reference to the real shm file to prevent the 1622 * pointer from becoming stale in cases where the lifetime of the outer 1623 * file extends beyond that of the shm segment. It's not usually 1624 * possible, but it can happen during remap_file_pages() emulation as 1625 * that unmaps the memory, then does ->mmap() via file reference only. 1626 * We'll deny the ->mmap() if the shm segment was since removed, but to 1627 * detect shm ID reuse we need to compare the file pointers. 1628 */ 1629 base = get_file(shp->shm_file); 1630 shp->shm_nattch++; 1631 size = i_size_read(file_inode(base)); 1632 ipc_unlock_object(&shp->shm_perm); 1633 rcu_read_unlock(); 1634 1635 err = -ENOMEM; 1636 sfd = kzalloc(sizeof(*sfd), GFP_KERNEL); 1637 if (!sfd) { 1638 fput(base); 1639 goto out_nattch; 1640 } 1641 1642 file = alloc_file_clone(base, f_flags, is_file_hugepages(base) ? &shm_file_operations_huge : &shm_file_operations); 1643 err = PTR_ERR(file); 1644 if (IS_ERR(file)) { 1645 kfree(sfd); 1646 fput(base); 1647 goto out_nattch; 1648 } 1649 1650 sfd->id = shp->shm_perm.id; 1651 sfd->ns = get_ipc_ns(ns); 1652 sfd->file = base; 1653 sfd->vm_ops = NULL; 1654 file->private_data = sfd; 1655 1656 err = security_mmap_file(file, prot, flags); 1657 if (err) { 1658 goto out_fput; 1659 } 1660 1661 if (mmap_write_lock_killable(current->mm)) { 1662 err = -EINTR; 1663 goto out_fput; 1664 } 1665 1666 if (addr && !(shmflg & SHM_REMAP)) { 1667 err = -EINVAL; 1668 if (addr + size < addr) { 1669 goto invalid; 1670 } 1671 1672 if (find_vma_intersection(current->mm, addr, addr + size)) { 1673 goto invalid; 1674 } 1675 } 1676 1677 addr = do_mmap(file, addr, size, prot, flags, 0, &populate, NULL); 1678 *raddr = addr; 1679 err = 0; 1680 if (IS_ERR_VALUE(addr)) { 1681 err = (long)addr; 1682 } 1683invalid: 1684 mmap_write_unlock(current->mm); 1685 if (populate) { 1686 mm_populate(addr, populate); 1687 } 1688 1689out_fput: 1690 fput(file); 1691 1692out_nattch: 1693 down_write(&shm_ids(ns).rwsem); 1694 shp = shm_lock(ns, shmid); 1695 shp->shm_nattch--; 1696 if (shm_may_destroy(shp)) { 1697 shm_destroy(ns, shp); 1698 } else { 1699 shm_unlock(shp); 1700 } 1701 up_write(&shm_ids(ns).rwsem); 1702 return err; 1703 1704out_unlock: 1705 rcu_read_unlock(); 1706out: 1707 return err; 1708} 1709 1710SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg) 1711{ 1712 unsigned long ret; 1713 long err; 1714 1715 err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA); 1716 if (err) { 1717 return err; 1718 } 1719 force_successful_syscall_return(); 1720 return (long)ret; 1721} 1722 1723#ifdef CONFIG_COMPAT 1724 1725#ifndef COMPAT_SHMLBA 1726#define COMPAT_SHMLBA SHMLBA 1727#endif 1728 1729COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg) 1730{ 1731 unsigned long ret; 1732 long err; 1733 1734 err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA); 1735 if (err) { 1736 return err; 1737 } 1738 force_successful_syscall_return(); 1739 return (long)ret; 1740} 1741#endif 1742 1743/* 1744 * detach and kill segment if marked destroyed. 1745 * The work is done in shm_close. 1746 */ 1747long ksys_shmdt(char __user *shmaddr) 1748{ 1749 struct mm_struct *mm = current->mm; 1750 struct vm_area_struct *vma; 1751 unsigned long addr = (unsigned long)shmaddr; 1752 int retval = -EINVAL; 1753#ifdef CONFIG_MMU 1754 loff_t size = 0; 1755 struct file *file; 1756 struct vm_area_struct *next; 1757#endif 1758 1759 if (addr & ~PAGE_MASK) { 1760 return retval; 1761 } 1762 1763 if (mmap_write_lock_killable(mm)) { 1764 return -EINTR; 1765 } 1766 1767 /* 1768 * This function tries to be smart and unmap shm segments that 1769 * were modified by partial mlock or munmap calls: 1770 * - It first determines the size of the shm segment that should be 1771 * unmapped: It searches for a vma that is backed by shm and that 1772 * started at address shmaddr. It records it's size and then unmaps 1773 * it. 1774 * - Then it unmaps all shm vmas that started at shmaddr and that 1775 * are within the initially determined size and that are from the 1776 * same shm segment from which we determined the size. 1777 * Errors from do_munmap are ignored: the function only fails if 1778 * it's called with invalid parameters or if it's called to unmap 1779 * a part of a vma. Both calls in this function are for full vmas, 1780 * the parameters are directly copied from the vma itself and always 1781 * valid - therefore do_munmap cannot fail. (famous last words?) 1782 */ 1783 /* 1784 * If it had been mremap()'d, the starting address would not 1785 * match the usual checks anyway. So assume all vma's are 1786 * above the starting address given. 1787 */ 1788 vma = find_vma(mm, addr); 1789 1790#ifdef CONFIG_MMU 1791 while (vma) { 1792 next = vma->vm_next; 1793 1794 /* 1795 * Check if the starting address would match, i.e. it's 1796 * a fragment created by mprotect() and/or munmap(), or it 1797 * otherwise it starts at this address with no hassles. 1798 */ 1799 if ((vma->vm_ops == &shm_vm_ops) && (vma->vm_start - addr) / PAGE_SIZE == vma->vm_pgoff) { 1800 1801 /* 1802 * Record the file of the shm segment being 1803 * unmapped. With mremap(), someone could place 1804 * page from another segment but with equal offsets 1805 * in the range we are unmapping. 1806 */ 1807 file = vma->vm_file; 1808 size = i_size_read(file_inode(vma->vm_file)); 1809 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL); 1810 /* 1811 * We discovered the size of the shm segment, so 1812 * break out of here and fall through to the next 1813 * loop that uses the size information to stop 1814 * searching for matching vma's. 1815 */ 1816 retval = 0; 1817 vma = next; 1818 break; 1819 } 1820 vma = next; 1821 } 1822 1823 /* 1824 * We need look no further than the maximum address a fragment 1825 * could possibly have landed at. Also cast things to loff_t to 1826 * prevent overflows and make comparisons vs. equal-width types. 1827 */ 1828 size = PAGE_ALIGN(size); 1829 while (vma && (loff_t)(vma->vm_end - addr) <= size) { 1830 next = vma->vm_next; 1831 1832 /* finding a matching vma now does not alter retval */ 1833 if ((vma->vm_ops == &shm_vm_ops) && ((vma->vm_start - addr) / PAGE_SIZE == vma->vm_pgoff) && 1834 (vma->vm_file == file)) { 1835 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL); 1836 } 1837 vma = next; 1838 } 1839 1840#else /* CONFIG_MMU */ 1841 /* under NOMMU conditions, the exact address to be destroyed must be 1842 * given 1843 */ 1844 if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) { 1845 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL); 1846 retval = 0; 1847 } 1848 1849#endif 1850 1851 mmap_write_unlock(mm); 1852 return retval; 1853} 1854 1855SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) 1856{ 1857 return ksys_shmdt(shmaddr); 1858} 1859 1860#ifdef CONFIG_PROC_FS 1861static int sysvipc_shm_proc_show(struct seq_file *s, void *it) 1862{ 1863 struct pid_namespace *pid_ns = ipc_seq_pid_ns(s); 1864 struct user_namespace *user_ns = seq_user_ns(s); 1865 struct kern_ipc_perm *ipcp = it; 1866 struct shmid_kernel *shp; 1867 unsigned long rss = 0, swp = 0; 1868 1869 shp = container_of(ipcp, struct shmid_kernel, shm_perm); 1870 shm_add_rss_swap(shp, &rss, &swp); 1871 1872#if BITS_PER_LONG <= 32 1873#define SIZE_SPEC "%10lu" 1874#else 1875#define SIZE_SPEC "%21lu" 1876#endif 1877 1878 seq_printf(s, 1879 "%10d %10d %4o " SIZE_SPEC " %5u %5u " 1880 "%5lu %5u %5u %5u %5u %10llu %10llu %10llu " SIZE_SPEC " " SIZE_SPEC "\n", 1881 shp->shm_perm.key, shp->shm_perm.id, shp->shm_perm.mode, shp->shm_segsz, 1882 pid_nr_ns(shp->shm_cprid, pid_ns), pid_nr_ns(shp->shm_lprid, pid_ns), shp->shm_nattch, 1883 from_kuid_munged(user_ns, shp->shm_perm.uid), from_kgid_munged(user_ns, shp->shm_perm.gid), 1884 from_kuid_munged(user_ns, shp->shm_perm.cuid), from_kgid_munged(user_ns, shp->shm_perm.cgid), 1885 shp->shm_atim, shp->shm_dtim, shp->shm_ctim, rss * PAGE_SIZE, swp * PAGE_SIZE); 1886 1887 return 0; 1888} 1889#endif 1890