xref: /device/soc/rockchip/common/sdk_linux/ipc/shm.c (revision 3d0407ba)
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * linux/ipc/shm.c
4 * Copyright (C) 1992, 1993 Krishna Balasubramanian
5 *     Many improvements/fixes by Bruno Haible.
6 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
7 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
8 *
9 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
10 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
11 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
12 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
13 * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
14 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
15 * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
16 *
17 * support for audit of ipc object properties and permission changes
18 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
19 *
20 * namespaces support
21 * OpenVZ, SWsoft Inc.
22 * Pavel Emelianov <xemul@openvz.org>
23 *
24 * Better ipc lock (kern_ipc_perm.lock) handling
25 * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
26 */
27
28#include <linux/slab.h>
29#include <linux/mm.h>
30#include <linux/hugetlb.h>
31#include <linux/shm.h>
32#include <linux/init.h>
33#include <linux/file.h>
34#include <linux/mman.h>
35#include <linux/shmem_fs.h>
36#include <linux/security.h>
37#include <linux/syscalls.h>
38#include <linux/audit.h>
39#include <linux/capability.h>
40#include <linux/ptrace.h>
41#include <linux/seq_file.h>
42#include <linux/rwsem.h>
43#include <linux/nsproxy.h>
44#include <linux/mount.h>
45#include <linux/ipc_namespace.h>
46#include <linux/rhashtable.h>
47
48#include <linux/uaccess.h>
49
50#include "util.h"
51
/*
 * Kernel-side descriptor of one System V shared memory segment.
 * The ipc id tables hand back the embedded shm_perm; the code in this file
 * recovers the descriptor from it with container_of().
 */
52struct shmid_kernel {
53    struct kern_ipc_perm shm_perm;  /* generic ipc object (key, id, mode, object lock) */
54    struct file *shm_file;          /* backing file; set in newseg(), cleared by shm_destroy() */
55    unsigned long shm_nattch;       /* attach count: ++ in __shm_open(), -- in shm_close() */
56    unsigned long shm_segsz;        /* segment size in bytes as requested at creation */
57    time64_t shm_atim;              /* time of last attach (written by __shm_open()) */
58    time64_t shm_dtim;              /* time of last detach (written by shm_close()) */
59    time64_t shm_ctim;              /* time of creation or of the last IPC_SET */
60    struct pid *shm_cprid;          /* thread-group pid of the creating task */
61    struct pid *shm_lprid;          /* thread-group pid of the last task to attach/detach */
62    struct user_struct *mlock_user; /* user passed to shmem_lock()/user_shm_unlock() on destroy */
63
64    /*
       * The task that created the shm object.  Per shm_clist_rm(), it is only
       * guaranteed to be valid while shm_clist is non-empty.
       */
65    struct task_struct *shm_creator;
66    struct list_head shm_clist; /* list by creator */
67    struct ipc_namespace *ns;   /* namespace the segment was added to in newseg() */
68} __randomize_layout;
69
70/* shm_mode upper byte flags: kept in shm_perm.mode next to the permission bits */
71#define SHM_DEST 01000   /* segment will be destroyed on last detach */
72#define SHM_LOCKED 02000 /* segment will not be swapped */
73
/*
 * Per-open state stored in file->private_data of an shm wrapper file
 * (read back through the shm_file_data() accessor).
 */
74struct shm_file_data {
75    int id;                   /* ipc id, looked up again via shm_lock() on open/close */
76    struct ipc_namespace *ns; /* namespace of the segment; reference dropped in shm_release() */
77    struct file *file;        /* backing file that fsync/mmap/get_unmapped_area are forwarded to */
78    const struct vm_operations_struct *vm_ops; /* backing file's vm_ops, saved by shm_mmap() */
79};
80
/* Typed lvalue view of file->private_data for shm wrapper files. */
81#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))
82
/* Tentative declarations: both tables are defined further down but used earlier. */
83static const struct file_operations shm_file_operations;
84static const struct vm_operations_struct shm_vm_ops;
85
/* The shared-memory id table of an ipc namespace. */
86#define shm_ids(ns) ((ns)->ids[IPC_SHM_IDS])
87
/* Release a segment obtained locked from shm_lock()/shm_lock_by_ptr() (wraps ipc_unlock()). */
88#define shm_unlock(shp) ipc_unlock(&(shp)->shm_perm)
89
/* Forward declarations of helpers that are referenced before their definition. */
90static int newseg(struct ipc_namespace *, struct ipc_params *);
91static void shm_open(struct vm_area_struct *vma);
92static void shm_close(struct vm_area_struct *vma);
93static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
94#ifdef CONFIG_PROC_FS
95static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
96#endif
97
98void shm_init_ns(struct ipc_namespace *ns)
99{
100    ns->shm_ctlmax = SHMMAX;
101    ns->shm_ctlall = SHMALL;
102    ns->shm_ctlmni = SHMMNI;
103    ns->shm_rmid_forced = 0;
104    ns->shm_tot = 0;
105    ipc_init_ids(&shm_ids(ns));
106}
107
108/*
109 * Called with shm_ids.rwsem (writer) and the shp structure locked.
110 * Only shm_ids.rwsem remains locked on exit.
111 */
112static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
113{
114    struct shmid_kernel *shp;
115
116    shp = container_of(ipcp, struct shmid_kernel, shm_perm);
117    WARN_ON(ns != shp->ns);
118
119    if (shp->shm_nattch) {
120        shp->shm_perm.mode |= SHM_DEST;
121        /* Do not find it any more */
122        ipc_set_key_private(&shm_ids(ns), &shp->shm_perm);
123        shm_unlock(shp);
124    } else {
125        shm_destroy(ns, shp);
126    }
127}
128
#ifdef CONFIG_IPC_NS
/*
 * shm_exit_ns - release all shared-memory state of an ipc namespace
 * @ns: namespace being torn down
 *
 * Every remaining segment is passed to do_shm_rmid() through free_ipcs();
 * afterwards the idr and the key hash table of the shm id table are
 * destroyed.
 */
void shm_exit_ns(struct ipc_namespace *ns)
{
    free_ipcs(ns, &shm_ids(ns), do_shm_rmid);

    idr_destroy(&shm_ids(ns).ipcs_idr);
    rhashtable_destroy(&shm_ids(ns).key_ht);
}
#endif
137
138static int __init ipc_ns_init(void)
139{
140    shm_init_ns(&init_ipc_ns);
141    return 0;
142}
143
144pure_initcall(ipc_ns_init);
145
146void __init shm_init(void)
147{
148    ipc_init_proc_interface("sysvipc/shm",
149#if BITS_PER_LONG <= 32
150                            "       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      "
151                            "atime      dtime      ctime        rss       swap\n",
152#else
153                            "       key      shmid perms                  size  cpid  lpid nattch   uid   gid  cuid  "
154                            "cgid      atime      dtime      ctime                   rss                  swap\n",
155#endif
156                            IPC_SHM_IDS, sysvipc_shm_proc_show);
157}
158
159static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
160{
161    struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);
162
163    if (IS_ERR(ipcp)) {
164        return ERR_CAST(ipcp);
165    }
166
167    return container_of(ipcp, struct shmid_kernel, shm_perm);
168}
169
170static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
171{
172    struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);
173
174    if (IS_ERR(ipcp)) {
175        return ERR_CAST(ipcp);
176    }
177
178    return container_of(ipcp, struct shmid_kernel, shm_perm);
179}
180
181/*
182 * shm_lock_(check_) routines are called in the paths where the rwsem
183 * is not necessarily held.
184 */
185static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
186{
187    struct kern_ipc_perm *ipcp;
188
189    rcu_read_lock();
190    ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);
191    if (IS_ERR(ipcp)) {
192        goto err;
193    }
194
195    ipc_lock_object(ipcp);
196    /*
197     * ipc_rmid() may have already freed the ID while ipc_lock_object()
198     * was spinning: here verify that the structure is still valid.
199     * Upon races with RMID, return -EIDRM, thus indicating that
200     * the ID points to a removed identifier.
201     */
202    if (ipc_valid_object(ipcp)) {
203        /* return a locked ipc object upon success */
204        return container_of(ipcp, struct shmid_kernel, shm_perm);
205    }
206
207    ipc_unlock_object(ipcp);
208    ipcp = ERR_PTR(-EIDRM);
209err:
210    rcu_read_unlock();
211    /*
212     * Callers of shm_lock() must validate the status of the returned ipc
213     * object pointer and error out as appropriate.
214     */
215    return ERR_CAST(ipcp);
216}
217
218static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
219{
220    rcu_read_lock();
221    ipc_lock_object(&ipcp->shm_perm);
222}
223
224static void shm_rcu_free(struct rcu_head *head)
225{
226    struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm, rcu);
227    struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel, shm_perm);
228    security_shm_free(&shp->shm_perm);
229    kvfree(shp);
230}
231
232/*
233 * It has to be called with shp locked.
234 * It must be called before ipc_rmid()
235 */
236static inline void shm_clist_rm(struct shmid_kernel *shp)
237{
238    struct task_struct *creator;
239
240    /* ensure that shm_creator does not disappear */
241    rcu_read_lock();
242
243    /*
244     * A concurrent exit_shm may do a list_del_init() as well.
245     * Just do nothing if exit_shm already did the work
246     */
247    if (!list_empty(&shp->shm_clist)) {
248        /*
249         * shp->shm_creator is guaranteed to be valid *only*
250         * if shp->shm_clist is not empty.
251         */
252        creator = shp->shm_creator;
253
254        task_lock(creator);
255        /*
256         * list_del_init() is a nop if the entry was already removed
257         * from the list.
258         */
259        list_del_init(&shp->shm_clist);
260        task_unlock(creator);
261    }
262    rcu_read_unlock();
263}
264
265static inline void shm_rmid(struct shmid_kernel *s)
266{
267    shm_clist_rm(s);
268    ipc_rmid(&shm_ids(s->ns), &s->shm_perm);
269}
270
271static int __shm_open(struct vm_area_struct *vma)
272{
273    struct file *file = vma->vm_file;
274    struct shm_file_data *sfd = shm_file_data(file);
275    struct shmid_kernel *shp;
276
277    shp = shm_lock(sfd->ns, sfd->id);
278    if (IS_ERR(shp)) {
279        return PTR_ERR(shp);
280    }
281
282    if (shp->shm_file != sfd->file) {
283        /* ID was reused */
284        shm_unlock(shp);
285        return -EINVAL;
286    }
287
288    shp->shm_atim = ktime_get_real_seconds();
289    ipc_update_pid(&shp->shm_lprid, task_tgid(current));
290    shp->shm_nattch++;
291    shm_unlock(shp);
292    return 0;
293}
294
/*
 * vm_ops->open callback.  This is called by fork, once for every shm attach.
 *
 * A failure of __shm_open() means we raced in the idr lookup or with
 * shm_destroy(); either way the ID is busted, so warn once.
 */
static void shm_open(struct vm_area_struct *vma)
{
    WARN_ON_ONCE(__shm_open(vma));
}
305
306/*
307 * shm_destroy - free the struct shmid_kernel
308 *
309 * @ns: namespace
310 * @shp: struct to free
311 *
312 * It has to be called with shp and shm_ids.rwsem (writer) locked,
313 * but returns with shp unlocked and freed.
314 */
315static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
316{
317    struct file *shm_file;
318
319    shm_file = shp->shm_file;
320    shp->shm_file = NULL;
321    ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
322    shm_rmid(shp);
323    shm_unlock(shp);
324    if (!is_file_hugepages(shm_file)) {
325        shmem_lock(shm_file, 0, shp->mlock_user);
326    } else if (shp->mlock_user) {
327        user_shm_unlock(i_size_read(file_inode(shm_file)), shp->mlock_user);
328    }
329    fput(shm_file);
330    ipc_update_pid(&shp->shm_cprid, NULL);
331    ipc_update_pid(&shp->shm_lprid, NULL);
332    ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
333}
334
335/*
336 * shm_may_destroy - identifies whether shm segment should be destroyed now
337 *
338 * Returns true if and only if there are no active users of the segment and
339 * one of the following is true:
340 *
341 * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
342 *
343 * 2) sysctl kernel.shm_rmid_forced is set to 1.
344 */
345static bool shm_may_destroy(struct shmid_kernel *shp)
346{
347    return (shp->shm_nattch == 0) &&
348           (shp->ns->shm_rmid_forced ||
349        (shp->shm_perm.mode & SHM_DEST));
350}
351
352/*
353 * remove the attach descriptor vma.
354 * free memory for segment if it is marked destroyed.
355 * The descriptor has already been removed from the current->mm->mmap list
356 * and will later be kfree()d.
357 */
358static void shm_close(struct vm_area_struct *vma)
359{
360    struct file *file = vma->vm_file;
361    struct shm_file_data *sfd = shm_file_data(file);
362    struct shmid_kernel *shp;
363    struct ipc_namespace *ns = sfd->ns;
364
365    down_write(&shm_ids(ns).rwsem);
366    /* remove from the list of attaches of the shm segment */
367    shp = shm_lock(ns, sfd->id);
368    /*
369     * We raced in the idr lookup or with shm_destroy().
370     * Either way, the ID is busted.
371     */
372    if (WARN_ON_ONCE(IS_ERR(shp))) {
373        goto done; /* no-op */
374    }
375
376    ipc_update_pid(&shp->shm_lprid, task_tgid(current));
377    shp->shm_dtim = ktime_get_real_seconds();
378    shp->shm_nattch--;
379    if (shm_may_destroy(shp)) {
380        shm_destroy(ns, shp);
381    } else {
382        shm_unlock(shp);
383    }
384done:
385    up_write(&shm_ids(ns).rwsem);
386}
387
388/* Called with ns->shm_ids(ns).rwsem locked */
389static int shm_try_destroy_orphaned(int id, void *p, void *data)
390{
391    struct ipc_namespace *ns = data;
392    struct kern_ipc_perm *ipcp = p;
393    struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
394
395    /*
396     * We want to destroy segments without users and with already
397     * exit'ed originating process.
398     *
399     * As shp->* are changed under rwsem, it's safe to skip shp locking.
400     */
401    if (!list_empty(&shp->shm_clist)) {
402        return 0;
403    }
404
405    if (shm_may_destroy(shp)) {
406        shm_lock_by_ptr(shp);
407        shm_destroy(ns, shp);
408    }
409    return 0;
410}
411
412void shm_destroy_orphaned(struct ipc_namespace *ns)
413{
414    down_write(&shm_ids(ns).rwsem);
415    if (shm_ids(ns).in_use) {
416        idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
417    }
418    up_write(&shm_ids(ns).rwsem);
419}
420
421/* Locking assumes this will only be called with task == current */
422void exit_shm(struct task_struct *task)
423{
424    for (;;) {
425        struct shmid_kernel *shp;
426        struct ipc_namespace *ns;
427
428        task_lock(task);
429
430        if (list_empty(&task->sysvshm.shm_clist)) {
431            task_unlock(task);
432            break;
433        }
434
435        shp = list_first_entry(&task->sysvshm.shm_clist, struct shmid_kernel,
436                shm_clist);
437
438        /*
439         * 1) Get pointer to the ipc namespace. It is worth to say
440         * that this pointer is guaranteed to be valid because
441         * shp lifetime is always shorter than namespace lifetime
442         * in which shp lives.
443         * We taken task_lock it means that shp won't be freed.
444         */
445        ns = shp->ns;
446
447        /*
448         * 2) If kernel.shm_rmid_forced is not set then only keep track of
449         * which shmids are orphaned, so that a later set of the sysctl
450         * can clean them up.
451         */
452        if (!ns->shm_rmid_forced)
453            goto unlink_continue;
454
455        /*
456         * 3) get a reference to the namespace.
457         *    The refcount could be already 0. If it is 0, then
458         *    the shm objects will be free by free_ipc_work().
459         */
460        ns = get_ipc_ns_not_zero(ns);
461        if (!ns) {
462unlink_continue:
463            list_del_init(&shp->shm_clist);
464            task_unlock(task);
465            continue;
466        }
467
468        /*
469         * 4) get a reference to shp.
470         *   This cannot fail: shm_clist_rm() is called before
471         *   ipc_rmid(), thus the refcount cannot be 0.
472         */
473        WARN_ON(!ipc_rcu_getref(&shp->shm_perm));
474
475        /*
476         * 5) unlink the shm segment from the list of segments
477         *    created by current.
478         *    This must be done last. After unlinking,
479         *    only the refcounts obtained above prevent IPC_RMID
480         *    from destroying the segment or the namespace.
481         */
482        list_del_init(&shp->shm_clist);
483
484        task_unlock(task);
485
486        /*
487         * 6) we have all references
488         *    Thus lock & if needed destroy shp.
489         */
490        down_write(&shm_ids(ns).rwsem);
491        shm_lock_by_ptr(shp);
492        /*
493         * rcu_read_lock was implicitly taken in shm_lock_by_ptr, it's
494         * safe to call ipc_rcu_putref here
495         */
496        ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
497
498        if (ipc_valid_object(&shp->shm_perm)) {
499            if (shm_may_destroy(shp))
500                shm_destroy(ns, shp);
501            else
502                shm_unlock(shp);
503        } else {
504            /*
505             * Someone else deleted the shp from namespace
506             * idr/kht while we have waited.
507             * Just unlock and continue.
508             */
509            shm_unlock(shp);
510        }
511
512        up_write(&shm_ids(ns).rwsem);
513        put_ipc_ns(ns); /* paired with get_ipc_ns_not_zero */
514    }
515}
516
517static vm_fault_t shm_fault(struct vm_fault *vmf)
518{
519    struct file *file = vmf->vma->vm_file;
520    struct shm_file_data *sfd = shm_file_data(file);
521
522    return sfd->vm_ops->fault(vmf);
523}
524
525static int shm_split(struct vm_area_struct *vma, unsigned long addr)
526{
527    struct file *file = vma->vm_file;
528    struct shm_file_data *sfd = shm_file_data(file);
529
530    if (sfd->vm_ops->split) {
531        return sfd->vm_ops->split(vma, addr);
532    }
533
534    return 0;
535}
536
537static unsigned long shm_pagesize(struct vm_area_struct *vma)
538{
539    struct file *file = vma->vm_file;
540    struct shm_file_data *sfd = shm_file_data(file);
541
542    if (sfd->vm_ops->pagesize) {
543        return sfd->vm_ops->pagesize(vma);
544    }
545
546    return PAGE_SIZE;
547}
548
549#ifdef CONFIG_NUMA
550static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
551{
552    struct file *file = vma->vm_file;
553    struct shm_file_data *sfd = shm_file_data(file);
554    int err = 0;
555
556    if (sfd->vm_ops->set_policy) {
557        err = sfd->vm_ops->set_policy(vma, new);
558    }
559    return err;
560}
561
562static struct mempolicy *shm_get_policy(struct vm_area_struct *vma, unsigned long addr)
563{
564    struct file *file = vma->vm_file;
565    struct shm_file_data *sfd = shm_file_data(file);
566    struct mempolicy *pol = NULL;
567
568    if (sfd->vm_ops->get_policy) {
569        pol = sfd->vm_ops->get_policy(vma, addr);
570    } else if (vma->vm_policy) {
571        pol = vma->vm_policy;
572    }
573
574    return pol;
575}
576#endif
577
578static int shm_mmap(struct file *file, struct vm_area_struct *vma)
579{
580    struct shm_file_data *sfd = shm_file_data(file);
581    int ret;
582
583    /*
584     * In case of remap_file_pages() emulation, the file can represent an
585     * IPC ID that was removed, and possibly even reused by another shm
586     * segment already.  Propagate this case as an error to caller.
587     */
588    ret = __shm_open(vma);
589    if (ret) {
590        return ret;
591    }
592
593    ret = call_mmap(sfd->file, vma);
594    if (ret) {
595        shm_close(vma);
596        return ret;
597    }
598    sfd->vm_ops = vma->vm_ops;
599#ifdef CONFIG_MMU
600    WARN_ON(!sfd->vm_ops->fault);
601#endif
602    vma->vm_ops = &shm_vm_ops;
603    return 0;
604}
605
606static int shm_release(struct inode *ino, struct file *file)
607{
608    struct shm_file_data *sfd = shm_file_data(file);
609
610    put_ipc_ns(sfd->ns);
611    fput(sfd->file);
612    shm_file_data(file) = NULL;
613    kfree(sfd);
614    return 0;
615}
616
617static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
618{
619    struct shm_file_data *sfd = shm_file_data(file);
620
621    if (!sfd->file->f_op->fsync) {
622        return -EINVAL;
623    }
624    return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
625}
626
627static long shm_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
628{
629    struct shm_file_data *sfd = shm_file_data(file);
630
631    if (!sfd->file->f_op->fallocate) {
632        return -EOPNOTSUPP;
633    }
634    return sfd->file->f_op->fallocate(file, mode, offset, len);
635}
636
637static unsigned long shm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
638                                           unsigned long pgoff, unsigned long flags)
639{
640    struct shm_file_data *sfd = shm_file_data(file);
641
642    return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len, pgoff, flags);
643}
644
645static const struct file_operations shm_file_operations = {
646    .mmap = shm_mmap,
647    .fsync = shm_fsync,
648    .release = shm_release,
649    .get_unmapped_area = shm_get_unmapped_area,
650    .llseek = noop_llseek,
651    .fallocate = shm_fallocate,
652};
653
654/*
655 * shm_file_operations_huge is now identical to shm_file_operations,
656 * but we keep it distinct for the sake of is_file_shm_hugepages().
657 */
658static const struct file_operations shm_file_operations_huge = {
659    .mmap = shm_mmap,
660    .fsync = shm_fsync,
661    .release = shm_release,
662    .get_unmapped_area = shm_get_unmapped_area,
663    .llseek = noop_llseek,
664    .fallocate = shm_fallocate,
665};
666
667bool is_file_shm_hugepages(struct file *file)
668{
669    return file->f_op == &shm_file_operations_huge;
670}
671
672static const struct vm_operations_struct shm_vm_ops = {
673    .open = shm_open,   /* callback for a new vm-area open */
674    .close = shm_close, /* callback for when the vm-area is released */
675    .fault = shm_fault,
676    .split = shm_split,
677    .pagesize = shm_pagesize,
678#if defined(CONFIG_NUMA)
679    .set_policy = shm_set_policy,
680    .get_policy = shm_get_policy,
681#endif
682};
683
684/**
685 * newseg - Create a new shared memory segment
686 * @ns: namespace
687 * @params: ptr to the structure that contains key, size and shmflg
688 *
689 * Called with shm_ids.rwsem held as a writer.
690 */
691static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
692{
693    key_t key = params->key;
694    int shmflg = params->flg;
695    size_t size = params->u.size;
696    int error;
697    struct shmid_kernel *shp;
698    size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
699    struct file *file;
700    char name[13];
701    vm_flags_t acctflag = 0;
702
703    if (size < SHMMIN || size > ns->shm_ctlmax) {
704        return -EINVAL;
705    }
706
707    if ((numpages << PAGE_SHIFT) < size) {
708        return -ENOSPC;
709    }
710
711    if (ns->shm_tot + numpages < ns->shm_tot || ns->shm_tot + numpages > ns->shm_ctlall) {
712        return -ENOSPC;
713    }
714
715    shp = kvmalloc(sizeof(*shp), GFP_KERNEL);
716    if (unlikely(!shp)) {
717        return -ENOMEM;
718    }
719
720    shp->shm_perm.key = key;
721    shp->shm_perm.mode = (shmflg & S_IRWXUGO);
722    shp->mlock_user = NULL;
723
724    shp->shm_perm.security = NULL;
725    error = security_shm_alloc(&shp->shm_perm);
726    if (error) {
727        kvfree(shp);
728        return error;
729    }
730
731    (void)sprintf(name, "SYSV%08x", key);
732    if (shmflg & SHM_HUGETLB) {
733        struct hstate *hs;
734        size_t hugesize;
735
736        hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
737        if (!hs) {
738            error = -EINVAL;
739            goto no_file;
740        }
741        hugesize = ALIGN(size, huge_page_size(hs));
742
743        /* hugetlb_file_setup applies strict accounting */
744        if (shmflg & SHM_NORESERVE) {
745            acctflag = VM_NORESERVE;
746        }
747        file = hugetlb_file_setup(name, hugesize, acctflag, &shp->mlock_user, HUGETLB_SHMFS_INODE,
748                                  (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
749    } else {
750        /*
751         * Do not allow no accounting for OVERCOMMIT_NEVER, even
752         * if it's asked for.
753         */
754        if ((shmflg & SHM_NORESERVE) && sysctl_overcommit_memory != OVERCOMMIT_NEVER) {
755            acctflag = VM_NORESERVE;
756        }
757        file = shmem_kernel_file_setup(name, size, acctflag);
758    }
759    error = PTR_ERR(file);
760    if (IS_ERR(file)) {
761        goto no_file;
762    }
763
764    shp->shm_cprid = get_pid(task_tgid(current));
765    shp->shm_lprid = NULL;
766    shp->shm_atim = shp->shm_dtim = 0;
767    shp->shm_ctim = ktime_get_real_seconds();
768    shp->shm_segsz = size;
769    shp->shm_nattch = 0;
770    shp->shm_file = file;
771    shp->shm_creator = current;
772
773    /* ipc_addid() locks shp upon success. */
774    error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
775    if (error < 0) {
776        goto no_id;
777    }
778    shp->ns = ns;
779
780    task_lock(current);
781    list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
782    task_unlock(current);
783
784    /*
785     * shmid gets reported as "inode#" in /proc/pid/maps.
786     * proc-ps tools use this. Changing this will break them.
787     */
788    file_inode(file)->i_ino = shp->shm_perm.id;
789
790    ns->shm_tot += numpages;
791    error = shp->shm_perm.id;
792
793    ipc_unlock_object(&shp->shm_perm);
794    rcu_read_unlock();
795    return error;
796
797no_id:
798    ipc_update_pid(&shp->shm_cprid, NULL);
799    ipc_update_pid(&shp->shm_lprid, NULL);
800    if (is_file_hugepages(file) && shp->mlock_user) {
801        user_shm_unlock(size, shp->mlock_user);
802    }
803    fput(file);
804    ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
805    return error;
806no_file:
807    call_rcu(&shp->shm_perm.rcu, shm_rcu_free);
808    return error;
809}
810
811/*
812 * Called with shm_ids.rwsem and ipcp locked.
813 */
814static int shm_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params)
815{
816    struct shmid_kernel *shp;
817
818    shp = container_of(ipcp, struct shmid_kernel, shm_perm);
819    if (shp->shm_segsz < params->u.size) {
820        return -EINVAL;
821    }
822
823    return 0;
824}
825
826long ksys_shmget(key_t key, size_t size, int shmflg)
827{
828    struct ipc_namespace *ns;
829    static const struct ipc_ops shm_ops = {
830        .getnew = newseg,
831        .associate = security_shm_associate,
832        .more_checks = shm_more_checks,
833    };
834    struct ipc_params shm_params;
835
836    ns = current->nsproxy->ipc_ns;
837
838    shm_params.key = key;
839    shm_params.flg = shmflg;
840    shm_params.u.size = size;
841
842    return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
843}
844
845SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
846{
847    return ksys_shmget(key, size, shmflg);
848}
849
850static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
851{
852    switch (version) {
853        case IPC_64:
854            return copy_to_user(buf, in, sizeof(*in));
855        case IPC_OLD: {
856            struct shmid_ds out;
857
858            memset(&out, 0, sizeof(out));
859            ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
860            out.shm_segsz = in->shm_segsz;
861            out.shm_atime = in->shm_atime;
862            out.shm_dtime = in->shm_dtime;
863            out.shm_ctime = in->shm_ctime;
864            out.shm_cpid = in->shm_cpid;
865            out.shm_lpid = in->shm_lpid;
866            out.shm_nattch = in->shm_nattch;
867
868            return copy_to_user(buf, &out, sizeof(out));
869        }
870        default:
871            return -EINVAL;
872    }
873}
874
875static inline unsigned long copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
876{
877    switch (version) {
878        case IPC_64:
879            if (copy_from_user(out, buf, sizeof(*out))) {
880                return -EFAULT;
881            }
882            return 0;
883        case IPC_OLD: {
884            struct shmid_ds tbuf_old;
885
886            if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) {
887                return -EFAULT;
888            }
889
890            out->shm_perm.uid = tbuf_old.shm_perm.uid;
891            out->shm_perm.gid = tbuf_old.shm_perm.gid;
892            out->shm_perm.mode = tbuf_old.shm_perm.mode;
893
894            return 0;
895        }
896        default:
897            return -EINVAL;
898    }
899}
900
901static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
902{
903    switch (version) {
904        case IPC_64:
905            return copy_to_user(buf, in, sizeof(*in));
906        case IPC_OLD: {
907            struct shminfo out;
908
909            if (in->shmmax > INT_MAX) {
910                out.shmmax = INT_MAX;
911            } else {
912                out.shmmax = (int)in->shmmax;
913            }
914
915            out.shmmin = in->shmmin;
916            out.shmmni = in->shmmni;
917            out.shmseg = in->shmseg;
918            out.shmall = in->shmall;
919
920            return copy_to_user(buf, &out, sizeof(out));
921        }
922        default:
923            return -EINVAL;
924    }
925}
926
927/*
928 * Calculate and add used RSS and swap pages of a shm.
929 * Called with shm_ids.rwsem held as a reader
930 */
931static void shm_add_rss_swap(struct shmid_kernel *shp, unsigned long *rss_add, unsigned long *swp_add)
932{
933    struct inode *inode;
934
935    inode = file_inode(shp->shm_file);
936
937    if (is_file_hugepages(shp->shm_file)) {
938        struct address_space *mapping = inode->i_mapping;
939        struct hstate *h = hstate_file(shp->shm_file);
940        *rss_add += pages_per_huge_page(h) * mapping->nrpages;
941    } else {
942#ifdef CONFIG_SHMEM
943        struct shmem_inode_info *info = SHMEM_I(inode);
944
945        spin_lock_irq(&info->lock);
946        *rss_add += inode->i_mapping->nrpages;
947        *swp_add += info->swapped;
948        spin_unlock_irq(&info->lock);
949#else
950        *rss_add += inode->i_mapping->nrpages;
951#endif
952    }
953}
954
955/*
956 * Called with shm_ids.rwsem held as a reader
957 */
958static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, unsigned long *swp)
959{
960    int next_id;
961    int total, in_use;
962
963    *rss = 0;
964    *swp = 0;
965
966    in_use = shm_ids(ns).in_use;
967
968    for (total = 0, next_id = 0; total < in_use; next_id++) {
969        struct kern_ipc_perm *ipc;
970        struct shmid_kernel *shp;
971
972        ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
973        if (ipc == NULL) {
974            continue;
975        }
976        shp = container_of(ipc, struct shmid_kernel, shm_perm);
977
978        shm_add_rss_swap(shp, rss, swp);
979
980        total++;
981    }
982}
983
984/*
985 * This function handles some shmctl commands which require the rwsem
986 * to be held in write mode.
987 * NOTE: no locks must be held, the rwsem is taken inside this function.
988 */
989static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, struct shmid64_ds *shmid64)
990{
991    struct kern_ipc_perm *ipcp;
992    struct shmid_kernel *shp;
993    int err;
994
995    down_write(&shm_ids(ns).rwsem);
996    rcu_read_lock();
997
998    ipcp = ipcctl_obtain_check(ns, &shm_ids(ns), shmid, cmd, &shmid64->shm_perm, 0);
999    if (IS_ERR(ipcp)) {
1000        err = PTR_ERR(ipcp);
1001        goto out_unlock1;
1002    }
1003
1004    shp = container_of(ipcp, struct shmid_kernel, shm_perm);
1005
1006    err = security_shm_shmctl(&shp->shm_perm, cmd);
1007    if (err) {
1008        goto out_unlock1;
1009    }
1010
1011    switch (cmd) {
1012        case IPC_RMID:
1013            ipc_lock_object(&shp->shm_perm);
1014            /* do_shm_rmid unlocks the ipc object and rcu */
1015            do_shm_rmid(ns, ipcp);
1016            goto out_up;
1017        case IPC_SET:
1018            ipc_lock_object(&shp->shm_perm);
1019            err = ipc_update_perm(&shmid64->shm_perm, ipcp);
1020            if (err) {
1021                goto out_unlock0;
1022            }
1023            shp->shm_ctim = ktime_get_real_seconds();
1024            break;
1025        default:
1026            err = -EINVAL;
1027            goto out_unlock1;
1028    }
1029
1030out_unlock0:
1031    ipc_unlock_object(&shp->shm_perm);
1032out_unlock1:
1033    rcu_read_unlock();
1034out_up:
1035    up_write(&shm_ids(ns).rwsem);
1036    return err;
1037}
1038
1039static int shmctl_ipc_info(struct ipc_namespace *ns, struct shminfo64 *shminfo)
1040{
1041    int err = security_shm_shmctl(NULL, IPC_INFO);
1042    if (!err) {
1043        memset(shminfo, 0, sizeof(*shminfo));
1044        shminfo->shmmni = shminfo->shmseg = ns->shm_ctlmni;
1045        shminfo->shmmax = ns->shm_ctlmax;
1046        shminfo->shmall = ns->shm_ctlall;
1047        shminfo->shmmin = SHMMIN;
1048        down_read(&shm_ids(ns).rwsem);
1049        err = ipc_get_maxidx(&shm_ids(ns));
1050        up_read(&shm_ids(ns).rwsem);
1051        if (err < 0) {
1052            err = 0;
1053        }
1054    }
1055    return err;
1056}
1057
1058static int shmctl_shm_info(struct ipc_namespace *ns, struct shm_info *shm_info)
1059{
1060    int err = security_shm_shmctl(NULL, SHM_INFO);
1061    if (!err) {
1062        memset(shm_info, 0, sizeof(*shm_info));
1063        down_read(&shm_ids(ns).rwsem);
1064        shm_info->used_ids = shm_ids(ns).in_use;
1065        shm_get_stat(ns, &shm_info->shm_rss, &shm_info->shm_swp);
1066        shm_info->shm_tot = ns->shm_tot;
1067        shm_info->swap_attempts = 0;
1068        shm_info->swap_successes = 0;
1069        err = ipc_get_maxidx(&shm_ids(ns));
1070        up_read(&shm_ids(ns).rwsem);
1071        if (err < 0) {
1072            err = 0;
1073        }
1074    }
1075    return err;
1076}
1077
1078static int shmctl_stat(struct ipc_namespace *ns, int shmid, int cmd, struct shmid64_ds *tbuf)
1079{
1080    struct shmid_kernel *shp;
1081    int err;
1082
1083    memset(tbuf, 0, sizeof(*tbuf));
1084
1085    rcu_read_lock();
1086    if (cmd == SHM_STAT || cmd == SHM_STAT_ANY) {
1087        shp = shm_obtain_object(ns, shmid);
1088        if (IS_ERR(shp)) {
1089            err = PTR_ERR(shp);
1090            goto out_unlock;
1091        }
1092    } else { /* IPC_STAT */
1093        shp = shm_obtain_object_check(ns, shmid);
1094        if (IS_ERR(shp)) {
1095            err = PTR_ERR(shp);
1096            goto out_unlock;
1097        }
1098    }
1099
1100    /*
1101     * Semantically SHM_STAT_ANY ought to be identical to
1102     * that functionality provided by the /proc/sysvipc/
1103     * interface. As such, only audit these calls and
1104     * do not do traditional S_IRUGO permission checks on
1105     * the ipc object.
1106     */
1107    if (cmd == SHM_STAT_ANY) {
1108        audit_ipc_obj(&shp->shm_perm);
1109    } else {
1110        err = -EACCES;
1111        if (ipcperms(ns, &shp->shm_perm, S_IRUGO)) {
1112            goto out_unlock;
1113        }
1114    }
1115
1116    err = security_shm_shmctl(&shp->shm_perm, cmd);
1117    if (err) {
1118        goto out_unlock;
1119    }
1120
1121    ipc_lock_object(&shp->shm_perm);
1122
1123    if (!ipc_valid_object(&shp->shm_perm)) {
1124        ipc_unlock_object(&shp->shm_perm);
1125        err = -EIDRM;
1126        goto out_unlock;
1127    }
1128
1129    kernel_to_ipc64_perm(&shp->shm_perm, &tbuf->shm_perm);
1130    tbuf->shm_segsz = shp->shm_segsz;
1131    tbuf->shm_atime = shp->shm_atim;
1132    tbuf->shm_dtime = shp->shm_dtim;
1133    tbuf->shm_ctime = shp->shm_ctim;
1134#ifndef CONFIG_64BIT
1135    tbuf->shm_atime_high = shp->shm_atim >> 0x20;
1136    tbuf->shm_dtime_high = shp->shm_dtim >> 0x20;
1137    tbuf->shm_ctime_high = shp->shm_ctim >> 0x20;
1138#endif
1139    tbuf->shm_cpid = pid_vnr(shp->shm_cprid);
1140    tbuf->shm_lpid = pid_vnr(shp->shm_lprid);
1141    tbuf->shm_nattch = shp->shm_nattch;
1142
1143    if (cmd == IPC_STAT) {
1144        /*
1145         * As defined in SUS:
1146         * Return 0 on success
1147         */
1148        err = 0;
1149    } else {
1150        /*
1151         * SHM_STAT and SHM_STAT_ANY (both Linux specific)
1152         * Return the full id, including the sequence number
1153         */
1154        err = shp->shm_perm.id;
1155    }
1156
1157    ipc_unlock_object(&shp->shm_perm);
1158out_unlock:
1159    rcu_read_unlock();
1160    return err;
1161}
1162
1163static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd)
1164{
1165    struct shmid_kernel *shp;
1166    struct file *shm_file;
1167    int err;
1168
1169    rcu_read_lock();
1170    shp = shm_obtain_object_check(ns, shmid);
1171    if (IS_ERR(shp)) {
1172        err = PTR_ERR(shp);
1173        goto out_unlock1;
1174    }
1175
1176    audit_ipc_obj(&(shp->shm_perm));
1177    err = security_shm_shmctl(&shp->shm_perm, cmd);
1178    if (err) {
1179        goto out_unlock1;
1180    }
1181
1182    ipc_lock_object(&shp->shm_perm);
1183
1184    /* check if shm_destroy() is tearing down shp */
1185    if (!ipc_valid_object(&shp->shm_perm)) {
1186        err = -EIDRM;
1187        goto out_unlock0;
1188    }
1189
1190    if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
1191        kuid_t euid = current_euid();
1192        if (!uid_eq(euid, shp->shm_perm.uid) && !uid_eq(euid, shp->shm_perm.cuid)) {
1193            err = -EPERM;
1194            goto out_unlock0;
1195        }
1196        if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) {
1197            err = -EPERM;
1198            goto out_unlock0;
1199        }
1200    }
1201
1202    shm_file = shp->shm_file;
1203    if (is_file_hugepages(shm_file)) {
1204        goto out_unlock0;
1205    }
1206
1207    if (cmd == SHM_LOCK) {
1208        struct user_struct *user = current_user();
1209
1210        err = shmem_lock(shm_file, 1, user);
1211        if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
1212            shp->shm_perm.mode |= SHM_LOCKED;
1213            shp->mlock_user = user;
1214        }
1215        goto out_unlock0;
1216    }
1217
1218    /* SHM_UNLOCK */
1219    if (!(shp->shm_perm.mode & SHM_LOCKED)) {
1220        goto out_unlock0;
1221    }
1222    shmem_lock(shm_file, 0, shp->mlock_user);
1223    shp->shm_perm.mode &= ~SHM_LOCKED;
1224    shp->mlock_user = NULL;
1225    get_file(shm_file);
1226    ipc_unlock_object(&shp->shm_perm);
1227    rcu_read_unlock();
1228    shmem_unlock_mapping(shm_file->f_mapping);
1229
1230    fput(shm_file);
1231    return err;
1232
1233out_unlock0:
1234    ipc_unlock_object(&shp->shm_perm);
1235out_unlock1:
1236    rcu_read_unlock();
1237    return err;
1238}
1239
1240static long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf, int version)
1241{
1242    int err;
1243    struct ipc_namespace *ns;
1244    struct shmid64_ds sem64;
1245
1246    if (cmd < 0 || shmid < 0) {
1247        return -EINVAL;
1248    }
1249
1250    ns = current->nsproxy->ipc_ns;
1251
1252    switch (cmd) {
1253        case IPC_INFO: {
1254            struct shminfo64 shminfo;
1255            err = shmctl_ipc_info(ns, &shminfo);
1256            if (err < 0) {
1257                return err;
1258            }
1259            if (copy_shminfo_to_user(buf, &shminfo, version)) {
1260                err = -EFAULT;
1261            }
1262            return err;
1263        }
1264        case SHM_INFO: {
1265            struct shm_info shm_info;
1266            err = shmctl_shm_info(ns, &shm_info);
1267            if (err < 0) {
1268                return err;
1269            }
1270            if (copy_to_user(buf, &shm_info, sizeof(shm_info))) {
1271                err = -EFAULT;
1272            }
1273            return err;
1274        }
1275        case SHM_STAT:
1276        case SHM_STAT_ANY:
1277        case IPC_STAT: {
1278            err = shmctl_stat(ns, shmid, cmd, &sem64);
1279            if (err < 0) {
1280                return err;
1281            }
1282            if (copy_shmid_to_user(buf, &sem64, version)) {
1283                err = -EFAULT;
1284            }
1285            return err;
1286        }
1287        case IPC_SET:
1288            if (copy_shmid_from_user(&sem64, buf, version)) {
1289                return -EFAULT;
1290            }
1291            fallthrough;
1292        case IPC_RMID:
1293            return shmctl_down(ns, shmid, cmd, &sem64);
1294        case SHM_LOCK:
1295        case SHM_UNLOCK:
1296            return shmctl_do_lock(ns, shmid, cmd);
1297        default:
1298            return -EINVAL;
1299    }
1300}
1301
1302SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
1303{
1304    return ksys_shmctl(shmid, cmd, buf, IPC_64);
1305}
1306
1307#ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION
1308long ksys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
1309{
1310    int version = ipc_parse_version(&cmd);
1311
1312    return ksys_shmctl(shmid, cmd, buf, version);
1313}
1314
1315SYSCALL_DEFINE3(old_shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
1316{
1317    return ksys_old_shmctl(shmid, cmd, buf);
1318}
1319#endif
1320
1321#ifdef CONFIG_COMPAT
1322
1323struct compat_shmid_ds {
1324    struct compat_ipc_perm shm_perm;
1325    int shm_segsz;
1326    old_time32_t shm_atime;
1327    old_time32_t shm_dtime;
1328    old_time32_t shm_ctime;
1329    compat_ipc_pid_t shm_cpid;
1330    compat_ipc_pid_t shm_lpid;
1331    unsigned short shm_nattch;
1332    unsigned short shm_unused;
1333    compat_uptr_t shm_unused2;
1334    compat_uptr_t shm_unused3;
1335};
1336
1337struct compat_shminfo64 {
1338    compat_ulong_t shmmax;
1339    compat_ulong_t shmmin;
1340    compat_ulong_t shmmni;
1341    compat_ulong_t shmseg;
1342    compat_ulong_t shmall;
1343    compat_ulong_t __unused1;
1344    compat_ulong_t __unused2;
1345    compat_ulong_t __unused3;
1346    compat_ulong_t __unused4;
1347};
1348
1349struct compat_shm_info {
1350    compat_int_t used_ids;
1351    compat_ulong_t shm_tot, shm_rss, shm_swp;
1352    compat_ulong_t swap_attempts, swap_successes;
1353};
1354
1355static int copy_compat_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
1356{
1357    if (in->shmmax > INT_MAX) {
1358        in->shmmax = INT_MAX;
1359    }
1360    if (version == IPC_64) {
1361        struct compat_shminfo64 info;
1362        memset(&info, 0, sizeof(info));
1363        info.shmmax = in->shmmax;
1364        info.shmmin = in->shmmin;
1365        info.shmmni = in->shmmni;
1366        info.shmseg = in->shmseg;
1367        info.shmall = in->shmall;
1368        return copy_to_user(buf, &info, sizeof(info));
1369    } else {
1370        struct shminfo info;
1371        memset(&info, 0, sizeof(info));
1372        info.shmmax = in->shmmax;
1373        info.shmmin = in->shmmin;
1374        info.shmmni = in->shmmni;
1375        info.shmseg = in->shmseg;
1376        info.shmall = in->shmall;
1377        return copy_to_user(buf, &info, sizeof(info));
1378    }
1379}
1380
1381static int put_compat_shm_info(struct shm_info *ip, struct compat_shm_info __user *uip)
1382{
1383    struct compat_shm_info info;
1384
1385    memset(&info, 0, sizeof(info));
1386    info.used_ids = ip->used_ids;
1387    info.shm_tot = ip->shm_tot;
1388    info.shm_rss = ip->shm_rss;
1389    info.shm_swp = ip->shm_swp;
1390    info.swap_attempts = ip->swap_attempts;
1391    info.swap_successes = ip->swap_successes;
1392    return copy_to_user(uip, &info, sizeof(info));
1393}
1394
1395static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
1396{
1397    if (version == IPC_64) {
1398        struct compat_shmid64_ds v;
1399        memset(&v, 0, sizeof(v));
1400        to_compat_ipc64_perm(&v.shm_perm, &in->shm_perm);
1401        v.shm_atime = lower_32_bits(in->shm_atime);
1402        v.shm_atime_high = upper_32_bits(in->shm_atime);
1403        v.shm_dtime = lower_32_bits(in->shm_dtime);
1404        v.shm_dtime_high = upper_32_bits(in->shm_dtime);
1405        v.shm_ctime = lower_32_bits(in->shm_ctime);
1406        v.shm_ctime_high = upper_32_bits(in->shm_ctime);
1407        v.shm_segsz = in->shm_segsz;
1408        v.shm_nattch = in->shm_nattch;
1409        v.shm_cpid = in->shm_cpid;
1410        v.shm_lpid = in->shm_lpid;
1411        return copy_to_user(buf, &v, sizeof(v));
1412    } else {
1413        struct compat_shmid_ds v;
1414        memset(&v, 0, sizeof(v));
1415        to_compat_ipc_perm(&v.shm_perm, &in->shm_perm);
1416        v.shm_perm.key = in->shm_perm.key;
1417        v.shm_atime = in->shm_atime;
1418        v.shm_dtime = in->shm_dtime;
1419        v.shm_ctime = in->shm_ctime;
1420        v.shm_segsz = in->shm_segsz;
1421        v.shm_nattch = in->shm_nattch;
1422        v.shm_cpid = in->shm_cpid;
1423        v.shm_lpid = in->shm_lpid;
1424        return copy_to_user(buf, &v, sizeof(v));
1425    }
1426}
1427
1428static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
1429{
1430    memset(out, 0, sizeof(*out));
1431    if (version == IPC_64) {
1432        struct compat_shmid64_ds __user *p = buf;
1433        return get_compat_ipc64_perm(&out->shm_perm, &p->shm_perm);
1434    } else {
1435        struct compat_shmid_ds __user *p = buf;
1436        return get_compat_ipc_perm(&out->shm_perm, &p->shm_perm);
1437    }
1438}
1439
1440static long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr, int version)
1441{
1442    struct ipc_namespace *ns;
1443    struct shmid64_ds sem64;
1444    int err;
1445
1446    ns = current->nsproxy->ipc_ns;
1447
1448    if (cmd < 0 || shmid < 0) {
1449        return -EINVAL;
1450    }
1451
1452    switch (cmd) {
1453        case IPC_INFO: {
1454            struct shminfo64 shminfo;
1455            err = shmctl_ipc_info(ns, &shminfo);
1456            if (err < 0) {
1457                return err;
1458            }
1459            if (copy_compat_shminfo_to_user(uptr, &shminfo, version)) {
1460                err = -EFAULT;
1461            }
1462            return err;
1463        }
1464        case SHM_INFO: {
1465            struct shm_info shm_info;
1466            err = shmctl_shm_info(ns, &shm_info);
1467            if (err < 0) {
1468                return err;
1469            }
1470            if (put_compat_shm_info(&shm_info, uptr)) {
1471                err = -EFAULT;
1472            }
1473            return err;
1474        }
1475        case IPC_STAT:
1476        case SHM_STAT_ANY:
1477        case SHM_STAT:
1478            err = shmctl_stat(ns, shmid, cmd, &sem64);
1479            if (err < 0) {
1480                return err;
1481            }
1482            if (copy_compat_shmid_to_user(uptr, &sem64, version)) {
1483                err = -EFAULT;
1484            }
1485            return err;
1486
1487        case IPC_SET:
1488            if (copy_compat_shmid_from_user(&sem64, uptr, version)) {
1489                return -EFAULT;
1490            }
1491            fallthrough;
1492        case IPC_RMID:
1493            return shmctl_down(ns, shmid, cmd, &sem64);
1494        case SHM_LOCK:
1495        case SHM_UNLOCK:
1496            return shmctl_do_lock(ns, shmid, cmd);
1497        default:
1498            return -EINVAL;
1499    }
1500    return err;
1501}
1502
1503COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr)
1504{
1505    return compat_ksys_shmctl(shmid, cmd, uptr, IPC_64);
1506}
1507
1508#ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION
1509long compat_ksys_old_shmctl(int shmid, int cmd, void __user *uptr)
1510{
1511    int version = compat_ipc_parse_version(&cmd);
1512
1513    return compat_ksys_shmctl(shmid, cmd, uptr, version);
1514}
1515
1516COMPAT_SYSCALL_DEFINE3(old_shmctl, int, shmid, int, cmd, void __user *, uptr)
1517{
1518    return compat_ksys_old_shmctl(shmid, cmd, uptr);
1519}
1520#endif
1521#endif
1522
1523/*
1524 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
1525 *
1526 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
1527 * "raddr" thing points to kernel space, and there has to be a wrapper around
1528 * this.
1529 */
1530long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, unsigned long shmlba)
1531{
1532    struct shmid_kernel *shp;
1533    unsigned long addr = (unsigned long)shmaddr;
1534    unsigned long size;
1535    struct file *file, *base;
1536    int err;
1537    unsigned long flags = MAP_SHARED;
1538    unsigned long prot;
1539    int acc_mode;
1540    struct ipc_namespace *ns;
1541    struct shm_file_data *sfd;
1542    int f_flags;
1543    unsigned long populate = 0;
1544
1545    err = -EINVAL;
1546    if (shmid < 0) {
1547        goto out;
1548    }
1549
1550    if (addr) {
1551        if (addr & (shmlba - 1)) {
1552            if (shmflg & SHM_RND) {
1553                addr &= ~(shmlba - 1); /* round down */
1554
1555                /*
1556                 * Ensure that the round-down is non-nil
1557                 * when remapping. This can happen for
1558                 * cases when addr < shmlba.
1559                 */
1560                if (!addr && (shmflg & SHM_REMAP)) {
1561                    goto out;
1562                }
1563            } else
1564#ifndef __ARCH_FORCE_SHMLBA
1565                if (addr & ~PAGE_MASK)
1566#endif
1567                goto out;
1568        }
1569
1570        flags |= MAP_FIXED;
1571    } else if ((shmflg & SHM_REMAP)) {
1572        goto out;
1573    }
1574
1575    if (shmflg & SHM_RDONLY) {
1576        prot = PROT_READ;
1577        acc_mode = S_IRUGO;
1578        f_flags = O_RDONLY;
1579    } else {
1580        prot = PROT_READ | PROT_WRITE;
1581        acc_mode = S_IRUGO | S_IWUGO;
1582        f_flags = O_RDWR;
1583    }
1584    if (shmflg & SHM_EXEC) {
1585        prot |= PROT_EXEC;
1586        acc_mode |= S_IXUGO;
1587    }
1588
1589    /*
1590     * We cannot rely on the fs check since SYSV IPC does have an
1591     * additional creator id...
1592     */
1593    ns = current->nsproxy->ipc_ns;
1594    rcu_read_lock();
1595    shp = shm_obtain_object_check(ns, shmid);
1596    if (IS_ERR(shp)) {
1597        err = PTR_ERR(shp);
1598        goto out_unlock;
1599    }
1600
1601    err = -EACCES;
1602    if (ipcperms(ns, &shp->shm_perm, acc_mode)) {
1603        goto out_unlock;
1604    }
1605
1606    err = security_shm_shmat(&shp->shm_perm, shmaddr, shmflg);
1607    if (err) {
1608        goto out_unlock;
1609    }
1610
1611    ipc_lock_object(&shp->shm_perm);
1612
1613    /* check if shm_destroy() is tearing down shp */
1614    if (!ipc_valid_object(&shp->shm_perm)) {
1615        ipc_unlock_object(&shp->shm_perm);
1616        err = -EIDRM;
1617        goto out_unlock;
1618    }
1619
1620    /*
1621     * We need to take a reference to the real shm file to prevent the
1622     * pointer from becoming stale in cases where the lifetime of the outer
1623     * file extends beyond that of the shm segment.  It's not usually
1624     * possible, but it can happen during remap_file_pages() emulation as
1625     * that unmaps the memory, then does ->mmap() via file reference only.
1626     * We'll deny the ->mmap() if the shm segment was since removed, but to
1627     * detect shm ID reuse we need to compare the file pointers.
1628     */
1629    base = get_file(shp->shm_file);
1630    shp->shm_nattch++;
1631    size = i_size_read(file_inode(base));
1632    ipc_unlock_object(&shp->shm_perm);
1633    rcu_read_unlock();
1634
1635    err = -ENOMEM;
1636    sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
1637    if (!sfd) {
1638        fput(base);
1639        goto out_nattch;
1640    }
1641
1642    file = alloc_file_clone(base, f_flags, is_file_hugepages(base) ? &shm_file_operations_huge : &shm_file_operations);
1643    err = PTR_ERR(file);
1644    if (IS_ERR(file)) {
1645        kfree(sfd);
1646        fput(base);
1647        goto out_nattch;
1648    }
1649
1650    sfd->id = shp->shm_perm.id;
1651    sfd->ns = get_ipc_ns(ns);
1652    sfd->file = base;
1653    sfd->vm_ops = NULL;
1654    file->private_data = sfd;
1655
1656    err = security_mmap_file(file, prot, flags);
1657    if (err) {
1658        goto out_fput;
1659    }
1660
1661    if (mmap_write_lock_killable(current->mm)) {
1662        err = -EINTR;
1663        goto out_fput;
1664    }
1665
1666    if (addr && !(shmflg & SHM_REMAP)) {
1667        err = -EINVAL;
1668        if (addr + size < addr) {
1669            goto invalid;
1670        }
1671
1672        if (find_vma_intersection(current->mm, addr, addr + size)) {
1673            goto invalid;
1674        }
1675    }
1676
1677    addr = do_mmap(file, addr, size, prot, flags, 0, &populate, NULL);
1678    *raddr = addr;
1679    err = 0;
1680    if (IS_ERR_VALUE(addr)) {
1681        err = (long)addr;
1682    }
1683invalid:
1684    mmap_write_unlock(current->mm);
1685    if (populate) {
1686        mm_populate(addr, populate);
1687    }
1688
1689out_fput:
1690    fput(file);
1691
1692out_nattch:
1693    down_write(&shm_ids(ns).rwsem);
1694    shp = shm_lock(ns, shmid);
1695    shp->shm_nattch--;
1696    if (shm_may_destroy(shp)) {
1697        shm_destroy(ns, shp);
1698    } else {
1699        shm_unlock(shp);
1700    }
1701    up_write(&shm_ids(ns).rwsem);
1702    return err;
1703
1704out_unlock:
1705    rcu_read_unlock();
1706out:
1707    return err;
1708}
1709
1710SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
1711{
1712    unsigned long ret;
1713    long err;
1714
1715    err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA);
1716    if (err) {
1717        return err;
1718    }
1719    force_successful_syscall_return();
1720    return (long)ret;
1721}
1722
1723#ifdef CONFIG_COMPAT
1724
1725#ifndef COMPAT_SHMLBA
1726#define COMPAT_SHMLBA SHMLBA
1727#endif
1728
1729COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg)
1730{
1731    unsigned long ret;
1732    long err;
1733
1734    err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA);
1735    if (err) {
1736        return err;
1737    }
1738    force_successful_syscall_return();
1739    return (long)ret;
1740}
1741#endif
1742
1743/*
1744 * detach and kill segment if marked destroyed.
1745 * The work is done in shm_close.
1746 */
1747long ksys_shmdt(char __user *shmaddr)
1748{
1749    struct mm_struct *mm = current->mm;
1750    struct vm_area_struct *vma;
1751    unsigned long addr = (unsigned long)shmaddr;
1752    int retval = -EINVAL;
1753#ifdef CONFIG_MMU
1754    loff_t size = 0;
1755    struct file *file;
1756    struct vm_area_struct *next;
1757#endif
1758
1759    if (addr & ~PAGE_MASK) {
1760        return retval;
1761    }
1762
1763    if (mmap_write_lock_killable(mm)) {
1764        return -EINTR;
1765    }
1766
1767    /*
1768     * This function tries to be smart and unmap shm segments that
1769     * were modified by partial mlock or munmap calls:
1770     * - It first determines the size of the shm segment that should be
1771     *   unmapped: It searches for a vma that is backed by shm and that
1772     *   started at address shmaddr. It records it's size and then unmaps
1773     *   it.
1774     * - Then it unmaps all shm vmas that started at shmaddr and that
1775     *   are within the initially determined size and that are from the
1776     *   same shm segment from which we determined the size.
1777     * Errors from do_munmap are ignored: the function only fails if
1778     * it's called with invalid parameters or if it's called to unmap
1779     * a part of a vma. Both calls in this function are for full vmas,
1780     * the parameters are directly copied from the vma itself and always
1781     * valid - therefore do_munmap cannot fail. (famous last words?)
1782     */
1783    /*
1784     * If it had been mremap()'d, the starting address would not
1785     * match the usual checks anyway. So assume all vma's are
1786     * above the starting address given.
1787     */
1788    vma = find_vma(mm, addr);
1789
1790#ifdef CONFIG_MMU
1791    while (vma) {
1792        next = vma->vm_next;
1793
1794        /*
1795         * Check if the starting address would match, i.e. it's
1796         * a fragment created by mprotect() and/or munmap(), or it
1797         * otherwise it starts at this address with no hassles.
1798         */
1799        if ((vma->vm_ops == &shm_vm_ops) && (vma->vm_start - addr) / PAGE_SIZE == vma->vm_pgoff) {
1800
1801            /*
1802             * Record the file of the shm segment being
1803             * unmapped.  With mremap(), someone could place
1804             * page from another segment but with equal offsets
1805             * in the range we are unmapping.
1806             */
1807            file = vma->vm_file;
1808            size = i_size_read(file_inode(vma->vm_file));
1809            do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
1810            /*
1811             * We discovered the size of the shm segment, so
1812             * break out of here and fall through to the next
1813             * loop that uses the size information to stop
1814             * searching for matching vma's.
1815             */
1816            retval = 0;
1817            vma = next;
1818            break;
1819        }
1820        vma = next;
1821    }
1822
1823    /*
1824     * We need look no further than the maximum address a fragment
1825     * could possibly have landed at. Also cast things to loff_t to
1826     * prevent overflows and make comparisons vs. equal-width types.
1827     */
1828    size = PAGE_ALIGN(size);
1829    while (vma && (loff_t)(vma->vm_end - addr) <= size) {
1830        next = vma->vm_next;
1831
1832        /* finding a matching vma now does not alter retval */
1833        if ((vma->vm_ops == &shm_vm_ops) && ((vma->vm_start - addr) / PAGE_SIZE == vma->vm_pgoff) &&
1834            (vma->vm_file == file)) {
1835            do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
1836        }
1837        vma = next;
1838    }
1839
1840#else /* CONFIG_MMU */
1841    /* under NOMMU conditions, the exact address to be destroyed must be
1842     * given
1843     */
1844    if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
1845        do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
1846        retval = 0;
1847    }
1848
1849#endif
1850
1851    mmap_write_unlock(mm);
1852    return retval;
1853}
1854
1855SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
1856{
1857    return ksys_shmdt(shmaddr);
1858}
1859
1860#ifdef CONFIG_PROC_FS
1861static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
1862{
1863    struct pid_namespace *pid_ns = ipc_seq_pid_ns(s);
1864    struct user_namespace *user_ns = seq_user_ns(s);
1865    struct kern_ipc_perm *ipcp = it;
1866    struct shmid_kernel *shp;
1867    unsigned long rss = 0, swp = 0;
1868
1869    shp = container_of(ipcp, struct shmid_kernel, shm_perm);
1870    shm_add_rss_swap(shp, &rss, &swp);
1871
1872#if BITS_PER_LONG <= 32
1873#define SIZE_SPEC "%10lu"
1874#else
1875#define SIZE_SPEC "%21lu"
1876#endif
1877
1878    seq_printf(s,
1879               "%10d %10d  %4o " SIZE_SPEC " %5u %5u  "
1880               "%5lu %5u %5u %5u %5u %10llu %10llu %10llu " SIZE_SPEC " " SIZE_SPEC "\n",
1881               shp->shm_perm.key, shp->shm_perm.id, shp->shm_perm.mode, shp->shm_segsz,
1882               pid_nr_ns(shp->shm_cprid, pid_ns), pid_nr_ns(shp->shm_lprid, pid_ns), shp->shm_nattch,
1883               from_kuid_munged(user_ns, shp->shm_perm.uid), from_kgid_munged(user_ns, shp->shm_perm.gid),
1884               from_kuid_munged(user_ns, shp->shm_perm.cuid), from_kgid_munged(user_ns, shp->shm_perm.cgid),
1885               shp->shm_atim, shp->shm_dtim, shp->shm_ctim, rss * PAGE_SIZE, swp * PAGE_SIZE);
1886
1887    return 0;
1888}
1889#endif
1890