Revert to Fedora kernel-2.6.17-1.2187_FC5 patched with vs2.0.2.1; there are too many...
[linux-2.6.git] / ipc / shm.c
1 /*
2  * linux/ipc/shm.c
3  * Copyright (C) 1992, 1993 Krishna Balasubramanian
4  *       Many improvements/fixes by Bruno Haible.
5  * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
6  * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
7  *
8  * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
9  * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
10  * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
11  * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
12  * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
13  * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
14  * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
15  *
16  * support for audit of ipc object properties and permission changes
17  * Dustin Kirkland <dustin.kirkland@us.ibm.com>
18  */
19
20 #include <linux/config.h>
21 #include <linux/slab.h>
22 #include <linux/mm.h>
23 #include <linux/hugetlb.h>
24 #include <linux/shm.h>
25 #include <linux/init.h>
26 #include <linux/file.h>
27 #include <linux/mman.h>
28 #include <linux/shmem_fs.h>
29 #include <linux/security.h>
30 #include <linux/syscalls.h>
31 #include <linux/audit.h>
32 #include <linux/capability.h>
33 #include <linux/ptrace.h>
34 #include <linux/seq_file.h>
35 #include <linux/mutex.h>
36 #include <linux/vs_base.h>
37 #include <linux/vs_context.h>
38 #include <linux/vs_limit.h>
39
40 #include <asm/uaccess.h>
41
42 #include "util.h"
43
44 static struct file_operations shm_file_operations;
45 static struct vm_operations_struct shm_vm_ops;
46
47 static struct ipc_ids shm_ids;
48
49 #define shm_lock(id)    ((struct shmid_kernel*)ipc_lock(&shm_ids,id))
50 #define shm_unlock(shp) ipc_unlock(&(shp)->shm_perm)
51 #define shm_get(id)     ((struct shmid_kernel*)ipc_get(&shm_ids,id))
52 #define shm_buildid(id, seq) \
53         ipc_buildid(&shm_ids, id, seq)
54
55 static int newseg (key_t key, int shmflg, size_t size);
56 static void shm_open (struct vm_area_struct *shmd);
57 static void shm_close (struct vm_area_struct *shmd);
58 #ifdef CONFIG_PROC_FS
59 static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
60 #endif
61
62 size_t  shm_ctlmax = SHMMAX;
63 size_t  shm_ctlall = SHMALL;
64 int     shm_ctlmni = SHMMNI;
65
66 static int shm_tot; /* total number of shared memory pages */
67
68 void __init shm_init (void)
69 {
70         ipc_init_ids(&shm_ids, 1);
71         ipc_init_proc_interface("sysvipc/shm",
72                                 "       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime\n",
73                                 &shm_ids,
74                                 sysvipc_shm_proc_show);
75 }
76
77 static inline int shm_checkid(struct shmid_kernel *s, int id)
78 {
79         if (ipc_checkid(&shm_ids,&s->shm_perm,id))
80                 return -EIDRM;
81         return 0;
82 }
83
84 static inline struct shmid_kernel *shm_rmid(int id)
85 {
86         return (struct shmid_kernel *)ipc_rmid(&shm_ids,id);
87 }
88
89 static inline int shm_addid(struct shmid_kernel *shp)
90 {
91         return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni);
92 }
93
94
95
96 static inline void shm_inc (int id) {
97         struct shmid_kernel *shp;
98
99         shp = shm_lock(id);
100         BUG_ON(!shp);
101         shp->shm_atim = get_seconds();
102         shp->shm_lprid = current->tgid;
103         shp->shm_nattch++;
104         shm_unlock(shp);
105 }
106
107 /* This is called by fork, once for every shm attach. */
108 static void shm_open (struct vm_area_struct *shmd)
109 {
110         shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino);
111 }
112
113 /*
114  * shm_destroy - free the struct shmid_kernel
115  *
116  * @shp: struct to free
117  *
118  * It has to be called with shp and shm_ids.mutex locked,
119  * but returns with shp unlocked and freed.
120  */
121 static void shm_destroy (struct shmid_kernel *shp)
122 {
123         struct vx_info *vxi = lookup_vx_info(shp->shm_perm.xid);
124         int numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
125
126         vx_ipcshm_sub(vxi, shp, numpages);
127         shm_tot -= numpages;
128
129         shm_rmid (shp->id);
130         shm_unlock(shp);
131         if (!is_file_hugepages(shp->shm_file))
132                 shmem_lock(shp->shm_file, 0, shp->mlock_user);
133         else
134                 user_shm_unlock(shp->shm_file->f_dentry->d_inode->i_size,
135                                                 shp->mlock_user);
136         fput (shp->shm_file);
137         security_shm_free(shp);
138         put_vx_info(vxi);
139         ipc_rcu_putref(shp);
140 }
141
142 /*
143  * remove the attach descriptor shmd.
144  * free memory for segment if it is marked destroyed.
145  * The descriptor has already been removed from the current->mm->mmap list
146  * and will later be kfree()d.
147  */
148 static void shm_close (struct vm_area_struct *shmd)
149 {
150         struct file * file = shmd->vm_file;
151         int id = file->f_dentry->d_inode->i_ino;
152         struct shmid_kernel *shp;
153
154         mutex_lock(&shm_ids.mutex);
155         /* remove from the list of attaches of the shm segment */
156         shp = shm_lock(id);
157         BUG_ON(!shp);
158         shp->shm_lprid = current->tgid;
159         shp->shm_dtim = get_seconds();
160         shp->shm_nattch--;
161         if(shp->shm_nattch == 0 &&
162            shp->shm_perm.mode & SHM_DEST)
163                 shm_destroy (shp);
164         else
165                 shm_unlock(shp);
166         mutex_unlock(&shm_ids.mutex);
167 }
168
169 static int shm_mmap(struct file * file, struct vm_area_struct * vma)
170 {
171         int ret;
172
173         ret = shmem_mmap(file, vma);
174         if (ret == 0) {
175                 vma->vm_ops = &shm_vm_ops;
176                 if (!(vma->vm_flags & VM_WRITE))
177                         vma->vm_flags &= ~VM_MAYWRITE;
178                 shm_inc(file->f_dentry->d_inode->i_ino);
179         }
180
181         return ret;
182 }
183
184 static struct file_operations shm_file_operations = {
185         .mmap   = shm_mmap,
186 #ifndef CONFIG_MMU
187         .get_unmapped_area = shmem_get_unmapped_area,
188 #endif
189 };
190
191 static struct vm_operations_struct shm_vm_ops = {
192         .open   = shm_open,     /* callback for a new vm-area open */
193         .close  = shm_close,    /* callback for when the vm-area is released */
194         .nopage = shmem_nopage,
195 #if defined(CONFIG_NUMA) && defined(CONFIG_SHMEM)
196         .set_policy = shmem_set_policy,
197         .get_policy = shmem_get_policy,
198 #endif
199 };
200
201 static int newseg (key_t key, int shmflg, size_t size)
202 {
203         int error;
204         struct shmid_kernel *shp;
205         int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
206         struct file * file;
207         char name[13];
208         int id;
209
210         if (size < SHMMIN || size > shm_ctlmax)
211                 return -EINVAL;
212
213         if (shm_tot + numpages >= shm_ctlall)
214                 return -ENOSPC;
215         if (!vx_ipcshm_avail(current->vx_info, numpages))
216                 return -ENOSPC;
217
218         shp = ipc_rcu_alloc(sizeof(*shp));
219         if (!shp)
220                 return -ENOMEM;
221
222         shp->shm_perm.key = key;
223         shp->shm_perm.xid = vx_current_xid();
224         shp->shm_perm.mode = (shmflg & S_IRWXUGO);
225         shp->mlock_user = NULL;
226
227         shp->shm_perm.security = NULL;
228         error = security_shm_alloc(shp);
229         if (error) {
230                 ipc_rcu_putref(shp);
231                 return error;
232         }
233
234         if (shmflg & SHM_HUGETLB) {
235                 /* hugetlb_zero_setup takes care of mlock user accounting */
236                 file = hugetlb_zero_setup(size);
237                 shp->mlock_user = current->user;
238         } else {
239                 int acctflag = VM_ACCOUNT;
240                 /*
241                  * Do not allow no accounting for OVERCOMMIT_NEVER, even
242                  * if it's asked for.
243                  */
244                 if  ((shmflg & SHM_NORESERVE) &&
245                                 sysctl_overcommit_memory != OVERCOMMIT_NEVER)
246                         acctflag = 0;
247                 sprintf (name, "SYSV%08x", key);
248                 file = shmem_file_setup(name, size, acctflag);
249         }
250         error = PTR_ERR(file);
251         if (IS_ERR(file))
252                 goto no_file;
253
254         error = -ENOSPC;
255         id = shm_addid(shp);
256         if(id == -1) 
257                 goto no_id;
258
259         shp->shm_cprid = current->tgid;
260         shp->shm_lprid = 0;
261         shp->shm_atim = shp->shm_dtim = 0;
262         shp->shm_ctim = get_seconds();
263         shp->shm_segsz = size;
264         shp->shm_nattch = 0;
265         shp->id = shm_buildid(id,shp->shm_perm.seq);
266         shp->shm_file = file;
267         file->f_dentry->d_inode->i_ino = shp->id;
268
269         /* Hugetlb ops would have already been assigned. */
270         if (!(shmflg & SHM_HUGETLB))
271                 file->f_op = &shm_file_operations;
272
273         shm_tot += numpages;
274         vx_ipcshm_add(current->vx_info, key, numpages);
275         shm_unlock(shp);
276         return shp->id;
277
278 no_id:
279         fput(file);
280 no_file:
281         security_shm_free(shp);
282         ipc_rcu_putref(shp);
283         return error;
284 }
285
286 asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
287 {
288         struct shmid_kernel *shp;
289         int err, id = 0;
290
291         mutex_lock(&shm_ids.mutex);
292         if (key == IPC_PRIVATE) {
293                 err = newseg(key, shmflg, size);
294         } else if ((id = ipc_findkey(&shm_ids, key)) == -1) {
295                 if (!(shmflg & IPC_CREAT))
296                         err = -ENOENT;
297                 else
298                         err = newseg(key, shmflg, size);
299         } else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
300                 err = -EEXIST;
301         } else {
302                 shp = shm_lock(id);
303                 BUG_ON(shp==NULL);
304                 if (shp->shm_segsz < size)
305                         err = -EINVAL;
306                 else if (ipcperms(&shp->shm_perm, shmflg))
307                         err = -EACCES;
308                 else {
309                         int shmid = shm_buildid(id, shp->shm_perm.seq);
310                         err = security_shm_associate(shp, shmflg);
311                         if (!err)
312                                 err = shmid;
313                 }
314                 shm_unlock(shp);
315         }
316         mutex_unlock(&shm_ids.mutex);
317
318         return err;
319 }
320
321 static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
322 {
323         switch(version) {
324         case IPC_64:
325                 return copy_to_user(buf, in, sizeof(*in));
326         case IPC_OLD:
327             {
328                 struct shmid_ds out;
329
330                 ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
331                 out.shm_segsz   = in->shm_segsz;
332                 out.shm_atime   = in->shm_atime;
333                 out.shm_dtime   = in->shm_dtime;
334                 out.shm_ctime   = in->shm_ctime;
335                 out.shm_cpid    = in->shm_cpid;
336                 out.shm_lpid    = in->shm_lpid;
337                 out.shm_nattch  = in->shm_nattch;
338
339                 return copy_to_user(buf, &out, sizeof(out));
340             }
341         default:
342                 return -EINVAL;
343         }
344 }
345
346 struct shm_setbuf {
347         uid_t   uid;
348         gid_t   gid;
349         mode_t  mode;
350 };      
351
352 static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void __user *buf, int version)
353 {
354         switch(version) {
355         case IPC_64:
356             {
357                 struct shmid64_ds tbuf;
358
359                 if (copy_from_user(&tbuf, buf, sizeof(tbuf)))
360                         return -EFAULT;
361
362                 out->uid        = tbuf.shm_perm.uid;
363                 out->gid        = tbuf.shm_perm.gid;
364                 out->mode       = tbuf.shm_perm.mode;
365
366                 return 0;
367             }
368         case IPC_OLD:
369             {
370                 struct shmid_ds tbuf_old;
371
372                 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
373                         return -EFAULT;
374
375                 out->uid        = tbuf_old.shm_perm.uid;
376                 out->gid        = tbuf_old.shm_perm.gid;
377                 out->mode       = tbuf_old.shm_perm.mode;
378
379                 return 0;
380             }
381         default:
382                 return -EINVAL;
383         }
384 }
385
386 static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
387 {
388         switch(version) {
389         case IPC_64:
390                 return copy_to_user(buf, in, sizeof(*in));
391         case IPC_OLD:
392             {
393                 struct shminfo out;
394
395                 if(in->shmmax > INT_MAX)
396                         out.shmmax = INT_MAX;
397                 else
398                         out.shmmax = (int)in->shmmax;
399
400                 out.shmmin      = in->shmmin;
401                 out.shmmni      = in->shmmni;
402                 out.shmseg      = in->shmseg;
403                 out.shmall      = in->shmall; 
404
405                 return copy_to_user(buf, &out, sizeof(out));
406             }
407         default:
408                 return -EINVAL;
409         }
410 }
411
412 static void shm_get_stat(unsigned long *rss, unsigned long *swp) 
413 {
414         int i;
415
416         *rss = 0;
417         *swp = 0;
418
419         for (i = 0; i <= shm_ids.max_id; i++) {
420                 struct shmid_kernel *shp;
421                 struct inode *inode;
422
423                 shp = shm_get(i);
424                 if(!shp)
425                         continue;
426
427                 inode = shp->shm_file->f_dentry->d_inode;
428
429                 if (is_file_hugepages(shp->shm_file)) {
430                         struct address_space *mapping = inode->i_mapping;
431                         *rss += (HPAGE_SIZE/PAGE_SIZE)*mapping->nrpages;
432                 } else {
433                         struct shmem_inode_info *info = SHMEM_I(inode);
434                         spin_lock(&info->lock);
435                         *rss += inode->i_mapping->nrpages;
436                         *swp += info->swapped;
437                         spin_unlock(&info->lock);
438                 }
439         }
440 }
441
442 asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
443 {
444         struct shm_setbuf setbuf;
445         struct shmid_kernel *shp;
446         int err, version;
447
448         if (cmd < 0 || shmid < 0) {
449                 err = -EINVAL;
450                 goto out;
451         }
452
453         version = ipc_parse_version(&cmd);
454
455         switch (cmd) { /* replace with proc interface ? */
456         case IPC_INFO:
457         {
458                 struct shminfo64 shminfo;
459
460                 err = security_shm_shmctl(NULL, cmd);
461                 if (err)
462                         return err;
463
464                 memset(&shminfo,0,sizeof(shminfo));
465                 shminfo.shmmni = shminfo.shmseg = shm_ctlmni;
466                 shminfo.shmmax = shm_ctlmax;
467                 shminfo.shmall = shm_ctlall;
468
469                 shminfo.shmmin = SHMMIN;
470                 if(copy_shminfo_to_user (buf, &shminfo, version))
471                         return -EFAULT;
472                 /* reading a integer is always atomic */
473                 err= shm_ids.max_id;
474                 if(err<0)
475                         err = 0;
476                 goto out;
477         }
478         case SHM_INFO:
479         {
480                 struct shm_info shm_info;
481
482                 err = security_shm_shmctl(NULL, cmd);
483                 if (err)
484                         return err;
485
486                 memset(&shm_info,0,sizeof(shm_info));
487                 mutex_lock(&shm_ids.mutex);
488                 shm_info.used_ids = shm_ids.in_use;
489                 shm_get_stat (&shm_info.shm_rss, &shm_info.shm_swp);
490                 shm_info.shm_tot = shm_tot;
491                 shm_info.swap_attempts = 0;
492                 shm_info.swap_successes = 0;
493                 err = shm_ids.max_id;
494                 mutex_unlock(&shm_ids.mutex);
495                 if(copy_to_user (buf, &shm_info, sizeof(shm_info))) {
496                         err = -EFAULT;
497                         goto out;
498                 }
499
500                 err = err < 0 ? 0 : err;
501                 goto out;
502         }
503         case SHM_STAT:
504         case IPC_STAT:
505         {
506                 struct shmid64_ds tbuf;
507                 int result;
508                 memset(&tbuf, 0, sizeof(tbuf));
509                 shp = shm_lock(shmid);
510                 if(shp==NULL) {
511                         err = -EINVAL;
512                         goto out;
513                 } else if(cmd==SHM_STAT) {
514                         err = -EINVAL;
515                         if (shmid > shm_ids.max_id)
516                                 goto out_unlock;
517                         result = shm_buildid(shmid, shp->shm_perm.seq);
518                 } else {
519                         err = shm_checkid(shp,shmid);
520                         if(err)
521                                 goto out_unlock;
522                         result = 0;
523                 }
524                 err=-EACCES;
525                 if (ipcperms (&shp->shm_perm, S_IRUGO))
526                         goto out_unlock;
527                 err = security_shm_shmctl(shp, cmd);
528                 if (err)
529                         goto out_unlock;
530                 kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
531                 tbuf.shm_segsz  = shp->shm_segsz;
532                 tbuf.shm_atime  = shp->shm_atim;
533                 tbuf.shm_dtime  = shp->shm_dtim;
534                 tbuf.shm_ctime  = shp->shm_ctim;
535                 tbuf.shm_cpid   = shp->shm_cprid;
536                 tbuf.shm_lpid   = shp->shm_lprid;
537                 if (!is_file_hugepages(shp->shm_file))
538                         tbuf.shm_nattch = shp->shm_nattch;
539                 else
540                         tbuf.shm_nattch = file_count(shp->shm_file) - 1;
541                 shm_unlock(shp);
542                 if(copy_shmid_to_user (buf, &tbuf, version))
543                         err = -EFAULT;
544                 else
545                         err = result;
546                 goto out;
547         }
548         case SHM_LOCK:
549         case SHM_UNLOCK:
550         {
551                 shp = shm_lock(shmid);
552                 if(shp==NULL) {
553                         err = -EINVAL;
554                         goto out;
555                 }
556                 err = shm_checkid(shp,shmid);
557                 if(err)
558                         goto out_unlock;
559
560                 err = audit_ipc_obj(&(shp->shm_perm));
561                 if (err)
562                         goto out_unlock;
563
564                 if (!capable(CAP_IPC_LOCK)) {
565                         err = -EPERM;
566                         if (current->euid != shp->shm_perm.uid &&
567                             current->euid != shp->shm_perm.cuid)
568                                 goto out_unlock;
569                         if (cmd == SHM_LOCK &&
570                             !current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur)
571                                 goto out_unlock;
572                 }
573
574                 err = security_shm_shmctl(shp, cmd);
575                 if (err)
576                         goto out_unlock;
577                 
578                 if(cmd==SHM_LOCK) {
579                         struct user_struct * user = current->user;
580                         if (!is_file_hugepages(shp->shm_file)) {
581                                 err = shmem_lock(shp->shm_file, 1, user);
582                                 if (!err) {
583                                         shp->shm_perm.mode |= SHM_LOCKED;
584                                         shp->mlock_user = user;
585                                 }
586                         }
587                 } else if (!is_file_hugepages(shp->shm_file)) {
588                         shmem_lock(shp->shm_file, 0, shp->mlock_user);
589                         shp->shm_perm.mode &= ~SHM_LOCKED;
590                         shp->mlock_user = NULL;
591                 }
592                 shm_unlock(shp);
593                 goto out;
594         }
595         case IPC_RMID:
596         {
597                 /*
598                  *      We cannot simply remove the file. The SVID states
599                  *      that the block remains until the last person
600                  *      detaches from it, then is deleted. A shmat() on
601                  *      an RMID segment is legal in older Linux and if 
602                  *      we change it apps break...
603                  *
604                  *      Instead we set a destroyed flag, and then blow
605                  *      the name away when the usage hits zero.
606                  */
607                 mutex_lock(&shm_ids.mutex);
608                 shp = shm_lock(shmid);
609                 err = -EINVAL;
610                 if (shp == NULL) 
611                         goto out_up;
612                 err = shm_checkid(shp, shmid);
613                 if(err)
614                         goto out_unlock_up;
615
616                 err = audit_ipc_obj(&(shp->shm_perm));
617                 if (err)
618                         goto out_unlock_up;
619
620                 if (current->euid != shp->shm_perm.uid &&
621                     current->euid != shp->shm_perm.cuid && 
622                     !capable(CAP_SYS_ADMIN)) {
623                         err=-EPERM;
624                         goto out_unlock_up;
625                 }
626
627                 err = security_shm_shmctl(shp, cmd);
628                 if (err)
629                         goto out_unlock_up;
630
631                 if (shp->shm_nattch){
632                         shp->shm_perm.mode |= SHM_DEST;
633                         /* Do not find it any more */
634                         shp->shm_perm.key = IPC_PRIVATE;
635                         shm_unlock(shp);
636                 } else
637                         shm_destroy (shp);
638                 mutex_unlock(&shm_ids.mutex);
639                 goto out;
640         }
641
642         case IPC_SET:
643         {
644                 if (copy_shmid_from_user (&setbuf, buf, version)) {
645                         err = -EFAULT;
646                         goto out;
647                 }
648                 mutex_lock(&shm_ids.mutex);
649                 shp = shm_lock(shmid);
650                 err=-EINVAL;
651                 if(shp==NULL)
652                         goto out_up;
653                 err = shm_checkid(shp,shmid);
654                 if(err)
655                         goto out_unlock_up;
656                 err = audit_ipc_obj(&(shp->shm_perm));
657                 if (err)
658                         goto out_unlock_up;
659                 err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode, &(shp->shm_perm));
660                 if (err)
661                         goto out_unlock_up;
662                 err=-EPERM;
663                 if (current->euid != shp->shm_perm.uid &&
664                     current->euid != shp->shm_perm.cuid && 
665                     !capable(CAP_SYS_ADMIN)) {
666                         goto out_unlock_up;
667                 }
668
669                 err = security_shm_shmctl(shp, cmd);
670                 if (err)
671                         goto out_unlock_up;
672                 
673                 shp->shm_perm.uid = setbuf.uid;
674                 shp->shm_perm.gid = setbuf.gid;
675                 shp->shm_perm.mode = (shp->shm_perm.mode & ~S_IRWXUGO)
676                         | (setbuf.mode & S_IRWXUGO);
677                 shp->shm_ctim = get_seconds();
678                 break;
679         }
680
681         default:
682                 err = -EINVAL;
683                 goto out;
684         }
685
686         err = 0;
687 out_unlock_up:
688         shm_unlock(shp);
689 out_up:
690         mutex_unlock(&shm_ids.mutex);
691         goto out;
692 out_unlock:
693         shm_unlock(shp);
694 out:
695         return err;
696 }
697
698 /*
699  * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
700  *
701  * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
702  * "raddr" thing points to kernel space, and there has to be a wrapper around
703  * this.
704  */
705 long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
706 {
707         struct shmid_kernel *shp;
708         unsigned long addr;
709         unsigned long size;
710         struct file * file;
711         int    err;
712         unsigned long flags;
713         unsigned long prot;
714         unsigned long o_flags;
715         int acc_mode;
716         void *user_addr;
717
718         if (shmid < 0) {
719                 err = -EINVAL;
720                 goto out;
721         } else if ((addr = (ulong)shmaddr)) {
722                 if (addr & (SHMLBA-1)) {
723                         if (shmflg & SHM_RND)
724                                 addr &= ~(SHMLBA-1);       /* round down */
725                         else
726 #ifndef __ARCH_FORCE_SHMLBA
727                                 if (addr & ~PAGE_MASK)
728 #endif
729                                         return -EINVAL;
730                 }
731                 flags = MAP_SHARED | MAP_FIXED;
732         } else {
733                 if ((shmflg & SHM_REMAP))
734                         return -EINVAL;
735
736                 flags = MAP_SHARED;
737         }
738
739         if (shmflg & SHM_RDONLY) {
740                 prot = PROT_READ;
741                 o_flags = O_RDONLY;
742                 acc_mode = S_IRUGO;
743         } else {
744                 prot = PROT_READ | PROT_WRITE;
745                 o_flags = O_RDWR;
746                 acc_mode = S_IRUGO | S_IWUGO;
747         }
748         if (shmflg & SHM_EXEC) {
749                 prot |= PROT_EXEC;
750                 acc_mode |= S_IXUGO;
751         }
752
753         /*
754          * We cannot rely on the fs check since SYSV IPC does have an
755          * additional creator id...
756          */
757         shp = shm_lock(shmid);
758         if(shp == NULL) {
759                 err = -EINVAL;
760                 goto out;
761         }
762         err = shm_checkid(shp,shmid);
763         if (err) {
764                 shm_unlock(shp);
765                 goto out;
766         }
767         if (ipcperms(&shp->shm_perm, acc_mode)) {
768                 shm_unlock(shp);
769                 err = -EACCES;
770                 goto out;
771         }
772
773         err = security_shm_shmat(shp, shmaddr, shmflg);
774         if (err) {
775                 shm_unlock(shp);
776                 return err;
777         }
778                 
779         file = shp->shm_file;
780         size = i_size_read(file->f_dentry->d_inode);
781         shp->shm_nattch++;
782         shm_unlock(shp);
783
784         down_write(&current->mm->mmap_sem);
785         if (addr && !(shmflg & SHM_REMAP)) {
786                 user_addr = ERR_PTR(-EINVAL);
787                 if (find_vma_intersection(current->mm, addr, addr + size))
788                         goto invalid;
789                 /*
790                  * If shm segment goes below stack, make sure there is some
791                  * space left for the stack to grow (at least 4 pages).
792                  */
793                 if (addr < current->mm->start_stack &&
794                     addr > current->mm->start_stack - size - PAGE_SIZE * 5)
795                         goto invalid;
796         }
797                 
798         user_addr = (void*) do_mmap (file, addr, size, prot, flags, 0);
799
800 invalid:
801         up_write(&current->mm->mmap_sem);
802
803         mutex_lock(&shm_ids.mutex);
804         shp = shm_lock(shmid);
805         BUG_ON(!shp);
806         shp->shm_nattch--;
807         if(shp->shm_nattch == 0 &&
808            shp->shm_perm.mode & SHM_DEST)
809                 shm_destroy (shp);
810         else
811                 shm_unlock(shp);
812         mutex_unlock(&shm_ids.mutex);
813
814         *raddr = (unsigned long) user_addr;
815         err = 0;
816         if (IS_ERR(user_addr))
817                 err = PTR_ERR(user_addr);
818 out:
819         return err;
820 }
821
822 asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg)
823 {
824         unsigned long ret;
825         long err;
826
827         err = do_shmat(shmid, shmaddr, shmflg, &ret);
828         if (err)
829                 return err;
830         force_successful_syscall_return();
831         return (long)ret;
832 }
833
834 /*
835  * detach and kill segment if marked destroyed.
836  * The work is done in shm_close.
837  */
838 asmlinkage long sys_shmdt(char __user *shmaddr)
839 {
840         struct mm_struct *mm = current->mm;
841         struct vm_area_struct *vma, *next;
842         unsigned long addr = (unsigned long)shmaddr;
843         loff_t size = 0;
844         int retval = -EINVAL;
845
846         if (addr & ~PAGE_MASK)
847                 return retval;
848
849         down_write(&mm->mmap_sem);
850
851         /*
852          * This function tries to be smart and unmap shm segments that
853          * were modified by partial mlock or munmap calls:
854          * - It first determines the size of the shm segment that should be
855          *   unmapped: It searches for a vma that is backed by shm and that
856          *   started at address shmaddr. It records it's size and then unmaps
857          *   it.
858          * - Then it unmaps all shm vmas that started at shmaddr and that
859          *   are within the initially determined size.
860          * Errors from do_munmap are ignored: the function only fails if
861          * it's called with invalid parameters or if it's called to unmap
862          * a part of a vma. Both calls in this function are for full vmas,
863          * the parameters are directly copied from the vma itself and always
864          * valid - therefore do_munmap cannot fail. (famous last words?)
865          */
866         /*
867          * If it had been mremap()'d, the starting address would not
868          * match the usual checks anyway. So assume all vma's are
869          * above the starting address given.
870          */
871         vma = find_vma(mm, addr);
872
873         while (vma) {
874                 next = vma->vm_next;
875
876                 /*
877                  * Check if the starting address would match, i.e. it's
878                  * a fragment created by mprotect() and/or munmap(), or it
879                  * otherwise it starts at this address with no hassles.
880                  */
881                 if ((vma->vm_ops == &shm_vm_ops || is_vm_hugetlb_page(vma)) &&
882                         (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {
883
884
885                         size = vma->vm_file->f_dentry->d_inode->i_size;
886                         do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
887                         /*
888                          * We discovered the size of the shm segment, so
889                          * break out of here and fall through to the next
890                          * loop that uses the size information to stop
891                          * searching for matching vma's.
892                          */
893                         retval = 0;
894                         vma = next;
895                         break;
896                 }
897                 vma = next;
898         }
899
900         /*
901          * We need look no further than the maximum address a fragment
902          * could possibly have landed at. Also cast things to loff_t to
903          * prevent overflows and make comparisions vs. equal-width types.
904          */
905         size = PAGE_ALIGN(size);
906         while (vma && (loff_t)(vma->vm_end - addr) <= size) {
907                 next = vma->vm_next;
908
909                 /* finding a matching vma now does not alter retval */
910                 if ((vma->vm_ops == &shm_vm_ops || is_vm_hugetlb_page(vma)) &&
911                         (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff)
912
913                         do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
914                 vma = next;
915         }
916
917         up_write(&mm->mmap_sem);
918         return retval;
919 }
920
921 #ifdef CONFIG_PROC_FS
922 static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
923 {
924         struct shmid_kernel *shp = it;
925         char *format;
926
927 #define SMALL_STRING "%10d %10d  %4o %10u %5u %5u  %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
928 #define BIG_STRING   "%10d %10d  %4o %21u %5u %5u  %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
929
930         if (!vx_check(shp->shm_perm.xid, VX_IDENT))
931                 return 0;
932
933         if (sizeof(size_t) <= sizeof(int))
934                 format = SMALL_STRING;
935         else
936                 format = BIG_STRING;
937         return seq_printf(s, format,
938                           shp->shm_perm.key,
939                           shp->id,
940                           shp->shm_perm.mode,
941                           shp->shm_segsz,
942                           shp->shm_cprid,
943                           shp->shm_lprid,
944                           is_file_hugepages(shp->shm_file) ? (file_count(shp->shm_file) - 1) : shp->shm_nattch,
945                           shp->shm_perm.uid,
946                           shp->shm_perm.gid,
947                           shp->shm_perm.cuid,
948                           shp->shm_perm.cgid,
949                           shp->shm_atim,
950                           shp->shm_dtim,
951                           shp->shm_ctim);
952 }
953 #endif