[linux-2.6.git] / mm / shmem.c
1 /*
2  * Resizable virtual memory filesystem for Linux.
3  *
4  * Copyright (C) 2000 Linus Torvalds.
5  *               2000 Transmeta Corp.
6  *               2000-2001 Christoph Rohland
7  *               2000-2001 SAP AG
8  *               2002 Red Hat Inc.
9  * Copyright (C) 2002-2003 Hugh Dickins.
10  * Copyright (C) 2002-2003 VERITAS Software Corporation.
11  * Copyright (C) 2004 Andi Kleen, SuSE Labs
12  *
13  * This file is released under the GPL.
14  */
15
16 /*
17  * This virtual memory filesystem is heavily based on the ramfs. It
18  * extends ramfs by the ability to use swap and honor resource limits
19  * which makes it a completely usable filesystem.
20  */
21
22 #include <linux/config.h>
23 #include <linux/module.h>
24 #include <linux/init.h>
25 #include <linux/devfs_fs_kernel.h>
26 #include <linux/fs.h>
27 #include <linux/mm.h>
28 #include <linux/mman.h>
29 #include <linux/file.h>
30 #include <linux/swap.h>
31 #include <linux/pagemap.h>
32 #include <linux/string.h>
33 #include <linux/slab.h>
34 #include <linux/backing-dev.h>
35 #include <linux/shmem_fs.h>
36 #include <linux/mount.h>
37 #include <linux/writeback.h>
38 #include <linux/vfs.h>
39 #include <linux/blkdev.h>
40 #include <linux/security.h>
41 #include <linux/swapops.h>
42 #include <linux/mempolicy.h>
43 #include <asm/uaccess.h>
44 #include <asm/div64.h>
45 #include <asm/pgtable.h>
46
47 /* This magic number is used in glibc for posix shared memory */
48 #define TMPFS_MAGIC     0x01021994
49
50 #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
51 #define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
52 #define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
53
54 #define SHMEM_MAX_INDEX  (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
55 #define SHMEM_MAX_BYTES  ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT)
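
/*
 * Rough numbers, assuming 4K pages: with 4-byte longs ENTRIES_PER_PAGE
 * is 1024, so SHMEM_MAX_INDEX works out to
 * SHMEM_NR_DIRECT + (1024*1024/2) * 1025, about 5.4e8 pages, and
 * SHMEM_MAX_BYTES comes to roughly 2TB; with 8-byte longs
 * ENTRIES_PER_PAGE is 512 and the limit drops to roughly 256GB.
 */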
56
57 #define VM_ACCT(size)    (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
58
59 /* info->flags needs VM_flags to handle pagein/truncate races efficiently */
60 #define SHMEM_PAGEIN     VM_READ
61 #define SHMEM_TRUNCATE   VM_WRITE
62
63 /* Pretend that each entry is of this size in directory's i_size */
64 #define BOGO_DIRENT_SIZE 20
65
66 /* Keep swapped page count in private field of indirect struct page */
67 #define nr_swapped              private
68
69 /* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
70 enum sgp_type {
71         SGP_QUICK,      /* don't try more than file page cache lookup */
72         SGP_READ,       /* don't exceed i_size, don't allocate page */
73         SGP_CACHE,      /* don't exceed i_size, may allocate page */
74         SGP_WRITE,      /* may exceed i_size, may allocate page */
75 };
76
77 static int shmem_getpage(struct inode *inode, unsigned long idx,
78                          struct page **pagep, enum sgp_type sgp, int *type);
79
80 static inline struct page *shmem_dir_alloc(unsigned int gfp_mask)
81 {
82         /*
83          * The above definition of ENTRIES_PER_PAGE, and the use of
84          * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
85          * might be reconsidered if it ever diverges from PAGE_SIZE.
86          */
87         return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-PAGE_SHIFT);
88 }
89
90 static inline void shmem_dir_free(struct page *page)
91 {
92         __free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT);
93 }
94
95 static struct page **shmem_dir_map(struct page *page)
96 {
97         return (struct page **)kmap_atomic(page, KM_USER0);
98 }
99
100 static inline void shmem_dir_unmap(struct page **dir)
101 {
102         kunmap_atomic(dir, KM_USER0);
103 }
104
105 static swp_entry_t *shmem_swp_map(struct page *page)
106 {
107         /*
108          * We have to avoid the unconditional inc_preempt_count()
109          * in kmap_atomic(), since shmem_swp_unmap() will also be
110          * applied to the low memory addresses within i_direct[].
111          * PageHighMem and high_memory tests are good for all arches
112          * and configs: highmem_start_page and FIXADDR_START are not.
113          */
114         return PageHighMem(page)?
115                 (swp_entry_t *)kmap_atomic(page, KM_USER1):
116                 (swp_entry_t *)page_address(page);
117 }
118
119 static inline void shmem_swp_unmap(swp_entry_t *entry)
120 {
121         if (entry >= (swp_entry_t *)high_memory)
122                 kunmap_atomic(entry, KM_USER1);
123 }
124
125 static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
126 {
127         return sb->s_fs_info;
128 }
129
130 /*
131  * shmem_file_setup pre-accounts the whole fixed size of a VM object,
132  * for shared memory and for shared anonymous (/dev/zero) mappings
133  * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1),
134  * consistent with the pre-accounting of private mappings ...
135  */
136 static inline int shmem_acct_size(unsigned long flags, loff_t size)
137 {
138         return (flags & VM_ACCOUNT)?
139                 security_vm_enough_memory(VM_ACCT(size)): 0;
140 }
141
142 static inline void shmem_unacct_size(unsigned long flags, loff_t size)
143 {
144         if (flags & VM_ACCOUNT)
145                 vm_unacct_memory(VM_ACCT(size));
146 }
147
148 /*
149  * ... whereas tmpfs objects are accounted incrementally as
150  * pages are allocated, in order to allow huge sparse files.
151  * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
152  * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
153  */
154 static inline int shmem_acct_block(unsigned long flags)
155 {
156         return (flags & VM_ACCOUNT)?
157                 0: security_vm_enough_memory(VM_ACCT(PAGE_CACHE_SIZE));
158 }
159
160 static inline void shmem_unacct_blocks(unsigned long flags, long pages)
161 {
162         if (!(flags & VM_ACCOUNT))
163                 vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
164 }
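
/*
 * Concrete illustration of the split above (4K pages assumed): a 1MB
 * segment set up with VM_ACCOUNT charges VM_ACCT(1MB) = 256 pages to
 * the commit total in shmem_acct_size before any page exists, and
 * shmem_acct_block is then a no-op for it; an ordinary tmpfs file
 * charges nothing up front, so a sparse 1MB file with a single page
 * written is accounted just one page, via shmem_acct_block at
 * allocation time.
 */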
165
166 static struct super_operations shmem_ops;
167 static struct address_space_operations shmem_aops;
168 static struct file_operations shmem_file_operations;
169 static struct inode_operations shmem_inode_operations;
170 static struct inode_operations shmem_dir_inode_operations;
171 static struct vm_operations_struct shmem_vm_ops;
172
173 static struct backing_dev_info shmem_backing_dev_info = {
174         .ra_pages       = 0,    /* No readahead */
175         .memory_backed  = 1,    /* Does not contribute to dirty memory */
176         .unplug_io_fn = default_unplug_io_fn,
177 };
178
179 LIST_HEAD(shmem_inodes);
180 static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
181
182 static void shmem_free_block(struct inode *inode)
183 {
184         struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
185         spin_lock(&sbinfo->stat_lock);
186         sbinfo->free_blocks++;
187         inode->i_blocks -= BLOCKS_PER_PAGE;
188         spin_unlock(&sbinfo->stat_lock);
189 }
190
191 /*
192  * shmem_recalc_inode - recalculate the size of an inode
193  *
194  * @inode: inode to recalc
195  *
196  * We have to calculate the free blocks since the mm can drop
197  * undirtied hole pages behind our back.
198  *
199  * But normally   info->alloced == inode->i_mapping->nrpages + info->swapped
200  * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
201  *
202  * It has to be called with the spinlock held.
203  */
204 static void shmem_recalc_inode(struct inode *inode)
205 {
206         struct shmem_inode_info *info = SHMEM_I(inode);
207         long freed;
208
209         freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
210         if (freed > 0) {
211                 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
212                 info->alloced -= freed;
213                 spin_lock(&sbinfo->stat_lock);
214                 sbinfo->free_blocks += freed;
215                 inode->i_blocks -= freed*BLOCKS_PER_PAGE;
216                 spin_unlock(&sbinfo->stat_lock);
217                 shmem_unacct_blocks(info->flags, freed);
218         }
219 }
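
/*
 * Example of the arithmetic: if info->alloced is 8 while the mapping
 * now holds nrpages = 5 and info->swapped = 2, the mm must have
 * reclaimed 8 - 5 - 2 = 1 clean hole page behind our back, so one
 * block is handed back to sbinfo->free_blocks and unaccounted.
 */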
220
221 /*
222  * shmem_swp_entry - find the swap vector position in the info structure
223  *
224  * @info:  info structure for the inode
225  * @index: index of the page to find
226  * @page:  optional page to add to the structure. Has to be preset to
227  *         all zeros
228  *
229  * If there is no space allocated yet it will return NULL when
230  * page is NULL, else it will use the page for the needed block,
231  * setting it to NULL on return to indicate that it has been used.
232  *
233  * The swap vector is organized the following way:
234  *
235  * There are SHMEM_NR_DIRECT entries directly stored in the
236  * shmem_inode_info structure. So small files do not need an additional
237  * allocation.
238  *
239  * For pages with index > SHMEM_NR_DIRECT there is the pointer
240  * i_indirect which points to a page which holds in the first half
241  * doubly indirect blocks, in the second half triple indirect blocks:
242  *
243  * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
244  * following layout (for SHMEM_NR_DIRECT == 16):
245  *
246  * i_indirect -> dir --> 16-19
247  *            |      +-> 20-23
248  *            |
249  *            +-->dir2 --> 24-27
250  *            |        +-> 28-31
251  *            |        +-> 32-35
252  *            |        +-> 36-39
253  *            |
254  *            +-->dir3 --> 40-43
255  *                     +-> 44-47
256  *                     +-> 48-51
257  *                     +-> 52-55
258  */
259 static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page)
260 {
261         unsigned long offset;
262         struct page **dir;
263         struct page *subdir;
264
265         if (index < SHMEM_NR_DIRECT)
266                 return info->i_direct+index;
267         if (!info->i_indirect) {
268                 if (page) {
269                         info->i_indirect = *page;
270                         *page = NULL;
271                 }
272                 return NULL;                    /* need another page */
273         }
274
275         index -= SHMEM_NR_DIRECT;
276         offset = index % ENTRIES_PER_PAGE;
277         index /= ENTRIES_PER_PAGE;
278         dir = shmem_dir_map(info->i_indirect);
279
280         if (index >= ENTRIES_PER_PAGE/2) {
281                 index -= ENTRIES_PER_PAGE/2;
282                 dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE;
283                 index %= ENTRIES_PER_PAGE;
284                 subdir = *dir;
285                 if (!subdir) {
286                         if (page) {
287                                 *dir = *page;
288                                 *page = NULL;
289                         }
290                         shmem_dir_unmap(dir);
291                         return NULL;            /* need another page */
292                 }
293                 shmem_dir_unmap(dir);
294                 dir = shmem_dir_map(subdir);
295         }
296
297         dir += index;
298         subdir = *dir;
299         if (!subdir) {
300                 if (!page || !(subdir = *page)) {
301                         shmem_dir_unmap(dir);
302                         return NULL;            /* need a page */
303                 }
304                 *dir = subdir;
305                 *page = NULL;
306         }
307         shmem_dir_unmap(dir);
308
309         /*
310          * With apologies... caller shmem_swp_alloc passes non-NULL
311          * page (though perhaps NULL *page); and now we know that this
312          * indirect page has been allocated, we can shortcut the final
313          * kmap if we know it contains no swap entries, as is commonly
314          * the case: return pointer to a 0 which doesn't need kmapping.
315          */
316         return (page && !subdir->nr_swapped)?
317                 (swp_entry_t *)&subdir->nr_swapped:
318                 shmem_swp_map(subdir) + offset;
319 }
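
/*
 * Worked example of the lookup above, using the artificial
 * ENTRIES_PER_PAGE = 4 and SHMEM_NR_DIRECT = 16 from the layout
 * comment: for index 30, index - 16 = 14, so offset = 14 % 4 = 2 and
 * index = 14 / 4 = 3.  Since 3 >= ENTRIES_PER_PAGE/2 we take the
 * triple-indirect half: dir lands on the dir2 slot and index becomes
 * 1, so the entry lives at slot 1 of dir2 (the 28-31 page) at offset
 * 2, i.e. page index 30 as expected.
 */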
320
321 static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value)
322 {
323         long incdec = value? 1: -1;
324
325         entry->val = value;
326         info->swapped += incdec;
327         if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT)
328                 kmap_atomic_to_page(entry)->nr_swapped += incdec;
329 }
330
331 /*
332  * shmem_swp_alloc - get the position of the swap entry for the page.
333  *                   If it does not exist allocate the entry.
334  *
335  * @info:       info structure for the inode
336  * @index:      index of the page to find
337  * @sgp:        check and recheck i_size? skip allocation?
338  */
339 static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp)
340 {
341         struct inode *inode = &info->vfs_inode;
342         struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
343         struct page *page = NULL;
344         swp_entry_t *entry;
345         static const swp_entry_t unswapped = { 0 };
346
347         if (sgp != SGP_WRITE &&
348             ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
349                 return ERR_PTR(-EINVAL);
350
351         while (!(entry = shmem_swp_entry(info, index, &page))) {
352                 if (sgp == SGP_READ)
353                         return (swp_entry_t *) &unswapped;
354                 /*
355                  * Test free_blocks against 1 not 0, since we have 1 data
356                  * page (and perhaps indirect index pages) yet to allocate:
357                  * a waste to allocate index if we cannot allocate data.
358                  */
359                 spin_lock(&sbinfo->stat_lock);
360                 if (sbinfo->free_blocks <= 1) {
361                         spin_unlock(&sbinfo->stat_lock);
362                         return ERR_PTR(-ENOSPC);
363                 }
364                 sbinfo->free_blocks--;
365                 inode->i_blocks += BLOCKS_PER_PAGE;
366                 spin_unlock(&sbinfo->stat_lock);
367
368                 spin_unlock(&info->lock);
369                 page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping));
370                 if (page) {
371                         clear_highpage(page);
372                         page->nr_swapped = 0;
373                 }
374                 spin_lock(&info->lock);
375
376                 if (!page) {
377                         shmem_free_block(inode);
378                         return ERR_PTR(-ENOMEM);
379                 }
380                 if (sgp != SGP_WRITE &&
381                     ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
382                         entry = ERR_PTR(-EINVAL);
383                         break;
384                 }
385                 if (info->next_index <= index)
386                         info->next_index = index + 1;
387         }
388         if (page) {
389                 /* another task gave its page, or truncated the file */
390                 shmem_free_block(inode);
391                 shmem_dir_free(page);
392         }
393         if (info->next_index <= index && !IS_ERR(entry))
394                 info->next_index = index + 1;
395         return entry;
396 }
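
/*
 * Typical use, as in shmem_getpage below: the caller holds info->lock,
 * must check IS_ERR() on the result, and must allow for the lock
 * having been dropped and retaken while an index page was allocated:
 *
 *	spin_lock(&info->lock);
 *	entry = shmem_swp_alloc(info, idx, sgp);
 *	if (IS_ERR(entry))
 *		error = PTR_ERR(entry);
 *	else {
 *		swap = *entry;
 *		shmem_swp_unmap(entry);
 *	}
 *	spin_unlock(&info->lock);
 */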
397
398 /*
399  * shmem_free_swp - free some swap entries in a directory
400  *
401  * @dir:   pointer to the directory
402  * @edir:  pointer after last entry of the directory
403  */
404 static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
405 {
406         swp_entry_t *ptr;
407         int freed = 0;
408
409         for (ptr = dir; ptr < edir; ptr++) {
410                 if (ptr->val) {
411                         free_swap_and_cache(*ptr);
412                         *ptr = (swp_entry_t){0};
413                         freed++;
414                 }
415         }
416         return freed;
417 }
418
419 static void shmem_truncate(struct inode *inode)
420 {
421         struct shmem_inode_info *info = SHMEM_I(inode);
422         unsigned long idx;
423         unsigned long size;
424         unsigned long limit;
425         unsigned long stage;
426         struct page **dir;
427         struct page *subdir;
428         struct page *empty;
429         swp_entry_t *ptr;
430         int offset;
431         int freed;
432
433         inode->i_ctime = inode->i_mtime = CURRENT_TIME;
434         idx = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
435         if (idx >= info->next_index)
436                 return;
437
438         spin_lock(&info->lock);
439         info->flags |= SHMEM_TRUNCATE;
440         limit = info->next_index;
441         info->next_index = idx;
442         if (info->swapped && idx < SHMEM_NR_DIRECT) {
443                 ptr = info->i_direct;
444                 size = limit;
445                 if (size > SHMEM_NR_DIRECT)
446                         size = SHMEM_NR_DIRECT;
447                 info->swapped -= shmem_free_swp(ptr+idx, ptr+size);
448         }
449         if (!info->i_indirect)
450                 goto done2;
451
452         BUG_ON(limit <= SHMEM_NR_DIRECT);
453         limit -= SHMEM_NR_DIRECT;
454         idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
455         offset = idx % ENTRIES_PER_PAGE;
456         idx -= offset;
457
458         empty = NULL;
459         dir = shmem_dir_map(info->i_indirect);
460         stage = ENTRIES_PER_PAGEPAGE/2;
461         if (idx < ENTRIES_PER_PAGEPAGE/2)
462                 dir += idx/ENTRIES_PER_PAGE;
463         else {
464                 dir += ENTRIES_PER_PAGE/2;
465                 dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE;
466                 while (stage <= idx)
467                         stage += ENTRIES_PER_PAGEPAGE;
468                 if (*dir) {
469                         subdir = *dir;
470                         size = ((idx - ENTRIES_PER_PAGEPAGE/2) %
471                                 ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
472                         if (!size && !offset) {
473                                 empty = subdir;
474                                 *dir = NULL;
475                         }
476                         shmem_dir_unmap(dir);
477                         dir = shmem_dir_map(subdir) + size;
478                 } else {
479                         offset = 0;
480                         idx = stage;
481                 }
482         }
483
484         for (; idx < limit; idx += ENTRIES_PER_PAGE, dir++) {
485                 if (unlikely(idx == stage)) {
486                         shmem_dir_unmap(dir-1);
487                         dir = shmem_dir_map(info->i_indirect) +
488                             ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
489                         while (!*dir) {
490                                 dir++;
491                                 idx += ENTRIES_PER_PAGEPAGE;
492                                 if (idx >= limit)
493                                         goto done1;
494                         }
495                         stage = idx + ENTRIES_PER_PAGEPAGE;
496                         subdir = *dir;
497                         *dir = NULL;
498                         shmem_dir_unmap(dir);
499                         if (empty) {
500                                 shmem_dir_free(empty);
501                                 shmem_free_block(inode);
502                         }
503                         empty = subdir;
504                         cond_resched_lock(&info->lock);
505                         dir = shmem_dir_map(subdir);
506                 }
507                 subdir = *dir;
508                 if (subdir && subdir->nr_swapped) {
509                         ptr = shmem_swp_map(subdir);
510                         size = limit - idx;
511                         if (size > ENTRIES_PER_PAGE)
512                                 size = ENTRIES_PER_PAGE;
513                         freed = shmem_free_swp(ptr+offset, ptr+size);
514                         shmem_swp_unmap(ptr);
515                         info->swapped -= freed;
516                         subdir->nr_swapped -= freed;
517                         BUG_ON(subdir->nr_swapped > offset);
518                 }
519                 if (offset)
520                         offset = 0;
521                 else if (subdir) {
522                         *dir = NULL;
523                         shmem_dir_free(subdir);
524                         shmem_free_block(inode);
525                 }
526         }
527 done1:
528         shmem_dir_unmap(dir-1);
529         if (empty) {
530                 shmem_dir_free(empty);
531                 shmem_free_block(inode);
532         }
533         if (info->next_index <= SHMEM_NR_DIRECT) {
534                 shmem_dir_free(info->i_indirect);
535                 info->i_indirect = NULL;
536                 shmem_free_block(inode);
537         }
538 done2:
539         BUG_ON(info->swapped > info->next_index);
540         if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) {
541                 /*
542                  * Call truncate_inode_pages again: racing shmem_unuse_inode
543                  * may have swizzled a page in from swap since vmtruncate or
544                  * generic_delete_inode did it, before we lowered next_index.
545                  * Also, though shmem_getpage checks i_size before adding to
546                  * cache, no recheck after: so fix the narrow window there too.
547                  */
548                 spin_unlock(&info->lock);
549                 truncate_inode_pages(inode->i_mapping, inode->i_size);
550                 spin_lock(&info->lock);
551         }
552         info->flags &= ~SHMEM_TRUNCATE;
553         shmem_recalc_inode(inode);
554         spin_unlock(&info->lock);
555 }
556
557 static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
558 {
559         struct inode *inode = dentry->d_inode;
560         struct page *page = NULL;
561         int error;
562
563         if (attr->ia_valid & ATTR_SIZE) {
564                 if (attr->ia_size < inode->i_size) {
565                         /*
566                          * If truncating down to a partial page, then
567                          * if that page is already allocated, hold it
568                          * in memory until the truncation is over, so
569                          * truncate_partial_page cannot miss it were
570                          * it assigned to swap.
571                          */
572                         if (attr->ia_size & (PAGE_CACHE_SIZE-1)) {
573                                 (void) shmem_getpage(inode,
574                                         attr->ia_size>>PAGE_CACHE_SHIFT,
575                                                 &page, SGP_READ, NULL);
576                         }
577                         /*
578                          * Reset SHMEM_PAGEIN flag so that shmem_truncate can
579                          * detect if any pages might have been added to cache
580                          * after truncate_inode_pages.  But we needn't bother
581                          * if it's being fully truncated to zero-length: the
582                          * nrpages check is efficient enough in that case.
583                          */
584                         if (attr->ia_size) {
585                                 struct shmem_inode_info *info = SHMEM_I(inode);
586                                 spin_lock(&info->lock);
587                                 info->flags &= ~SHMEM_PAGEIN;
588                                 spin_unlock(&info->lock);
589                         }
590                 }
591         }
592
593         error = inode_change_ok(inode, attr);
594         if (!error)
595                 error = inode_setattr(inode, attr);
596         if (page)
597                 page_cache_release(page);
598         return error;
599 }
600
601 static void shmem_delete_inode(struct inode *inode)
602 {
603         struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
604         struct shmem_inode_info *info = SHMEM_I(inode);
605
606         if (inode->i_op->truncate == shmem_truncate) {
607                 spin_lock(&shmem_ilock);
608                 list_del(&info->list);
609                 spin_unlock(&shmem_ilock);
610                 shmem_unacct_size(info->flags, inode->i_size);
611                 inode->i_size = 0;
612                 shmem_truncate(inode);
613         }
614         BUG_ON(inode->i_blocks);
615         spin_lock(&sbinfo->stat_lock);
616         sbinfo->free_inodes++;
617         spin_unlock(&sbinfo->stat_lock);
618         clear_inode(inode);
619 }
620
621 static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir)
622 {
623         swp_entry_t *ptr;
624
625         for (ptr = dir; ptr < edir; ptr++) {
626                 if (ptr->val == entry.val)
627                         return ptr - dir;
628         }
629         return -1;
630 }
631
632 static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
633 {
634         struct inode *inode;
635         unsigned long idx;
636         unsigned long size;
637         unsigned long limit;
638         unsigned long stage;
639         struct page **dir;
640         struct page *subdir;
641         swp_entry_t *ptr;
642         int offset;
643
644         idx = 0;
645         ptr = info->i_direct;
646         spin_lock(&info->lock);
647         limit = info->next_index;
648         size = limit;
649         if (size > SHMEM_NR_DIRECT)
650                 size = SHMEM_NR_DIRECT;
651         offset = shmem_find_swp(entry, ptr, ptr+size);
652         if (offset >= 0)
653                 goto found;
654         if (!info->i_indirect)
655                 goto lost2;
656         /* we might be racing with shmem_truncate */
657         if (limit <= SHMEM_NR_DIRECT)
658                 goto lost2;
659
660         dir = shmem_dir_map(info->i_indirect);
661         stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2;
662
663         for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) {
664                 if (unlikely(idx == stage)) {
665                         shmem_dir_unmap(dir-1);
666                         dir = shmem_dir_map(info->i_indirect) +
667                             ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
668                         while (!*dir) {
669                                 dir++;
670                                 idx += ENTRIES_PER_PAGEPAGE;
671                                 if (idx >= limit)
672                                         goto lost1;
673                         }
674                         stage = idx + ENTRIES_PER_PAGEPAGE;
675                         subdir = *dir;
676                         shmem_dir_unmap(dir);
677                         dir = shmem_dir_map(subdir);
678                 }
679                 subdir = *dir;
680                 if (subdir && subdir->nr_swapped) {
681                         ptr = shmem_swp_map(subdir);
682                         size = limit - idx;
683                         if (size > ENTRIES_PER_PAGE)
684                                 size = ENTRIES_PER_PAGE;
685                         offset = shmem_find_swp(entry, ptr, ptr+size);
686                         if (offset >= 0) {
687                                 shmem_dir_unmap(dir);
688                                 goto found;
689                         }
690                         shmem_swp_unmap(ptr);
691                 }
692         }
693 lost1:
694         shmem_dir_unmap(dir-1);
695 lost2:
696         spin_unlock(&info->lock);
697         return 0;
698 found:
699         idx += offset;
700         inode = &info->vfs_inode;
701         if (move_from_swap_cache(page, idx, inode->i_mapping) == 0) {
702                 info->flags |= SHMEM_PAGEIN;
703                 shmem_swp_set(info, ptr + offset, 0);
704         }
705         shmem_swp_unmap(ptr);
706         spin_unlock(&info->lock);
707         /*
708          * Decrement swap count even when the entry is left behind:
709          * try_to_unuse will skip over mms, then reincrement count.
710          */
711         swap_free(entry);
712         return 1;
713 }
714
715 /*
716  * shmem_unuse() searches for a possibly swapped-out shmem page.
717  */
718 int shmem_unuse(swp_entry_t entry, struct page *page)
719 {
720         struct list_head *p;
721         struct shmem_inode_info *info;
722         int found = 0;
723
724         spin_lock(&shmem_ilock);
725         list_for_each(p, &shmem_inodes) {
726                 info = list_entry(p, struct shmem_inode_info, list);
727
728                 if (info->swapped && shmem_unuse_inode(info, entry, page)) {
729                         /* move head to start search for next from here */
730                         list_move_tail(&shmem_inodes, &info->list);
731                         found = 1;
732                         break;
733                 }
734         }
735         spin_unlock(&shmem_ilock);
736         return found;
737 }
738
739 /*
740  * Move the page from the page cache to the swap cache.
741  */
742 static int shmem_writepage(struct page *page, struct writeback_control *wbc)
743 {
744         struct shmem_inode_info *info;
745         swp_entry_t *entry, swap;
746         struct address_space *mapping;
747         unsigned long index;
748         struct inode *inode;
749
750         BUG_ON(!PageLocked(page));
751         BUG_ON(page_mapped(page));
752
753         mapping = page->mapping;
754         index = page->index;
755         inode = mapping->host;
756         info = SHMEM_I(inode);
757         if (info->flags & VM_LOCKED)
758                 goto redirty;
759         swap = get_swap_page();
760         if (!swap.val)
761                 goto redirty;
762
763         spin_lock(&info->lock);
764         shmem_recalc_inode(inode);
765         if (index >= info->next_index) {
766                 BUG_ON(!(info->flags & SHMEM_TRUNCATE));
767                 goto unlock;
768         }
769         entry = shmem_swp_entry(info, index, NULL);
770         BUG_ON(!entry);
771         BUG_ON(entry->val);
772
773         if (move_to_swap_cache(page, swap) == 0) {
774                 shmem_swp_set(info, entry, swap.val);
775                 shmem_swp_unmap(entry);
776                 spin_unlock(&info->lock);
777                 unlock_page(page);
778                 return 0;
779         }
780
781         shmem_swp_unmap(entry);
782 unlock:
783         spin_unlock(&info->lock);
784         swap_free(swap);
785 redirty:
786         set_page_dirty(page);
787         return WRITEPAGE_ACTIVATE;      /* Return with the page locked */
788 }
789
790 #ifdef CONFIG_NUMA
791 static struct page *shmem_swapin_async(struct shared_policy *p,
792                                        swp_entry_t entry, unsigned long idx)
793 {
794         struct page *page;
795         struct vm_area_struct pvma;
796
797         /* Create a pseudo vma that just contains the policy */
798         memset(&pvma, 0, sizeof(struct vm_area_struct));
799         pvma.vm_end = PAGE_SIZE;
800         pvma.vm_pgoff = idx;
801         pvma.vm_policy = mpol_shared_policy_lookup(p, idx);
802         page = read_swap_cache_async(entry, &pvma, 0);
803         mpol_free(pvma.vm_policy);
804         return page;
805 }
806
807 struct page *shmem_swapin(struct shmem_inode_info *info, swp_entry_t entry,
808                           unsigned long idx)
809 {
810         struct shared_policy *p = &info->policy;
811         int i, num;
812         struct page *page;
813         unsigned long offset;
814
815         num = valid_swaphandles(entry, &offset);
816         for (i = 0; i < num; offset++, i++) {
817                 page = shmem_swapin_async(p,
818                                 swp_entry(swp_type(entry), offset), idx);
819                 if (!page)
820                         break;
821                 page_cache_release(page);
822         }
823         lru_add_drain();        /* Push any new pages onto the LRU now */
824         return shmem_swapin_async(p, entry, idx);
825 }
826
827 static struct page *
828 shmem_alloc_page(unsigned long gfp, struct shmem_inode_info *info,
829                  unsigned long idx)
830 {
831         struct vm_area_struct pvma;
832         struct page *page;
833
834         memset(&pvma, 0, sizeof(struct vm_area_struct));
835         pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx);
836         pvma.vm_pgoff = idx;
837         pvma.vm_end = PAGE_SIZE;
838         page = alloc_page_vma(gfp, &pvma, 0);
839         mpol_free(pvma.vm_policy);
840         return page;
841 }
842 #else
843 static inline struct page *
844 shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx)
845 {
846         swapin_readahead(entry, 0, NULL);
847         return read_swap_cache_async(entry, NULL, 0);
848 }
849
850 static inline struct page *
851 shmem_alloc_page(unsigned long gfp,struct shmem_inode_info *info,
852                                  unsigned long idx)
853 {
854         return alloc_page(gfp);
855 }
856 #endif
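
/*
 * Note on the pseudo vma trick used by the NUMA variants above:
 * alloc_page_vma() and read_swap_cache_async() use the vma here only
 * to pick a mempolicy and an offset, so a zeroed on-stack
 * vm_area_struct carrying just vm_policy, vm_pgoff and vm_end is
 * enough to steer the allocation by the inode's shared policy.
 */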
857
858 /*
859  * shmem_getpage - either get the page from swap or allocate a new one
860  *
861  * If we allocate a new one we do not mark it dirty. That's up to the
862  * vm. If we swap it in we mark it dirty, since we also free the swap
863  * entry: a page cannot live in both the swap cache and the page cache.
864  */
865 static int shmem_getpage(struct inode *inode, unsigned long idx,
866                         struct page **pagep, enum sgp_type sgp, int *type)
867 {
868         struct address_space *mapping = inode->i_mapping;
869         struct shmem_inode_info *info = SHMEM_I(inode);
870         struct shmem_sb_info *sbinfo;
871         struct page *filepage = *pagep;
872         struct page *swappage;
873         swp_entry_t *entry;
874         swp_entry_t swap;
875         int error, majmin = VM_FAULT_MINOR;
876
877         if (idx >= SHMEM_MAX_INDEX)
878                 return -EFBIG;
879         /*
880          * Normally, filepage is NULL on entry, and either found
881          * uptodate immediately, or allocated and zeroed, or read
882          * in under swappage, which is then assigned to filepage.
883          * But shmem_prepare_write passes in a locked filepage,
884          * which may be found not uptodate by other callers too,
885          * and may need to be copied from the swappage read in.
886          */
887 repeat:
888         if (!filepage)
889                 filepage = find_lock_page(mapping, idx);
890         if (filepage && PageUptodate(filepage))
891                 goto done;
892         error = 0;
893         if (sgp == SGP_QUICK)
894                 goto failed;
895
896         spin_lock(&info->lock);
897         shmem_recalc_inode(inode);
898         entry = shmem_swp_alloc(info, idx, sgp);
899         if (IS_ERR(entry)) {
900                 spin_unlock(&info->lock);
901                 error = PTR_ERR(entry);
902                 goto failed;
903         }
904         swap = *entry;
905
906         if (swap.val) {
907                 /* Look it up and read it in.. */
908                 swappage = lookup_swap_cache(swap);
909                 if (!swappage) {
910                         shmem_swp_unmap(entry);
911                         spin_unlock(&info->lock);
912                         /* here we actually do the io */
913                         if (majmin == VM_FAULT_MINOR && type)
914                                 inc_page_state(pgmajfault);
915                         majmin = VM_FAULT_MAJOR;
916                         swappage = shmem_swapin(info, swap, idx);
917                         if (!swappage) {
918                                 spin_lock(&info->lock);
919                                 entry = shmem_swp_alloc(info, idx, sgp);
920                                 if (IS_ERR(entry))
921                                         error = PTR_ERR(entry);
922                                 else {
923                                         if (entry->val == swap.val)
924                                                 error = -ENOMEM;
925                                         shmem_swp_unmap(entry);
926                                 }
927                                 spin_unlock(&info->lock);
928                                 if (error)
929                                         goto failed;
930                                 goto repeat;
931                         }
932                         wait_on_page_locked(swappage);
933                         page_cache_release(swappage);
934                         goto repeat;
935                 }
936
937                 /* We have to do this with page locked to prevent races */
938                 if (TestSetPageLocked(swappage)) {
939                         shmem_swp_unmap(entry);
940                         spin_unlock(&info->lock);
941                         wait_on_page_locked(swappage);
942                         page_cache_release(swappage);
943                         goto repeat;
944                 }
945                 if (PageWriteback(swappage)) {
946                         shmem_swp_unmap(entry);
947                         spin_unlock(&info->lock);
948                         wait_on_page_writeback(swappage);
949                         unlock_page(swappage);
950                         page_cache_release(swappage);
951                         goto repeat;
952                 }
953                 if (!PageUptodate(swappage)) {
954                         shmem_swp_unmap(entry);
955                         spin_unlock(&info->lock);
956                         unlock_page(swappage);
957                         page_cache_release(swappage);
958                         error = -EIO;
959                         goto failed;
960                 }
961
962                 if (filepage) {
963                         shmem_swp_set(info, entry, 0);
964                         shmem_swp_unmap(entry);
965                         delete_from_swap_cache(swappage);
966                         spin_unlock(&info->lock);
967                         copy_highpage(filepage, swappage);
968                         unlock_page(swappage);
969                         page_cache_release(swappage);
970                         flush_dcache_page(filepage);
971                         SetPageUptodate(filepage);
972                         set_page_dirty(filepage);
973                         swap_free(swap);
974                 } else if (!(error = move_from_swap_cache(
975                                 swappage, idx, mapping))) {
976                         info->flags |= SHMEM_PAGEIN;
977                         shmem_swp_set(info, entry, 0);
978                         shmem_swp_unmap(entry);
979                         spin_unlock(&info->lock);
980                         filepage = swappage;
981                         swap_free(swap);
982                 } else {
983                         shmem_swp_unmap(entry);
984                         spin_unlock(&info->lock);
985                         unlock_page(swappage);
986                         page_cache_release(swappage);
987                         if (error == -ENOMEM) {
988                                 /* let kswapd refresh zone for GFP_ATOMICs */
989                                 blk_congestion_wait(WRITE, HZ/50);
990                         }
991                         goto repeat;
992                 }
993         } else if (sgp == SGP_READ && !filepage) {
994                 shmem_swp_unmap(entry);
995                 filepage = find_get_page(mapping, idx);
996                 if (filepage &&
997                     (!PageUptodate(filepage) || TestSetPageLocked(filepage))) {
998                         spin_unlock(&info->lock);
999                         wait_on_page_locked(filepage);
1000                         page_cache_release(filepage);
1001                         filepage = NULL;
1002                         goto repeat;
1003                 }
1004                 spin_unlock(&info->lock);
1005         } else {
1006                 shmem_swp_unmap(entry);
1007                 sbinfo = SHMEM_SB(inode->i_sb);
1008                 spin_lock(&sbinfo->stat_lock);
1009                 if (sbinfo->free_blocks == 0 || shmem_acct_block(info->flags)) {
1010                         spin_unlock(&sbinfo->stat_lock);
1011                         spin_unlock(&info->lock);
1012                         error = -ENOSPC;
1013                         goto failed;
1014                 }
1015                 sbinfo->free_blocks--;
1016                 inode->i_blocks += BLOCKS_PER_PAGE;
1017                 spin_unlock(&sbinfo->stat_lock);
1018
1019                 if (!filepage) {
1020                         spin_unlock(&info->lock);
1021                         filepage = shmem_alloc_page(mapping_gfp_mask(mapping),
1022                                                     info,
1023                                                     idx);
1024                         if (!filepage) {
1025                                 shmem_unacct_blocks(info->flags, 1);
1026                                 shmem_free_block(inode);
1027                                 error = -ENOMEM;
1028                                 goto failed;
1029                         }
1030
1031                         spin_lock(&info->lock);
1032                         entry = shmem_swp_alloc(info, idx, sgp);
1033                         if (IS_ERR(entry))
1034                                 error = PTR_ERR(entry);
1035                         else {
1036                                 swap = *entry;
1037                                 shmem_swp_unmap(entry);
1038                         }
1039                         if (error || swap.val || 0 != add_to_page_cache_lru(
1040                                         filepage, mapping, idx, GFP_ATOMIC)) {
1041                                 spin_unlock(&info->lock);
1042                                 page_cache_release(filepage);
1043                                 shmem_unacct_blocks(info->flags, 1);
1044                                 shmem_free_block(inode);
1045                                 filepage = NULL;
1046                                 if (error)
1047                                         goto failed;
1048                                 goto repeat;
1049                         }
1050                         info->flags |= SHMEM_PAGEIN;
1051                 }
1052
1053                 info->alloced++;
1054                 spin_unlock(&info->lock);
1055                 clear_highpage(filepage);
1056                 flush_dcache_page(filepage);
1057                 SetPageUptodate(filepage);
1058         }
1059 done:
1060         if (!*pagep) {
1061                 if (filepage) {
1062                         unlock_page(filepage);
1063                         *pagep = filepage;
1064                 } else
1065                         *pagep = ZERO_PAGE(0);
1066         }
1067         if (type)
1068                 *type = majmin;
1069         return 0;
1070
1071 failed:
1072         if (*pagep != filepage) {
1073                 unlock_page(filepage);
1074                 page_cache_release(filepage);
1075         }
1076         return error;
1077 }
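
/*
 * Caller's-eye sketch, following the pattern of do_shmem_file_read
 * below: with *pagep preset to NULL and sgp == SGP_READ, a hole in the
 * file comes back as ZERO_PAGE(0) instead of allocating a page:
 *
 *	struct page *page = NULL;
 *	error = shmem_getpage(inode, index, &page, SGP_READ, NULL);
 *	if (error)
 *		return error;
 *	... copy out of page, then page_cache_release(page) ...
 */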
1078
1079 struct page *shmem_nopage(struct vm_area_struct *vma, unsigned long address, int *type)
1080 {
1081         struct inode *inode = vma->vm_file->f_dentry->d_inode;
1082         struct page *page = NULL;
1083         unsigned long idx;
1084         int error;
1085
1086         idx = (address - vma->vm_start) >> PAGE_SHIFT;
1087         idx += vma->vm_pgoff;
1088         idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
1089
1090         error = shmem_getpage(inode, idx, &page, SGP_CACHE, type);
1091         if (error)
1092                 return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS;
1093
1094         mark_page_accessed(page);
1095         return page;
1096 }
1097
1098 static int shmem_populate(struct vm_area_struct *vma,
1099         unsigned long addr, unsigned long len,
1100         pgprot_t prot, unsigned long pgoff, int nonblock)
1101 {
1102         struct inode *inode = vma->vm_file->f_dentry->d_inode;
1103         struct mm_struct *mm = vma->vm_mm;
1104         enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE;
1105         unsigned long size;
1106
1107         size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
1108         if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size)
1109                 return -EINVAL;
1110
1111         while ((long) len > 0) {
1112                 struct page *page = NULL;
1113                 int err;
1114                 /*
1115                  * Will need changing if PAGE_CACHE_SIZE != PAGE_SIZE
1116                  */
1117                 err = shmem_getpage(inode, pgoff, &page, sgp, NULL);
1118                 if (err)
1119                         return err;
1120                 if (page) {
1121                         mark_page_accessed(page);
1122                         err = install_page(mm, vma, addr, page, prot);
1123                         if (err) {
1124                                 page_cache_release(page);
1125                                 return err;
1126                         }
1127                 } else if (nonblock) {
1128                         /*
1129                          * If a nonlinear mapping then store the file page
1130                          * offset in the pte.
1131                          */
1132                         if (pgoff != linear_page_index(vma, addr)) {
1133                                 err = install_file_pte(mm, vma, addr, pgoff, prot);
1134                                 if (err)
1135                                         return err;
1136                         }
1137                 }
1138
1139                 len -= PAGE_SIZE;
1140                 addr += PAGE_SIZE;
1141                 pgoff++;
1142         }
1143         return 0;
1144 }
1145
1146 #ifdef CONFIG_NUMA
1147 int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
1148 {
1149         struct inode *i = vma->vm_file->f_dentry->d_inode;
1150         return mpol_set_shared_policy(&SHMEM_I(i)->policy, vma, new);
1151 }
1152
1153 struct mempolicy *
1154 shmem_get_policy(struct vm_area_struct *vma, unsigned long addr)
1155 {
1156         struct inode *i = vma->vm_file->f_dentry->d_inode;
1157         unsigned long idx;
1158
1159         idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
1160         return mpol_shared_policy_lookup(&SHMEM_I(i)->policy, idx);
1161 }
1162 #endif
1163
1164 void shmem_lock(struct file *file, int lock)
1165 {
1166         struct inode *inode = file->f_dentry->d_inode;
1167         struct shmem_inode_info *info = SHMEM_I(inode);
1168
1169         spin_lock(&info->lock);
1170         if (lock)
1171                 info->flags |= VM_LOCKED;
1172         else
1173                 info->flags &= ~VM_LOCKED;
1174         spin_unlock(&info->lock);
1175 }
1176
1177 static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
1178 {
1179         file_accessed(file);
1180         vma->vm_ops = &shmem_vm_ops;
1181         return 0;
1182 }
1183
1184 static struct inode *
1185 shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
1186 {
1187         struct inode *inode;
1188         struct shmem_inode_info *info;
1189         struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1190
1191         spin_lock(&sbinfo->stat_lock);
1192         if (!sbinfo->free_inodes) {
1193                 spin_unlock(&sbinfo->stat_lock);
1194                 return NULL;
1195         }
1196         sbinfo->free_inodes--;
1197         spin_unlock(&sbinfo->stat_lock);
1198
1199         inode = new_inode(sb);
1200         if (inode) {
1201                 inode->i_mode = mode;
1202                 inode->i_uid = current->fsuid;
1203                 inode->i_gid = current->fsgid;
1204                 inode->i_blksize = PAGE_CACHE_SIZE;
1205                 inode->i_blocks = 0;
1206                 inode->i_mapping->a_ops = &shmem_aops;
1207                 inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
1208                 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
1209                 info = SHMEM_I(inode);
1210                 memset(info, 0, (char *)inode - (char *)info);
1211                 spin_lock_init(&info->lock);
1212                 mpol_shared_policy_init(&info->policy);
1213                 switch (mode & S_IFMT) {
1214                 default:
1215                         init_special_inode(inode, mode, dev);
1216                         break;
1217                 case S_IFREG:
1218                         inode->i_op = &shmem_inode_operations;
1219                         inode->i_fop = &shmem_file_operations;
1220                         spin_lock(&shmem_ilock);
1221                         list_add_tail(&info->list, &shmem_inodes);
1222                         spin_unlock(&shmem_ilock);
1223                         break;
1224                 case S_IFDIR:
1225                         inode->i_nlink++;
1226                         /* Some things misbehave if size == 0 on a directory */
1227                         inode->i_size = 2 * BOGO_DIRENT_SIZE;
1228                         inode->i_op = &shmem_dir_inode_operations;
1229                         inode->i_fop = &simple_dir_operations;
1230                         break;
1231                 case S_IFLNK:
1232                         break;
1233                 }
1234         }
1235         return inode;
1236 }
1237
1238 static int shmem_set_size(struct shmem_sb_info *info,
1239                           unsigned long max_blocks, unsigned long max_inodes)
1240 {
1241         int error;
1242         unsigned long blocks, inodes;
1243
1244         spin_lock(&info->stat_lock);
1245         blocks = info->max_blocks - info->free_blocks;
1246         inodes = info->max_inodes - info->free_inodes;
1247         error = -EINVAL;
1248         if (max_blocks < blocks)
1249                 goto out;
1250         if (max_inodes < inodes)
1251                 goto out;
1252         error = 0;
1253         info->max_blocks  = max_blocks;
1254         info->free_blocks = max_blocks - blocks;
1255         info->max_inodes  = max_inodes;
1256         info->free_inodes = max_inodes - inodes;
1257 out:
1258         spin_unlock(&info->stat_lock);
1259         return error;
1260 }
1261
1262 #ifdef CONFIG_TMPFS
1263
1264 static struct inode_operations shmem_symlink_inode_operations;
1265 static struct inode_operations shmem_symlink_inline_operations;
1266
1267 /*
1268  * Normally tmpfs makes no use of shmem_prepare_write, but it
1269  * lets a tmpfs file be used read-write below the loop driver.
1270  */
1271 static int
1272 shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
1273 {
1274         struct inode *inode = page->mapping->host;
1275         return shmem_getpage(inode, page->index, &page, SGP_WRITE, NULL);
1276 }
1277
1278 static ssize_t
1279 shmem_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
1280 {
1281         struct inode    *inode = file->f_dentry->d_inode;
1282         loff_t          pos;
1283         unsigned long   written;
1284         int             err;
1285
1286         if ((ssize_t) count < 0)
1287                 return -EINVAL;
1288
1289         if (!access_ok(VERIFY_READ, buf, count))
1290                 return -EFAULT;
1291
1292         down(&inode->i_sem);
1293
1294         pos = *ppos;
1295         written = 0;
1296
1297         err = generic_write_checks(file, &pos, &count, 0);
1298         if (err || !count)
1299                 goto out;
1300
1301         err = remove_suid(file->f_dentry);
1302         if (err)
1303                 goto out;
1304
1305         inode->i_ctime = inode->i_mtime = CURRENT_TIME;
1306
1307         do {
1308                 struct page *page = NULL;
1309                 unsigned long bytes, index, offset;
1310                 char *kaddr;
1311                 int left;
1312
1313                 offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
1314                 index = pos >> PAGE_CACHE_SHIFT;
1315                 bytes = PAGE_CACHE_SIZE - offset;
1316                 if (bytes > count)
1317                         bytes = count;
1318
1319                 /*
1320                  * We don't hold page lock across copy from user -
1321                  * what would it guard against? - so no deadlock here.
1322                  * But it still may be a good idea to prefault below.
1323                  */
1324
1325                 err = shmem_getpage(inode, index, &page, SGP_WRITE, NULL);
1326                 if (err)
1327                         break;
1328
1329                 left = bytes;
1330                 if (PageHighMem(page)) {
1331                         volatile unsigned char dummy;
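			/*
			 * Touch the first and last bytes of the user
			 * buffer now, while we can still sleep: the
			 * __copy_from_user under kmap_atomic below cannot
			 * service a fault, so prefaulting makes the slow
			 * fallback to plain kmap() unlikely.
			 */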
1332                         __get_user(dummy, buf);
1333                         __get_user(dummy, buf + bytes - 1);
1334
1335                         kaddr = kmap_atomic(page, KM_USER0);
1336                         left = __copy_from_user(kaddr + offset, buf, bytes);
1337                         kunmap_atomic(kaddr, KM_USER0);
1338                 }
1339                 if (left) {
1340                         kaddr = kmap(page);
1341                         left = __copy_from_user(kaddr + offset, buf, bytes);
1342                         kunmap(page);
1343                 }
1344
1345                 written += bytes;
1346                 count -= bytes;
1347                 pos += bytes;
1348                 buf += bytes;
1349                 if (pos > inode->i_size)
1350                         i_size_write(inode, pos);
1351
1352                 flush_dcache_page(page);
1353                 set_page_dirty(page);
1354                 mark_page_accessed(page);
1355                 page_cache_release(page);
1356
1357                 if (left) {
1358                         pos -= left;
1359                         written -= left;
1360                         err = -EFAULT;
1361                         break;
1362                 }
1363
1364                 /*
1365                  * Our dirty pages are not counted in nr_dirty,
1366                  * and we do not attempt to balance dirty pages.
1367                  */
1368
1369                 cond_resched();
1370         } while (count);
1371
1372         *ppos = pos;
1373         if (written)
1374                 err = written;
1375 out:
1376         up(&inode->i_sem);
1377         return err;
1378 }
1379
1380 static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor)
1381 {
1382         struct inode *inode = filp->f_dentry->d_inode;
1383         struct address_space *mapping = inode->i_mapping;
1384         unsigned long index, offset;
1385
1386         index = *ppos >> PAGE_CACHE_SHIFT;
1387         offset = *ppos & ~PAGE_CACHE_MASK;
1388
1389         for (;;) {
1390                 struct page *page = NULL;
1391                 unsigned long end_index, nr, ret;
1392                 loff_t i_size = i_size_read(inode);
1393
1394                 end_index = i_size >> PAGE_CACHE_SHIFT;
1395                 if (index > end_index)
1396                         break;
1397                 if (index == end_index) {
1398                         nr = i_size & ~PAGE_CACHE_MASK;
1399                         if (nr <= offset)
1400                                 break;
1401                 }
1402
1403                 desc->error = shmem_getpage(inode, index, &page, SGP_READ, NULL);
1404                 if (desc->error) {
1405                         if (desc->error == -EINVAL)
1406                                 desc->error = 0;
1407                         break;
1408                 }
1409
1410                 /*
1411                  * We must evaluate after, since reads (unlike writes)
1412                  * are called without i_sem protection against truncate
1413                  */
1414                 nr = PAGE_CACHE_SIZE;
1415                 i_size = i_size_read(inode);
1416                 end_index = i_size >> PAGE_CACHE_SHIFT;
1417                 if (index == end_index) {
1418                         nr = i_size & ~PAGE_CACHE_MASK;
1419                         if (nr <= offset) {
1420                                 page_cache_release(page);
1421                                 break;
1422                         }
1423                 }
1424                 nr -= offset;
1425
1426                 if (page != ZERO_PAGE(0)) {
1427                         /*
1428                          * If users can be writing to this page using arbitrary
1429                          * virtual addresses, take care about potential aliasing
1430                          * before reading the page on the kernel side.
1431                          */
1432                         if (mapping_writably_mapped(mapping))
1433                                 flush_dcache_page(page);
1434                         /*
1435                          * Mark the page accessed if we read the beginning.
1436                          */
1437                         if (!offset)
1438                                 mark_page_accessed(page);
1439                 }
1440
1441                 /*
1442                  * Ok, we have the page, and it's up-to-date, so
1443                  * now we can copy it to user space...
1444                  *
1445                  * The actor routine returns how many bytes were actually used.
1446                  * NOTE! This may not be the same as how much of a user buffer
1447                  * we filled up (we may be padding etc), so we can only update
1448                  * "pos" here (the actor routine has to update the user buffer
1449                  * pointers and the remaining count).
1450                  */
1451                 ret = actor(desc, page, offset, nr);
1452                 offset += ret;
1453                 index += offset >> PAGE_CACHE_SHIFT;
1454                 offset &= ~PAGE_CACHE_MASK;
1455
1456                 page_cache_release(page);
1457                 if (ret != nr || !desc->count)
1458                         break;
1459
1460                 cond_resched();
1461         }
1462
1463         *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
1464         file_accessed(filp);
1465 }
1466
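/*
 * For reference, a simplified sketch of a read actor as driven by the loop
 * above.  The real tmpfs/pagecache read path uses file_read_actor() from
 * mm/filemap.c; this illustrative version (example_read_actor is a
 * hypothetical name) only shows the descriptor protocol: return the number
 * of bytes consumed and advance desc->buf, desc->count and desc->written.
 *
 *	static int example_read_actor(read_descriptor_t *desc, struct page *page,
 *				      unsigned long offset, unsigned long size)
 *	{
 *		unsigned long left, count = desc->count;
 *		char *kaddr;
 *
 *		if (size > count)
 *			size = count;
 *		kaddr = kmap(page);
 *		left = __copy_to_user(desc->buf, kaddr + offset, size);
 *		kunmap(page);
 *		if (left) {
 *			size -= left;
 *			desc->error = -EFAULT;
 *		}
 *		desc->count = count - size;
 *		desc->written += size;
 *		desc->buf += size;
 *		return size;
 *	}
 */
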
1467 static ssize_t shmem_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
1468 {
1469         read_descriptor_t desc;
1470
1471         if ((ssize_t) count < 0)
1472                 return -EINVAL;
1473         if (!access_ok(VERIFY_WRITE, buf, count))
1474                 return -EFAULT;
1475         if (!count)
1476                 return 0;
1477
1478         desc.written = 0;
1479         desc.count = count;
1480         desc.buf = buf;
1481         desc.error = 0;
1482
1483         do_shmem_file_read(filp, ppos, &desc, file_read_actor);
1484         if (desc.written)
1485                 return desc.written;
1486         return desc.error;
1487 }
1488
1489 static ssize_t shmem_file_sendfile(struct file *in_file, loff_t *ppos,
1490                          size_t count, read_actor_t actor, void __user *target)
1491 {
1492         read_descriptor_t desc;
1493
1494         if (!count)
1495                 return 0;
1496
1497         desc.written = 0;
1498         desc.count = count;
1499         desc.buf = target;
1500         desc.error = 0;
1501
1502         do_shmem_file_read(in_file, ppos, &desc, actor);
1503         if (desc.written)
1504                 return desc.written;
1505         return desc.error;
1506 }
1507
1508 static int shmem_statfs(struct super_block *sb, struct kstatfs *buf)
1509 {
1510         struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1511
1512         buf->f_type = TMPFS_MAGIC;
1513         buf->f_bsize = PAGE_CACHE_SIZE;
1514         spin_lock(&sbinfo->stat_lock);
1515         buf->f_blocks = sbinfo->max_blocks;
1516         buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
1517         buf->f_files = sbinfo->max_inodes;
1518         buf->f_ffree = sbinfo->free_inodes;
1519         spin_unlock(&sbinfo->stat_lock);
1520         buf->f_namelen = NAME_MAX;
1521         return 0;
1522 }
1523
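/*
 * Example of what the statfs numbers above mean to userspace (illustrative;
 * the mount point and the 4096-byte page size are assumptions).  For a mount
 * created with "size=64m,nr_inodes=8192":
 *
 *	struct statfs st;
 *	statfs("/dev/shm", &st);
 *	(st.f_type == TMPFS_MAGIC, st.f_bsize == 4096,
 *	 st.f_blocks == 16384, st.f_files == 8192)
 *
 * so "df" reports the configured limits, not any real backing device.
 */
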
1524 /*
1525  * File creation. Allocate an inode, and we're done.
1526  */
1527 static int
1528 shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1529 {
1530         struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev);
1531         int error = -ENOSPC;
1532
1533         if (inode) {
1534                 if (dir->i_mode & S_ISGID) {
1535                         inode->i_gid = dir->i_gid;
1536                         if (S_ISDIR(mode))
1537                                 inode->i_mode |= S_ISGID;
1538                 }
1539                 dir->i_size += BOGO_DIRENT_SIZE;
1540                 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1541                 d_instantiate(dentry, inode);
1542                 dget(dentry); /* Extra count - pin the dentry in core */
1543                 error = 0;
1544         }
1545         return error;
1546 }
1547
1548 static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1549 {
1550         int error;
1551
1552         if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
1553                 return error;
1554         dir->i_nlink++;
1555         return 0;
1556 }
1557
1558 static int shmem_create(struct inode *dir, struct dentry *dentry, int mode,
1559                 struct nameidata *nd)
1560 {
1561         return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
1562 }
1563
1564 /*
1565  * Link a file.
1566  */
1567 static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
1568 {
1569         struct inode *inode = old_dentry->d_inode;
1570
1571         dir->i_size += BOGO_DIRENT_SIZE;
1572         inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1573         inode->i_nlink++;
1574         atomic_inc(&inode->i_count);    /* New dentry reference */
1575         dget(dentry);           /* Extra pinning count for the created dentry */
1576         d_instantiate(dentry, inode);
1577         return 0;
1578 }
1579
1580 static int shmem_unlink(struct inode *dir, struct dentry *dentry)
1581 {
1582         struct inode *inode = dentry->d_inode;
1583
1584         dir->i_size -= BOGO_DIRENT_SIZE;
1585         inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1586         inode->i_nlink--;
1587         dput(dentry);   /* Undo the count from "create" - this does all the work */
1588         return 0;
1589 }
1590
1591 static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
1592 {
1593         if (!simple_empty(dentry))
1594                 return -ENOTEMPTY;
1595
1596         dir->i_nlink--;
1597         return shmem_unlink(dir, dentry);
1598 }
1599
1600 /*
1601  * The VFS layer already does all the dentry work for rename;
1602  * we just have to decrement the usage count for the target if
1603  * it exists, so that the VFS layer correctly frees it when it
1604  * gets overwritten.
1605  */
1606 static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
1607 {
1608         struct inode *inode = old_dentry->d_inode;
1609         int they_are_dirs = S_ISDIR(inode->i_mode);
1610
1611         if (!simple_empty(new_dentry))
1612                 return -ENOTEMPTY;
1613
1614         if (new_dentry->d_inode) {
1615                 (void) shmem_unlink(new_dir, new_dentry);
1616                 if (they_are_dirs)
1617                         old_dir->i_nlink--;
1618         } else if (they_are_dirs) {
1619                 old_dir->i_nlink--;
1620                 new_dir->i_nlink++;
1621         }
1622
1623         old_dir->i_size -= BOGO_DIRENT_SIZE;
1624         new_dir->i_size += BOGO_DIRENT_SIZE;
1625         old_dir->i_ctime = old_dir->i_mtime =
1626         new_dir->i_ctime = new_dir->i_mtime =
1627         inode->i_ctime = CURRENT_TIME;
1628         return 0;
1629 }
1630
1631 static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1632 {
1633         int error;
1634         int len;
1635         struct inode *inode;
1636         struct page *page = NULL;
1637         char *kaddr;
1638         struct shmem_inode_info *info;
1639
1640         len = strlen(symname) + 1;
1641         if (len > PAGE_CACHE_SIZE)
1642                 return -ENAMETOOLONG;
1643
1644         inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
1645         if (!inode)
1646                 return -ENOSPC;
1647
1648         info = SHMEM_I(inode);
1649         inode->i_size = len-1;
1650         if (len <= (char *)inode - (char *)info) {
1651                 /* do it inline */
1652                 memcpy(info, symname, len);
1653                 inode->i_op = &shmem_symlink_inline_operations;
1654         } else {
1655                 error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
1656                 if (error) {
1657                         iput(inode);
1658                         return error;
1659                 }
1660                 inode->i_op = &shmem_symlink_inode_operations;
1661                 spin_lock(&shmem_ilock);
1662                 list_add_tail(&info->list, &shmem_inodes);
1663                 spin_unlock(&shmem_ilock);
1664                 kaddr = kmap_atomic(page, KM_USER0);
1665                 memcpy(kaddr, symname, len);
1666                 kunmap_atomic(kaddr, KM_USER0);
1667                 set_page_dirty(page);
1668                 page_cache_release(page);
1669         }
1670         if (dir->i_mode & S_ISGID)
1671                 inode->i_gid = dir->i_gid;
1672         dir->i_size += BOGO_DIRENT_SIZE;
1673         dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1674         d_instantiate(dentry, inode);
1675         dget(dentry);
1676         return 0;
1677 }
1678
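/*
 * Note on the two symlink flavours created above (illustrative; the exact
 * inline capacity depends on the layout of struct shmem_inode_info): targets
 * short enough to fit in the bytes of shmem_inode_info that precede the
 * embedded vfs_inode are copied over the start of the info struct and served
 * by shmem_symlink_inline_operations; anything longer is written into page 0
 * of the file and served by shmem_symlink_inode_operations, e.g.
 *
 *	symlink("short", "/tmp/s");			(likely inline)
 *	symlink(a_path_of_several_hundred_bytes, "/tmp/l");	(page-backed)
 *
 * where both path arguments are hypothetical.
 */
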
1679 static int shmem_readlink_inline(struct dentry *dentry, char __user *buffer, int buflen)
1680 {
1681         return vfs_readlink(dentry, buffer, buflen, (const char *)SHMEM_I(dentry->d_inode));
1682 }
1683
1684 static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
1685 {
1686         return vfs_follow_link(nd, (const char *)SHMEM_I(dentry->d_inode));
1687 }
1688
1689 static int shmem_readlink(struct dentry *dentry, char __user *buffer, int buflen)
1690 {
1691         struct page *page = NULL;
1692         int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
1693         if (res)
1694                 return res;
1695         res = vfs_readlink(dentry, buffer, buflen, kmap(page));
1696         kunmap(page);
1697         mark_page_accessed(page);
1698         page_cache_release(page);
1699         return res;
1700 }
1701
1702 static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
1703 {
1704         struct page *page = NULL;
1705         int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
1706         if (res)
1707                 return res;
1708         res = vfs_follow_link(nd, kmap(page));
1709         kunmap(page);
1710         mark_page_accessed(page);
1711         page_cache_release(page);
1712         return res;
1713 }
1714
1715 static struct inode_operations shmem_symlink_inline_operations = {
1716         .readlink       = shmem_readlink_inline,
1717         .follow_link    = shmem_follow_link_inline,
1718 };
1719
1720 static struct inode_operations shmem_symlink_inode_operations = {
1721         .truncate       = shmem_truncate,
1722         .readlink       = shmem_readlink,
1723         .follow_link    = shmem_follow_link,
1724 };
1725
1726 static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long *blocks, unsigned long *inodes)
1727 {
1728         char *this_char, *value, *rest;
1729
1730         while ((this_char = strsep(&options, ",")) != NULL) {
1731                 if (!*this_char)
1732                         continue;
1733                 if ((value = strchr(this_char,'=')) != NULL) {
1734                         *value++ = 0;
1735                 } else {
1736                         printk(KERN_ERR
1737                             "tmpfs: No value for mount option '%s'\n",
1738                             this_char);
1739                         return 1;
1740                 }
1741
1742                 if (!strcmp(this_char,"size")) {
1743                         unsigned long long size;
1744                         size = memparse(value,&rest);
1745                         if (*rest == '%') {
1746                                 size <<= PAGE_SHIFT;
1747                                 size *= totalram_pages;
1748                                 do_div(size, 100);
1749                                 rest++;
1750                         }
1751                         if (*rest)
1752                                 goto bad_val;
1753                         *blocks = size >> PAGE_CACHE_SHIFT;
1754                 } else if (!strcmp(this_char,"nr_blocks")) {
1755                         *blocks = memparse(value,&rest);
1756                         if (*rest)
1757                                 goto bad_val;
1758                 } else if (!strcmp(this_char,"nr_inodes")) {
1759                         *inodes = memparse(value,&rest);
1760                         if (*rest)
1761                                 goto bad_val;
1762                 } else if (!strcmp(this_char,"mode")) {
1763                         if (!mode)
1764                                 continue;
1765                         *mode = simple_strtoul(value,&rest,8);
1766                         if (*rest)
1767                                 goto bad_val;
1768                 } else if (!strcmp(this_char,"uid")) {
1769                         if (!uid)
1770                                 continue;
1771                         *uid = simple_strtoul(value,&rest,0);
1772                         if (*rest)
1773                                 goto bad_val;
1774                 } else if (!strcmp(this_char,"gid")) {
1775                         if (!gid)
1776                                 continue;
1777                         *gid = simple_strtoul(value,&rest,0);
1778                         if (*rest)
1779                                 goto bad_val;
1780                 } else {
1781                         printk(KERN_ERR "tmpfs: Bad mount option %s\n",
1782                                this_char);
1783                         return 1;
1784                 }
1785         }
1786         return 0;
1787
1788 bad_val:
1789         printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
1790                value, this_char);
1791         return 1;
1792
1793 }
1794
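/*
 * Example option strings accepted by the parser above (illustrative command
 * lines; the mount point is an assumption):
 *
 *	mount -t tmpfs -o size=50%,nr_inodes=8192,mode=1777 tmpfs /mnt/tmp
 *	mount -o remount,size=512m /mnt/tmp
 *
 * "size" takes the k/m/g suffixes understood by memparse() or a trailing '%'
 * of total RAM; "nr_blocks" and "nr_inodes" take plain counts; "mode", "uid"
 * and "gid" only apply at initial mount time and are skipped on remount.
 */
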
1795 static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
1796 {
1797         struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1798         unsigned long max_blocks = sbinfo->max_blocks;
1799         unsigned long max_inodes = sbinfo->max_inodes;
1800
1801         if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks, &max_inodes))
1802                 return -EINVAL;
1803         return shmem_set_size(sbinfo, max_blocks, max_inodes);
1804 }
1805 #endif
1806
1807 static int shmem_fill_super(struct super_block *sb,
1808                             void *data, int silent)
1809 {
1810         struct inode *inode;
1811         struct dentry *root;
1812         unsigned long blocks, inodes;
1813         int mode   = S_IRWXUGO | S_ISVTX;
1814         uid_t uid = current->fsuid;
1815         gid_t gid = current->fsgid;
1816         struct shmem_sb_info *sbinfo;
1817         int err = -ENOMEM;
1818
1819         sbinfo = kmalloc(sizeof(struct shmem_sb_info), GFP_KERNEL);
1820         if (!sbinfo)
1821                 return -ENOMEM;
1822         sb->s_fs_info = sbinfo;
1823         memset(sbinfo, 0, sizeof(struct shmem_sb_info));
1824
1825         /*
1826  * By default we only allow half of the physical RAM per
1827  * tmpfs instance.
1828          */
1829         blocks = inodes = totalram_pages / 2;
1830
1831 #ifdef CONFIG_TMPFS
1832         if (shmem_parse_options(data, &mode, &uid, &gid, &blocks, &inodes)) {
1833                 err = -EINVAL;
1834                 goto failed;
1835         }
1836 #else
1837         sb->s_flags |= MS_NOUSER;
1838 #endif
1839
1840         spin_lock_init(&sbinfo->stat_lock);
1841         sbinfo->max_blocks = blocks;
1842         sbinfo->free_blocks = blocks;
1843         sbinfo->max_inodes = inodes;
1844         sbinfo->free_inodes = inodes;
1845         sb->s_maxbytes = SHMEM_MAX_BYTES;
1846         sb->s_blocksize = PAGE_CACHE_SIZE;
1847         sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
1848         sb->s_magic = TMPFS_MAGIC;
1849         sb->s_op = &shmem_ops;
1850         inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
1851         if (!inode)
1852                 goto failed;
1853         inode->i_uid = uid;
1854         inode->i_gid = gid;
1855         root = d_alloc_root(inode);
1856         if (!root)
1857                 goto failed_iput;
1858         sb->s_root = root;
1859         return 0;
1860
1861 failed_iput:
1862         iput(inode);
1863 failed:
1864         kfree(sbinfo);
1865         sb->s_fs_info = NULL;
1866         return err;
1867 }
1868
1869 static void shmem_put_super(struct super_block *sb)
1870 {
1871         kfree(sb->s_fs_info);
1872         sb->s_fs_info = NULL;
1873 }
1874
1875 static kmem_cache_t *shmem_inode_cachep;
1876
1877 static struct inode *shmem_alloc_inode(struct super_block *sb)
1878 {
1879         struct shmem_inode_info *p;
1880         p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, SLAB_KERNEL);
1881         if (!p)
1882                 return NULL;
1883         return &p->vfs_inode;
1884 }
1885
1886 static void shmem_destroy_inode(struct inode *inode)
1887 {
1888         mpol_free_shared_policy(&SHMEM_I(inode)->policy);
1889         kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
1890 }
1891
1892 static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
1893 {
1894         struct shmem_inode_info *p = (struct shmem_inode_info *) foo;
1895
1896         if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
1897             SLAB_CTOR_CONSTRUCTOR) {
1898                 inode_init_once(&p->vfs_inode);
1899         }
1900 }
1901
1902 static int init_inodecache(void)
1903 {
1904         shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
1905                                 sizeof(struct shmem_inode_info),
1906                                 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
1907                                 init_once, NULL);
1908         if (shmem_inode_cachep == NULL)
1909                 return -ENOMEM;
1910         return 0;
1911 }
1912
1913 static void destroy_inodecache(void)
1914 {
1915         if (kmem_cache_destroy(shmem_inode_cachep))
1916                 printk(KERN_INFO "shmem_inode_cache: not all structures were freed\n");
1917 }
1918
1919 static struct address_space_operations shmem_aops = {
1920         .writepage      = shmem_writepage,
1921         .set_page_dirty = __set_page_dirty_nobuffers,
1922 #ifdef CONFIG_TMPFS
1923         .prepare_write  = shmem_prepare_write,
1924         .commit_write   = simple_commit_write,
1925 #endif
1926 };
1927
1928 static struct file_operations shmem_file_operations = {
1929         .mmap           = shmem_mmap,
1930 #ifdef CONFIG_TMPFS
1931         .llseek         = generic_file_llseek,
1932         .read           = shmem_file_read,
1933         .write          = shmem_file_write,
1934         .fsync          = simple_sync_file,
1935         .sendfile       = shmem_file_sendfile,
1936 #endif
1937 };
1938
1939 static struct inode_operations shmem_inode_operations = {
1940         .truncate       = shmem_truncate,
1941         .setattr        = shmem_notify_change,
1942 };
1943
1944 static struct inode_operations shmem_dir_inode_operations = {
1945 #ifdef CONFIG_TMPFS
1946         .create         = shmem_create,
1947         .lookup         = simple_lookup,
1948         .link           = shmem_link,
1949         .unlink         = shmem_unlink,
1950         .symlink        = shmem_symlink,
1951         .mkdir          = shmem_mkdir,
1952         .rmdir          = shmem_rmdir,
1953         .mknod          = shmem_mknod,
1954         .rename         = shmem_rename,
1955 #endif
1956 };
1957
1958 static struct super_operations shmem_ops = {
1959         .alloc_inode    = shmem_alloc_inode,
1960         .destroy_inode  = shmem_destroy_inode,
1961 #ifdef CONFIG_TMPFS
1962         .statfs         = shmem_statfs,
1963         .remount_fs     = shmem_remount_fs,
1964 #endif
1965         .delete_inode   = shmem_delete_inode,
1966         .drop_inode     = generic_delete_inode,
1967         .put_super      = shmem_put_super,
1968 };
1969
1970 static struct vm_operations_struct shmem_vm_ops = {
1971         .nopage         = shmem_nopage,
1972         .populate       = shmem_populate,
1973 #ifdef CONFIG_NUMA
1974         .set_policy     = shmem_set_policy,
1975         .get_policy     = shmem_get_policy,
1976 #endif
1977 };
1978
1979 static struct super_block *shmem_get_sb(struct file_system_type *fs_type,
1980         int flags, const char *dev_name, void *data)
1981 {
1982         return get_sb_nodev(fs_type, flags, data, shmem_fill_super);
1983 }
1984
1985 static struct file_system_type tmpfs_fs_type = {
1986         .owner          = THIS_MODULE,
1987         .name           = "tmpfs",
1988         .get_sb         = shmem_get_sb,
1989         .kill_sb        = kill_litter_super,
1990 };
1991 static struct vfsmount *shm_mnt;
1992
1993 static int __init init_tmpfs(void)
1994 {
1995         int error;
1996
1997         error = init_inodecache();
1998         if (error)
1999                 goto out3;
2000
2001         error = register_filesystem(&tmpfs_fs_type);
2002         if (error) {
2003                 printk(KERN_ERR "Could not register tmpfs\n");
2004                 goto out2;
2005         }
2006 #ifdef CONFIG_TMPFS
2007         devfs_mk_dir("shm");
2008 #endif
2009         shm_mnt = kern_mount(&tmpfs_fs_type);
2010         if (IS_ERR(shm_mnt)) {
2011                 error = PTR_ERR(shm_mnt);
2012                 printk(KERN_ERR "Could not kern_mount tmpfs\n");
2013                 goto out1;
2014         }
2015
2016         /* The internal instance should not do size checking */
2017         shmem_set_size(SHMEM_SB(shm_mnt->mnt_sb), ULONG_MAX, ULONG_MAX);
2018         return 0;
2019
2020 out1:
2021         unregister_filesystem(&tmpfs_fs_type);
2022 out2:
2023         destroy_inodecache();
2024 out3:
2025         shm_mnt = ERR_PTR(error);
2026         return error;
2027 }
2028 module_init(init_tmpfs)
2029
2030 /*
2031  * shmem_file_setup - get an unlinked file living in tmpfs
2032  *
2033  * @name: name for dentry (to be seen in /proc/<pid>/maps)
2034  * @size: size to be set for the file
2035  *
2036  */
2037 struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
2038 {
2039         int error;
2040         struct file *file;
2041         struct inode *inode;
2042         struct dentry *dentry, *root;
2043         struct qstr this;
2044
2045         if (IS_ERR(shm_mnt))
2046                 return (void *)shm_mnt;
2047
2048         if (size > SHMEM_MAX_BYTES)
2049                 return ERR_PTR(-EINVAL);
2050
2051         if (shmem_acct_size(flags, size))
2052                 return ERR_PTR(-ENOMEM);
2053
2054         error = -ENOMEM;
2055         this.name = name;
2056         this.len = strlen(name);
2057         this.hash = 0; /* will go */
2058         root = shm_mnt->mnt_root;
2059         dentry = d_alloc(root, &this);
2060         if (!dentry)
2061                 goto put_memory;
2062
2063         error = -ENFILE;
2064         file = get_empty_filp();
2065         if (!file)
2066                 goto put_dentry;
2067
2068         error = -ENOSPC;
2069         inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
2070         if (!inode)
2071                 goto close_file;
2072
2073         SHMEM_I(inode)->flags = flags & VM_ACCOUNT;
2074         d_instantiate(dentry, inode);
2075         inode->i_size = size;
2076         inode->i_nlink = 0;     /* It is unlinked */
2077         file->f_vfsmnt = mntget(shm_mnt);
2078         file->f_dentry = dentry;
2079         file->f_mapping = inode->i_mapping;
2080         file->f_op = &shmem_file_operations;
2081         file->f_mode = FMODE_WRITE | FMODE_READ;
2082         return(file);
2083
2084 close_file:
2085         put_filp(file);
2086 put_dentry:
2087         dput(dentry);
2088 put_memory:
2089         shmem_unacct_size(flags, size);
2090         return ERR_PTR(error);
2091 }
2092
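/*
 * Example of how another kernel subsystem can use shmem_file_setup() (a
 * sketch along the lines of the SysV shm code and shmem_zero_setup() below;
 * error handling trimmed, and the name string is arbitrary):
 *
 *	struct file *filp = shmem_file_setup("example", PAGE_SIZE, VM_ACCOUNT);
 *	if (IS_ERR(filp))
 *		return PTR_ERR(filp);
 *	... use filp, e.g. install it as vma->vm_file ...
 *	fput(filp);		(drops the last reference, since the
 *				 file is already unlinked)
 */
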
2093 /*
2094  * shmem_zero_setup - setup a shared anonymous mapping
2095  *
2096  * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
2097  */
2098 int shmem_zero_setup(struct vm_area_struct *vma)
2099 {
2100         struct file *file;
2101         loff_t size = vma->vm_end - vma->vm_start;
2102
2103         file = shmem_file_setup("dev/zero", size, vma->vm_flags);
2104         if (IS_ERR(file))
2105                 return PTR_ERR(file);
2106
2107         if (vma->vm_file)
2108                 fput(vma->vm_file);
2109         vma->vm_file = file;
2110         vma->vm_ops = &shmem_vm_ops;
2111         return 0;
2112 }
2113
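/*
 * shmem_zero_setup() is how shared anonymous memory gets its backing object:
 * a userspace mapping such as
 *
 *	p = mmap(NULL, length, PROT_READ | PROT_WRITE,
 *		 MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 *
 * reaches this function from do_mmap_pgoff(), so the pages can be shared
 * across fork() and paged out to swap like any other tmpfs file.
 */
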
2114 EXPORT_SYMBOL(shmem_file_setup);