4 * Complete reimplementation
5 * (C) 1997 Thomas Schoebel-Theuer,
6 * with heavy changes by Linus Torvalds
10 * Notes on the allocation strategy:
12 * The dcache is a master of the icache - whenever a dcache entry
13 * exists, the inode will always exist. "iput()" is done either when
14 * the dcache entry is deleted or garbage collected.
17 #include <linux/config.h>
18 #include <linux/string.h>
21 #include <linux/slab.h>
22 #include <linux/init.h>
23 #include <linux/smp_lock.h>
24 #include <linux/cache.h>
25 #include <linux/module.h>
26 #include <linux/mount.h>
27 #include <linux/file.h>
28 #include <asm/uaccess.h>
29 #include <linux/security.h>
30 #include <linux/seqlock.h>
31 #include <linux/swap.h>
33 #define DCACHE_PARANOIA 1
34 /* #define DCACHE_DEBUG 1 */
36 spinlock_t dcache_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
37 seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
39 EXPORT_SYMBOL(dcache_lock);
41 static kmem_cache_t *dentry_cache;
44 * This is the single most critical data structure when it comes
45 * to the dcache: the hashtable for lookups. Somebody should try
46 * to make this good - I've just made it work.
48 * This hash-function tries to avoid losing too many bits of hash
49 * information, yet avoid using a prime hash-size or similar.
51 #define D_HASHBITS d_hash_shift
52 #define D_HASHMASK d_hash_mask
54 static unsigned int d_hash_mask;
55 static unsigned int d_hash_shift;
56 static struct hlist_head *dentry_hashtable;
57 static LIST_HEAD(dentry_unused);
59 /* Statistics gathering. */
60 struct dentry_stat_t dentry_stat = {
64 static void d_callback(void *arg)
66 struct dentry * dentry = (struct dentry *)arg;
68 if (dname_external(dentry)) {
69 kfree(dentry->d_qstr);
71 kmem_cache_free(dentry_cache, dentry);
75 * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry
78 static void d_free(struct dentry *dentry)
80 if (dentry->d_op && dentry->d_op->d_release)
81 dentry->d_op->d_release(dentry);
82 call_rcu(&dentry->d_rcu, d_callback, dentry);
86 * Release the dentry's inode, using the filesystem
87 * d_iput() operation if defined.
88 * Called with dcache_lock and per dentry lock held, drops both.
90 static inline void dentry_iput(struct dentry * dentry)
92 struct inode *inode = dentry->d_inode;
94 dentry->d_inode = NULL;
95 list_del_init(&dentry->d_alias);
96 spin_unlock(&dentry->d_lock);
97 spin_unlock(&dcache_lock);
98 if (dentry->d_op && dentry->d_op->d_iput)
99 dentry->d_op->d_iput(dentry, inode);
103 spin_unlock(&dentry->d_lock);
104 spin_unlock(&dcache_lock);
111 * This is complicated by the fact that we do not want to put
112 * dentries that are no longer on any hash chain on the unused
113 * list: we'd much rather just get rid of them immediately.
115 * However, that implies that we have to traverse the dentry
116 * tree upwards to the parents which might _also_ now be
117 * scheduled for deletion (it may have been only waiting for
118 * its last child to go away).
120 * This tail recursion is done by hand as we don't want to depend
121 * on the compiler to always get this right (gcc generally doesn't).
122 * Real recursion would eat up our stack space.
126 * dput - release a dentry
127 * @dentry: dentry to release
129 * Release a dentry. This will drop the usage count and if appropriate
130 * call the dentry unlink method as well as removing it from the queues and
131 * releasing its resources. If the parent dentries were scheduled for release
132 * they too may now get deleted.
134 * no dcache lock, please.
137 void dput(struct dentry *dentry)
143 if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
146 spin_lock(&dentry->d_lock);
147 if (atomic_read(&dentry->d_count)) {
148 spin_unlock(&dentry->d_lock);
149 spin_unlock(&dcache_lock);
154 * AV: ->d_delete() is _NOT_ allowed to block now.
156 if (dentry->d_op && dentry->d_op->d_delete) {
157 if (dentry->d_op->d_delete(dentry))
160 /* Unreachable? Get rid of it */
161 if (d_unhashed(dentry))
163 if (list_empty(&dentry->d_lru)) {
164 dentry->d_vfs_flags |= DCACHE_REFERENCED;
165 list_add(&dentry->d_lru, &dentry_unused);
166 dentry_stat.nr_unused++;
168 spin_unlock(&dentry->d_lock);
169 spin_unlock(&dcache_lock);
176 struct dentry *parent;
178 /* If dentry was on d_lru list
179 * delete it from there
181 if (!list_empty(&dentry->d_lru)) {
182 list_del(&dentry->d_lru);
183 dentry_stat.nr_unused--;
185 list_del(&dentry->d_child);
186 dentry_stat.nr_dentry--; /* For d_free, below */
187 /*drops the locks, at that point nobody can reach this dentry */
189 parent = dentry->d_parent;
191 if (dentry == parent)
199 * d_invalidate - invalidate a dentry
200 * @dentry: dentry to invalidate
202 * Try to invalidate the dentry if it turns out to be
203 * possible. If there are other dentries that can be
204 * reached through this one we can't delete it and we
205 * return -EBUSY. On success we return 0.
210 int d_invalidate(struct dentry * dentry)
213 * If it's already been dropped, return OK.
215 spin_lock(&dcache_lock);
216 if (d_unhashed(dentry)) {
217 spin_unlock(&dcache_lock);
221 * Check whether to do a partial shrink_dcache
222 * to get rid of unused child entries.
224 if (!list_empty(&dentry->d_subdirs)) {
225 spin_unlock(&dcache_lock);
226 shrink_dcache_parent(dentry);
227 spin_lock(&dcache_lock);
231 * Somebody else still using it?
233 * If it's a directory, we can't drop it
234 * for fear of somebody re-populating it
235 * with children (even though dropping it
236 * would make it unreachable from the root,
237 * we might still populate it if it was a
238 * working directory or similar).
240 spin_lock(&dentry->d_lock);
241 if (atomic_read(&dentry->d_count) > 1) {
242 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
243 spin_unlock(&dentry->d_lock);
244 spin_unlock(&dcache_lock);
250 spin_unlock(&dentry->d_lock);
251 spin_unlock(&dcache_lock);
255 /* This should be called _only_ with dcache_lock held */
257 static inline struct dentry * __dget_locked(struct dentry *dentry)
259 atomic_inc(&dentry->d_count);
260 if (atomic_read(&dentry->d_count) == 1) {
261 dentry_stat.nr_unused--;
262 list_del_init(&dentry->d_lru);
267 struct dentry * dget_locked(struct dentry *dentry)
269 return __dget_locked(dentry);
273 * d_find_alias - grab a hashed alias of inode
274 * @inode: inode in question
276 * If inode has a hashed alias - acquire the reference to alias and
277 * return it. Otherwise return NULL. Notice that if inode is a directory
278 * there can be only one alias and it can be unhashed only if it has
281 * If the inode has a DCACHE_DISCONNECTED alias, then prefer
282 * any other hashed alias over that one.
285 struct dentry * d_find_alias(struct inode *inode)
287 struct list_head *head, *next, *tmp;
288 struct dentry *alias, *discon_alias=NULL;
290 spin_lock(&dcache_lock);
291 head = &inode->i_dentry;
292 next = inode->i_dentry.next;
293 while (next != head) {
297 alias = list_entry(tmp, struct dentry, d_alias);
298 if (!d_unhashed(alias)) {
299 if (alias->d_flags & DCACHE_DISCONNECTED)
300 discon_alias = alias;
302 __dget_locked(alias);
303 spin_unlock(&dcache_lock);
309 __dget_locked(discon_alias);
310 spin_unlock(&dcache_lock);
315 * Try to kill dentries associated with this inode.
316 * WARNING: you must own a reference to inode.
318 void d_prune_aliases(struct inode *inode)
320 struct list_head *tmp, *head = &inode->i_dentry;
322 spin_lock(&dcache_lock);
324 while ((tmp = tmp->next) != head) {
325 struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
326 if (!atomic_read(&dentry->d_count)) {
327 __dget_locked(dentry);
329 spin_unlock(&dcache_lock);
334 spin_unlock(&dcache_lock);
338 * Throw away a dentry - free the inode, dput the parent.
339 * This requires that the LRU list has already been
341 * Called with dcache_lock, drops it and then regains.
343 static inline void prune_one_dentry(struct dentry * dentry)
345 struct dentry * parent;
348 list_del(&dentry->d_child);
349 dentry_stat.nr_dentry--; /* For d_free, below */
351 parent = dentry->d_parent;
353 if (parent != dentry)
355 spin_lock(&dcache_lock);
359 * prune_dcache - shrink the dcache
360 * @count: number of entries to try and free
362 * Shrink the dcache. This is done when we need
363 * more memory, or simply when we need to unmount
364 * something (at which point we need to unuse
367 * This function may fail to free any resources if
368 * all the dentries are in use.
371 static void prune_dcache(int count)
373 spin_lock(&dcache_lock);
374 for (; count ; count--) {
375 struct dentry *dentry;
376 struct list_head *tmp;
378 tmp = dentry_unused.prev;
379 if (tmp == &dentry_unused)
382 prefetch(dentry_unused.prev);
383 dentry_stat.nr_unused--;
384 dentry = list_entry(tmp, struct dentry, d_lru);
386 spin_lock(&dentry->d_lock);
388 * We found an inuse dentry which was not removed from
389 * dentry_unused because of laziness during lookup. Do not free
390 * it - just keep it off the dentry_unused list.
392 if (atomic_read(&dentry->d_count)) {
393 spin_unlock(&dentry->d_lock);
396 /* If the dentry was recently referenced, don't free it. */
397 if (dentry->d_vfs_flags & DCACHE_REFERENCED) {
398 dentry->d_vfs_flags &= ~DCACHE_REFERENCED;
399 list_add(&dentry->d_lru, &dentry_unused);
400 dentry_stat.nr_unused++;
401 spin_unlock(&dentry->d_lock);
404 prune_one_dentry(dentry);
406 spin_unlock(&dcache_lock);
410 * Shrink the dcache for the specified super block.
411 * This allows us to unmount a device without disturbing
412 * the dcache for the other devices.
414 * This implementation makes just two traversals of the
415 * unused list. On the first pass we move the selected
416 * dentries to the most recent end, and on the second
417 * pass we free them. The second pass must restart after
418 * each dput(), but since the target dentries are all at
419 * the end, it's really just a single traversal.
423 * shrink_dcache_sb - shrink dcache for a superblock
426 * Shrink the dcache for the specified super block. This
427 * is used to free the dcache before unmounting a file
431 void shrink_dcache_sb(struct super_block * sb)
433 struct list_head *tmp, *next;
434 struct dentry *dentry;
437 * Pass one ... move the dentries for the specified
438 * superblock to the most recent end of the unused list.
440 spin_lock(&dcache_lock);
441 next = dentry_unused.next;
442 while (next != &dentry_unused) {
445 dentry = list_entry(tmp, struct dentry, d_lru);
446 if (dentry->d_sb != sb)
449 list_add(tmp, &dentry_unused);
453 * Pass two ... free the dentries for this superblock.
456 next = dentry_unused.next;
457 while (next != &dentry_unused) {
460 dentry = list_entry(tmp, struct dentry, d_lru);
461 if (dentry->d_sb != sb)
463 dentry_stat.nr_unused--;
465 spin_lock(&dentry->d_lock);
466 if (atomic_read(&dentry->d_count)) {
467 spin_unlock(&dentry->d_lock);
470 prune_one_dentry(dentry);
473 spin_unlock(&dcache_lock);
477 * Search for at least 1 mount point in the dentry's subdirs.
478 * We descend to the next level whenever the d_subdirs
479 * list is non-empty and continue searching.
483 * have_submounts - check for mounts over a dentry
484 * @parent: dentry to check.
486 * Return true if the parent or its subdirectories contain
490 int have_submounts(struct dentry *parent)
492 struct dentry *this_parent = parent;
493 struct list_head *next;
495 spin_lock(&dcache_lock);
496 if (d_mountpoint(parent))
499 next = this_parent->d_subdirs.next;
501 while (next != &this_parent->d_subdirs) {
502 struct list_head *tmp = next;
503 struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
505 /* Have we found a mount point ? */
506 if (d_mountpoint(dentry))
508 if (!list_empty(&dentry->d_subdirs)) {
509 this_parent = dentry;
514 * All done at this level ... ascend and resume the search.
516 if (this_parent != parent) {
517 next = this_parent->d_child.next;
518 this_parent = this_parent->d_parent;
521 spin_unlock(&dcache_lock);
522 return 0; /* No mount points found in tree */
524 spin_unlock(&dcache_lock);
529 * Search the dentry child list for the specified parent,
530 * and move any unused dentries to the end of the unused
531 * list for prune_dcache(). We descend to the next level
532 * whenever the d_subdirs list is non-empty and continue
535 static int select_parent(struct dentry * parent)
537 struct dentry *this_parent = parent;
538 struct list_head *next;
541 spin_lock(&dcache_lock);
543 next = this_parent->d_subdirs.next;
545 while (next != &this_parent->d_subdirs) {
546 struct list_head *tmp = next;
547 struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
550 if (!list_empty(&dentry->d_lru)) {
551 dentry_stat.nr_unused--;
552 list_del_init(&dentry->d_lru);
555 * move only zero ref count dentries to the end
556 * of the unused list for prune_dcache
558 if (!atomic_read(&dentry->d_count)) {
559 list_add(&dentry->d_lru, dentry_unused.prev);
560 dentry_stat.nr_unused++;
564 * Descend a level if the d_subdirs list is non-empty.
566 if (!list_empty(&dentry->d_subdirs)) {
567 this_parent = dentry;
569 printk(KERN_DEBUG "select_parent: descending to %s/%s, found=%d\n",
570 dentry->d_parent->d_name.name, dentry->d_name.name, found);
576 * All done at this level ... ascend and resume the search.
578 if (this_parent != parent) {
579 next = this_parent->d_child.next;
580 this_parent = this_parent->d_parent;
582 printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n",
583 this_parent->d_parent->d_name.name, this_parent->d_name.name, found);
587 spin_unlock(&dcache_lock);
592 * shrink_dcache_parent - prune dcache
593 * @parent: parent of entries to prune
595 * Prune the dcache to remove unused children of the parent dentry.
598 void shrink_dcache_parent(struct dentry * parent)
602 while ((found = select_parent(parent)) != 0)
607 * shrink_dcache_anon - further prune the cache
608 * @head: head of d_hash list of dentries to prune
610 * Prune the dentries that are anonymous
612 * parsing d_hash list does not read_barrier_depends() as it
613 * done under dcache_lock.
616 void shrink_dcache_anon(struct hlist_head *head)
618 struct hlist_node *lp;
622 spin_lock(&dcache_lock);
623 hlist_for_each(lp, head) {
624 struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
625 if (!list_empty(&this->d_lru)) {
626 dentry_stat.nr_unused--;
627 list_del(&this->d_lru);
631 * move only zero ref count dentries to the end
632 * of the unused list for prune_dcache
634 if (!atomic_read(&this->d_count)) {
635 list_add_tail(&this->d_lru, &dentry_unused);
636 dentry_stat.nr_unused++;
640 spin_unlock(&dcache_lock);
646 * This is called from kswapd when we think we need some more memory.
648 static int shrink_dcache_memory(int nr, unsigned int gfp_mask)
652 * Nasty deadlock avoidance.
654 * ext2_new_block->getblk->GFP->shrink_dcache_memory->
655 * prune_dcache->prune_one_dentry->dput->dentry_iput->iput->
656 * inode->i_sb->s_op->put_inode->ext2_discard_prealloc->
657 * ext2_free_blocks->lock_super->DEADLOCK.
659 * We should make sure we don't hold the superblock lock over
660 * block allocations, but for now:
662 if (gfp_mask & __GFP_FS)
665 return dentry_stat.nr_unused;
668 #define NAME_ALLOC_LEN(len) ((len+16) & ~15)
671 * d_alloc - allocate a dcache entry
672 * @parent: parent of entry to allocate
673 * @name: qstr of the name
675 * Allocates a dentry. It returns %NULL if there is insufficient memory
676 * available. On a success the dentry is returned. The name passed in is
677 * copied and the copy passed in may be reused after this call.
680 struct dentry * d_alloc(struct dentry * parent, const struct qstr *name)
683 struct dentry *dentry;
686 dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
690 if (name->len > DNAME_INLINE_LEN-1) {
691 qstr = kmalloc(sizeof(*qstr) + NAME_ALLOC_LEN(name->len),
694 kmem_cache_free(dentry_cache, dentry);
697 qstr->name = qstr->name_str;
698 qstr->len = name->len;
699 qstr->hash = name->hash;
700 dentry->d_qstr = qstr;
701 str = qstr->name_str;
703 dentry->d_qstr = &dentry->d_name;
704 str = dentry->d_iname;
707 memcpy(str, name->name, name->len);
710 atomic_set(&dentry->d_count, 1);
711 dentry->d_vfs_flags = DCACHE_UNHASHED;
712 dentry->d_lock = SPIN_LOCK_UNLOCKED;
714 dentry->d_inode = NULL;
715 dentry->d_parent = NULL;
716 dentry->d_move_count = 0;
718 dentry->d_name.name = str;
719 dentry->d_name.len = name->len;
720 dentry->d_name.hash = name->hash;
722 dentry->d_fsdata = NULL;
723 dentry->d_mounted = 0;
724 dentry->d_cookie = NULL;
725 dentry->d_bucket = NULL;
726 INIT_HLIST_NODE(&dentry->d_hash);
727 INIT_LIST_HEAD(&dentry->d_lru);
728 INIT_LIST_HEAD(&dentry->d_subdirs);
729 INIT_LIST_HEAD(&dentry->d_alias);
732 dentry->d_parent = dget(parent);
733 dentry->d_sb = parent->d_sb;
735 INIT_LIST_HEAD(&dentry->d_child);
738 spin_lock(&dcache_lock);
740 list_add(&dentry->d_child, &parent->d_subdirs);
741 dentry_stat.nr_dentry++;
742 spin_unlock(&dcache_lock);
748 * d_instantiate - fill in inode information for a dentry
749 * @entry: dentry to complete
750 * @inode: inode to attach to this dentry
752 * Fill in inode information in the entry.
754 * This turns negative dentries into productive full members
757 * NOTE! This assumes that the inode count has been incremented
758 * (or otherwise set) by the caller to indicate that it is now
759 * in use by the dcache.
762 void d_instantiate(struct dentry *entry, struct inode * inode)
764 if (!list_empty(&entry->d_alias)) BUG();
765 spin_lock(&dcache_lock);
767 list_add(&entry->d_alias, &inode->i_dentry);
768 entry->d_inode = inode;
769 spin_unlock(&dcache_lock);
770 security_d_instantiate(entry, inode);
774 * d_alloc_root - allocate root dentry
775 * @root_inode: inode to allocate the root for
777 * Allocate a root ("/") dentry for the inode given. The inode is
778 * instantiated and returned. %NULL is returned if there is insufficient
779 * memory or the inode passed is %NULL.
782 struct dentry * d_alloc_root(struct inode * root_inode)
784 struct dentry *res = NULL;
787 static const struct qstr name = { .name = "/", .len = 1, .hash = 0 };
788 res = d_alloc(NULL, &name);
790 res->d_sb = root_inode->i_sb;
792 d_instantiate(res, root_inode);
798 static inline struct hlist_head * d_hash(struct dentry * parent, unsigned long hash)
800 hash += (unsigned long) parent / L1_CACHE_BYTES;
801 hash = hash ^ (hash >> D_HASHBITS);
802 return dentry_hashtable + (hash & D_HASHMASK);
806 * d_alloc_anon - allocate an anonymous dentry
807 * @inode: inode to allocate the dentry for
809 * This is similar to d_alloc_root. It is used by filesystems when
810 * creating a dentry for a given inode, often in the process of
811 * mapping a filehandle to a dentry. The returned dentry may be
812 * anonymous, or may have a full name (if the inode was already
813 * in the cache). The file system may need to make further
814 * efforts to connect this dentry into the dcache properly.
816 * When called on a directory inode, we must ensure that
817 * the inode only ever has one dentry. If a dentry is
818 * found, that is returned instead of allocating a new one.
820 * On successful return, the reference to the inode has been transferred
821 * to the dentry. If %NULL is returned (indicating kmalloc failure),
822 * the reference on the inode has not been released.
825 struct dentry * d_alloc_anon(struct inode *inode)
827 static const struct qstr anonstring = { "", 0, 0};
831 if ((res = d_find_alias(inode))) {
836 tmp = d_alloc(NULL, &anonstring);
840 tmp->d_parent = tmp; /* make sure dput doesn't croak */
842 spin_lock(&dcache_lock);
843 if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) {
844 /* A directory can only have one dentry.
845 * This (now) has one, so use it.
847 res = list_entry(inode->i_dentry.next, struct dentry, d_alias);
850 /* attach a disconnected dentry */
854 spin_lock(&res->d_lock);
855 res->d_sb = inode->i_sb;
857 res->d_inode = inode;
858 res->d_bucket = d_hash(res, res->d_name.hash);
859 res->d_flags |= DCACHE_DISCONNECTED;
860 res->d_vfs_flags &= ~DCACHE_UNHASHED;
861 list_add(&res->d_alias, &inode->i_dentry);
862 hlist_add_head(&res->d_hash, &inode->i_sb->s_anon);
863 spin_unlock(&res->d_lock);
865 inode = NULL; /* don't drop reference */
867 spin_unlock(&dcache_lock);
878 * d_splice_alias - splice a disconnected dentry into the tree if one exists
879 * @inode: the inode which may have a disconnected dentry
880 * @dentry: a negative dentry which we want to point to the inode.
882 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
883 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
884 * and return it, else simply d_add the inode to the dentry and return NULL.
886 * This is (will be) needed in the lookup routine of any filesystem that is exportable
887 * (via knfsd) so that we can build dcache paths to directories effectively.
889 * If a dentry was found and moved, then it is returned. Otherwise NULL
890 * is returned. This matches the expected return value of ->lookup.
893 struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
895 struct dentry *new = NULL;
897 if (inode && S_ISDIR(inode->i_mode)) {
898 spin_lock(&dcache_lock);
899 if (!list_empty(&inode->i_dentry)) {
900 new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
902 spin_unlock(&dcache_lock);
903 security_d_instantiate(new, inode);
908 /* d_instantiate takes dcache_lock, so we do it by hand */
909 list_add(&dentry->d_alias, &inode->i_dentry);
910 dentry->d_inode = inode;
911 spin_unlock(&dcache_lock);
912 security_d_instantiate(dentry, inode);
916 d_add(dentry, inode);
922 * d_lookup - search for a dentry
923 * @parent: parent dentry
924 * @name: qstr of name we wish to find
926 * Searches the children of the parent dentry for the name in question. If
927 * the dentry is found its reference count is incremented and the dentry
928 * is returned. The caller must use d_put to free the entry when it has
929 * finished using it. %NULL is returned on failure.
931 * __d_lookup is dcache_lock free. The hash list is protected using RCU.
932 * Memory barriers are used while updating and doing lockless traversal.
933 * To avoid races with d_move while rename is happening, d_move_count is
936 * Overflows in memcmp(), while d_move, are avoided by keeping the length
937 * and name pointer in one structure pointed by d_qstr.
939 * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
940 * lookup is going on.
942 * dentry_unused list is not updated even if lookup finds the required dentry
943 * in there. It is updated in places such as prune_dcache, shrink_dcache_sb and
944 * select_parent. This laziness saves lookup from dcache_lock acquisition.
946 * d_lookup() is protected against the concurrent renames in some unrelated
947 * directory using the seqlockt_t rename_lock.
950 struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
952 struct dentry * dentry = NULL;
956 seq = read_seqbegin(&rename_lock);
957 dentry = __d_lookup(parent, name);
960 } while (read_seqretry(&rename_lock, seq));
964 struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
966 unsigned int len = name->len;
967 unsigned int hash = name->hash;
968 const unsigned char *str = name->name;
969 struct hlist_head *head = d_hash(parent,hash);
970 struct dentry *found = NULL;
971 struct hlist_node *node;
975 hlist_for_each (node, head) {
976 struct dentry *dentry;
977 unsigned long move_count;
980 smp_read_barrier_depends();
981 dentry = hlist_entry(node, struct dentry, d_hash);
983 /* if lookup ends up in a different bucket
984 * due to concurrent rename, fail it
986 if (unlikely(dentry->d_bucket != head))
990 * We must take a snapshot of d_move_count followed by
991 * read memory barrier before any search key comparison
993 move_count = dentry->d_move_count;
996 if (dentry->d_name.hash != hash)
998 if (dentry->d_parent != parent)
1001 qstr = dentry->d_qstr;
1002 smp_read_barrier_depends();
1003 if (parent->d_op && parent->d_op->d_compare) {
1004 if (parent->d_op->d_compare(parent, qstr, name))
1007 if (qstr->len != len)
1009 if (memcmp(qstr->name, str, len))
1012 spin_lock(&dentry->d_lock);
1014 * If dentry is moved, fail the lookup
1016 if (likely(move_count == dentry->d_move_count)) {
1017 if (!d_unhashed(dentry)) {
1018 atomic_inc(&dentry->d_count);
1022 spin_unlock(&dentry->d_lock);
1031 * d_validate - verify dentry provided from insecure source
1032 * @dentry: The dentry alleged to be valid child of @dparent
1033 * @dparent: The parent dentry (known to be valid)
1034 * @hash: Hash of the dentry
1035 * @len: Length of the name
1037 * An insecure source has sent us a dentry, here we verify it and dget() it.
1038 * This is used by ncpfs in its readdir implementation.
1039 * Zero is returned in the dentry is invalid.
1042 int d_validate(struct dentry *dentry, struct dentry *dparent)
1044 struct hlist_head *base;
1045 struct hlist_node *lhp;
1047 /* Check whether the ptr might be valid at all.. */
1048 if (!kmem_ptr_validate(dentry_cache, dentry))
1051 if (dentry->d_parent != dparent)
1054 spin_lock(&dcache_lock);
1055 base = d_hash(dparent, dentry->d_name.hash);
1056 hlist_for_each(lhp,base) {
1057 /* read_barrier_depends() not required for d_hash list
1058 * as it is parsed under dcache_lock
1060 if (dentry == hlist_entry(lhp, struct dentry, d_hash)) {
1061 __dget_locked(dentry);
1062 spin_unlock(&dcache_lock);
1066 spin_unlock(&dcache_lock);
1072 * When a file is deleted, we have two options:
1073 * - turn this dentry into a negative dentry
1074 * - unhash this dentry and free it.
1076 * Usually, we want to just turn this into
1077 * a negative dentry, but if anybody else is
1078 * currently using the dentry or the inode
1079 * we can't do that and we fall back on removing
1080 * it from the hash queues and waiting for
1081 * it to be deleted later when it has no users
1085 * d_delete - delete a dentry
1086 * @dentry: The dentry to delete
1088 * Turn the dentry into a negative dentry if possible, otherwise
1089 * remove it from the hash queues so it can be deleted later
1092 void d_delete(struct dentry * dentry)
1095 * Are we the only user?
1097 spin_lock(&dcache_lock);
1098 spin_lock(&dentry->d_lock);
1099 if (atomic_read(&dentry->d_count) == 1) {
1100 dentry_iput(dentry);
1104 if (!d_unhashed(dentry))
1107 spin_unlock(&dentry->d_lock);
1108 spin_unlock(&dcache_lock);
1112 * d_rehash - add an entry back to the hash
1113 * @entry: dentry to add to the hash
1115 * Adds a dentry to the hash according to its name.
1118 void d_rehash(struct dentry * entry)
1120 struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash);
1121 spin_lock(&dcache_lock);
1122 entry->d_vfs_flags &= ~DCACHE_UNHASHED;
1123 entry->d_bucket = list;
1124 hlist_add_head_rcu(&entry->d_hash, list);
1125 spin_unlock(&dcache_lock);
1128 #define do_switch(x,y) do { \
1129 __typeof__ (x) __tmp = x; \
1130 x = y; y = __tmp; } while (0)
1133 * When switching names, the actual string doesn't strictly have to
1134 * be preserved in the target - because we're dropping the target
1135 * anyway. As such, we can just do a simple memcpy() to copy over
1136 * the new name before we switch.
1138 * Note that we have to be a lot more careful about getting the hash
1139 * switched - we have to switch the hash value properly even if it
1140 * then no longer matches the actual (corrupted) string of the target.
1141 * The hash value has to match the hash queue that the dentry is on..
1143 static inline void switch_names(struct dentry * dentry, struct dentry * target)
1145 const unsigned char *old_name, *new_name;
1146 struct qstr *old_qstr, *new_qstr;
1148 memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN);
1149 old_qstr = target->d_qstr;
1150 old_name = target->d_name.name;
1151 new_qstr = dentry->d_qstr;
1152 new_name = dentry->d_name.name;
1153 if (old_name == target->d_iname) {
1154 old_name = dentry->d_iname;
1155 old_qstr = &dentry->d_name;
1157 if (new_name == dentry->d_iname) {
1158 new_name = target->d_iname;
1159 new_qstr = &target->d_name;
1161 target->d_name.name = new_name;
1162 dentry->d_name.name = old_name;
1163 target->d_qstr = new_qstr;
1164 dentry->d_qstr = old_qstr;
1168 * We cannibalize "target" when moving dentry on top of it,
1169 * because it's going to be thrown away anyway. We could be more
1170 * polite about it, though.
1172 * This forceful removal will result in ugly /proc output if
1173 * somebody holds a file open that got deleted due to a rename.
1174 * We could be nicer about the deleted file, and let it show
1175 * up under the name it got deleted rather than the name that
1180 * d_move - move a dentry
1181 * @dentry: entry to move
1182 * @target: new dentry
1184 * Update the dcache to reflect the move of a file name. Negative
1185 * dcache entries should not be moved in this way.
1188 void d_move(struct dentry * dentry, struct dentry * target)
1190 if (!dentry->d_inode)
1191 printk(KERN_WARNING "VFS: moving negative dcache entry\n");
1193 spin_lock(&dcache_lock);
1194 write_seqlock(&rename_lock);
1196 * XXXX: do we really need to take target->d_lock?
1198 if (target < dentry) {
1199 spin_lock(&target->d_lock);
1200 spin_lock(&dentry->d_lock);
1202 spin_lock(&dentry->d_lock);
1203 spin_lock(&target->d_lock);
1206 /* Move the dentry to the target hash queue, if on different bucket */
1207 if (dentry->d_vfs_flags & DCACHE_UNHASHED)
1208 goto already_unhashed;
1209 if (dentry->d_bucket != target->d_bucket) {
1210 hlist_del_rcu(&dentry->d_hash);
1212 dentry->d_bucket = target->d_bucket;
1213 hlist_add_head_rcu(&dentry->d_hash, target->d_bucket);
1214 dentry->d_vfs_flags &= ~DCACHE_UNHASHED;
1217 /* Unhash the target: dput() will then get rid of it */
1220 list_del(&dentry->d_child);
1221 list_del(&target->d_child);
1223 /* Switch the names.. */
1224 switch_names(dentry, target);
1226 do_switch(dentry->d_name.len, target->d_name.len);
1227 do_switch(dentry->d_name.hash, target->d_name.hash);
1229 /* ... and switch the parents */
1230 if (IS_ROOT(dentry)) {
1231 dentry->d_parent = target->d_parent;
1232 target->d_parent = target;
1233 INIT_LIST_HEAD(&target->d_child);
1235 do_switch(dentry->d_parent, target->d_parent);
1237 /* And add them back to the (new) parent lists */
1238 list_add(&target->d_child, &target->d_parent->d_subdirs);
1241 list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
1242 dentry->d_move_count++;
1243 spin_unlock(&target->d_lock);
1244 spin_unlock(&dentry->d_lock);
1245 write_sequnlock(&rename_lock);
1246 spin_unlock(&dcache_lock);
1250 * d_path - return the path of a dentry
1251 * @dentry: dentry to report
1252 * @vfsmnt: vfsmnt to which the dentry belongs
1253 * @root: root dentry
1254 * @rootmnt: vfsmnt to which the root dentry belongs
1255 * @buffer: buffer to return value in
1256 * @buflen: buffer length
1258 * Convert a dentry into an ASCII path name. If the entry has been deleted
1259 * the string " (deleted)" is appended. Note that this is ambiguous.
1261 * Returns the buffer or an error code if the path was too long.
1263 * "buflen" should be positive. Caller holds the dcache_lock.
1265 static char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt,
1266 struct dentry *root, struct vfsmount *rootmnt,
1267 char *buffer, int buflen)
1269 char * end = buffer+buflen;
1275 if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
1280 memcpy(end, " (deleted)", 10);
1290 struct dentry * parent;
1292 if (dentry == root && vfsmnt == rootmnt)
1294 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
1296 spin_lock(&vfsmount_lock);
1297 if (vfsmnt->mnt_parent == vfsmnt) {
1298 spin_unlock(&vfsmount_lock);
1301 dentry = vfsmnt->mnt_mountpoint;
1302 vfsmnt = vfsmnt->mnt_parent;
1303 spin_unlock(&vfsmount_lock);
1306 parent = dentry->d_parent;
1308 namelen = dentry->d_name.len;
1309 buflen -= namelen + 1;
1313 memcpy(end, dentry->d_name.name, namelen);
1322 namelen = dentry->d_name.len;
1326 retval -= namelen-1; /* hit the slash */
1327 memcpy(retval, dentry->d_name.name, namelen);
1330 return ERR_PTR(-ENAMETOOLONG);
1333 /* write full pathname into buffer and return start of pathname */
1334 char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
1335 char *buf, int buflen)
1338 struct vfsmount *rootmnt;
1339 struct dentry *root;
1340 read_lock(¤t->fs->lock);
1341 rootmnt = mntget(current->fs->rootmnt);
1342 root = dget(current->fs->root);
1343 read_unlock(¤t->fs->lock);
1344 spin_lock(&dcache_lock);
1345 res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen);
1346 spin_unlock(&dcache_lock);
1353 * NOTE! The user-level library version returns a
1354 * character pointer. The kernel system call just
1355 * returns the length of the buffer filled (which
1356 * includes the ending '\0' character), or a negative
1357 * error value. So libc would do something like
1359 * char *getcwd(char * buf, size_t size)
1363 * retval = sys_getcwd(buf, size);
1370 asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
1373 struct vfsmount *pwdmnt, *rootmnt;
1374 struct dentry *pwd, *root;
1375 char *page = (char *) __get_free_page(GFP_USER);
1380 read_lock(¤t->fs->lock);
1381 pwdmnt = mntget(current->fs->pwdmnt);
1382 pwd = dget(current->fs->pwd);
1383 rootmnt = mntget(current->fs->rootmnt);
1384 root = dget(current->fs->root);
1385 read_unlock(¤t->fs->lock);
1388 /* Has the current directory has been unlinked? */
1389 spin_lock(&dcache_lock);
1390 if (pwd->d_parent == pwd || !d_unhashed(pwd)) {
1394 cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE);
1395 spin_unlock(&dcache_lock);
1397 error = PTR_ERR(cwd);
1402 len = PAGE_SIZE + page - cwd;
1405 if (copy_to_user(buf, cwd, len))
1409 spin_unlock(&dcache_lock);
1416 free_page((unsigned long) page);
1421 * Test whether new_dentry is a subdirectory of old_dentry.
1423 * Trivially implemented using the dcache structure
1427 * is_subdir - is new dentry a subdirectory of old_dentry
1428 * @new_dentry: new dentry
1429 * @old_dentry: old dentry
1431 * Returns 1 if new_dentry is a subdirectory of the parent (at any depth).
1432 * Returns 0 otherwise.
1433 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
1436 int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry)
1439 struct dentry * saved = new_dentry;
1443 /* need rcu_readlock to protect against the d_parent trashing due to
1448 /* for restarting inner loop in case of seq retry */
1450 seq = read_seqbegin(&rename_lock);
1452 if (new_dentry != old_dentry) {
1453 struct dentry * parent = new_dentry->d_parent;
1454 if (parent == new_dentry)
1456 new_dentry = parent;
1462 } while (read_seqretry(&rename_lock, seq));
1468 void d_genocide(struct dentry *root)
1470 struct dentry *this_parent = root;
1471 struct list_head *next;
1473 spin_lock(&dcache_lock);
1475 next = this_parent->d_subdirs.next;
1477 while (next != &this_parent->d_subdirs) {
1478 struct list_head *tmp = next;
1479 struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
1481 if (d_unhashed(dentry)||!dentry->d_inode)
1483 if (!list_empty(&dentry->d_subdirs)) {
1484 this_parent = dentry;
1487 atomic_dec(&dentry->d_count);
1489 if (this_parent != root) {
1490 next = this_parent->d_child.next;
1491 atomic_dec(&this_parent->d_count);
1492 this_parent = this_parent->d_parent;
1495 spin_unlock(&dcache_lock);
1499 * find_inode_number - check for dentry with name
1500 * @dir: directory to check
1501 * @name: Name to find.
1503 * Check whether a dentry already exists for the given name,
1504 * and return the inode number if it has an inode. Otherwise
1507 * This routine is used to post-process directory listings for
1508 * filesystems using synthetic inode numbers, and is necessary
1509 * to keep getcwd() working.
1512 ino_t find_inode_number(struct dentry *dir, struct qstr *name)
1514 struct dentry * dentry;
1518 * Check for a fs-specific hash function. Note that we must
1519 * calculate the standard hash first, as the d_op->d_hash()
1520 * routine may choose to leave the hash value unchanged.
1522 name->hash = full_name_hash(name->name, name->len);
1523 if (dir->d_op && dir->d_op->d_hash)
1525 if (dir->d_op->d_hash(dir, name) != 0)
1529 dentry = d_lookup(dir, name);
1532 if (dentry->d_inode)
1533 ino = dentry->d_inode->i_ino;
1540 static __initdata unsigned long dhash_entries;
1541 static int __init set_dhash_entries(char *str)
1545 dhash_entries = simple_strtoul(str, &str, 0);
1548 __setup("dhash_entries=", set_dhash_entries);
1550 static void __init dcache_init(unsigned long mempages)
1552 struct hlist_head *d;
1553 unsigned long order;
1554 unsigned int nr_hash;
1558 * A constructor could be added for stable state like the lists,
1559 * but it is probably not worth it because of the cache nature
1562 dentry_cache = kmem_cache_create("dentry_cache",
1563 sizeof(struct dentry),
1565 SLAB_RECLAIM_ACCOUNT,
1568 panic("Cannot create dentry cache");
1570 set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);
1573 dhash_entries = PAGE_SHIFT < 13 ?
1574 mempages >> (13 - PAGE_SHIFT) :
1575 mempages << (PAGE_SHIFT - 13);
1577 dhash_entries *= sizeof(struct hlist_head);
1578 for (order = 0; ((1UL << order) << PAGE_SHIFT) < dhash_entries; order++)
1584 nr_hash = (1UL << order) * PAGE_SIZE /
1585 sizeof(struct hlist_head);
1586 d_hash_mask = (nr_hash - 1);
1590 while ((tmp >>= 1UL) != 0UL)
1593 dentry_hashtable = (struct hlist_head *)
1594 __get_free_pages(GFP_ATOMIC, order);
1595 } while (dentry_hashtable == NULL && --order >= 0);
1597 printk(KERN_INFO "Dentry cache hash table entries: %d (order: %ld, %ld bytes)\n",
1598 nr_hash, order, (PAGE_SIZE << order));
1600 if (!dentry_hashtable)
1601 panic("Failed to allocate dcache hash table\n");
1603 d = dentry_hashtable;
1612 /* SLAB cache for __getname() consumers */
1613 kmem_cache_t *names_cachep;
1615 /* SLAB cache for file structures */
1616 kmem_cache_t *filp_cachep;
1618 EXPORT_SYMBOL(d_genocide);
1620 extern void bdev_cache_init(void);
1621 extern void chrdev_init(void);
1623 void __init vfs_caches_init(unsigned long mempages)
1625 unsigned long reserve;
1627 /* Base hash sizes on available memory, with a reserve equal to
1628 150% of current kernel size */
1630 reserve = (mempages - nr_free_pages()) * 3/2;
1631 mempages -= reserve;
1633 names_cachep = kmem_cache_create("names_cache",
1635 SLAB_HWCACHE_ALIGN, NULL, NULL);
1637 panic("Cannot create names SLAB cache");
1639 filp_cachep = kmem_cache_create("filp",
1640 sizeof(struct file), 0,
1641 SLAB_HWCACHE_ALIGN, filp_ctor, filp_dtor);
1643 panic("Cannot create filp SLAB cache");
1645 dcache_init(mempages);
1646 inode_init(mempages);
1647 files_init(mempages);
1653 EXPORT_SYMBOL(d_alloc);
1654 EXPORT_SYMBOL(d_alloc_anon);
1655 EXPORT_SYMBOL(d_alloc_root);
1656 EXPORT_SYMBOL(d_delete);
1657 EXPORT_SYMBOL(d_find_alias);
1658 EXPORT_SYMBOL(d_instantiate);
1659 EXPORT_SYMBOL(d_invalidate);
1660 EXPORT_SYMBOL(d_lookup);
1661 EXPORT_SYMBOL(d_move);
1662 EXPORT_SYMBOL(d_path);
1663 EXPORT_SYMBOL(d_prune_aliases);
1664 EXPORT_SYMBOL(d_rehash);
1665 EXPORT_SYMBOL(d_splice_alias);
1666 EXPORT_SYMBOL(d_validate);
1667 EXPORT_SYMBOL(dget_locked);
1668 EXPORT_SYMBOL(dput);
1669 EXPORT_SYMBOL(find_inode_number);
1670 EXPORT_SYMBOL(have_submounts);
1671 EXPORT_SYMBOL(is_subdir);
1672 EXPORT_SYMBOL(names_cachep);
1673 EXPORT_SYMBOL(shrink_dcache_anon);
1674 EXPORT_SYMBOL(shrink_dcache_parent);
1675 EXPORT_SYMBOL(shrink_dcache_sb);