Currently, shrink_slab() will decide that it needs to scan a certain number of dentries, will call shrink_dcache_memory() requesting that this be done, and shrink_dcache_memory() will simply bail out without doing anything because the caller's gfp_mask did not include __GFP_FS. This has the potential to disrupt our lovely pagecache-vs-slab balancing act. So change things so that shrinker callouts can return -1, indicating that they bailed out. This way, shrink_slab() can remember that this slab was owed a certain number of scans and these will be correctly performed next time a __GFP_FS caller comes by. --- 25-akpm/fs/dcache.c | 27 +++++++++++++-------------- 25-akpm/mm/vmscan.c | 29 ++++++++++++++++------------- 2 files changed, 29 insertions(+), 27 deletions(-) diff -puN mm/vmscan.c~shrink_slab-handle-GFP_NOFS mm/vmscan.c --- 25/mm/vmscan.c~shrink_slab-handle-GFP_NOFS 2004-04-26 19:44:02.786746192 -0700 +++ 25-akpm/mm/vmscan.c 2004-04-26 21:28:14.738305088 -0700 @@ -152,20 +152,23 @@ static int shrink_slab(unsigned long sca delta *= (*shrinker->shrinker)(0, gfp_mask); do_div(delta, pages + 1); shrinker->nr += delta; - if (shrinker->nr > SHRINK_BATCH) { - long nr_to_scan = shrinker->nr; + if (shrinker->nr < 0) + shrinker->nr = LONG_MAX; /* It wrapped! 
*/ - shrinker->nr = 0; - mod_page_state(slabs_scanned, nr_to_scan); - while (nr_to_scan) { - long this_scan = nr_to_scan; - - if (this_scan > 128) - this_scan = 128; - (*shrinker->shrinker)(this_scan, gfp_mask); - nr_to_scan -= this_scan; - cond_resched(); - } + if (shrinker->nr <= SHRINK_BATCH) + break; + while (shrinker->nr) { + long this_scan = shrinker->nr; + int shrink_ret; + + if (this_scan > 128) + this_scan = 128; + shrink_ret = (*shrinker->shrinker)(this_scan, gfp_mask); + mod_page_state(slabs_scanned, this_scan); + shrinker->nr -= this_scan; + if (shrink_ret == -1) + break; + cond_resched(); } } up(&shrinker_sem); diff -puN fs/dcache.c~shrink_slab-handle-GFP_NOFS fs/dcache.c --- 25/fs/dcache.c~shrink_slab-handle-GFP_NOFS 2004-04-26 19:44:02.787746040 -0700 +++ 25-akpm/fs/dcache.c 2004-04-26 19:44:02.794744976 -0700 @@ -643,24 +643,23 @@ void shrink_dcache_anon(struct hlist_hea } /* - * This is called from kswapd when we think we need some more memory. + * Scan `nr' dentries and return the number which remain. + * + * We need to avoid reentering the filesystem if the caller is performing a + * GFP_NOFS allocation attempt. One example deadlock is: + * + * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache-> + * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode-> + * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK. + * + * In this case we return -1 to tell the caller that we baled. */ static int shrink_dcache_memory(int nr, unsigned int gfp_mask) { if (nr) { - /* - * Nasty deadlock avoidance. - * - * ext2_new_block->getblk->GFP->shrink_dcache_memory-> - * prune_dcache->prune_one_dentry->dput->dentry_iput->iput-> - * inode->i_sb->s_op->put_inode->ext2_discard_prealloc-> - * ext2_free_blocks->lock_super->DEADLOCK. 
- * - * We should make sure we don't hold the superblock lock over - * block allocations, but for now: - */ - if (gfp_mask & __GFP_FS) - prune_dcache(nr); + if (!(gfp_mask & __GFP_FS)) + return -1; + prune_dcache(nr); } return dentry_stat.nr_unused; } _