author	Hugh Dickins <hugh@veritas.com>	2004-10-19 18:17:12 -0700
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2004-10-19 18:17:12 -0700
commit	1b46884a372f362e5edb1bb406bf0c5ca98bade7 (patch)
tree	cdfc9bfc76b98cc06f270e0a19f1da04005ee26d /kernel
parent	5dfd31d1133be6b1c697c082900cc0e50d878639 (diff)
download	history-1b46884a372f362e5edb1bb406bf0c5ca98bade7.tar.gz
[PATCH] lighten mmlist_lock
Let's lighten the global spinlock mmlist_lock.  What's it for?
1. Its original role is to guard mmlist.
2. It later got a second role, to prevent get_task_mm from raising mm_users
   from the dead, just after it went down to 0.

Firstly consider the second: __exit_mm sets tsk->mm NULL while holding
task_lock before calling mmput; so mmlist_lock only guards against the
exceptional case, of get_task_mm on a kernel workthread which did AIO's
use_mm (which transiently sets its tsk->mm without raising mm_users) on an
mm now exiting.

Well, I don't think get_task_mm should succeed at all on use_mm tasks.
It's mainly used by /proc/pid and ptrace; it seems at best confusing for
those to present the kernel thread as having a user mm, which it won't
have a moment later.  Define PF_BORROWED_MM, set in use_mm, clear in
unuse_mm (though we could just leave it), and have get_task_mm give NULL
if it is set.

Secondly consider the first: what's mmlist for?
1. Its original role was for swap_out to scan: rmap ended that in 2.5.27.
2. In 2.4.10 it got a second role, for try_to_unuse to scan for swapoff.

So, make mmlist a list of mms which maybe have pages on swap: add mm to
mmlist when the first swap entry is assigned in try_to_unmap_one (pageout),
or in copy_page_range (fork); and mmput removes it from mmlist as before,
except usually list_empty and there's no need to lock.  drain_mmlist is
added to swapoff, to empty out the mmlist if no swap is then in use.

mmput leaves mm on mmlist until after its exit_mmap, so try_to_unmap_one
can still add mm to mmlist without worrying about the mm_users 0 case; but
try_to_unuse must avoid the mm_users 0 case (when an mm might be removed
from mmlist, and freed, while it's down in unuse_process): use
atomic_inc_return, now all architectures support that.

Some of the detailed comments in try_to_unuse have grown out of date:
updated and trimmed some, but leave SWAP_MAP_MAX for another occasion.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
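[Editor's note] The try_to_unuse guard described above is easiest to see in code.
The sketch below is not the patch's mm/swapfile.c hunk (which falls outside the
'kernel' diffstat shown here); grab_swapoff_mm() is a hypothetical helper name
used only for illustration, and the real loop must additionally cope with mmlist
changing while mmlist_lock is dropped.

/*
 * Editor's sketch of the mm_users-0 guard described above.  The caller
 * holds mmlist_lock, so the mm cannot be freed out from under us even
 * though mmput() may already have seen mm_users reach 0: mmput() only
 * unlinks the mm from mmlist (under mmlist_lock) after exit_mmap().
 */
static struct mm_struct *grab_swapoff_mm(struct list_head *p)
{
	struct mm_struct *mm = list_entry(p, struct mm_struct, mmlist);

	if (atomic_inc_return(&mm->mm_users) == 1) {
		/* was 0: this mm is exiting, don't resurrect it */
		atomic_dec(&mm->mm_users);
		return NULL;
	}
	return mm;	/* caller drops the reference with mmput() later */
}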
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/fork.c	| 37
1 file changed, 11 insertions(+), 26 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 96714c501cc831..7d9cd6f7b27869 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -179,17 +179,6 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
rb_parent = NULL;
pprev = &mm->mmap;
- /*
- * Add it to the mmlist after the parent.
- * Doing it this way means that we can order the list,
- * and fork() won't mess up the ordering significantly.
- * Add it first so that swapoff can see any swap entries.
- */
- spin_lock(&mmlist_lock);
- list_add(&mm->mmlist, &current->mm->mmlist);
- mmlist_nr++;
- spin_unlock(&mmlist_lock);
-
for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
struct file *file;
@@ -289,7 +278,6 @@ static inline void mm_free_pgd(struct mm_struct * mm)
#endif /* CONFIG_MMU */
spinlock_t mmlist_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
-int mmlist_nr;
#define allocate_mm() (kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
@@ -301,6 +289,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
atomic_set(&mm->mm_users, 1);
atomic_set(&mm->mm_count, 1);
init_rwsem(&mm->mmap_sem);
+ INIT_LIST_HEAD(&mm->mmlist);
mm->core_waiters = 0;
mm->nr_ptes = 0;
mm->page_table_lock = SPIN_LOCK_UNLOCKED;
@@ -350,12 +339,14 @@ void fastcall __mmdrop(struct mm_struct *mm)
*/
void mmput(struct mm_struct *mm)
{
- if (atomic_dec_and_lock(&mm->mm_users, &mmlist_lock)) {
- list_del(&mm->mmlist);
- mmlist_nr--;
- spin_unlock(&mmlist_lock);
+ if (atomic_dec_and_test(&mm->mm_users)) {
exit_aio(mm);
exit_mmap(mm);
+ if (!list_empty(&mm->mmlist)) {
+ spin_lock(&mmlist_lock);
+ list_del(&mm->mmlist);
+ spin_unlock(&mmlist_lock);
+ }
put_swap_token(mm);
mmdrop(mm);
}
@@ -365,15 +356,11 @@ EXPORT_SYMBOL_GPL(mmput);
/**
* get_task_mm - acquire a reference to the task's mm
*
- * Returns %NULL if the task has no mm. Checks if the use count
- * of the mm is non-zero and if so returns a reference to it, after
+ * Returns %NULL if the task has no mm. Checks PF_BORROWED_MM (meaning
+ * this kernel workthread has transiently adopted a user mm with use_mm,
+ * to do its AIO) is not set and if so returns a reference to it, after
* bumping up the use count. User must release the mm via mmput()
* after use. Typically used by /proc and ptrace.
- *
- * If the use count is zero, it means that this mm is going away,
- * so return %NULL. This only happens in the case of an AIO daemon
- * which has temporarily adopted an mm (see use_mm), in the course
- * of its final mmput, before exit_aio has completed.
*/
struct mm_struct *get_task_mm(struct task_struct *task)
{
@@ -382,12 +369,10 @@ struct mm_struct *get_task_mm(struct task_struct *task)
task_lock(task);
mm = task->mm;
if (mm) {
- spin_lock(&mmlist_lock);
- if (!atomic_read(&mm->mm_users))
+ if (task->flags & PF_BORROWED_MM)
mm = NULL;
else
atomic_inc(&mm->mm_users);
- spin_unlock(&mmlist_lock);
}
task_unlock(task);
return mm;
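[Editor's note] For context on the get_task_mm()/mmput() pairing that the doc
comment above describes, here is a minimal caller sketch.  It is not part of
this patch; dump_task_vmas() is a hypothetical helper, and the body only shows
the reference-count discipline a /proc- or ptrace-style user would follow.

#include <linux/sched.h>
#include <linux/mm.h>

/* Hypothetical caller, for illustration only: not from this patch. */
static int dump_task_vmas(struct task_struct *task)
{
	struct mm_struct *mm = get_task_mm(task);
	struct vm_area_struct *vma;

	if (!mm)		/* no user mm, or a use_mm() borrower */
		return -EINVAL;

	down_read(&mm->mmap_sem);
	for (vma = mm->mmap; vma; vma = vma->vm_next)
		printk(KERN_DEBUG "vma %08lx-%08lx\n",
		       vma->vm_start, vma->vm_end);
	up_read(&mm->mmap_sem);

	mmput(mm);		/* drop the reference get_task_mm() took */
	return 0;
}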