From: "Hu, Boris" Split futex global spinlock futex_lock into hash bucket spinlocks. Add bucket spinlock recursively lock check fixed by Jakub Jelinek. Ulrich has regression tested this, and measured a 6% speedup on a 4-way with a futex-intensive benchmark. kernel/futex.c | 117 ++++++++++++++++++++++++++++++++++----------------------- 1 files changed, 70 insertions(+), 47 deletions(-) diff -puN kernel/futex.c~futex_lock-splitup kernel/futex.c --- 25/kernel/futex.c~futex_lock-splitup 2003-09-16 22:25:51.000000000 -0700 +++ 25-akpm/kernel/futex.c 2003-09-16 22:25:51.000000000 -0700 @@ -33,7 +33,7 @@ #include <linux/slab.h> #include <linux/poll.h> #include <linux/fs.h> -#include <linux/hash.h> +#include <linux/jhash.h> #include <linux/init.h> #include <linux/futex.h> #include <linux/mount.h> @@ -44,6 +44,7 @@ /* * Futexes are matched on equal values of this key. * The key type depends on whether it's a shared or private mapping. + * Don't rearrange members without looking at hash_futex(). */ union futex_key { struct { @@ -79,9 +80,15 @@ struct futex_q { struct file *filp; }; -/* The key for the hash is the address + index + offset within page */ -static struct list_head futex_queues[1<<FUTEX_HASHBITS]; -static spinlock_t futex_lock = SPIN_LOCK_UNLOCKED; +/* + * Split the global futex_lock into every hash list lock. + */ +struct futex_hash_bucket { + spinlock_t lock; + struct list_head chain; +}; + +static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS]; /* Futex-fs vfsmount entry: */ static struct vfsmount *futex_mnt; @@ -90,12 +97,13 @@ static struct vfsmount *futex_mnt; /* * We hash on the keys returned from get_futex_key (see below). */ -static inline struct list_head *hash_futex(union futex_key *key) +static struct futex_hash_bucket *hash_futex(union futex_key *key) { - return &futex_queues[hash_long(key->both.word - + (unsigned long) key->both.ptr - + key->both.offset, FUTEX_HASHBITS)]; + u32 hash = jhash2((u32*)&key->both.word, + (sizeof(key->both.word)+sizeof(key->both.ptr))/4, + key->both.offset); + return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)]; } /* @@ -214,6 +222,7 @@ static int get_futex_key(unsigned long u static int futex_wake(unsigned long uaddr, int num) { struct list_head *i, *next, *head; + struct futex_hash_bucket *bh; union futex_key key; int ret; @@ -223,9 +232,10 @@ static int futex_wake(unsigned long uadd if (unlikely(ret != 0)) goto out; - head = hash_futex(&key); + bh = hash_futex(&key); + spin_lock(&bh->lock); + head = &bh->chain; - spin_lock(&futex_lock); list_for_each_safe(i, next, head) { struct futex_q *this = list_entry(i, struct futex_q, list); @@ -239,7 +249,7 @@ static int futex_wake(unsigned long uadd break; } } - spin_unlock(&futex_lock); + spin_unlock(&bh->lock); 
out: up_read(&current->mm->mmap_sem); @@ -254,6 +264,7 @@ static int futex_requeue(unsigned long u int nr_wake, int nr_requeue) { struct list_head *i, *next, *head1, *head2; + struct futex_hash_bucket *bh1, *bh2; union futex_key key1, key2; int ret; @@ -266,10 +277,19 @@ static int futex_requeue(unsigned long u if (unlikely(ret != 0)) goto out; - head1 = hash_futex(&key1); - head2 = hash_futex(&key2); + bh1 = hash_futex(&key1); + bh2 = hash_futex(&key2); + if (bh1 < bh2) { + spin_lock(&bh1->lock); + spin_lock(&bh2->lock); + } else { + spin_lock(&bh2->lock); + if (bh1 > bh2) + spin_lock(&bh1->lock); + } + head1 = &bh1->chain; + head2 = &bh2->chain; - spin_lock(&futex_lock); list_for_each_safe(i, next, head1) { struct futex_q *this = list_entry(i, struct futex_q, list); @@ -291,8 +311,14 @@ static int futex_requeue(unsigned long u } } } - spin_unlock(&futex_lock); - + if (bh1 < bh2) { + spin_unlock(&bh2->lock); + spin_unlock(&bh1->lock); + } else { + if (bh1 > bh2) + spin_unlock(&bh1->lock); + spin_unlock(&bh2->lock); + } out: up_read(&current->mm->mmap_sem); return ret; @@ -301,28 +327,30 @@ out: static inline void queue_me(struct futex_q *q, union futex_key *key, int fd, struct file *filp) { - struct list_head *head = hash_futex(key); + struct futex_hash_bucket *bh = hash_futex(key); + struct list_head *head = &bh->chain; q->key = *key; q->fd = fd; q->filp = filp; - spin_lock(&futex_lock); + spin_lock(&bh->lock); list_add_tail(&q->list, head); - spin_unlock(&futex_lock); + spin_unlock(&bh->lock); } /* Return 1 if we were still queued (ie. 
0 means we were woken) */ static inline int unqueue_me(struct futex_q *q) { + struct futex_hash_bucket *bh = hash_futex(&q->key); int ret = 0; - spin_lock(&futex_lock); + spin_lock(&bh->lock); if (!list_empty(&q->list)) { list_del(&q->list); ret = 1; } - spin_unlock(&futex_lock); + spin_unlock(&bh->lock); return ret; } @@ -332,8 +360,8 @@ static int futex_wait(unsigned long uadd int ret, curval; union futex_key key; struct futex_q q; + struct futex_hash_bucket *bh = NULL; - try_again: init_waitqueue_head(&q.waiters); down_read(&current->mm->mmap_sem); @@ -367,25 +395,26 @@ static int futex_wait(unsigned long uadd /* * There might have been scheduling since the queue_me(), as we * cannot hold a spinlock across the get_user() in case it - faults. So we cannot just set TASK_INTERRUPTIBLE state when + faults, and we cannot just set TASK_INTERRUPTIBLE state when queueing ourselves into the futex hash. This code thus has to - rely on the futex_wake() code doing a wakeup after removing - the waiter from the list. + rely on the futex_wake() code removing us from hash when it + wakes us up. */ add_wait_queue(&q.waiters, &wait); - spin_lock(&futex_lock); + bh = hash_futex(&key); + spin_lock(&bh->lock); set_current_state(TASK_INTERRUPTIBLE); if (unlikely(list_empty(&q.list))) { /* * We were woken already. */ - spin_unlock(&futex_lock); + spin_unlock(&bh->lock); set_current_state(TASK_RUNNING); return 0; } - spin_unlock(&futex_lock); + spin_unlock(&bh->lock); time = schedule_timeout(time); set_current_state(TASK_RUNNING); @@ -394,26 +423,17 @@ static int futex_wait(unsigned long uadd * we are the only user of it. */ - /* - * Were we woken or interrupted for a valid reason? - */ - ret = unqueue_me(&q); - if (ret == 0) + /* If we were woken (and unqueued), we succeeded, whatever. */ + if (!unqueue_me(&q)) return 0; if (time == 0) return -ETIMEDOUT; - if (signal_pending(current)) - return -EINTR; - - /* - * No, it was a spurious wakeup. Try again. Should never happen. 
:) - */ - goto try_again; + /* A spurious wakeup should never happen. */ + WARN_ON(!signal_pending(current)); + return -EINTR; out_unqueue: - /* - * Were we unqueued anyway? - */ + /* If we were woken (and unqueued), we succeeded, whatever. */ if (!unqueue_me(&q)) ret = 0; out_release_sem: @@ -435,13 +455,14 @@ static unsigned int futex_poll(struct fi struct poll_table_struct *wait) { struct futex_q *q = filp->private_data; + struct futex_hash_bucket *bh = hash_futex(&q->key); int ret = 0; poll_wait(filp, &q->waiters, wait); - spin_lock(&futex_lock); + spin_lock(&bh->lock); if (list_empty(&q->list)) ret = POLLIN | POLLRDNORM; - spin_unlock(&futex_lock); + spin_unlock(&bh->lock); return ret; } @@ -587,8 +608,10 @@ static int __init init(void) register_filesystem(&futex_fs_type); futex_mnt = kern_mount(&futex_fs_type); - for (i = 0; i < ARRAY_SIZE(futex_queues); i++) - INIT_LIST_HEAD(&futex_queues[i]); + for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { + INIT_LIST_HEAD(&futex_queues[i].chain); + futex_queues[i].lock = SPIN_LOCK_UNLOCKED; + } return 0; } __initcall(init); _