From: William Lee Irwin III Eliminate the bh waitqueue hashtable using bit_waitqueue() via wait_on_bit() and wake_up_bit() to locate the waitqueue head associated with a bit. Signed-off-by: Andrew Morton --- 25-akpm/fs/buffer.c | 55 +++++--------------------------- 25-akpm/fs/jbd/transaction.c | 10 ++--- 25-akpm/include/linux/wait.h | 73 +++++++++++++++++++++++++++++++++++++++++++ 25-akpm/kernel/wait.c | 11 ++++++ 4 files changed, 98 insertions(+), 51 deletions(-) diff -puN fs/buffer.c~eliminate-bh-waitqueue-hashtable fs/buffer.c --- 25/fs/buffer.c~eliminate-bh-waitqueue-hashtable 2004-09-30 22:36:44.098167664 -0700 +++ 25-akpm/fs/buffer.c 2004-09-30 22:36:44.110165840 -0700 @@ -45,14 +45,6 @@ static void invalidate_bh_lrus(void); #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) -/* - * Hashed waitqueue_head's for wait_on_buffer() - */ -#define BH_WAIT_TABLE_ORDER 7 -static struct bh_wait_queue_head { - wait_queue_head_t wqh; -} ____cacheline_aligned_in_smp bh_wait_queue_heads[1<b_private = private; } -/* - * Return the address of the waitqueue_head to be used for this - * buffer_head - */ -wait_queue_head_t *bh_waitq_head(struct buffer_head *bh) -{ - return &bh_wait_queue_heads[hash_ptr(bh, BH_WAIT_TABLE_ORDER)].wqh; -} -EXPORT_SYMBOL(bh_waitq_head); - void wake_up_buffer(struct buffer_head *bh) { - wait_queue_head_t *wq = bh_waitq_head(bh); - smp_mb(); - __wake_up_bit(wq, &bh->b_state, BH_Lock); + wake_up_bit(&bh->b_state, BH_Lock); } EXPORT_SYMBOL(wake_up_buffer); -static void sync_buffer(struct buffer_head *bh) +static int sync_buffer(void *word) { struct block_device *bd; + struct buffer_head *bh + = container_of(word, struct buffer_head, b_state); smp_mb(); bd = bh->b_bdev; if (bd) blk_run_address_space(bd->bd_inode->i_mapping); + io_schedule(); + return 0; } void fastcall __lock_buffer(struct buffer_head *bh) { - wait_queue_head_t *wqh = bh_waitq_head(bh); - DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Lock); - - do { - prepare_to_wait_exclusive(wqh, &wait.wait, - TASK_UNINTERRUPTIBLE); - if (buffer_locked(bh)) { - sync_buffer(bh); - io_schedule(); - } - } while (test_set_buffer_locked(bh)); - finish_wait(wqh, &wait.wait); + wait_on_bit_lock(&bh->b_state, BH_Lock, sync_buffer, + TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(__lock_buffer); @@ -120,15 +94,7 @@ void fastcall unlock_buffer(struct buffe */ void __wait_on_buffer(struct buffer_head * bh) { - wait_queue_head_t *wqh = bh_waitq_head(bh); - DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Lock); - - prepare_to_wait(wqh, &wait.wait, TASK_UNINTERRUPTIBLE); - if (buffer_locked(bh)) { - sync_buffer(bh); - io_schedule(); - } - finish_wait(wqh, &wait.wait); + wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE); } static void @@ -3092,14 +3058,11 @@ static int buffer_cpu_notify(struct noti void __init buffer_init(void) { - int i; int nrpages; bh_cachep = kmem_cache_create("buffer_head", sizeof(struct buffer_head), 0, SLAB_PANIC, init_buffer_head, NULL); - for (i = 0; i < ARRAY_SIZE(bh_wait_queue_heads); i++) - init_waitqueue_head(&bh_wait_queue_heads[i].wqh); /* * Limit the bh occupancy to 10% of ZONE_NORMAL diff -puN fs/jbd/transaction.c~eliminate-bh-waitqueue-hashtable fs/jbd/transaction.c --- 25/fs/jbd/transaction.c~eliminate-bh-waitqueue-hashtable 2004-09-30 22:36:44.100167360 -0700 +++ 25-akpm/fs/jbd/transaction.c 2004-09-30 22:36:44.111165688 -0700 @@ -633,21 +633,21 @@ repeat: * disk then we cannot do copy-out here. */ if (jh->b_jlist == BJ_Shadow) { - wait_queue_head_t *wqh; - DEFINE_WAIT(wait); + DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Lock); + wait_queue_head_t *wqh + = bit_waitqueue(&bh->b_state, BH_Lock); JBUFFER_TRACE(jh, "on shadow: sleep"); jbd_unlock_bh_state(bh); /* commit wakes up all shadow buffers after IO */ - wqh = bh_waitq_head(bh); for ( ; ; ) { - prepare_to_wait(wqh, &wait, + prepare_to_wait(wqh, &wait.wait, TASK_UNINTERRUPTIBLE); if (jh->b_jlist != BJ_Shadow) break; schedule(); } - finish_wait(wqh, &wait); + finish_wait(wqh, &wait.wait); goto repeat; } diff -puN include/linux/wait.h~eliminate-bh-waitqueue-hashtable include/linux/wait.h --- 25/include/linux/wait.h~eliminate-bh-waitqueue-hashtable 2004-09-30 22:36:44.102167056 -0700 +++ 25-akpm/include/linux/wait.h 2004-09-30 22:36:44.113165384 -0700 @@ -24,6 +24,7 @@ #include #include #include +#include typedef struct __wait_queue wait_queue_t; typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int sync, void *key); @@ -141,6 +142,22 @@ extern void FASTCALL(__wake_up_sync(wait void FASTCALL(__wake_up_bit(wait_queue_head_t *, void *, int)); int FASTCALL(__wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, void *, int, int (*)(void *), unsigned)); int FASTCALL(__wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, void *, int, int (*)(void *), unsigned)); +wait_queue_head_t *FASTCALL(bit_waitqueue(void *, int)); + +/** + * wake_up_bit - wake up a waiter on a bit + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * + * There is a standard hashed waitqueue table for generic use. This + * is the part of the hashtable's accessor API that wakes up waiters + * on a bit. For instance, if one were to have waiters on a bitflag, + * one would call wake_up_bit() after clearing the bit. + */ +static inline void wake_up_bit(void *word, int bit) +{ + __wake_up_bit(bit_waitqueue(word, bit), word, bit); +} #define wake_up(x) __wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, NULL) #define wake_up_nr(x, nr) __wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr, NULL) @@ -344,6 +361,62 @@ int wake_bit_function(wait_queue_t *wait wait->func = autoremove_wake_function; \ INIT_LIST_HEAD(&wait->task_list); \ } while (0) + +/** + * wait_on_bit - wait for a bit to be cleared + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @action: the function used to sleep, which may take special actions + * @mode: the task state to sleep in + * + * There is a standard hashed waitqueue table for generic use. This + * is the part of the hashtable's accessor API that waits on a bit. + * For instance, if one were to have waiters on a bitflag, one would + * call wait_on_bit() in threads waiting for the bit to clear. + * One uses wait_on_bit() where one is waiting for the bit to clear, + * but has no intention of setting it. + */ +static inline int wait_on_bit(void *word, int bit, + int (*action)(void *), unsigned mode) +{ + DEFINE_WAIT_BIT(q, word, bit); + wait_queue_head_t *wqh; + + if (!test_bit(bit, word)) + return 0; + + wqh = bit_waitqueue(word, bit); + return __wait_on_bit(wqh, &q, word, bit, action, mode); +} + +/** + * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @action: the function used to sleep, which may take special actions + * @mode: the task state to sleep in + * + * There is a standard hashed waitqueue table for generic use. This + * is the part of the hashtable's accessor API that waits on a bit + * when one intends to set it, for instance, trying to lock bitflags. + * For instance, if one were to have waiters trying to set bitflag + * and waiting for it to clear before setting it, one would call + * wait_on_bit() in threads waiting to be able to set the bit. + * One uses wait_on_bit_lock() where one is waiting for the bit to + * clear with the intention of setting it, and when done, clearing it. + */ +static inline int wait_on_bit_lock(void *word, int bit, + int (*action)(void *), unsigned mode) +{ + DEFINE_WAIT_BIT(q, word, bit); + wait_queue_head_t *wqh; + + if (!test_bit(bit, word)) + return 0; + + wqh = bit_waitqueue(word, bit); + return __wait_on_bit_lock(wqh, &q, word, bit, action, mode); +} #endif /* __KERNEL__ */ diff -puN kernel/wait.c~eliminate-bh-waitqueue-hashtable kernel/wait.c --- 25/kernel/wait.c~eliminate-bh-waitqueue-hashtable 2004-09-30 22:36:44.104166752 -0700 +++ 25-akpm/kernel/wait.c 2004-09-30 22:36:44.113165384 -0700 @@ -8,6 +8,7 @@ #include #include #include +#include void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) { @@ -187,3 +188,13 @@ void fastcall __wake_up_bit(wait_queue_h __wake_up(wq, TASK_INTERRUPTIBLE|TASK_UNINTERRUPTIBLE, 1, &key); } EXPORT_SYMBOL(__wake_up_bit); + +fastcall wait_queue_head_t *bit_waitqueue(void *word, int bit) +{ + const int shift = BITS_PER_LONG == 32 ? 5 : 6; + const struct zone *zone = page_zone(virt_to_page(word)); + unsigned long val = (unsigned long)word << shift | bit; + + return &zone->wait_table[hash_long(val, zone->wait_table_bits)]; +} +EXPORT_SYMBOL(bit_waitqueue); _