diff -urNp 2.4.20rc1/drivers/block/ll_rw_blk.c hangs-2.4/drivers/block/ll_rw_blk.c
--- 2.4.20rc1/drivers/block/ll_rw_blk.c	Sat Nov  2 19:45:33 2002
+++ hangs-2.4/drivers/block/ll_rw_blk.c	Tue Nov 12 02:18:35 2002
@@ -590,12 +590,20 @@ static struct request *__get_request_wai
 	register struct request *rq;
 	DECLARE_WAITQUEUE(wait, current);
 
-	generic_unplug_device(q);
 	add_wait_queue_exclusive(&q->wait_for_requests[rw], &wait);
 	do {
 		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (q->rq[rw].count == 0)
+		if (q->rq[rw].count == 0) {
+			/*
+			 * All we care about is not to stall if any request
+			 * has been released after we set TASK_UNINTERRUPTIBLE.
+			 * This is the most efficient place to unplug the queue
+			 * in case we hit the race and we can get the request
+			 * without waiting.
+			 */
+			generic_unplug_device(q);
 			schedule();
+		}
 		spin_lock_irq(&io_request_lock);
 		rq = get_request(q, rw);
 		spin_unlock_irq(&io_request_lock);
@@ -829,9 +837,11 @@ void blkdev_release_request(struct reque
 	 */
 	if (q) {
 		list_add(&req->queue, &q->rq[rw].free);
-		if (++q->rq[rw].count >= q->batch_requests &&
-				waitqueue_active(&q->wait_for_requests[rw]))
-			wake_up(&q->wait_for_requests[rw]);
+		if (++q->rq[rw].count >= q->batch_requests) {
+			smp_mb();
+			if (waitqueue_active(&q->wait_for_requests[rw]))
+				wake_up(&q->wait_for_requests[rw]);
+		}
 	}
 }
 
@@ -1200,6 +1210,11 @@ void submit_bh(int rw, struct buffer_hea
 
 	generic_make_request(rw, bh);
 
+	/* fix race condition with wait_on_buffer() */
+	smp_mb(); /* spin_unlock may have inclusive semantics */
+	if (waitqueue_active(&bh->b_wait))
+		wake_up(&bh->b_wait);
+
 	switch (rw) {
 		case WRITE:
 			kstat.pgpgout += count;
diff -urNp 2.4.20rc1/fs/buffer.c hangs-2.4/fs/buffer.c
--- 2.4.20rc1/fs/buffer.c	Sat Nov  2 19:45:40 2002
+++ hangs-2.4/fs/buffer.c	Tue Nov 12 02:17:56 2002
@@ -153,10 +153,23 @@ void __wait_on_buffer(struct buffer_head
 	get_bh(bh);
 	add_wait_queue(&bh->b_wait, &wait);
 	do {
-		run_task_queue(&tq_disk);
 		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
 		if (!buffer_locked(bh))
 			break;
+		/*
+		 * We must read tq_disk in TQ_ACTIVE after the
+		 * add_wait_queue effect is visible to other cpus.
+		 * We could unplug some lines above, it wouldn't matter,
+		 * but we can't do that right after add_wait_queue
+		 * without an smp_mb() in between because spin_unlock
+		 * has inclusive semantics.
+		 * Doing it here is the most efficient place, so we
+		 * don't do a spurious unplug if we get a racy
+		 * wakeup that makes buffer_locked return 0, and
+		 * doing it here avoids an explicit smp_mb(): we
+		 * rely on the implicit one in set_task_state.
+		 */
+		run_task_queue(&tq_disk);
 		schedule();
 	} while (buffer_locked(bh));
 	tsk->state = TASK_RUNNING;
@@ -1508,6 +1521,9 @@ static int __block_write_full_page(struc
 
 	/* Done - end_buffer_io_async will unlock */
 	SetPageUptodate(page);
+
+	wakeup_page_waiters(page);
+
 	return 0;
 
 out:
@@ -1539,6 +1555,7 @@ out:
 	} while (bh != head);
 	if (need_unlock)
 		UnlockPage(page);
+	wakeup_page_waiters(page);
 	return err;
 }
 
@@ -1755,6 +1772,8 @@ int block_read_full_page(struct page *pa
 		else
 			submit_bh(READ, bh);
 	}
+
+	wakeup_page_waiters(page);
 	return 0;
 }
 
@@ -2368,6 +2387,7 @@ int brw_page(int rw, struct page *page,
 		submit_bh(rw, bh);
 		bh = next;
 	} while (bh != head);
+	wakeup_page_waiters(page);
 	return 0;
 }
 
diff -urNp 2.4.20rc1/fs/reiserfs/inode.c hangs-2.4/fs/reiserfs/inode.c
--- 2.4.20rc1/fs/reiserfs/inode.c	Sat Nov  2 19:45:46 2002
+++ hangs-2.4/fs/reiserfs/inode.c	Tue Nov 12 02:17:56 2002
@@ -1993,6 +1993,7 @@ static int reiserfs_write_full_page(stru
      */
     if (nr) {
         submit_bh_for_writepage(arr, nr) ;
+	wakeup_page_waiters(page);
     } else {
         UnlockPage(page) ;
     }
diff -urNp 2.4.20rc1/include/linux/pagemap.h hangs-2.4/include/linux/pagemap.h
--- 2.4.20rc1/include/linux/pagemap.h	Sat Nov  2 19:45:48 2002
+++ hangs-2.4/include/linux/pagemap.h	Tue Nov 12 04:35:52 2002
@@ -97,6 +97,8 @@ static inline void wait_on_page(struct p
 		___wait_on_page(page);
 }
 
+extern void wakeup_page_waiters(struct page * page);
+
 /*
  * Returns locked page at given index in given cache, creating it if needed.
  */
diff -urNp 2.4.20rc1/kernel/ksyms.c hangs-2.4/kernel/ksyms.c
--- 2.4.20rc1/kernel/ksyms.c	Sat Nov  2 19:45:48 2002
+++ hangs-2.4/kernel/ksyms.c	Tue Nov 12 04:36:25 2002
@@ -293,6 +293,7 @@ EXPORT_SYMBOL(filemap_fdatasync);
 EXPORT_SYMBOL(filemap_fdatawait);
 EXPORT_SYMBOL(lock_page);
 EXPORT_SYMBOL(unlock_page);
+EXPORT_SYMBOL(wakeup_page_waiters);
 
 /* device registration */
 EXPORT_SYMBOL(register_chrdev);
diff -urNp 2.4.20rc1/mm/filemap.c hangs-2.4/mm/filemap.c
--- 2.4.20rc1/mm/filemap.c	Sat Nov  2 19:45:48 2002
+++ hangs-2.4/mm/filemap.c	Tue Nov 12 04:35:40 2002
@@ -909,6 +909,20 @@ void lock_page(struct page *page)
 }
 
 /*
+ * This must be called after every submit_bh with end_io
+ * callbacks that would result in the blkdev layer waking
+ * up the page after a queue unplug.
+ */
+void wakeup_page_waiters(struct page * page)
+{
+	wait_queue_head_t * head;
+
+	head = page_waitqueue(page);
+	if (waitqueue_active(head))
+		wake_up(head);
+}
+
+/*
 * a rather lightweight function, finding and getting a reference to a
 * hashed page atomically.
 */
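
Note (not part of the patch): every hunk above relies on the same sleep/wakeup ordering rule. The following is a minimal, hypothetical sketch of that protocol in 2.4-style kernel C; waiter(), waker(), wq and condition are placeholder names standing in for the real wait queue head and the real condition (a request was freed, a buffer or page was unlocked).

/*
 * Illustrative sketch only -- not part of the patch above.
 */
#include <linux/sched.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(wq);
static int condition;

static void waiter(void)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(&wq, &wait);
	for (;;) {
		/*
		 * set_current_state() implies a memory barrier, so other
		 * cpus see us on the wait queue (and see our new state)
		 * before we re-check the condition below.
		 */
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (condition)
			break;
		/* the queue unplug in the patch goes right here */
		schedule();
	}
	current->state = TASK_RUNNING;
	remove_wait_queue(&wq, &wait);
}

static void waker(void)
{
	condition = 1;
	/*
	 * Order the condition update against the waitqueue_active()
	 * read: spin_unlock() is not a full barrier, so without this
	 * smp_mb() we could see an empty wait queue, skip the wake_up()
	 * and leave the waiter sleeping forever.
	 */
	smp_mb();
	if (waitqueue_active(&wq))
		wake_up(&wq);
}

The patch applies the waiter half in __get_request_wait() and __wait_on_buffer() (unplug only after the task state is set), and the waker half in blkdev_release_request(), submit_bh() and the new wakeup_page_waiters().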