diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/Documentation/laptop-mode.txt x/Documentation/laptop-mode.txt --- x-ref/Documentation/laptop-mode.txt 1970-01-01 01:00:00.000000000 +0100 +++ x/Documentation/laptop-mode.txt 2003-10-01 04:55:10.000000000 +0200 @@ -0,0 +1,72 @@ +Laptop mode +=========== + +This small doc describes the 2.4 laptop mode patch. + +Last updated 2003-05-25, Jens Axboe + +Introduction +------------ + +A few properties of the Linux vm make it virtually impossible to attempt +to spin down the hard drive in a laptop for a longer period of time (more +than a handful of seconds). This means you are lucky if you can even reach +the break even point with regards to power consumption, let alone expect any +decrease. + +One problem is the age time of dirty buffers. Linux uses 30 seconds per +default, so if you dirty any data then flushing of that data will commence +at most 30 seconds from then. Another is the journal commit interval of +journalled file systems such as ext3, which is 5 seconds on a stock kernel. +Both of these are tweakable either from proc/sysctl or as mount options +though, and thus partly solvable from user space. + +The kernel update daemon (kupdated) also runs at specific intervals, flushing +old dirty data out. Default is every 5 seconds, this too can be tweaked +from sysctl. + +So what does the laptop mode patch do? It attempts to fully utilize the +hard drive once it has been spun up, flushing the old dirty data out to +disk. Instead of flushing just the expired data, it will clean everything. +When a read causes the disk to spin up, we kick off this flushing after +a few seconds. This means that once the disk spins down again, everything +is up to date. That allows longer dirty data and journal expire times. + +It follows that you have to set long expire times to get long spin downs. 
+This means you could potentially lose 10 minutes worth of data, if you +set a 10 minute expire count instead of just 30 seconds worth. The biggest +risk here is undoubtedly running out of battery. + +Settings +-------- + +The main knob is /proc/sys/vm/laptop_mode. Setting that to 1 switches the +vm (and block layer) to laptop mode. Leaving it at 0 makes the kernel work +like before. When in laptop mode, you also want to extend the intervals +described above. See the laptop-mode.sh script for how to do that. + +It can happen that the disk still keeps spinning up and you don't quite +know why or what causes it. The laptop mode patch has a little helper for +that as well, /proc/sys/vm/block_dump. When set to 1, it will dump info to +the kernel message buffer about what process caused the io. Be very careful +when playing with this setting, it is advisable to shut down syslog first! + +Result +------ + +Using the laptop-mode.sh script with its default settings, I get the full +10 minutes worth of drive spin down. Provided your work load is cached, +the disk will only spin up every 10 minutes (well actually, 9 minutes and 55 +seconds due to the 5 second delay in flushing dirty data after the last read +completes). I can't tell you exactly how much extra battery life you will +gain in laptop mode, it will vary greatly on the laptop and workload in +question. The only way to know for sure is to try it out. Getting 10% extra +battery life is not unrealistic. + +Notes +----- + +Patch only changes journal expire time for ext3. reiserfs uses a hardwired +value, should be trivial to adapt though (basically just make it call +get_buffer_flushtime() and use that). I have not looked at other +journalling file systems, I'll happily accept patches to rectify that! 
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/drivers/block/ll_rw_blk.c x/drivers/block/ll_rw_blk.c --- x-ref/drivers/block/ll_rw_blk.c 2003-10-01 04:55:08.000000000 +0200 +++ x/drivers/block/ll_rw_blk.c 2003-10-01 04:55:10.000000000 +0200 @@ -184,6 +184,10 @@ restart: return ret; } +int block_dump = 0; + +static struct timer_list writeback_timer; + static inline int get_max_sectors(kdev_t dev) { if (!max_sectors[MAJOR(dev)]) @@ -1519,6 +1523,9 @@ void __submit_bh(int rw, struct buffer_h break; } cond_resched(); + + if (block_dump) + printk(KERN_DEBUG "%s: %s block %lu/%u on %s\n", current->comm, rw == WRITE ? "WRITE" : "READ", bh->b_rsector, count, kdevname(bh->b_rdev)); } /** @@ -1628,6 +1635,11 @@ sorry: extern int stram_device_init (void); #endif +static void blk_writeback_timer(unsigned long data) +{ + wakeup_bdflush(); + wakeup_kupdate(); +} /** * end_that_request_first - end I/O on one buffer. @@ -1684,10 +1696,18 @@ int end_that_request_first (struct reque return 0; } +extern int laptop_mode; + void end_that_request_last(struct request *req) { struct completion *waiting = req->waiting; + /* + * schedule the writeout of pending dirty data when the disk is idle + */ + if (laptop_mode && req->cmd == READ) + mod_timer(&writeback_timer, jiffies + 5 * HZ); + req_finished_io(req); blkdev_release_request(req); if (waiting) @@ -1719,6 +1739,9 @@ int __init blk_dev_init(void) blk_max_low_pfn = max_low_pfn - 1; blk_max_pfn = max_pfn - 1; + init_timer(&writeback_timer); + writeback_timer.function = blk_writeback_timer; + #ifdef CONFIG_AMIGA_Z2RAM z2_init(); #endif diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/fs/buffer.c x/fs/buffer.c --- x-ref/fs/buffer.c 2003-10-01 04:55:08.000000000 +0200 +++ x/fs/buffer.c 2003-10-01 04:55:10.000000000 +0200 @@ -89,6 +89,13 @@ static int grow_buffers(kdev_t dev, unsi static int osync_buffers_list(struct list_head *); static void 
__refile_buffer(struct buffer_head *); +/* + * A global sysctl-controlled flag which puts the machine into "laptop mode" + */ +int laptop_mode; + +static DECLARE_WAIT_QUEUE_HEAD(kupdate_wait); + /* This is used by some architectures to estimate available memory. */ atomic_t buffermem_pages = ATOMIC_INIT(0); @@ -1063,7 +1070,7 @@ static int bdflush_stop(void) dirty *= 100; dirty_limit = tot * bdf_prm.b_un.nfract_stop_bdflush; - if (dirty > dirty_limit) + if (!laptop_mode && dirty > dirty_limit) return 0; return 1; } @@ -1113,6 +1120,8 @@ void __mark_buffer_dirty(struct buffer_h void mark_buffer_dirty(struct buffer_head *bh) { if (!atomic_set_buffer_dirty(bh)) { + if (block_dump) + printk("%s: dirtied buffer\n", current->comm); __mark_dirty(bh); balance_dirty(); } @@ -2959,6 +2968,12 @@ void wakeup_bdflush(void) wake_up_interruptible(&bdflush_wait); } +void wakeup_kupdate(void) +{ + if (waitqueue_active(&kupdate_wait)) + wake_up(&kupdate_wait); +} + /* * Here we attempt to write back old buffers. 
We also try to flush inodes * and supers as well, since this function is essentially "update", and @@ -2979,7 +2994,9 @@ static int sync_old_buffers(void) spin_lock(&lru_list_lock); bh = lru_list[BUF_DIRTY]; - if (!bh || time_before(jiffies, bh->b_flushtime)) + if (!bh) + break; + if (time_before(jiffies, bh->b_flushtime) && !laptop_mode) break; if (write_some_buffers(NODEV)) continue; @@ -3117,16 +3134,20 @@ int kupdate(void *startup) complete((struct completion *)startup); for (;;) { + DECLARE_WAITQUEUE(wait, tsk); + + add_wait_queue(&kupdate_wait, &wait); + /* update interval */ interval = bdflush_interval(); if (interval) { tsk->state = TASK_INTERRUPTIBLE; schedule_timeout(interval); } else { - stop_kupdate: tsk->state = TASK_STOPPED; schedule(); /* wait for SIGCONT */ } + remove_wait_queue(&kupdate_wait, &wait); /* check for sigstop */ if (signal_pending(tsk)) { int stopped = 0; @@ -3137,13 +3158,17 @@ int kupdate(void *startup) } recalc_sigpending(tsk); spin_unlock_irq(&tsk->sigmask_lock); - if (stopped) - goto stop_kupdate; + if (stopped) { + tsk->state = TASK_STOPPED; + schedule(); /* wait for SIGCONT */ + } } #ifdef DEBUG printk(KERN_DEBUG "kupdate() activated...\n"); #endif sync_old_buffers(); + if (laptop_mode) + fsync_dev(NODEV); run_task_queue(&tq_disk); } } diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/fs/jbd/transaction.c x/fs/jbd/transaction.c --- x-ref/fs/jbd/transaction.c 2003-10-01 04:55:06.000000000 +0200 +++ x/fs/jbd/transaction.c 2003-10-01 04:55:10.000000000 +0200 @@ -57,7 +57,7 @@ static transaction_t * get_transaction ( transaction->t_journal = journal; transaction->t_state = T_RUNNING; transaction->t_tid = journal->j_transaction_sequence++; - transaction->t_expires = jiffies + (journal->j_commit_interval ? : bdflush_interval()); + transaction->t_expires = jiffies + (journal->j_commit_interval ? 
: get_buffer_flushtime()); INIT_LIST_HEAD(&transaction->t_jcb); /* Set up the commit timer for the new transaction. */ diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/fs/reiserfs/journal.c x/fs/reiserfs/journal.c --- x-ref/fs/reiserfs/journal.c 2003-08-26 00:13:04.000000000 +0200 +++ x/fs/reiserfs/journal.c 2003-10-01 04:55:17.000000000 +0200 @@ -58,6 +58,7 @@ #include #include #include +#include /* the number of mounted filesystems. This is used to decide when to ** start and kill the commit thread diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/include/linux/bdf_prm.h x/include/linux/bdf_prm.h --- x-ref/include/linux/bdf_prm.h 2003-10-01 04:55:06.000000000 +0200 +++ x/include/linux/bdf_prm.h 2003-10-01 04:55:10.000000000 +0200 @@ -29,6 +29,7 @@ union bdflush_param { unsigned int data[BDFLUSH_NR_PARAM]; }; extern union bdflush_param bdf_prm; -#define bdflush_interval() (bdf_prm.b_un.interval) +#define bdflush_interval() (bdf_prm.b_un.interval) +#define get_buffer_flushtime() (bdf_prm.b_un.age_buffer) #endif /* _LINUX_BDF_PRM_H */ diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/include/linux/fs.h x/include/linux/fs.h --- x-ref/include/linux/fs.h 2003-10-01 04:55:09.000000000 +0200 +++ x/include/linux/fs.h 2003-10-01 04:55:10.000000000 +0200 @@ -1604,8 +1604,10 @@ static inline struct buffer_head * sb_ge return get_hash_table(sb->s_dev, block, sb->s_blocksize); } extern void wakeup_bdflush(void); +extern void wakeup_kupdate(void); extern void put_unused_buffer_head(struct buffer_head * bh); extern struct buffer_head * get_unused_buffer_head(int async); +extern int block_dump; extern int brw_page(int, struct page *, kdev_t, int [], int); diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/include/linux/reiserfs_fs.h x/include/linux/reiserfs_fs.h --- x-ref/include/linux/reiserfs_fs.h 2003-09-02 03:46:56.000000000 
+0200 +++ x/include/linux/reiserfs_fs.h 2003-10-01 04:55:10.000000000 +0200 @@ -1663,7 +1663,7 @@ extern wait_queue_head_t reiserfs_commit #define JOURNAL_MAX_BATCH_DEFAULT 900 /* max blocks to batch into one transaction, don't make this any bigger than 900 */ #define JOURNAL_MIN_RATIO 2 #define JOURNAL_MAX_COMMIT_AGE 30 -#define JOURNAL_MAX_TRANS_AGE 30 +#define JOURNAL_MAX_TRANS_AGE get_buffer_flushtime() #define JOURNAL_PER_BALANCE_CNT (3 * (MAX_HEIGHT-2) + 9) /* both of these can be as low as 1, or as high as you want. The min is the diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/include/linux/sysctl.h x/include/linux/sysctl.h --- x-ref/include/linux/sysctl.h 2003-10-01 04:55:09.000000000 +0200 +++ x/include/linux/sysctl.h 2003-10-01 04:55:26.000000000 +0200 @@ -160,6 +160,8 @@ enum VM_MAPPED_RATIO=20, /* amount of unfreeable pages that triggers swapout */ VM_HEAP_STACK_GAP=21, /* int: page gap between heap and stack */ VM_ANON_LRU=22, /* immediatly insert anon pages in the vm page lru */ + VM_LAPTOP_MODE=23, /* kernel in laptop flush mode */ + VM_BLOCK_DUMP=24, /* dump fs activity to log */ }; diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/kernel/sysctl.c x/kernel/sysctl.c --- x-ref/kernel/sysctl.c 2003-10-01 04:55:09.000000000 +0200 +++ x/kernel/sysctl.c 2003-10-01 04:55:10.000000000 +0200 @@ -56,6 +56,8 @@ extern int core_setuid_ok; extern char core_pattern[]; extern int cad_pid; extern int sysctl_sched_yield_scale; +extern int laptop_mode; +extern int block_dump; /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ static int maxolduid = 65535; @@ -327,6 +329,10 @@ static ctl_table vm_table[] = { &vm_max_readahead,sizeof(int), 0644, NULL, &proc_dointvec}, {VM_MAX_MAP_COUNT, "max_map_count", &max_map_count, sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_LAPTOP_MODE, "laptop_mode", + &laptop_mode, sizeof(int), 0644, NULL, &proc_dointvec}, + 
{VM_BLOCK_DUMP, "block_dump", + &block_dump, sizeof(int), 0644, NULL, &proc_dointvec}, {0} }; diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids x-ref/mm/filemap.c x/mm/filemap.c --- x-ref/mm/filemap.c 2003-10-01 04:55:07.000000000 +0200 +++ x/mm/filemap.c 2003-10-01 04:55:10.000000000 +0200 @@ -167,6 +167,8 @@ void set_page_dirty(struct page *page) if (mapping && mapping->host) mark_inode_dirty_pages(mapping->host); + if (block_dump) + printk(KERN_DEBUG "%s: dirtied page\n", current->comm); } } }