From: Dave Kleikamp jfs has never worked on architecutures where the page size was not 4K. Signed-off-by: Dave Kleikamp Signed-off-by: Andrew Morton --- fs/jfs/inode.c | 31 - fs/jfs/jfs_dmap.c | 12 fs/jfs/jfs_imap.c | 14 fs/jfs/jfs_incore.h | 1 fs/jfs/jfs_logmgr.c | 71 ++- fs/jfs/jfs_logmgr.h | 5 fs/jfs/jfs_metapage.c | 914 +++++++++++++++++++++++++++++++++----------------- fs/jfs/jfs_metapage.h | 80 +++- fs/jfs/jfs_mount.c | 5 fs/jfs/jfs_txnmgr.c | 89 ++-- fs/jfs/jfs_umount.c | 16 fs/jfs/resize.c | 3 fs/jfs/super.c | 33 + 13 files changed, 809 insertions(+), 465 deletions(-) diff -puN fs/jfs/inode.c~jfs-support-page-sizes-greater-than-4k fs/jfs/inode.c --- 25/fs/jfs/inode.c~jfs-support-page-sizes-greater-than-4k Thu Apr 28 17:00:10 2005 +++ 25-akpm/fs/jfs/inode.c Thu Apr 28 17:00:10 2005 @@ -175,31 +175,22 @@ jfs_get_blocks(struct inode *ip, sector_ { s64 lblock64 = lblock; int rc = 0; - int take_locks; xad_t xad; s64 xaddr; int xflag; s32 xlen; /* - * If this is a special inode (imap, dmap) - * the lock should already be taken - */ - take_locks = (JFS_IP(ip)->fileset != AGGREGATE_I); - - /* * Take appropriate lock on inode */ - if (take_locks) { - if (create) - IWRITE_LOCK(ip); - else - IREAD_LOCK(ip); - } + if (create) + IWRITE_LOCK(ip); + else + IREAD_LOCK(ip); if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) && - (xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, 0) - == 0) && xlen) { + (!xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, 0)) && + xlen) { if (xflag & XAD_NOTRECORDED) { if (!create) /* @@ -258,12 +249,10 @@ jfs_get_blocks(struct inode *ip, sector_ /* * Release lock on inode */ - if (take_locks) { - if (create) - IWRITE_UNLOCK(ip); - else - IREAD_UNLOCK(ip); - } + if (create) + IWRITE_UNLOCK(ip); + else + IREAD_UNLOCK(ip); return rc; } diff -puN fs/jfs/jfs_dmap.c~jfs-support-page-sizes-greater-than-4k fs/jfs/jfs_dmap.c --- 25/fs/jfs/jfs_dmap.c~jfs-support-page-sizes-greater-than-4k Thu Apr 28 17:00:10 2005 +++ 25-akpm/fs/jfs/jfs_dmap.c Thu Apr 28 17:00:10 2005 @@ -471,6 +471,7 @@ dbUpdatePMap(struct inode *ipbmap, struct metapage *mp; struct jfs_log *log; int lsn, difft, diffp; + unsigned long flags; /* the blocks better be within the mapsize. */ if (blkno + nblocks > bmp->db_mapsize) { @@ -504,6 +505,7 @@ dbUpdatePMap(struct inode *ipbmap, 0); if (mp == NULL) return -EIO; + metapage_wait_for_io(mp); } dp = (struct dmap *) mp->data; @@ -578,34 +580,32 @@ dbUpdatePMap(struct inode *ipbmap, if (mp->lsn != 0) { /* inherit older/smaller lsn */ logdiff(diffp, mp->lsn, log); + LOGSYNC_LOCK(log, flags); if (difft < diffp) { mp->lsn = lsn; /* move bp after tblock in logsync list */ - LOGSYNC_LOCK(log); list_move(&mp->synclist, &tblk->synclist); - LOGSYNC_UNLOCK(log); } /* inherit younger/larger clsn */ - LOGSYNC_LOCK(log); logdiff(difft, tblk->clsn, log); logdiff(diffp, mp->clsn, log); if (difft > diffp) mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); } else { mp->log = log; mp->lsn = lsn; /* insert bp after tblock in logsync list */ - LOGSYNC_LOCK(log); + LOGSYNC_LOCK(log, flags); log->count++; list_add(&mp->synclist, &tblk->synclist); mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); } } diff -puN fs/jfs/jfs_imap.c~jfs-support-page-sizes-greater-than-4k fs/jfs/jfs_imap.c --- 25/fs/jfs/jfs_imap.c~jfs-support-page-sizes-greater-than-4k Thu Apr 28 17:00:10 2005 +++ 25-akpm/fs/jfs/jfs_imap.c Thu Apr 28 17:00:10 2005 @@ -502,7 +502,7 @@ struct inode *diReadSpecial(struct super } - ip->i_mapping->a_ops = &jfs_aops; + ip->i_mapping->a_ops = &jfs_metapage_aops; mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS); /* Allocations to metadata inodes should not affect quotas */ @@ -2791,6 +2791,7 @@ diUpdatePMap(struct inode *ipimap, u32 mask; struct jfs_log *log; int lsn, difft, diffp; + unsigned long flags; imap = JFS_IP(ipimap)->i_imap; /* get the iag number containing the inode */ @@ -2807,6 +2808,7 @@ diUpdatePMap(struct inode *ipimap, IREAD_UNLOCK(ipimap); if (rc) return (rc); + metapage_wait_for_io(mp); iagp = (struct iag *) mp->data; /* get the inode number and extent number of the inode within * the iag and the inode number within the extent. @@ -2870,30 +2872,28 @@ diUpdatePMap(struct inode *ipimap, /* inherit older/smaller lsn */ logdiff(difft, lsn, log); logdiff(diffp, mp->lsn, log); + LOGSYNC_LOCK(log, flags); if (difft < diffp) { mp->lsn = lsn; /* move mp after tblock in logsync list */ - LOGSYNC_LOCK(log); list_move(&mp->synclist, &tblk->synclist); - LOGSYNC_UNLOCK(log); } /* inherit younger/larger clsn */ - LOGSYNC_LOCK(log); assert(mp->clsn); logdiff(difft, tblk->clsn, log); logdiff(diffp, mp->clsn, log); if (difft > diffp) mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); } else { mp->log = log; mp->lsn = lsn; /* insert mp after tblock in logsync list */ - LOGSYNC_LOCK(log); + LOGSYNC_LOCK(log, flags); log->count++; list_add(&mp->synclist, &tblk->synclist); mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); } write_metapage(mp); return (0); diff -puN fs/jfs/jfs_incore.h~jfs-support-page-sizes-greater-than-4k fs/jfs/jfs_incore.h --- 25/fs/jfs/jfs_incore.h~jfs-support-page-sizes-greater-than-4k Thu Apr 28 17:00:10 2005 +++ 25-akpm/fs/jfs/jfs_incore.h Thu Apr 28 17:00:10 2005 @@ -165,6 +165,7 @@ struct jfs_sb_info { /* Formerly in ipbmap */ struct bmap *bmap; /* incore bmap descriptor */ struct nls_table *nls_tab; /* current codepage */ + struct inode *direct_inode; /* metadata inode */ uint state; /* mount/recovery state */ unsigned long flag; /* mount time flags */ uint p_state; /* state prior to going no integrity */ diff -puN fs/jfs/jfs_logmgr.c~jfs-support-page-sizes-greater-than-4k fs/jfs/jfs_logmgr.c --- 25/fs/jfs/jfs_logmgr.c~jfs-support-page-sizes-greater-than-4k Thu Apr 28 17:00:10 2005 +++ 25-akpm/fs/jfs/jfs_logmgr.c Thu Apr 28 17:00:10 2005 @@ -234,6 +234,7 @@ int lmLog(struct jfs_log * log, struct t int lsn; int diffp, difft; struct metapage *mp = NULL; + unsigned long flags; jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p", log, tblk, lrd, tlck); @@ -254,7 +255,7 @@ int lmLog(struct jfs_log * log, struct t */ lsn = log->lsn; - LOGSYNC_LOCK(log); + LOGSYNC_LOCK(log, flags); /* * initialize page lsn if first log write of the page @@ -310,7 +311,7 @@ int lmLog(struct jfs_log * log, struct t } } - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); /* * write the log record @@ -334,7 +335,6 @@ int lmLog(struct jfs_log * log, struct t return lsn; } - /* * NAME: lmWriteRecord() * @@ -945,6 +945,15 @@ static int lmLogSync(struct jfs_log * lo struct lrd lrd; int lsn; struct logsyncblk *lp; + struct jfs_sb_info *sbi; + unsigned long flags; + + /* push dirty metapages out to disk */ + list_for_each_entry(sbi, &log->sb_list, log_list) { + filemap_flush(sbi->ipbmap->i_mapping); + filemap_flush(sbi->ipimap->i_mapping); + filemap_flush(sbi->direct_inode->i_mapping); + } /* * forward syncpt @@ -954,10 +963,7 @@ static int lmLogSync(struct jfs_log * lo */ if (log->sync == log->syncpt) { - LOGSYNC_LOCK(log); - /* ToDo: push dirty metapages out to disk */ -// bmLogSync(log); - + LOGSYNC_LOCK(log, flags); if (list_empty(&log->synclist)) log->sync = log->lsn; else { @@ -965,7 +971,7 @@ static int lmLogSync(struct jfs_log * lo struct logsyncblk, synclist); log->sync = lp->lsn; } - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); } @@ -974,27 +980,6 @@ static int lmLogSync(struct jfs_log * lo * reset syncpt = sync */ if (log->sync != log->syncpt) { - struct jfs_sb_info *sbi; - - /* - * We need to make sure all of the "written" metapages - * actually make it to disk - */ - list_for_each_entry(sbi, &log->sb_list, log_list) { - if (sbi->flag & JFS_NOINTEGRITY) - continue; - filemap_fdatawrite(sbi->ipbmap->i_mapping); - filemap_fdatawrite(sbi->ipimap->i_mapping); - filemap_fdatawrite(sbi->sb->s_bdev->bd_inode->i_mapping); - } - list_for_each_entry(sbi, &log->sb_list, log_list) { - if (sbi->flag & JFS_NOINTEGRITY) - continue; - filemap_fdatawait(sbi->ipbmap->i_mapping); - filemap_fdatawait(sbi->ipimap->i_mapping); - filemap_fdatawait(sbi->sb->s_bdev->bd_inode->i_mapping); - } - lrd.logtid = 0; lrd.backchain = 0; lrd.type = cpu_to_le16(LOG_SYNCPT); @@ -1547,6 +1532,7 @@ void jfs_flush_journal(struct jfs_log *l { int i; struct tblock *target = NULL; + struct jfs_sb_info *sbi; /* jfs_write_inode may call us during read-only mount */ if (!log) @@ -1608,12 +1594,18 @@ void jfs_flush_journal(struct jfs_log *l if (wait < 2) return; + list_for_each_entry(sbi, &log->sb_list, log_list) { + filemap_fdatawrite(sbi->ipbmap->i_mapping); + filemap_fdatawrite(sbi->ipimap->i_mapping); + filemap_fdatawrite(sbi->direct_inode->i_mapping); + } + /* * If there was recent activity, we may need to wait * for the lazycommit thread to catch up */ if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) { - for (i = 0; i < 800; i++) { /* Too much? */ + for (i = 0; i < 200; i++) { /* Too much? */ msleep(250); if (list_empty(&log->cqueue) && list_empty(&log->synclist)) @@ -1621,7 +1613,24 @@ void jfs_flush_journal(struct jfs_log *l } } assert(list_empty(&log->cqueue)); - assert(list_empty(&log->synclist)); + if (!list_empty(&log->synclist)) { + struct logsyncblk *lp; + + list_for_each_entry(lp, &log->synclist, synclist) { + if (lp->xflag & COMMIT_PAGE) { + struct metapage *mp = (struct metapage *)lp; + dump_mem("orphan metapage", lp, + sizeof(struct metapage)); + dump_mem("page", mp->page, sizeof(struct page)); + } + else + dump_mem("orphan tblock", lp, + sizeof(struct tblock)); + } +// current->state = TASK_INTERRUPTIBLE; +// schedule(); + } + //assert(list_empty(&log->synclist)); clear_bit(log_FLUSH, &log->flag); } diff -puN fs/jfs/jfs_logmgr.h~jfs-support-page-sizes-greater-than-4k fs/jfs/jfs_logmgr.h --- 25/fs/jfs/jfs_logmgr.h~jfs-support-page-sizes-greater-than-4k Thu Apr 28 17:00:10 2005 +++ 25-akpm/fs/jfs/jfs_logmgr.h Thu Apr 28 17:00:10 2005 @@ -490,8 +490,9 @@ struct logsyncblk { */ #define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock) -#define LOGSYNC_LOCK(log) spin_lock(&(log)->synclock) -#define LOGSYNC_UNLOCK(log) spin_unlock(&(log)->synclock) +#define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags) +#define LOGSYNC_UNLOCK(log, flags) \ + spin_unlock_irqrestore(&(log)->synclock, flags) /* compute the difference in bytes of lsn from sync point */ #define logdiff(diff, lsn, log)\ diff -puN fs/jfs/jfs_metapage.c~jfs-support-page-sizes-greater-than-4k fs/jfs/jfs_metapage.c --- 25/fs/jfs/jfs_metapage.c~jfs-support-page-sizes-greater-than-4k Thu Apr 28 17:00:10 2005 +++ 25-akpm/fs/jfs/jfs_metapage.c Thu Apr 28 17:00:10 2005 @@ -1,5 +1,5 @@ /* - * Copyright (C) International Business Machines Corp., 2000-2003 + * Copyright (C) International Business Machines Corp., 2000-2005 * Portions Copyright (C) Christoph Hellwig, 2001-2002 * * This program is free software; you can redistribute it and/or modify @@ -18,10 +18,11 @@ */ #include +#include +#include #include #include #include -#include #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_filsys.h" @@ -29,8 +30,6 @@ #include "jfs_txnmgr.h" #include "jfs_debug.h" -static DEFINE_SPINLOCK(meta_lock); - #ifdef CONFIG_JFS_STATISTICS static struct { uint pagealloc; /* # of page allocations */ @@ -39,22 +38,8 @@ static struct { } mpStat; #endif - -#define HASH_BITS 10 /* This makes hash_table 1 4K page */ -#define HASH_SIZE (1 << HASH_BITS) -static struct metapage **hash_table = NULL; -static unsigned long hash_order; - - -static inline int metapage_locked(struct metapage *mp) -{ - return test_bit(META_locked, &mp->flag); -} - -static inline int trylock_metapage(struct metapage *mp) -{ - return test_and_set_bit(META_locked, &mp->flag); -} +#define metapage_locked(mp) test_bit(META_locked, &(mp)->flag) +#define trylock_metapage(mp) test_and_set_bit(META_locked, &(mp)->flag) static inline void unlock_metapage(struct metapage *mp) { @@ -62,26 +47,26 @@ static inline void unlock_metapage(struc wake_up(&mp->wait); } -static void __lock_metapage(struct metapage *mp) +static inline void __lock_metapage(struct metapage *mp) { DECLARE_WAITQUEUE(wait, current); - INCREMENT(mpStat.lockwait); - add_wait_queue_exclusive(&mp->wait, &wait); do { set_current_state(TASK_UNINTERRUPTIBLE); if (metapage_locked(mp)) { - spin_unlock(&meta_lock); + unlock_page(mp->page); schedule(); - spin_lock(&meta_lock); + lock_page(mp->page); } } while (trylock_metapage(mp)); __set_current_state(TASK_RUNNING); remove_wait_queue(&mp->wait, &wait); } -/* needs meta_lock */ +/* + * Must have mp->page locked + */ static inline void lock_metapage(struct metapage *mp) { if (trylock_metapage(mp)) @@ -92,6 +77,110 @@ static inline void lock_metapage(struct static kmem_cache_t *metapage_cache; static mempool_t *metapage_mempool; +#define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE) + +#if MPS_PER_PAGE > 1 + +struct meta_anchor { + int mp_count; + atomic_t io_count; + struct metapage *mp[MPS_PER_PAGE]; +}; +#define mp_anchor(page) ((struct meta_anchor *)page->private) + +static inline struct metapage *page_to_mp(struct page *page, uint offset) +{ + if (!PagePrivate(page)) + return NULL; + return mp_anchor(page)->mp[offset >> L2PSIZE]; +} + +static inline int insert_metapage(struct page *page, struct metapage *mp) +{ + struct meta_anchor *a; + int index; + int l2mp_blocks; /* log2 blocks per metapage */ + + if (PagePrivate(page)) + a = mp_anchor(page); + else { + a = kmalloc(sizeof(struct meta_anchor), GFP_NOFS); + if (!a) + return -ENOMEM; + memset(a, 0, sizeof(struct meta_anchor)); + page->private = (unsigned long)a; + SetPagePrivate(page); + kmap(page); + } + + if (mp) { + l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits; + index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1); + a->mp_count++; + a->mp[index] = mp; + } + + return 0; +} + +static inline void remove_metapage(struct page *page, struct metapage *mp) +{ + struct meta_anchor *a = mp_anchor(page); + int l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits; + int index; + + index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1); + + BUG_ON(a->mp[index] != mp); + + a->mp[index] = NULL; + if (--a->mp_count == 0) { + kfree(a); + page->private = 0; + ClearPagePrivate(page); + kunmap(page); + } +} + +static inline void inc_io(struct page *page) +{ + atomic_inc(&mp_anchor(page)->io_count); +} + +static inline void dec_io(struct page *page, void (*handler) (struct page *)) +{ + if (atomic_dec_and_test(&mp_anchor(page)->io_count)) + handler(page); +} + +#else +static inline struct metapage *page_to_mp(struct page *page, uint offset) +{ + return PagePrivate(page) ? (struct metapage *)page->private : NULL; +} + +static inline int insert_metapage(struct page *page, struct metapage *mp) +{ + if (mp) { + page->private = (unsigned long)mp; + SetPagePrivate(page); + kmap(page); + } + return 0; +} + +static inline void remove_metapage(struct page *page, struct metapage *mp) +{ + page->private = 0; + ClearPagePrivate(page); + kunmap(page); +} + +#define inc_io(page) do {} while(0) +#define dec_io(page, handler) handler(page) + +#endif + static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) { struct metapage *mp = (struct metapage *)foo; @@ -139,16 +228,6 @@ int __init metapage_init(void) kmem_cache_destroy(metapage_cache); return -ENOMEM; } - /* - * Now the hash list - */ - for (hash_order = 0; - ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE; - hash_order++); - hash_table = - (struct metapage **) __get_free_pages(GFP_KERNEL, hash_order); - assert(hash_table); - memset(hash_table, 0, PAGE_SIZE << hash_order); return 0; } @@ -159,73 +238,388 @@ void metapage_exit(void) kmem_cache_destroy(metapage_cache); } +static inline void drop_metapage(struct page *page, struct metapage *mp) +{ + if (mp->count || mp->nohomeok || test_bit(META_dirty, &mp->flag) || + test_bit(META_io, &mp->flag)) + return; + remove_metapage(page, mp); + INCREMENT(mpStat.pagefree); + free_metapage(mp); +} + /* - * Basically same hash as in pagemap.h, but using our hash table + * Metapage address space operations */ -static struct metapage **meta_hash(struct address_space *mapping, - unsigned long index) + +static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock, + unsigned int *len) +{ + int rc = 0; + int xflag; + s64 xaddr; + sector_t file_blocks = (inode->i_size + inode->i_blksize - 1) >> + inode->i_blkbits; + + if (lblock >= file_blocks) + return 0; + if (lblock + *len > file_blocks) + *len = file_blocks - lblock; + + if (inode->i_ino) { + rc = xtLookup(inode, (s64)lblock, *len, &xflag, &xaddr, len, 0); + if ((rc == 0) && *len) + lblock = (sector_t)xaddr; + else + lblock = 0; + } /* else no mapping */ + + return lblock; +} + +static void last_read_complete(struct page *page) +{ + if (!PageError(page)) + SetPageUptodate(page); + unlock_page(page); +} + +static int metapage_read_end_io(struct bio *bio, unsigned int bytes_done, + int err) { -#define i (((unsigned long)mapping)/ \ - (sizeof(struct inode) & ~(sizeof(struct inode) -1 ))) -#define s(x) ((x) + ((x) >> HASH_BITS)) - return hash_table + (s(i + index) & (HASH_SIZE - 1)); -#undef i -#undef s -} - -static struct metapage *search_hash(struct metapage ** hash_ptr, - struct address_space *mapping, - unsigned long index) -{ - struct metapage *ptr; - - for (ptr = *hash_ptr; ptr; ptr = ptr->hash_next) { - if ((ptr->mapping == mapping) && (ptr->index == index)) - return ptr; + struct page *page = bio->bi_private; + + if (bio->bi_size) + return 1; + + if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { + printk(KERN_ERR "metapage_read_end_io: I/O error\n"); + SetPageError(page); } - return NULL; + dec_io(page, last_read_complete); + bio_put(bio); + + return 0; } -static void add_to_hash(struct metapage * mp, struct metapage ** hash_ptr) +static void remove_from_logsync(struct metapage *mp) { - if (*hash_ptr) - (*hash_ptr)->hash_prev = mp; + struct jfs_log *log = mp->log; + unsigned long flags; +/* + * This can race. Recheck that log hasn't been set to null, and after + * acquiring logsync lock, recheck lsn + */ + if (!log) + return; - mp->hash_prev = NULL; - mp->hash_next = *hash_ptr; - *hash_ptr = mp; + LOGSYNC_LOCK(log, flags); + if (mp->lsn) { + mp->log = NULL; + mp->lsn = 0; + mp->clsn = 0; + log->count--; + list_del(&mp->synclist); + } + LOGSYNC_UNLOCK(log, flags); } -static void remove_from_hash(struct metapage * mp, struct metapage ** hash_ptr) +static void last_write_complete(struct page *page) { - if (mp->hash_prev) - mp->hash_prev->hash_next = mp->hash_next; - else { - assert(*hash_ptr == mp); - *hash_ptr = mp->hash_next; + struct metapage *mp; + unsigned int offset; + + for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { + mp = page_to_mp(page, offset); + if (mp && test_bit(META_io, &mp->flag)) { + if (mp->lsn) + remove_from_logsync(mp); + clear_bit(META_io, &mp->flag); + } + /* + * I'd like to call drop_metapage here, but I don't think it's + * safe unless I have the page locked + */ + } + end_page_writeback(page); +} + +static int metapage_write_end_io(struct bio *bio, unsigned int bytes_done, + int err) +{ + struct page *page = bio->bi_private; + + BUG_ON(!PagePrivate(page)); + + if (bio->bi_size) + return 1; + + if (! test_bit(BIO_UPTODATE, &bio->bi_flags)) { + printk(KERN_ERR "metapage_write_end_io: I/O error\n"); + SetPageError(page); + } + dec_io(page, last_write_complete); + bio_put(bio); + return 0; +} + +static int metapage_writepage(struct page *page, struct writeback_control *wbc) +{ + struct bio *bio = NULL; + unsigned int block_offset; /* block offset of mp within page */ + struct inode *inode = page->mapping->host; + unsigned int blocks_per_mp = JFS_SBI(inode->i_sb)->nbperpage; + unsigned int len; + unsigned int xlen; + struct metapage *mp; + int redirty = 0; + sector_t lblock; + sector_t pblock; + sector_t next_block = 0; + sector_t page_start; + unsigned long bio_bytes = 0; + unsigned long bio_offset = 0; + unsigned int offset; + + page_start = (sector_t)page->index << + (PAGE_CACHE_SHIFT - inode->i_blkbits); + BUG_ON(!PageLocked(page)); + BUG_ON(PageWriteback(page)); + + for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { + mp = page_to_mp(page, offset); + + if (!mp || !test_bit(META_dirty, &mp->flag)) + continue; + + if (mp->nohomeok && !test_bit(META_forcewrite, &mp->flag)) { + redirty = 1; + continue; + } + + clear_bit(META_dirty, &mp->flag); + block_offset = offset >> inode->i_blkbits; + lblock = page_start + block_offset; + if (bio) { + if (xlen && lblock == next_block) { + /* Contiguous, in memory & on disk */ + len = min(xlen, blocks_per_mp); + xlen -= len; + bio_bytes += len << inode->i_blkbits; + set_bit(META_io, &mp->flag); + continue; + } + /* Not contiguous */ + if (bio_add_page(bio, page, bio_bytes, bio_offset) < + bio_bytes) + goto add_failed; + /* + * Increment counter before submitting i/o to keep + * count from hitting zero before we're through + */ + inc_io(page); + if (!bio->bi_size) + goto dump_bio; + submit_bio(WRITE, bio); + bio = NULL; + } else { + set_page_writeback(page); + inc_io(page); + } + xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits; + pblock = metapage_get_blocks(inode, lblock, &xlen); + if (!pblock) { + /* Need better error handling */ + printk(KERN_ERR "JFS: metapage_get_blocks failed\n"); + dec_io(page, last_write_complete); + continue; + } + set_bit(META_io, &mp->flag); + len = min(xlen, (uint) JFS_SBI(inode->i_sb)->nbperpage); + + bio = bio_alloc(GFP_NOFS, 1); + bio->bi_bdev = inode->i_sb->s_bdev; + bio->bi_sector = pblock << (inode->i_blkbits - 9); + bio->bi_end_io = metapage_write_end_io; + bio->bi_private = page; + + /* Don't call bio_add_page yet, we may add to this vec */ + bio_offset = offset; + bio_bytes = len << inode->i_blkbits; + + xlen -= len; + next_block = lblock + len; + } + if (bio) { + if (bio_add_page(bio, page, bio_bytes, bio_offset) < bio_bytes) + goto add_failed; + if (!bio->bi_size) + goto dump_bio; + + submit_bio(WRITE, bio); + } + if (redirty) + redirty_page_for_writepage(wbc, page); + + unlock_page(page); + + return 0; +add_failed: + /* We should never reach here, since we're only adding one vec */ + printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n"); + goto skip; +dump_bio: + dump_mem("bio", bio, sizeof(*bio)); +skip: + bio_put(bio); + unlock_page(page); + dec_io(page, last_write_complete); + + return -EIO; +} + +static int metapage_readpage(struct file *fp, struct page *page) +{ + struct inode *inode = page->mapping->host; + struct bio *bio = NULL; + unsigned int block_offset; + unsigned int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits; + sector_t page_start; /* address of page in fs blocks */ + sector_t pblock; + unsigned int xlen; + unsigned int len; + unsigned int offset; + + BUG_ON(!PageLocked(page)); + page_start = (sector_t)page->index << + (PAGE_CACHE_SHIFT - inode->i_blkbits); + + block_offset = 0; + while (block_offset < blocks_per_page) { + xlen = blocks_per_page - block_offset; + pblock = metapage_get_blocks(inode, page_start + block_offset, + &xlen); + if (pblock) { + if (!PagePrivate(page)) + insert_metapage(page, NULL); + inc_io(page); + if (bio) + submit_bio(READ, bio); + + bio = bio_alloc(GFP_NOFS, 1); + bio->bi_bdev = inode->i_sb->s_bdev; + bio->bi_sector = pblock << (inode->i_blkbits - 9); + bio->bi_end_io = metapage_read_end_io; + bio->bi_private = page; + len = xlen << inode->i_blkbits; + offset = block_offset << inode->i_blkbits; + if (bio_add_page(bio, page, len, offset) < len) + goto add_failed; + block_offset += xlen; + } else + block_offset++; + } + if (bio) + submit_bio(READ, bio); + else + unlock_page(page); + + return 0; + +add_failed: + printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n"); + bio_put(bio); + dec_io(page, last_read_complete); + return -EIO; +} + +static int metapage_releasepage(struct page *page, int gfp_mask) +{ + struct metapage *mp; + int busy = 0; + unsigned int offset; + + for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { + mp = page_to_mp(page, offset); + + if (!mp) + continue; + + jfs_info("metapage_releasepage: mp = 0x%p", mp); + if (mp->count || mp->nohomeok) { + jfs_info("count = %ld, nohomeok = %d", mp->count, + mp->nohomeok); + busy = 1; + continue; + } + wait_on_page_writeback(page); + //WARN_ON(test_bit(META_dirty, &mp->flag)); + if (test_bit(META_dirty, &mp->flag)) { + dump_mem("dirty mp in metapage_releasepage", mp, + sizeof(struct metapage)); + dump_mem("page", page, sizeof(struct page)); + dump_stack(); + } + WARN_ON(mp->lsn); + if (mp->lsn) + remove_from_logsync(mp); + remove_metapage(page, mp); + INCREMENT(mpStat.pagefree); + free_metapage(mp); } + if (busy) + return -1; + + return 0; +} + +static int metapage_invalidatepage(struct page *page, unsigned long offset) +{ + BUG_ON(offset); - if (mp->hash_next) - mp->hash_next->hash_prev = mp->hash_prev; + if (PageWriteback(page)) + return 0; + + return metapage_releasepage(page, 0); } +struct address_space_operations jfs_metapage_aops = { + .readpage = metapage_readpage, + .writepage = metapage_writepage, + .sync_page = block_sync_page, + .releasepage = metapage_releasepage, + .invalidatepage = metapage_invalidatepage, + .set_page_dirty = __set_page_dirty_nobuffers, +}; + struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, unsigned int size, int absolute, unsigned long new) { - struct metapage **hash_ptr; int l2BlocksPerPage; int l2bsize; struct address_space *mapping; - struct metapage *mp; + struct metapage *mp = NULL; + struct page *page; unsigned long page_index; unsigned long page_offset; - jfs_info("__get_metapage: inode = 0x%p, lblock = 0x%lx", inode, lblock); + jfs_info("__get_metapage: ino = %ld, lblock = 0x%lx, abs=%d", + inode->i_ino, lblock, absolute); + l2bsize = inode->i_blkbits; + l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize; + page_index = lblock >> l2BlocksPerPage; + page_offset = (lblock - (page_index << l2BlocksPerPage)) << l2bsize; + if ((page_offset + size) > PAGE_CACHE_SIZE) { + jfs_err("MetaData crosses page boundary!!"); + jfs_err("lblock = %lx, size = %d", lblock, size); + dump_stack(); + return NULL; + } if (absolute) - mapping = inode->i_sb->s_bdev->bd_inode->i_mapping; + mapping = JFS_SBI(inode->i_sb)->direct_inode->i_mapping; else { /* * If an nfs client tries to read an inode that is larger @@ -237,312 +631,212 @@ struct metapage *__get_metapage(struct i mapping = inode->i_mapping; } - hash_ptr = meta_hash(mapping, lblock); -again: - spin_lock(&meta_lock); - mp = search_hash(hash_ptr, mapping, lblock); + if (new && (PSIZE == PAGE_CACHE_SIZE)) { + page = grab_cache_page(mapping, page_index); + if (!page) { + jfs_err("grab_cache_page failed!"); + return NULL; + } + SetPageUptodate(page); + } else { + page = read_cache_page(mapping, page_index, + (filler_t *)mapping->a_ops->readpage, NULL); + if (IS_ERR(page)) { + jfs_err("read_cache_page failed!"); + return NULL; + } + lock_page(page); + } + + mp = page_to_mp(page, page_offset); if (mp) { - page_found: - if (test_bit(META_stale, &mp->flag)) { - spin_unlock(&meta_lock); - msleep(1); - goto again; + if (mp->logical_size != size) { + jfs_error(inode->i_sb, + "__get_metapage: mp->logical_size != size"); + jfs_err("logical_size = %d, size = %d", + mp->logical_size, size); + dump_stack(); + goto unlock; } mp->count++; lock_metapage(mp); - spin_unlock(&meta_lock); if (test_bit(META_discard, &mp->flag)) { if (!new) { jfs_error(inode->i_sb, "__get_metapage: using a " "discarded metapage"); - release_metapage(mp); - return NULL; + discard_metapage(mp); + goto unlock; } clear_bit(META_discard, &mp->flag); } - jfs_info("__get_metapage: found 0x%p, in hash", mp); - if (mp->logical_size != size) { - jfs_error(inode->i_sb, - "__get_metapage: mp->logical_size != size"); - release_metapage(mp); - return NULL; - } } else { - l2bsize = inode->i_blkbits; - l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize; - page_index = lblock >> l2BlocksPerPage; - page_offset = (lblock - (page_index << l2BlocksPerPage)) << - l2bsize; - if ((page_offset + size) > PAGE_CACHE_SIZE) { - spin_unlock(&meta_lock); - jfs_err("MetaData crosses page boundary!!"); - return NULL; - } - - /* - * Locks held on aggregate inode pages are usually - * not held long, and they are taken in critical code - * paths (committing dirty inodes, txCommit thread) - * - * Attempt to get metapage without blocking, tapping into - * reserves if necessary. - */ - mp = NULL; - if (JFS_IP(inode)->fileset == AGGREGATE_I) { - mp = alloc_metapage(GFP_ATOMIC); - if (!mp) { - /* - * mempool is supposed to protect us from - * failing here. We will try a blocking - * call, but a deadlock is possible here - */ - printk(KERN_WARNING - "__get_metapage: atomic call to mempool_alloc failed.\n"); - printk(KERN_WARNING - "Will attempt blocking call\n"); - } - } - if (!mp) { - struct metapage *mp2; - - spin_unlock(&meta_lock); - mp = alloc_metapage(GFP_NOFS); - spin_lock(&meta_lock); - - /* we dropped the meta_lock, we need to search the - * hash again. - */ - mp2 = search_hash(hash_ptr, mapping, lblock); - if (mp2) { - free_metapage(mp); - mp = mp2; - goto page_found; - } - } + INCREMENT(mpStat.pagealloc); + mp = alloc_metapage(GFP_NOFS); + mp->page = page; mp->flag = 0; - lock_metapage(mp); - if (absolute) - set_bit(META_absolute, &mp->flag); mp->xflag = COMMIT_PAGE; mp->count = 1; - atomic_set(&mp->nohomeok,0); - mp->mapping = mapping; - mp->index = lblock; - mp->page = NULL; + mp->nohomeok = 0; mp->logical_size = size; - add_to_hash(mp, hash_ptr); - spin_unlock(&meta_lock); - - if (new) { - jfs_info("__get_metapage: Calling grab_cache_page"); - mp->page = grab_cache_page(mapping, page_index); - if (!mp->page) { - jfs_err("grab_cache_page failed!"); - goto freeit; - } else { - INCREMENT(mpStat.pagealloc); - unlock_page(mp->page); - } - } else { - jfs_info("__get_metapage: Calling read_cache_page"); - mp->page = read_cache_page(mapping, lblock, - (filler_t *)mapping->a_ops->readpage, NULL); - if (IS_ERR(mp->page)) { - jfs_err("read_cache_page failed!"); - goto freeit; - } else - INCREMENT(mpStat.pagealloc); + mp->data = page_address(page) + page_offset; + mp->index = lblock; + if (unlikely(insert_metapage(page, mp))) { + free_metapage(mp); + goto unlock; } - mp->data = kmap(mp->page) + page_offset; + lock_metapage(mp); } - if (new) + if (new) { + jfs_info("zeroing mp = 0x%p", mp); memset(mp->data, 0, PSIZE); + } - jfs_info("__get_metapage: returning = 0x%p", mp); + unlock_page(page); + jfs_info("__get_metapage: returning = 0x%p data = 0x%p", mp, mp->data); return mp; -freeit: - spin_lock(&meta_lock); - remove_from_hash(mp, hash_ptr); - free_metapage(mp); - spin_unlock(&meta_lock); +unlock: + unlock_page(page); return NULL; } -void hold_metapage(struct metapage * mp, int force) +void grab_metapage(struct metapage * mp) { - spin_lock(&meta_lock); - + jfs_info("grab_metapage: mp = 0x%p", mp); + page_cache_get(mp->page); + lock_page(mp->page); mp->count++; - - if (force) { - ASSERT (!(test_bit(META_forced, &mp->flag))); - if (trylock_metapage(mp)) - set_bit(META_forced, &mp->flag); - } else - lock_metapage(mp); - - spin_unlock(&meta_lock); + lock_metapage(mp); + unlock_page(mp->page); } -static void __write_metapage(struct metapage * mp) +void force_metapage(struct metapage *mp) { - int l2bsize = mp->mapping->host->i_blkbits; - int l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize; - unsigned long page_index; - unsigned long page_offset; - int rc; - - jfs_info("__write_metapage: mp = 0x%p", mp); - - page_index = mp->page->index; - page_offset = - (mp->index - (page_index << l2BlocksPerPage)) << l2bsize; + struct page *page = mp->page; + jfs_info("force_metapage: mp = 0x%p", mp); + set_bit(META_forcewrite, &mp->flag); + clear_bit(META_sync, &mp->flag); + page_cache_get(page); + lock_page(page); + set_page_dirty(page); + write_one_page(page, 1); + clear_bit(META_forcewrite, &mp->flag); + page_cache_release(page); +} +extern void hold_metapage(struct metapage *mp) +{ lock_page(mp->page); - rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset, - page_offset + - mp->logical_size); - if (rc) { - jfs_err("prepare_write return %d!", rc); - ClearPageUptodate(mp->page); +} + +extern void put_metapage(struct metapage *mp) +{ + if (mp->count || mp->nohomeok) { + /* Someone else will release this */ unlock_page(mp->page); - clear_bit(META_dirty, &mp->flag); return; } - rc = mp->mapping->a_ops->commit_write(NULL, mp->page, page_offset, - page_offset + - mp->logical_size); - if (rc) { - jfs_err("commit_write returned %d", rc); - } - + page_cache_get(mp->page); + mp->count++; + lock_metapage(mp); unlock_page(mp->page); - clear_bit(META_dirty, &mp->flag); - - jfs_info("__write_metapage done"); -} - -static inline void sync_metapage(struct metapage *mp) -{ - struct page *page = mp->page; - - page_cache_get(page); - lock_page(page); - - /* we're done with this page - no need to check for errors */ - if (page_has_buffers(page)) - write_one_page(page, 1); - else - unlock_page(page); - page_cache_release(page); + release_metapage(mp); } void release_metapage(struct metapage * mp) { - struct jfs_log *log; - + struct page *page = mp->page; jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag); - spin_lock(&meta_lock); - if (test_bit(META_forced, &mp->flag)) { - clear_bit(META_forced, &mp->flag); - mp->count--; - spin_unlock(&meta_lock); - return; - } + BUG_ON(!page); + + lock_page(page); + unlock_metapage(mp); assert(mp->count); - if (--mp->count || atomic_read(&mp->nohomeok)) { - unlock_metapage(mp); - spin_unlock(&meta_lock); + if (--mp->count || mp->nohomeok) { + unlock_page(page); + page_cache_release(page); return; } - if (mp->page) { - set_bit(META_stale, &mp->flag); - spin_unlock(&meta_lock); - kunmap(mp->page); - mp->data = NULL; - if (test_bit(META_dirty, &mp->flag)) - __write_metapage(mp); + if (test_bit(META_dirty, &mp->flag)) { + set_page_dirty(page); if (test_bit(META_sync, &mp->flag)) { - sync_metapage(mp); clear_bit(META_sync, &mp->flag); + write_one_page(page, 1); + lock_page(page); /* write_one_page unlocks the page */ } + } else if (mp->lsn) /* discard_metapage doesn't remove it */ + remove_from_logsync(mp); - if (test_bit(META_discard, &mp->flag)) { - lock_page(mp->page); - block_invalidatepage(mp->page, 0); - unlock_page(mp->page); - } - - page_cache_release(mp->page); - mp->page = NULL; - INCREMENT(mpStat.pagefree); - spin_lock(&meta_lock); - } +#if MPS_PER_PAGE == 1 + /* + * If we know this is the only thing in the page, we can throw + * the page out of the page cache. If pages are larger, we + * don't want to do this. + */ - if (mp->lsn) { - /* - * Remove metapage from logsynclist. - */ - log = mp->log; - LOGSYNC_LOCK(log); - mp->log = NULL; - mp->lsn = 0; - mp->clsn = 0; - log->count--; - list_del(&mp->synclist); - LOGSYNC_UNLOCK(log); + /* Retest mp->count since we may have released page lock */ + if (test_bit(META_discard, &mp->flag) && !mp->count) { + clear_page_dirty(page); + ClearPageUptodate(page); +#ifdef _NOT_YET + if (page->mapping) { + /* Remove from page cache and page cache reference */ + remove_from_page_cache(page); + page_cache_release(page); + metapage_releasepage(page, 0); + } +#endif } - remove_from_hash(mp, meta_hash(mp->mapping, mp->index)); - spin_unlock(&meta_lock); - - free_metapage(mp); +#else + /* Try to keep metapages from using up too much memory */ + drop_metapage(page, mp); +#endif + unlock_page(page); + page_cache_release(page); } void __invalidate_metapages(struct inode *ip, s64 addr, int len) { - struct metapage **hash_ptr; - unsigned long lblock; + sector_t lblock; int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits; + int BlocksPerPage = 1 << l2BlocksPerPage; /* All callers are interested in block device's mapping */ - struct address_space *mapping = ip->i_sb->s_bdev->bd_inode->i_mapping; + struct address_space *mapping = + JFS_SBI(ip->i_sb)->direct_inode->i_mapping; struct metapage *mp; struct page *page; + unsigned int offset; /* - * First, mark metapages to discard. They will eventually be + * Mark metapages to discard. They will eventually be * released, but should not be written. */ - for (lblock = addr; lblock < addr + len; - lblock += 1 << l2BlocksPerPage) { - hash_ptr = meta_hash(mapping, lblock); -again: - spin_lock(&meta_lock); - mp = search_hash(hash_ptr, mapping, lblock); - if (mp) { - if (test_bit(META_stale, &mp->flag)) { - spin_unlock(&meta_lock); - msleep(1); - goto again; - } + for (lblock = addr & ~(BlocksPerPage - 1); lblock < addr + len; + lblock += BlocksPerPage) { + page = find_lock_page(mapping, lblock >> l2BlocksPerPage); + if (!page) + continue; + for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { + mp = page_to_mp(page, offset); + if (!mp) + continue; + if (mp->index < addr) + continue; + if (mp->index >= addr + len) + break; clear_bit(META_dirty, &mp->flag); set_bit(META_discard, &mp->flag); - spin_unlock(&meta_lock); - } else { - spin_unlock(&meta_lock); - page = find_lock_page(mapping, lblock>>l2BlocksPerPage); - if (page) { - block_invalidatepage(page, 0); - unlock_page(page); - page_cache_release(page); - } + if (mp->lsn) + remove_from_logsync(mp); } + unlock_page(page); + page_cache_release(page); } } diff -puN fs/jfs/jfs_metapage.h~jfs-support-page-sizes-greater-than-4k fs/jfs/jfs_metapage.h --- 25/fs/jfs/jfs_metapage.h~jfs-support-page-sizes-greater-than-4k Thu Apr 28 17:00:10 2005 +++ 25-akpm/fs/jfs/jfs_metapage.h Thu Apr 28 17:00:10 2005 @@ -33,38 +33,27 @@ struct metapage { unsigned long flag; /* See Below */ unsigned long count; /* Reference count */ void *data; /* Data pointer */ - - /* list management stuff */ - struct metapage *hash_prev; - struct metapage *hash_next; /* Also used for free list */ - - /* - * mapping & index become redundant, but we need these here to - * add the metapage to the hash before we have the real page - */ - struct address_space *mapping; - unsigned long index; + sector_t index; /* block address of page */ wait_queue_head_t wait; /* implementation */ struct page *page; - unsigned long logical_size; + unsigned int logical_size; /* Journal management */ int clsn; - atomic_t nohomeok; + int nohomeok; struct jfs_log *log; }; /* metapage flag */ #define META_locked 0 -#define META_absolute 1 -#define META_free 2 -#define META_dirty 3 -#define META_sync 4 -#define META_discard 5 -#define META_forced 6 -#define META_stale 7 +#define META_free 1 +#define META_dirty 2 +#define META_sync 3 +#define META_discard 4 +#define META_forcewrite 5 +#define META_io 6 #define mark_metapage_dirty(mp) set_bit(META_dirty, &(mp)->flag) @@ -80,7 +69,16 @@ extern struct metapage *__get_metapage(s __get_metapage(inode, lblock, size, absolute, TRUE) extern void release_metapage(struct metapage *); -extern void hold_metapage(struct metapage *, int); +extern void grab_metapage(struct metapage *); +extern void force_metapage(struct metapage *); + +/* + * hold_metapage and put_metapage are used in conjuction. The page lock + * is not dropped between the two, so no other threads can get or release + * the metapage + */ +extern void hold_metapage(struct metapage *); +extern void put_metapage(struct metapage *); static inline void write_metapage(struct metapage *mp) { @@ -101,6 +99,46 @@ static inline void discard_metapage(stru release_metapage(mp); } +static inline void metapage_nohomeok(struct metapage *mp) +{ + struct page *page = mp->page; + lock_page(page); + if (!mp->nohomeok++) { + mark_metapage_dirty(mp); + page_cache_get(page); + wait_on_page_writeback(page); + } + unlock_page(page); +} + +/* + * This serializes access to mp->lsn when metapages are added to logsynclist + * without setting nohomeok. i.e. updating imap & dmap + */ +static inline void metapage_wait_for_io(struct metapage *mp) +{ + if (test_bit(META_io, &mp->flag)) + wait_on_page_writeback(mp->page); +} + +/* + * This is called when already holding the metapage + */ +static inline void _metapage_homeok(struct metapage *mp) +{ + if (!--mp->nohomeok) + page_cache_release(mp->page); +} + +static inline void metapage_homeok(struct metapage *mp) +{ + hold_metapage(mp); + _metapage_homeok(mp); + put_metapage(mp); +} + +extern struct address_space_operations jfs_metapage_aops; + /* * This routines invalidate all pages for an extent. */ diff -puN fs/jfs/jfs_mount.c~jfs-support-page-sizes-greater-than-4k fs/jfs/jfs_mount.c --- 25/fs/jfs/jfs_mount.c~jfs-support-page-sizes-greater-than-4k Thu Apr 28 17:00:10 2005 +++ 25-akpm/fs/jfs/jfs_mount.c Thu Apr 28 17:00:10 2005 @@ -285,11 +285,6 @@ int jfs_mount_rw(struct super_block *sb, */ logMOUNT(sb); - /* - * Set page cache allocation policy - */ - mapping_set_gfp_mask(sb->s_bdev->bd_inode->i_mapping, GFP_NOFS); - return rc; } diff -puN fs/jfs/jfs_txnmgr.c~jfs-support-page-sizes-greater-than-4k fs/jfs/jfs_txnmgr.c --- 25/fs/jfs/jfs_txnmgr.c~jfs-support-page-sizes-greater-than-4k Thu Apr 28 17:00:10 2005 +++ 25-akpm/fs/jfs/jfs_txnmgr.c Thu Apr 28 17:00:10 2005 @@ -227,6 +227,7 @@ static lid_t txLockAlloc(void) static void txLockFree(lid_t lid) { + TxLock[lid].tid = 0; TxLock[lid].next = TxAnchor.freelock; TxAnchor.freelock = lid; TxAnchor.tlocksInUse--; @@ -633,8 +634,10 @@ struct tlock *txLock(tid_t tid, struct i /* is page locked by the requester transaction ? */ tlck = lid_to_tlock(lid); - if ((xtid = tlck->tid) == tid) + if ((xtid = tlck->tid) == tid) { + TXN_UNLOCK(); goto grantLock; + } /* * is page locked by anonymous transaction/lock ? @@ -649,6 +652,7 @@ struct tlock *txLock(tid_t tid, struct i */ if (xtid == 0) { tlck->tid = tid; + TXN_UNLOCK(); tblk = tid_to_tblock(tid); /* * The order of the tlocks in the transaction is important @@ -706,17 +710,18 @@ struct tlock *txLock(tid_t tid, struct i */ tlck->tid = tid; + TXN_UNLOCK(); + /* mark tlock for meta-data page */ if (mp->xflag & COMMIT_PAGE) { tlck->flag = tlckPAGELOCK; /* mark the page dirty and nohomeok */ - mark_metapage_dirty(mp); - atomic_inc(&mp->nohomeok); + metapage_nohomeok(mp); jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p", - mp, atomic_read(&mp->nohomeok), tid, tlck); + mp, mp->nohomeok, tid, tlck); /* if anonymous transaction, and buffer is on the group * commit synclist, mark inode to show this. This will @@ -762,8 +767,10 @@ struct tlock *txLock(tid_t tid, struct i if (tlck->next == 0) { /* This inode's first anonymous transaction */ jfs_ip->atltail = lid; + TXN_LOCK(); list_add_tail(&jfs_ip->anon_inode_list, &TxAnchor.anon_list); + TXN_UNLOCK(); } } @@ -821,8 +828,6 @@ struct tlock *txLock(tid_t tid, struct i grantLock: tlck->type |= type; - TXN_UNLOCK(); - return tlck; /* @@ -841,11 +846,19 @@ struct tlock *txLock(tid_t tid, struct i BUG(); } INCREMENT(stattx.waitlock); /* statistics */ + TXN_UNLOCK(); release_metapage(mp); + TXN_LOCK(); + xtid = tlck->tid; /* reaquire after dropping TXN_LOCK */ jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d", tid, xtid, lid); - TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor); + + /* Recheck everything since dropping TXN_LOCK */ + if (xtid && (tlck->mp == mp) && (mp->lid == lid)) + TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor); + else + TXN_UNLOCK(); jfs_info("txLock: awakened tid = %d, lid = %d", tid, lid); return NULL; @@ -906,6 +919,7 @@ static void txUnlock(struct tblock * tbl struct metapage *mp; struct jfs_log *log; int difft, diffp; + unsigned long flags; jfs_info("txUnlock: tblk = 0x%p", tblk); log = JFS_SBI(tblk->sb)->log; @@ -925,19 +939,14 @@ static void txUnlock(struct tblock * tbl assert(mp->xflag & COMMIT_PAGE); /* hold buffer - * - * It's possible that someone else has the metapage. - * The only things were changing are nohomeok, which - * is handled atomically, and clsn which is protected - * by the LOGSYNC_LOCK. */ - hold_metapage(mp, 1); + hold_metapage(mp); - assert(atomic_read(&mp->nohomeok) > 0); - atomic_dec(&mp->nohomeok); + assert(mp->nohomeok > 0); + _metapage_homeok(mp); /* inherit younger/larger clsn */ - LOGSYNC_LOCK(log); + LOGSYNC_LOCK(log, flags); if (mp->clsn) { logdiff(difft, tblk->clsn, log); logdiff(diffp, mp->clsn, log); @@ -945,16 +954,11 @@ static void txUnlock(struct tblock * tbl mp->clsn = tblk->clsn; } else mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); assert(!(tlck->flag & tlckFREEPAGE)); - if (tlck->flag & tlckWRITEPAGE) { - write_metapage(mp); - } else { - /* release page which has been forced */ - release_metapage(mp); - } + put_metapage(mp); } /* insert tlock, and linelock(s) of the tlock if any, @@ -981,10 +985,10 @@ static void txUnlock(struct tblock * tbl * has been inserted in logsync list at txUpdateMap()) */ if (tblk->lsn) { - LOGSYNC_LOCK(log); + LOGSYNC_LOCK(log, flags); log->count--; list_del(&tblk->synclist); - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); } } @@ -1573,8 +1577,8 @@ static int dataLog(struct jfs_log * log, * the last entry, so don't bother logging this */ mp->lid = 0; - hold_metapage(mp, 0); - atomic_dec(&mp->nohomeok); + grab_metapage(mp); + metapage_homeok(mp); discard_metapage(mp); tlck->mp = NULL; return 0; @@ -2270,7 +2274,8 @@ void txForce(struct tblock * tblk) tlck->flag &= ~tlckWRITEPAGE; /* do not release page to freelist */ - + force_metapage(mp); +#if 0 /* * The "right" thing to do here is to * synchronously write the metadata. @@ -2282,9 +2287,10 @@ void txForce(struct tblock * tblk) * we can get by with synchronously writing * the pages when they are released. */ - assert(atomic_read(&mp->nohomeok)); + assert(mp->nohomeok); set_bit(META_dirty, &mp->flag); set_bit(META_sync, &mp->flag); +#endif } } } @@ -2344,7 +2350,7 @@ static void txUpdateMap(struct tblock * */ mp = tlck->mp; ASSERT(mp->xflag & COMMIT_PAGE); - hold_metapage(mp, 0); + grab_metapage(mp); } /* @@ -2394,8 +2400,8 @@ static void txUpdateMap(struct tblock * ASSERT(mp->lid == lid); tlck->mp->lid = 0; } - assert(atomic_read(&mp->nohomeok) == 1); - atomic_dec(&mp->nohomeok); + assert(mp->nohomeok == 1); + metapage_homeok(mp); discard_metapage(mp); tlck->mp = NULL; } @@ -2861,24 +2867,9 @@ static void LogSyncRelease(struct metapa { struct jfs_log *log = mp->log; - assert(atomic_read(&mp->nohomeok)); + assert(mp->nohomeok); assert(log); - atomic_dec(&mp->nohomeok); - - if (atomic_read(&mp->nohomeok)) - return; - - hold_metapage(mp, 0); - - LOGSYNC_LOCK(log); - mp->log = NULL; - mp->lsn = 0; - mp->clsn = 0; - log->count--; - list_del_init(&mp->synclist); - LOGSYNC_UNLOCK(log); - - release_metapage(mp); + metapage_homeok(mp); } /* diff -puN fs/jfs/jfs_umount.c~jfs-support-page-sizes-greater-than-4k fs/jfs/jfs_umount.c --- 25/fs/jfs/jfs_umount.c~jfs-support-page-sizes-greater-than-4k Thu Apr 28 17:00:10 2005 +++ 25-akpm/fs/jfs/jfs_umount.c Thu Apr 28 17:00:10 2005 @@ -49,7 +49,6 @@ */ int jfs_umount(struct super_block *sb) { - struct address_space *bdev_mapping = sb->s_bdev->bd_inode->i_mapping; struct jfs_sb_info *sbi = JFS_SBI(sb); struct inode *ipbmap = sbi->ipbmap; struct inode *ipimap = sbi->ipimap; @@ -109,8 +108,8 @@ int jfs_umount(struct super_block *sb) * Make sure all metadata makes it to disk before we mark * the superblock as clean */ - filemap_fdatawrite(bdev_mapping); - filemap_fdatawait(bdev_mapping); + filemap_fdatawrite(sbi->direct_inode->i_mapping); + filemap_fdatawait(sbi->direct_inode->i_mapping); /* * ensure all file system file pages are propagated to their @@ -123,9 +122,6 @@ int jfs_umount(struct super_block *sb) if (log) { /* log = NULL if read-only mount */ updateSuper(sb, FM_CLEAN); - /* Restore default gfp_mask for bdev */ - mapping_set_gfp_mask(bdev_mapping, GFP_USER); - /* * close log: * @@ -140,7 +136,6 @@ int jfs_umount(struct super_block *sb) int jfs_umount_rw(struct super_block *sb) { - struct address_space *bdev_mapping = sb->s_bdev->bd_inode->i_mapping; struct jfs_sb_info *sbi = JFS_SBI(sb); struct jfs_log *log = sbi->log; @@ -166,13 +161,10 @@ int jfs_umount_rw(struct super_block *sb * mark the superblock clean before everything is flushed to * disk. */ - filemap_fdatawrite(bdev_mapping); - filemap_fdatawait(bdev_mapping); + filemap_fdatawrite(sbi->direct_inode->i_mapping); + filemap_fdatawait(sbi->direct_inode->i_mapping); updateSuper(sb, FM_CLEAN); - /* Restore default gfp_mask for bdev */ - mapping_set_gfp_mask(bdev_mapping, GFP_USER); - return lmLogClose(sb); } diff -puN fs/jfs/resize.c~jfs-support-page-sizes-greater-than-4k fs/jfs/resize.c --- 25/fs/jfs/resize.c~jfs-support-page-sizes-greater-than-4k Thu Apr 28 17:00:10 2005 +++ 25-akpm/fs/jfs/resize.c Thu Apr 28 17:00:10 2005 @@ -209,6 +209,9 @@ int jfs_extendfs(struct super_block *sb, */ txQuiesce(sb); + /* Reset size of direct inode */ + sbi->direct_inode->i_size = sb->s_bdev->bd_inode->i_size; + if (sbi->mntflag & JFS_INLINELOG) { /* * deactivate old inline log diff -puN fs/jfs/super.c~jfs-support-page-sizes-greater-than-4k fs/jfs/super.c --- 25/fs/jfs/super.c~jfs-support-page-sizes-greater-than-4k Thu Apr 28 17:00:10 2005 +++ 25-akpm/fs/jfs/super.c Thu Apr 28 17:00:10 2005 @@ -210,6 +210,10 @@ static void jfs_put_super(struct super_b unload_nls(sbi->nls_tab); sbi->nls_tab = NULL; + truncate_inode_pages(sbi->direct_inode->i_mapping, 0); + iput(sbi->direct_inode); + sbi->direct_inode = NULL; + kfree(sbi); } @@ -358,6 +362,12 @@ static int jfs_remount(struct super_bloc } if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { + /* + * Invalidate any previously read metadata. fsck may have + * changed the on-disk data since we mounted r/o + */ + truncate_inode_pages(JFS_SBI(sb)->direct_inode->i_mapping, 0); + JFS_SBI(sb)->flag = flag; return jfs_mount_rw(sb, 1); } @@ -428,12 +438,26 @@ static int jfs_fill_super(struct super_b sb->s_op = &jfs_super_operations; sb->s_export_op = &jfs_export_operations; + /* + * Initialize direct-mapping inode/address-space + */ + inode = new_inode(sb); + if (inode == NULL) + goto out_kfree; + inode->i_ino = 0; + inode->i_nlink = 1; + inode->i_size = sb->s_bdev->bd_inode->i_size; + inode->i_mapping->a_ops = &jfs_metapage_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); + + sbi->direct_inode = inode; + rc = jfs_mount(sb); if (rc) { if (!silent) { jfs_err("jfs_mount failed w/return code = %d", rc); } - goto out_kfree; + goto out_mount_failed; } if (sb->s_flags & MS_RDONLY) sbi->log = NULL; @@ -482,6 +506,13 @@ out_no_rw: if (rc) { jfs_err("jfs_umount failed with return code %d", rc); } +out_mount_failed: + filemap_fdatawrite(sbi->direct_inode->i_mapping); + filemap_fdatawait(sbi->direct_inode->i_mapping); + truncate_inode_pages(sbi->direct_inode->i_mapping, 0); + make_bad_inode(sbi->direct_inode); + iput(sbi->direct_inode); + sbi->direct_inode = NULL; out_kfree: if (sbi->nls_tab) unload_nls(sbi->nls_tab); _