From: Hans Reiser This is update to reiser4 in 2.6.12-rc5-mm2. There are changes in core patches which are needed for reiser4. reiser4-sb_sync_inodes-cleanup.patch is now needed. reiser4-allow-drop_inode-implementation.patch and reiser4-export-inode_lock.patch are not. Signed-off-by: Andrew Morton --- fs/reiser4/Kconfig | 2 fs/reiser4/Makefile | 3 fs/reiser4/as_ops.c | 35 - fs/reiser4/carry.c | 2 fs/reiser4/cluster.c | 62 ++ fs/reiser4/cluster.h | 48 +- fs/reiser4/context.c | 19 fs/reiser4/context.h | 7 fs/reiser4/crypt.c | 4 fs/reiser4/entd.c | 10 fs/reiser4/eottl.c | 122 ++++- fs/reiser4/file_ops.c | 1 fs/reiser4/flush.c | 54 +- fs/reiser4/flush.h | 2 fs/reiser4/init_super.c | 14 fs/reiser4/inode.c | 23 + fs/reiser4/inode.h | 20 fs/reiser4/inode_ops.c | 2 fs/reiser4/jnode.c | 29 - fs/reiser4/jnode.h | 5 fs/reiser4/plugin/compress/compress.c | 250 +++++++---- fs/reiser4/plugin/compress/compress.h | 8 fs/reiser4/plugin/compress/compress_mode.c | 108 +++++ fs/reiser4/plugin/compress/lzoconf.h | 5 fs/reiser4/plugin/compress/minilzo.c | 15 fs/reiser4/plugin/compress/minilzo.h | 4 fs/reiser4/plugin/cryptcompress.c | 606 +++++++++++++++-------------- fs/reiser4/plugin/cryptcompress.h | 22 - fs/reiser4/plugin/digest.c | 12 fs/reiser4/plugin/dir/dir.c | 7 fs/reiser4/plugin/dir/hashed_dir.c | 2 fs/reiser4/plugin/file/file.c | 123 ++--- fs/reiser4/plugin/file/file.h | 4 fs/reiser4/plugin/file/regular.c | 44 ++ fs/reiser4/plugin/file/tail_conversion.c | 15 fs/reiser4/plugin/item/ctail.c | 64 +-- fs/reiser4/plugin/item/extent_file_ops.c | 192 ++++++--- fs/reiser4/plugin/item/extent_flush_ops.c | 14 fs/reiser4/plugin/item/extent_item_ops.c | 6 fs/reiser4/plugin/item/static_stat.c | 111 ----- fs/reiser4/plugin/item/static_stat.h | 3 fs/reiser4/plugin/item/tail.c | 21 - fs/reiser4/plugin/object.c | 153 ------- fs/reiser4/plugin/plugin.c | 35 + fs/reiser4/plugin/plugin.h | 122 ++++- fs/reiser4/plugin/plugin_header.h | 3 fs/reiser4/plugin/plugin_set.c | 31 + 
fs/reiser4/plugin/plugin_set.h | 27 - fs/reiser4/plugin/pseudo/pseudo.c | 3 fs/reiser4/plugin/space/bitmap.c | 8 fs/reiser4/safe_link.c | 23 - fs/reiser4/safe_link.h | 2 fs/reiser4/search.c | 11 fs/reiser4/status_flags.c | 1 fs/reiser4/super.h | 4 fs/reiser4/tree.c | 2 fs/reiser4/tree_walk.c | 36 - fs/reiser4/tree_walk.h | 9 fs/reiser4/txnmgr.c | 84 +++- fs/reiser4/txnmgr.h | 8 fs/reiser4/vfs_ops.c | 78 ++- fs/reiser4/vfs_ops.h | 5 fs/reiser4/wander.c | 2 63 files changed, 1633 insertions(+), 1114 deletions(-) diff -puN fs/reiser4/as_ops.c~reiser4-update fs/reiser4/as_ops.c --- devel/fs/reiser4/as_ops.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/as_ops.c 2005-07-08 23:11:54.000000000 -0700 @@ -158,13 +158,9 @@ reiser4_readpage(struct file *f /* file result = fplug->readpage(f, page); else result = RETERR(-EINVAL); - if (result != 0) { - SetPageError(page); - unlock_page(page); - } reiser4_exit_context(&ctx); - return 0; + return result; } static int filler(void *vp, struct page *page) @@ -496,7 +492,6 @@ reiser4_internal int reiser4_releasepage(struct page *page, int gfp UNUSED_ARG) { jnode *node; - void *oid; assert("nikita-2257", PagePrivate(page)); assert("nikita-2259", PageLocked(page)); @@ -512,8 +507,6 @@ reiser4_releasepage(struct page *page, i assert("reiser4-4", page->mapping != NULL); assert("reiser4-5", page->mapping->host != NULL); - oid = (void *)(unsigned long)get_inode_oid(page->mapping->host); - /* is_page_cache_freeable() check (mapping + private + page_cache_get() by shrink_cache()) */ if (page_count(page) > 3) @@ -561,19 +554,6 @@ reiser4_releasepage(struct page *page, i #undef INC_NSTAT #undef INC_STAT -reiser4_internal void -move_inode_out_from_sync_inodes_loop(struct address_space * mapping) -{ - /* work around infinite loop in pdflush->sync_sb_inodes. */ - /* Problem: ->writepages() is supposed to submit io for the pages from - * ->io_pages list and to clean this list. 
*/ - mapping->host->dirtied_when = jiffies; - spin_lock(&inode_lock); - list_move(&mapping->host->i_list, &mapping->host->i_sb->s_dirty); - spin_unlock(&inode_lock); - -} - /* reiser4 writepages() address space operation this captures anonymous pages and anonymous jnodes. Anonymous pages are pages which are dirtied via mmapping. Anonymous jnodes are ones which were created by reiser4_writepage @@ -588,12 +568,21 @@ reiser4_writepages(struct address_space inode = mapping->host; fplug = inode_file_plugin(inode); - if (fplug != NULL && fplug->capture != NULL) + if (fplug != NULL && fplug->capture != NULL) { /* call file plugin method to capture anonymous pages and anonymous jnodes */ ret = fplug->capture(inode, wbc); + if (is_in_reiser4_context()) { + if (get_current_context()->nr_captured >= CAPTURE_APAGE_BURST) { + /* there are already pages to flush, flush them + out, do not delay until end of + reiser4_sync_inodes */ + writeout(inode->i_sb, wbc); + get_current_context()->nr_captured = 0; + } + } + } - move_inode_out_from_sync_inodes_loop(mapping); return ret; } diff -puN fs/reiser4/carry.c~reiser4-update fs/reiser4/carry.c --- devel/fs/reiser4/carry.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/carry.c 2005-07-08 23:11:54.000000000 -0700 @@ -966,7 +966,7 @@ lock_carry_node(carry_level * level /* l assert("nikita-1186", reference_point != NULL); } if (node->parent && (result == 0)) { - result = reiser4_get_parent(&tmp_lh, reference_point, ZNODE_WRITE_LOCK, 0); + result = reiser4_get_parent(&tmp_lh, reference_point, ZNODE_WRITE_LOCK); if (result != 0) { ; /* nothing */ } else if (znode_get_level(tmp_lh.node) == 0) { diff -puN fs/reiser4/cluster.c~reiser4-update fs/reiser4/cluster.c --- devel/fs/reiser4/cluster.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/cluster.c 2005-07-08 23:11:54.000000000 -0700 @@ -3,6 +3,10 @@ /* Contains cluster operations for cryptcompress object plugin (see 
http://www.namesys.com/cryptcompress_design.txt for details). */ +#include "plugin/plugin_header.h" +#include "plugin/plugin.h" +#include "inode.h" + /* Concepts of clustering. Definition of cluster size. Data clusters, page clusters, disk clusters. @@ -69,3 +73,61 @@ represent all data/page/disk clusters. (EDWARD-FIXME-HANS: are you sure that is good style? and where is the code that goes with this comment....;-) ) */ + +static int +change_cluster(struct inode * inode, reiser4_plugin * plugin) +{ + int result = 0; + + assert("edward-1324", inode != NULL); + assert("edward-1325", plugin != NULL); + assert("edward-1326", is_reiser4_inode(inode)); + assert("edward-1327", plugin->h.type_id == REISER4_CLUSTER_PLUGIN_TYPE); + + if (inode_file_plugin(inode)->h.id == DIRECTORY_FILE_PLUGIN_ID) + result = plugin_set_cluster(&reiser4_inode_data(inode)->pset, + &plugin->clust); + else + result = RETERR(-EINVAL); + return result; +} + +static reiser4_plugin_ops cluster_plugin_ops = { + .init = NULL, + .load = NULL, + .save_len = NULL, + .save = NULL, + .change = &change_cluster +}; + +#define SUPPORT_CLUSTER(SHIFT, ID, LABEL, DESC) \ + [CLUSTER_ ## ID ## _ID] = { \ + .h = { \ + .type_id = REISER4_CLUSTER_PLUGIN_TYPE, \ + .id = CLUSTER_ ## ID ## _ID, \ + .pops = &cluster_plugin_ops, \ + .label = LABEL, \ + .desc = DESC, \ + .linkage = TYPE_SAFE_LIST_LINK_ZERO \ + }, \ + .shift = SHIFT \ + } + +cluster_plugin cluster_plugins[LAST_CLUSTER_ID] = { + SUPPORT_CLUSTER(12, 4K, "4K", "Minimal"), + SUPPORT_CLUSTER(13, 8K, "8K", "Small"), + SUPPORT_CLUSTER(14, 16K, "16K", "Average"), + SUPPORT_CLUSTER(15, 32K, "32K", "Big"), + SUPPORT_CLUSTER(16, 64K, "64K", "Large") +}; + +/* + Local variables: + c-indentation-style: "K&R" + mode-name: "LC" + c-basic-offset: 8 + tab-width: 8 + fill-column: 120 + scroll-step: 1 + End: +*/ diff -puN fs/reiser4/cluster.h~reiser4-update fs/reiser4/cluster.h --- devel/fs/reiser4/cluster.h~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ 
devel-akpm/fs/reiser4/cluster.h 2005-07-08 23:11:54.000000000 -0700 @@ -20,40 +20,39 @@ static inline int inode_cluster_shift (s { assert("edward-92", inode != NULL); assert("edward-93", reiser4_inode_data(inode) != NULL); - assert("edward-94", inode_get_flag(inode, REISER4_CLUSTER_KNOWN)); - return reiser4_inode_data(inode)->cluster_shift; + return inode_cluster_plugin(inode)->shift; } static inline unsigned -page_cluster_shift(struct inode * inode) +cluster_nrpages_shift(struct inode * inode) { - return inode_cluster_shift(inode) + PAGE_CACHE_SHIFT; + return inode_cluster_shift(inode) - PAGE_CACHE_SHIFT; } /* cluster size in page units */ static inline unsigned cluster_nrpages (struct inode * inode) { - return (1U << inode_cluster_shift(inode)); + return 1U << cluster_nrpages_shift(inode); } static inline size_t inode_cluster_size (struct inode * inode) { assert("edward-96", inode != NULL); - return (PAGE_CACHE_SIZE << inode_cluster_shift(inode)); + return 1U << inode_cluster_shift(inode); } static inline unsigned long pg_to_clust(unsigned long idx, struct inode * inode) { - return idx >> inode_cluster_shift(inode); + return idx >> cluster_nrpages_shift(inode); } static inline unsigned long clust_to_pg(unsigned long idx, struct inode * inode) { - return idx << inode_cluster_shift(inode); + return idx << cluster_nrpages_shift(inode); } static inline unsigned long @@ -77,13 +76,13 @@ pg_to_off(unsigned long idx) static inline unsigned long off_to_clust(loff_t off, struct inode * inode) { - return pg_to_clust(off_to_pg(off), inode); + return off >> inode_cluster_shift(inode); } static inline loff_t clust_to_off(unsigned long idx, struct inode * inode) { - return pg_to_off(clust_to_pg(idx, inode)); + return (loff_t)idx << inode_cluster_shift(inode); } static inline unsigned long @@ -100,17 +99,17 @@ count_to_nrpages(loff_t count) } /* number of clusters occupied by @count bytes */ -static inline unsigned long +static inline cloff_t count_to_nrclust(loff_t count, 
struct inode * inode) { - return count_to_nr(count, page_cluster_shift(inode)); + return count_to_nr(count, inode_cluster_shift(inode)); } /* number of clusters occupied by @count pages */ static inline cloff_t pgcount_to_nrclust(pgoff_t count, struct inode * inode) { - return count_to_nr(count, inode_cluster_shift(inode)); + return count_to_nr(count, cluster_nrpages_shift(inode)); } static inline loff_t @@ -257,6 +256,29 @@ void tfm_cluster_set_uptodate (tfm_clust void tfm_cluster_clr_uptodate (tfm_cluster_t * tc); unsigned long clust_by_coord(const coord_t * coord, struct inode * inode); +/* move cluster handle to the target position + specified by the page of index @pgidx +*/ +static inline void +move_cluster_forward(reiser4_cluster_t * clust, struct inode * inode, + pgoff_t pgidx, int * progress) +{ + assert("edward-1297", clust != NULL); + assert("edward-1298", inode != NULL); + + reset_cluster_params(clust); + if (*progress && + /* Hole in the indices. Hint became invalid and can not be + used by find_cluster_item() even if seal/node versions + will coincide */ + pg_to_clust(pgidx, inode) != clust->index + 1) { + unset_hint(clust->hint); + invalidate_hint_cluster(clust); + } + *progress = 1; + clust->index = pg_to_clust(pgidx, inode); +} + static inline int alloc_clust_pages(reiser4_cluster_t * clust, struct inode * inode ) { diff -puN fs/reiser4/context.c~reiser4-update fs/reiser4/context.c --- devel/fs/reiser4/context.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/context.c 2005-07-08 23:11:54.000000000 -0700 @@ -173,6 +173,25 @@ reiser4_internal void reiser4_exit_conte txn_restart(context); balance_dirty_pages_at(context); } + + /* if filesystem is mounted with -o sync or -o dirsync - commit + transaction. FIXME: TXNH_DONT_COMMIT is used to avoid + committing on exit_context when inode semaphore is held and + to have ktxnmgrd to do commit instead to get better + concurrent filesystem accesses.
But, when one mounts with -o + sync, he cares more about reliability than about + performance. So, for now we have this simple mount -o sync + support. */ + if (context->super->s_flags & (MS_SYNCHRONOUS | MS_DIRSYNC)) { + txn_atom *atom; + + atom = get_current_atom_locked_nocheck(); + if (atom) { + atom->flags |= ATOM_FORCE_COMMIT; + context->trans->flags &= ~TXNH_DONT_COMMIT; + UNLOCK_ATOM(atom); + } + } txn_end(context); } done_context(context); diff -puN fs/reiser4/context.h~reiser4-update fs/reiser4/context.h --- devel/fs/reiser4/context.h~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/context.h 2005-07-08 23:11:54.000000000 -0700 @@ -121,6 +121,13 @@ struct reiser4_context { /* count non-trivial jnode_set_dirty() calls */ unsigned long nr_marked_dirty; + + /* reiser4_sync_inodes calls (via generic_sync_sb_inodes) + * reiser4_writepages for each of dirty inodes. Reiser4_writepages + * captures pages. When number of pages captured in one + * reiser4_sync_inodes reaches some threshold - some atoms get + * flushed */ + int nr_captured; #if REISER4_DEBUG /* A link of all active contexts. 
*/ context_list_link contexts_link; diff -puN fs/reiser4/crypt.c~reiser4-update fs/reiser4/crypt.c --- devel/fs/reiser4/crypt.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/crypt.c 2005-07-08 23:11:54.000000000 -0700 @@ -26,7 +26,7 @@ @pad */ UNUSED_ARG static int -align_cluster_common(__u8 *pad /* pointer to the first byte of aligning format */, +align_stream_common(__u8 *pad /* pointer to the first byte of aligning format */, int flow_size /* size of non-aligned flow */, int blocksize /* crypto-block size */) { @@ -73,7 +73,7 @@ crypto_plugin crypto_plugins[LAST_CRYPTO .free = free_none_crypt, .nr_keywords = NONE_EXPKEY_WORDS, .scale = scale_common, - .align_cluster = NULL, + .align_stream = NULL, .setkey = NULL, .encrypt = NULL, .decrypt = NULL diff -puN fs/reiser4/entd.c~reiser4-update fs/reiser4/entd.c --- devel/fs/reiser4/entd.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/entd.c 2005-07-08 23:11:54.000000000 -0700 @@ -242,14 +242,6 @@ static void kick_entd(entd_context * ent kcond_signal(&ent->wait); } -static void entd_capture_anonymous_pages( - struct super_block * super, struct writeback_control * wbc) -{ - spin_lock(&inode_lock); - generic_sync_sb_inodes(super, wbc); - spin_unlock(&inode_lock); -} - static void entd_flush(struct super_block *super) { long nr_submitted = 0; @@ -267,7 +259,7 @@ static void entd_flush(struct super_bloc ctx.entd = 1; - entd_capture_anonymous_pages(super, &wbc); + generic_sync_sb_inodes(super, &wbc); result = flush_some_atom(&nr_submitted, &wbc, JNODE_FLUSH_WRITE_BLOCKS); if (result != 0) warning("nikita-3100", "Flush failed: %i", result); diff -puN fs/reiser4/eottl.c~reiser4-update fs/reiser4/eottl.c --- devel/fs/reiser4/eottl.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/eottl.c 2005-07-08 23:11:54.000000000 -0700 @@ -106,41 +106,89 @@ restarted with lock_level modified so that next time we hit this problem, write lock will be 
held. Once we have write lock, balancing will be performed. - - - - - - */ -/* look to the right of @coord. If it is an item of internal type - 1 is - returned. If that item is in right neighbor and it is internal - @coord and - @lh are switched to that node: move lock handle, zload right neighbor and - zrelse znode coord was set to at the beginning + +/* look to a unit next to @coord. If it is an internal one - 1 is returned, + @coord is set to that unit. If that unit is in right neighbor, @lh is moved, + neighbor is loaded, original node is zrelsed, @coord is set to first unit + of neighbor. Otherwise, 0 is returned, @coord and @lh are left unchanged. + 2 is returned to restart search. */ -/* Audited by: green(2002.06.15) */ static int -is_next_item_internal(coord_t * coord) +is_next_item_internal(coord_t *coord, const reiser4_key *key, lock_handle *lh) { - if (coord->item_pos != node_num_items(coord->node) - 1) { - /* next item is in the same node */ - coord_t right; - - coord_dup(&right, coord); - check_me("vs-742", coord_next_item(&right) == 0); - if (item_is_internal(&right)) { - coord_dup(coord, &right); + coord_t next; + lock_handle rn; + int result; + + coord_dup(&next, coord); + if (coord_next_unit(&next) == 0) { + /* next unit is in this node */ + if (item_is_internal(&next)) { + coord_dup(coord, &next); return 1; } + assert("vs-3", item_is_extent(&next)); + return 0; + } + + assert("vs-5", UNDER_RW(dk, current_tree, read, keylt(key, znode_get_rd_key(coord->node)))); + + /* next unit either does not exist or is in right neighbor */ + init_lh(&rn); + result = reiser4_get_right_neighbor(&rn, coord->node, + znode_is_wlocked(coord->node) ?
ZNODE_WRITE_LOCK : ZNODE_READ_LOCK, + GN_CAN_USE_UPPER_LEVELS); + if (result == -E_NO_NEIGHBOR){ + done_lh(&rn); + return 0; + } + + if (result) { + assert("vs-4", result < 0); + done_lh(&rn); + return result; + } + + /* check whether anything managed to happen with right neighbor */ + result = UNDER_RW(dk, current_tree, read, keycmp(key, znode_get_ld_key(rn.node))); + assert("vs-6", result != EQUAL_TO); + if (result == GREATER_THAN) { + warning("vs-7", "smaller keys managed to get inserted to the right neighbor"); + done_lh(&rn); + return 2; + } + + result = zload(rn.node); + if (result) { + assert("vs-5", result < 0); + done_lh(&rn); + return result; + } + + coord_init_first_unit(&next, rn.node); + if (item_is_internal(&next)) { + coord_dup(coord, &next); + zrelse(rn.node); + done_lh(lh); + move_lh(lh, &rn); + /* coord and lock handle changed. Original node is not zrelsed, + though */ + return 1; } + + /* next item is extent */ + assert("vs-6", item_is_extent(&next)); + zrelse(rn.node); + done_lh(&rn); return 0; } /* inserting empty leaf after (or between) item of not internal type we have to know which right delimiting key corresponding znode has to be inserted with */ static reiser4_key * -rd_key(coord_t * coord, reiser4_key * key) +rd_key(coord_t *coord, reiser4_key *key) { coord_t dup; @@ -166,7 +214,7 @@ ON_DEBUG(void check_dkeys(const znode *) /* this is used to insert empty node into leaf level if tree lookup can not go further down because it stopped between items of not internal type */ static int -add_empty_leaf(coord_t * insert_coord, lock_handle * lh, const reiser4_key * key, const reiser4_key * rdkey) +add_empty_leaf(coord_t *insert_coord, lock_handle *lh, const reiser4_key *key, const reiser4_key *rdkey) { int result; carry_pool *pool; @@ -272,7 +320,7 @@ add_empty_leaf(coord_t * insert_coord, l /* handle extent-on-the-twig-level cases in tree traversal */ reiser4_internal int -handle_eottl(cbk_handle * h /* cbk handle */ , +handle_eottl(cbk_handle *h
/* cbk handle */ , int *outcome /* how traversal should proceed */ ) { int result; @@ -311,8 +359,15 @@ handle_eottl(cbk_handle * h /* cbk handl } /* take a look at the item to the right of h -> coord */ - result = is_next_item_internal(coord); + result = is_next_item_internal(coord, h->key, h->active_lh); + if (unlikely(result < 0)) { + h->error = "get_right_neighbor failed"; + h->result = result; + *outcome = LOOKUP_DONE; + return 1; + } if (result == 0) { + znode *loaded; /* item to the right is also an extent one. Allocate a new node and insert pointer to it after item h -> coord. @@ -329,6 +384,7 @@ handle_eottl(cbk_handle * h /* cbk handl return 1; } + loaded = coord->node; result = add_empty_leaf(coord, h->active_lh, h->key, rd_key(coord, &key)); if (result) { h->error = "could not add empty leaf"; @@ -336,13 +392,17 @@ handle_eottl(cbk_handle * h /* cbk handl *outcome = LOOKUP_DONE; return 1; } - /* added empty leaf is locked, its parent node is unlocked, - coord is set as EMPTY */ + /* added empty leaf is locked (h->active_lh), its parent node + is unlocked, h->coord is set as EMPTY */ + assert("vs-13", coord->between == EMPTY_NODE); + assert("vs-14", znode_is_write_locked(coord->node)); + assert("vs-15", WITH_DATA(coord->node, node_is_empty(coord->node))); + assert("vs-16", jnode_is_leaf(ZJNODE(coord->node))); + assert("vs-17", coord->node == h->active_lh->node); *outcome = LOOKUP_DONE; h->result = CBK_COORD_NOTFOUND; return 1; - /*assert("vs-358", keyeq(h->key, item_key_by_coord(coord, &key)));*/ - } else { + } else if (result == 1) { /* this is special case mentioned in the comment on tree.h:cbk_flags. We have found internal item immediately on the right of extent, and we are going to insert new item @@ -356,8 +416,12 @@ handle_eottl(cbk_handle * h /* cbk handl is_next_item_internal(). 
*/ h->flags &= ~CBK_TRUST_DK; + } else { + assert("vs-8", result == 2); + *outcome = LOOKUP_REST; + return 1; } - assert("vs-362", item_is_internal(coord)); + assert("vs-362", WITH_DATA(coord->node, item_is_internal(coord))); return 0; } diff -puN fs/reiser4/file_ops.c~reiser4-update fs/reiser4/file_ops.c --- devel/fs/reiser4/file_ops.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/file_ops.c 2005-07-08 23:11:54.000000000 -0700 @@ -221,6 +221,7 @@ reiser4_read(struct file *file /* file t /* unix_file_read is one method that might be invoked below */ result = fplug->read(file, buf, count, off); } + context_set_commit_async(&ctx); reiser4_exit_context(&ctx); return result; } diff -puN fs/reiser4/flush.c~reiser4-update fs/reiser4/flush.c --- devel/fs/reiser4/flush.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/flush.c 2005-07-08 23:11:54.000000000 -0700 @@ -408,7 +408,7 @@ static int jnode_lock_parent_coord(jnode lock_handle * parent_lh, load_count * parent_zh, znode_lock_mode mode, int try); -static int neighbor_in_slum(znode * node, lock_handle * right_lock, sideof side, znode_lock_mode mode); +static int neighbor_in_slum(znode * node, lock_handle * right_lock, sideof side, znode_lock_mode mode, int check_dirty); static int znode_same_parents(znode * a, znode * b); static int @@ -440,9 +440,6 @@ assert("nikita-3435", \ ON_DEBUG(atomic_t flush_cnt;) -/* FIXME: remove me */#define FLUSH_CHECKS_CONGESTION 1 - -#if defined (FLUSH_CHECKS_CONGESTION) /* check fs backing device for write congestion */ static int check_write_congestion (void) { @@ -453,10 +450,9 @@ static int check_write_congestion (void) bdi = get_super_fake(sb)->i_mapping->backing_dev_info; return bdi_write_congested(bdi); } -#endif /* FLUSH_CHECKS_CONGESTION */ /* conditionally write flush queue */ -static int write_prepped_nodes (flush_pos_t * pos, int check_congestion) +static int write_prepped_nodes (flush_pos_t * pos) { int ret; @@ 
-466,10 +462,8 @@ static int write_prepped_nodes (flush_po if (!(pos->flags & JNODE_FLUSH_WRITE_BLOCKS)) return 0; -#if defined (FLUSH_CHECKS_CONGESTION) - if (check_congestion && check_write_congestion()) + if (check_write_congestion()) return 0; -#endif /* FLUSH_CHECKS_CONGESTION */ ret = write_fq(pos->fq, pos->nr_written, WRITEOUT_SINGLE_STREAM | WRITEOUT_FOR_PAGE_RECLAIM); @@ -637,7 +631,8 @@ static int prepare_flush_pos(flush_pos_t audit. */ -static int jnode_flush(jnode * node, long *nr_to_flush, long * nr_written, flush_queue_t * fq, int flags) +static int +jnode_flush(jnode *node, long *nr_written, flush_queue_t *fq, int flags) { long ret = 0; flush_scan right_scan; @@ -671,7 +666,6 @@ static int jnode_flush(jnode * node, lon /* Initialize a flush position. */ pos_init(&flush_pos); - flush_pos.nr_to_flush = nr_to_flush; flush_pos.nr_written = nr_written; flush_pos.fq = fq; flush_pos.flags = flags; @@ -835,14 +829,6 @@ static int jnode_flush(jnode * node, lon /* Any failure reaches this point. 
*/ failed: - if (nr_to_flush != NULL) { - if (ret >= 0) { - (*nr_to_flush) = flush_pos.prep_or_free_cnt; - } else { - (*nr_to_flush) = 0; - } - } - switch (ret) { case -E_REPEAT: case -EINVAL: @@ -889,7 +875,7 @@ static int rapid_flush (flush_pos_t * po if (!wbq_available()) return 0; - return write_prepped_nodes(pos, 1); + return write_prepped_nodes(pos); } #else @@ -991,7 +977,7 @@ flush_current_atom (int flags, long *nr_ jref(node); UNLOCK_ATOM(*atom); UNLOCK_JNODE(node); - ret = jnode_flush(node, NULL, nr_submitted, fq, flags); + ret = jnode_flush(node, nr_submitted, fq, flags); jput(node); } @@ -1191,7 +1177,7 @@ static int alloc_pos_and_ancestors(flush } else { if (!znode_is_root(pos->lock.node)) { /* all formatted nodes except tree root */ - ret = reiser4_get_parent(&plock, pos->lock.node, ZNODE_WRITE_LOCK, 0); + ret = reiser4_get_parent(&plock, pos->lock.node, ZNODE_WRITE_LOCK); if (ret) goto exit; @@ -1483,7 +1469,7 @@ out: int ret1; /* NOTE: seems like io is done under long term locks. 
*/ - ret1 = write_prepped_nodes(pos, 1); + ret1 = write_prepped_nodes(pos); if (ret1 < 0) return ret1; } @@ -1704,11 +1690,11 @@ static int squalloc_upper_levels (flush_ init_load_count(&left_parent_load); init_load_count(&right_parent_load); - ret = reiser4_get_parent(&left_parent_lock, left, ZNODE_WRITE_LOCK, 0); + ret = reiser4_get_parent(&left_parent_lock, left, ZNODE_WRITE_LOCK); if (ret) goto out; - ret = reiser4_get_parent(&right_parent_lock, right, ZNODE_WRITE_LOCK, 0); + ret = reiser4_get_parent(&right_parent_lock, right, ZNODE_WRITE_LOCK); if (ret) goto out; @@ -1811,7 +1797,7 @@ static int lock_parent_and_allocate_znod init_lh(&parent_lock); init_load_count(&parent_load); - ret = reiser4_get_parent(&parent_lock, node, ZNODE_WRITE_LOCK, 0); + ret = reiser4_get_parent(&parent_lock, node, ZNODE_WRITE_LOCK); if (ret) goto out; @@ -1849,7 +1835,8 @@ static int handle_pos_on_formatted (flus } while (1) { - ret = neighbor_in_slum(pos->lock.node, &right_lock, RIGHT_SIDE, ZNODE_WRITE_LOCK); + ret = neighbor_in_slum(pos->lock.node, &right_lock, RIGHT_SIDE, ZNODE_WRITE_LOCK, + !should_convert_next_node(pos, right_lock.node)); if (ret) break; @@ -2224,7 +2211,7 @@ static int handle_pos_to_twig (flush_pos init_lh(&parent_lock); init_load_count(&parent_load); - ret = reiser4_get_parent(&parent_lock, pos->lock.node, ZNODE_WRITE_LOCK, 0); + ret = reiser4_get_parent(&parent_lock, pos->lock.node, ZNODE_WRITE_LOCK); if (ret) goto out; @@ -2860,7 +2847,8 @@ jnode_lock_parent_coord(jnode * /* Get the (locked) next neighbor of a znode which is dirty and a member of the same atom. If there is no next neighbor or the neighbor is not in memory or if there is a - neighbor but it is not dirty or not in the same atom, -E_NO_NEIGHBOR is returned. */ + neighbor but it is not dirty or not in the same atom, -E_NO_NEIGHBOR is returned. 
+ In some cases the slum may include nodes which are not dirty, if so @check_dirty should be 0 */ static int neighbor_in_slum( @@ -2870,8 +2858,9 @@ neighbor_in_slum( sideof side, /* left or right direction we seek the next node in */ - znode_lock_mode mode /* kind of lock we want */ + znode_lock_mode mode, /* kind of lock we want */ + int check_dirty /* true if the neighbor should be dirty */ ) { int ret; @@ -2889,7 +2878,8 @@ neighbor_in_slum( return ret; } - + if (!check_dirty) + return 0; /* Check dirty bit of locked znode, no races here */ if (znode_check_dirty(lock->node)) return 0; @@ -3338,7 +3328,7 @@ scan_by_coord(flush_scan * scan) if (coord_is_after_sideof_unit(&next_coord, scan->direction)) { /* We take the write lock because we may start flushing from this * coordinate. */ - ret = neighbor_in_slum(next_coord.node, &next_lock, scan->direction, ZNODE_WRITE_LOCK); + ret = neighbor_in_slum(next_coord.node, &next_lock, scan->direction, ZNODE_WRITE_LOCK, 1 /* check dirty */); if (ret == -E_NO_NEIGHBOR) { scan->stop = 1; diff -puN fs/reiser4/flush.h~reiser4-update fs/reiser4/flush.h --- devel/fs/reiser4/flush.h~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/flush.h 2005-07-08 23:11:54.000000000 -0700 @@ -103,8 +103,6 @@ struct flush_position { reiser4_blocknr_hint preceder; /* The flush 'hint' state. */ int leaf_relocate; /* True if enough leaf-level nodes were * found to suggest a relocate policy. */ - long *nr_to_flush; /* If called under memory pressure, - * indicates how many nodes the VM asked to flush. */ int alloc_cnt; /* The number of nodes allocated during squeeze and allococate. */ int prep_or_free_cnt; /* The number of nodes prepared for write (allocate) or squeezed and freed. 
*/ flush_queue_t *fq; diff -puN fs/reiser4/init_super.c~reiser4-update fs/reiser4/init_super.c --- devel/fs/reiser4/init_super.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/init_super.c 2005-07-08 23:11:54.000000000 -0700 @@ -355,7 +355,19 @@ static struct { }, [PSET_COMPRESSION] = { .type = REISER4_COMPRESSION_PLUGIN_TYPE, - .id = NONE_COMPRESSION_ID + .id = LZO1_COMPRESSION_ID + }, + [PSET_COMPRESSION_MODE] = { + .type = REISER4_COMPRESSION_MODE_PLUGIN_TYPE, + .id = SMART_COMPRESSION_MODE_ID + }, + [PSET_CLUSTER] = { + .type = REISER4_CLUSTER_PLUGIN_TYPE, + .id = CLUSTER_64K_ID + }, + [PSET_REGULAR_ENTRY] = { + .type = REISER4_REGULAR_PLUGIN_TYPE, + .id = UF_REGULAR_ID } }; diff -puN fs/reiser4/inode.c~reiser4-update fs/reiser4/inode.c --- devel/fs/reiser4/inode.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/inode.c 2005-07-08 23:11:54.000000000 -0700 @@ -473,7 +473,7 @@ reiser4_iget(struct super_block *super / print_key("sought for", key); print_key("found", &found_key); } - if (inode_file_plugin(inode)->not_linked(inode)) { + if (inode->i_nlink == 0) { warning("nikita-3559", "Unlinked inode found: %llu\n", (unsigned long long)get_inode_oid(inode)); } @@ -560,6 +560,27 @@ inode_compression_plugin(const struct in return reiser4_inode_data(inode)->pset->compression; } +reiser4_internal compression_mode_plugin * +inode_compression_mode_plugin(const struct inode * inode) +{ + assert("edward-1330", inode != NULL); + return reiser4_inode_data(inode)->pset->compression_mode; +} + +reiser4_internal cluster_plugin * +inode_cluster_plugin(const struct inode * inode) +{ + assert("edward-1328", inode != NULL); + return reiser4_inode_data(inode)->pset->cluster; +} + +reiser4_internal regular_plugin * +inode_regular_plugin(const struct inode * inode) +{ + assert("edward-1329", inode != NULL); + return reiser4_inode_data(inode)->pset->regular_entry; +} + reiser4_internal digest_plugin * inode_digest_plugin(const 
struct inode * inode) { diff -puN fs/reiser4/inode.h~reiser4-update fs/reiser4/inode.h --- devel/fs/reiser4/inode.h~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/inode.h 2005-07-08 23:11:54.000000000 -0700 @@ -49,19 +49,17 @@ typedef enum { REISER4_SDLEN_KNOWN = 5, /* reiser4_inode->crypt points to the crypto stat */ REISER4_CRYPTO_STAT_LOADED = 6, - /* reiser4_inode->cluster_shift makes sense */ - REISER4_CLUSTER_KNOWN = 7, /* cryptcompress_inode_data points to the secret key */ - REISER4_SECRET_KEY_INSTALLED = 8, + REISER4_SECRET_KEY_INSTALLED = 7, /* File (possibly) has pages corresponding to the tail items, that * were created by ->readpage. It is set by mmap_unix_file() and * sendfile_unix_file(). This bit is inspected by write_unix_file and * kill-hook of tail items. It is never cleared once set. This bit is * modified and inspected under i_sem. */ - REISER4_HAS_MMAP = 9, + REISER4_HAS_MMAP = 8, /* file was partially converted. It's body consists of a mix of tail * and extent items. */ - REISER4_PART_CONV = 10, + REISER4_PART_CONV = 9, } reiser4_file_plugin_flags; /* state associated with each inode. @@ -123,11 +121,6 @@ struct reiser4_inode { __u64 extmask; /* bitmask of non-default plugins for this inode */ __u16 plugin_mask; - /* cluster parameter for crypto and compression */ - __u8 cluster_shift; - /* secret key parameter for crypto */ - crypto_stat_t *crypt; - union { readdir_list_head readdir_list; struct list_head not_used; @@ -167,10 +160,6 @@ void loading_init_once(reiser4_inode *); void loading_alloc(reiser4_inode *); void loading_destroy(reiser4_inode *); - -#define I_JNODES (512) /* inode state bit. 
Set when in hash table there are more than 0 jnodes of unformatted nodes of - an inode */ - typedef struct reiser4_inode_object { /* private part */ reiser4_inode p; @@ -342,6 +331,9 @@ extern fibration_plugin *inode_fibration extern crypto_plugin *inode_crypto_plugin(const struct inode *inode); extern digest_plugin *inode_digest_plugin(const struct inode *inode); extern compression_plugin *inode_compression_plugin(const struct inode *inode); +extern compression_mode_plugin *inode_compression_mode_plugin(const struct inode *inode); +extern cluster_plugin *inode_cluster_plugin(const struct inode *inode); +extern regular_plugin *inode_regular_plugin(const struct inode *inode); extern item_plugin *inode_sd_plugin(const struct inode *inode); extern item_plugin *inode_dir_item_plugin(const struct inode *inode); diff -puN fs/reiser4/inode_ops.c~reiser4-update fs/reiser4/inode_ops.c --- devel/fs/reiser4/inode_ops.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/inode_ops.c 2005-07-08 23:11:54.000000000 -0700 @@ -90,7 +90,7 @@ reiser4_create(struct inode *parent /* i memset(&data, 0, sizeof data); data.mode = S_IFREG | mode; - data.id = UNIX_FILE_PLUGIN_ID; + data.id = inode_regular_plugin(parent)->id; return invoke_create_method(parent, dentry, &data); } diff -puN fs/reiser4/jnode.c~reiser4-update fs/reiser4/jnode.c --- devel/fs/reiser4/jnode.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/jnode.c 2005-07-08 23:11:54.000000000 -0700 @@ -266,8 +266,6 @@ jnode_init(jnode * node, reiser4_tree * spin_lock_irq(&sbinfo->all_guard); list_add(&node->jnodes, &sbinfo->all_jnodes); spin_unlock_irq(&sbinfo->all_guard); - /* link with which jnode is attached to reiser4_inode */ - inode_jnodes_list_clean(node); } #endif } @@ -445,21 +443,24 @@ jfind(struct address_space *mapping, uns static void inode_attach_jnode(jnode *node) { - struct inode * inode; - reiser4_inode * info; - struct radix_tree_root * rtree; + struct inode 
*inode; + reiser4_inode *info; + struct radix_tree_root *rtree; + assert("nikita-34391", rw_tree_is_write_locked(jnode_get_tree(node))); assert ("zam-1043", node->key.j.mapping != NULL); inode = node->key.j.mapping->host; info = reiser4_inode_data(inode); rtree = jnode_tree_by_reiser4_inode(info); - - spin_lock(&inode_lock); + if (rtree->height == 0) { + /* prevent inode from being pruned when it has jnodes attached to it */ + write_lock_irq(&inode->i_data.tree_lock); + inode->i_data.nrpages ++; + write_unlock_irq(&inode->i_data.tree_lock); + } assert("zam-1049", equi(rtree->rnode != NULL, info->nr_jnodes != 0)); check_me("zam-1045", !radix_tree_insert(rtree, node->key.j.index, node)); ON_DEBUG(info->nr_jnodes ++); - inode->i_state |= I_JNODES; - spin_unlock(&inode_lock); } static void inode_detach_jnode(jnode *node) @@ -468,12 +469,12 @@ static void inode_detach_jnode(jnode *no reiser4_inode *info; struct radix_tree_root *rtree; + assert("nikita-34392", rw_tree_is_write_locked(jnode_get_tree(node))); assert ("zam-1044", node->key.j.mapping != NULL); inode = node->key.j.mapping->host; info = reiser4_inode_data(inode); rtree = jnode_tree_by_reiser4_inode(info); - spin_lock(&inode_lock); assert("zam-1051", info->nr_jnodes != 0); assert("zam-1052", rtree->rnode != NULL); assert("vs-1730", !JF_ISSET(node, JNODE_EFLUSH)); @@ -481,10 +482,12 @@ static void inode_detach_jnode(jnode *no /* delete jnode from inode's radix tree of jnodes */ check_me("zam-1046", radix_tree_delete(rtree, node->key.j.index)); - if (rtree->rnode == NULL) { - inode->i_state &= ~I_JNODES; + if (rtree->height == 0) { + /* inode can be pruned now */ + write_lock_irq(&inode->i_data.tree_lock); + inode->i_data.nrpages --; + write_unlock_irq(&inode->i_data.tree_lock); } - spin_unlock(&inode_lock); } /* put jnode into hash table (where they can be found by flush who does not know diff -puN fs/reiser4/jnode.h~reiser4-update fs/reiser4/jnode.h --- devel/fs/reiser4/jnode.h~reiser4-update 2005-07-08 
23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/jnode.h 2005-07-08 23:11:54.000000000 -0700 @@ -179,8 +179,6 @@ struct jnode { int written; /* this indicates which atom's list the jnode is on */ atom_list list1; - /* for debugging jnodes of one inode are attached to inode via this list */ - inode_jnodes_list_link inode_link; #endif } __attribute__((aligned(16))); @@ -199,9 +197,6 @@ typedef enum { } jnode_type; TYPE_SAFE_LIST_DEFINE(capture, jnode, capture_link); -#if REISER4_DEBUG -TYPE_SAFE_LIST_DEFINE(inode_jnodes, jnode, inode_link); -#endif /* jnode states */ typedef enum { diff -puN fs/reiser4/Kconfig~reiser4-update fs/reiser4/Kconfig --- devel/fs/reiser4/Kconfig~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/Kconfig 2005-07-08 23:11:54.000000000 -0700 @@ -1,6 +1,6 @@ config REISER4_FS tristate "Reiser4 (EXPERIMENTAL)" - depends on EXPERIMENTAL && !4KSTACKS + depends on EXPERIMENTAL && !4KSTACKS && ZLIB_INFLATE help Reiser4 is a filesystem that performs all filesystem operations as atomic transactions, which means that it either performs a diff -puN fs/reiser4/Makefile~reiser4-update fs/reiser4/Makefile --- devel/fs/reiser4/Makefile~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/Makefile 2005-07-08 23:11:54.000000000 -0700 @@ -44,6 +44,7 @@ reiser4-y := \ emergency_flush.o \ entd.o\ readahead.o \ + cluster.o \ crypt.o \ status_flags.o \ init_super.o \ @@ -60,6 +61,7 @@ reiser4-y := \ \ plugin/compress/minilzo.o \ plugin/compress/compress.o \ + plugin/compress/compress_mode.o \ \ plugin/item/static_stat.o \ plugin/item/sde.o \ @@ -93,4 +95,5 @@ reiser4-y := \ \ plugin/file/pseudo.o \ plugin/file/file.o \ + plugin/file/regular.o \ plugin/file/tail_conversion.o diff -puN fs/reiser4/plugin/compress/compress.c~reiser4-update fs/reiser4/plugin/compress/compress.c --- devel/fs/reiser4/plugin/compress/compress.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ 
devel-akpm/fs/reiser4/plugin/compress/compress.c 2005-07-08 23:11:54.000000000 -0700 @@ -2,6 +2,7 @@ /* reiser4 compression transform plugins */ #include "../../debug.h" +#include "../../inode.h" #include "../plugin.h" #include "../cryptcompress.h" #include "minilzo.h" @@ -11,41 +12,29 @@ #include #include -/******************************************************************************/ -/* null compression */ -/******************************************************************************/ - -#define NONE_NRCOPY 1 - static int -null_min_tfm_size(void) +change_compression(struct inode * inode, reiser4_plugin * plugin) { - return 1; -} - -static void -null_compress(coa_t coa, __u8 * src_first, unsigned src_len, - __u8 * dst_first, unsigned *dst_len) -{ - int i; - assert("edward-793", coa == NULL); - assert("edward-794", src_first != NULL); - assert("edward-795", dst_first != NULL); - assert("edward-796", src_len != 0); - assert("edward-797", dst_len != NULL); - - for (i = 0; i < NONE_NRCOPY; i++) - memcpy(dst_first, src_first, src_len); - *dst_len = src_len; - return; -} - -static void -null_decompress(coa_t coa, __u8 * src_first, unsigned src_len, - __u8 * dst_first, unsigned *dst_len) -{ - impossible("edward-798", "trying to decompress uncompressed data"); -} + assert("edward-1316", inode != NULL); + assert("edward-1317", plugin != NULL); + assert("edward-1318", is_reiser4_inode(inode)); + assert("edward-1319", plugin->h.type_id == REISER4_COMPRESSION_PLUGIN_TYPE); + + if (inode_file_plugin(inode)->h.id != DIRECTORY_FILE_PLUGIN_ID) + if (inode_compression_plugin(inode) != + dual_compression_plugin(&plugin->compression)) + return RETERR(-EINVAL); + return plugin_set_compression(&reiser4_inode_data(inode)->pset, + &plugin->compression); +} + +static reiser4_plugin_ops compression_plugin_ops = { + .init = NULL, + .load = NULL, + .save_len = NULL, + .save = NULL, + .change = &change_compression +}; 
/******************************************************************************/ /* gzip1 compression */ @@ -64,9 +53,7 @@ static coa_t gzip1_alloc(tfm_action act) { coa_t coa = NULL; - int ret = -ENXIO; -#if REISER4_GZIP_TFM - ret = 0; + int ret = 0; switch (act) { case TFM_WRITE: /* compress */ coa = vmalloc(zlib_deflate_workspacesize()); @@ -74,7 +61,7 @@ gzip1_alloc(tfm_action act) ret = -ENOMEM; break; } - xmemset(coa, 0, zlib_deflate_workspacesize()); + memset(coa, 0, zlib_deflate_workspacesize()); break; case TFM_READ: /* decompress */ coa = vmalloc(zlib_inflate_workspacesize()); @@ -82,13 +69,12 @@ gzip1_alloc(tfm_action act) ret = -ENOMEM; break; } - xmemset(coa, 0, zlib_inflate_workspacesize()); + memset(coa, 0, zlib_inflate_workspacesize()); break; default: impossible("edward-767", "trying to alloc workspace for unknown tfm action"); } -#endif if (ret) { warning("edward-768", "alloc workspace for gzip1 (tfm action = %d) failed\n", @@ -98,9 +84,36 @@ gzip1_alloc(tfm_action act) return coa; } +static coa_t +gzip1_nocompress_alloc(tfm_action act) +{ + coa_t coa = NULL; + int ret = 0; + switch (act) { + case TFM_WRITE: /* compress */ + break; + case TFM_READ: /* decompress */ + coa = vmalloc(zlib_inflate_workspacesize()); + if (!coa) { + ret = -ENOMEM; + break; + } + memset(coa, 0, zlib_inflate_workspacesize()); + break; + default: + impossible("edward-1299", "unknown tfm action"); + } + if (ret) { + warning("edward-1300", + "alloc workspace for gzip1 (tfm action = %d) failed\n", + act); + return ERR_PTR(ret); + } + return coa; +} + static void gzip1_free(coa_t coa, tfm_action act) { -#if REISER4_GZIP_TFM assert("edward-769", coa != NULL); switch (act) { @@ -111,15 +124,29 @@ static void gzip1_free(coa_t coa, tfm_ac vfree(coa); break; default: - impossible("edward-770", - "free workspace for unknown tfm action"); + impossible("edward-770", "unknown tfm action"); + } + return; +} + +static void gzip1_nocompress_free(coa_t coa, tfm_action act) +{ + 
assert("edward-1301", coa != NULL); + + switch (act) { + case TFM_READ: /* decompress */ + vfree(coa); + case TFM_WRITE: /* compress */ + impossible("edward-1302", + "trying to free non-allocated workspace"); + default: + impossible("edward-1303", "unknown tfm action"); } -#endif return; } static int -gzip1_min_tfm_size(void) +gzip1_min_size_deflate(void) { return 64; } @@ -128,11 +155,10 @@ static void gzip1_compress(coa_t coa, __u8 * src_first, unsigned src_len, __u8 * dst_first, unsigned *dst_len) { -#if REISER4_GZIP_TFM int ret = 0; struct z_stream_s stream; - xmemset(&stream, 0, sizeof(stream)); + memset(&stream, 0, sizeof(stream)); assert("edward-842", coa != NULL); assert("edward-875", src_len != 0); @@ -164,7 +190,6 @@ gzip1_compress(coa_t coa, __u8 * src_fir return; rollback: *dst_len = src_len; -#endif return; } @@ -172,11 +197,10 @@ static void gzip1_decompress(coa_t coa, __u8 * src_first, unsigned src_len, __u8 * dst_first, unsigned *dst_len) { -#if REISER4_GZIP_TFM int ret = 0; struct z_stream_s stream; - xmemset(&stream, 0, sizeof(stream)); + memset(&stream, 0, sizeof(stream)); assert("edward-843", coa != NULL); assert("edward-876", src_len != 0); @@ -215,20 +239,10 @@ gzip1_decompress(coa_t coa, __u8 * src_f return; } *dst_len = stream.total_out; -#endif return; } /******************************************************************************/ -/* none compression */ -/******************************************************************************/ - -static int none_overrun(unsigned src_len UNUSED_ARG) -{ - return 0; -} - -/******************************************************************************/ /* lzo1 compression */ /******************************************************************************/ @@ -277,17 +291,18 @@ lzo1_free(coa_t coa, tfm_action act) switch (act) { case TFM_WRITE: /* compress */ vfree(coa); - case TFM_READ: /* decompress */ break; + case TFM_READ: /* decompress */ + impossible("edward-1304", + "trying to free 
non-allocated workspace"); default: - impossible("edward-880", - "trying to free workspace for unknown tfm action"); + impossible("edward-880", "unknown tfm action"); } return; } static int -lzo1_min_tfm_size(void) +lzo1_min_size_deflate(void) { return 256; } @@ -347,74 +362,105 @@ lzo1_decompress(coa_t coa, __u8 * src_fi } compression_plugin compression_plugins[LAST_COMPRESSION_ID] = { - [NONE_COMPRESSION_ID] = { - .h = { - .type_id = - REISER4_COMPRESSION_PLUGIN_TYPE, - .id = NONE_COMPRESSION_ID, - .pops = NULL, - .label = "none", - .desc = - "absence of any compression transform", - .linkage = TYPE_SAFE_LIST_LINK_ZERO} - , - .overrun = none_overrun, - .alloc = NULL, - .free = NULL, - .min_tfm_size = NULL, - .compress = NULL, - .decompress = NULL} - , - [NULL_COMPRESSION_ID] = { - .h = { - .type_id = - REISER4_COMPRESSION_PLUGIN_TYPE, - .id = NULL_COMPRESSION_ID, - .pops = NULL, - .label = "null", - .desc = "NONE_NRCOPY times of memcpy", - .linkage = TYPE_SAFE_LIST_LINK_ZERO} - , - .overrun = none_overrun, - .alloc = NULL, - .free = NULL, - .min_tfm_size = null_min_tfm_size, - .compress = null_compress, - .decompress = null_decompress} - , [LZO1_COMPRESSION_ID] = { .h = { .type_id = REISER4_COMPRESSION_PLUGIN_TYPE, .id = LZO1_COMPRESSION_ID, - .pops = NULL, + .pops = &compression_plugin_ops, .label = "lzo1", .desc = "lzo1 compression transform", .linkage = TYPE_SAFE_LIST_LINK_ZERO} , + .dual = LZO1_NO_COMPRESSION_ID, + .init = NULL, .overrun = lzo1_overrun, .alloc = lzo1_alloc, .free = lzo1_free, - .min_tfm_size = lzo1_min_tfm_size, + .min_size_deflate = lzo1_min_size_deflate, + .checksum = reiser4_adler32, .compress = lzo1_compress, .decompress = lzo1_decompress} , + [LZO1_NO_COMPRESSION_ID] = { + .h = { + .type_id = + REISER4_COMPRESSION_PLUGIN_TYPE, + .id = LZO1_NO_COMPRESSION_ID, + .pops = &compression_plugin_ops, + .label = "lzo1_no", + .desc = "lzo1 no compression transform", + .linkage = TYPE_SAFE_LIST_LINK_ZERO} + , + .dual = LZO1_COMPRESSION_ID, + .init 
= NULL, + .overrun = NULL, + .alloc = NULL, + .free = NULL, + .min_size_deflate = NULL, + .checksum = reiser4_adler32, + .compress = NULL, + .decompress = lzo1_decompress} + , [GZIP1_COMPRESSION_ID] = { .h = { .type_id = REISER4_COMPRESSION_PLUGIN_TYPE, .id = GZIP1_COMPRESSION_ID, - .pops = NULL, + .pops = &compression_plugin_ops, .label = "gzip1", .desc = "gzip1 compression transform", .linkage = TYPE_SAFE_LIST_LINK_ZERO} , + .dual = GZIP1_NO_COMPRESSION_ID, + .init = NULL, .overrun = gzip1_overrun, .alloc = gzip1_alloc, .free = gzip1_free, - .min_tfm_size = gzip1_min_tfm_size, + .min_size_deflate = gzip1_min_size_deflate, + .checksum = NULL, .compress = gzip1_compress, .decompress = gzip1_decompress} + , + [GZIP1_NO_COMPRESSION_ID] = { + .h = { + .type_id = + REISER4_COMPRESSION_PLUGIN_TYPE, + .id = GZIP1_NO_COMPRESSION_ID, + .pops = &compression_plugin_ops, + .label = "gzip1_no", + .desc = "gzip1 no compression transform", + .linkage = TYPE_SAFE_LIST_LINK_ZERO} + , + .dual = GZIP1_COMPRESSION_ID, + .init = NULL, + .overrun = NULL, + .alloc = gzip1_nocompress_alloc, + .free = gzip1_nocompress_free, + .min_size_deflate = NULL, + .checksum = NULL, + .compress = NULL, + .decompress = gzip1_decompress} + , + [NONE_COMPRESSION_ID] = { + .h = { + .type_id = + REISER4_COMPRESSION_PLUGIN_TYPE, + .id = NONE_COMPRESSION_ID, + .pops = &compression_plugin_ops, + .label = "none", + .desc = "No compression transform", + .linkage = TYPE_SAFE_LIST_LINK_ZERO} + , + .dual = NONE_COMPRESSION_ID, + .init = NULL, + .overrun = NULL, + .alloc = NULL, + .free = NULL, + .min_size_deflate = NULL, + .checksum = NULL, + .compress = NULL, + .decompress = NULL} }; /* diff -puN fs/reiser4/plugin/compress/compress.h~reiser4-update fs/reiser4/plugin/compress/compress.h --- devel/fs/reiser4/plugin/compress/compress.h~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/compress/compress.h 2005-07-08 23:11:54.000000000 -0700 @@ -12,16 +12,20 @@ typedef enum { /* 
builtin compression plugins */ typedef enum { - NONE_COMPRESSION_ID, - NULL_COMPRESSION_ID, LZO1_COMPRESSION_ID, + LZO1_NO_COMPRESSION_ID, GZIP1_COMPRESSION_ID, + GZIP1_NO_COMPRESSION_ID, + NONE_COMPRESSION_ID, LAST_COMPRESSION_ID, } reiser4_compression_id; +typedef unsigned long cloff_t; typedef void * coa_t; typedef coa_t coa_set[LAST_COMPRESSION_ID]; +__u32 reiser4_adler32(char * data, __u32 len); + #endif /* __FS_REISER4_COMPRESS_H__ */ /* Make Linus happy. diff -puN /dev/null fs/reiser4/plugin/compress/compress_mode.c --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/compress/compress_mode.c 2005-07-08 23:11:54.000000000 -0700 @@ -0,0 +1,108 @@ +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ + +/* reiser4 compression mode plugin (used by cryptcompress object plugin) */ + +#include "../../inode.h" +#include "../plugin.h" +#include "../cryptcompress.h" + +/* plugin->should_deflate() */ +static int +should_deflate_test(cloff_t index) +{ + return !test_bit(0, &index); +} + +/* plugin->discard_deflate() */ + +static int +discard_deflate_nocond(struct inode * inode, cloff_t index) +{ + int result; + + result = force_plugin(inode, + PSET_COMPRESSION, + compression_plugin_to_plugin + (dual_compression_plugin + (inode_compression_plugin(inode)))); + if (result) + return result; + mark_inode_dirty(inode); + return 0; +} + +static int +discard_deflate_first(struct inode * inode, cloff_t index) +{ + assert("edward-1308", inode != NULL); + + return (index ? 
0 : discard_deflate_nocond(inode, index)); +} + +/* compression mode_plugins */ +compression_mode_plugin compression_mode_plugins[LAST_COMPRESSION_MODE_ID] = { + [SMART_COMPRESSION_MODE_ID] = { + .h = { + .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE, + .id = SMART_COMPRESSION_MODE_ID, + .pops = NULL, + .label = "if-0-compressible", + .desc = "If-first-cluster-compressible heuristic", + .linkage = TYPE_SAFE_LIST_LINK_ZERO + }, + .should_deflate = NULL, + .save_deflate = NULL, + .discard_deflate = discard_deflate_first + }, + [LAZY_COMPRESSION_MODE_ID] = { + .h = { + .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE, + .id = LAZY_COMPRESSION_MODE_ID, + .pops = NULL, + .label = "if-all-compressible", + .desc = "If-all-compressible heuristic", + .linkage = TYPE_SAFE_LIST_LINK_ZERO + }, + .should_deflate = NULL, + .save_deflate = NULL, + .discard_deflate = discard_deflate_nocond + }, + [FORCE_COMPRESSION_MODE_ID] = { + .h = { + .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE, + .id = FORCE_COMPRESSION_MODE_ID, + .pops = NULL, + .label = "force", + .desc = "Compress everything", + .linkage = TYPE_SAFE_LIST_LINK_ZERO + }, + .should_deflate = NULL, + .save_deflate = NULL, + .discard_deflate = NULL + }, + [TEST_COMPRESSION_MODE_ID] = { + .h = { + .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE, + .id = TEST_COMPRESSION_MODE_ID, + .pops = NULL, + .label = "test", /* This mode is only for benchmarks */ + .desc = "Don't compress odd clusters", + .linkage = TYPE_SAFE_LIST_LINK_ZERO + }, + .should_deflate = should_deflate_test, + .save_deflate = NULL, + .discard_deflate = NULL + } +}; + + +/* + Local variables: + c-indentation-style: "K&R" + mode-name: "LC" + c-basic-offset: 8 + tab-width: 8 + fill-column: 120 + scroll-step: 1 + End: +*/ diff -puN fs/reiser4/plugin/compress/lzoconf.h~reiser4-update fs/reiser4/plugin/compress/lzoconf.h --- devel/fs/reiser4/plugin/compress/lzoconf.h~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ 
devel-akpm/fs/reiser4/plugin/compress/lzoconf.h 2005-07-08 23:11:54.000000000 -0700 @@ -1,7 +1,8 @@ /* lzoconf.h -- configuration for the LZO real-time data compression library - adopted for reiser4 compression tramsform plugin + adopted for reiser4 compression transform plugin. - This file is part of the LZO real-time data compression library. + This file is part of the LZO real-time data compression library + and not included in any proprietary licenses of reiser4. Copyright (C) 2002 Markus Franz Xaver Johannes Oberhumer Copyright (C) 2001 Markus Franz Xaver Johannes Oberhumer diff -puN fs/reiser4/plugin/compress/minilzo.c~reiser4-update fs/reiser4/plugin/compress/minilzo.c --- devel/fs/reiser4/plugin/compress/minilzo.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/compress/minilzo.c 2005-07-08 23:11:54.000000000 -0700 @@ -1,7 +1,8 @@ /* minilzo.c -- mini subset of the LZO real-time data compression library - Adopted for reiser4 compression transform plugin. + adopted for reiser4 compression transform plugin. - This file is part of the LZO real-time data compression library. + This file is part of the LZO real-time data compression library + and not included in any proprietary licenses of reiser4. 
Copyright (C) 2002 Markus Franz Xaver Johannes Oberhumer Copyright (C) 2001 Markus Franz Xaver Johannes Oberhumer @@ -1110,16 +1111,6 @@ _lzo_config_check(void) COMPILE_TIME_ASSERT(lzo_sizeof_dict_t == sizeof(lzo_dict_t)); -#if defined(__LZO_IN_MINLZO) - if (r == 1) - { - lzo_uint32 adler; - adler = lzo_adler32(0, NULL, 0); - adler = lzo_adler32(adler, lzo_copyright(), 200); - r &= __lzo_assert(adler == 0xc76f1751L); - } -#endif - if (r == 1) { r &= __lzo_assert(!schedule_insns_bug()); diff -puN fs/reiser4/plugin/compress/minilzo.h~reiser4-update fs/reiser4/plugin/compress/minilzo.h --- devel/fs/reiser4/plugin/compress/minilzo.h~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/compress/minilzo.h 2005-07-08 23:11:54.000000000 -0700 @@ -1,6 +1,8 @@ /* minilzo.h -- mini subset of the LZO real-time data compression library + adopted for reiser4 compression transform plugin. - This file is part of the LZO real-time data compression library. + This file is part of the LZO real-time data compression library + and not included in any proprietary licenses of reiser4. 
Copyright (C) 2002 Markus Franz Xaver Johannes Oberhumer Copyright (C) 2001 Markus Franz Xaver Johannes Oberhumer diff -puN fs/reiser4/plugin/cryptcompress.c~reiser4-update fs/reiser4/plugin/cryptcompress.c --- devel/fs/reiser4/plugin/cryptcompress.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/cryptcompress.c 2005-07-08 23:11:54.000000000 -0700 @@ -95,10 +95,9 @@ crc_generic_check_ok(void) reiser4_internal int crc_inode_ok(struct inode * inode) { - reiser4_inode * info = reiser4_inode_data(inode); cryptcompress_info_t * data = cryptcompress_inode_data(inode); - if ((info->cluster_shift <= MAX_CLUSTER_SHIFT) && + if (cluster_shift_ok(inode_cluster_shift(inode)) && (data->tfm[CRYPTO_TFM] == NULL) && (data->tfm[DIGEST_TFM] == NULL)) return 1; @@ -107,20 +106,39 @@ crc_inode_ok(struct inode * inode) } #endif +static int +check_cryptcompress(struct inode * inode) +{ + int result = 0; + + assert("edward-1307", inode_compression_plugin(inode) != NULL); + + if (inode_cluster_size(inode) < PAGE_CACHE_SIZE) { + warning("edward-1331", + "%s clusters are unsupported", + inode_cluster_plugin(inode)->h.label); + return RETERR(-EINVAL); + } + if (inode_compression_plugin(inode)->init) + result = inode_compression_plugin(inode)->init(); + return result; +} + static crypto_stat_t * inode_crypto_stat (struct inode * inode) { assert("edward-90", inode != NULL); assert("edward-91", reiser4_inode_data(inode) != NULL); - return (reiser4_inode_data(inode)->crypt); + + return (cryptcompress_inode_data(inode)->crypt); } /* NOTE-EDWARD: Do not use crypto without digest */ static int -alloc_crypto_tfm(struct inode * inode, crypto_data_t * data) +alloc_crypto_tfm(struct inode * inode, struct inode * parent) { int result; - crypto_plugin * cplug = crypto_plugin_by_id(data->cra); - digest_plugin * dplug = digest_plugin_by_id(data->dia); + crypto_plugin * cplug = inode_crypto_plugin(parent); + digest_plugin * dplug = inode_digest_plugin(parent); 
assert("edward-414", dplug != NULL); assert("edward-415", cplug != NULL); @@ -193,7 +211,7 @@ attach_crypto_stat(struct inode * inode, crypto_digest_update (dtfm, &sg, 1); crypto_digest_final (dtfm, stat->keyid); - reiser4_inode_data(inode)->crypt = stat; + cryptcompress_inode_data(inode)->crypt = stat; reiser4_kfree(txt); return 0; @@ -217,59 +235,25 @@ detach_crypto_stat(struct inode * object reiser4_kfree(stat); } -static void -init_default_crypto(crypto_data_t * data) -{ - assert("edward-692", data != NULL); - - memset(data, 0, sizeof(*data)); - - data->cra = get_default_plugin(PSET_CRYPTO)->h.id; - data->dia = get_default_plugin(PSET_DIGEST)->h.id; - return; -} - -static void -init_default_compression(compression_data_t * data) -{ - assert("edward-693", data != NULL); - - memset(data, 0, sizeof(*data)); - - data->coa = get_default_plugin(PSET_COMPRESSION)->h.id; -} - -static void -init_default_cluster(cluster_data_t * data) -{ - assert("edward-694", data != NULL); - - *data = DEFAULT_CLUSTER_SHIFT; -} - /* 1) fill crypto specific part of inode 2) set inode crypto stat which is supposed to be saved in stat-data */ static int -inode_set_crypto(struct inode * object, crypto_data_t * data) +inode_set_crypto(struct inode * object, struct inode * parent, + crypto_data_t * data) { int result; - crypto_data_t def; struct crypto_tfm * tfm; crypto_plugin * cplug; digest_plugin * dplug; reiser4_inode * info = reiser4_inode_data(object); - if (!data) { - init_default_crypto(&def); - data = &def; - } - cplug = crypto_plugin_by_id(data->cra); - dplug = digest_plugin_by_id(data->dia); + cplug = inode_crypto_plugin(parent); + dplug = inode_digest_plugin(parent); plugin_set_crypto(&info->pset, cplug); plugin_set_digest(&info->pset, dplug); - result = alloc_crypto_tfm(object, data); + result = alloc_crypto_tfm(object, parent); if (!result) return result; @@ -277,7 +261,6 @@ inode_set_crypto(struct inode * object, /* nothing to do anymore */ return 0; - assert("edward-416", data 
!= NULL); assert("edward-414", dplug != NULL); assert("edward-415", cplug != NULL); assert("edward-417", data->key!= NULL); @@ -311,48 +294,60 @@ inode_set_crypto(struct inode * object, return result; } -static void -inode_set_compression(struct inode * object, compression_data_t * data) +static int +inode_set_compression(struct inode * object, struct inode * parent) { - compression_data_t def; + int result = 0; + compression_plugin * cplug; reiser4_inode * info = reiser4_inode_data(object); - if (!data) { - init_default_compression(&def); - data = &def; + cplug = inode_compression_plugin(parent); + + if (cplug->init != NULL) { + result = cplug->init(); + if (result) + return result; } - plugin_set_compression(&info->pset, compression_plugin_by_id(data->coa)); + plugin_set_compression(&info->pset, cplug); info->plugin_mask |= (1 << PSET_COMPRESSION); + return 0; +} + +static void +inode_set_compression_mode(struct inode * object, struct inode * parent) +{ + compression_mode_plugin * mplug; + reiser4_inode * info = reiser4_inode_data(object); + + mplug = inode_compression_mode_plugin(parent); + + plugin_set_compression_mode(&info->pset, mplug); + info->plugin_mask |= (1 << PSET_COMPRESSION_MODE); return; } static int -inode_set_cluster(struct inode * object, cluster_data_t * data) +inode_set_cluster(struct inode * object, struct inode * parent) { - int result = 0; - cluster_data_t def; reiser4_inode * info; + cluster_plugin * cplug; assert("edward-696", object != NULL); info = reiser4_inode_data(object); + cplug = inode_cluster_plugin(parent); - if(!data) { - /* NOTE-EDWARD: - this is a necessary parameter for cryptcompress object */ - warning("edward-418", "create_cryptcompress: default cluster size" - " (%u) was assigned for the object %llu\n", - (1U << PAGE_CACHE_SHIFT << DEFAULT_CLUSTER_SHIFT), - (unsigned long long)get_inode_oid(object)); - init_default_cluster(&def); - data = &def; + if (cplug->shift < PAGE_CACHE_SHIFT) { + warning("edward-1320", + "Can not 
support cluster size %p", + cplug->h.label); + return RETERR(-EINVAL); } - assert("edward-697", *data <= MAX_CLUSTER_SHIFT); + plugin_set_cluster(&info->pset, cplug); - info->cluster_shift = *data; - info->extmask |= (1 << CLUSTER_STAT); - return result; + info->plugin_mask |= (1 << PSET_CLUSTER); + return 0; } /* plugin->create() method for crypto-compressed files @@ -363,7 +358,8 @@ inode_set_cluster(struct inode * object, . attach cluster info */ reiser4_internal int -create_cryptcompress(struct inode *object, struct inode *parent, reiser4_object_create_data * data) +create_cryptcompress(struct inode *object, struct inode *parent, + reiser4_object_create_data * data) { int result; reiser4_inode * info; @@ -383,15 +379,17 @@ create_cryptcompress(struct inode *objec info->plugin_mask |= (1 << PSET_FILE); /* set crypto */ - result = inode_set_crypto(object, data->crypto); + result = inode_set_crypto(object, parent, data->crypto); if (result) goto error; - /* set compression */ - inode_set_compression(object, data->compression); + result = inode_set_compression(object, parent); + if (result) + goto error; + inode_set_compression_mode(object, parent); /* set cluster info */ - result = inode_set_cluster(object, data->cluster); + result = inode_set_cluster(object, parent); if (result) goto error; /* set plugin mask */ @@ -403,7 +401,6 @@ create_cryptcompress(struct inode *objec return 0; /* save() method failed, release attached crypto info */ inode_clr_flag(object, REISER4_CRYPTO_STAT_LOADED); - inode_clr_flag(object, REISER4_CLUSTER_KNOWN); error: free_crypto_tfm(object); detach_crypto_stat(object); @@ -424,12 +421,11 @@ destroy_inode_cryptcompress(struct inode { assert("edward-802", inode_file_plugin(inode) == file_plugin_by_id(CRC_FILE_PLUGIN_ID)); assert("edward-803", !is_bad_inode(inode) && is_inode_loaded(inode)); - assert("edward-804", inode_get_flag(inode, REISER4_CLUSTER_KNOWN)); free_crypto_tfm(inode); if (inode_get_flag(inode, REISER4_CRYPTO_STAT_LOADED)) 
detach_crypto_stat(inode); - inode_clr_flag(inode, REISER4_CLUSTER_KNOWN); + inode_clr_flag(inode, REISER4_CRYPTO_STAT_LOADED); inode_clr_flag(inode, REISER4_SECRET_KEY_INSTALLED); } @@ -451,7 +447,6 @@ reiser4_internal size_t inode_scaled_cluster_size (struct inode * inode) { assert("edward-110", inode != NULL); - assert("edward-111", inode_get_flag(inode, REISER4_CLUSTER_KNOWN)); return inode_scaled_offset(inode, inode_cluster_size(inode)); } @@ -470,7 +465,6 @@ set_cluster_nrpages(reiser4_cluster_t * assert("edward-180", clust != NULL); assert("edward-1040", inode != NULL); - assert("edward-1042", inode_get_flag(inode, REISER4_CLUSTER_KNOWN)); win = clust->win; if (!win) { @@ -626,7 +620,7 @@ eq_to_ldk(znode *node, const reiser4_key #endif /* The core search procedure. - If result is not cbk_errored current znode is locked */ + If returned value is not cbk_errored, current znode is locked */ static int find_cluster_item(hint_t * hint, const reiser4_key *key, /* key of the item we are @@ -727,7 +721,7 @@ crypto_overhead(size_t len /* advised le assert("edward-486", clust != 0); - if (!inode_get_crypto(inode) || !inode_crypto_plugin(inode)->align_cluster) + if (!inode_get_crypto(inode) || !inode_crypto_plugin(inode)->align_stream) return 0; if (!len) size = clust->len; @@ -763,42 +757,45 @@ crypto_overhead(size_t len /* advised le } #endif -/* maximal aligning overhead which can be appended - to the flow before encryption if any */ +/* the following two functions are to evaluate results + of compression transform */ static unsigned max_crypto_overhead(struct inode * inode) { - if (!inode_get_crypto(inode) || !inode_crypto_plugin(inode)->align_cluster) + if (!inode_get_crypto(inode) || !inode_crypto_plugin(inode)->align_stream) return 0; return crypto_blocksize(inode); } -static unsigned -compress_overhead(struct inode * inode, int in_len) +static int +deflate_overhead(struct inode * inode) { - return inode_compression_plugin(inode)->overrun(in_len); + return 
(inode_compression_plugin(inode)->checksum ? DC_CHECKSUM_SIZE : 0); } -/* Since small input stream can not get compressed, - we try to awoid a lot of useless job */ -static int -min_size_to_compress(struct inode * inode) +/* to estimate size of allocating transform stream */ +static unsigned +deflate_overrun(struct inode * inode, int in_len) { - assert("edward-1036", - inode_compression_plugin(inode)->min_tfm_size != NULL); - return inode_compression_plugin(inode)->min_tfm_size(); + return (inode_compression_plugin(inode)->overrun != NULL ? + inode_compression_plugin(inode)->overrun(in_len) : + 0); } - /* The following two functions represent reiser4 compression policy */ static int -try_compress(tfm_cluster_t * tc, struct inode * inode) +try_compress(tfm_cluster_t * tc, cloff_t index, struct inode * inode) { - assert("edward-1037", min_size_to_compress(inode) > 0 && - min_size_to_compress(inode) < inode_cluster_size(inode)); + compression_plugin * cplug = inode_compression_plugin(inode); + compression_mode_plugin * mplug = inode_compression_mode_plugin(inode); - return (inode_compression_plugin(inode) != compression_plugin_by_id(NONE_COMPRESSION_ID)) && - (tc->len >= min_size_to_compress(inode)); + assert("edward-1321", tc->len != 0); + assert("edward-1322", cplug != NULL); + assert("edward-1323", mplug != NULL); + + return (cplug->compress != NULL) && + (mplug->should_deflate != NULL ? mplug->should_deflate(index) : 1) && + (cplug->min_size_deflate != NULL ? tc->len >= cplug->min_size_deflate() : 1); } static int @@ -807,36 +804,63 @@ try_encrypt(struct inode * inode) return inode_get_crypto(inode) != NULL; } -/* Decide by the lengths of compressed and decompressed cluster, should we save or should - we discard the result of compression. The policy is that the length of compressed then - encrypted cluster including _all_ appended infrasrtucture should be _less_ then its lenght - before compression. */ +/* Evaluation results of compression transform. 
*/ static int save_compressed(int old_size, int new_size, struct inode * inode) { - return (new_size + DC_CHECKSUM_SIZE + max_crypto_overhead(inode) < old_size); + return (new_size + deflate_overhead(inode) + max_crypto_overhead(inode) < old_size); } -/* guess if the cluster was compressed */ +/* Guess result of the evaluation above */ static int -need_decompression(reiser4_cluster_t * clust, struct inode * inode, - int encrypted /* is cluster encrypted */) +need_inflate(reiser4_cluster_t * clust, struct inode * inode, + int encrypted /* is cluster encrypted */) { tfm_cluster_t * tc = &clust->tc; assert("edward-142", tc != 0); assert("edward-143", inode != NULL); - return (inode_compression_plugin(inode) != compression_plugin_by_id(NONE_COMPRESSION_ID)) && - (tc->len < (encrypted ? inode_scaled_offset(inode, fsize_to_count(clust, inode)) : fsize_to_count(clust, inode))); + return tc->len < + (encrypted ? + inode_scaled_offset(inode, fsize_to_count(clust, inode)) : + fsize_to_count(clust, inode)); +} + +/* append checksum at the end of input transform stream + and increase its length */ +static void +dc_set_checksum(compression_plugin * cplug, tfm_cluster_t * tc) +{ + __u32 checksum; + + assert("edward-1309", tc != NULL); + assert("edward-1310", tc->len > 0); + assert("edward-1311", cplug->checksum != NULL); + checksum = cplug->checksum(tfm_stream_data(tc, OUTPUT_STREAM), tc->len); + cputod32(checksum, (d32 *)(tfm_stream_data(tc, OUTPUT_STREAM) + tc->len)); + tc->len += (int)DC_CHECKSUM_SIZE; } -static void set_compression_magic(__u8 * magic) +/* returns 0 if checksums coincide, otherwise returns 1, + increase the length of input transform stream */ +static int +dc_check_checksum(compression_plugin * cplug, tfm_cluster_t * tc) { - /* FIXME-EDWARD: Use a checksum here */ - assert("edward-279", magic != NULL); - memset(magic, 0, DC_CHECKSUM_SIZE); + assert("edward-1312", tc != NULL); + assert("edward-1313", tc->len > (int)DC_CHECKSUM_SIZE); + assert("edward-1314", 
cplug->checksum != NULL); + + if (cplug->checksum(tfm_stream_data(tc, INPUT_STREAM), tc->len - (int)DC_CHECKSUM_SIZE) != + d32tocpu((d32 *)(tfm_stream_data(tc, INPUT_STREAM) + tc->len - (int)DC_CHECKSUM_SIZE))) { + warning("edward-156", "bad disk cluster checksum %d, (should be %d)\n", + (int)d32tocpu((d32 *)(tfm_stream_data(tc, INPUT_STREAM) + tc->len - (int)DC_CHECKSUM_SIZE)), + (int)cplug->checksum(tfm_stream_data(tc, INPUT_STREAM), tc->len - (int)DC_CHECKSUM_SIZE)); + return 1; + } + tc->len -= (int)DC_CHECKSUM_SIZE; + return 0; } reiser4_internal int @@ -849,7 +873,7 @@ grab_tfm_stream(struct inode * inode, tf assert("edward-1027", inode_compression_plugin(inode) != NULL); if (act == TFM_WRITE) - size += compress_overhead(inode, inode_cluster_size(inode)); + size += deflate_overrun(inode, inode_cluster_size(inode)); if (!tfm_stream(tc, id) && id == INPUT_STREAM) alternate_streams(tc); @@ -864,7 +888,6 @@ grab_tfm_stream(struct inode * inode, tf } /* Common deflate cluster manager */ - reiser4_internal int deflate_cluster(reiser4_cluster_t * clust, struct inode * inode) { @@ -876,13 +899,12 @@ deflate_cluster(reiser4_cluster_t * clus assert("edward-903", tfm_stream_is_set(tc, INPUT_STREAM)); assert("edward-498", !tfm_cluster_is_uptodate(tc)); - if (result) - return result; - if (try_compress(tc, inode)) { + if (try_compress(tc, clust->index, inode)) { /* try to compress, discard bad results */ __u32 dst_len; compression_plugin * cplug = inode_compression_plugin(inode); - + compression_mode_plugin * mplug = + inode_compression_mode_plugin(inode); assert("edward-602", cplug != NULL); result = grab_tfm_stream(inode, tc, TFM_WRITE, OUTPUT_STREAM); @@ -896,14 +918,28 @@ deflate_cluster(reiser4_cluster_t * clus /* make sure we didn't overwrite extra bytes */ assert("edward-603", dst_len <= tfm_stream_size(tc, OUTPUT_STREAM)); - /* should we accept or discard the result of compression transform */ + /* evaluate results of compression transform */ if 
(save_compressed(tc->len, dst_len, inode)) { - /* accept */ + /* good result, accept */ tc->len = dst_len; - - set_compression_magic(tfm_stream_data(tc, OUTPUT_STREAM) + tc->len); - tc->len += DC_CHECKSUM_SIZE; + if (cplug->checksum != NULL) + dc_set_checksum(cplug, tc); transformed = 1; + if (mplug->save_deflate != NULL) + mplug->save_deflate(inode); + } + else { + /* bad result, discard */ +#if REISER4_DEBUG + warning("edward-1309", + "incompressible data: inode %llu, cluster %lu", + (unsigned long long)get_inode_oid(inode), clust->index); +#endif + if (mplug->discard_deflate != NULL) { + result = mplug->discard_deflate(inode, clust->index); + if (result) + return result; + } } } if (try_encrypt(inode)) { @@ -959,8 +995,7 @@ inflate_cluster(reiser4_cluster_t * clus transformed = 1; } - if (need_decompression(clust, inode, 0)) { - __u8 magic[DC_CHECKSUM_SIZE]; + if (need_inflate(clust, inode, 0)) { unsigned dst_len = inode_cluster_size(inode); compression_plugin * cplug = inode_compression_plugin(inode); @@ -970,14 +1005,15 @@ inflate_cluster(reiser4_cluster_t * clus result = grab_tfm_stream(inode, tc, TFM_READ, OUTPUT_STREAM); if (result) return result; + assert("edward-1305", cplug->decompress != NULL); assert("edward-910", tfm_cluster_is_set(tc)); - /* Check compression magic for possible IO errors. + /* Check compression checksum for possible IO errors. End-of-cluster format created before encryption: data - compression_magic (4) Indicates presence of compression + checksum (4) Indicates presence of compression infrastructure, should be private. Can be absent. 
crypto_overhead Created by ->align() method of crypto-plugin, @@ -989,17 +1025,11 @@ inflate_cluster(reiser4_cluster_t * clus tail_size (1) size of aligning tail, 1 <= tail_size <= blksize */ - set_compression_magic(magic); - - if (memcmp(tfm_stream_data(tc, INPUT_STREAM) + (tc->len - (size_t)DC_CHECKSUM_SIZE), - magic, (size_t)DC_CHECKSUM_SIZE)) { - printk("edward-156: wrong compression magic %d (should be %d)\n", - *((int *)(tfm_stream_data(tc, INPUT_STREAM) + (tc->len - (size_t)DC_CHECKSUM_SIZE))), *((int *)magic)); - result = -EIO; - return result; + if (cplug->checksum != NULL) { + result = dc_check_checksum(cplug, tc); + if (result) + return RETERR(-EIO); } - tc->len -= (size_t)DC_CHECKSUM_SIZE; - /* decompress cluster */ cplug->decompress(get_coa(tc, cplug->h.id), tfm_stream_data(tc, INPUT_STREAM), tc->len, @@ -1033,6 +1063,9 @@ readpage_cryptcompress(void *vp, struct assert("edward-88", PageLocked(page)); assert("edward-89", page->mapping && page->mapping->host); + result = check_cryptcompress(page->mapping->host); + if (result) + return result; file = vp; if (file) assert("edward-113", page->mapping == file->f_dentry->d_inode->i_mapping); @@ -1071,6 +1104,8 @@ readpages_cryptcompress(struct file *fil assert("edward-1112", mapping != NULL); assert("edward-1113", mapping->host != NULL); + if (check_cryptcompress(mapping->host)) + return; fplug = inode_file_plugin(mapping->host); assert("edward-1114", fplug == file_plugin_by_id(CRC_FILE_PLUGIN_ID)); @@ -1265,13 +1300,10 @@ try_capture_cluster(reiser4_cluster_t * assert("edward-1035", node != NULL); - if (clust->win) { - spin_lock_inode(inode); - LOCK_JNODE(node); + LOCK_JNODE(node); + if (clust->win) inode_set_new_size(clust, inode); - } - else - LOCK_JNODE(node); + result = try_capture(node, ZNODE_WRITE_LOCK, 0, 0); if (result) goto exit; @@ -1279,8 +1311,6 @@ try_capture_cluster(reiser4_cluster_t * exit: assert("edward-1034", !result); UNLOCK_JNODE(node); - if (clust->win) - spin_unlock_inode(inode); 
jput(node); return result; } @@ -1361,20 +1391,6 @@ grab_cluster_pages(struct inode * inode, return result; } -UNUSED_ARG static void -set_cluster_unlinked(reiser4_cluster_t * clust, struct inode * inode) -{ - jnode * node; - - node = jprivate(clust->pages[0]); - - assert("edward-640", node); - - LOCK_JNODE(node); - JF_SET(node, JNODE_NEW); - UNLOCK_JNODE(node); -} - /* put cluster pages */ static void release_cluster_pages(reiser4_cluster_t * clust, int from) @@ -1700,7 +1716,7 @@ invalidate_hint_cluster(reiser4_cluster_ assert("edward-1291", clust != NULL); assert("edward-1292", clust->hint != NULL); - longterm_unlock_znode(clust->hint->ext_coord.lh); + done_lh(clust->hint->ext_coord.lh); clust->hint->ext_coord.valid = 0; } @@ -1870,7 +1886,12 @@ find_cluster(reiser4_cluster_t * clust, set_key_offset(&ra_info.key_to_stop, get_key_offset(max_key())); while (f.length) { - result = find_cluster_item(hint, &f.key, (write ? ZNODE_WRITE_LOCK : ZNODE_READ_LOCK), NULL, FIND_EXACT, 0); + result = find_cluster_item(hint, + &f.key, + (write ? ZNODE_WRITE_LOCK : ZNODE_READ_LOCK), + NULL, + FIND_EXACT, + (write ? 
CBK_FOR_INSERT : 0)); switch (result) { case CBK_COORD_NOTFOUND: if (inode_scaled_offset(inode, clust_to_off(cl_idx, inode)) == get_key_offset(&f.key)) { @@ -2131,10 +2152,31 @@ jnode_truncate_ok(struct inode *inode, c { jnode * node; node = jlookup(current_tree, get_inode_oid(inode), clust_to_pg(index, inode)); - if (node) + if (node) { + warning("edward-1315", "jnode %p is untruncated\n", node); jput(node); + } return (node == NULL); } + +static int +jnodes_truncate_ok(struct inode * inode, cloff_t start) +{ + int result; + jnode * node; + reiser4_inode *info = reiser4_inode_data(inode); + reiser4_tree * tree = tree_by_inode(inode); + + RLOCK_TREE(tree); + + result = radix_tree_gang_lookup(jnode_tree_by_reiser4_inode(info), (void **)&node, + clust_to_pg(start, inode), 1); + RUNLOCK_TREE(tree); + if (result) + warning("edward-1332", "Untruncated jnode %p\n", node); + return !result; +} + #endif /* Collect unlocked cluster pages and jnode (the last is in the @@ -2214,16 +2256,16 @@ truncate_page_cluster(struct inode *inod return; } -/* Prepare cluster handle before write. Called by all the clients which - age going to modify the page cluster and put it into a transaction - (file_write, truncate, writepages, etc..) +/* Prepare cluster handle before write and(or) capture. This function + is called by all the clients which modify page cluster and(or) put + it into a transaction (file_write, truncate, writepages, etc..) . grab cluster pages; . reserve disk space; . maybe read pages from disk and set the disk cluster dirty; . maybe write hole; - . maybe create 'unprepped' disk cluster (if the disk cluster is fake (isn't represenred - by any items on disk) + . 
maybe create 'unprepped' disk cluster if the last one is fake + (isn't represented by any items on disk) */ static int @@ -2310,7 +2352,6 @@ set_cluster_params(struct inode * inode, assert("edward-197", clust != NULL); assert("edward-1072", win != NULL); assert("edward-198", inode != NULL); - assert("edward-747", reiser4_inode_data(inode)->cluster_shift <= MAX_CLUSTER_SHIFT); result = alloc_cluster_pgset(clust, cluster_nrpages(inode)); if (result) @@ -2321,10 +2362,6 @@ set_cluster_params(struct inode * inode, loff_t hole_size; hole_size = file_off - inode->i_size; - printk("edward-176, Warning: Hole of size %llu in " - "cryptcompress file (inode %llu, offset %llu) \n", - hole_size, (unsigned long long)get_inode_oid(inode), file_off); - set_window(clust, win, inode, inode->i_size, file_off); win->stat = HOLE_WINDOW; if (win->off + hole_size < inode_cluster_size(inode)) @@ -2370,9 +2407,11 @@ write_cryptcompress_flow(struct file * f assert("edward-161", schedulable()); assert("edward-748", crc_inode_ok(inode)); assert("edward-159", current_blocksize == PAGE_CACHE_SIZE); - assert("edward-749", reiser4_inode_data(inode)->cluster_shift <= MAX_CLUSTER_SHIFT); assert("edward-1274", get_current_context()->grabbed_blocks == 0); + result = check_cryptcompress(inode); + if (result) + return result; result = load_file_hint(file, &hint); if (result) return result; @@ -2593,7 +2632,6 @@ ssize_t read_cryptcompress(struct file * inode = file->f_dentry->d_inode; assert("edward-1194", !inode_get_flag(inode, REISER4_NO_SD)); - assert("edward-1195", inode_get_flag(inode, REISER4_CLUSTER_KNOWN)); info = cryptcompress_inode_data(inode); needed = cryptcompress_estimate_read(inode); @@ -2623,7 +2661,7 @@ static void set_append_cluster_key(const coord_t *coord, reiser4_key *key, struct inode *inode) { item_key_by_coord(coord, key); - set_key_offset(key, ((__u64)(clust_by_coord(coord, inode)) + 1) << inode_cluster_shift(inode) << PAGE_CACHE_SHIFT); + set_key_offset(key, 
clust_to_off(clust_by_coord(coord, inode) + 1, inode)); } /* If @index > 0, find real disk cluster of the index (@index - 1), @@ -2632,7 +2670,8 @@ set_append_cluster_key(const coord_t *co It succes was returned: (@index == 0 && @found == 0) means that the object doesn't have real disk clusters. - (@index != 0 && @found == 0) means that disk cluster of @index doesn't exist. + (@index != 0 && @found == 0) means that disk cluster of (@index -1 ) doesn't + exist. */ static int find_real_disk_cluster(struct inode * inode, cloff_t * found, cloff_t index) @@ -2696,7 +2735,7 @@ find_real_disk_cluster(struct inode * in } static int -find_actual_cloff(struct inode *inode, cloff_t * index) +find_fake_appended(struct inode *inode, cloff_t *index) { return find_real_disk_cluster(inode, index, 0 /* find last real one */); } @@ -2835,7 +2874,7 @@ cut_tree_worker_cryptcompress(tap_t * ta } /* Append or expand hole in two steps (exclusive access should be aquired!) - 1) write zeroes to the last existing cluster, + 1) write zeroes to the current real cluster, 2) expand hole via fake clusters (just increase i_size) */ static int cryptcompress_append_hole(struct inode * inode /*contains old i_size */, @@ -2853,6 +2892,7 @@ cryptcompress_append_hole(struct inode * assert("edward-1134", schedulable()); assert("edward-1135", crc_inode_ok(inode)); assert("edward-1136", current_blocksize == PAGE_CACHE_SIZE); + assert("edward-1333", off_to_cloff(inode->i_size, inode) != 0); init_lh(&lh); hint_init_zero(&hint); @@ -2862,12 +2902,7 @@ cryptcompress_append_hole(struct inode * reiser4_cluster_init(&clust, &win); clust.hint = &hint; - if (off_to_cloff(inode->i_size, inode) == 0) - /* appending hole to cluster boundary */ - goto fake_append; - /* set cluster handle */ - result = alloc_cluster_pgset(&clust, cluster_nrpages(inode)); if (result) goto out; @@ -2878,13 +2913,9 @@ cryptcompress_append_hole(struct inode * win.stat = HOLE_WINDOW; assert("edward-1137", clust.index == 
off_to_clust(inode->i_size, inode)); -#if REISER4_DEBUG - printk("edward-1138, Warning: Hole of size %llu in " - "cryptcompress file (inode %llu); " - "%u zeroes appended to cluster (index = %lu) \n", - hole_size, (unsigned long long)get_inode_oid(inode), nr_zeroes, clust.index); -#endif + result = prepare_cluster(inode, 0, 0, &clust, PCL_APPEND); + assert("edward-1271", !result); if (result) goto out; @@ -2896,8 +2927,8 @@ cryptcompress_append_hole(struct inode * if (!hole_size) /* nothing to append anymore */ goto out; - fake_append: + /* fake_append: */ INODE_SET_FIELD(inode, i_size, new_size); out: done_lh(&lh); @@ -2907,7 +2938,7 @@ cryptcompress_append_hole(struct inode * #if REISER4_DEBUG static int -page_truncate_ok(struct inode * inode, loff_t old_size, pgoff_t start) +pages_truncate_ok(struct inode * inode, loff_t old_size, pgoff_t start) { struct pagevec pvec; int i; @@ -2944,7 +2975,7 @@ body_truncate_ok(struct inode * inode, c int result; cloff_t raidx; - result = find_actual_cloff(inode, &raidx); + result = find_fake_appended(inode, &raidx); return !result && (aidx == raidx); } #endif @@ -2970,14 +3001,14 @@ prune_cryptcompress(struct inode * inode unsigned nr_zeroes; loff_t to_prune; loff_t old_size; - cloff_t fidx; + cloff_t ridx; hint_t hint; lock_handle lh; reiser4_slide_t win; reiser4_cluster_t clust; - assert("edward-1140", inode->i_size > new_size); + assert("edward-1140", inode->i_size >= new_size); assert("edward-1141", schedulable()); assert("edward-1142", crc_inode_ok(inode)); assert("edward-1143", current_blocksize == PAGE_CACHE_SIZE); @@ -2990,14 +3021,14 @@ prune_cryptcompress(struct inode * inode reiser4_cluster_init(&clust, &win); clust.hint = &hint; - /* first completely truncated cluster */ - fidx = count_to_nrclust(new_size, inode); + /* rightmost completely truncated cluster */ + ridx = count_to_nrclust(new_size, inode); - assert("edward-1174", fidx <= aidx); + assert("edward-1174", ridx <= aidx); old_size = inode->i_size; - if 
(fidx != aidx) { + if (ridx != aidx) { result = cut_file_items(inode, - clust_to_off(fidx, inode), + clust_to_off(ridx, inode), update_sd, clust_to_off(aidx, inode), update_cryptcompress_size); @@ -3014,14 +3045,14 @@ prune_cryptcompress(struct inode * inode to_prune = inode->i_size - new_size; /* check if partially truncated cluster is fake */ - result = find_real_disk_cluster(inode, &aidx, fidx); + result = find_real_disk_cluster(inode, &aidx, ridx); if (result) goto out; if (!aidx) /* yup, this is fake one */ goto finish; - assert("edward-1148", aidx == fidx); + assert("edward-1148", aidx == ridx); /* try to capture partially truncated page cluster */ result = alloc_cluster_pgset(&clust, cluster_nrpages(inode)); @@ -3033,7 +3064,7 @@ prune_cryptcompress(struct inode * inode set_window(&clust, &win, inode, new_size, new_size + nr_zeroes); win.stat = HOLE_WINDOW; - assert("edward-1149", clust.index == fidx - 1); + assert("edward-1149", clust.index == ridx - 1); result = prepare_cluster(inode, 0, 0, &clust, PCL_TRUNCATE); if (result) @@ -3043,73 +3074,92 @@ prune_cryptcompress(struct inode * inode clust.dstat == UNPR_DISK_CLUSTER); assert("edward-1191", inode->i_size == new_size); - assert("edward-1206", body_truncate_ok(inode, fidx)); + assert("edward-1206", body_truncate_ok(inode, ridx)); finish: - /* drop all the pages that don't have jnodes - because of holes represented by fake disk clusters - including the pages of partially truncated cluster - which was released by prepare_cluster() */ - truncate_inode_pages(inode->i_mapping, - pg_to_off(count_to_nrpages(new_size))); + /* drop all the pages that don't have jnodes (i.e. 
pages + which can not be truncated by cut_file_items() because + of holes represented by fake disk clusters) including + the pages of partially truncated cluster which was + released by prepare_cluster() */ + truncate_inode_pages(inode->i_mapping, new_size); INODE_SET_FIELD(inode, i_size, new_size); out: + assert("edward-1334", !result); + assert("edward-1209", + pages_truncate_ok(inode, old_size, count_to_nrpages(new_size))); + assert("edward-1335", + jnodes_truncate_ok(inode, count_to_nrclust(new_size, inode))); done_lh(&lh); put_cluster_handle(&clust, TFM_READ); return result; } -/* returns true if the cluster we prune or append to is fake */ static int -truncating_last_fake_dc(struct inode * inode, cloff_t aidx, loff_t new_size) +start_truncate_fake(struct inode *inode, cloff_t aidx, loff_t new_size, int update_sd) { - return aidx == 0 /* no items */|| - (aidx <= off_to_clust(inode->i_size, inode) && - aidx <= off_to_clust(new_size, inode)); + int result = 0; + int bytes; + + if (new_size > inode->i_size) { + /* append */ + if (inode->i_size < clust_to_off(aidx, inode)) + /* no fake bytes */ + return 0; + bytes = new_size - inode->i_size; + INODE_SET_FIELD(inode, i_size, inode->i_size + bytes); + } + else { + /* prune */ + if (inode->i_size <= clust_to_off(aidx, inode)) + /* no fake bytes */ + return 0; + bytes = inode->i_size - max_count(new_size, clust_to_off(aidx, inode)); + if (!bytes) + return 0; + INODE_SET_FIELD(inode, i_size, inode->i_size - bytes); + /* In the case of fake prune we need to drop page cluster. + There are only 2 cases for partially truncated page: + 1. If it is dirty, therefore it is anonymous + (was dirtied via mmap), and will be captured + later via ->capture(). + 2. If it is clean, therefore it is filled by zeroes. + In both cases we don't need to make it dirty and + capture here. 
+ */ + truncate_inode_pages(inode->i_mapping, inode->i_size); + assert("edward-1336", + jnodes_truncate_ok(inode, count_to_nrclust(inode->i_size, inode))); + } + if (update_sd) + result = update_sd_cryptcompress(inode); + return result; } /* This is called in setattr_cryptcompress when it is used to truncate, and in delete_cryptcompress */ - static int cryptcompress_truncate(struct inode *inode, /* old size */ loff_t new_size, /* new size */ int update_sd) { int result; - cloff_t aidx; /* appended index to the last actual one */ - loff_t old_size = inode->i_size; + cloff_t aidx; - assert("edward-1167", (new_size != old_size) || (!new_size && !old_size)); - - result = find_actual_cloff(inode, &aidx); + result = find_fake_appended(inode, &aidx); if (result) return result; - assert("edward-1208", ergo(aidx > 0, inode->i_size > clust_to_off(aidx - 1, inode))); - if (truncating_last_fake_dc(inode, aidx, new_size)) { - /* we do not need to truncate items, so just drop pages - which can not acquire jnodes because of exclusive access */ - - INODE_SET_FIELD(inode, i_size, new_size); - if (old_size > new_size) { - truncate_inode_pages(inode->i_mapping, - pg_to_off(count_to_nrpages(new_size))); - assert("edward-663", ergo(!new_size, - reiser4_inode_data(inode)->anonymous_eflushed == 0 && - reiser4_inode_data(inode)->captured_eflushed == 0)); - } - if (update_sd) - result = update_sd_cryptcompress(inode); + result = start_truncate_fake(inode, aidx, new_size, update_sd); + if (result) return result; - } - result = (old_size < new_size ? cryptcompress_append_hole(inode, new_size) : - prune_cryptcompress(inode, new_size, update_sd, aidx)); - - assert("edward-1209", - page_truncate_ok(inode, old_size, count_to_nrpages(new_size))); - return result; + if (inode->i_size == new_size) + /* nothing to truncate anymore */ + return 0; + return (inode->i_size < new_size ? 
+ cryptcompress_append_hole(inode, new_size) : + prune_cryptcompress(inode, new_size, update_sd, aidx)); } /* plugin->u.file.truncate */ @@ -3135,28 +3185,21 @@ capture_anonymous_cluster(reiser4_cluste set_cluster_pages_dirty(clust); result = try_capture_cluster(clust, inode); - set_hint_cluster(inode, clust->hint, clust->index + 1, ZNODE_WRITE_LOCK); + put_hint_cluster(clust, inode, ZNODE_WRITE_LOCK); if (result) release_cluster_pages_and_jnode(clust); return result; } -static void -redirty_inode(struct inode *inode) -{ - spin_lock(&inode_lock); - inode->i_state |= I_DIRTY; - spin_unlock(&inode_lock); -} - -#define CAPTURE_APAGE_BURST (1024) +#define MAX_CLUSTERS_TO_CAPTURE(inode) (1024 >> inode_cluster_shift(inode)) +/* read lock should be acquired */ static int -capture_anonymous_clusters(struct address_space * mapping, pgoff_t * index) +capture_anonymous_clusters(struct address_space * mapping, pgoff_t * index, int to_capture) { int result = 0; - int to_capture; int found; + int progress = 0; struct page * page = NULL; hint_t hint; lock_handle lh; @@ -3174,40 +3217,32 @@ capture_anonymous_clusters(struct addres result = alloc_cluster_pgset(&clust, cluster_nrpages(mapping->host)); if (result) goto out; - to_capture = (__u32)CAPTURE_APAGE_BURST >> inode_cluster_shift(mapping->host); - do { + while (to_capture > 0) { found = find_get_pages_tag(mapping, index, PAGECACHE_TAG_REISER4_MOVED, 1, &page); - if (!found) + if (!found) { + *index = (pgoff_t) - 1; break; + } assert("edward-1109", page != NULL); - clust.index = pg_to_clust(*index, mapping->host); - + move_cluster_forward(&clust, mapping->host, page->index, &progress); result = capture_anonymous_cluster(&clust, mapping->host); - if (result) { - page_cache_release(page); - break; - } page_cache_release(page); + if (result) + break; to_capture --; - - assert("edward-1076", clust.index <= pg_to_clust(*index, mapping->host)); - /* index of the next cluster to capture */ - if (clust.index == pg_to_clust(*index, 
mapping->host)) - *index = clust_to_pg(clust.index + 1, mapping->host); - } while (to_capture); - + } if (result) { warning("edward-1077", "Cannot capture anon pages: result=%i (captured=%d)\n", result, - ((__u32)CAPTURE_APAGE_BURST >> inode_cluster_shift(mapping->host)) - to_capture); + ((__u32)MAX_CLUSTERS_TO_CAPTURE(mapping->host)) - to_capture); } else { /* something had to be found */ - assert("edward-1078", to_capture <= CAPTURE_APAGE_BURST); - if (to_capture == 0) + assert("edward-1078", to_capture <= MAX_CLUSTERS_TO_CAPTURE(mapping->host)); + if (to_capture <= 0) /* there may be left more pages */ - redirty_inode(mapping->host); + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); } out: done_lh(&lh); @@ -3229,6 +3264,8 @@ reiser4_internal int capture_cryptcompress(struct inode *inode, struct writeback_control *wbc) { int result; + int to_capture; + pgoff_t nrpages; pgoff_t index = 0; cryptcompress_info_t * info; @@ -3236,11 +3273,19 @@ capture_cryptcompress(struct inode *inod return 0; info = cryptcompress_inode_data(inode); + nrpages = count_to_nrpages(i_size_read(inode)); + if (wbc->sync_mode != WB_SYNC_ALL) + to_capture = min_count(wbc->nr_to_write, MAX_CLUSTERS_TO_CAPTURE(inode)); + else + to_capture = MAX_CLUSTERS_TO_CAPTURE(inode); do { reiser4_context ctx; if (is_in_reiser4_context()) { + /* FIXME-EDWARD: REMOVEME */ + all_grabbed2free(); + /* It can be in the context of write system call from balance_dirty_pages() */ if (down_read_trylock(&info->lock) == 0) { @@ -3257,7 +3302,7 @@ capture_cryptcompress(struct inode *inod LOCK_CNT_INC(inode_sem_r); - result = capture_anonymous_clusters(inode->i_mapping, &index); + result = capture_anonymous_clusters(inode->i_mapping, &index, to_capture); up_read(&info->lock); @@ -3269,7 +3314,7 @@ capture_cryptcompress(struct inode *inod } result = txnmgr_force_commit_all(inode->i_sb, 0); reiser4_exit_context(&ctx); - } while (result == 0 && crc_inode_has_anon_pages(inode)); + } while (result == 0 && index < 
nrpages); return result; } @@ -3365,6 +3410,9 @@ setattr_cryptcompress(struct inode *inod { int result; + result = check_cryptcompress(inode); + if (result) + return result; if (attr->ia_valid & ATTR_SIZE) { /* EDWARD-FIXME-HANS: VS-FIXME-HANS: Q: this case occurs when? truncate? diff -puN fs/reiser4/plugin/cryptcompress.h~reiser4-update fs/reiser4/plugin/cryptcompress.h --- devel/fs/reiser4/plugin/cryptcompress.h~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/cryptcompress.h 2005-07-08 23:11:54.000000000 -0700 @@ -11,13 +11,20 @@ #include #define MIN_CLUSTER_SIZE PAGE_CACHE_SIZE -#define MAX_CLUSTER_SHIFT 4 -#define MAX_CLUSTER_NRPAGES (1 << MAX_CLUSTER_SHIFT) +#define MIN_CLUSTER_SHIFT PAGE_CACHE_SHIFT +#define MAX_CLUSTER_SHIFT 16 +#define MAX_CLUSTER_NRPAGES (1U << MAX_CLUSTER_SHIFT >> PAGE_CACHE_SHIFT) #define DEFAULT_CLUSTER_SHIFT 0 #define DC_CHECKSUM_SIZE 4 #define MIN_CRYPTO_BLOCKSIZE 8 -typedef unsigned long cloff_t; +#if REISER4_DEBUG +static inline int +cluster_shift_ok (int shift) +{ + return (shift >= MIN_CLUSTER_SHIFT) && (shift <= MAX_CLUSTER_SHIFT); +} +#endif /* Set of transform id's supported by reiser4, each transform is implemented by appropriate transform plugin: */ @@ -412,13 +419,14 @@ put_cluster_handle(reiser4_cluster_t * c typedef struct crypto_stat { __u8 * keyid; /* pointer to a fingerprint */ __u16 keysize; /* key size, bits */ + __u32 * expkey; } crypto_stat_t; /* cryptcompress specific part of reiser4_inode */ typedef struct cryptcompress_info { struct rw_semaphore lock; struct crypto_tfm *tfm[LAST_TFM]; - __u32 * expkey; + crypto_stat_t * crypt; } cryptcompress_info_t; cryptcompress_info_t *cryptcompress_inode_data(const struct inode * inode); @@ -479,6 +487,12 @@ crypto_blocksize(struct inode * inode) return crypto_tfm_alg_blocksize(inode_get_tfm(inode, CRYPTO_TFM)); } +static inline compression_plugin * +dual_compression_plugin(compression_plugin * cplug) +{ + return 
compression_plugin_by_id(cplug->dual); +} + #define REGISTER_NONE_ALG(ALG, TFM) \ static int alloc_none_ ## ALG (struct inode * inode) \ { \ diff -puN fs/reiser4/plugin/digest.c~reiser4-update fs/reiser4/plugin/digest.c --- devel/fs/reiser4/plugin/digest.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/digest.c 2005-07-08 23:11:54.000000000 -0700 @@ -26,7 +26,17 @@ digest_plugin digest_plugins[LAST_DIGEST }, .dsize = NONE_DIGEST_SIZE, .alloc = alloc_none_digest, - .free = free_none_digest, + .free = free_none_digest } }; +/* + Local variables: + c-indentation-style: "K&R" + mode-name: "LC" + c-basic-offset: 8 + tab-width: 8 + fill-column: 120 + scroll-step: 1 + End: +*/ diff -puN fs/reiser4/plugin/dir/dir.c~reiser4-update fs/reiser4/plugin/dir/dir.c --- devel/fs/reiser4/plugin/dir/dir.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/dir/dir.c 2005-07-08 23:11:54.000000000 -0700 @@ -141,8 +141,9 @@ link_common(struct inode *parent /* pare * For such inode we have to undo special processing done in * reiser4_unlink() viz. creation of safe-link. */ - if (unlikely(inode_file_plugin(object)->not_linked(object))) { - result = safe_link_del(object, SAFE_UNLINK); + if (unlikely(object->i_nlink == 0)) { + result = safe_link_del(tree_by_inode(object), + get_inode_oid(object), SAFE_UNLINK); if (result != 0) return result; } @@ -290,7 +291,7 @@ unlink_common(struct inode *parent /* pa marked for update. 
--SUS */ reiser4_update_dir(parent); /* add safe-link for this file */ - if (fplug->not_linked(object)) + if (object->i_nlink == 0) safe_link_add(object, SAFE_UNLINK); } } diff -puN fs/reiser4/plugin/dir/hashed_dir.c~reiser4-update fs/reiser4/plugin/dir/hashed_dir.c --- devel/fs/reiser4/plugin/dir/hashed_dir.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/dir/hashed_dir.c 2005-07-08 23:11:54.000000000 -0700 @@ -1032,7 +1032,7 @@ rename_hashed(struct inode *old_dir /* d /* add safe-link for target file (in case we removed * last reference to the poor fellow */ fplug = inode_file_plugin(new_inode); - if (fplug->not_linked(new_inode)) + if (new_inode->i_nlink == 0) result = safe_link_add(new_inode, SAFE_UNLINK); } } diff -puN fs/reiser4/plugin/file/file.c~reiser4-update fs/reiser4/plugin/file/file.c --- devel/fs/reiser4/plugin/file/file.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/file/file.c 2005-07-08 23:11:54.000000000 -0700 @@ -733,8 +733,10 @@ load_file_hint(struct file *file, hint_t if (IS_ERR(fsdata)) return PTR_ERR(fsdata); + spin_lock_inode(file->f_dentry->d_inode); if (seal_is_set(&fsdata->reg.hint.seal)) { *hint = fsdata->reg.hint; + spin_unlock_inode(file->f_dentry->d_inode); /* force re-validation of the coord on the first * iteration of the read/write loop. 
*/ hint->ext_coord.valid = 0; @@ -743,6 +745,7 @@ load_file_hint(struct file *file, hint_t return 0; } memset(&fsdata->reg.hint, 0, sizeof(hint_t)); + spin_unlock_inode(file->f_dentry->d_inode); } hint_init_zero(hint); return 0; @@ -762,7 +765,10 @@ save_file_hint(struct file *file, const assert("vs-965", !IS_ERR(fsdata)); assert("nikita-19891", coords_equal(&hint->seal.coord1, &hint->ext_coord.coord)); + + spin_lock_inode(file->f_dentry->d_inode); fsdata->reg.hint = *hint; + spin_unlock_inode(file->f_dentry->d_inode); return; } @@ -929,14 +935,6 @@ capturepage_unix_file(struct page * page return result; } -static void -redirty_inode(struct inode *inode) -{ - spin_lock(&inode_lock); - inode->i_state |= I_DIRTY; - spin_unlock(&inode_lock); -} - /* * Support for "anonymous" pages and jnodes. * @@ -1009,9 +1007,6 @@ capture_anonymous_page(struct page *pg, return result; } - -#define CAPTURE_APAGE_BURST (1024l) - /* look for pages tagged REISER4_MOVED starting from the index-th page, return number of captured pages, update index to next page after the last found one */ @@ -1341,21 +1336,11 @@ capture_unix_file(struct inode *inode, s * this function is out of reiser4 context and may safely * sleep on semaphore. */ - if (is_in_reiser4_context()) { - if (down_read_trylock(&uf_info->latch) == 0) { -/* ZAM-FIXME-HANS: please explain this error handling here, grep for - * all instances of returning EBUSY, and tell me whether any of them - * represent busy loops that we should recode. 
Also tell me whether - * any of them fail to return EBUSY to user space, and if yes, then - * recode them to not use the EBUSY macro.*/ - result = RETERR(-EBUSY); - reiser4_exit_context(&ctx); - break; - } - } else - down_read(&uf_info->latch); - LOCK_CNT_INC(inode_sem_r); + assert("", LOCK_CNT_NIL(inode_sem_w)); + assert("", LOCK_CNT_NIL(inode_sem_r)); + txn_restart_current(); + get_nonexclusive_access(uf_info, 0); while (to_capture > 0) { pgoff_t start; @@ -1379,7 +1364,7 @@ capture_unix_file(struct inode *inode, s if (result < 0) break; to_capture -= result; - wbc->nr_to_write -= result; + get_current_context()->nr_captured -= result; if (jindex == (pgoff_t)-1) { assert("vs-1728", pindex == (pgoff_t)-1); @@ -1388,10 +1373,9 @@ capture_unix_file(struct inode *inode, s } if (to_capture <= 0) /* there may be left more pages */ - redirty_inode(inode); + __mark_inode_dirty(inode, I_DIRTY_PAGES); - up_read(&uf_info->latch); - LOCK_CNT_DEC(inode_sem_r); + drop_nonexclusive_access(uf_info); if (result < 0) { /* error happened */ reiser4_exit_context(&ctx); @@ -1502,12 +1486,13 @@ readpage_unix_file(void *vp, struct page if (result != CBK_COORD_FOUND) { /* this indicates file corruption */ done_lh(&lh); + unlock_page(page); return result; } if (PageUptodate(page)) { done_lh(&lh); - unlock_page(page); + unlock_page(page); return 0; } @@ -1515,6 +1500,7 @@ readpage_unix_file(void *vp, struct page result = zload(coord->node); if (result) { done_lh(&lh); + unlock_page(page); return result; } @@ -1531,6 +1517,7 @@ readpage_unix_file(void *vp, struct page zrelse(coord->node); done_lh(&lh); + unlock_page(page); return RETERR(-EIO); } @@ -1546,14 +1533,17 @@ readpage_unix_file(void *vp, struct page set_key_offset(&key, (loff_t) (page->index + 1) << PAGE_CACHE_SHIFT); /* FIXME should call set_hint() */ unset_hint(&hint); - } else + } else { + unlock_page(page); unset_hint(&hint); + } zrelse(coord->node); done_lh(&lh); save_file_hint(file, &hint); - assert("vs-979", ergo(result == 
0, (PageLocked(page) || PageUptodate(page)))); + assert("vs-979", ergo(result == 0, (PageLocked(page) || PageUptodate(page)))); + assert("vs-9791", ergo(result != 0, !PageLocked(page))); return result; } @@ -1631,7 +1621,7 @@ reiser4_put_user_pages(struct page **pag /* this is called with nonexclusive access obtained, file's container can not change */ static size_t -read_file(hint_t *hint, file_container_t container, +read_file(hint_t *hint, struct file *file, /* file to write to */ char *buf, /* address of user-space buffer */ size_t count, /* number of bytes to write */ @@ -1646,33 +1636,6 @@ read_file(hint_t *hint, file_container_t inode = file->f_dentry->d_inode; - /* we have nonexclusive access (NA) obtained. File's container may not - change until we drop NA. If possible - calculate read function - beforehand */ - switch(container) { - case UF_CONTAINER_EXTENTS: - read_f = item_plugin_by_id(EXTENT_POINTER_ID)->s.file.read; - break; - - case UF_CONTAINER_TAILS: - /* this is read-ahead for tails-only files */ - result = reiser4_file_readahead(file, *off, count); - if (result) - return result; - - read_f = item_plugin_by_id(FORMATTING_ID)->s.file.read; - break; - - case UF_CONTAINER_UNKNOWN: - read_f = 0; - break; - - case UF_CONTAINER_EMPTY: - default: - warning("vs-1297", "File (ino %llu) has unexpected state: %d\n", - (unsigned long long)get_inode_oid(inode), container); - return RETERR(-EIO); - } /* build flow */ assert("vs-1250", inode_file_plugin(inode)->flow_by_inode == flow_by_inode_unix_file); @@ -1704,9 +1667,9 @@ read_file(hint_t *hint, file_container_t if (hint->ext_coord.valid == 0) validate_extended_coord(&hint->ext_coord, get_key_offset(&flow.key)); + assert("vs-4", hint->ext_coord.valid == 1); /* call item's read method */ - if (!read_f) - read_f = item_plugin_by_coord(coord)->s.file.read; + read_f = item_plugin_by_coord(coord)->s.file.read; result = read_f(file, &flow, hint); zrelse(loaded); done_lh(hint->ext_coord.lh); @@ -1807,7 +1770,7 @@ 
read_unix_file(struct file *file, char * to_read = inode->i_size - *off; assert("vs-1706", to_read <= left); - read = read_file(&hint, uf_info->container, file, buf, to_read, off); + read = read_file(&hint, file, buf, to_read, off); if (user_space) reiser4_put_user_pages(pages, nr_pages); @@ -1943,7 +1906,7 @@ append_and_or_overwrite(hint_t *hint, st return result; } loaded = lh.node; - + assert("vs-11", hint->ext_coord.coord.node == loaded); result = write_f(inode, flow, hint, @@ -2059,6 +2022,7 @@ mmap_unix_file(struct file *file, struct inode = file->f_dentry->d_inode; uf_info = unix_file_inode_data(inode); + down(&uf_info->write); get_exclusive_access(uf_info); if (!IS_RDONLY(inode) && (vma->vm_flags & (VM_MAYWRITE | VM_SHARED))) { @@ -2067,12 +2031,14 @@ mmap_unix_file(struct file *file, struct result = finish_conversion(inode); if (result) { drop_exclusive_access(uf_info); + up(&uf_info->write); return result; } result = find_file_state(uf_info); if (result != 0) { drop_exclusive_access(uf_info); + up(&uf_info->write); return result; } @@ -2084,6 +2050,7 @@ mmap_unix_file(struct file *file, struct result = check_pages_unix_file(inode); if (result) { drop_exclusive_access(uf_info); + up(&uf_info->write); return result; } } @@ -2097,6 +2064,7 @@ mmap_unix_file(struct file *file, struct } drop_exclusive_access(uf_info); + up(&uf_info->write); return result; } @@ -2299,9 +2267,9 @@ write_unix_file(struct file *file, /* fi (unsigned long long)get_inode_oid(inode)); } + save_file_hint(file, &hint); up(&uf_info->write); current->backing_dev_info = 0; - save_file_hint(file, &hint); return count ? 
count : result; } @@ -2317,6 +2285,7 @@ release_unix_file(struct inode *object, uf_info = unix_file_inode_data(object); result = 0; + down(&uf_info->write); get_exclusive_access(uf_info); if (atomic_read(&file->f_dentry->d_count) == 1 && uf_info->container == UF_CONTAINER_EXTENTS && @@ -2329,6 +2298,7 @@ release_unix_file(struct inode *object, } } drop_exclusive_access(uf_info); + up(&uf_info->write); return 0; } @@ -2521,7 +2491,7 @@ setattr_truncate(struct inode *inode, st s_result = safe_link_grab(tree, BA_CAN_COMMIT); if (s_result == 0) - s_result = safe_link_del(inode, SAFE_TRUNCATE); + s_result = safe_link_del(tree, get_inode_oid(inode), SAFE_TRUNCATE); if (s_result != 0) { warning("nikita-3417", "Cannot kill safelink %lli: %i", (unsigned long long)get_inode_oid(inode), s_result); @@ -2578,14 +2548,31 @@ init_inode_data_unix_file(struct inode * init_inode_ordering(inode, crd, create); } -/* VS-FIXME-HANS: what is pre deleting all about? */ -/* plugin->u.file.pre_delete */ +/* plugin->u.file.pre_delete + + We need this because generic_delete_inode calls truncate_inode_pages before + filesystem's delete_inode method. As result of this, reiser4 tree may have + unallocated extents which do not have pages pointed by them (those pages are + removed by truncate_inode_pages), which may confuse flush code. The solution + for this problem is to call pre_delete method from reiser4_put_inode to + remove file items together with corresponding pages. Generic_delete_inode + will call truncate_inode_pages which will do nothing and + reiser4_delete_inode which completes file deletion by removing stat data + from the tree. + This method is to be called from reiser4_put_inode when file is already + unlinked and iput is about to drop last reference to inode. 
If nfsd manages + to iget the file after pre_delete started, it will either be able to access + a file content (if it will get access to file earlier than pre_delete) or it + will get file truncated to 0 size if pre_delete goes first +*/ reiser4_internal int pre_delete_unix_file(struct inode *inode) { unix_file_info_t *uf_info; int result; + txn_restart_current(); + /* FIXME: put comment here */ uf_info = unix_file_inode_data(inode); get_exclusive_access(uf_info); diff -puN fs/reiser4/plugin/file/file.h~reiser4-update fs/reiser4/plugin/file/file.h --- devel/fs/reiser4/plugin/file/file.h~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/file/file.h 2005-07-08 23:11:54.000000000 -0700 @@ -85,6 +85,10 @@ typedef struct unix_file_info { } unix_file_info_t; struct unix_file_info *unix_file_inode_data(const struct inode * inode); +void get_exclusive_access(unix_file_info_t *); +void drop_exclusive_access(unix_file_info_t *); +void get_nonexclusive_access(unix_file_info_t *, int); +void drop_nonexclusive_access(unix_file_info_t *); #include "../item/extent.h" #include "../item/tail.h" diff -puN /dev/null fs/reiser4/plugin/file/regular.c --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/file/regular.c 2005-07-08 23:11:54.000000000 -0700 @@ -0,0 +1,44 @@ +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ + +/* Contains Reiser4 regular plugins which: + . specify a set of reiser4 regular object plugins, + . 
used by directory plugin to create entries powered by specified + regular plugins */ + +#include "../plugin.h" + +regular_plugin regular_plugins[LAST_REGULAR_ID] = { + [UF_REGULAR_ID] = { + .h = { + .type_id = REISER4_REGULAR_PLUGIN_TYPE, + .id = UF_REGULAR_ID, + .pops = NULL, + .label = "unixfile", + .desc = "Unix file regular plugin", + .linkage = TYPE_SAFE_LIST_LINK_ZERO + }, + .id = UNIX_FILE_PLUGIN_ID + }, + [CRC_REGULAR_ID] = { + .h = { + .type_id = REISER4_REGULAR_PLUGIN_TYPE, + .id = CRC_REGULAR_ID, + .pops = NULL, + .label = "cryptcompress", + .desc = "Cryptcompress regular plugin", + .linkage = TYPE_SAFE_LIST_LINK_ZERO + }, + .id = CRC_FILE_PLUGIN_ID + } +}; + +/* + Local variables: + c-indentation-style: "K&R" + mode-name: "LC" + c-basic-offset: 8 + tab-width: 8 + fill-column: 120 + scroll-step: 1 + End: +*/ diff -puN fs/reiser4/plugin/file/tail_conversion.c~reiser4-update fs/reiser4/plugin/file/tail_conversion.c --- devel/fs/reiser4/plugin/file/tail_conversion.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/file/tail_conversion.c 2005-07-08 23:11:54.000000000 -0700 @@ -32,6 +32,7 @@ get_exclusive_access(unix_file_info_t *u BUG_ON(get_current_context()->trans->atom != NULL); LOCK_CNT_INC(inode_sem_w); down_write(&uf_info->latch); + uf_info->exclusive_use = 1; assert("vs-1713", uf_info->ea_owner == NULL); assert("vs-1713", atomic_read(&uf_info->nr_neas) == 0); ON_DEBUG(uf_info->ea_owner = current); @@ -43,6 +44,7 @@ drop_exclusive_access(unix_file_info_t * assert("vs-1714", uf_info->ea_owner == current); assert("vs-1715", atomic_read(&uf_info->nr_neas) == 0); ON_DEBUG(uf_info->ea_owner = NULL); + uf_info->exclusive_use = 0; up_write(&uf_info->latch); assert("nikita-3049", LOCK_CNT_NIL(inode_sem_r)); assert("nikita-3049", LOCK_CNT_GTZ(inode_sem_w)); @@ -334,7 +336,7 @@ tail2extent(unix_file_info_t *uf_info) not risk deadlock appearance */ assert("vs-983", !PagePrivate(page)); - + 
reiser4_invalidate_pages(inode->i_mapping, page->index, 1, 0); for (page_off = 0; page_off < PAGE_CACHE_SIZE;) { coord_t coord; lock_handle lh; @@ -408,8 +410,19 @@ tail2extent(unix_file_info_t *uf_info) release_all_pages(pages, sizeof_array(pages)); if (result) goto error; + /* we have to drop exclusive access to avoid a deadlock + * which may happen because capture_unix_file, called by + * reiser4_writepages, needs to get + * non-exclusive access to a file. It is safe to drop + * EA in the middle of tail2extent conversion because + * write_unix_file/unix_setattr(truncate)/release_unix_file(extent2tail) + * are serialized by uf_info->write semaphore and + * because read_unix_file works (or at least should) on + * partially converted files */ + drop_exclusive_access(uf_info); /* throttle the conversion */ reiser4_throttle_write(inode); + get_exclusive_access(uf_info); } } diff -puN fs/reiser4/plugin/item/ctail.c~reiser4-update fs/reiser4/plugin/item/ctail.c --- devel/fs/reiser4/plugin/item/ctail.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/item/ctail.c 2005-07-08 23:11:54.000000000 -0700 @@ -54,11 +54,10 @@ cluster_shift_by_coord(const coord_t * c } static unsigned long -pg_by_coord(const coord_t * coord) +off_by_coord(const coord_t * coord) { reiser4_key key; - - return get_key_offset(item_key_by_coord(coord, &key)) >> PAGE_CACHE_SHIFT; + return get_key_offset(item_key_by_coord(coord, &key)); } static int @@ -88,7 +87,7 @@ clust_by_coord(const coord_t * coord, st assert("edward-1237", !coord_is_unprepped_ctail(coord)); shift = cluster_shift_by_coord(coord); } - return pg_by_coord(coord) >> shift; + return off_by_coord(coord) >> shift; } static int unsigned long @@ -100,7 +99,7 @@ disk_cluster_size (const coord_t * coord is meaninless if ctail is unprepped */ assert("edward-1238", !coord_is_unprepped_ctail(coord)); - return PAGE_CACHE_SIZE << cluster_shift_by_coord(coord); + return 1 << cluster_shift_by_coord(coord); } /* true
if the key is of first disk cluster item */ @@ -235,15 +234,18 @@ init_ctail(coord_t * to /* coord of item return 0; } +/* plugin->u.item.b.lookup: + NULL: We are looking for item keys only */ + +#if REISER4_DEBUG reiser4_internal int ctail_ok (const coord_t *coord) { return coord_is_unprepped_ctail(coord) || - (cluster_shift_by_coord(coord) <= MAX_CLUSTER_SHIFT); + cluster_shift_ok(cluster_shift_by_coord(coord)); } -/* plugin->u.item.b.lookup: - NULL: We are looking for item keys only */ +/* plugin->u.item.b.check */ reiser4_internal int check_ctail (const coord_t * coord, const char **error) { @@ -254,9 +256,7 @@ check_ctail (const coord_t * coord, cons } return 0; } - -/* plugin->u.item.b.check */ - +#endif /* plugin->u.item.b.paste */ reiser4_internal int @@ -564,7 +564,6 @@ ctail_read_cluster (reiser4_cluster_t * assert("edward-671", clust->hint != NULL); assert("edward-140", clust->dstat == INVAL_DISK_CLUSTER); assert("edward-672", crc_inode_ok(inode)); - assert("edward-145", inode_get_flag(inode, REISER4_CLUSTER_KNOWN)); /* set input stream */ result = grab_tfm_stream(inode, &clust->tc, TFM_READ, INPUT_STREAM); @@ -814,15 +813,8 @@ readpages_ctail(void *vp, struct address continue; } unlock_page(page); - reset_cluster_params(&clust); - - if (progress && - /* hole in the indices */ - pg_to_clust(page->index, inode) != clust.index + 1) - invalidate_hint_cluster(&clust); - progress++; - clust.index = pg_to_clust(page->index, inode); + move_cluster_forward(&clust, inode, page->index, &progress); ret = ctail_read_page_cluster(&clust, inode); if (ret) goto exit; @@ -867,7 +859,7 @@ reiser4_internal reiser4_key * append_key_ctail(const coord_t *coord, reiser4_key *key) { assert("edward-1241", item_id_by_coord(coord) == CTAIL_ID); - assert("edward-1242", cluster_shift_by_coord(coord) <= MAX_CLUSTER_SHIFT); + assert("edward-1242", cluster_shift_ok(cluster_shift_by_coord(coord))); item_key_by_coord(coord, key); set_key_offset(key, ((__u64)(clust_by_coord(coord, NULL)) 
+ 1) << cluster_shift_by_coord(coord) << PAGE_CACHE_SHIFT); @@ -1074,7 +1066,7 @@ do_convert_ctail(flush_pos_t * pos, crc_ switch (mode) { case CRC_APPEND_ITEM: assert("edward-1229", info->flow.length != 0); - assert("edward-1256", cluster_shift_by_coord(&pos->coord) <= MAX_CLUSTER_SHIFT); + assert("edward-1256", cluster_shift_ok(cluster_shift_by_coord(&pos->coord))); result = insert_crc_flow_in_place(&pos->coord, &pos->lock, &info->flow, info->inode); break; case CRC_OVERWRITE_ITEM: @@ -1336,8 +1328,6 @@ detach_convert_idata(convert_info_t * sq info = sq->itm; assert("edward-255", info->inode != NULL); - assert("edward-1175", - inode_get_flag(info->inode, REISER4_CLUSTER_KNOWN)); assert("edward-1212", info->flow.length == 0); /* the final release of pages */ @@ -1367,7 +1357,9 @@ utmost_child_ctail(const coord_t * coord if (!is_disk_cluster_key(&key, coord)) *child = NULL; else - *child = jlookup(current_tree, get_key_objectid(item_key_by_coord(coord, &key)), pg_by_coord(coord)); + *child = jlookup(current_tree, + get_key_objectid(item_key_by_coord(coord, &key)), + off_to_pg(get_key_offset(&key))); return 0; } @@ -1439,17 +1431,21 @@ next_item_dc_stat(flush_pos_t * pos) item_convert_data(pos)->d_next = DC_CHAINED_ITEM; if (!znode_is_dirty(lh.node)) { - warning("edward-1024", - "next slum item mergeable, " - "but znode %p isn't dirty\n", - lh.node); + /* + warning("edward-1024", + "next slum item mergeable, " + "but znode %p isn't dirty\n", + lh.node); + */ znode_make_dirty(lh.node); } if (!znode_convertible(lh.node)) { - warning("edward-1272", - "next slum item mergeable, " - "but znode %p isn't convertible\n", - lh.node); + /* + warning("edward-1272", + "next slum item mergeable, " + "but znode %p isn't convertible\n", + lh.node); + */ znode_set_convertible(lh.node); } stop = 1; @@ -1608,7 +1604,7 @@ convert_ctail(flush_pos_t * pos) /* convert unpprepped ctail to prepped one */ int shift; shift = inode_cluster_shift(item_convert_data(pos)->inode); - 
assert("edward-1259", shift <= MAX_CLUSTER_SHIFT); + assert("edward-1259", cluster_shift_ok(shift)); cputod8(shift, &ctail_formatted_at(&pos->coord)->cluster_shift); } break; diff -puN fs/reiser4/plugin/item/extent_file_ops.c~reiser4-update fs/reiser4/plugin/item/extent_file_ops.c --- devel/fs/reiser4/plugin/item/extent_file_ops.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/item/extent_file_ops.c 2005-07-08 23:11:54.000000000 -0700 @@ -7,6 +7,7 @@ #include "../object.h" #include +#include static inline reiser4_extent * ext_by_offset(const znode *node, int offset) @@ -447,9 +448,11 @@ check_make_extent_result(int result, wri return; assert("vs-960", znode_is_write_locked(lh->node)); - zload(lh->node); + + check_me("vs-9", zload(lh->node) == 0); result = lh->node->nplug->lookup(lh->node, key, FIND_EXACT, &coord); assert("vs-1502", result == NS_FOUND); + assert("vs-16561", coord.node == lh->node); assert("vs-1656", coord_is_existing_unit(&coord)); if (blocknr_is_fake(&block)) { @@ -643,6 +646,8 @@ extent_balance_dirty_pages(struct inode hint_t *hint) { int result; + int excl; + unix_file_info_t *uf_info; if (hint->ext_coord.valid) set_hint(hint, &f->key, ZNODE_WRITE_LOCK); @@ -665,7 +670,19 @@ extent_balance_dirty_pages(struct inode return result; } - reiser4_throttle_write(inode); + if (!reiser4_is_set(inode->i_sb, REISER4_ATOMIC_WRITE)) { + uf_info = unix_file_inode_data(inode); + excl = unix_file_inode_data(inode)->exclusive_use; + if (excl) + drop_exclusive_access(uf_info); + else + drop_nonexclusive_access(uf_info); + reiser4_throttle_write(inode); + if (excl) + get_exclusive_access(uf_info); + else + get_nonexclusive_access(uf_info, 0); + } return 0; } @@ -688,9 +705,6 @@ extent_write_flow(struct inode *inode, f reiser4_key page_key; reiser4_block_nr blocknr; int created; - int err; - - err = 0; assert("nikita-3139", !inode_get_flag(inode, REISER4_NO_SD)); assert("vs-885", current_blocksize == PAGE_CACHE_SIZE); @@ -724,16 
+738,13 @@ extent_write_flow(struct inode *inode, f count = flow->length; result = make_extent(&page_key, uf_coord, mode, &blocknr, &created, inode/* check quota */); - if (result) { - err = 2; + if (result) goto exit1; - } /* look for jnode and create it if it does not exist yet */ j = find_get_jnode(tree, inode->i_mapping, oid, page_nr); if (IS_ERR(j)) { result = PTR_ERR(j); - err = 3; goto exit1; } @@ -741,7 +752,6 @@ extent_write_flow(struct inode *inode, f page = jnode_get_page_locked(j, GFP_KERNEL); if (IS_ERR(page)) { result = PTR_ERR(page); - err = 4; goto exit2; } @@ -772,15 +782,12 @@ extent_write_flow(struct inode *inode, f UNLOCK_JNODE(j); } result = page_io(page, j, READ, GFP_KERNEL); - if (result) { - err = 5; + if (result) goto exit3; - } + lock_page(page); - if (!PageUptodate(page)) { - err = 6; + if (!PageUptodate(page)) goto exit3; - } } else { zero_around(page, page_off, count); } @@ -821,7 +828,6 @@ extent_write_flow(struct inode *inode, f /* FIXME: write(fd, 0, 10); to empty file will write no data but file will get increased size. 
*/ result = RETERR(-EFAULT); - err = 7; goto exit3; } @@ -829,9 +835,7 @@ extent_write_flow(struct inode *inode, f SetPageUptodate(page); if (!PageReferenced(page)) SetPageReferenced(page); - unlock_page(page); - page_cache_release(page); /* FIXME: possible optimization: if jnode is not dirty yet - it gets into clean list in try_capture and then in @@ -841,12 +845,13 @@ extent_write_flow(struct inode *inode, f result = try_capture(j, ZNODE_WRITE_LOCK, 0, 1/* can_coc */); if (result) { UNLOCK_JNODE(j); - err = 8; + page_cache_release(page); goto exit2; } jnode_make_dirty_locked(j); UNLOCK_JNODE(j); + page_cache_release(page); jput(j); move_flow_forward(flow, count); @@ -856,10 +861,8 @@ extent_write_flow(struct inode *inode, f result = extent_balance_dirty_pages(inode, flow, hint); if (!grabbed) all_grabbed2free(); - if (result) { - err = 9; + if (result) break; - } page_off = 0; page_nr ++; @@ -894,13 +897,13 @@ extent_write_flow(struct inode *inode, f } while (1); - if (err) { - assert("", !hint_is_set(hint)); - } else - assert("", ergo(hint_is_set(hint), + if (result && result != -E_REPEAT) + assert("vs-18", !hint_is_set(hint)); + else + assert("vs-19", ergo(hint_is_set(hint), coords_equal(&hint->ext_coord.coord, &hint->seal.coord1) && keyeq(&flow->key, &hint->seal.key))); - assert("", lock_stack_isclean(get_current_lock_stack())); + assert("vs-20", lock_stack_isclean(get_current_lock_stack())); return result; } @@ -1173,6 +1176,10 @@ extent_readpage_filler(void *data, struc hint = (hint_t *)data; ext_coord = &hint->ext_coord; + + BUG_ON(PageUptodate(page)); + unlock_page(page); + if (hint_validate(hint, &key, 1/* check key */, ZNODE_READ_LOCK) != 0) { result = coord_by_key(current_tree, &key, &ext_coord->coord, ext_coord->lh, ZNODE_READ_LOCK, @@ -1190,6 +1197,13 @@ extent_readpage_filler(void *data, struc done_lh(ext_coord->lh); return RETERR(-EIO); } + if (!item_is_extent(&ext_coord->coord)) { + /* tail conversion is running in parallel */ + 
zrelse(ext_coord->coord.node); + unset_hint(hint); + done_lh(ext_coord->lh); + return RETERR(-EIO); + } if (ext_coord->valid == 0) init_coord_extension_extent(ext_coord, offset); @@ -1197,8 +1211,16 @@ extent_readpage_filler(void *data, struc assert("", (coord_extension_is_ok(ext_coord) && coord_extension_is_ok2(ext_coord, &key))); - result = do_readpage_extent(ext_by_ext_coord(ext_coord), - ext_coord->extension.extent.pos_in_unit, page); + lock_page(page); + if (!PageUptodate(page)) { + result = do_readpage_extent(ext_by_ext_coord(ext_coord), + ext_coord->extension.extent.pos_in_unit, page); + if (result) + unlock_page(page); + } else { + unlock_page(page); + result = 0; + } if (!result && move_coord_forward(ext_coord) == 0) { set_key_offset(&key, offset + PAGE_CACHE_SIZE); set_hint(hint, &key, ZNODE_READ_LOCK); @@ -1217,22 +1239,47 @@ extent_readpages_hook(struct address_spa read_cache_pages(mapping, pages, extent_readpage_filler, data); } -static void +static int call_page_cache_readahead(struct address_space *mapping, struct file *file, hint_t *hint, unsigned long page_nr, - unsigned long ra_pages) + unsigned long ra_pages, + struct file_ra_state *ra) { reiser4_file_fsdata *fsdata; + int result; fsdata = reiser4_get_file_fsdata(file); - if (fsdata == NULL) - return; + if (IS_ERR(fsdata)) + return page_nr; fsdata->ra2.data = hint; fsdata->ra2.readpages = extent_readpages_hook; - page_cache_readahead(mapping, &file->f_ra, file, page_nr, ra_pages); + result = page_cache_readahead(mapping, ra, file, page_nr, ra_pages); fsdata->ra2.readpages = NULL; + return result; +} + + +/* this is called when readahead did not bring the page up to date */ +static int +call_readpage(struct file *file, struct page *page) +{ + int result; + + result = readpage_unix_file(file, page); + if (result) + return result; + + lock_page(page); + if (!PageUptodate(page)) { + unlock_page(page); + page_detach_jnode(page, page->mapping, page->index); + warning("jmacd-97178", "page is not up to date"); + return
RETERR(-EIO); + } + unlock_page(page); + return 0; } /* Implements plugin->u.item.s.file.read operation for extent items. */ @@ -1241,14 +1288,15 @@ read_extent(struct file *file, flow_t *f { int result; struct page *page; - unsigned long page_nr; + unsigned long cur_page, next_page; unsigned long page_off, count; struct address_space *mapping; loff_t file_off; uf_coord_t *uf_coord; coord_t *coord; extent_coord_extension_t *ext_coord; - unsigned long ra_pages; + unsigned long nr_pages, prev_page; + struct file_ra_state ra; assert("vs-1353", current_blocksize == PAGE_CACHE_SIZE); assert("vs-572", flow->user == 1); @@ -1267,14 +1315,16 @@ read_extent(struct file *file, flow_t *f /* offset in a file to start read from */ file_off = get_key_offset(&flow->key); - /* index of page containing that offset */ - page_nr = (unsigned long)(file_off >> PAGE_CACHE_SHIFT); /* offset within the page to start read from */ page_off = (unsigned long)(file_off & (PAGE_CACHE_SIZE - 1)); /* bytes which can be read from the page which contains file_off */ count = PAGE_CACHE_SIZE - page_off; + + /* index of page containing offset read is to start from */ + cur_page = (unsigned long)(file_off >> PAGE_CACHE_SHIFT); + next_page = cur_page; /* number of pages flow spans over */ - ra_pages = (flow->length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + nr_pages = ((file_off + flow->length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - cur_page; /* we start having twig node read locked. However, we do not want to keep that lock all the time readahead works. 
So, set a sel and @@ -1282,26 +1332,50 @@ read_extent(struct file *file, flow_t *f set_hint(hint, &flow->key, ZNODE_READ_LOCK); longterm_unlock_znode(hint->ext_coord.lh); + ra = file->f_ra; + prev_page = ra.prev_page; do { - call_page_cache_readahead(mapping, file, hint, page_nr, ra_pages); + if (next_page == cur_page) + next_page = call_page_cache_readahead(mapping, file, hint, cur_page, nr_pages, &ra); - /* this will return page if it exists and is uptodate, - otherwise it will allocate page and call readpage_extent to - fill it */ - page = read_cache_page(mapping, page_nr, readpage_unix_file, file); - if (IS_ERR(page)) - return PTR_ERR(page); + page = find_get_page(mapping, cur_page); + if (unlikely(page == NULL)) { + handle_ra_miss(mapping, &ra, cur_page); + page = read_cache_page(mapping, cur_page, readpage_unix_file, file); + if (IS_ERR(page)) + return PTR_ERR(page); + lock_page(page); + if (!PageUptodate(page)) { + unlock_page(page); + page_detach_jnode(page, mapping, cur_page); + page_cache_release(page); + warning("jmacd-97178", "extent_read: page is not up to date"); + return RETERR(-EIO); + } + unlock_page(page); + } else { + if (!PageUptodate(page)) { + lock_page(page); - wait_on_page_locked(page); - if (!PageUptodate(page)) { - page_detach_jnode(page, mapping, page_nr); - page_cache_release(page); - warning("jmacd-97178", "extent_read: page is not up to date"); - return RETERR(-EIO); + assert("", page->mapping == mapping); + if (PageUptodate(page)) + unlock_page(page); + else { + result = call_readpage(file, page); + if (result) { + page_cache_release(page); + return RETERR(result); + } + } + } + if (prev_page != cur_page) + mark_page_accessed(page); + prev_page = cur_page; } - /* If users can be writing to this page using arbitrary virtual addresses, take care about potential - aliasing before reading the page on the kernel side. 
+ /* If users can be writing to this page using arbitrary virtual + addresses, take care about potential aliasing before reading + the page on the kernel side. */ if (mapping_writably_mapped(mapping)) flush_dcache_page(page); @@ -1324,11 +1398,12 @@ read_extent(struct file *file, flow_t *f move_flow_forward(flow, count); page_off = 0; - page_nr ++; + cur_page ++; count = PAGE_CACHE_SIZE; - ra_pages --; + nr_pages --; } while (flow->length); + file->f_ra = ra; return 0; } @@ -1457,7 +1532,8 @@ get_block_address_extent(const coord_t * { reiser4_extent *ext; - assert("vs-1321", coord_is_existing_unit(coord)); + if (!coord_is_existing_unit(coord)) + return RETERR(-EINVAL); ext = extent_by_coord(coord); diff -puN fs/reiser4/plugin/item/extent_flush_ops.c~reiser4-update fs/reiser4/plugin/item/extent_flush_ops.c --- devel/fs/reiser4/plugin/item/extent_flush_ops.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/item/extent_flush_ops.c 2005-07-08 23:11:54.000000000 -0700 @@ -279,7 +279,9 @@ exit: /* ask block allocator for some blocks */ static void extent_allocate_blocks(reiser4_blocknr_hint *preceder, - reiser4_block_nr wanted_count, reiser4_block_nr *first_allocated, reiser4_block_nr *allocated, block_stage_t block_stage) + reiser4_block_nr wanted_count, + reiser4_block_nr *first_allocated, reiser4_block_nr *allocated, + block_stage_t block_stage) { *allocated = wanted_count; preceder->max_dist = 0; /* scan whole disk, if needed */ @@ -288,7 +290,7 @@ extent_allocate_blocks(reiser4_blocknr_h preceder->block_stage = block_stage; /* FIXME: we do not handle errors here now */ - check_me("vs-420", reiser4_alloc_blocks (preceder, first_allocated, allocated, BA_PERMANENT) == 0); + check_me("vs-420", reiser4_alloc_blocks(preceder, first_allocated, allocated, BA_PERMANENT) == 0); /* update flush_pos's preceder to last allocated block number */ preceder->blk = *first_allocated + *allocated - 1; } @@ -795,6 +797,10 @@ alloc_extent(flush_pos_t 
*flush_pos) else block_stage = BLOCK_UNALLOCATED; + /* look at previous unit if possible. If it is allocated, make preceder more precise */ + if (coord->unit_pos && (state_of_extent(ext - 1) == ALLOCATED_EXTENT)) + pos_hint(flush_pos)->blk = extent_get_start(ext - 1) + extent_get_width(ext - 1); + /* allocate new block numbers for protected nodes */ extent_allocate_blocks(pos_hint(flush_pos), protected, &first_allocated, &allocated, block_stage); @@ -934,6 +940,10 @@ squalloc_extent(znode *left, const coord else block_stage = BLOCK_UNALLOCATED; + /* look at previous unit if possible. If it is allocated, make preceder more precise */ + if (coord->unit_pos && (state_of_extent(ext - 1) == ALLOCATED_EXTENT)) + pos_hint(flush_pos)->blk = extent_get_start(ext - 1) + extent_get_width(ext - 1); + /* allocate new block numbers for protected nodes */ extent_allocate_blocks(pos_hint(flush_pos), protected, &first_allocated, &allocated, block_stage); if (allocated != protected) diff -puN fs/reiser4/plugin/item/extent_item_ops.c~reiser4-update fs/reiser4/plugin/item/extent_item_ops.c --- devel/fs/reiser4/plugin/item/extent_item_ops.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/item/extent_item_ops.c 2005-07-08 23:11:54.000000000 -0700 @@ -349,10 +349,9 @@ kill_hook_extent(const coord_t *coord, p */ /* if neighbors of item being removed are znodes - * link them */ - UNDER_RW_VOID(tree, tree, - write, link_left_and_right(left, right)); - + WLOCK_TREE(tree); WLOCK_DK(tree); + link_left_and_right(left, right); if (left) { /* update right delimiting key of left * neighbor of extent item */ @@ -368,6 +367,7 @@ kill_hook_extent(const coord_t *coord, p znode_set_rd_key(left, &key); } WUNLOCK_DK(tree); + WUNLOCK_TREE(tree); from_off = get_key_offset(&min_item_key) >> PAGE_CACHE_SHIFT; to_off = (get_key_offset(&max_item_key) + 1) >> PAGE_CACHE_SHIFT; diff -puN fs/reiser4/plugin/item/static_stat.c~reiser4-update fs/reiser4/plugin/item/static_stat.c 
--- devel/fs/reiser4/plugin/item/static_stat.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/item/static_stat.c 2005-07-08 23:11:54.000000000 -0700 @@ -952,26 +952,27 @@ save_plugin_sd(struct inode *inode /* ob Allocates memory for crypto stat, keyid and attaches it to the inode */ static int crypto_stat_to_inode (struct inode *inode, - crypto_stat_t * tmp, + reiser4_crypto_stat * sd, unsigned int size /* fingerprint size */) { crypto_stat_t * stat; - assert ("edward-11", (reiser4_inode_data(inode))->crypt == NULL); + assert ("edward-11", (cryptcompress_inode_data(inode))->crypt == NULL); assert ("edward-33", !inode_get_flag(inode, REISER4_CRYPTO_STAT_LOADED)); stat = reiser4_kmalloc(sizeof(*stat), GFP_KERNEL); if (!stat) return RETERR(-ENOMEM); + memset(stat, 0, sizeof *stat); stat->keyid = reiser4_kmalloc((size_t)size, GFP_KERNEL); if (!stat->keyid) { reiser4_kfree(stat); return RETERR(-ENOMEM); } /* load inode crypto-stat */ - stat->keysize = tmp->keysize; - memcpy(stat->keyid, tmp->keyid, (size_t)size); - reiser4_inode_data(inode)->crypt = stat; + stat->keysize = d16tocpu(&sd->keysize); + memcpy(stat->keyid, sd->keyid, (size_t)size); + cryptcompress_inode_data(inode)->crypt = stat; inode_set_flag(inode, REISER4_CRYPTO_STAT_LOADED); return 0; @@ -983,9 +984,7 @@ static int present_crypto_sd(struct inod { int result; reiser4_crypto_stat *sd; - crypto_stat_t stat; digest_plugin * dplug = inode_digest_plugin(inode); - unsigned int keyid_size; assert("edward-06", dplug != NULL); @@ -1004,10 +1003,8 @@ static int present_crypto_sd(struct inod assert("edward-75", sizeof(*sd) + keyid_size <= *len); sd = (reiser4_crypto_stat *) * area; - stat.keysize = d16tocpu(&sd->keysize); - stat.keyid = (__u8 *)sd->keyid; - result = crypto_stat_to_inode(inode, &stat, keyid_size); + result = crypto_stat_to_inode(inode, sd, keyid_size); move_on(len, area, sizeof(*sd) + keyid_size); return result; } @@ -1036,11 +1033,12 @@ static int 
save_crypto_sd(struct inode * sd = (reiser4_crypto_stat *) *area; if (!inode_get_flag(inode, REISER4_CRYPTO_STAT_LOADED)) { /* file is just created */ - crypto_stat_t * stat = reiser4_inode_data(inode)->crypt; + crypto_stat_t * stat; + stat = cryptcompress_inode_data(inode)->crypt; assert("edward-15", stat != NULL); - /* copy inode crypto-stat to the disk stat-data */ + /* copy everything but private key to the disk stat-data */ cputod16(stat->keysize, &sd->keysize); memcpy(sd->keyid, stat->keyid, (size_t)dplug->dsize); inode_set_flag(inode, REISER4_CRYPTO_STAT_LOADED); @@ -1065,76 +1063,6 @@ print_crypto_sd(const char *prefix, char } #endif -/* cluster stat-data extension */ - -static int present_cluster_sd(struct inode *inode, char **area, int *len) -{ - reiser4_inode * info; - - assert("edward-77", inode != NULL); - assert("edward-78", area != NULL); - assert("edward-79", *area != NULL); - assert("edward-80", len != NULL); - assert("edward-81", !inode_get_flag(inode, REISER4_CLUSTER_KNOWN)); - - info = reiser4_inode_data(inode); - - assert("edward-82", info != NULL); - - if (*len >= (int) sizeof (reiser4_cluster_stat)) { - reiser4_cluster_stat *sd; - sd = (reiser4_cluster_stat *) * area; - info->cluster_shift = d8tocpu(&sd->cluster_shift); - inode_set_flag(inode, REISER4_CLUSTER_KNOWN); - move_on(len, area, sizeof *sd); - return 0; - } - else - return not_enough_space(inode, "cluster sd"); -} - -static int absent_cluster_sd(struct inode * inode) -{ - return -EIO; -} - -static int save_len_cluster_sd(struct inode *inode UNUSED_ARG) -{ - return sizeof (reiser4_cluster_stat); -} - -static int save_cluster_sd(struct inode *inode, char **area) -{ - reiser4_cluster_stat *sd; - - assert("edward-106", inode != NULL); - assert("edward-107", area != NULL); - assert("edward-108", *area != NULL); - - sd = (reiser4_cluster_stat *) * area; - if (!inode_get_flag(inode, REISER4_CLUSTER_KNOWN)) { - cputod8(reiser4_inode_data(inode)->cluster_shift, &sd->cluster_shift); - 
inode_set_flag(inode, REISER4_CLUSTER_KNOWN); - } - else { - /* do nothing */ - } - *area += sizeof *sd; - return 0; -} - -#if REISER4_DEBUG_OUTPUT -static void -print_cluster_sd(const char *prefix, char **area /* position in stat-data */, - int *len /* remaining length */ ) -{ - reiser4_cluster_stat *sd = (reiser4_cluster_stat *) * area; - - printk("%s: %u\n", prefix, d8tocpu(&sd->cluster_shift)); - move_on(len, area, sizeof *sd); -} -#endif - static int eio(struct inode *inode, char **area, int *len) { return RETERR(-EIO); @@ -1268,25 +1196,6 @@ sd_ext_plugin sd_ext_plugins[LAST_SD_EXT #endif .alignment = 8 }, - [CLUSTER_STAT] = { - .h = { - .type_id = REISER4_SD_EXT_PLUGIN_TYPE, - .id = CLUSTER_STAT, - .pops = NULL, - .label = "cluster-sd", - .desc = "cluster shift", - .linkage = TYPE_SAFE_LIST_LINK_ZERO} - , - .present = present_cluster_sd, - .absent = absent_cluster_sd, - /* return IO_ERROR if smthng is wrong */ - .save_len = save_len_cluster_sd, - .save = save_cluster_sd, -#if REISER4_DEBUG_OUTPUT - .print = print_cluster_sd, -#endif - .alignment = 8 - }, [CRYPTO_STAT] = { .h = { .type_id = REISER4_SD_EXT_PLUGIN_TYPE, diff -puN fs/reiser4/plugin/item/static_stat.h~reiser4-update fs/reiser4/plugin/item/static_stat.h --- devel/fs/reiser4/plugin/item/static_stat.h~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/item/static_stat.h 2005-07-08 23:11:54.000000000 -0700 @@ -77,9 +77,6 @@ typedef enum { /* this extension contains capabilities sets, associated with this file. Layout is in reiser4_capabilities_stat */ CAPABILITIES_STAT, - /* this extension contains the information about minimal unit size for - file data processing. Layout is in reiser4_cluster_stat */ - CLUSTER_STAT, /* this extension contains size and public id of the secret key. 
Layout is in reiser4_crypto_stat */ CRYPTO_STAT, diff -puN fs/reiser4/plugin/item/tail.c~reiser4-update fs/reiser4/plugin/item/tail.c --- devel/fs/reiser4/plugin/item/tail.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/item/tail.c 2005-07-08 23:11:54.000000000 -0700 @@ -454,6 +454,8 @@ tail_balance_dirty_pages(struct address_ { int result; struct inode *inode; + int excl; + unix_file_info_t *uf_info; if (hint->ext_coord.valid) set_hint(hint, &f->key, ZNODE_WRITE_LOCK); @@ -475,12 +477,19 @@ tail_balance_dirty_pages(struct address_ return result; } - /* FIXME-VS: this is temporary: the problem is that bdp takes inodes - from sb's dirty list and it looks like nobody puts there inodes of - files which are built of tails */ - move_inode_out_from_sync_inodes_loop(mapping); - - reiser4_throttle_write(inode); + if (!reiser4_is_set(inode->i_sb, REISER4_ATOMIC_WRITE)) { + uf_info = unix_file_inode_data(inode); + excl = unix_file_inode_data(inode)->exclusive_use; + if (excl) + drop_exclusive_access(uf_info); + else + drop_nonexclusive_access(uf_info); + reiser4_throttle_write(inode); + if (excl) + get_exclusive_access(uf_info); + else + get_nonexclusive_access(uf_info, 0); + } return 0; } diff -puN fs/reiser4/plugin/object.c~reiser4-update fs/reiser4/plugin/object.c --- devel/fs/reiser4/plugin/object.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/object.c 2005-07-08 23:11:54.000000000 -0700 @@ -203,6 +203,9 @@ insert_new_sd(struct inode *inode /* ino ref = reiser4_inode_data(inode); spin_lock_inode(inode); + if (ref->plugin_mask != 0) + /* inode has non-standard plugins */ + inode_set_extension(inode, PLUGIN_STAT); /* * prepare specification of new item to be inserted */ @@ -506,7 +509,9 @@ common_object_delete_no_reserve(struct i if (result == 0) { oid_count_released(); - result = safe_link_del(inode, SAFE_UNLINK); + result = safe_link_del(tree_by_inode(inode), + get_inode_oid(inode), + 
SAFE_UNLINK); } } } else @@ -740,21 +745,21 @@ rem_link_common(struct inode *object, st return 0; } -/* ->not_linked() method for file plugins */ +/* default (for directories) ->rem_link() method of file plugin */ static int -not_linked_common(const struct inode *inode) +rem_link_common_dir(struct inode *object, struct inode *parent UNUSED_ARG) { - assert("nikita-2007", inode != NULL); - return (inode->i_nlink == 0); -} + assert("nikita-20211", object != NULL); + assert("nikita-21631", object->i_nlink > 0); -/* ->not_linked() method the for directory file plugin */ -static int -not_linked_dir(const struct inode *inode) -{ - assert("nikita-2008", inode != NULL); - /* one link from dot */ - return (inode->i_nlink == 1); + /* + * decrement ->i_nlink and update ->i_ctime + */ + INODE_DEC_FIELD(object, i_nlink); + if (object->i_nlink == 1) + INODE_DEC_FIELD(object, i_nlink); + object->i_ctime = CURRENT_TIME; + return 0; } /* ->adjust_to_parent() method for regular files */ @@ -953,102 +958,6 @@ setattr_common(struct inode *inode /* Ob return result; } -/* doesn't seem to be exported in headers. */ -extern spinlock_t inode_lock; - -/* ->delete_inode() method. This is called by - * iput()->iput_final()->drop_inode() when last reference to inode is released - * and inode has no names. */ -static void delete_inode_common(struct inode *object) -{ - /* create context here. - * - * removal of inode from the hash table (done at the very beginning of - * generic_delete_inode(), truncate of pages, and removal of file's - * extents has to be performed in the same atom. Otherwise, it may so - * happen, that twig node with unallocated extent will be flushed to - * the disk. 
- */ - reiser4_context ctx; - - /* - * FIXME: this resembles generic_delete_inode - */ - list_del_init(&object->i_list); - list_del_init(&object->i_sb_list); - object->i_state |= I_FREEING; - inodes_stat.nr_inodes--; - spin_unlock(&inode_lock); - - init_context(&ctx, object->i_sb); - - kill_cursors(object); - - if (!is_bad_inode(object)) { - file_plugin *fplug; - - /* truncate object body */ - fplug = inode_file_plugin(object); - if (fplug->pre_delete != NULL && fplug->pre_delete(object) != 0) - warning("vs-1216", "Failed to delete file body %llu", - (unsigned long long)get_inode_oid(object)); - else - assert("vs-1430", - reiser4_inode_data(object)->anonymous_eflushed == 0 && - reiser4_inode_data(object)->captured_eflushed == 0); - } - - if (object->i_data.nrpages) { - warning("vs-1434", "nrpages %ld\n", object->i_data.nrpages); - truncate_inode_pages(&object->i_data, 0); - } - security_inode_delete(object); - if (!is_bad_inode(object)) - DQUOT_INIT(object); - - object->i_sb->s_op->delete_inode(object); - - spin_lock(&inode_lock); - hlist_del_init(&object->i_hash); - spin_unlock(&inode_lock); - wake_up_inode(object); - if (object->i_state != I_CLEAR) - BUG(); - destroy_inode(object); - reiser4_exit_context(&ctx); -} - -/* - * ->forget_inode() method. Called by iput()->iput_final()->drop_inode() when - * last reference to inode with names is released - */ -static void forget_inode_common(struct inode *object) -{ - generic_forget_inode(object); -} - -/* ->drop_inode() method. Called by iput()->iput_final() when last reference - * to inode is released */ -static void drop_common(struct inode * object) -{ - file_plugin *fplug; - - assert("nikita-2643", object != NULL); - - /* -not- creating context in this method, because it is frequently - called and all existing ->not_linked() methods are one liners. 
*/ - - fplug = inode_file_plugin(object); - /* fplug is NULL for fake inode */ - if (fplug != NULL && fplug->not_linked(object)) { - assert("nikita-3231", fplug->delete_inode != NULL); - fplug->delete_inode(object); - } else { - assert("nikita-3232", fplug->forget_inode != NULL); - fplug->forget_inode(object); - } -} - static ssize_t isdir(void) { @@ -1292,7 +1201,6 @@ file_plugin file_plugins[LAST_FILE_PLUGI .owns_item = owns_item_unix_file, .can_add_link = can_add_link_common, .can_rem_link = NULL, - .not_linked = not_linked_common, .setattr = setattr_unix_file, .getattr = getattr_common, .seek = NULL, @@ -1314,10 +1222,7 @@ file_plugin file_plugins[LAST_FILE_PLUGI .init_inode_data = init_inode_data_unix_file, .pre_delete = pre_delete_unix_file, .cut_tree_worker = cut_tree_worker_common, - .drop = drop_common, - .delete_inode = delete_inode_common, .destroy_inode = NULL, - .forget_inode = forget_inode_common, .sendfile = sendfile_unix_file, .prepare_write = prepare_write_unix_file }, @@ -1349,11 +1254,10 @@ file_plugin file_plugins[LAST_FILE_PLUGI .delete = delete_directory_common, .sync = sync_common, .add_link = add_link_common, - .rem_link = rem_link_common, + .rem_link = rem_link_common_dir, .owns_item = owns_item_hashed, .can_add_link = can_add_link_common, .can_rem_link = can_rem_dir, - .not_linked = not_linked_dir, .setattr = setattr_common, .getattr = getattr_common, .seek = seek_dir, @@ -1375,10 +1279,7 @@ file_plugin file_plugins[LAST_FILE_PLUGI .init_inode_data = init_inode_ordering, .pre_delete = NULL, .cut_tree_worker = cut_tree_worker_common, - .drop = drop_common, - .delete_inode = delete_inode_common, .destroy_inode = NULL, - .forget_inode = forget_inode_common, }, [SYMLINK_FILE_PLUGIN_ID] = { .h = { @@ -1415,7 +1316,6 @@ file_plugin file_plugins[LAST_FILE_PLUGI .owns_item = NULL, .can_add_link = can_add_link_common, .can_rem_link = NULL, - .not_linked = not_linked_common, .setattr = setattr_common, .getattr = getattr_common, .seek = NULL, @@ 
-1437,10 +1337,7 @@ file_plugin file_plugins[LAST_FILE_PLUGI .init_inode_data = init_inode_ordering, .pre_delete = NULL, .cut_tree_worker = cut_tree_worker_common, - .drop = drop_common, - .delete_inode = delete_inode_common, .destroy_inode = destroy_inode_symlink, - .forget_inode = forget_inode_common, }, [SPECIAL_FILE_PLUGIN_ID] = { .h = { @@ -1475,7 +1372,6 @@ file_plugin file_plugins[LAST_FILE_PLUGI .owns_item = owns_item_common, .can_add_link = can_add_link_common, .can_rem_link = NULL, - .not_linked = not_linked_common, .setattr = setattr_common, .getattr = getattr_common, .seek = NULL, @@ -1497,10 +1393,7 @@ file_plugin file_plugins[LAST_FILE_PLUGI .init_inode_data = init_inode_ordering, .pre_delete = NULL, .cut_tree_worker = cut_tree_worker_common, - .drop = drop_common, - .delete_inode = delete_inode_common, .destroy_inode = NULL, - .forget_inode = forget_inode_common, }, [PSEUDO_FILE_PLUGIN_ID] = { .h = { @@ -1535,7 +1428,6 @@ file_plugin file_plugins[LAST_FILE_PLUGI .owns_item = NULL, .can_add_link = cannot, .can_rem_link = cannot, - .not_linked = NULL, .setattr = inode_setattr, .getattr = getattr_common, .seek = seek_pseudo, @@ -1557,10 +1449,7 @@ file_plugin file_plugins[LAST_FILE_PLUGI .init_inode_data = NULL, .pre_delete = NULL, .cut_tree_worker = cut_tree_worker_common, - .drop = drop_pseudo, - .delete_inode = NULL, .destroy_inode = NULL, - .forget_inode = NULL, }, [CRC_FILE_PLUGIN_ID] = { .h = { @@ -1596,7 +1485,6 @@ file_plugin file_plugins[LAST_FILE_PLUGI .owns_item = owns_item_common, .can_add_link = can_add_link_common, .can_rem_link = NULL, - .not_linked = not_linked_common, .setattr = setattr_cryptcompress, .getattr = getattr_common, .seek = NULL, @@ -1619,10 +1507,7 @@ file_plugin file_plugins[LAST_FILE_PLUGI .init_inode_data = init_inode_data_cryptcompress, .pre_delete = pre_delete_cryptcompress, .cut_tree_worker = cut_tree_worker_cryptcompress, - .drop = drop_common, - .delete_inode = delete_inode_common, .destroy_inode = 
destroy_inode_cryptcompress, - .forget_inode = forget_inode_common, .sendfile = sendfile_common, .prepare_write = prepare_write_common } diff -puN fs/reiser4/plugin/plugin.c~reiser4-update fs/reiser4/plugin/plugin.c --- devel/fs/reiser4/plugin/plugin.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/plugin.c 2005-07-08 23:11:54.000000000 -0700 @@ -455,8 +455,14 @@ extern fibration_plugin fibration_plugin extern crypto_plugin crypto_plugins[LAST_CRYPTO_ID]; /* defined in fs/reiser4/plugin/digest.c */ extern digest_plugin digest_plugins[LAST_DIGEST_ID]; -/* defined in fs/reiser4/plugin/compress.c */ +/* defined in fs/reiser4/plugin/compress/compress.c */ extern compression_plugin compression_plugins[LAST_COMPRESSION_ID]; +/* defined in fs/reiser4/plugin/compress/compress_mode.c */ +extern compression_mode_plugin compression_mode_plugins[LAST_COMPRESSION_MODE_ID]; +/* defined in fs/reiser4/cluster.c */ +extern cluster_plugin cluster_plugins[LAST_CLUSTER_ID]; +/* defined in fs/reiser4/plugin/file/regular.c */ +extern regular_plugin regular_plugins[LAST_REGULAR_ID]; /* defined in fs/reiser4/plugin/tail.c */ extern formatting_plugin formatting_plugins[LAST_TAIL_FORMATTING_ID]; /* defined in fs/reiser4/plugin/security/security.c */ @@ -609,6 +615,33 @@ reiser4_plugin_type_data plugins[REISER4 .builtin = pseudo_plugins, .plugins_list = TYPE_SAFE_LIST_HEAD_ZERO, .size = sizeof (pseudo_plugin) + }, + [REISER4_COMPRESSION_MODE_PLUGIN_TYPE] = { + .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE, + .label = "compression_mode", + .desc = "Defines compression mode", + .builtin_num = sizeof_array(compression_mode_plugins), + .builtin = compression_mode_plugins, + .plugins_list = TYPE_SAFE_LIST_HEAD_ZERO, + .size = sizeof (compression_mode_plugin) + }, + [REISER4_CLUSTER_PLUGIN_TYPE] = { + .type_id = REISER4_CLUSTER_PLUGIN_TYPE, + .label = "cluster", + .desc = "Defines cluster size", + .builtin_num = sizeof_array(cluster_plugins), + 
.builtin = cluster_plugins, + .plugins_list = TYPE_SAFE_LIST_HEAD_ZERO, + .size = sizeof (cluster_plugin) + }, + [REISER4_REGULAR_PLUGIN_TYPE] = { + .type_id = REISER4_REGULAR_PLUGIN_TYPE, + .label = "regular", + .desc = "Defines kind of regular file", + .builtin_num = sizeof_array(regular_plugins), + .builtin = regular_plugins, + .plugins_list = TYPE_SAFE_LIST_HEAD_ZERO, + .size = sizeof (regular_plugin) } }; diff -puN fs/reiser4/plugin/plugin.h~reiser4-update fs/reiser4/plugin/plugin.h --- devel/fs/reiser4/plugin/plugin.h~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/plugin.h 2005-07-08 23:11:54.000000000 -0700 @@ -139,6 +139,27 @@ typedef struct reiser4_object_on_wire re plugins. */ + +/* builtin file-plugins */ +typedef enum { + /* regular file */ + UNIX_FILE_PLUGIN_ID, + /* directory */ + DIRECTORY_FILE_PLUGIN_ID, + /* symlink */ + SYMLINK_FILE_PLUGIN_ID, + /* for objects completely handled by the VFS: fifos, devices, + sockets */ + SPECIAL_FILE_PLUGIN_ID, + /* Plugin id for crypto-compression objects */ + CRC_FILE_PLUGIN_ID, + /* pseudo file */ + PSEUDO_FILE_PLUGIN_ID, + /* number of file plugins. Used as size of arrays to hold + file plugins. */ + LAST_FILE_PLUGIN_ID +} reiser4_file_id; + typedef struct file_plugin { /* generic fields */ @@ -238,8 +259,6 @@ Elena doing this for you if that helps. int (*can_add_link) (const struct inode * inode); /* checks whether hard links to this object can be removed */ int (*can_rem_link) (const struct inode * inode); - /* true if there is only one link (aka name) for this file */ - int (*not_linked) (const struct inode * inode); /* change inode attributes. */ int (*setattr) (struct inode * inode, struct iattr * attr); @@ -293,16 +312,9 @@ Elena doing this for you if that helps. /* truncate file to zero size. 
called by reiser4_drop_inode before truncate_inode_pages */ int (*pre_delete)(struct inode *); - /* called from reiser4_drop_inode() */ - void (*drop)(struct inode *); - - /* called from ->drop() when there are no links, and object should be - * garbage collected. */ - void (*delete_inode)(struct inode *); - /* called from ->destroy_inode() */ void (*destroy_inode)(struct inode *); - void (*forget_inode)(struct inode *); + ssize_t (*sendfile)(struct file *, loff_t *, size_t, read_actor_t, void __user *); /* * methods to serialize object identify. This is used, for example, by @@ -433,7 +445,7 @@ typedef struct crypto_plugin { size. This method is to align any flow up to crypto block size when we pass it to crypto algorithm. To align means to append padding of special format specific to the crypto algorithm */ - int (*align_cluster)(__u8 *tail, int clust_size, int blocksize); + int (*align_stream)(__u8 *tail, int clust_size, int blocksize); /* low-level key manager (check, install, etc..) */ int (*setkey) (struct crypto_tfm *tfm, const __u8 *key, unsigned int keylen); /* main text processing procedures */ @@ -453,13 +465,17 @@ typedef struct digest_plugin { typedef struct compression_plugin { /* generic fields */ plugin_header h; + /* id of the dual plugin */ + reiser4_compression_id dual; + int (*init) (void); /* the maximum number of bytes the size of the "compressed" data can * exceed the uncompressed data. 
*/ int (*overrun) (unsigned src_len); coa_t (*alloc) (tfm_action act); void (*free) (coa_t coa, tfm_action act); /* minimal size of the flow we still try to compress */ - int (*min_tfm_size) (void); + int (*min_size_deflate) (void); + __u32 (*checksum) (char * data, __u32 length); /* main transform procedures */ void (*compress) (coa_t coa, __u8 *src_first, unsigned src_len, __u8 *dst_first, unsigned *dst_len); @@ -467,6 +483,30 @@ typedef struct compression_plugin { __u8 *dst_first, unsigned *dst_len); }compression_plugin; +typedef struct compression_mode_plugin { + /* generic fields */ + plugin_header h; + /* called before compression transform */ + int (*should_deflate) (cloff_t index); + /* called when results of compression should be saved */ + void (*save_deflate) (struct inode * inode); + /* called when results of compression should be discarded */ + int (*discard_deflate) (struct inode * inode, cloff_t index); +} compression_mode_plugin; + +typedef struct regular_plugin { + /* generic fields */ + plugin_header h; + /* file plugin id which implements regular file */ + reiser4_file_id id; +}regular_plugin; + +typedef struct cluster_plugin { + /* generic fields */ + plugin_header h; + int shift; +}cluster_plugin; + typedef struct sd_ext_plugin { /* generic fields */ plugin_header h; @@ -579,6 +619,12 @@ union reiser4_plugin { jnode_plugin jnode; /* plugin for pseudo files */ pseudo_plugin pseudo; + /* compression_mode_plugin, used by object plugin */ + compression_mode_plugin compression_mode; + /* cluster_plugin, used by object plugin */ + cluster_plugin clust; + /* regular plugin, used by directory plugin */ + regular_plugin regular; /* place-holder for new plugin types that can be registered dynamically, and used by other dynamically loaded plugins. 
*/ void *generic; @@ -622,26 +668,6 @@ void move_flow_forward(flow_t * f, unsig /* builtin plugins */ -/* builtin file-plugins */ -typedef enum { - /* regular file */ - UNIX_FILE_PLUGIN_ID, - /* directory */ - DIRECTORY_FILE_PLUGIN_ID, - /* symlink */ - SYMLINK_FILE_PLUGIN_ID, - /* for objects completely handled by the VFS: fifos, devices, - sockets */ - SPECIAL_FILE_PLUGIN_ID, - /* Plugin id for crypto-compression objects */ - CRC_FILE_PLUGIN_ID, - /* pseudo file */ - PSEUDO_FILE_PLUGIN_ID, - /* number of file plugins. Used as size of arrays to hold - file plugins. */ - LAST_FILE_PLUGIN_ID -} reiser4_file_id; - /* builtin dir-plugins */ typedef enum { HASHED_DIR_PLUGIN_ID, @@ -675,6 +701,32 @@ typedef enum { LAST_DIGEST_ID } reiser4_digest_id; +/* builtin compression mode plugins */ +typedef enum { + SMART_COMPRESSION_MODE_ID, + LAZY_COMPRESSION_MODE_ID, + FORCE_COMPRESSION_MODE_ID, + TEST_COMPRESSION_MODE_ID, + LAST_COMPRESSION_MODE_ID +} reiser4_compression_mode_id; + +/* builtin cluster plugins */ +typedef enum { + CLUSTER_4K_ID, + CLUSTER_8K_ID, + CLUSTER_16K_ID, + CLUSTER_32K_ID, + CLUSTER_64K_ID, + LAST_CLUSTER_ID +} reiser4_cluster_id; + +/* builtin regular plugins */ +typedef enum { + UF_REGULAR_ID, + CRC_REGULAR_ID, + LAST_REGULAR_ID +} reiser4_regular_id; + /* builtin tail-plugins */ typedef enum { @@ -784,6 +836,9 @@ PLUGIN_BY_ID(formatting_plugin, REISER4_ PLUGIN_BY_ID(disk_format_plugin, REISER4_FORMAT_PLUGIN_TYPE, format); PLUGIN_BY_ID(jnode_plugin, REISER4_JNODE_PLUGIN_TYPE, jnode); PLUGIN_BY_ID(pseudo_plugin, REISER4_PSEUDO_PLUGIN_TYPE, pseudo); +PLUGIN_BY_ID(compression_mode_plugin, REISER4_COMPRESSION_MODE_PLUGIN_TYPE, compression_mode); +PLUGIN_BY_ID(cluster_plugin, REISER4_CLUSTER_PLUGIN_TYPE, clust); +PLUGIN_BY_ID(regular_plugin, REISER4_REGULAR_PLUGIN_TYPE, regular); extern int save_plugin_id(reiser4_plugin * plugin, d16 * area); @@ -811,6 +866,9 @@ typedef enum { PSET_CRYPTO, PSET_DIGEST, PSET_COMPRESSION, + PSET_COMPRESSION_MODE, + 
PSET_CLUSTER, + PSET_REGULAR_ENTRY, PSET_LAST } pset_member; diff -puN fs/reiser4/plugin/plugin_header.h~reiser4-update fs/reiser4/plugin/plugin_header.h --- devel/fs/reiser4/plugin/plugin_header.h~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/plugin_header.h 2005-07-08 23:11:54.000000000 -0700 @@ -26,6 +26,9 @@ typedef enum { REISER4_DIGEST_PLUGIN_TYPE, REISER4_COMPRESSION_PLUGIN_TYPE, REISER4_PSEUDO_PLUGIN_TYPE, + REISER4_COMPRESSION_MODE_PLUGIN_TYPE, + REISER4_CLUSTER_PLUGIN_TYPE, + REISER4_REGULAR_PLUGIN_TYPE, REISER4_PLUGIN_TYPES } reiser4_plugin_type; diff -puN fs/reiser4/plugin/plugin_set.c~reiser4-update fs/reiser4/plugin/plugin_set.c --- devel/fs/reiser4/plugin/plugin_set.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/plugin_set.c 2005-07-08 23:11:54.000000000 -0700 @@ -67,7 +67,10 @@ pseq(const unsigned long * a1, const uns sizeof set1->dir_item + sizeof set1->crypto + sizeof set1->digest + - sizeof set1->compression); + sizeof set1->compression + + sizeof set1->compression_mode + + sizeof set1->cluster + + sizeof set1->regular_entry); set1 = cast_to(a1); set2 = cast_to(a2); @@ -84,7 +87,10 @@ pseq(const unsigned long * a1, const uns set1->dir_item == set2->dir_item && set1->crypto == set2->crypto && set1->digest == set2->digest && - set1->compression == set2->compression; + set1->compression == set2->compression && + set1->compression_mode == set2->compression_mode && + set1->cluster == set2->cluster && + set1->regular_entry == set2->regular_entry; } #define HASH_FIELD(hash, set, field) \ @@ -108,6 +114,9 @@ static inline unsigned long calculate_ha HASH_FIELD(result, set, crypto); HASH_FIELD(result, set, digest); HASH_FIELD(result, set, compression); + HASH_FIELD(result, set, compression_mode); + HASH_FIELD(result, set, cluster); + HASH_FIELD(result, set, regular_entry); return result & (PS_TABLE_SIZE - 1); } @@ -138,6 +147,9 @@ static plugin_set empty_set = { .crypto = NULL, 
.digest = NULL, .compression = NULL, + .compression_mode = NULL, + .cluster = NULL, + .regular_entry = NULL, .link = { NULL } }; @@ -248,6 +260,18 @@ static struct { [PSET_COMPRESSION] = { .offset = offsetof(plugin_set, compression), .type = REISER4_COMPRESSION_PLUGIN_TYPE + }, + [PSET_COMPRESSION_MODE] = { + .offset = offsetof(plugin_set, compression_mode), + .type = REISER4_COMPRESSION_MODE_PLUGIN_TYPE + }, + [PSET_CLUSTER] = { + .offset = offsetof(plugin_set, cluster), + .type = REISER4_CLUSTER_PLUGIN_TYPE + }, + [PSET_REGULAR_ENTRY] = { + .offset = offsetof(plugin_set, regular_entry), + .type = REISER4_REGULAR_PLUGIN_TYPE } }; @@ -306,6 +330,9 @@ DEFINE_PLUGIN_SET(item_plugin, sd) DEFINE_PLUGIN_SET(crypto_plugin, crypto) DEFINE_PLUGIN_SET(digest_plugin, digest) DEFINE_PLUGIN_SET(compression_plugin, compression) +DEFINE_PLUGIN_SET(compression_mode_plugin, compression_mode) +DEFINE_PLUGIN_SET(cluster_plugin, cluster) +DEFINE_PLUGIN_SET(regular_plugin, regular_entry) reiser4_internal int plugin_set_init(void) { diff -puN fs/reiser4/plugin/plugin_set.h~reiser4-update fs/reiser4/plugin/plugin_set.h --- devel/fs/reiser4/plugin/plugin_set.h~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/plugin_set.h 2005-07-08 23:11:54.000000000 -0700 @@ -39,21 +39,30 @@ struct plugin_set { digest_plugin *digest; /* compression plugin */ compression_plugin *compression; + /* compression mode plugin */ + compression_mode_plugin * compression_mode; + /* cluster plugin */ + cluster_plugin *cluster; + /* plugin with which a regular child should be created */ + regular_plugin *regular_entry; ps_hash_link link; }; extern plugin_set *plugin_set_get_empty(void); extern void plugin_set_put(plugin_set *set); -extern int plugin_set_file (plugin_set **set, file_plugin *file); -extern int plugin_set_dir (plugin_set **set, dir_plugin *file); -extern int plugin_set_formatting (plugin_set **set, formatting_plugin *file); -extern int plugin_set_hash (plugin_set **set, 
hash_plugin *file); -extern int plugin_set_fibration (plugin_set **set, fibration_plugin *file); -extern int plugin_set_sd (plugin_set **set, item_plugin *file); -extern int plugin_set_crypto (plugin_set **set, crypto_plugin *file); -extern int plugin_set_digest (plugin_set **set, digest_plugin *file); -extern int plugin_set_compression(plugin_set **set, compression_plugin *file); +extern int plugin_set_file (plugin_set **set, file_plugin *plug); +extern int plugin_set_dir (plugin_set **set, dir_plugin *plug); +extern int plugin_set_formatting (plugin_set **set, formatting_plugin *plug); +extern int plugin_set_hash (plugin_set **set, hash_plugin *plug); +extern int plugin_set_fibration (plugin_set **set, fibration_plugin *plug); +extern int plugin_set_sd (plugin_set **set, item_plugin *plug); +extern int plugin_set_crypto (plugin_set **set, crypto_plugin *plug); +extern int plugin_set_digest (plugin_set **set, digest_plugin *plug); +extern int plugin_set_compression (plugin_set **set, compression_plugin *plug); +extern int plugin_set_compression_mode(plugin_set **set, compression_mode_plugin *plug); +extern int plugin_set_cluster (plugin_set **set, cluster_plugin *plug); +extern int plugin_set_regular (plugin_set **set, regular_plugin *plug); extern int plugin_set_init(void); extern void plugin_set_done(void); diff -puN fs/reiser4/plugin/pseudo/pseudo.c~reiser4-update fs/reiser4/plugin/pseudo/pseudo.c --- devel/fs/reiser4/plugin/pseudo/pseudo.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/pseudo/pseudo.c 2005-07-08 23:11:54.000000000 -0700 @@ -938,6 +938,9 @@ static plugin_entry pentry[] = { PLUGIN_ENTRY(crypto, PSET_CRYPTO), PLUGIN_ENTRY(digest, PSET_DIGEST), PLUGIN_ENTRY(compression, PSET_COMPRESSION), + PLUGIN_ENTRY(compression_mode, PSET_COMPRESSION_MODE), + PLUGIN_ENTRY(cluster, PSET_CLUSTER), + PLUGIN_ENTRY(regular, PSET_REGULAR_ENTRY), { .name = NULL, } diff -puN fs/reiser4/plugin/space/bitmap.c~reiser4-update 
fs/reiser4/plugin/space/bitmap.c --- devel/fs/reiser4/plugin/space/bitmap.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/plugin/space/bitmap.c 2005-07-08 23:11:54.000000000 -0700 @@ -461,11 +461,11 @@ reiser4_set_bits(char *addr, bmap_off_t Jean-loup Gailly Mark Adler jloup@gzip.org madler@alumni.caltech.edu - The above comment applies only to the adler32 function. + The above comment applies only to the reiser4_adler32 function. */ -static __u32 -adler32(char *data, __u32 len) +reiser4_internal __u32 +reiser4_adler32(char *data, __u32 len) { unsigned char *t = data; __u32 s1 = 1; @@ -493,7 +493,7 @@ adler32(char *data, __u32 len) static __u32 bnode_calc_crc(const struct bitmap_node *bnode, unsigned long size) { - return adler32(bnode_commit_data(bnode), bmap_size(size)); + return reiser4_adler32(bnode_commit_data(bnode), bmap_size(size)); } diff -puN fs/reiser4/safe_link.c~reiser4-update fs/reiser4/safe_link.c --- devel/fs/reiser4/safe_link.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/safe_link.c 2005-07-08 23:11:54.000000000 -0700 @@ -91,11 +91,12 @@ safe_link_locality(reiser4_tree *tree) */ static reiser4_key * -build_link_key(struct inode *inode, reiser4_safe_link_t link, reiser4_key *key) +build_link_key(reiser4_tree *tree, oid_t oid, reiser4_safe_link_t link, + reiser4_key *key) { reiser4_key_init(key); - set_key_locality(key, safe_link_locality(tree_by_inode(inode))); - set_key_objectid(key, get_inode_oid(inode)); + set_key_locality(key, safe_link_locality(tree)); + set_key_objectid(key, oid); set_key_offset(key, link); return key; } @@ -171,7 +172,7 @@ reiser4_internal int safe_link_add(struc cputod64(inode->i_size, &sl.size); } tree = tree_by_inode(inode); - build_link_key(inode, link, &key); + build_link_key(tree, get_inode_oid(inode), link, &key); result = store_black_box(tree, &key, &sl, length); if (result == -EEXIST) @@ -183,12 +184,13 @@ reiser4_internal int safe_link_add(struc * 
remove safe-link corresponding to the operation @link on inode @inode from * the tree. */ -reiser4_internal int safe_link_del(struct inode *inode, reiser4_safe_link_t link) +reiser4_internal int +safe_link_del(reiser4_tree *tree, oid_t oid, reiser4_safe_link_t link) { reiser4_key key; - return kill_black_box(tree_by_inode(inode), - build_link_key(inode, link, &key)); + return kill_black_box(tree, + build_link_key(tree, oid, link, &key)); } /* @@ -273,6 +275,7 @@ static int process_safelink(struct super fplug = inode_file_plugin(inode); assert("nikita-3428", fplug != NULL); + assert("", oid == get_inode_oid(inode)); if (fplug->safelink != NULL) { /* txn_restart_current is not necessary because * mounting is signle thread. However, without it @@ -295,11 +298,11 @@ static int process_safelink(struct super reiser4_iget_complete(inode); iput(inode); if (result == 0) { - result = safe_link_grab(tree_by_inode(inode), + result = safe_link_grab(get_tree(super), BA_CAN_COMMIT); if (result == 0) - result = safe_link_del(inode, link); - safe_link_release(tree_by_inode(inode)); + result = safe_link_del(get_tree(super), oid, link); + safe_link_release(get_tree(super)); /* * restart transaction: if there was large number of * safe-links, their processing may fail to fit into diff -puN fs/reiser4/safe_link.h~reiser4-update fs/reiser4/safe_link.h --- devel/fs/reiser4/safe_link.h~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/safe_link.h 2005-07-08 23:11:54.000000000 -0700 @@ -11,7 +11,7 @@ int safe_link_grab(reiser4_tree *tree, reiser4_ba_flags_t flags); void safe_link_release(reiser4_tree *tree); int safe_link_add(struct inode *inode, reiser4_safe_link_t link); -int safe_link_del(struct inode *inode, reiser4_safe_link_t link); +int safe_link_del(reiser4_tree *, oid_t oid, reiser4_safe_link_t link); int process_safelinks(struct super_block *super); diff -puN fs/reiser4/search.c~reiser4-update fs/reiser4/search.c --- 
devel/fs/reiser4/search.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/search.c 2005-07-08 23:11:54.000000000 -0700 @@ -987,9 +987,7 @@ cbk_level_lookup(cbk_handle * h /* searc ret = cbk_node_lookup(h); - /* reget @active from handle, because it can change in - cbk_node_lookup() */ - /*active = h->active_lh->node;*/ + /* h->active_lh->node may have changed; @active must still be zrelse()d */ zrelse(active); return ret; @@ -1115,17 +1113,16 @@ cbk_node_lookup(cbk_handle * h /* search assert("vs-361", h->level > h->stop_level); if (handle_eottl(h, &result)) { - /**/ assert("vs-1674", result == LOOKUP_DONE || result == LOOKUP_REST); return result; } + /* go down to next level */ + check_me("vs-12", zload(h->coord->node) == 0); assert("nikita-2116", item_is_internal(h->coord)); iplug = item_plugin_by_coord(h->coord); - - /* go down to next level */ - assert("vs-515", item_is_internal(h->coord)); iplug->s.internal.down_link(h->coord, h->key, &h->block); + zrelse(h->coord->node); --h->level; return LOOKUP_CONT; /* continue */ } diff -puN fs/reiser4/status_flags.c~reiser4-update fs/reiser4/status_flags.c --- devel/fs/reiser4/status_flags.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/status_flags.c 2005-07-08 23:11:54.000000000 -0700 @@ -3,7 +3,6 @@ /* Functions that deal with reiser4 status block, query status and update it, if needed */ -#include #include #include #include diff -puN fs/reiser4/super.h~reiser4-update fs/reiser4/super.h --- devel/fs/reiser4/super.h~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/super.h 2005-07-08 23:11:54.000000000 -0700 @@ -63,7 +63,9 @@ typedef enum { * directories.
*/ REISER4_NO_PSEUDO = 5, /* load all bitmap blocks at mount time */ - REISER4_DONT_LOAD_BITMAP = 6 + REISER4_DONT_LOAD_BITMAP = 6, + /* enforce atomicity during write(2) */ + REISER4_ATOMIC_WRITE = 7 } reiser4_fs_flag; /* diff -puN fs/reiser4/tree.c~reiser4-update fs/reiser4/tree.c --- devel/fs/reiser4/tree.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/tree.c 2005-07-08 23:11:54.000000000 -0700 @@ -1400,7 +1400,7 @@ reiser4_internal int delete_node (znode init_lh(&parent_lock); - ret = reiser4_get_parent(&parent_lock, node, ZNODE_WRITE_LOCK, 0); + ret = reiser4_get_parent(&parent_lock, node, ZNODE_WRITE_LOCK); if (ret) return ret; diff -puN fs/reiser4/tree_walk.c~reiser4-update fs/reiser4/tree_walk.c --- devel/fs/reiser4/tree_walk.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/tree_walk.c 2005-07-08 23:11:54.000000000 -0700 @@ -137,27 +137,6 @@ reiser4_get_parent_flags(lock_handle * r ZNODE_LOCK_HIPRI, flags, 1)); } -/* A wrapper for reiser4_get_parent_flags(). */ -reiser4_internal int -reiser4_get_parent(lock_handle * result /* resulting lock - * handle */ , - znode * node /* child node */ , - znode_lock_mode mode /* type of lock: read or write */ , - int only_connected_p /* if this is true, parent is - * only returned when it is - * connected. If parent is - * unconnected, -E_NO_NEIGHBOR is - * returned. Normal users should - * pass 1 here. Only during carry - * we want to access still - * unconnected parents. */ ) -{ - assert("umka-238", znode_get_tree(node) != NULL); - - return reiser4_get_parent_flags(result, node, mode, - only_connected_p ? 
0 : GN_ALLOW_NOT_CONNECTED); -} - /* wrapper function to lock right or left neighbor depending on GN_GO_LEFT bit in @flags parameter */ /* Audited by: umka (2002.06.14) */ @@ -259,7 +238,7 @@ link_left_and_right(znode * left, znode ON_DEBUG(left->right_version = atomic_inc_return(&delim_key_version);); - } else if (ZF_ISSET(left->right, JNODE_HEARD_BANSHEE)) { + } else if (ZF_ISSET(left->right, JNODE_HEARD_BANSHEE) && left->right != right) { ON_DEBUG( left->right->left_version = atomic_inc_return(&delim_key_version); @@ -292,7 +271,7 @@ link_left_and_right(znode * left, znode ON_DEBUG(right->left_version = atomic_inc_return(&delim_key_version);); - } else if (ZF_ISSET(right->left, JNODE_HEARD_BANSHEE)) { + } else if (ZF_ISSET(right->left, JNODE_HEARD_BANSHEE) && right->left != left) { ON_DEBUG( right->left->right_version = atomic_inc_return(&delim_key_version); @@ -705,7 +684,7 @@ again: /* before establishing of sibling link we lock parent node; it is required by renew_neighbor() to work. */ init_lh(&path[0]); - ret = reiser4_get_parent(&path[0], node, ZNODE_READ_LOCK, 1); + ret = reiser4_get_parent(&path[0], node, ZNODE_READ_LOCK); if (ret) return ret; if (znode_above_root(path[0].node)) { @@ -753,7 +732,7 @@ again: /* sibling link is not available -- we go upward. 
*/ init_lh(&path[h + 1]); - ret = reiser4_get_parent(&path[h + 1], parent, ZNODE_READ_LOCK, 1); + ret = reiser4_get_parent(&path[h + 1], parent, ZNODE_READ_LOCK); if (ret) goto fail; ++h; @@ -786,7 +765,10 @@ fail: /* unlock path */ do { - longterm_unlock_znode(&path[h]); + /* FIXME-Zam: when we get here from case -E_DEADLOCK's goto + fail; path[0] is already done_lh-ed, therefore + longterm_unlock_znode(&path[h]); is not applicable */ + done_lh(&path[h]); --h; } while (h + 1 != 0); @@ -1034,7 +1016,7 @@ static int tw_up (struct tw_handle * h) init_load_count(&load); do { - ret = reiser4_get_parent(&lock, h->tap.lh->node, ZNODE_WRITE_LOCK, 0); + ret = reiser4_get_parent(&lock, h->tap.lh->node, ZNODE_WRITE_LOCK); if (ret) break; if (znode_above_root(lock.node)) { diff -puN fs/reiser4/tree_walk.h~reiser4-update fs/reiser4/tree_walk.h --- devel/fs/reiser4/tree_walk.h~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/tree_walk.h 2005-07-08 23:11:54.000000000 -0700 @@ -37,8 +37,6 @@ int reiser4_get_parent_flags(lock_handle * result, znode * node, znode_lock_mode mode, int flags); -int reiser4_get_parent(lock_handle * result, znode * node, znode_lock_mode mode, int only_connected_p); - /* bits definition for reiser4_get_neighbor function `flags' arg. */ typedef enum { /* If sibling pointer is NULL, this flag allows get_neighbor() to try to @@ -62,6 +60,13 @@ typedef enum { GN_ASYNC = 0x80 } znode_get_neigbor_flags; +/* A commonly used wrapper for reiser4_get_parent_flags(). 
*/ +static inline int reiser4_get_parent( + lock_handle * result, znode * node, znode_lock_mode mode) +{ + return reiser4_get_parent_flags(result, node, mode, GN_ALLOW_NOT_CONNECTED); +} + int reiser4_get_neighbor(lock_handle * neighbor, znode * node, znode_lock_mode lock_mode, int flags); /* there are wrappers for most common usages of reiser4_get_neighbor() */ diff -puN fs/reiser4/txnmgr.c~reiser4-update fs/reiser4/txnmgr.c --- devel/fs/reiser4/txnmgr.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/txnmgr.c 2005-07-08 23:11:54.000000000 -0700 @@ -245,7 +245,7 @@ year old --- define all technical terms static void atom_free(txn_atom * atom); -static long commit_txnh(txn_handle * txnh); +static int commit_txnh(txn_handle * txnh); static void wakeup_atom_waitfor_list(txn_atom * atom); static void wakeup_atom_waiting_list(txn_atom * atom); @@ -487,7 +487,7 @@ txn_begin(reiser4_context * context) } /* Finish a transaction handle context. */ -reiser4_internal long +reiser4_internal int txn_end(reiser4_context * context) { long ret = 0; @@ -1380,6 +1380,51 @@ commit_some_atoms(txn_mgr * mgr) return 0; } +static int txn_try_to_fuse_small_atom (txn_mgr * tmgr, txn_atom * atom) +{ + int atom_stage; + txn_atom *atom_2; + int repeat; + + assert ("zam-1051", atom->stage < ASTAGE_PRE_COMMIT); + + atom_stage = atom->stage; + repeat = 0; + + if (!spin_trylock_txnmgr(tmgr)) { + UNLOCK_ATOM(atom); + spin_lock_txnmgr(tmgr); + LOCK_ATOM(atom); + repeat = 1; + if (atom->stage != atom_stage) + goto out; + } + + for_all_type_safe_list(atom, &tmgr->atoms_list, atom_2) { + if (atom == atom_2) + continue; + /* if trylock does not succeed we just do not fuse with that + * atom. 
*/ + if (spin_trylock_atom(atom_2)) { + if (atom_2->stage < ASTAGE_PRE_COMMIT) { + spin_unlock_txnmgr(tmgr); + capture_fuse_into(atom_2, atom); + /* all locks are lost, we can only repeat here */ + return -E_REPEAT; + } + UNLOCK_ATOM(atom_2); + } + } + atom->flags |= ATOM_CANCEL_FUSION; + out: + spin_unlock_txnmgr(tmgr); + if (repeat) { + UNLOCK_ATOM(atom); + return -E_REPEAT; + } + return 0; +} + /* Calls jnode_flush for current atom if it exists; if not, just take another atom and call jnode_flush() for him. If current transaction handle has already assigned atom (current atom) we have to close current transaction @@ -1397,17 +1442,15 @@ reiser4_internal int flush_some_atom(long *nr_submitted, const struct writeback_control *wbc, int flags) { reiser4_context *ctx = get_current_context(); + txn_mgr *tmgr = &get_super_private(ctx->super)->tmgr; txn_handle *txnh = ctx->trans; txn_atom *atom; int ret; - int ret1; assert("zam-1042", txnh != NULL); repeat: if (txnh->atom == NULL) { - /* current atom is available, take first from txnmgr */ - txn_mgr *tmgr = &get_super_private(ctx->super)->tmgr; - + /* current atom is not available, take first from txnmgr */ spin_lock_txnmgr(tmgr); /* traverse the list of all atoms */ @@ -1463,6 +1506,14 @@ flush_some_atom(long *nr_submitted, cons ret = flush_current_atom(flags, nr_submitted, &atom); if (ret == 0) { if (*nr_submitted == 0 || atom_should_commit_asap(atom)) { + if (atom->capture_count < tmgr->atom_min_size && + !(atom->flags & ATOM_CANCEL_FUSION)) { + ret =txn_try_to_fuse_small_atom(tmgr, atom); + if (ret == -E_REPEAT) { + preempt_point(); + goto repeat; + } + } /* if early flushing could not make more nodes clean, * or atom is too old/large, * we force current atom to commit */ @@ -1474,16 +1525,19 @@ flush_some_atom(long *nr_submitted, cons } UNLOCK_ATOM(atom); } else if (ret == -E_REPEAT) { - if (*nr_submitted == 0) + if (*nr_submitted == 0) { + /* let others who hamper flushing (holding longterm locks, + for instance)
free the way for flush */ + preempt_point(); goto repeat; + } ret = 0; } - - ret1 = txn_end(ctx); - assert("vs-1692", ret1 == 0); - if (ret1 > 0) - *nr_submitted += ret1; - txn_begin(ctx); +/* + if (*nr_submitted > wbc->nr_to_write) + warning("", "asked for %ld, written %ld\n", wbc->nr_to_write, *nr_submitted); +*/ + txn_restart(ctx); return ret; } @@ -1644,7 +1698,7 @@ try_commit_txnh(commit_data *cd) return 0; if (atom_should_commit(cd->atom)) { - /* if atom is _very_ large schedule it for common as soon as + /* if atom is _very_ large schedule it for commit as soon as * possible. */ if (atom_should_commit_asap(cd->atom)) { /* @@ -1737,7 +1791,7 @@ try_commit_txnh(commit_data *cd) /* Called to commit a transaction handle. This decrements the atom's number of open handles and if it is the last handle to commit and the atom should commit, initiates atom commit. if commit does not fail, return number of written blocks */ -static long +static int commit_txnh(txn_handle * txnh) { commit_data cd; diff -puN fs/reiser4/txnmgr.h~reiser4-update fs/reiser4/txnmgr.h --- devel/fs/reiser4/txnmgr.h~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/txnmgr.h 2005-07-08 23:11:54.000000000 -0700 @@ -161,7 +161,10 @@ typedef enum { /* Certain flags may be set in the txn_atom->flags field. */ typedef enum { /* Indicates that the atom should commit as soon as possible. */ - ATOM_FORCE_COMMIT = (1 << 0) + ATOM_FORCE_COMMIT = (1 << 0), + /* to avoid endless loop, mark the atom (which was considered as too + * small) after failed attempt to fuse it. */ + ATOM_CANCEL_FUSION = (1 << 1) } txn_flags; /* Flags for controlling commit_txnh */ @@ -434,6 +437,7 @@ struct txn_mgr { /* parameters. Adjustable through mount options. */ unsigned int atom_max_size; unsigned int atom_max_age; + unsigned int atom_min_size; /* max number of concurrent flushers for one atom, 0 - unlimited. 
*/ unsigned int atom_max_flushers; }; @@ -455,7 +459,7 @@ extern int txnmgr_done(txn_mgr * mgr); extern int txn_reserve(int reserved); extern void txn_begin(reiser4_context * context); -extern long txn_end(reiser4_context * context); +extern int txn_end(reiser4_context * context); extern void txn_restart(reiser4_context * context); extern void txn_restart_current(void); diff -puN fs/reiser4/vfs_ops.c~reiser4-update fs/reiser4/vfs_ops.c --- devel/fs/reiser4/vfs_ops.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/vfs_ops.c 2005-07-08 23:11:54.000000000 -0700 @@ -56,14 +56,14 @@ /* super operations */ -static struct inode *reiser4_alloc_inode(struct super_block *super); -static void reiser4_destroy_inode(struct inode *inode); -static void reiser4_drop_inode(struct inode *); +static struct inode *reiser4_alloc_inode(struct super_block *); +static void reiser4_destroy_inode(struct inode *); +static void reiser4_put_inode(struct inode *); static void reiser4_delete_inode(struct inode *); static void reiser4_write_super(struct super_block *); static int reiser4_statfs(struct super_block *, struct kstatfs *); static int reiser4_show_options(struct seq_file *m, struct vfsmount *mnt); -static void reiser4_sync_inodes(struct super_block *s, struct writeback_control * wbc); +static void reiser4_sync_inodes(struct super_block *, struct writeback_control *); extern struct dentry_operations reiser4_dentry_operation; @@ -486,8 +486,6 @@ reiser4_alloc_inode(struct super_block * #endif seal_init(&info->sd_seal, NULL, NULL); coord_init_invalid(&info->sd_coord, NULL); - info->cluster_shift = 0; - info->crypt = NULL; info->flags = 0; spin_inode_object_init(info); /* this deals with info's loading semaphore */ @@ -530,32 +528,36 @@ reiser4_destroy_inode(struct inode *inod kmem_cache_free(inode_cache, container_of(info, reiser4_inode_object, p)); } -/* our ->drop_inode() method. 
This is called by iput_final() when last - * reference on inode is released */ +/* put_inode of super_operations + + we use put_inode to call pre_delete method of file plugin if it is defined + and if inode is unlinked and if it is about to drop inode reference count to + 0. */ static void -reiser4_drop_inode(struct inode *object) +reiser4_put_inode(struct inode *inode) { + reiser4_context ctx; file_plugin *fplug; - assert("nikita-2643", object != NULL); - - /* -not- creating context in this method, because it is frequently - called and all existing ->not_linked() methods are one liners. */ + fplug = inode_file_plugin(inode); + if (fplug == NULL || + inode->i_nlink != 0 || + atomic_read(&inode->i_count) > 1 || + fplug->pre_delete == NULL) + return; - fplug = inode_file_plugin(object); - /* fplug is NULL for fake inode */ - if (fplug != NULL) { - assert("nikita-3251", fplug->drop != NULL); - fplug->drop(object); - } else - generic_forget_inode(object); + init_context(&ctx, inode->i_sb); + /* kill cursors which might be attached to inode if it were a directory one */ + kill_cursors(inode); + fplug->pre_delete(inode); + reiser4_exit_context(&ctx); } /* * Called by reiser4_sync_inodes(), during speculative write-back (through * pdflush, or balance_dirty_pages()). */ -static void +void writeout(struct super_block *sb, struct writeback_control *wbc) { long written = 0; @@ -566,10 +568,6 @@ writeout(struct super_block *sb, struct * nothing to flush, commits some atoms. */ - /* reiser4 has its own means of periodical write-out */ - if (wbc->for_kupdate) - return; - /* Commit all atoms if reiser4_writepages() is called from sys_sync() or sys_fsync(). 
*/ if (wbc->sync_mode != WB_SYNC_NONE) { @@ -589,9 +587,7 @@ writeout(struct super_block *sb, struct /* do not put more requests to overload write queue */ if (wbc->nonblocking && bdi_write_congested(mapping->backing_dev_info)) { - blk_run_address_space(mapping); - /*blk_run_queues();*/ wbc->encountered_congestion = 1; break; } @@ -616,21 +612,26 @@ reiser4_sync_inodes(struct super_block * { reiser4_context ctx; + /* reiser4 has its own means of periodical write-out */ + if (wbc->for_kupdate) + return; + + assert("", wbc->older_than_this == NULL); + init_context(&ctx, sb); - wbc->older_than_this = NULL; /* - * What we are trying to do here is to capture all "anonymous" pages. + * call reiser4_writepages for each dirty inode to turn its dirty pages + * into transactions if that has not been done yet. */ generic_sync_sb_inodes(sb, wbc); - /*capture_reiser4_inodes(sb, wbc);*/ - spin_unlock(&inode_lock); + + /* flush goes here */ writeout(sb, wbc); /* avoid recursive calls to ->sync_inodes */ context_set_commit_async(&ctx); reiser4_exit_context(&ctx); - spin_lock(&inode_lock); } void reiser4_throttle_write(struct inode * inode) @@ -955,6 +956,12 @@ do { \ decimal. */ PUSH_SB_FIELD_OPT(tmgr.atom_max_age, "%u"); + /* tmgr.atom_min_size=N + + When committing an atom to free dirty pages, force an atom smaller + than N to fuse with another one. + */ + PUSH_SB_FIELD_OPT(tmgr.atom_min_size, "%u"); /* tmgr.atom_max_flushers=N limit of concurrent flushers for one atom. 0 means no limit. @@ -1012,6 +1019,8 @@ do { \ /* Don't load all bitmap blocks at mount time, it is useful for machines with tiny RAM and large disks.
*/ PUSH_BIT_OPT("dont_load_bitmap", REISER4_DONT_LOAD_BITMAP); + /* disable transaction commits during write() */ + PUSH_BIT_OPT("atomic_write", REISER4_ATOMIC_WRITE); PUSH_OPT ({ /* tree traversal readahead parameters: @@ -1047,6 +1056,7 @@ do { \ sbinfo->tmgr.atom_max_size = txnmgr_get_max_atom_size(s); sbinfo->tmgr.atom_max_age = REISER4_ATOM_MAX_AGE / HZ; + sbinfo->tmgr.atom_min_size = 256; sbinfo->tmgr.atom_max_flushers = ATOM_MAX_FLUSHERS; sbinfo->tree.cbk_cache.nr_slots = CBK_CACHE_SLOTS; @@ -1351,8 +1361,8 @@ struct super_operations reiser4_super_op .read_inode = noop_read_inode, .dirty_inode = NULL, .write_inode = NULL, - .put_inode = NULL, - .drop_inode = reiser4_drop_inode, + .put_inode = reiser4_put_inode, + .drop_inode = NULL, .delete_inode = reiser4_delete_inode, .put_super = reiser4_put_super, .write_super = reiser4_write_super, diff -puN fs/reiser4/vfs_ops.h~reiser4-update fs/reiser4/vfs_ops.h --- devel/fs/reiser4/vfs_ops.h~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/vfs_ops.h 2005-07-08 23:11:54.000000000 -0700 @@ -38,9 +38,12 @@ extern int reiser4_invalidatepage(struct extern int reiser4_releasepage(struct page *page, int gfp); extern int reiser4_writepages(struct address_space *, struct writeback_control *wbc); extern int reiser4_start_up_io(struct page *page); -extern void move_inode_out_from_sync_inodes_loop(struct address_space * mapping); extern void reiser4_clear_page_dirty(struct page *); extern void reiser4_throttle_write(struct inode*); + +#define CAPTURE_APAGE_BURST (1024l) +void writeout(struct super_block *, struct writeback_control *); + /* * this is used to speed up lookups for directory entry: on initial call to * ->lookup() seal and coord of directory entry (if found, that is) are stored diff -puN fs/reiser4/wander.c~reiser4-update fs/reiser4/wander.c --- devel/fs/reiser4/wander.c~reiser4-update 2005-07-08 23:11:54.000000000 -0700 +++ devel-akpm/fs/reiser4/wander.c 2005-07-08 23:11:54.000000000 
-0700 @@ -803,6 +803,7 @@ write_jnodes_to_disk_extent(capture_list LOCK_JNODE(cur); JF_SET(cur, JNODE_WRITEBACK); JF_CLR(cur, JNODE_DIRTY); + ON_DEBUG(cur->written ++); UNLOCK_JNODE(cur); SetPageWriteback(pg); @@ -1230,6 +1231,7 @@ write_jnodes_to_disk_extent(capture_list assert("nikita-3165", !jnode_is_releasable(cur)); JF_SET(cur, JNODE_WRITEBACK); JF_CLR(cur, JNODE_DIRTY); + ON_DEBUG(cur->written ++); UNLOCK_JNODE(cur); set_page_writeback(pg); _