From: Oleg Drokin This patch allows insertion of more than one "indirect" block pointer into the tree in reiserfs (with all the necessary balancing code changes). The first user of that feature is hole-creation code that is now ~1000 times more CPU-efficient for the case of large holes. fs/reiserfs/do_balan.c | 104 +++++++++++++++++++++--------------------- fs/reiserfs/inode.c | 48 ++++++++++++++++--- fs/reiserfs/tail_conversion.c | 5 -- 3 files changed, 94 insertions(+), 63 deletions(-) diff -puN fs/reiserfs/do_balan.c~reiserfs-multiple-block-insertion fs/reiserfs/do_balan.c --- 25/fs/reiserfs/do_balan.c~reiserfs-multiple-block-insertion 2003-05-17 14:09:35.000000000 -0700 +++ 25-akpm/fs/reiserfs/do_balan.c 2003-05-17 14:09:35.000000000 -0700 @@ -319,8 +319,6 @@ static int balance_leaf (struct tree_bal int new_item_len; int version; - RFALSE (!is_direct_le_ih (ih), - "PAP-12075: only direct inserted item can be broken. %h", ih); ret_val = leaf_shift_left (tb, tb->lnum[0]-1, -1); /* Calculate item length to insert to S[0] */ @@ -343,7 +341,7 @@ static int balance_leaf (struct tree_bal version = ih_version (ih); /* Calculate key component, item length and body to insert into S[0] */ - set_le_ih_k_offset( ih, le_ih_k_offset( ih ) + tb->lbytes ); + set_le_ih_k_offset( ih, le_ih_k_offset( ih ) + (tb->lbytes << (is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0)) ); put_ih_item_len( ih, new_item_len ); if ( tb->lbytes > zeros_num ) { @@ -452,23 +450,28 @@ static int balance_leaf (struct tree_bal ih_item_len( B_N_PITEM_HEAD(tb->L[0],n+item_pos-ret_val)), l_n,body, zeros_num > l_n ? 
l_n : zeros_num ); - - RFALSE( l_n && - is_indirect_le_ih(B_N_PITEM_HEAD - (tb->L[0], - n + item_pos - ret_val)), - "PAP-12110: pasting more than 1 unformatted node pointer into indirect item"); - /* 0-th item in S0 can be only of DIRECT type when l_n != 0*/ { - int version; - - version = ih_version (B_N_PITEM_HEAD (tbS0, 0)); - set_le_key_k_offset (version, B_N_PKEY (tbS0, 0), - le_key_k_offset (version, B_N_PKEY (tbS0, 0)) + l_n); - version = ih_version (B_N_PITEM_HEAD(tb->CFL[0],tb->lkey[0])); - set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]), - le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0])) + l_n); + int version; + int temp_l = l_n; + + RFALSE (ih_item_len (B_N_PITEM_HEAD (tbS0, 0)), + "PAP-12106: item length must be 0"); + RFALSE (comp_short_le_keys (B_N_PKEY (tbS0, 0), + B_N_PKEY (tb->L[0], + n + item_pos - ret_val)), + "PAP-12107: items must be of the same file"); + if (is_indirect_le_ih(B_N_PITEM_HEAD (tb->L[0], + n + item_pos - ret_val))) { + temp_l = l_n << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT); + } + /* update key of first item in S0 */ + version = ih_version (B_N_PITEM_HEAD (tbS0, 0)); + set_le_key_k_offset (version, B_N_PKEY (tbS0, 0), + le_key_k_offset (version, B_N_PKEY (tbS0, 0)) + temp_l); + /* update left delimiting key */ + set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]), + le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0])) + temp_l); } /* Calculate new body, position in item and insert_size[0] */ @@ -537,7 +540,7 @@ static int balance_leaf (struct tree_bal ); /* if appended item is indirect item, put unformatted node into un list */ if (is_indirect_le_ih (pasted)) - set_ih_free_space (pasted, ((struct unfm_nodeinfo*)body)->unfm_freespace); + set_ih_free_space (pasted, 0); tb->insert_size[0] = 0; zeros_num = 0; } @@ -565,15 +568,11 @@ static int balance_leaf (struct tree_bal { /* new item or its part falls to R[0] */ if ( item_pos == n - tb->rnum[0] + 1 && 
tb->rbytes != -1 ) { /* part of new item falls into R[0] */ - int old_key_comp, old_len, r_zeros_number; + loff_t old_key_comp, old_len, r_zeros_number; const char * r_body; int version; loff_t offset; - RFALSE( !is_direct_le_ih (ih), - "PAP-12135: only direct item can be split. (%h)", - ih); - leaf_shift_right(tb,tb->rnum[0]-1,-1); version = ih_version(ih); @@ -582,7 +581,7 @@ static int balance_leaf (struct tree_bal old_len = ih_item_len(ih); /* Calculate key component and item length to insert into R[0] */ - offset = le_ih_k_offset( ih ) + (old_len - tb->rbytes ); + offset = le_ih_k_offset( ih ) + ((old_len - tb->rbytes )<<(is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0)); set_le_ih_k_offset( ih, offset ); put_ih_item_len( ih, tb->rbytes); /* Insert part of the item into R[0] */ @@ -590,13 +589,13 @@ static int balance_leaf (struct tree_bal bi.bi_bh = tb->R[0]; bi.bi_parent = tb->FR[0]; bi.bi_position = get_right_neighbor_position (tb, 0); - if ( offset - old_key_comp > zeros_num ) { + if ( (old_len - tb->rbytes) > zeros_num ) { r_zeros_number = 0; - r_body = body + offset - old_key_comp - zeros_num; + r_body = body + (old_len - tb->rbytes) - zeros_num; } else { r_body = body; - r_zeros_number = zeros_num - (offset - old_key_comp); + r_zeros_number = zeros_num - (old_len - tb->rbytes); zeros_num -= r_zeros_number; } @@ -707,12 +706,17 @@ static int balance_leaf (struct tree_bal { int version; + unsigned long temp_rem = n_rem; version = ih_version (B_N_PITEM_HEAD (tb->R[0],0)); + if (is_indirect_le_key(version,B_N_PKEY(tb->R[0],0))){ + temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits - + UNFM_P_SHIFT); + } set_le_key_k_offset (version, B_N_PKEY(tb->R[0],0), - le_key_k_offset (version, B_N_PKEY(tb->R[0],0)) + n_rem); + le_key_k_offset (version, B_N_PKEY(tb->R[0],0)) + temp_rem); set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0]), - le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) + n_rem); + le_key_k_offset 
(version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) + temp_rem); } /* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem; k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/ @@ -736,13 +740,12 @@ static int balance_leaf (struct tree_bal leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem, r_body, r_zeros_number); if (is_indirect_le_ih (B_N_PITEM_HEAD(tb->R[0],0))) { - +#if 0 RFALSE( n_rem, "PAP-12160: paste more than one unformatted node pointer"); - - set_ih_free_space (B_N_PITEM_HEAD(tb->R[0],0), ((struct unfm_nodeinfo*)body)->unfm_freespace); +#endif + set_ih_free_space (B_N_PITEM_HEAD(tb->R[0],0), 0); } - tb->insert_size[0] = n_rem; if ( ! n_rem ) pos_in_item ++; @@ -781,7 +784,7 @@ static int balance_leaf (struct tree_bal } if (is_indirect_le_ih (pasted)) - set_ih_free_space (pasted, ((struct unfm_nodeinfo*)body)->unfm_freespace); + set_ih_free_space (pasted, 0); zeros_num = tb->insert_size[0] = 0; } } @@ -858,12 +861,6 @@ static int balance_leaf (struct tree_bal const char * r_body; int version; - RFALSE( !is_direct_le_ih(ih), - /* The items which can be inserted are: - Stat_data item, direct item, indirect item and directory item which consist of only two entries "." and "..". - These items must not be broken except for a direct one. 
*/ - "PAP-12205: non-direct item can not be broken when inserting"); - /* Move snum[i]-1 items from S[0] to S_new[i] */ leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, -1, S_new[i]); /* Remember key component and item length */ @@ -873,7 +870,7 @@ static int balance_leaf (struct tree_bal /* Calculate key component and item length to insert into S_new[i] */ set_le_ih_k_offset( ih, - le_ih_k_offset(ih) + (old_len - sbytes[i] ) ); + le_ih_k_offset(ih) + ((old_len - sbytes[i] )<<(is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0)) ); put_ih_item_len( ih, sbytes[i] ); @@ -883,13 +880,13 @@ static int balance_leaf (struct tree_bal bi.bi_parent = 0; bi.bi_position = 0; - if ( le_ih_k_offset (ih) - old_key_comp > zeros_num ) { + if ( (old_len - sbytes[i]) > zeros_num ) { r_zeros_number = 0; - r_body = body + (le_ih_k_offset(ih) - old_key_comp) - zeros_num; + r_body = body + (old_len - sbytes[i]) - zeros_num; } else { r_body = body; - r_zeros_number = zeros_num - (le_ih_k_offset (ih) - old_key_comp); + r_zeros_number = zeros_num - (old_len - sbytes[i]); zeros_num -= r_zeros_number; } @@ -1010,11 +1007,13 @@ static int balance_leaf (struct tree_bal tmp = B_N_PITEM_HEAD(S_new[i],0); if (is_indirect_le_ih (tmp)) { - if (n_rem) - reiserfs_panic (tb->tb_sb, "PAP-12230: balance_leaf: invalid action with indirect item"); - set_ih_free_space (tmp, ((struct unfm_nodeinfo*)body)->unfm_freespace); + set_ih_free_space (tmp, 0); + set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) + + (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT))); + } else { + set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) + + n_rem ); } - set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) + n_rem ); } tb->insert_size[0] = n_rem; @@ -1060,7 +1059,7 @@ static int balance_leaf (struct tree_bal /* if we paste to indirect item update ih_free_space */ if (is_indirect_le_ih (pasted)) - set_ih_free_space (pasted, ((struct unfm_nodeinfo*)body)->unfm_freespace); + set_ih_free_space (pasted, 0); zeros_num = 
tb->insert_size[0] = 0; } } @@ -1152,11 +1151,12 @@ static int balance_leaf (struct tree_bal leaf_paste_in_buffer (&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num); if (is_indirect_le_ih (pasted)) { - +#if 0 RFALSE( tb->insert_size[0] != UNFM_P_SIZE, "PAP-12280: insert_size for indirect item must be %d, not %d", UNFM_P_SIZE, tb->insert_size[0]); - set_ih_free_space (pasted, ((struct unfm_nodeinfo*)body)->unfm_freespace); +#endif + set_ih_free_space (pasted, 0); } tb->insert_size[0] = 0; } diff -puN fs/reiserfs/inode.c~reiserfs-multiple-block-insertion fs/reiserfs/inode.c --- 25/fs/reiserfs/inode.c~reiserfs-multiple-block-insertion 2003-05-17 14:09:35.000000000 -0700 +++ 25-akpm/fs/reiserfs/inode.c 2003-05-17 14:09:35.000000000 -0700 @@ -766,7 +766,11 @@ int reiserfs_get_block (struct inode * i pointer to 'block'-th block use block, which is already allocated */ struct cpu_key tmp_key; - struct unfm_nodeinfo un = {0, 0}; + unp_t unf_single=0; // We use this in case we need to allocate only + // one block which is a fastpath + unp_t *un; + __u64 max_to_insert=MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE; + __u64 blocks_needed; RFALSE( pos_in_item != ih_item_len(ih) / UNFM_P_SIZE, "vs-804: invalid position for append"); @@ -775,30 +779,58 @@ int reiserfs_get_block (struct inode * i le_key_k_offset (version, &(ih->ih_key)) + op_bytes_number (ih, inode->i_sb->s_blocksize), //pos_in_item * inode->i_sb->s_blocksize, TYPE_INDIRECT, 3);// key type is unimportant - - if (cpu_key_k_offset (&tmp_key) == cpu_key_k_offset (&key)) { + + blocks_needed = 1 + ((cpu_key_k_offset (&key) - cpu_key_k_offset (&tmp_key)) >> inode->i_sb->s_blocksize_bits); + RFALSE( blocks_needed < 0, "green-805: invalid offset"); + + if ( blocks_needed == 1 ) { + un = &unf_single; + } else { + un=kmalloc( min(blocks_needed,max_to_insert)*UNFM_P_SIZE, + GFP_ATOMIC); // We need to avoid scheduling. 
+ if ( !un) { + un = &unf_single; + blocks_needed = 1; + max_to_insert = 0; + } else + memset(un, 0, UNFM_P_SIZE * min(blocks_needed,max_to_insert)); + } + if ( blocks_needed <= max_to_insert) { /* we are going to add target block to the file. Use allocated block for that */ - un.unfm_nodenum = cpu_to_le32 (allocated_block_nr); + un[blocks_needed-1] = cpu_to_le32 (allocated_block_nr); set_block_dev_mapped (bh_result, allocated_block_nr, inode); set_buffer_new(bh_result); done = 1; } else { /* paste hole to the indirect item */ + /* If kmalloc failed, max_to_insert becomes zero and it means we + only have space for one block */ + blocks_needed=max_to_insert?max_to_insert:1; } - retval = reiserfs_paste_into_item (&th, &path, &tmp_key, (char *)&un, UNFM_P_SIZE); + retval = reiserfs_paste_into_item (&th, &path, &tmp_key, (char *)un, UNFM_P_SIZE * blocks_needed); + + if (blocks_needed != 1) + kfree(un); + if (retval) { reiserfs_free_block (&th, allocated_block_nr); goto failure; } - if (un.unfm_nodenum) + if (done) { inode->i_blocks += inode->i_sb->s_blocksize / 512; + } else { + /* We need to mark new file size in case this function will be + interrupted/aborted later on. And we may do this only for + holes. */ + inode->i_size += inode->i_sb->s_blocksize * blocks_needed; + } //mark_tail_converted (inode); } - + if (done == 1) break; - + /* this loop could log more blocks than we had originally asked ** for. So, we have to allow the transaction to end if it is ** too big or too full. 
Update the inode so things are diff -puN fs/reiserfs/tail_conversion.c~reiserfs-multiple-block-insertion fs/reiserfs/tail_conversion.c --- 25/fs/reiserfs/tail_conversion.c~reiserfs-multiple-block-insertion 2003-05-17 14:09:35.000000000 -0700 +++ 25-akpm/fs/reiserfs/tail_conversion.c 2003-05-17 14:09:35.000000000 -0700 @@ -30,7 +30,7 @@ int direct2indirect (struct reiserfs_tra key of unfm pointer to be pasted */ int n_blk_size, n_retval; /* returned value for reiserfs_insert_item and clones */ - struct unfm_nodeinfo unfm_ptr; /* Handle on an unformatted node + unp_t unfm_ptr; /* Handle on an unformatted node that will be inserted in the tree. */ @@ -59,8 +59,7 @@ int direct2indirect (struct reiserfs_tra p_le_ih = PATH_PITEM_HEAD (path); - unfm_ptr.unfm_nodenum = cpu_to_le32 (unbh->b_blocknr); - unfm_ptr.unfm_freespace = 0; // ??? + unfm_ptr = cpu_to_le32 (unbh->b_blocknr); if ( is_statdata_le_ih (p_le_ih) ) { /* Insert new indirect item. */ _