From be735d8a0acacc5c901396d4a618be7043e5a1a9 Mon Sep 17 00:00:00 2001 From: Daniel Phillips Date: Sun, 27 Apr 2014 15:23:24 +0900 Subject: tux3: Remove dleaf1 data leaf format The idea of dleaf1 was to implement a compressed leaf format with about 40% more extents per block but implementation details were too complex. If we ever want this and have time to make it work properly we can add it back. Signed-off-by: Daniel Phillips Signed-off-by: OGAWA Hirofumi --- fs/tux3/dleaf.c | 887 -------------------------------------------------------- 1 file changed, 887 deletions(-) delete mode 100644 fs/tux3/dleaf.c diff --git a/fs/tux3/dleaf.c b/fs/tux3/dleaf.c deleted file mode 100644 index 88a30c98c74cd5..00000000000000 --- a/fs/tux3/dleaf.c +++ /dev/null @@ -1,887 +0,0 @@ -/* - * File index btree leaf operations - * - * Original copyright (c) 2008 Daniel Phillips - * Licensed under the GPL version 2 - * - * By contributing changes to this file you grant the original copyright holder - * the right to distribute those changes under any license. - */ - -#include "tux3.h" -#include "dleaf.h" - -#ifndef trace -#define trace trace_on -#endif - -/* - * Leaf index format - * - * A leaf has a small header followed by a table of extents. A two level - * index grows down from the top of the leaf towards the top of the extent - * table. The index maps each unique logical address in the leaf to one or - * more extents beginning at that address. - * - * The top level index is a table of groups of entries all having the same - * high 24 bits of logical address which is only stored once, along with the - * 8 bit count of entries in the group. Since there can be more than 256 - * entries at the same logical address, there could be more than one group - * with the same logical address. The group count is used both to know the - * number of entries in the group and to find the beginning of the entry table - * for a given group, by adding up the sizes of the proceeding groups. - * - * The 8 bit entry limit limits the number of different versions at the same - * logical address to 255. For now. - * - * The second level entry tables are stored end to end in reverse immediately - * below the groups table, also stored in reverse. Each entry has the low 24 - * bits of the logical address and the 8 bit 'limit' offset of the last extent - * for that logical address, measuring from the first extent for the group in - * units of extent size. The limit is used rather than an offset so that the - * final offset is the count of extents in the group, which is summed up to - * locate the first extent for the group in the extent table. The difference - * between and entry limit and the limit of its predecessor gives the count of - * extents for the logical address specified by the entry. - * - * At the top level of a very large or very sparse btree it is likely that the - * group table will be relatively larger, up to the same size as all the entry - * tables. This does not matter much in terms of overall btree bulk. A few - * levels down the logical address space will have been split to the point - * where most entries in a leaf fit into one entry table. - * - * This leaf indexing scheme has some obscure boundary conditions, such as - * the zeroth entry of a group having no predecessor and thus needing to have - * a special check to supply zero as the preceding limit. Inserting and - * deleting are fairly involved and subtle. But the space required to index - * extents in a deep btree is reduced considerably, which is compelling. In - * the end, the indexing scheme provides access to a simple linear table of - * extents and a count, so there is little impact on the specialized methods - * that operate on those extents due to the complexity of the indexing scheme. - * The lookup operation on this index is very efficient. Each level of the - * index is suited to binary search. A sequence of inserts in ascending order - * in the same group requires no existing entries to be relocated, the reason - * the entry list is stored in reverse. - */ - -static void dleaf_btree_init(struct btree *btree) -{ - btree->entries_per_leaf = 64; /* FIXME: should depend on blocksize */ -} - -int dleaf_init(struct btree *btree, void *leaf) -{ - struct dleaf *dleaf = leaf; - *dleaf = (struct dleaf){ - .magic = cpu_to_be16(TUX3_MAGIC_DLEAF), - .free = cpu_to_be16(sizeof(struct dleaf)), - .used = cpu_to_be16(btree->sb->blocksize) - }; - return 0; -} - -unsigned dleaf_free(struct btree *btree, void *leaf) -{ - struct dleaf *dleaf = leaf; - return be16_to_cpu(dleaf->used) - be16_to_cpu(dleaf->free); -} - -static unsigned dleaf_need(struct btree *btree, void *vleaf) -{ - struct dleaf *leaf = vleaf; - return btree->sb->blocksize - dleaf_free(btree, leaf) - sizeof(struct dleaf); -} - -static inline tuxkey_t get_index(struct group *group, struct entry *entry) -{ - return ((tuxkey_t)group_keyhi(group) << 24) | entry_keylo(entry); -} - -static int dleaf_sniff(struct btree *btree, void *leaf) -{ - struct dleaf *dleaf = leaf; - if (dleaf->magic != cpu_to_be16(TUX3_MAGIC_DLEAF)) - return -1; - return 0; -} - -static int dleaf_can_free(struct btree *btree, void *vleaf) -{ - return dleaf_need(btree, vleaf) == 0; -} - -void dleaf_dump(struct btree *btree, void *vleaf) -{ - if (!tux3_trace) - return; - - unsigned blocksize = btree->sb->blocksize; - struct dleaf *leaf = vleaf; - struct group *gdict = (void *)leaf + blocksize, *gbase = --gdict - dleaf_groups(leaf); - struct entry *edict = (void *)(gbase + 1), *entry = edict; - struct diskextent *extents = leaf->table; - - __tux3_dbg("%i entry groups:\n", dleaf_groups(leaf)); - for (struct group *group = gdict; group > gbase; group--) { - __tux3_dbg(" %ti/%i:", gdict - group, group_count(group)); - //__tux3_dbg(" [%i]", extents - leaf->table); - struct entry *ebase = entry - group_count(group); - while (entry > ebase) { - --entry; - unsigned offset = entry == edict - 1 ? 0 : entry_limit(entry + 1); - int count = entry_limit(entry) - offset; - __tux3_dbg(" %Lx =>", get_index(group, entry)); - //__tux3_dbg(" %p (%i)", entry, entry_limit(entry)); - if (count < 0) - __tux3_dbg(" "); - else for (int i = 0; i < count; i++) { - struct diskextent extent = extents[offset + i]; - __tux3_dbg(" %Lx", extent_block(extent)); - if (extent_count(extent)) - __tux3_dbg("/%x", extent_count(extent)); - } - //__tux3_dbg(" {%u}", entry_limit(entry)); - __tux3_dbg(";"); - } - __tux3_dbg("\n"); - extents += entry_limit(entry); - edict -= group_count(group); - } -} - -static int dleaf_free2(struct dleaf *leaf, unsigned blocksize) -{ - struct group *gdict = (void *)leaf + blocksize, *gstop = gdict - dleaf_groups(leaf); - struct entry *edict = (void *)gstop, *entry = edict; - struct diskextent *extents = leaf->table; - - for (struct group *group = gdict; group-- > gstop;) - extents += entry_limit(entry -= group_count(group)); - return (void *)entry - (void *)extents; -} - -static int dleaf_check(struct dleaf *leaf, unsigned blocksize) -{ - struct group *gdict = (void *)leaf + blocksize, *gstop = gdict - dleaf_groups(leaf); - struct entry *edict = (void *)gstop, *estop = edict; - struct diskextent *extents = leaf->table; - unsigned excount = 0, encount = 0; - char *why; - - if (!dleaf_groups(leaf)) - return 0; - - unsigned keyhi = 0; - struct diskextent *exbase = leaf->table; - for (struct group *group = gdict - 1; group >= gstop; group--) { - assert(group_keyhi(group) >= keyhi); - assert(group_count(group) > 0); - assert(group_count(group) <= MAX_GROUP_ENTRIES); - keyhi = group_keyhi(group); - struct entry *entry = estop; - estop -= group_count(group); - unsigned limit = 0, keylo = -1; - while (--entry >= estop) { - assert((int)entry_keylo(entry) > (int)keylo); - assert(entry_limit(entry) > limit); - keylo = entry_keylo(entry); - limit = entry_limit(entry); - } - struct diskextent *exstop = exbase + entry_limit(estop); - block_t block = 0; - while (exbase < exstop) { - assert(extent_block(*exbase) != block); - exbase++; - } - excount += entry_limit(estop); - encount += group_count(group); - } - //tux3_dbg("encount = %i, excount = %i", encount, excount); - why = "used count wrong"; - if (be16_to_cpu(leaf->used) != (void *)(edict - encount) - (void *)leaf) - goto eek; - why = "free count wrong"; - if (be16_to_cpu(leaf->free) != (void *)(extents + excount) - (void *)leaf) - goto eek; - why = "free check mismatch"; - if (be16_to_cpu(leaf->used) - be16_to_cpu(leaf->free) != dleaf_free2(leaf, blocksize)) - goto eek; - return 0; - -eek: - tux3_dbg("error: free %i, used %i", be16_to_cpu(leaf->free), be16_to_cpu(leaf->used)); - tux3_dbg("error: %s!", why); - return -1; -} - -static int dleaf_split_at(void *from, void *into, int split, unsigned blocksize) -{ - struct dleaf *leaf = from, *leaf2 = into; - unsigned groups = dleaf_groups(leaf), groups2; - struct group *gdict = from + blocksize, *gbase = gdict - groups; - struct entry *edict = (void *)gbase, *ebase = (void *)leaf + be16_to_cpu(leaf->used); - unsigned recount = 0, grsplit = 0, exsplit = 0; - unsigned entries = edict - ebase; - - trace("split %p into %p at %x", leaf, leaf2, split); - if (!groups) - return 0; - assert(split < entries); - for (struct group *group = gdict - 1; group >= gbase; group--, grsplit++) { - if (recount + group_count(group) > split) - break; - edict -= group_count(group); - exsplit += entry_limit(edict); - recount += group_count(group); - } - - /* have to split a group? */ - unsigned cut = split - recount; - if (cut) - exsplit += entry_limit(edict - cut); - edict = (void *)gbase; /* restore it */ - trace("split %i entries at group %i, entry %x", entries, grsplit, cut); - trace("split extents at %i", exsplit); - /* copy extents */ - unsigned size = from + be16_to_cpu(leaf->free) - (void *)(leaf->table + exsplit); - memcpy(leaf2->table, leaf->table + exsplit, size); - - /* copy groups */ - struct group *gdict2 = (void *)leaf2 + blocksize; - set_dleaf_groups(leaf2, groups2 = (groups - grsplit)); - veccopy(gdict2 - dleaf_groups(leaf2), gbase, dleaf_groups(leaf2)); - inc_group_count(gdict2 - 1, -cut); - set_dleaf_groups(leaf, groups = (grsplit + !!cut)); - gbase = gdict - groups; - if (cut) - set_group_count(gdict - groups, cut); - - /* copy entries */ - struct entry *edict2 = (void *)(gdict2 - groups2); - - assert((struct entry *)((void *)leaf + be16_to_cpu(leaf->used)) == edict - entries); - - unsigned encopy = entries - split; - veccopy(edict2 - encopy, ebase, encopy); - if (cut) - for (int i = 1; i <= group_count((gdict2 - 1)); i++) - inc_entry_limit(edict2 - i, -entry_limit(edict - split)); - vecmove(gdict - groups - split, edict - split, split); - - /* clean up */ - leaf->free = cpu_to_be16((void *)(leaf->table + exsplit) - from); - leaf->used = cpu_to_be16((void *)(gbase - split) - from); - leaf2->free = cpu_to_be16((void *)leaf->table + size - from); - leaf2->used = cpu_to_be16((void *)(gdict - groups2 - encopy) - from); - memset(from + be16_to_cpu(leaf->free), 0, be16_to_cpu(leaf->used) - be16_to_cpu(leaf->free)); - assert(!dleaf_check(leaf, blocksize)); - assert(!dleaf_check(leaf2, blocksize)); - return groups2; -} - -/* - * Split dleaf at middle in terms of entries, may be unbalanced in extents. - * Not used for now because we do the splits by hand in filemap.c - */ -static tuxkey_t dleaf_split(struct btree *btree, tuxkey_t hint, void *from, void *into) -{ - struct dleaf *leaf = from, *leaf2 = into; - assert(!dleaf_sniff(btree, from)); - unsigned blocksize = btree->sb->blocksize; - struct group *gdict = from + blocksize, *gbase = gdict - dleaf_groups(leaf); - struct entry *edict = (void *)gbase; - struct entry *ebase = (void *)leaf + be16_to_cpu(leaf->used); - unsigned entries = edict - ebase; - assert(entries >= 2); - unsigned groups2 = dleaf_split_at(from, into, entries / 2, blocksize); - struct group *gdict2 = (void *)leaf2 + blocksize; - - return get_index(gdict2 - 1, (struct entry *)(gdict2 - groups2) - 1); -} - -/* - * Try to merge dleaf - * - * return value: - * 0 - couldn't merge - * 1 - merged - */ -int dleaf_merge(struct btree *btree, void *vinto, void *vfrom) -{ - struct dleaf *leaf = vinto, *from = vfrom; - struct group *gdict = (void *)leaf + btree->sb->blocksize; - struct group *gstop = gdict - dleaf_groups(leaf); - struct entry *edict = (struct entry *)gstop; - unsigned free = be16_to_cpu(leaf->free); - struct group *gdict2 = (void *)from + btree->sb->blocksize; - struct group *group2 = gdict2 - 1; - struct group *gstop2 = gdict2 - dleaf_groups(from); - struct entry *edict2 = (struct entry *)gstop2; - unsigned merge_gcount = 0, rest_gcount = 0; - int can_merge_group = 0; - - /* Source is empty, so we do nothing */ - if (dleaf_groups(from) == 0) - return 1; - - /* Destination is empty, so we just copy */ - if (dleaf_groups(leaf) == 0) { - unsigned used = be16_to_cpu(from->used); - memcpy(leaf, from, be16_to_cpu(from->free)); - memcpy((void *)leaf + used, (void *)from + used, btree->sb->blocksize - used); - return 1; - } - - /* - * Check if there is space. - * FIXME: we may be able to merge more if group/entry can be merged - */ - if (dleaf_need(btree, vfrom) > dleaf_free(btree, vinto)) - return 0; - - /* Try to merge group, and prepare to adjust */ - if (group_keyhi(gstop) == group_keyhi(group2) && - group_count(gstop) < MAX_GROUP_ENTRIES) { - unsigned gcount2 = group_count(group2); - unsigned room = MAX_GROUP_ENTRIES - group_count(gstop); - /* All entries can merge to this group? */ - if (room < gcount2) { - /* Calc adjust for the rest of group/entries */ - rest_gcount = gcount2 - room; - gcount2 = room; - } else - can_merge_group = 1; - - /* group/entries which merge */ - merge_gcount = gcount2; - } - - /* append extents */ - unsigned size = be16_to_cpu(from->free) - sizeof(struct dleaf); - memcpy((void *)leaf + free, from->table, size); - leaf->free = cpu_to_be16(free + size); - - /* make space and append groups except for possibly merged group */ - assert(sizeof(struct group) == sizeof(struct entry)); - unsigned addgroups = dleaf_groups(from) - can_merge_group; - struct entry *ebase2 = (void *)from + be16_to_cpu(from->used); - struct entry *ebase = (void *)leaf + be16_to_cpu(leaf->used); - vecmove(ebase - addgroups, ebase, edict - ebase); - veccopy(gstop - addgroups, gstop2, addgroups); - ebase -= addgroups; - inc_dleaf_groups(leaf, addgroups); - - /* append entries */ - size = (void *)edict2 - (void *)ebase2; - memcpy((void *)ebase - size, ebase2, size); - leaf->used = cpu_to_be16((void *)ebase - size - (void *)leaf); - - if (merge_gcount) { - /* adjust merged group */ - struct entry *estop = ebase - merge_gcount; - unsigned limit_adjust = entry_limit(ebase); - inc_group_count(gstop, merge_gcount); - while (--ebase >= estop) - inc_entry_limit(ebase, limit_adjust); - if (rest_gcount) { - /* adjust the group/entries which couldn't merge */ - ebase = estop; - estop = ebase - rest_gcount; - limit_adjust = entry_limit(edict2 - merge_gcount); - set_group_count(gstop - 1, rest_gcount); - while (--ebase >= estop) - inc_entry_limit(ebase, -limit_adjust); - } - } - assert(!dleaf_check(leaf, btree->sb->blocksize)); - - return 1; -} - -/* - * dleaf format and dwalk structure - * - * min address +--------------------------+ - * | dleaf header | - * | | extent <0> (gr 0, ent 0) | __ walk->exbase - * growing downwards | | extent <0> (gr 1, ent 0) | __ walk->extent - * | | extent <1> (gr 1, ent 1) | __ walk->exstop - * V | extent <2> (gr 1, ent 2) | - * | | - * | ....... | - * | | __ walk->estop - * | entry <2> (gr 1) | - * | entry <1> (gr 1) | __ walk->entry - * ^ | entry <0> (gr 1) | - * | | entry <0> (gr 0) | __ walk->group,walk->gstop - * growing upwards | | group <1> | - * | | group <0> | - * max address +--------------------------+ __ walk->gdict - * - * The above is dleaf format, and now dwalk_next() was called 2 times. - * - * ->gdict is the end of dleaf. - * ->group is the current group (group <1>) - * ->gstop is the last group in this dleaf - * ->entry is the current entry (entry <0> (gr 1)) - * ->estop is the last entry in current group - * ->exbase is the first extent in current group - * ->extent is the current extent (extent <1> (gr1, ent 1)). - * ->exstop is the first extent in next entry. - * (I.e. the address that dwalk_next() has to update to next entry. - * If there is no next, it will stop with ->extent == ->exstop.) - */ - -void dwalk_redirect(struct dwalk *walk, struct dleaf *src, struct dleaf *dst) -{ - walk->leaf = dst; - walk->group = ptr_redirect(walk->group, src, dst); - walk->gstop = ptr_redirect(walk->gstop, src, dst); - walk->gdict = ptr_redirect(walk->gdict, src, dst); - walk->entry = ptr_redirect(walk->entry, src, dst); - walk->estop = ptr_redirect(walk->estop, src, dst); - walk->exbase = ptr_redirect(walk->exbase, src, dst); - walk->extent = ptr_redirect(walk->extent, src, dst); - walk->exstop = ptr_redirect(walk->exstop, src, dst); -} - -/* FIXME: current code is assuming the entry has only one extent. */ - -/* The first extent in dleaf */ -static int dwalk_first(struct dwalk *walk) -{ - return walk->leaf->table == walk->extent; -} - -/* The end of extent in dleaf */ -int dwalk_end(struct dwalk *walk) -{ - return walk->extent == walk->exstop; -} - -tuxkey_t dwalk_index(struct dwalk *walk) -{ - return get_index(walk->group, walk->entry); -} - -block_t dwalk_block(struct dwalk *walk) -{ - return extent_block(*walk->extent); -} - -unsigned dwalk_count(struct dwalk *walk) -{ - return extent_count(*walk->extent); -} - -/* unused */ -void dwalk_dump(struct dwalk *walk) -{ - if (walk->leaf->table == walk->exstop) { - trace_on("empty leaf"); - return; - } - if (dwalk_end(walk)) { - trace_on("end of extent"); - return; - } - struct diskextent *entry_exbase; - if (walk->entry + 1 == walk->estop + group_count(walk->group)) - entry_exbase = walk->exbase; - else - entry_exbase = walk->exbase + entry_limit(walk->entry + 1); - trace_on("leaf %p", walk->leaf); - trace_on("group %tu/%tu", (walk->gdict - walk->group) - 1, walk->gdict - walk->gstop); - trace_on("entry %tu/%u", group_count(walk->group) - (walk->entry - walk->estop) - 1, group_count(walk->group)); - trace_on("extent %tu/%tu", walk->extent - entry_exbase, walk->exstop - entry_exbase); -} - -static void dwalk_check(struct dwalk *walk) -{ - if (!dleaf_groups(walk->leaf)) { - assert(walk->group == walk->gstop); - assert(walk->entry == walk->estop); - assert(walk->exbase == walk->extent); - assert(walk->extent == walk->exstop); - assert(walk->leaf->table == walk->exstop); - } else if (dwalk_end(walk)) { - assert(walk->group == walk->gstop); - assert(walk->entry == walk->estop); - assert(walk->exbase < walk->extent); - assert(walk->extent == walk->exstop); - } else { - assert(walk->group >= walk->gstop); - assert(walk->entry >= walk->estop); - assert(walk->exbase <= walk->extent); - assert(walk->extent < walk->exstop); - /* - * This checks ->entry has only 1 ->extent. - * FIXME (and re-think): Assuming dleaf->entry has only 1 - * ->extent on some functions - */ - if (walk->entry + 1 == walk->estop + group_count(walk->group)) - assert(entry_limit(walk->entry) == 1); - else - assert(entry_limit(walk->entry) - entry_limit(walk->entry + 1) == 1); - } -} - -/* Set the cursor to next extent */ -int dwalk_next(struct dwalk *walk) -{ - /* last extent of this dleaf, or empty dleaf */ - if (dwalk_end(walk)) - return 0; - walk->extent++; - if (walk->extent == walk->exstop) { - if (walk->entry == walk->estop) { - if (walk->group == walk->gstop) - return 0; - walk->group--; - walk->exbase += entry_limit(walk->estop); - walk->estop -= group_count(walk->group); - } - walk->entry--; - walk->exstop = walk->exbase + entry_limit(walk->entry); - } - dwalk_check(walk); - return 1; -} - -/* Back to the previous extent. (i.e. rewind the previous dwalk_next()) */ -int dwalk_back(struct dwalk *walk) -{ - /* first extent of this dleaf, or empty dleaf */ - if (dwalk_first(walk)) - return 0; - struct diskextent *entry_exbase; - - if (walk->entry + 1 == walk->estop + group_count(walk->group)) - entry_exbase = walk->exbase; - else - entry_exbase = walk->exbase + entry_limit(walk->entry + 1); - walk->extent--; - if (walk->extent < entry_exbase) { - if (walk->extent < walk->exbase) { - if (walk->group == walk->gdict) - return 1; - walk->group++; - walk->estop = walk->entry + 1; - walk->exbase -= entry_limit(walk->entry + 1); - } - walk->entry++; - walk->exstop = walk->exbase + entry_limit(walk->entry); - } - dwalk_check(walk); - return 1; -} - -/* - * Probe the extent position with key. If not found, position is next - * extent of key. If probed all extents return 0, otherwise return 1 - * (I.e. current extent is valid. IOW, !dwalk_end()). - */ -int dwalk_probe(struct dleaf *leaf, unsigned blocksize, struct dwalk *walk, tuxkey_t key) -{ - trace("probe for 0x%Lx", key); - unsigned keylo = key & 0xffffff, keyhi = key >> 24; - - walk->leaf = leaf; - walk->gdict = (void *)leaf + blocksize; - walk->gstop = walk->gdict - dleaf_groups(leaf); - walk->group = walk->gdict; - walk->estop = (struct entry *)walk->gstop; - walk->exbase = leaf->table; - if (!dleaf_groups(leaf)) { - /* dwalk_first() and dwalk_end() will return true */ - walk->entry = (struct entry *)walk->gstop; - walk->extent = leaf->table; - walk->exstop = leaf->table; - dwalk_check(walk); - return 0; - } - - while (walk->group > walk->gstop) { - walk->group--; - walk->entry = walk->estop - 1; - walk->estop -= group_count(walk->group); - if (group_keyhi(walk->group) > keyhi) - goto no_group; - if (group_keyhi(walk->group) == keyhi) { - if (entry_keylo(walk->entry) > keylo) - goto no_group; - if (walk->group == walk->gstop) - goto probe_entry; - if (group_keyhi(walk->group - 1) > keyhi) - goto probe_entry; - if (entry_keylo(walk->estop - 1) > keylo) - goto probe_entry; - } - walk->exbase += entry_limit(walk->estop); - } - /* There is no group after this key */ - walk->entry = walk->estop; - walk->exstop = walk->exbase; - walk->extent = walk->exbase; - walk->exbase = walk->exbase - entry_limit(walk->estop); - dwalk_check(walk); - return 0; - -no_group: - /* There is no interesting group, set first extent in this group */ - walk->extent = walk->exbase; - walk->exstop = walk->exbase + entry_limit(walk->entry); - dwalk_check(walk); - return 1; - -probe_entry: - /* There is interesting group, next is probe interesting entry */ - walk->extent = walk->exbase; - walk->exstop = walk->exbase + entry_limit(walk->entry); - while (walk->entry > walk->estop) { - if (entry_keylo(walk->entry - 1) > keylo) - break; - walk->entry--; - walk->extent = walk->exstop; - walk->exstop = walk->exbase + entry_limit(walk->entry); - } - /* Now, entry has the nearest keylo (<= key), probe extent */ - /* FIXME: this is assuming the entry has only one extent */ - if (key < dwalk_index(walk) + dwalk_count(walk)) - return 1; - /* This entry didn't have the target extent, set next entry */ - dwalk_next(walk); - return !dwalk_end(walk); -} - -int dwalk_mock(struct dwalk *walk, tuxkey_t index, struct diskextent extent) -{ - if (!dleaf_groups(walk->leaf) || walk->entry == walk->estop || dwalk_index(walk) != index) { - trace("add entry 0x%Lx", index); - unsigned keylo = index & 0xffffff, keyhi = index >> 24; - if (!walk->mock.groups || group_keyhi(&walk->mock.group) != keyhi || group_count(&walk->mock.group) >= MAX_GROUP_ENTRIES) { - trace("add group %i", walk->mock.groups); - walk->exbase += entry_limit(&walk->mock.entry); - walk->mock.group = make_group(keyhi, 0); - walk->mock.used -= sizeof(struct group); - walk->mock.groups++; - } - walk->mock.used -= sizeof(struct entry); - walk->mock.entry = make_entry(keylo, walk->extent - walk->exbase); - inc_group_count(&walk->mock.group, 1); - } - trace("add extent 0x%Lx => 0x%Lx/%x", index, extent_block(extent), extent_count(extent)); - walk->mock.free += sizeof(*walk->extent); - walk->extent++; - inc_entry_limit(&walk->mock.entry, 1); - return 0; -} - -/* This copy extents >= this extent to another dleaf. */ -void dwalk_copy(struct dwalk *walk, struct dleaf *dest) -{ - struct dleaf *leaf = walk->leaf; - unsigned blocksize = (void *)walk->gdict - (void *)leaf; - - assert(dleaf_groups(dest) == 0); - if (dwalk_end(walk)) - return; - if (dwalk_first(walk)) { - memcpy(dest, leaf, blocksize); - return; - } - - struct group *gdict2 = (void *)dest + blocksize; - unsigned groups2 = walk->group + 1 - walk->gstop; - struct entry *ebase = (void *)leaf + be16_to_cpu(leaf->used); - unsigned entries = (walk->entry + 1) - ebase; - struct entry *edict2 = (struct entry *)(gdict2 - groups2); - struct diskextent *exend = (void *)leaf + be16_to_cpu(leaf->free); - struct diskextent *entry_exbase; - unsigned limit_adjust, extents; - - if (walk->entry + 1 == walk->estop + group_count(walk->group)) { - entry_exbase = walk->exbase; - limit_adjust = 0; - } else { - entry_exbase = walk->exbase + entry_limit(walk->entry + 1); - limit_adjust = entry_limit(walk->entry + 1); - } - extents = exend - entry_exbase; - - veccopy(gdict2 - groups2, walk->gstop, groups2); - veccopy(edict2 - entries, ebase, entries); - veccopy(dest->table, entry_exbase, extents); - - unsigned gcount2 = (walk->entry + 1) - walk->estop; - set_dleaf_groups(dest, groups2); - dest->free = cpu_to_be16((void *)(dest->table + extents) - (void *)dest); - dest->used = cpu_to_be16((void *)(edict2 - entries) - (void *)dest); - set_group_count(gdict2 - 1, gcount2); - struct entry *entry2 = edict2 - 1, *estop2 = edict2 - gcount2; - while (entry2 >= estop2) { - inc_entry_limit(entry2, -limit_adjust); - entry2--; - } - assert(!dleaf_check(dest, blocksize)); -} - -/* This removes extents >= this extent. (cursor position is dwalk_end()). */ -void dwalk_chop(struct dwalk *walk) -{ - trace(" "); - if (dwalk_end(walk)) - return; - struct dleaf *leaf = walk->leaf; - - if (dwalk_first(walk)) { - unsigned blocksize = (void *)walk->gdict - (void *)leaf; - set_dleaf_groups(leaf, 0); - leaf->free = cpu_to_be16(sizeof(struct dleaf)); - leaf->used = cpu_to_be16(blocksize); - /* Initialize dwalk state */ - dwalk_probe(leaf, blocksize, walk, 0); - return; - } - - /* If extent is first extent on this group, remove this group too */ - dwalk_back(walk); - - struct entry *ebase = walk->estop + group_count(walk->group); - void *entry = walk->entry; - set_dleaf_groups(leaf, walk->gdict - walk->group); - set_group_count(walk->group, ebase - walk->entry); - entry += (void *)walk->group - (void *)walk->gstop; - memmove(entry, walk->entry, (void *)walk->gstop - (void *)walk->entry); - walk->estop = walk->entry = entry; - walk->gstop = walk->group; - walk->exstop = walk->exbase + entry_limit(walk->entry); - walk->extent = walk->exstop; - leaf->free = cpu_to_be16((void *)walk->exstop - (void *)leaf); - leaf->used = cpu_to_be16((void *)walk->estop - (void *)leaf); - dwalk_check(walk); - assert(!dleaf_check(leaf, (void *)walk->gdict - (void *)leaf)); -} - -/* - * Add extent to dleaf. This can use only if dwalk_end() is true. - * Note, dwalk state is invalid after this. (I.e. it can be used only - * for dwalk_add()) - */ -int dwalk_add(struct dwalk *walk, tuxkey_t index, struct diskextent extent) -{ - struct dleaf *leaf = walk->leaf; - unsigned groups = dleaf_groups(leaf); - unsigned free = be16_to_cpu(leaf->free); - unsigned used = be16_to_cpu(leaf->used); - - /* FIXME: assume entry has only one extent */ - assert(!groups || dwalk_index(walk) != index); - assert(extent_block(extent) > 0 && extent_count(extent) > 0); - - trace("group %ti/%i", walk->gstop + groups - 1 - walk->group, groups); - if (!groups || dwalk_index(walk) != index) { - trace("add entry 0x%Lx", index); - unsigned keylo = index & 0xffffff, keyhi = index >> 24; - if (!groups || group_keyhi(walk->group) != keyhi || group_count(walk->group) >= MAX_GROUP_ENTRIES) { - trace("add group %i", groups); - /* will it fit? */ - assert(sizeof(*walk->entry) == sizeof(*walk->group)); - assert(free <= used - sizeof(*walk->entry)); - /* move entries down, adjust walk state */ - /* could preplan this to avoid move: need additional pack state */ - vecmove(walk->entry - 1, walk->entry, (struct entry *)walk->group - walk->entry); - walk->entry--; /* adjust to moved position */ - walk->exbase += groups ? entry_limit(walk->entry) : 0; - *--walk->group = make_group(keyhi, 0); - used -= sizeof(*walk->group); - set_dleaf_groups(leaf, ++groups); - } - assert(free <= used - sizeof(*walk->entry)); - used -= sizeof(*walk->entry); - leaf->used = cpu_to_be16(used); - *--walk->entry = make_entry(keylo, walk->extent - walk->exbase); - inc_group_count(walk->group, 1); - } - trace("add extent %ti", walk->extent - leaf->table); - assert(free + sizeof(*walk->extent) <= used); - free += sizeof(*walk->extent); - leaf->free = cpu_to_be16(free); - *walk->extent++ = extent; - inc_entry_limit(walk->entry, 1); - - assert(!dleaf_check(leaf, (void *)walk->gdict - (void *)walk->leaf)); - - return 0; // extent out of order??? leaf full??? -} - -/* Update this extent. The caller have to check new extent isn't overlapping. */ -static void dwalk_update(struct dwalk *walk, struct diskextent extent) -{ - *walk->extent = extent; -} - -/* - * Reasons this dleaf truncator sucks: - * - * * Does not check for integrity at all so a corrupted leaf can cause overflow - * and system corruption. - * - * * Assumes all block pointers after the truncation point will be deleted, - * which does not hold when versions arrive. - * - * * Modifies a group count in the middle of the traversal knowing that it has - * already loaded the changed field and will not load it again, fragile. - * - * * Does not provide a generic mechanism that can be adapted to other - * truncation tasks. - * - * But it does truncate so it is getting checked in just for now. - */ -static int dleaf_chop(struct btree *btree, tuxkey_t start, u64 len, void *vleaf) -{ - struct sb *sb = btree->sb; - struct dleaf *leaf = vleaf; - struct dwalk walk; - - /* FIXME: range chop is unsupported for now */ - assert(len == TUXKEY_LIMIT); - - if (!dwalk_probe(leaf, sb->blocksize, &walk, start)) - return 0; - - /* Chop this extent partially */ - if (dwalk_index(&walk) < start) { - block_t block = dwalk_block(&walk); - unsigned count = start - dwalk_index(&walk); - - defer_bfree(sb, &sb->defree, block + count, dwalk_count(&walk) - count); - log_bfree(sb, block + count, dwalk_count(&walk) - count); - - dwalk_update(&walk, make_extent(block, count)); - if (!dwalk_next(&walk)) - goto out; - } - struct dwalk rewind = walk; - do { - defer_bfree(sb, &sb->defree, dwalk_block(&walk), dwalk_count(&walk)); - log_bfree(sb, dwalk_block(&walk), dwalk_count(&walk)); - } while (dwalk_next(&walk)); - dwalk_chop(&rewind); -out: - assert(!dleaf_check(leaf, sb->blocksize)); - return 1; -} - -struct btree_ops dtree1_ops = { - .btree_init = dleaf_btree_init, - .leaf_init = dleaf_init, - .leaf_split = dleaf_split, -// .leaf_resize = dleaf_resize, - .leaf_chop = dleaf_chop, - .leaf_merge = dleaf_merge, - - .leaf_sniff = dleaf_sniff, - .leaf_can_free = dleaf_can_free, - .leaf_dump = dleaf_dump, -}; -- cgit 1.2.3-korg