aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniel Phillips <daniel@tux3.org>2014-04-27 15:23:24 +0900
committerDaniel Phillips <daniel@tux3.org>2014-04-27 15:23:24 +0900
commitbe735d8a0acacc5c901396d4a618be7043e5a1a9 (patch)
tree9ba7319ff5690faf592307e321bd06917024c645
parent3351d81d41fd072518c435a937e22815909b1f97 (diff)
downloadlinux-tux3-be735d8a0acacc5c901396d4a618be7043e5a1a9.tar.gz
tux3: Remove dleaf1 data leaf format
The idea of dleaf1 was to implement a compressed leaf format with about 40% more extents per block but implementation details were too complex. If we ever want this and have time to make it work properly we can add it back. Signed-off-by: Daniel Phillips <d.phillips@partner.samsung.com> Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
-rw-r--r--fs/tux3/dleaf.c887
1 files changed, 0 insertions, 887 deletions
diff --git a/fs/tux3/dleaf.c b/fs/tux3/dleaf.c
deleted file mode 100644
index 88a30c98c74cd5..00000000000000
--- a/fs/tux3/dleaf.c
+++ /dev/null
@@ -1,887 +0,0 @@
-/*
- * File index btree leaf operations
- *
- * Original copyright (c) 2008 Daniel Phillips <phillips@phunq.net>
- * Licensed under the GPL version 2
- *
- * By contributing changes to this file you grant the original copyright holder
- * the right to distribute those changes under any license.
- */
-
-#include "tux3.h"
-#include "dleaf.h"
-
-#ifndef trace
-#define trace trace_on
-#endif
-
-/*
- * Leaf index format
- *
- * A leaf has a small header followed by a table of extents. A two level
- * index grows down from the top of the leaf towards the top of the extent
- * table. The index maps each unique logical address in the leaf to one or
- * more extents beginning at that address.
- *
- * The top level index is a table of groups of entries all having the same
- * high 24 bits of logical address which is only stored once, along with the
- * 8 bit count of entries in the group. Since there can be more than 256
- * entries at the same logical address, there could be more than one group
- * with the same logical address. The group count is used both to know the
- * number of entries in the group and to find the beginning of the entry table
- * for a given group, by adding up the sizes of the proceeding groups.
- *
- * The 8 bit entry limit limits the number of different versions at the same
- * logical address to 255. For now.
- *
- * The second level entry tables are stored end to end in reverse immediately
- * below the groups table, also stored in reverse. Each entry has the low 24
- * bits of the logical address and the 8 bit 'limit' offset of the last extent
- * for that logical address, measuring from the first extent for the group in
- * units of extent size. The limit is used rather than an offset so that the
- * final offset is the count of extents in the group, which is summed up to
- * locate the first extent for the group in the extent table. The difference
- * between and entry limit and the limit of its predecessor gives the count of
- * extents for the logical address specified by the entry.
- *
- * At the top level of a very large or very sparse btree it is likely that the
- * group table will be relatively larger, up to the same size as all the entry
- * tables. This does not matter much in terms of overall btree bulk. A few
- * levels down the logical address space will have been split to the point
- * where most entries in a leaf fit into one entry table.
- *
- * This leaf indexing scheme has some obscure boundary conditions, such as
- * the zeroth entry of a group having no predecessor and thus needing to have
- * a special check to supply zero as the preceding limit. Inserting and
- * deleting are fairly involved and subtle. But the space required to index
- * extents in a deep btree is reduced considerably, which is compelling. In
- * the end, the indexing scheme provides access to a simple linear table of
- * extents and a count, so there is little impact on the specialized methods
- * that operate on those extents due to the complexity of the indexing scheme.
- * The lookup operation on this index is very efficient. Each level of the
- * index is suited to binary search. A sequence of inserts in ascending order
- * in the same group requires no existing entries to be relocated, the reason
- * the entry list is stored in reverse.
- */
-
-static void dleaf_btree_init(struct btree *btree)
-{
- btree->entries_per_leaf = 64; /* FIXME: should depend on blocksize */
-}
-
-int dleaf_init(struct btree *btree, void *leaf)
-{
- struct dleaf *dleaf = leaf;
- *dleaf = (struct dleaf){
- .magic = cpu_to_be16(TUX3_MAGIC_DLEAF),
- .free = cpu_to_be16(sizeof(struct dleaf)),
- .used = cpu_to_be16(btree->sb->blocksize)
- };
- return 0;
-}
-
-unsigned dleaf_free(struct btree *btree, void *leaf)
-{
- struct dleaf *dleaf = leaf;
- return be16_to_cpu(dleaf->used) - be16_to_cpu(dleaf->free);
-}
-
-static unsigned dleaf_need(struct btree *btree, void *vleaf)
-{
- struct dleaf *leaf = vleaf;
- return btree->sb->blocksize - dleaf_free(btree, leaf) - sizeof(struct dleaf);
-}
-
-static inline tuxkey_t get_index(struct group *group, struct entry *entry)
-{
- return ((tuxkey_t)group_keyhi(group) << 24) | entry_keylo(entry);
-}
-
-static int dleaf_sniff(struct btree *btree, void *leaf)
-{
- struct dleaf *dleaf = leaf;
- if (dleaf->magic != cpu_to_be16(TUX3_MAGIC_DLEAF))
- return -1;
- return 0;
-}
-
-static int dleaf_can_free(struct btree *btree, void *vleaf)
-{
- return dleaf_need(btree, vleaf) == 0;
-}
-
-void dleaf_dump(struct btree *btree, void *vleaf)
-{
- if (!tux3_trace)
- return;
-
- unsigned blocksize = btree->sb->blocksize;
- struct dleaf *leaf = vleaf;
- struct group *gdict = (void *)leaf + blocksize, *gbase = --gdict - dleaf_groups(leaf);
- struct entry *edict = (void *)(gbase + 1), *entry = edict;
- struct diskextent *extents = leaf->table;
-
- __tux3_dbg("%i entry groups:\n", dleaf_groups(leaf));
- for (struct group *group = gdict; group > gbase; group--) {
- __tux3_dbg(" %ti/%i:", gdict - group, group_count(group));
- //__tux3_dbg(" [%i]", extents - leaf->table);
- struct entry *ebase = entry - group_count(group);
- while (entry > ebase) {
- --entry;
- unsigned offset = entry == edict - 1 ? 0 : entry_limit(entry + 1);
- int count = entry_limit(entry) - offset;
- __tux3_dbg(" %Lx =>", get_index(group, entry));
- //__tux3_dbg(" %p (%i)", entry, entry_limit(entry));
- if (count < 0)
- __tux3_dbg(" <corrupt>");
- else for (int i = 0; i < count; i++) {
- struct diskextent extent = extents[offset + i];
- __tux3_dbg(" %Lx", extent_block(extent));
- if (extent_count(extent))
- __tux3_dbg("/%x", extent_count(extent));
- }
- //__tux3_dbg(" {%u}", entry_limit(entry));
- __tux3_dbg(";");
- }
- __tux3_dbg("\n");
- extents += entry_limit(entry);
- edict -= group_count(group);
- }
-}
-
-static int dleaf_free2(struct dleaf *leaf, unsigned blocksize)
-{
- struct group *gdict = (void *)leaf + blocksize, *gstop = gdict - dleaf_groups(leaf);
- struct entry *edict = (void *)gstop, *entry = edict;
- struct diskextent *extents = leaf->table;
-
- for (struct group *group = gdict; group-- > gstop;)
- extents += entry_limit(entry -= group_count(group));
- return (void *)entry - (void *)extents;
-}
-
-static int dleaf_check(struct dleaf *leaf, unsigned blocksize)
-{
- struct group *gdict = (void *)leaf + blocksize, *gstop = gdict - dleaf_groups(leaf);
- struct entry *edict = (void *)gstop, *estop = edict;
- struct diskextent *extents = leaf->table;
- unsigned excount = 0, encount = 0;
- char *why;
-
- if (!dleaf_groups(leaf))
- return 0;
-
- unsigned keyhi = 0;
- struct diskextent *exbase = leaf->table;
- for (struct group *group = gdict - 1; group >= gstop; group--) {
- assert(group_keyhi(group) >= keyhi);
- assert(group_count(group) > 0);
- assert(group_count(group) <= MAX_GROUP_ENTRIES);
- keyhi = group_keyhi(group);
- struct entry *entry = estop;
- estop -= group_count(group);
- unsigned limit = 0, keylo = -1;
- while (--entry >= estop) {
- assert((int)entry_keylo(entry) > (int)keylo);
- assert(entry_limit(entry) > limit);
- keylo = entry_keylo(entry);
- limit = entry_limit(entry);
- }
- struct diskextent *exstop = exbase + entry_limit(estop);
- block_t block = 0;
- while (exbase < exstop) {
- assert(extent_block(*exbase) != block);
- exbase++;
- }
- excount += entry_limit(estop);
- encount += group_count(group);
- }
- //tux3_dbg("encount = %i, excount = %i", encount, excount);
- why = "used count wrong";
- if (be16_to_cpu(leaf->used) != (void *)(edict - encount) - (void *)leaf)
- goto eek;
- why = "free count wrong";
- if (be16_to_cpu(leaf->free) != (void *)(extents + excount) - (void *)leaf)
- goto eek;
- why = "free check mismatch";
- if (be16_to_cpu(leaf->used) - be16_to_cpu(leaf->free) != dleaf_free2(leaf, blocksize))
- goto eek;
- return 0;
-
-eek:
- tux3_dbg("error: free %i, used %i", be16_to_cpu(leaf->free), be16_to_cpu(leaf->used));
- tux3_dbg("error: %s!", why);
- return -1;
-}
-
-static int dleaf_split_at(void *from, void *into, int split, unsigned blocksize)
-{
- struct dleaf *leaf = from, *leaf2 = into;
- unsigned groups = dleaf_groups(leaf), groups2;
- struct group *gdict = from + blocksize, *gbase = gdict - groups;
- struct entry *edict = (void *)gbase, *ebase = (void *)leaf + be16_to_cpu(leaf->used);
- unsigned recount = 0, grsplit = 0, exsplit = 0;
- unsigned entries = edict - ebase;
-
- trace("split %p into %p at %x", leaf, leaf2, split);
- if (!groups)
- return 0;
- assert(split < entries);
- for (struct group *group = gdict - 1; group >= gbase; group--, grsplit++) {
- if (recount + group_count(group) > split)
- break;
- edict -= group_count(group);
- exsplit += entry_limit(edict);
- recount += group_count(group);
- }
-
- /* have to split a group? */
- unsigned cut = split - recount;
- if (cut)
- exsplit += entry_limit(edict - cut);
- edict = (void *)gbase; /* restore it */
- trace("split %i entries at group %i, entry %x", entries, grsplit, cut);
- trace("split extents at %i", exsplit);
- /* copy extents */
- unsigned size = from + be16_to_cpu(leaf->free) - (void *)(leaf->table + exsplit);
- memcpy(leaf2->table, leaf->table + exsplit, size);
-
- /* copy groups */
- struct group *gdict2 = (void *)leaf2 + blocksize;
- set_dleaf_groups(leaf2, groups2 = (groups - grsplit));
- veccopy(gdict2 - dleaf_groups(leaf2), gbase, dleaf_groups(leaf2));
- inc_group_count(gdict2 - 1, -cut);
- set_dleaf_groups(leaf, groups = (grsplit + !!cut));
- gbase = gdict - groups;
- if (cut)
- set_group_count(gdict - groups, cut);
-
- /* copy entries */
- struct entry *edict2 = (void *)(gdict2 - groups2);
-
- assert((struct entry *)((void *)leaf + be16_to_cpu(leaf->used)) == edict - entries);
-
- unsigned encopy = entries - split;
- veccopy(edict2 - encopy, ebase, encopy);
- if (cut)
- for (int i = 1; i <= group_count((gdict2 - 1)); i++)
- inc_entry_limit(edict2 - i, -entry_limit(edict - split));
- vecmove(gdict - groups - split, edict - split, split);
-
- /* clean up */
- leaf->free = cpu_to_be16((void *)(leaf->table + exsplit) - from);
- leaf->used = cpu_to_be16((void *)(gbase - split) - from);
- leaf2->free = cpu_to_be16((void *)leaf->table + size - from);
- leaf2->used = cpu_to_be16((void *)(gdict - groups2 - encopy) - from);
- memset(from + be16_to_cpu(leaf->free), 0, be16_to_cpu(leaf->used) - be16_to_cpu(leaf->free));
- assert(!dleaf_check(leaf, blocksize));
- assert(!dleaf_check(leaf2, blocksize));
- return groups2;
-}
-
-/*
- * Split dleaf at middle in terms of entries, may be unbalanced in extents.
- * Not used for now because we do the splits by hand in filemap.c
- */
-static tuxkey_t dleaf_split(struct btree *btree, tuxkey_t hint, void *from, void *into)
-{
- struct dleaf *leaf = from, *leaf2 = into;
- assert(!dleaf_sniff(btree, from));
- unsigned blocksize = btree->sb->blocksize;
- struct group *gdict = from + blocksize, *gbase = gdict - dleaf_groups(leaf);
- struct entry *edict = (void *)gbase;
- struct entry *ebase = (void *)leaf + be16_to_cpu(leaf->used);
- unsigned entries = edict - ebase;
- assert(entries >= 2);
- unsigned groups2 = dleaf_split_at(from, into, entries / 2, blocksize);
- struct group *gdict2 = (void *)leaf2 + blocksize;
-
- return get_index(gdict2 - 1, (struct entry *)(gdict2 - groups2) - 1);
-}
-
-/*
- * Try to merge dleaf
- *
- * return value:
- * 0 - couldn't merge
- * 1 - merged
- */
-int dleaf_merge(struct btree *btree, void *vinto, void *vfrom)
-{
- struct dleaf *leaf = vinto, *from = vfrom;
- struct group *gdict = (void *)leaf + btree->sb->blocksize;
- struct group *gstop = gdict - dleaf_groups(leaf);
- struct entry *edict = (struct entry *)gstop;
- unsigned free = be16_to_cpu(leaf->free);
- struct group *gdict2 = (void *)from + btree->sb->blocksize;
- struct group *group2 = gdict2 - 1;
- struct group *gstop2 = gdict2 - dleaf_groups(from);
- struct entry *edict2 = (struct entry *)gstop2;
- unsigned merge_gcount = 0, rest_gcount = 0;
- int can_merge_group = 0;
-
- /* Source is empty, so we do nothing */
- if (dleaf_groups(from) == 0)
- return 1;
-
- /* Destination is empty, so we just copy */
- if (dleaf_groups(leaf) == 0) {
- unsigned used = be16_to_cpu(from->used);
- memcpy(leaf, from, be16_to_cpu(from->free));
- memcpy((void *)leaf + used, (void *)from + used, btree->sb->blocksize - used);
- return 1;
- }
-
- /*
- * Check if there is space.
- * FIXME: we may be able to merge more if group/entry can be merged
- */
- if (dleaf_need(btree, vfrom) > dleaf_free(btree, vinto))
- return 0;
-
- /* Try to merge group, and prepare to adjust */
- if (group_keyhi(gstop) == group_keyhi(group2) &&
- group_count(gstop) < MAX_GROUP_ENTRIES) {
- unsigned gcount2 = group_count(group2);
- unsigned room = MAX_GROUP_ENTRIES - group_count(gstop);
- /* All entries can merge to this group? */
- if (room < gcount2) {
- /* Calc adjust for the rest of group/entries */
- rest_gcount = gcount2 - room;
- gcount2 = room;
- } else
- can_merge_group = 1;
-
- /* group/entries which merge */
- merge_gcount = gcount2;
- }
-
- /* append extents */
- unsigned size = be16_to_cpu(from->free) - sizeof(struct dleaf);
- memcpy((void *)leaf + free, from->table, size);
- leaf->free = cpu_to_be16(free + size);
-
- /* make space and append groups except for possibly merged group */
- assert(sizeof(struct group) == sizeof(struct entry));
- unsigned addgroups = dleaf_groups(from) - can_merge_group;
- struct entry *ebase2 = (void *)from + be16_to_cpu(from->used);
- struct entry *ebase = (void *)leaf + be16_to_cpu(leaf->used);
- vecmove(ebase - addgroups, ebase, edict - ebase);
- veccopy(gstop - addgroups, gstop2, addgroups);
- ebase -= addgroups;
- inc_dleaf_groups(leaf, addgroups);
-
- /* append entries */
- size = (void *)edict2 - (void *)ebase2;
- memcpy((void *)ebase - size, ebase2, size);
- leaf->used = cpu_to_be16((void *)ebase - size - (void *)leaf);
-
- if (merge_gcount) {
- /* adjust merged group */
- struct entry *estop = ebase - merge_gcount;
- unsigned limit_adjust = entry_limit(ebase);
- inc_group_count(gstop, merge_gcount);
- while (--ebase >= estop)
- inc_entry_limit(ebase, limit_adjust);
- if (rest_gcount) {
- /* adjust the group/entries which couldn't merge */
- ebase = estop;
- estop = ebase - rest_gcount;
- limit_adjust = entry_limit(edict2 - merge_gcount);
- set_group_count(gstop - 1, rest_gcount);
- while (--ebase >= estop)
- inc_entry_limit(ebase, -limit_adjust);
- }
- }
- assert(!dleaf_check(leaf, btree->sb->blocksize));
-
- return 1;
-}
-
-/*
- * dleaf format and dwalk structure
- *
- * min address +--------------------------+
- * | dleaf header |
- * | | extent <0> (gr 0, ent 0) | __ walk->exbase
- * growing downwards | | extent <0> (gr 1, ent 0) | __ walk->extent
- * | | extent <1> (gr 1, ent 1) | __ walk->exstop
- * V | extent <2> (gr 1, ent 2) |
- * | |
- * | ....... |
- * | | __ walk->estop
- * | entry <2> (gr 1) |
- * | entry <1> (gr 1) | __ walk->entry
- * ^ | entry <0> (gr 1) |
- * | | entry <0> (gr 0) | __ walk->group,walk->gstop
- * growing upwards | | group <1> |
- * | | group <0> |
- * max address +--------------------------+ __ walk->gdict
- *
- * The above is dleaf format, and now dwalk_next() was called 2 times.
- *
- * ->gdict is the end of dleaf.
- * ->group is the current group (group <1>)
- * ->gstop is the last group in this dleaf
- * ->entry is the current entry (entry <0> (gr 1))
- * ->estop is the last entry in current group
- * ->exbase is the first extent in current group
- * ->extent is the current extent (extent <1> (gr1, ent 1)).
- * ->exstop is the first extent in next entry.
- * (I.e. the address that dwalk_next() has to update to next entry.
- * If there is no next, it will stop with ->extent == ->exstop.)
- */
-
-void dwalk_redirect(struct dwalk *walk, struct dleaf *src, struct dleaf *dst)
-{
- walk->leaf = dst;
- walk->group = ptr_redirect(walk->group, src, dst);
- walk->gstop = ptr_redirect(walk->gstop, src, dst);
- walk->gdict = ptr_redirect(walk->gdict, src, dst);
- walk->entry = ptr_redirect(walk->entry, src, dst);
- walk->estop = ptr_redirect(walk->estop, src, dst);
- walk->exbase = ptr_redirect(walk->exbase, src, dst);
- walk->extent = ptr_redirect(walk->extent, src, dst);
- walk->exstop = ptr_redirect(walk->exstop, src, dst);
-}
-
-/* FIXME: current code is assuming the entry has only one extent. */
-
-/* The first extent in dleaf */
-static int dwalk_first(struct dwalk *walk)
-{
- return walk->leaf->table == walk->extent;
-}
-
-/* The end of extent in dleaf */
-int dwalk_end(struct dwalk *walk)
-{
- return walk->extent == walk->exstop;
-}
-
-tuxkey_t dwalk_index(struct dwalk *walk)
-{
- return get_index(walk->group, walk->entry);
-}
-
-block_t dwalk_block(struct dwalk *walk)
-{
- return extent_block(*walk->extent);
-}
-
-unsigned dwalk_count(struct dwalk *walk)
-{
- return extent_count(*walk->extent);
-}
-
-/* unused */
-void dwalk_dump(struct dwalk *walk)
-{
- if (walk->leaf->table == walk->exstop) {
- trace_on("empty leaf");
- return;
- }
- if (dwalk_end(walk)) {
- trace_on("end of extent");
- return;
- }
- struct diskextent *entry_exbase;
- if (walk->entry + 1 == walk->estop + group_count(walk->group))
- entry_exbase = walk->exbase;
- else
- entry_exbase = walk->exbase + entry_limit(walk->entry + 1);
- trace_on("leaf %p", walk->leaf);
- trace_on("group %tu/%tu", (walk->gdict - walk->group) - 1, walk->gdict - walk->gstop);
- trace_on("entry %tu/%u", group_count(walk->group) - (walk->entry - walk->estop) - 1, group_count(walk->group));
- trace_on("extent %tu/%tu", walk->extent - entry_exbase, walk->exstop - entry_exbase);
-}
-
-static void dwalk_check(struct dwalk *walk)
-{
- if (!dleaf_groups(walk->leaf)) {
- assert(walk->group == walk->gstop);
- assert(walk->entry == walk->estop);
- assert(walk->exbase == walk->extent);
- assert(walk->extent == walk->exstop);
- assert(walk->leaf->table == walk->exstop);
- } else if (dwalk_end(walk)) {
- assert(walk->group == walk->gstop);
- assert(walk->entry == walk->estop);
- assert(walk->exbase < walk->extent);
- assert(walk->extent == walk->exstop);
- } else {
- assert(walk->group >= walk->gstop);
- assert(walk->entry >= walk->estop);
- assert(walk->exbase <= walk->extent);
- assert(walk->extent < walk->exstop);
- /*
- * This checks ->entry has only 1 ->extent.
- * FIXME (and re-think): Assuming dleaf->entry has only 1
- * ->extent on some functions
- */
- if (walk->entry + 1 == walk->estop + group_count(walk->group))
- assert(entry_limit(walk->entry) == 1);
- else
- assert(entry_limit(walk->entry) - entry_limit(walk->entry + 1) == 1);
- }
-}
-
-/* Set the cursor to next extent */
-int dwalk_next(struct dwalk *walk)
-{
- /* last extent of this dleaf, or empty dleaf */
- if (dwalk_end(walk))
- return 0;
- walk->extent++;
- if (walk->extent == walk->exstop) {
- if (walk->entry == walk->estop) {
- if (walk->group == walk->gstop)
- return 0;
- walk->group--;
- walk->exbase += entry_limit(walk->estop);
- walk->estop -= group_count(walk->group);
- }
- walk->entry--;
- walk->exstop = walk->exbase + entry_limit(walk->entry);
- }
- dwalk_check(walk);
- return 1;
-}
-
-/* Back to the previous extent. (i.e. rewind the previous dwalk_next()) */
-int dwalk_back(struct dwalk *walk)
-{
- /* first extent of this dleaf, or empty dleaf */
- if (dwalk_first(walk))
- return 0;
- struct diskextent *entry_exbase;
-
- if (walk->entry + 1 == walk->estop + group_count(walk->group))
- entry_exbase = walk->exbase;
- else
- entry_exbase = walk->exbase + entry_limit(walk->entry + 1);
- walk->extent--;
- if (walk->extent < entry_exbase) {
- if (walk->extent < walk->exbase) {
- if (walk->group == walk->gdict)
- return 1;
- walk->group++;
- walk->estop = walk->entry + 1;
- walk->exbase -= entry_limit(walk->entry + 1);
- }
- walk->entry++;
- walk->exstop = walk->exbase + entry_limit(walk->entry);
- }
- dwalk_check(walk);
- return 1;
-}
-
-/*
- * Probe the extent position with key. If not found, position is next
- * extent of key. If probed all extents return 0, otherwise return 1
- * (I.e. current extent is valid. IOW, !dwalk_end()).
- */
-int dwalk_probe(struct dleaf *leaf, unsigned blocksize, struct dwalk *walk, tuxkey_t key)
-{
- trace("probe for 0x%Lx", key);
- unsigned keylo = key & 0xffffff, keyhi = key >> 24;
-
- walk->leaf = leaf;
- walk->gdict = (void *)leaf + blocksize;
- walk->gstop = walk->gdict - dleaf_groups(leaf);
- walk->group = walk->gdict;
- walk->estop = (struct entry *)walk->gstop;
- walk->exbase = leaf->table;
- if (!dleaf_groups(leaf)) {
- /* dwalk_first() and dwalk_end() will return true */
- walk->entry = (struct entry *)walk->gstop;
- walk->extent = leaf->table;
- walk->exstop = leaf->table;
- dwalk_check(walk);
- return 0;
- }
-
- while (walk->group > walk->gstop) {
- walk->group--;
- walk->entry = walk->estop - 1;
- walk->estop -= group_count(walk->group);
- if (group_keyhi(walk->group) > keyhi)
- goto no_group;
- if (group_keyhi(walk->group) == keyhi) {
- if (entry_keylo(walk->entry) > keylo)
- goto no_group;
- if (walk->group == walk->gstop)
- goto probe_entry;
- if (group_keyhi(walk->group - 1) > keyhi)
- goto probe_entry;
- if (entry_keylo(walk->estop - 1) > keylo)
- goto probe_entry;
- }
- walk->exbase += entry_limit(walk->estop);
- }
- /* There is no group after this key */
- walk->entry = walk->estop;
- walk->exstop = walk->exbase;
- walk->extent = walk->exbase;
- walk->exbase = walk->exbase - entry_limit(walk->estop);
- dwalk_check(walk);
- return 0;
-
-no_group:
- /* There is no interesting group, set first extent in this group */
- walk->extent = walk->exbase;
- walk->exstop = walk->exbase + entry_limit(walk->entry);
- dwalk_check(walk);
- return 1;
-
-probe_entry:
- /* There is interesting group, next is probe interesting entry */
- walk->extent = walk->exbase;
- walk->exstop = walk->exbase + entry_limit(walk->entry);
- while (walk->entry > walk->estop) {
- if (entry_keylo(walk->entry - 1) > keylo)
- break;
- walk->entry--;
- walk->extent = walk->exstop;
- walk->exstop = walk->exbase + entry_limit(walk->entry);
- }
- /* Now, entry has the nearest keylo (<= key), probe extent */
- /* FIXME: this is assuming the entry has only one extent */
- if (key < dwalk_index(walk) + dwalk_count(walk))
- return 1;
- /* This entry didn't have the target extent, set next entry */
- dwalk_next(walk);
- return !dwalk_end(walk);
-}
-
-int dwalk_mock(struct dwalk *walk, tuxkey_t index, struct diskextent extent)
-{
- if (!dleaf_groups(walk->leaf) || walk->entry == walk->estop || dwalk_index(walk) != index) {
- trace("add entry 0x%Lx", index);
- unsigned keylo = index & 0xffffff, keyhi = index >> 24;
- if (!walk->mock.groups || group_keyhi(&walk->mock.group) != keyhi || group_count(&walk->mock.group) >= MAX_GROUP_ENTRIES) {
- trace("add group %i", walk->mock.groups);
- walk->exbase += entry_limit(&walk->mock.entry);
- walk->mock.group = make_group(keyhi, 0);
- walk->mock.used -= sizeof(struct group);
- walk->mock.groups++;
- }
- walk->mock.used -= sizeof(struct entry);
- walk->mock.entry = make_entry(keylo, walk->extent - walk->exbase);
- inc_group_count(&walk->mock.group, 1);
- }
- trace("add extent 0x%Lx => 0x%Lx/%x", index, extent_block(extent), extent_count(extent));
- walk->mock.free += sizeof(*walk->extent);
- walk->extent++;
- inc_entry_limit(&walk->mock.entry, 1);
- return 0;
-}
-
-/* This copy extents >= this extent to another dleaf. */
-void dwalk_copy(struct dwalk *walk, struct dleaf *dest)
-{
- struct dleaf *leaf = walk->leaf;
- unsigned blocksize = (void *)walk->gdict - (void *)leaf;
-
- assert(dleaf_groups(dest) == 0);
- if (dwalk_end(walk))
- return;
- if (dwalk_first(walk)) {
- memcpy(dest, leaf, blocksize);
- return;
- }
-
- struct group *gdict2 = (void *)dest + blocksize;
- unsigned groups2 = walk->group + 1 - walk->gstop;
- struct entry *ebase = (void *)leaf + be16_to_cpu(leaf->used);
- unsigned entries = (walk->entry + 1) - ebase;
- struct entry *edict2 = (struct entry *)(gdict2 - groups2);
- struct diskextent *exend = (void *)leaf + be16_to_cpu(leaf->free);
- struct diskextent *entry_exbase;
- unsigned limit_adjust, extents;
-
- if (walk->entry + 1 == walk->estop + group_count(walk->group)) {
- entry_exbase = walk->exbase;
- limit_adjust = 0;
- } else {
- entry_exbase = walk->exbase + entry_limit(walk->entry + 1);
- limit_adjust = entry_limit(walk->entry + 1);
- }
- extents = exend - entry_exbase;
-
- veccopy(gdict2 - groups2, walk->gstop, groups2);
- veccopy(edict2 - entries, ebase, entries);
- veccopy(dest->table, entry_exbase, extents);
-
- unsigned gcount2 = (walk->entry + 1) - walk->estop;
- set_dleaf_groups(dest, groups2);
- dest->free = cpu_to_be16((void *)(dest->table + extents) - (void *)dest);
- dest->used = cpu_to_be16((void *)(edict2 - entries) - (void *)dest);
- set_group_count(gdict2 - 1, gcount2);
- struct entry *entry2 = edict2 - 1, *estop2 = edict2 - gcount2;
- while (entry2 >= estop2) {
- inc_entry_limit(entry2, -limit_adjust);
- entry2--;
- }
- assert(!dleaf_check(dest, blocksize));
-}
-
-/* This removes extents >= this extent. (cursor position is dwalk_end()). */
-void dwalk_chop(struct dwalk *walk)
-{
- trace(" ");
- if (dwalk_end(walk))
- return;
- struct dleaf *leaf = walk->leaf;
-
- if (dwalk_first(walk)) {
- unsigned blocksize = (void *)walk->gdict - (void *)leaf;
- set_dleaf_groups(leaf, 0);
- leaf->free = cpu_to_be16(sizeof(struct dleaf));
- leaf->used = cpu_to_be16(blocksize);
- /* Initialize dwalk state */
- dwalk_probe(leaf, blocksize, walk, 0);
- return;
- }
-
- /* If extent is first extent on this group, remove this group too */
- dwalk_back(walk);
-
- struct entry *ebase = walk->estop + group_count(walk->group);
- void *entry = walk->entry;
- set_dleaf_groups(leaf, walk->gdict - walk->group);
- set_group_count(walk->group, ebase - walk->entry);
- entry += (void *)walk->group - (void *)walk->gstop;
- memmove(entry, walk->entry, (void *)walk->gstop - (void *)walk->entry);
- walk->estop = walk->entry = entry;
- walk->gstop = walk->group;
- walk->exstop = walk->exbase + entry_limit(walk->entry);
- walk->extent = walk->exstop;
- leaf->free = cpu_to_be16((void *)walk->exstop - (void *)leaf);
- leaf->used = cpu_to_be16((void *)walk->estop - (void *)leaf);
- dwalk_check(walk);
- assert(!dleaf_check(leaf, (void *)walk->gdict - (void *)leaf));
-}
-
-/*
- * Add extent to dleaf. This can use only if dwalk_end() is true.
- * Note, dwalk state is invalid after this. (I.e. it can be used only
- * for dwalk_add())
- */
-int dwalk_add(struct dwalk *walk, tuxkey_t index, struct diskextent extent)
-{
- struct dleaf *leaf = walk->leaf;
- unsigned groups = dleaf_groups(leaf);
- unsigned free = be16_to_cpu(leaf->free);
- unsigned used = be16_to_cpu(leaf->used);
-
- /* FIXME: assume entry has only one extent */
- assert(!groups || dwalk_index(walk) != index);
- assert(extent_block(extent) > 0 && extent_count(extent) > 0);
-
- trace("group %ti/%i", walk->gstop + groups - 1 - walk->group, groups);
- if (!groups || dwalk_index(walk) != index) {
- trace("add entry 0x%Lx", index);
- unsigned keylo = index & 0xffffff, keyhi = index >> 24;
- if (!groups || group_keyhi(walk->group) != keyhi || group_count(walk->group) >= MAX_GROUP_ENTRIES) {
- trace("add group %i", groups);
- /* will it fit? */
- assert(sizeof(*walk->entry) == sizeof(*walk->group));
- assert(free <= used - sizeof(*walk->entry));
- /* move entries down, adjust walk state */
- /* could preplan this to avoid move: need additional pack state */
- vecmove(walk->entry - 1, walk->entry, (struct entry *)walk->group - walk->entry);
- walk->entry--; /* adjust to moved position */
- walk->exbase += groups ? entry_limit(walk->entry) : 0;
- *--walk->group = make_group(keyhi, 0);
- used -= sizeof(*walk->group);
- set_dleaf_groups(leaf, ++groups);
- }
- assert(free <= used - sizeof(*walk->entry));
- used -= sizeof(*walk->entry);
- leaf->used = cpu_to_be16(used);
- *--walk->entry = make_entry(keylo, walk->extent - walk->exbase);
- inc_group_count(walk->group, 1);
- }
- trace("add extent %ti", walk->extent - leaf->table);
- assert(free + sizeof(*walk->extent) <= used);
- free += sizeof(*walk->extent);
- leaf->free = cpu_to_be16(free);
- *walk->extent++ = extent;
- inc_entry_limit(walk->entry, 1);
-
- assert(!dleaf_check(leaf, (void *)walk->gdict - (void *)walk->leaf));
-
- return 0; // extent out of order??? leaf full???
-}
-
-/* Update this extent. The caller have to check new extent isn't overlapping. */
-static void dwalk_update(struct dwalk *walk, struct diskextent extent)
-{
- *walk->extent = extent;
-}
-
-/*
- * Reasons this dleaf truncator sucks:
- *
- * * Does not check for integrity at all so a corrupted leaf can cause overflow
- * and system corruption.
- *
- * * Assumes all block pointers after the truncation point will be deleted,
- * which does not hold when versions arrive.
- *
- * * Modifies a group count in the middle of the traversal knowing that it has
- * already loaded the changed field and will not load it again, fragile.
- *
- * * Does not provide a generic mechanism that can be adapted to other
- * truncation tasks.
- *
- * But it does truncate so it is getting checked in just for now.
- */
-static int dleaf_chop(struct btree *btree, tuxkey_t start, u64 len, void *vleaf)
-{
- struct sb *sb = btree->sb;
- struct dleaf *leaf = vleaf;
- struct dwalk walk;
-
- /* FIXME: range chop is unsupported for now */
- assert(len == TUXKEY_LIMIT);
-
- if (!dwalk_probe(leaf, sb->blocksize, &walk, start))
- return 0;
-
- /* Chop this extent partially */
- if (dwalk_index(&walk) < start) {
- block_t block = dwalk_block(&walk);
- unsigned count = start - dwalk_index(&walk);
-
- defer_bfree(sb, &sb->defree, block + count, dwalk_count(&walk) - count);
- log_bfree(sb, block + count, dwalk_count(&walk) - count);
-
- dwalk_update(&walk, make_extent(block, count));
- if (!dwalk_next(&walk))
- goto out;
- }
- struct dwalk rewind = walk;
- do {
- defer_bfree(sb, &sb->defree, dwalk_block(&walk), dwalk_count(&walk));
- log_bfree(sb, dwalk_block(&walk), dwalk_count(&walk));
- } while (dwalk_next(&walk));
- dwalk_chop(&rewind);
-out:
- assert(!dleaf_check(leaf, sb->blocksize));
- return 1;
-}
-
-struct btree_ops dtree1_ops = {
- .btree_init = dleaf_btree_init,
- .leaf_init = dleaf_init,
- .leaf_split = dleaf_split,
-// .leaf_resize = dleaf_resize,
- .leaf_chop = dleaf_chop,
- .leaf_merge = dleaf_merge,
-
- .leaf_sniff = dleaf_sniff,
- .leaf_can_free = dleaf_can_free,
- .leaf_dump = dleaf_dump,
-};