aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOGAWA Hirofumi <hirofumi@mail.parknet.co.jp>2013-12-04 20:34:23 +0900
committerDaniel Phillips <daniel@tux3.org>2013-12-04 20:34:23 +0900
commitf3e94a51db3d2b809ec59e5c1b10701b0882e3ed (patch)
treee5ee7b7089c4ae003ec7432ca8f07392356e16e1
parentd9231b2a4d1465307d4cf65f420a4e84bd54124a (diff)
downloadlinux-tux3-f3e94a51db3d2b809ec59e5c1b10701b0882e3ed.tar.gz
tux3: Fix partial allocation and rewrite dleaf2_write()
Current dleaf2_write() is assuming rq->seg_alloc() can allocate the seg[] to fit dleaf2's space. However, of course, it is not true. Furthermore, current implementation has some limitations (e.g. can't write across leaves, assuming to read existing seg[] by dleaf2_read() as preparation, inefficient overwrite mode). So, this rewrites dleaf2_write(). Strategy is, 1) Find start dex position. 2) Find end dex position. 3) Allocates seg[]. 4a) If seg[] is fit to dleaf2 space, apply seg[] to dleaf2. Done. 4b) If seg[] is not fit to dleaf2 or there is no seg[] space, we do (2) again after shrinking key region. 5) If some seg[] was fit to dleaf2 space, do split. Then, do from (1) again. Note for seg[] shrinking here, New helpers - ->seg_find() finds available blocks from block bitmap. - ->seg_alloc() commits allocation of blocks found by ->seg_find() - ->seg_free() frees blocks replaced by new seg[] To do shrink for (4b), we uses ->seg_find() and ->seg_alloc() pair to avoid to modify bitmap by temporary state. With this new implementation, we can write across multiple leaves by one btree_write() call. And IMO, new implement is simpler than older one. Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
-rw-r--r--fs/tux3/dleaf2.c338
-rw-r--r--fs/tux3/dleaf2.h9
-rw-r--r--fs/tux3/filemap.c15
3 files changed, 235 insertions, 127 deletions
diff --git a/fs/tux3/dleaf2.c b/fs/tux3/dleaf2.c
index 775a44462c717d..079209c1c04ada 100644
--- a/fs/tux3/dleaf2.c
+++ b/fs/tux3/dleaf2.c
@@ -384,10 +384,7 @@ static void dleaf2_resize(struct dleaf2 *dleaf, struct diskextent2 *head,
if (diff == 0)
return;
- if (diff > 0)
- memmove(head + diff, head, limit - (void *)head);
- else
- memmove(head, head - diff, limit - (void *)(head - diff));
+ memmove(head + diff, head, limit - (void *)head);
be16_add_cpu(&dleaf->count, diff);
}
@@ -409,6 +406,99 @@ static tuxkey_t dleaf2_split_at_center(struct dleaf2 *dleaf)
return ex.logical;
}
+/* dex information to modify */
+struct dex_info {
+ /* start extent */
+ struct diskextent2 *start_dex;
+ /* physical range of partial start */
+ block_t start_block;
+ unsigned start_count;
+
+ /* end extent */
+ struct diskextent2 *end_dex;
+ /* remaining physical range of partial end */
+ block_t end_block;
+
+ int dleaf_count; /* count of dex except overwritten */
+ int overwrite_cnt; /* count of dex overwritten */
+ int need_sentinel; /* segments needs new sentinel? */
+};
+
+static void find_start_dex(struct btree *btree, struct dleaf2 *dleaf,
+ block_t key_start, struct dex_info *info)
+{
+ struct diskextent2 *dex_limit;
+
+ dex_limit = dleaf->table + be16_to_cpu(dleaf->count);
+
+ info->start_block = 0;
+ info->start_count = 0;
+
+ /* Lookup the dex for start of seg[]. */
+ info->start_dex = dleaf2_lookup_index(btree, dleaf, key_start);
+ if (info->start_dex < dex_limit - 1) {
+ struct extent ex;
+
+ get_extent(info->start_dex, &ex);
+ /* Start is at middle of dex: can't overwrite this dex */
+ if (key_start > ex.logical) {
+ block_t prev = ex.logical, physical = ex.physical;
+
+ info->start_dex++;
+ get_extent(info->start_dex, &ex);
+
+ if (physical)
+ info->start_block = physical + key_start - prev;
+ info->start_count = ex.logical - key_start;
+ }
+ }
+}
+
+static void find_end_dex(struct btree *btree, struct dleaf2 *dleaf,
+ block_t key_end, struct dex_info *info)
+{
+ struct diskextent2 *dex_limit;
+ u16 dleaf_count = be16_to_cpu(dleaf->count);
+
+ dex_limit = dleaf->table + dleaf_count;
+
+ info->need_sentinel = 0;
+ info->end_block = 0;
+ info->dleaf_count = dleaf_count;
+
+ /* Lookup the dex for end of seg[]. */
+ info->end_dex = dleaf2_lookup_index(btree, dleaf, key_end);
+ if (info->end_dex < dex_limit - 1) {
+ struct extent ex;
+
+ get_extent(info->end_dex, &ex);
+ if (key_end > ex.logical) {
+ block_t offset = key_end - ex.logical;
+
+ /* End is at middle of dex: can overwrite this dex */
+ info->end_dex++;
+
+ /* Need new end of segment */
+ info->need_sentinel = 1;
+ /* Save new physical for end of seg[] */
+ if (ex.physical)
+ info->end_block = ex.physical + offset;
+ }
+ }
+
+ /*
+ * Calculate dleaf2 space informations
+ */
+ /* Number of dex can be overwritten */
+ info->overwrite_cnt = info->end_dex - info->start_dex;
+ /* Need new dex sentinel? */
+ info->need_sentinel |= info->end_dex == dex_limit;
+
+ /* Calculate new dleaf->count except dex is overwritten by segments. */
+ info->dleaf_count -= info->overwrite_cnt;
+ info->dleaf_count += info->need_sentinel;
+}
+
/*
* Write extents.
*/
@@ -420,152 +510,158 @@ static int dleaf2_write(struct btree *btree, tuxkey_t key_bottom,
struct dleaf_req *rq = container_of(key, struct dleaf_req, key);
struct sb *sb = btree->sb;
struct dleaf2 *dleaf = leaf;
- struct diskextent2 *dex_start, *dex_end, *dex_limit;
+ struct diskextent2 *dex;
struct extent ex;
- tuxkey_t limit;
- block_t end_physical;
- unsigned need, between, write_segs, rest_segs;
- int need_split, ret;
-
-recheck:
- /* Paranoia checks */
- assert(key->len == seg_total_count(rq->seg + rq->seg_idx,
- rq->seg_cnt - rq->seg_idx));
+ struct dex_info info;
+ unsigned free_len, seg_len, alloc_len;
+ int err, diff, seg_cnt, space;
/*
- * Overwrite existent diskextent2 by specified segs. To do
- * it, check existent diskextent2 and resize number of
- * dleaf->count, then overwrite.
+ * Strategy: check free space in dleaf2, then allocate
+ * segments, and write segments to dleaf2. If there is no
+ * space in dleaf2, shrink segments to fit space of dleaf2,
+ * and split.
*
- * FIXME: should try to merge at start and last index position.
+ * FIXME: should try to merge at start and new last extents.
*/
dleaf2_init_sentinel(sb, dleaf, key_bottom);
- limit = key->start + key->len;
- write_segs = rq->seg_cnt - rq->seg_idx;
- dex_limit = dleaf->table + be16_to_cpu(dleaf->count);
+ /* Get the info of dex for start of seg[]. */
+ find_start_dex(btree, dleaf, key->start, &info);
- need = write_segs + 1; /* +1 is for sentinel */
+ seg_len = min_t(tuxkey_t, key_limit - key->start, key->len);
+ /* Get the info of dex for end of seg[]. */
+ find_end_dex(btree, dleaf, key->start + seg_len, &info);
- /* Find start of diskextent2 to overwrite */
- dex_start = dleaf2_lookup_index(btree, dleaf, key->start);
- if (dex_start < dex_limit) {
- /* Overwrite only if logical is same with index. */
- get_extent(dex_start, &ex);
- assert(ex.logical <= key->start); /* should have key_bottom */
- if (ex.logical < key->start)
- dex_start++;
+ /*
+ * Allocate blocks to seg after dleaf redirect. With this, our
+ * allocation order is, bnode => dleaf => data, and we can use
+ * physical address of dleaf as allocation hint for data
+ * blocks.
+ */
+ space = btree->entries_per_leaf - info.dleaf_count;
+#if 0
+ /* If there is no space to store 1 dex (start and end), split */
+ if (space <= 2)
+ goto need_split;
+#else
+ /* If there is no space, split */
+ if (space <= 0)
+ goto need_split;
+#endif
+
+ err = rq->seg_find(btree, rq, space, seg_len, &alloc_len);
+ if (err < 0) {
+ assert(err != -ENOSPC); /* block reservation bug */
+ tux3_err(sb, "extent allocation failed: %d", err);
+ return err;
}
- /* How many diskextent2 is needed for head? */
- need += dex_start - dleaf->table;
-
- /* Find end of diskextent2 to overwrite */
- dex_end = dleaf2_lookup_index(btree, dleaf, limit);
- if (dex_end < dex_limit) {
- if (dex_end < dex_start) {
- /* This is splitting one extent */
- between = 0;
- } else
- between = (dex_end - dex_start) + 1;
-
- /* Prepare to overwrite end */
- get_extent(dex_end, &ex);
- end_physical = ex.physical;
- if (end_physical)
- end_physical += limit - ex.logical;
-
- /* How many diskextent2 is needed for tail? */
- need += (dex_limit - dex_end) - 1;
- } else {
- between = dex_end - dex_start;
- /* Write new sentinel */
- end_physical = 0;
+ seg_cnt = rq->seg_cnt - rq->seg_idx;
+ assert(seg_cnt > 0);
+
+ /* Ugh, there was not space enough to store, adjust number of seg[]. */
+ while (seg_len != alloc_len) {
+ seg_len = alloc_len;
+ /* Re-calculate end of seg[] can be allocated */
+ find_end_dex(btree, dleaf, key->start + alloc_len, &info);
+
+ /* Shrink segments to fit to space of dleaf2 */
+ while (info.dleaf_count + seg_cnt > btree->entries_per_leaf) {
+ seg_cnt--;
+ alloc_len -= rq->seg[rq->seg_idx + seg_cnt].count;
+ if (!seg_cnt) {
+ /* Didn't fit at all, cancel allocation */
+ rq->seg_alloc(btree, rq, 0);
+ goto need_split;
+ }
+ }
}
- need_split = 0;
- rest_segs = 0;
- /* Check if we need leaf split */
- if (need > btree->entries_per_leaf) {
- need_split = 1;
-
- /*
- * If there is no space, we temporary merge segs as hole.
- * Then, we overwrite existent diskextent2 now (and it
- * will be overwritten by real segs after split)
- * to avoid re-calculate for temporary state.
- */
- rest_segs = need - btree->entries_per_leaf;
- /* Can we write 1 seg at least? */
- if (rest_segs >= write_segs) {
- /* FIXME: use better split position */
- if (dex_start + 1 < dex_limit - 1) {
- get_extent(dex_start + 1, &ex);
- *split_hint = ex.logical;
- } else
- *split_hint = key->start;
-
- return need_split;
- }
- /* We can write partially segs and temporary hole */
- write_segs -= rest_segs;
-#if 1
- /* Just for debugging below assert() */
- need -= rest_segs;
+ /* Commit allocation of writable segments */
+ err = rq->seg_alloc(btree, rq, seg_len);
+ assert(key->start + seg_len <= key_limit);
+#if 0
+ tux3_dbg("start %lu, end %lu",
+ info.start_dex - dleaf->table, info.end_dex - dleaf->table);
+ tux3_dbg("dleaf_count %u (%u) (seg_cnt %u, overwrite %lu, sentinel %u)",
+ info.dleaf_count, info.dleaf_count + seg_cnt,
+ seg_cnt, info.end_dex - info.start_dex, info.need_sentinel);
#endif
- /* Reserve space for temporary hole */
- rest_segs++;
- }
/*
- * Allocate blocks to seg after dleaf redirect. With this, our
- * allocation order is, bnode => dleaf => data, and we can use
- * physical address of dleaf as allocation hint for data blocks.
+ * Free segments which is overwritten.
*/
- ret = rq->seg_alloc(btree, rq, write_segs);
- if (ret < 0) {
- assert(ret != -ENOSPC); /* block reservation bug */
- tux3_err(sb, "extent allocation failed: %d", ret);
- return ret;
- } else if (ret) {
- /*
- * There was partial allocation. Total length of
- * segments, or size of rq->seg[] can be changed, so
- * recalc writable segments.
- */
- key->len = seg_total_count(rq->seg + rq->seg_idx,
- rq->seg_cnt - rq->seg_idx);
- goto recheck;
+ free_len = seg_len;
+ if (info.start_count) {
+ unsigned count = min_t(block_t, free_len, info.start_count);
+ if (info.start_block)
+ rq->seg_free(btree, info.start_block, count);
+ free_len -= count;
+ }
+ if (info.start_dex < info.end_dex) {
+ struct diskextent2 *limit = info.end_dex;
+
+ if (limit != dleaf->table + be16_to_cpu(dleaf->count))
+ limit++;
+
+ get_extent(info.start_dex, &ex);
+ for (dex = info.start_dex + 1; free_len && dex < limit; dex++) {
+ block_t prev = ex.logical, physical = ex.physical;
+ unsigned count;
+
+ get_extent(dex, &ex);
+ count = min_t(block_t, free_len, ex.logical - prev);
+ if (physical)
+ rq->seg_free(btree, physical, count);
+ free_len -= count;
+ }
}
- /* Expand/shrink space for segs */
- dleaf2_resize(dleaf, dex_start, (write_segs + 1) - between);
- assert(need == be16_to_cpu(dleaf->count));
+ /* Calculate difference of dleaf->count on old and new. */
+ diff = seg_cnt - info.overwrite_cnt + info.need_sentinel;
+ /*
+ * Expand/shrink space for segs
+ */
+ dleaf2_resize(dleaf, info.end_dex, diff);
+ assert(info.dleaf_count + seg_cnt == be16_to_cpu(dleaf->count));
+ assert(info.dleaf_count + seg_cnt <= btree->entries_per_leaf);
- /* Fill extents */
- while (rq->seg_idx < rq->seg_cnt - rest_segs) {
+ /*
+ * Fill extents
+ */
+ while (seg_len) {
struct block_segment *seg = rq->seg + rq->seg_idx;
- put_extent(dex_start, sb->version, key->start, seg->block);
+ put_extent(info.start_dex, sb->version, key->start, seg->block);
key->start += seg->count;
key->len -= seg->count;
+
+ seg_len -= seg->count;
rq->seg_idx++;
- dex_start++;
+ info.start_dex++;
}
- if (rest_segs) {
- assert(need_split);
- /* Fill as temporary hole */
- put_extent(dex_start, sb->version, key->start, 0);
- dex_start++;
-
- /* Split at half. FIXME: better split position? */
- *split_hint = dleaf2_split_at_center(dleaf);
+ if (info.need_sentinel) {
+ /* Fill sentinel */
+ put_extent(info.start_dex, sb->version, key->start,
+ info.end_block);
}
- /* Fill sentinel */
- put_extent(dex_start, sb->version, limit, end_physical);
- return need_split;
+ if (rq->seg_cnt == rq->seg_max) {
+ /* Stop if there is no space in seg[] */
+ key->len = 0;
+ } else if (key->start < key_limit && key->len) {
+ /* If there are remaining range, split */
+ goto need_split;
+ }
+
+ return 0;
+
+need_split:
+ /* FIXME: use better split position */
+ *split_hint = dleaf2_split_at_center(dleaf);
+ return 1; /* try to split dleaf2 */
}
/* Read extents */
diff --git a/fs/tux3/dleaf2.h b/fs/tux3/dleaf2.h
index 7d2f1cf3526a21..7c392175e10be6 100644
--- a/fs/tux3/dleaf2.h
+++ b/fs/tux3/dleaf2.h
@@ -4,13 +4,18 @@
struct dleaf_req {
struct btree_key_range key; /* index and count */
- int seg_idx; /* Current offset for seg[] */
int seg_cnt; /* How many segs are available */
int seg_max; /* Max size of seg[] */
struct block_segment *seg; /* Pointer to seg[] */
+ int seg_idx; /* use by dleaf2_write() internally */
+ int overwrite;
+
/* Callback to allocate blocks to ->seg for write */
- int (*seg_alloc)(struct btree *, struct dleaf_req *, int);
+ int (*seg_find)(struct btree *, struct dleaf_req *, int, unsigned,
+ unsigned *);
+ int (*seg_alloc)(struct btree *, struct dleaf_req *, unsigned);
+ void (*seg_free)(struct btree *, block_t, unsigned);
};
static inline unsigned seg_total_count(struct block_segment *seg, int nr_segs)
diff --git a/fs/tux3/filemap.c b/fs/tux3/filemap.c
index 150f56997cb736..9023d7d53a5127 100644
--- a/fs/tux3/filemap.c
+++ b/fs/tux3/filemap.c
@@ -351,6 +351,11 @@ out_unlock:
return segs;
}
+static void seg_free(struct btree *btree, block_t block, unsigned count)
+{
+ map_bfree(btree_inode(btree), block, count);
+}
+
/*
* Callback to allocate blocks to ->seg. dleaf is doing to write segs,
* now we have to assign physical address to segs.
@@ -407,20 +412,20 @@ static int seg_alloc(struct btree *btree, struct dleaf_req *rq,
}
static int overwrite_seg_alloc(struct btree *btree, struct dleaf_req *rq,
- int write_segs)
+ unsigned write_segs)
{
/* If overwrite mode, set SEG_NEW to allocated seg */
return seg_alloc(btree, rq, write_segs, BLOCK_SEG_NEW);
}
static int redirect_seg_alloc(struct btree *btree, struct dleaf_req *rq,
- int write_segs)
+ unsigned write_segs)
{
/* If redirect mode, doesn't set SEG_NEW to allocated seg */
return seg_alloc(btree, rq, write_segs, 0);
}
-static int (*seg_alloc_funs[])(struct btree *, struct dleaf_req *, int) = {
+static int (*seg_alloc_funs[])(struct btree *, struct dleaf_req *, unsigned) = {
[MAP_WRITE] = overwrite_seg_alloc,
[MAP_REDIRECT] = redirect_seg_alloc,
};
@@ -539,7 +544,9 @@ static int map_region2(struct inode *inode, block_t start, unsigned count,
.seg_cnt = segs,
.seg_max = seg_max,
.seg = seg,
- .seg_alloc = seg_alloc_funs[mode],
+ .seg_find = seg_find,
+ .seg_alloc = seg_alloc,
+ .seg_free = seg_free,
};
err = btree_write(cursor, &rq.key);
if (err) {