author	Daniel Phillips <daniel@tux3.org>	2014-05-01 17:37:56 +0900
committer	Daniel Phillips <daniel@tux3.org>	2014-05-01 17:37:56 +0900
commit	6cc4b7554124f72798791d2e241dd35b53956def (patch)
tree	220af22fe20df9c694030d7e4d53b0e626f69e66
parent	90b49960c5736a5dd3d73a633925330a7a5f3f26 (diff)
download	linux-tux3-6cc4b7554124f72798791d2e241dd35b53956def.tar.gz
tux3: Make block allocation goal depend on inode number and logical file offset
Make block allocation goal depend on inode number and logical file offset.

If the inode number is outside the volume block range, "fold" it by masking against the volume mask, which is the volume size rounded up to a power of two, minus one. If the result is still outside the volume, subtract half of the rounded size so that it falls inside. Volume coverage of the folded values is not perfectly even, but close enough. The purpose of masking this way is to resist fragmentation if the volume size changes.

Use linear allocation, without extrapolation, while inodes are being written in sequential inum order. If the linear position drifts too far from the extrapolated goal, use the extrapolated goal instead.

Signed-off-by: Daniel Phillips <d.phillips@partner.samsung.com>
Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
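To make the folding concrete, here is a minimal standalone sketch of the goal-folding computation. The names fold_goal() and roundup_pow_of_two64() mirror the patch below, but the volume size and the sample goal are illustrative values only, the sb bookkeeping is left out, and the loop-based rounding stands in for the kernel's fls64()-based helper.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t block_t;

/* Smallest power of two >= n, for n > 0 (stand-in for the fls64() version) */
static block_t roundup_pow_of_two64(block_t n)
{
	block_t p = 1;
	while (p < n)
		p <<= 1;
	return p;
}

/* Fold an arbitrary goal back into [0, volblocks), as fold_goal() does */
static block_t fold_goal(block_t volblocks, block_t goal)
{
	block_t volmask = roundup_pow_of_two64(volblocks) - 1;

	goal &= volmask;			/* mask into the power-of-two range */
	if (goal >= volblocks)			/* still past the end of the volume? */
		goal -= (volmask + 1) >> 1;	/* shift down by half the rounded size */
	return goal;
}

int main(void)
{
	block_t volblocks = 786432;	/* hypothetical 3GB volume of 4KB blocks */

	/* A goal derived from inum + offset that overshoots the volume
	 * still lands inside it after folding. */
	printf("%llu\n", (unsigned long long)fold_goal(volblocks, 5000000));
	return 0;
}

In the extent policy itself (policy_extents() in the diff below), the folded goal is only applied when the current linear allocation position has drifted more than "far" blocks from inum + offset, where far = 1 << (21 - blockbits), i.e. about 2MB worth of blocks.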
-rw-r--r--	fs/tux3/buffer_writeback.c	6
-rw-r--r--	fs/tux3/commit.c	17
-rw-r--r--	fs/tux3/policy.c	53
-rw-r--r--	fs/tux3/tux3.h	5
-rw-r--r--	fs/tux3/writeback.c	3
5 files changed, 77 insertions(+), 7 deletions(-)
diff --git a/fs/tux3/buffer_writeback.c b/fs/tux3/buffer_writeback.c
index 95a1c1734af079..55abea4df10beb 100644
--- a/fs/tux3/buffer_writeback.c
+++ b/fs/tux3/buffer_writeback.c
@@ -782,6 +782,7 @@ static int buffer_index_cmp(void *priv, struct list_head *a,
int flush_list(struct inode *inode, struct tux3_iattr_data *idata,
struct list_head *head, int req_flag)
{
+ struct tux3_inode *tuxnode = tux_inode(inode);
struct bufvec bufvec;
int err = 0;
@@ -798,8 +799,9 @@ int flush_list(struct inode *inode, struct tux3_iattr_data *idata,
while (bufvec_next_buffer_page(&bufvec)) {
/* Collect contiguous buffer range */
if (bufvec_contig_collect(&bufvec)) {
- /* Start I/O */
- err = tux_inode(inode)->io(WRITE | req_flag, &bufvec);
+ policy_extents(&bufvec);
+
+ err = tuxnode->io(WRITE | req_flag, &bufvec);
if (err)
break;
}
diff --git a/fs/tux3/commit.c b/fs/tux3/commit.c
index f54a0fc33194e5..a01fd9ef0f51f7 100644
--- a/fs/tux3/commit.c
+++ b/fs/tux3/commit.c
@@ -74,6 +74,12 @@ static loff_t calc_maxbytes(loff_t blocksize)
return min_t(loff_t, blocksize << MAX_BLOCKS_BITS, MAX_LFS_FILESIZE);
}
+/* FIXME: Should go into core */
+static inline u64 roundup_pow_of_two64(u64 n)
+{
+ return 1ULL << fls64(n - 1);
+}
+
/* Setup sb by on-disk super block */
static void __setup_sb(struct sb *sb, struct disksuper *super)
{
@@ -92,6 +98,7 @@ static void __setup_sb(struct sb *sb, struct disksuper *super)
sb->blocksize = 1 << sb->blockbits;
sb->blockmask = (1 << sb->blockbits) - 1;
sb->groupbits = 13; // FIXME: put in disk super?
+ sb->volmask = roundup_pow_of_two64(sb->volblocks) - 1;
sb->entries_per_node = calc_entries_per_node(sb->blocksize);
/* Initialize base indexes for atable */
atable_init_base(sb);
@@ -108,10 +115,12 @@ static void __setup_sb(struct sb *sb, struct disksuper *super)
sb->atomgen = be32_to_cpu(super->atomgen);
sb->freeatom = be32_to_cpu(super->freeatom);
/* logchain and logcount are read from super directly */
- trace("blocksize %u, blockbits %u, blockmask %08x",
- sb->blocksize, sb->blockbits, sb->blockmask);
- trace("volblocks %Lu, freeblocks %Lu, freeinodes %Lu, nextblock %Lu",
- sb->volblocks, sb->freeblocks, sb->freeinodes, sb->nextblock);
+ trace("blocksize %u, blockbits %u, blockmask %08x, groupbits %u",
+ sb->blocksize, sb->blockbits, sb->blockmask, sb->groupbits);
+ trace("volblocks %Lu, volmask %Lx",
+ sb->volblocks, sb->volmask);
+ trace("freeblocks %Lu, freeinodes %Lu, nextblock %Lu",
+ sb->freeblocks, sb->freeinodes, sb->nextblock);
trace("atom_dictsize %Lu, freeatom %u, atomgen %u",
(s64)sb->atomdictsize, sb->freeatom, sb->atomgen);
trace("logchain %Lu, logcount %u",
diff --git a/fs/tux3/policy.c b/fs/tux3/policy.c
index 0e146c4c57b192..4e02b4730150e9 100644
--- a/fs/tux3/policy.c
+++ b/fs/tux3/policy.c
@@ -67,9 +67,62 @@ inum_t policy_inum(struct inode *dir, loff_t where, struct inode *inode)
return goal;
}
+
+static block_t fold_goal(struct sb *sb, block_t goal)
+{
+ goal &= sb->volmask;
+ if (goal >= sb->volblocks)
+ goal -= (sb->volmask + 1) >> 1;
+ return goal;
+}
+
+void policy_inode_init(inum_t *previous)
+{
+ *previous = TUX_INVALID_INO - 1;
+}
+
+/*
+ * Policy to set up the allocation goal for an inode when starting inode flush.
+ */
+void policy_inode(struct inode *inode, inum_t *previous)
+{
+ struct sb *sb = tux_sb(inode->i_sb);
+ struct tux3_inode *tuxnode = tux_inode(inode);
+ /*
+ * Let inum determine block allocation goal, unless inodes are
+ * written in inum order, then use linear allocation.
+ */
+ if (tuxnode->inum != ++(*previous))
+ sb->nextblock = fold_goal(sb, *previous = tuxnode->inum);
+}
+
+/*
+ * Policy to choose the physical address for creating extents.
+ */
+void policy_extents(struct bufvec *bufvec)
+{
+ struct inode *inode = bufvec_inode(bufvec);
+ struct sb *sb = tux_sb(inode->i_sb);
+ block_t base = tux_inode(inode)->inum;
+ unsigned blockbits = sb->blockbits, far = 1 << (21 - blockbits);
+ struct buffer_head *buf = bufvec_contig_buf(bufvec);
+ block_t offset = bufindex(buf);
+
+ if (abs64(base + offset - sb->nextblock) > far)
+ sb->nextblock = fold_goal(sb, base + offset);
+}
#else /* POLICY_LINEAR */
inum_t policy_inum(struct inode *dir, loff_t where, struct inode *inode)
{
return tux_sb(dir->i_sb)->nextinum;
}
+void policy_inode_init(inum_t *previous)
+{
+}
+void policy_inode(struct inode *inode, inum_t *previous)
+{
+}
+void policy_extents(struct bufvec *bufvec)
+{
+}
#endif /* POLICY_LINEAR */
diff --git a/fs/tux3/tux3.h b/fs/tux3/tux3.h
index 08f1021178d74e..065695b3509d38 100644
--- a/fs/tux3/tux3.h
+++ b/fs/tux3/tux3.h
@@ -286,7 +286,7 @@ struct sb {
unsigned blocksize, blockbits, blockmask, groupbits;
u64 freeinodes; /* Number of free inode numbers. This is
* including the deferred allocated inodes */
- block_t volblocks, freeblocks, nextblock;
+ block_t volblocks, volmask, freeblocks, nextblock;
inum_t nextinum; /* FIXME: temporary hack to avoid to find
* same area in itree for free inum. */
unsigned entries_per_node; /* must be per-btree type, get rid of this */
@@ -930,6 +930,9 @@ struct replay *tux3_init_fs(struct sb *sbi);
/* policy.c */
inum_t policy_inum(struct inode *dir, loff_t where, struct inode *inode);
+void policy_inode_init(inum_t *previous);
+void policy_inode(struct inode *inode, inum_t *previous);
+void policy_extents(struct bufvec *bufvec);
/* replay.c */
struct replay *replay_stage1(struct sb *sb);
diff --git a/fs/tux3/writeback.c b/fs/tux3/writeback.c
index 5098aeebb29d16..4b6cb50f52b1d9 100644
--- a/fs/tux3/writeback.c
+++ b/fs/tux3/writeback.c
@@ -605,6 +605,7 @@ int tux3_flush_inodes(struct sb *sb, unsigned delta)
struct sb_delta_dirty *s_ddc = tux3_sb_ddc(sb, delta);
struct list_head *dirty_inodes = &s_ddc->dirty_inodes;
struct inode_delta_dirty *i_ddc, *safe;
+ inum_t private;
int err;
/* ->dirty_inodes owned by backend. No need to lock here */
@@ -612,10 +613,12 @@ int tux3_flush_inodes(struct sb *sb, unsigned delta)
/* Sort by tuxnode->inum. FIXME: do we want to sort? */
list_sort(&delta, dirty_inodes, inode_inum_cmp);
+ policy_inode_init(&private);
list_for_each_entry_safe(i_ddc, safe, dirty_inodes, dirty_list) {
struct tux3_inode *tuxnode = i_ddc_to_inode(i_ddc, delta);
struct inode *inode = &tuxnode->vfs_inode;
+ policy_inode(inode, &private);
assert(!tux3_is_inode_no_flush(inode));
err = tux3_flush_inode(inode, delta, 0);