aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/tux3/Kconfig7
-rw-r--r--fs/tux3/Makefile2
-rw-r--r--fs/tux3/buffer.c21
-rw-r--r--fs/tux3/buffer_fork.c10
-rw-r--r--fs/tux3/commit.c2
-rw-r--r--fs/tux3/filemap.c6
-rw-r--r--fs/tux3/filemap_mmap.c247
-rw-r--r--fs/tux3/inode.c2
-rw-r--r--fs/tux3/mmap_builtin_hack.c95
-rw-r--r--fs/tux3/mmap_builtin_hack.h13
-rw-r--r--fs/tux3/tux3.h3
11 files changed, 385 insertions, 23 deletions
diff --git a/fs/tux3/Kconfig b/fs/tux3/Kconfig
index a952804d70eeb7..212e4e60ee4964 100644
--- a/fs/tux3/Kconfig
+++ b/fs/tux3/Kconfig
@@ -5,3 +5,10 @@ config TUX3
be called tux3.
If unsure, see you again in six months.
+
+config TUX3_MMAP
+ bool "Tux3 mmap support"
+ depends on TUX3
+ help
+	  Adds EXPORT_SYMBOL_GPL(), etc. to provide the functionality needed for mmap.
+	  This requires building tux3 as a module with mmap support.
diff --git a/fs/tux3/Makefile b/fs/tux3/Makefile
index 36233d8c309bf7..9623a540dcb2d4 100644
--- a/fs/tux3/Makefile
+++ b/fs/tux3/Makefile
@@ -14,4 +14,6 @@ EXTRA_CFLAGS += -Werror -std=gnu99 -Wno-declaration-after-statement
#EXTRA_CFLAGS += -DTUX3_FLUSHER=TUX3_FLUSHER_SYNC
#EXTRA_CFLAGS += -DTUX3_FLUSHER=TUX3_FLUSHER_ASYNC_OWN
EXTRA_CFLAGS += -DTUX3_FLUSHER=TUX3_FLUSHER_ASYNC_HACK
+
+obj-$(CONFIG_TUX3_MMAP) += mmap_builtin_hack.o
endif
diff --git a/fs/tux3/buffer.c b/fs/tux3/buffer.c
index fe7681466878b9..3d057c33417a9b 100644
--- a/fs/tux3/buffer.c
+++ b/fs/tux3/buffer.c
@@ -87,25 +87,6 @@ int buffer_can_modify(struct buffer_head *buffer, unsigned delta)
tux3_bufsta_get_delta(state) == tux3_delta(delta);
}
-/*
- * Copy of __set_page_dirty() without __mark_inode_dirty(). Caller
- * decides whether mark inode dirty or not.
- */
-static void __tux3_set_page_dirty(struct page *page,
- struct address_space *mapping, int warn)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&mapping->tree_lock, flags);
- if (page->mapping) { /* Race with truncate? */
- WARN_ON_ONCE(warn && !PageUptodate(page));
- account_page_dirtied(page, mapping);
- radix_tree_tag_set(&mapping->page_tree,
- page_index(page), PAGECACHE_TAG_DIRTY);
- }
- spin_unlock_irqrestore(&mapping->tree_lock, flags);
-}
-
/* Set our delta dirty bits, then add to our dirty buffers list */
static inline void __tux3_set_buffer_dirty_list(struct address_space *mapping,
struct buffer_head *buffer, int delta,
@@ -147,7 +128,7 @@ int tux3_set_buffer_dirty_list(struct address_space *mapping,
if (!TestSetPageDirty(page)) {
struct address_space *mapping = page->mapping;
if (mapping)
- __tux3_set_page_dirty(page, mapping, 0);
+ __tux3_set_page_dirty_account(page, mapping, 0);
return 1;
}
}
diff --git a/fs/tux3/buffer_fork.c b/fs/tux3/buffer_fork.c
index 64d1d0f6dc92fb..fd3508e0f1b1d4 100644
--- a/fs/tux3/buffer_fork.c
+++ b/fs/tux3/buffer_fork.c
@@ -195,6 +195,8 @@ void free_forked_buffers(struct sb *sb, struct inode *inode, int force)
* Block fork core
*/
+#include "mmap_builtin_hack.h"
+
/*
* This replaces the oldpage on radix-tree with newpage atomically.
*
@@ -460,7 +462,8 @@ struct buffer_head *blockdirty(struct buffer_head *buffer, unsigned newdelta)
lock_page(oldpage);
/* This happens on partially dirty page. */
-// assert(PageUptodate(page));
+// assert(PageUptodate(oldpage));
+ assert(!page_mapped(oldpage));
switch ((ret_needfork = need_fork(oldpage, buffer, newdelta))) {
case RET_FORKED:
@@ -635,6 +638,10 @@ struct page *pagefork_for_blockdirty(struct page *oldpage, unsigned newdelta)
* newpage is available on radix-tree here.
*/
SetPageForked(oldpage);
+ /*
+ * Update PTEs for forked page.
+ */
+ page_cow_file(oldpage, newpage);
unlock_page(oldpage);
/* Register forked buffer to free forked page later */
@@ -660,6 +667,7 @@ int bufferfork_to_invalidate(struct address_space *mapping, struct page *page)
unsigned delta = tux3_inode_delta(mapping->host);
assert(PageLocked(page));
+ assert(!page_mapped(page));
switch (need_fork(page, NULL, delta)) {
case RET_NEED_FORK:
diff --git a/fs/tux3/commit.c b/fs/tux3/commit.c
index a01fd9ef0f51f7..fc7f85e6f912ce 100644
--- a/fs/tux3/commit.c
+++ b/fs/tux3/commit.c
@@ -695,7 +695,7 @@ void change_end_atomic(struct sb *sb)
* This is used for nested change_begin/end. We should not use this
* usually (nesting change_begin/end is wrong for normal operations).
*
- * For now, this is only used for ->evict_inode() debugging.
+ * For now, this is only used for ->evict_inode() debugging, and page fault.
*/
void change_begin_atomic_nested(struct sb *sb, void **ptr)
{
diff --git a/fs/tux3/filemap.c b/fs/tux3/filemap.c
index a53c71595106a8..612ac41e2490bc 100644
--- a/fs/tux3/filemap.c
+++ b/fs/tux3/filemap.c
@@ -960,6 +960,8 @@ static sector_t tux3_bmap(struct address_space *mapping, sector_t iblock)
return blocknr;
}
+#include "filemap_mmap.c"
+
const struct address_space_operations tux_file_aops = {
.readpage = tux3_readpage,
.readpages = tux3_readpages,
@@ -970,6 +972,7 @@ const struct address_space_operations tux_file_aops = {
.write_begin = tux3_file_write_begin,
.write_end = tux3_file_write_end,
.bmap = tux3_bmap,
+ .set_page_dirty = tux3_set_page_dirty_assert,
.invalidatepage = tux3_invalidatepage,
// .releasepage = ext4_releasepage,
#ifdef TUX3_DIRECT_IO
@@ -1000,6 +1003,7 @@ const struct address_space_operations tux_symlink_aops = {
.write_begin = tux3_symlink_write_begin,
.write_end = __tux3_file_write_end,
.bmap = tux3_bmap,
+ .set_page_dirty = tux3_set_page_dirty_bug,
.invalidatepage = tux3_invalidatepage,
// .releasepage = ext4_releasepage,
#ifdef TUX3_DIRECT_IO
@@ -1042,6 +1046,7 @@ const struct address_space_operations tux_blk_aops = {
.writepages = tux3_disable_writepages,
.write_begin = tux3_blk_write_begin,
.bmap = tux3_bmap,
+ .set_page_dirty = tux3_set_page_dirty_bug,
.invalidatepage = tux3_invalidatepage,
// .migratepage = buffer_migrate_page, /* FIXME */
// .is_partially_uptodate = block_is_partially_uptodate,
@@ -1087,6 +1092,7 @@ const struct address_space_operations tux_vol_aops = {
.writepage = tux3_disable_writepage,
.writepages = tux3_disable_writepages,
.write_begin = tux3_vol_write_begin,
+ .set_page_dirty = tux3_set_page_dirty_bug,
.invalidatepage = tux3_invalidatepage,
// .is_partially_uptodate = block_is_partially_uptodate,
// .is_dirty_writeback = buffer_check_dirty_writeback,
diff --git a/fs/tux3/filemap_mmap.c b/fs/tux3/filemap_mmap.c
new file mode 100644
index 00000000000000..93b9f9cdf40c72
--- /dev/null
+++ b/fs/tux3/filemap_mmap.c
@@ -0,0 +1,247 @@
+/*
+ * mmap(2) handlers to support page fork.
+ */
+
+/*
+ * Copy of __set_page_dirty() without __mark_inode_dirty(). Caller
+ * decides whether to mark the inode dirty or not.
+ */
+void __tux3_set_page_dirty_account(struct page *page,
+ struct address_space *mapping, int warn)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&mapping->tree_lock, flags);
+ if (page->mapping) { /* Race with truncate? */
+ WARN_ON_ONCE(warn && !PageUptodate(page));
+ account_page_dirtied(page, mapping);
+ radix_tree_tag_set(&mapping->page_tree,
+ page_index(page), PAGECACHE_TAG_DIRTY);
+ }
+ spin_unlock_irqrestore(&mapping->tree_lock, flags);
+}
+
+static void __tux3_set_page_dirty(struct page *page,
+ struct address_space *mapping, int warn)
+{
+ __tux3_set_page_dirty_account(page, mapping, warn);
+ __tux3_mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+}
+
+static int tux3_set_page_dirty_buffers(struct page *page)
+{
+#if 0
+ struct address_space *mapping = page->mapping;
+ int newly_dirty;
+
+ spin_lock(&mapping->private_lock);
+ if (page_has_buffers(page)) {
+ struct buffer_head *head = page_buffers(page);
+ struct buffer_head *bh = head;
+
+ do {
+ set_buffer_dirty(bh);
+ bh = bh->b_this_page;
+ } while (bh != head);
+ }
+ newly_dirty = !TestSetPageDirty(page);
+ spin_unlock(&mapping->private_lock);
+
+ if (newly_dirty)
+ __set_page_dirty(page, mapping, 1);
+
+ return newly_dirty;
+#else
+ struct address_space *mapping = page->mapping;
+ unsigned delta = tux3_get_current_delta();
+ struct buffer_head *head, *buffer;
+ int newly_dirty;
+
+ /* This should be tux3 page and locked */
+ assert(mapping);
+ assert(PageLocked(page));
+ /* This page should have buffers (caller should allocate) */
+ assert(page_has_buffers(page));
+
+ /*
+	 * FIXME: we dirty all buffers on this page, so we optimize this
+	 * by avoiding checking page-dirty/inode-dirty multiple times.
+ */
+ newly_dirty = 0;
+ if (!TestSetPageDirty(page)) {
+ __tux3_set_page_dirty(page, mapping, 1);
+ newly_dirty = 1;
+ }
+ buffer = head = page_buffers(page);
+ do {
+ __tux3_mark_buffer_dirty(buffer, delta);
+ buffer = buffer->b_this_page;
+ } while (buffer != head);
+#endif
+ return newly_dirty;
+}
+
+/* Copy of set_page_dirty() */
+static int tux3_set_page_dirty(struct page *page)
+{
+ /*
+ * readahead/lru_deactivate_page could remain
+ * PG_readahead/PG_reclaim due to race with end_page_writeback
+ * About readahead, if the page is written, the flags would be
+ * reset. So no problem.
+ * About lru_deactivate_page, if the page is redirty, the flag
+ * will be reset. So no problem. but if the page is used by readahead
+ * it will confuse readahead and make it restart the size rampup
+ * process. But it's a trivial problem.
+ */
+ ClearPageReclaim(page);
+
+ return tux3_set_page_dirty_buffers(page);
+}
+
+static int tux3_set_page_dirty_assert(struct page *page)
+{
+ struct buffer_head *head, *buffer;
+
+ /* See comment of tux3_set_page_dirty() */
+ ClearPageReclaim(page);
+
+	/* Are there any cases where this is called for the old page of a forked page? */
+ WARN_ON(PageForked(page));
+
+	/* This page should be dirty already, otherwise we will lose data. */
+ assert(PageDirty(page));
+	/* All buffers should be dirty already, otherwise we will lose data. */
+ assert(page_has_buffers(page));
+ head = buffer = page_buffers(page);
+ do {
+ assert(buffer_dirty(buffer));
+ buffer = buffer->b_this_page;
+ } while (buffer != head);
+
+ return 0;
+}
+
+static int tux3_set_page_dirty_bug(struct page *page)
+{
+ /* See comment of tux3_set_page_dirty() */
+ ClearPageReclaim(page);
+
+ assert(0);
+ /* This page should not be mmapped */
+ assert(!page_mapped(page));
+	/* This page should be dirty already, otherwise we will lose data. */
+ assert(PageDirty(page));
+ return 0;
+}
+
+static int tux3_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct inode *inode = file_inode(vma->vm_file);
+ struct sb *sb = tux_sb(inode->i_sb);
+ struct page *clone, *page = vmf->page;
+ void *ptr;
+ int ret;
+
+ sb_start_pagefault(inode->i_sb);
+
+retry:
+ lock_page(page);
+ if (page->mapping != mapping(inode)) {
+ unlock_page(page);
+ ret = VM_FAULT_NOPAGE;
+ goto out;
+ }
+
+ /*
+	 * A page fault can happen while holding change_begin/end()
+ * (e.g. copy of user data between ->write_begin and
+ * ->write_end for write(2)).
+ *
+ * So, we use nested version here.
+ */
+ change_begin_atomic_nested(sb, &ptr);
+
+ /*
+ * FIXME: Caller releases vmf->page (old_page) unconditionally.
+ * So, this takes additional refcount to workaround it.
+ */
+ if (vmf->page == page)
+ page_cache_get(page);
+
+ clone = pagefork_for_blockdirty(page, tux3_get_current_delta());
+ if (IS_ERR(clone)) {
+ /* Someone did page fork */
+ pgoff_t index = page->index;
+
+ change_end_atomic_nested(sb, ptr);
+ unlock_page(page);
+ page_cache_release(page);
+
+ switch (PTR_ERR(clone)) {
+ case -EAGAIN:
+ page = find_get_page(inode->i_mapping, index);
+ assert(page);
+ goto retry;
+ case -ENOMEM:
+ ret = VM_FAULT_OOM;
+ break;
+ default:
+ ret = VM_FAULT_SIGBUS;
+ break;
+ }
+
+ goto out;
+ }
+
+ file_update_time(vma->vm_file);
+
+ /* Assign buffers to dirty */
+ if (!page_has_buffers(clone))
+ create_empty_buffers(clone, sb->blocksize, 0);
+
+ /*
+ * We mark the page dirty already here so that when freeze is in
+ * progress, we are guaranteed that writeback during freezing will
+ * see the dirty page and writeprotect it again.
+ */
+ tux3_set_page_dirty(clone);
+#if 1
+ /* FIXME: Caller doesn't see the changed vmf->page */
+ vmf->page = clone;
+
+ change_end_atomic_nested(sb, ptr);
+ /* FIXME: caller doesn't know about pagefork */
+ unlock_page(clone);
+ page_cache_release(clone);
+ ret = 0;
+// ret = VM_FAULT_LOCKED;
+#endif
+out:
+ sb_end_pagefault(inode->i_sb);
+
+ return ret;
+}
+
+static const struct vm_operations_struct tux3_file_vm_ops = {
+ .fault = filemap_fault,
+ .page_mkwrite = tux3_page_mkwrite,
+ .remap_pages = generic_file_remap_pages,
+};
+
+int tux3_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+#ifdef CONFIG_TUX3_MMAP
+ struct address_space *mapping = file->f_mapping;
+
+ if (!mapping->a_ops->readpage)
+ return -ENOEXEC;
+
+ file_accessed(file);
+ vma->vm_ops = &tux3_file_vm_ops;
+
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
diff --git a/fs/tux3/inode.c b/fs/tux3/inode.c
index 644deabc2cd22a..f759f8781d7a63 100644
--- a/fs/tux3/inode.c
+++ b/fs/tux3/inode.c
@@ -858,7 +858,7 @@ static const struct file_operations tux_file_fops = {
#ifdef CONFIG_COMPAT
// .compat_ioctl = fat_compat_dir_ioctl,
#endif
- .mmap = generic_file_mmap,
+ .mmap = tux3_file_mmap,
.open = generic_file_open,
.fsync = tux3_sync_file,
.splice_read = generic_file_splice_read,
diff --git a/fs/tux3/mmap_builtin_hack.c b/fs/tux3/mmap_builtin_hack.c
new file mode 100644
index 00000000000000..dade54f8fd41dd
--- /dev/null
+++ b/fs/tux3/mmap_builtin_hack.c
@@ -0,0 +1,95 @@
+/*
+ * mmap support helpers. But core doesn't provide functionality that
+ * pagefork needs.
+ *
+ * So, this hack adds EXPORT_SYMBOL_GPL() and inline functions, and
+ * is linked with the kernel statically.
+ *
+ * FIXME: we should patch the kernel instead.
+ */
+
+#include "tux3.h"
+#include <linux/rmap.h>
+#include <linux/mmu_notifier.h>
+
+extern unsigned long vma_address(struct page *page, struct vm_area_struct *vma);
+
+static int page_cow_one(struct page *oldpage, struct page *newpage,
+ struct vm_area_struct *vma, unsigned long address)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ pte_t oldptval, ptval, *pte;
+ spinlock_t *ptl;
+ int ret = 0;
+
+ pte = page_check_address(oldpage, mm, address, &ptl, 1);
+ if (!pte)
+ goto out;
+
+ flush_cache_page(vma, address, pte_pfn(*pte));
+ oldptval = ptep_clear_flush(vma, address, pte);
+
+ /* Take refcount for PTE */
+ page_cache_get(newpage);
+
+ /*
+	 * vm_page_prot doesn't have the writable bit, so a page fault will
+	 * occur again immediately after returning from this page fault.
+	 * The second page fault will then be resolved with the forked
+	 * page that was set here.
+	 *
+	 * FIXME: we should resolve the fault with a single page
+	 * fault. Maybe we will have to modify callers of
+	 * ->page_mkwrite().
+ */
+ ptval = mk_pte(newpage, vma->vm_page_prot);
+#if 0
+ if (pte_dirty(oldptval))
+ ptval = pte_mkdirty(ptval);
+ if (pte_young(oldptval))
+ ptval = pte_mkyoung(ptval);
+#endif
+ set_pte_at(mm, address, pte, ptval);
+
+ /* Update rmap accounting */
+ assert(!PageMlocked(oldpage)); /* Caller should migrate mlock flag */
+ page_remove_rmap(oldpage);
+ page_add_file_rmap(newpage);
+
+ /* no need to invalidate: a not-present page won't be cached */
+ update_mmu_cache(vma, address, pte);
+
+ pte_unmap_unlock(pte, ptl);
+
+ mmu_notifier_invalidate_page(mm, address);
+
+ /* Release refcount for PTE */
+ page_cache_release(oldpage);
+out:
+ return ret;
+}
+
+int page_cow_file(struct page *oldpage, struct page *newpage)
+{
+ struct address_space *mapping = page_mapping(oldpage);
+ pgoff_t pgoff = oldpage->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+ struct vm_area_struct *vma;
+ int ret = 0;
+
+ BUG_ON(!PageLocked(oldpage));
+ BUG_ON(!PageLocked(newpage));
+ BUG_ON(PageAnon(oldpage));
+ BUG_ON(mapping == NULL);
+
+ mutex_lock(&mapping->i_mmap_mutex);
+ vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
+ if (vma->vm_flags & VM_SHARED) {
+ unsigned long address = vma_address(oldpage, vma);
+ ret += page_cow_one(oldpage, newpage, vma, address);
+ }
+ }
+ mutex_unlock(&mapping->i_mmap_mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(page_cow_file);
diff --git a/fs/tux3/mmap_builtin_hack.h b/fs/tux3/mmap_builtin_hack.h
new file mode 100644
index 00000000000000..b313506ae61b0a
--- /dev/null
+++ b/fs/tux3/mmap_builtin_hack.h
@@ -0,0 +1,13 @@
+#ifndef _MMAP_HACK_H
+#define _MMAP_HACK_H
+
+#ifdef CONFIG_TUX3_MMAP
+int page_cow_file(struct page *oldpage, struct page *newpage);
+#else
+static inline int page_cow_file(struct page *oldpage, struct page *newpage)
+{
+ return 0;
+}
+#endif
+
+#endif /* !_MMAP_HACK_H */
diff --git a/fs/tux3/tux3.h b/fs/tux3/tux3.h
index eef92d4cac9f19..04fde94779ba08 100644
--- a/fs/tux3/tux3.h
+++ b/fs/tux3/tux3.h
@@ -674,6 +674,9 @@ int tux3_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
struct buffer_head *__get_buffer(struct page *page, int offset);
void tux3_try_cancel_dirty_page(struct page *page);
+void __tux3_set_page_dirty_account(struct page *page,
+ struct address_space *mapping, int warn);
+int tux3_file_mmap(struct file *file, struct vm_area_struct *vma);
extern const struct address_space_operations tux_file_aops;
extern const struct address_space_operations tux_symlink_aops;
extern const struct address_space_operations tux_blk_aops;