diff options
-rw-r--r-- | fs/tux3/Kconfig | 7 | ||||
-rw-r--r-- | fs/tux3/Makefile | 2 | ||||
-rw-r--r-- | fs/tux3/buffer.c | 21 | ||||
-rw-r--r-- | fs/tux3/buffer_fork.c | 10 | ||||
-rw-r--r-- | fs/tux3/commit.c | 2 | ||||
-rw-r--r-- | fs/tux3/filemap.c | 6 | ||||
-rw-r--r-- | fs/tux3/filemap_mmap.c | 247 | ||||
-rw-r--r-- | fs/tux3/inode.c | 2 | ||||
-rw-r--r-- | fs/tux3/mmap_builtin_hack.c | 95 | ||||
-rw-r--r-- | fs/tux3/mmap_builtin_hack.h | 13 | ||||
-rw-r--r-- | fs/tux3/tux3.h | 3 |
11 files changed, 385 insertions, 23 deletions
diff --git a/fs/tux3/Kconfig b/fs/tux3/Kconfig index a952804d70eeb7..212e4e60ee4964 100644 --- a/fs/tux3/Kconfig +++ b/fs/tux3/Kconfig @@ -5,3 +5,10 @@ config TUX3 be called tux3. If unsure, see you again in six months. + +config TUX3_MMAP + bool "Tux3 mmap support" + depends on TUX3 + help + Adds EXPORT_SYMBOL_GPL(), etc. to provide functionality for mmap. + This is needed to build tux3 as a module with mmap support. diff --git a/fs/tux3/Makefile b/fs/tux3/Makefile index 36233d8c309bf7..9623a540dcb2d4 100644 --- a/fs/tux3/Makefile +++ b/fs/tux3/Makefile @@ -14,4 +14,6 @@ EXTRA_CFLAGS += -Werror -std=gnu99 -Wno-declaration-after-statement #EXTRA_CFLAGS += -DTUX3_FLUSHER=TUX3_FLUSHER_SYNC #EXTRA_CFLAGS += -DTUX3_FLUSHER=TUX3_FLUSHER_ASYNC_OWN EXTRA_CFLAGS += -DTUX3_FLUSHER=TUX3_FLUSHER_ASYNC_HACK + +obj-$(CONFIG_TUX3_MMAP) += mmap_builtin_hack.o endif diff --git a/fs/tux3/buffer.c b/fs/tux3/buffer.c index fe7681466878b9..3d057c33417a9b 100644 --- a/fs/tux3/buffer.c +++ b/fs/tux3/buffer.c @@ -87,25 +87,6 @@ int buffer_can_modify(struct buffer_head *buffer, unsigned delta) tux3_bufsta_get_delta(state) == tux3_delta(delta); } -/* - * Copy of __set_page_dirty() without __mark_inode_dirty(). Caller - * decides whether mark inode dirty or not. - */ -static void __tux3_set_page_dirty(struct page *page, - struct address_space *mapping, int warn) -{ - unsigned long flags; - - spin_lock_irqsave(&mapping->tree_lock, flags); - if (page->mapping) { /* Race with truncate? 
*/ - WARN_ON_ONCE(warn && !PageUptodate(page)); - account_page_dirtied(page, mapping); - radix_tree_tag_set(&mapping->page_tree, - page_index(page), PAGECACHE_TAG_DIRTY); - } - spin_unlock_irqrestore(&mapping->tree_lock, flags); -} - /* Set our delta dirty bits, then add to our dirty buffers list */ static inline void __tux3_set_buffer_dirty_list(struct address_space *mapping, struct buffer_head *buffer, int delta, @@ -147,7 +128,7 @@ int tux3_set_buffer_dirty_list(struct address_space *mapping, if (!TestSetPageDirty(page)) { struct address_space *mapping = page->mapping; if (mapping) - __tux3_set_page_dirty(page, mapping, 0); + __tux3_set_page_dirty_account(page, mapping, 0); return 1; } } diff --git a/fs/tux3/buffer_fork.c b/fs/tux3/buffer_fork.c index 64d1d0f6dc92fb..fd3508e0f1b1d4 100644 --- a/fs/tux3/buffer_fork.c +++ b/fs/tux3/buffer_fork.c @@ -195,6 +195,8 @@ void free_forked_buffers(struct sb *sb, struct inode *inode, int force) * Block fork core */ +#include "mmap_builtin_hack.h" + /* * This replaces the oldpage on radix-tree with newpage atomically. * @@ -460,7 +462,8 @@ struct buffer_head *blockdirty(struct buffer_head *buffer, unsigned newdelta) lock_page(oldpage); /* This happens on partially dirty page. */ -// assert(PageUptodate(page)); +// assert(PageUptodate(oldpage)); + assert(!page_mapped(oldpage)); switch ((ret_needfork = need_fork(oldpage, buffer, newdelta))) { case RET_FORKED: @@ -635,6 +638,10 @@ struct page *pagefork_for_blockdirty(struct page *oldpage, unsigned newdelta) * newpage is available on radix-tree here. */ SetPageForked(oldpage); + /* + * Update PTEs for forked page. 
+ */ + page_cow_file(oldpage, newpage); unlock_page(oldpage); /* Register forked buffer to free forked page later */ @@ -660,6 +667,7 @@ int bufferfork_to_invalidate(struct address_space *mapping, struct page *page) unsigned delta = tux3_inode_delta(mapping->host); assert(PageLocked(page)); + assert(!page_mapped(page)); switch (need_fork(page, NULL, delta)) { case RET_NEED_FORK: diff --git a/fs/tux3/commit.c b/fs/tux3/commit.c index a01fd9ef0f51f7..fc7f85e6f912ce 100644 --- a/fs/tux3/commit.c +++ b/fs/tux3/commit.c @@ -695,7 +695,7 @@ void change_end_atomic(struct sb *sb) * This is used for nested change_begin/end. We should not use this * usually (nesting change_begin/end is wrong for normal operations). * - * For now, this is only used for ->evict_inode() debugging. + * For now, this is only used for ->evict_inode() debugging, and page fault. */ void change_begin_atomic_nested(struct sb *sb, void **ptr) { diff --git a/fs/tux3/filemap.c b/fs/tux3/filemap.c index a53c71595106a8..612ac41e2490bc 100644 --- a/fs/tux3/filemap.c +++ b/fs/tux3/filemap.c @@ -960,6 +960,8 @@ static sector_t tux3_bmap(struct address_space *mapping, sector_t iblock) return blocknr; } +#include "filemap_mmap.c" + const struct address_space_operations tux_file_aops = { .readpage = tux3_readpage, .readpages = tux3_readpages, @@ -970,6 +972,7 @@ const struct address_space_operations tux_file_aops = { .write_begin = tux3_file_write_begin, .write_end = tux3_file_write_end, .bmap = tux3_bmap, + .set_page_dirty = tux3_set_page_dirty_assert, .invalidatepage = tux3_invalidatepage, // .releasepage = ext4_releasepage, #ifdef TUX3_DIRECT_IO @@ -1000,6 +1003,7 @@ const struct address_space_operations tux_symlink_aops = { .write_begin = tux3_symlink_write_begin, .write_end = __tux3_file_write_end, .bmap = tux3_bmap, + .set_page_dirty = tux3_set_page_dirty_bug, .invalidatepage = tux3_invalidatepage, // .releasepage = ext4_releasepage, #ifdef TUX3_DIRECT_IO @@ -1042,6 +1046,7 @@ const struct 
address_space_operations tux_blk_aops = { .writepages = tux3_disable_writepages, .write_begin = tux3_blk_write_begin, .bmap = tux3_bmap, + .set_page_dirty = tux3_set_page_dirty_bug, .invalidatepage = tux3_invalidatepage, // .migratepage = buffer_migrate_page, /* FIXME */ // .is_partially_uptodate = block_is_partially_uptodate, @@ -1087,6 +1092,7 @@ const struct address_space_operations tux_vol_aops = { .writepage = tux3_disable_writepage, .writepages = tux3_disable_writepages, .write_begin = tux3_vol_write_begin, + .set_page_dirty = tux3_set_page_dirty_bug, .invalidatepage = tux3_invalidatepage, // .is_partially_uptodate = block_is_partially_uptodate, // .is_dirty_writeback = buffer_check_dirty_writeback, diff --git a/fs/tux3/filemap_mmap.c b/fs/tux3/filemap_mmap.c new file mode 100644 index 00000000000000..93b9f9cdf40c72 --- /dev/null +++ b/fs/tux3/filemap_mmap.c @@ -0,0 +1,247 @@ +/* + * mmap(2) handlers to support page fork. + */ + +/* + * Copy of __set_page_dirty() without __mark_inode_dirty(). Caller + * decides whether mark inode dirty or not. + */ +void __tux3_set_page_dirty_account(struct page *page, + struct address_space *mapping, int warn) +{ + unsigned long flags; + + spin_lock_irqsave(&mapping->tree_lock, flags); + if (page->mapping) { /* Race with truncate? 
*/ + WARN_ON_ONCE(warn && !PageUptodate(page)); + account_page_dirtied(page, mapping); + radix_tree_tag_set(&mapping->page_tree, + page_index(page), PAGECACHE_TAG_DIRTY); + } + spin_unlock_irqrestore(&mapping->tree_lock, flags); +} + +static void __tux3_set_page_dirty(struct page *page, + struct address_space *mapping, int warn) +{ + __tux3_set_page_dirty_account(page, mapping, warn); + __tux3_mark_inode_dirty(mapping->host, I_DIRTY_PAGES); +} + +static int tux3_set_page_dirty_buffers(struct page *page) +{ +#if 0 + struct address_space *mapping = page->mapping; + int newly_dirty; + + spin_lock(&mapping->private_lock); + if (page_has_buffers(page)) { + struct buffer_head *head = page_buffers(page); + struct buffer_head *bh = head; + + do { + set_buffer_dirty(bh); + bh = bh->b_this_page; + } while (bh != head); + } + newly_dirty = !TestSetPageDirty(page); + spin_unlock(&mapping->private_lock); + + if (newly_dirty) + __set_page_dirty(page, mapping, 1); + + return newly_dirty; +#else + struct address_space *mapping = page->mapping; + unsigned delta = tux3_get_current_delta(); + struct buffer_head *head, *buffer; + int newly_dirty; + + /* This should be tux3 page and locked */ + assert(mapping); + assert(PageLocked(page)); + /* This page should have buffers (caller should allocate) */ + assert(page_has_buffers(page)); + + /* + * FIXME: we dirty all buffers on this page, so we optimize this + * by avoiding to check page-dirty/inode-dirty multiple times. 
+ */ + newly_dirty = 0; + if (!TestSetPageDirty(page)) { + __tux3_set_page_dirty(page, mapping, 1); + newly_dirty = 1; + } + buffer = head = page_buffers(page); + do { + __tux3_mark_buffer_dirty(buffer, delta); + buffer = buffer->b_this_page; + } while (buffer != head); +#endif + return newly_dirty; +} + +/* Copy of set_page_dirty() */ +static int tux3_set_page_dirty(struct page *page) +{ + /* + * readahead/lru_deactivate_page could remain + * PG_readahead/PG_reclaim due to race with end_page_writeback + * About readahead, if the page is written, the flags would be + * reset. So no problem. + * About lru_deactivate_page, if the page is redirty, the flag + * will be reset. So no problem. but if the page is used by readahead + * it will confuse readahead and make it restart the size rampup + * process. But it's a trivial problem. + */ + ClearPageReclaim(page); + + return tux3_set_page_dirty_buffers(page); +} + +static int tux3_set_page_dirty_assert(struct page *page) +{ + struct buffer_head *head, *buffer; + + /* See comment of tux3_set_page_dirty() */ + ClearPageReclaim(page); + + /* Is there any cases to be called for old page of forked page? */ + WARN_ON(PageForked(page)); + + /* This page should be dirty already, otherwise we will lost data. */ + assert(PageDirty(page)); + /* All buffers should be dirty already, otherwise we will lost data. */ + assert(page_has_buffers(page)); + head = buffer = page_buffers(page); + do { + assert(buffer_dirty(buffer)); + buffer = buffer->b_this_page; + } while (buffer != head); + + return 0; +} + +static int tux3_set_page_dirty_bug(struct page *page) +{ + /* See comment of tux3_set_page_dirty() */ + ClearPageReclaim(page); + + assert(0); + /* This page should not be mmapped */ + assert(!page_mapped(page)); + /* This page should be dirty already, otherwise we will lost data. 
*/ + assert(PageDirty(page)); + return 0; +} + +static int tux3_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vma->vm_file); + struct sb *sb = tux_sb(inode->i_sb); + struct page *clone, *page = vmf->page; + void *ptr; + int ret; + + sb_start_pagefault(inode->i_sb); + +retry: + lock_page(page); + if (page->mapping != mapping(inode)) { + unlock_page(page); + ret = VM_FAULT_NOPAGE; + goto out; + } + + /* + * page fault can be happened while holding change_begin/end() + * (e.g. copy of user data between ->write_begin and + * ->write_end for write(2)). + * + * So, we use nested version here. + */ + change_begin_atomic_nested(sb, &ptr); + + /* + * FIXME: Caller releases vmf->page (old_page) unconditionally. + * So, this takes additional refcount to workaround it. + */ + if (vmf->page == page) + page_cache_get(page); + + clone = pagefork_for_blockdirty(page, tux3_get_current_delta()); + if (IS_ERR(clone)) { + /* Someone did page fork */ + pgoff_t index = page->index; + + change_end_atomic_nested(sb, ptr); + unlock_page(page); + page_cache_release(page); + + switch (PTR_ERR(clone)) { + case -EAGAIN: + page = find_get_page(inode->i_mapping, index); + assert(page); + goto retry; + case -ENOMEM: + ret = VM_FAULT_OOM; + break; + default: + ret = VM_FAULT_SIGBUS; + break; + } + + goto out; + } + + file_update_time(vma->vm_file); + + /* Assign buffers to dirty */ + if (!page_has_buffers(clone)) + create_empty_buffers(clone, sb->blocksize, 0); + + /* + * We mark the page dirty already here so that when freeze is in + * progress, we are guaranteed that writeback during freezing will + * see the dirty page and writeprotect it again. 
+ */ + tux3_set_page_dirty(clone); +#if 1 + /* FIXME: Caller doesn't see the changed vmf->page */ + vmf->page = clone; + + change_end_atomic_nested(sb, ptr); + /* FIXME: caller doesn't know about pagefork */ + unlock_page(clone); + page_cache_release(clone); + ret = 0; +// ret = VM_FAULT_LOCKED; +#endif +out: + sb_end_pagefault(inode->i_sb); + + return ret; +} + +static const struct vm_operations_struct tux3_file_vm_ops = { + .fault = filemap_fault, + .page_mkwrite = tux3_page_mkwrite, + .remap_pages = generic_file_remap_pages, +}; + +int tux3_file_mmap(struct file *file, struct vm_area_struct *vma) +{ +#ifdef CONFIG_TUX3_MMAP + struct address_space *mapping = file->f_mapping; + + if (!mapping->a_ops->readpage) + return -ENOEXEC; + + file_accessed(file); + vma->vm_ops = &tux3_file_vm_ops; + + return 0; +#else + return -EOPNOTSUPP; +#endif +} diff --git a/fs/tux3/inode.c b/fs/tux3/inode.c index 644deabc2cd22a..f759f8781d7a63 100644 --- a/fs/tux3/inode.c +++ b/fs/tux3/inode.c @@ -858,7 +858,7 @@ static const struct file_operations tux_file_fops = { #ifdef CONFIG_COMPAT // .compat_ioctl = fat_compat_dir_ioctl, #endif - .mmap = generic_file_mmap, + .mmap = tux3_file_mmap, .open = generic_file_open, .fsync = tux3_sync_file, .splice_read = generic_file_splice_read, diff --git a/fs/tux3/mmap_builtin_hack.c b/fs/tux3/mmap_builtin_hack.c new file mode 100644 index 00000000000000..dade54f8fd41dd --- /dev/null +++ b/fs/tux3/mmap_builtin_hack.c @@ -0,0 +1,95 @@ +/* + * mmap support helpers. But core doesn't provide functionality that + * pagefork needs. + * + * So, this hack adds EXPORT_SYMBOL_GPL() and inline functions, and + * linked with kernel statically. + * + * FIXME: we should patch the kernel instead. 
+ */ + +#include "tux3.h" +#include <linux/rmap.h> +#include <linux/mmu_notifier.h> + +extern unsigned long vma_address(struct page *page, struct vm_area_struct *vma); + +static int page_cow_one(struct page *oldpage, struct page *newpage, + struct vm_area_struct *vma, unsigned long address) +{ + struct mm_struct *mm = vma->vm_mm; + pte_t oldptval, ptval, *pte; + spinlock_t *ptl; + int ret = 0; + + pte = page_check_address(oldpage, mm, address, &ptl, 1); + if (!pte) + goto out; + + flush_cache_page(vma, address, pte_pfn(*pte)); + oldptval = ptep_clear_flush(vma, address, pte); + + /* Take refcount for PTE */ + page_cache_get(newpage); + + /* + * vm_page_prot doesn't have the writable bit, so another page + * fault will occur immediately after returning from this one. + * That second page fault will be resolved with the forked page + * that was set here. + * + * FIXME: we should resolve the page fault with a single page + * fault. Maybe we will have to modify the callers of + * ->page_mkwrite(). + */ + ptval = mk_pte(newpage, vma->vm_page_prot); +#if 0 + if (pte_dirty(oldptval)) + ptval = pte_mkdirty(ptval); + if (pte_young(oldptval)) + ptval = pte_mkyoung(ptval); +#endif + set_pte_at(mm, address, pte, ptval); + + /* Update rmap accounting */ + assert(!PageMlocked(oldpage)); /* Caller should migrate mlock flag */ + page_remove_rmap(oldpage); + page_add_file_rmap(newpage); + + /* no need to invalidate: a not-present page won't be cached */ + update_mmu_cache(vma, address, pte); + + pte_unmap_unlock(pte, ptl); + + mmu_notifier_invalidate_page(mm, address); + + /* Release refcount for PTE */ + page_cache_release(oldpage); +out: + return ret; +} + +int page_cow_file(struct page *oldpage, struct page *newpage) +{ + struct address_space *mapping = page_mapping(oldpage); + pgoff_t pgoff = oldpage->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + struct vm_area_struct *vma; + int ret = 0; + + BUG_ON(!PageLocked(oldpage)); + BUG_ON(!PageLocked(newpage)); + BUG_ON(PageAnon(oldpage)); + 
BUG_ON(mapping == NULL); + + mutex_lock(&mapping->i_mmap_mutex); + vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { + if (vma->vm_flags & VM_SHARED) { + unsigned long address = vma_address(oldpage, vma); + ret += page_cow_one(oldpage, newpage, vma, address); + } + } + mutex_unlock(&mapping->i_mmap_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(page_cow_file); diff --git a/fs/tux3/mmap_builtin_hack.h b/fs/tux3/mmap_builtin_hack.h new file mode 100644 index 00000000000000..b313506ae61b0a --- /dev/null +++ b/fs/tux3/mmap_builtin_hack.h @@ -0,0 +1,13 @@ +#ifndef _MMAP_HACK_H +#define _MMAP_HACK_H + +#ifdef CONFIG_TUX3_MMAP +int page_cow_file(struct page *oldpage, struct page *newpage); +#else +static inline int page_cow_file(struct page *oldpage, struct page *newpage) +{ + return 0; +} +#endif + +#endif /* !_MMAP_HACK_H */ diff --git a/fs/tux3/tux3.h b/fs/tux3/tux3.h index eef92d4cac9f19..04fde94779ba08 100644 --- a/fs/tux3/tux3.h +++ b/fs/tux3/tux3.h @@ -674,6 +674,9 @@ int tux3_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); struct buffer_head *__get_buffer(struct page *page, int offset); void tux3_try_cancel_dirty_page(struct page *page); +void __tux3_set_page_dirty_account(struct page *page, + struct address_space *mapping, int warn); +int tux3_file_mmap(struct file *file, struct vm_area_struct *vma); extern const struct address_space_operations tux_file_aops; extern const struct address_space_operations tux_symlink_aops; extern const struct address_space_operations tux_blk_aops; |