diff options
author | Jens Axboe <axboe@kernel.dk> | 2023-04-03 07:18:20 -0600 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2023-04-03 07:18:20 -0600 |
commit | bcd83e4071cffe907d294d61d0f0d50f6eee2da0 (patch) | |
tree | 8fc6673e363515d19b926c0787ccc76f30b5115e | |
parent | 22c29a462e14228799ade6b916a1ee3cda623aa8 (diff) | |
parent | d187b44bc9404581bad8d006d80937d1b3a2b0c0 (diff) | |
download | misc-bcd83e4071cffe907d294d61d0f0d50f6eee2da0.tar.gz |
Merge branch 'for-6.4/splice' into for-next
* for-6.4/splice:
block: convert bio_map_user_iov to use iov_iter_extract_pages
block: Convert bio_iov_iter_get_pages to use iov_iter_extract_pages
block: Add BIO_PAGE_PINNED and associated infrastructure
block: Replace BIO_NO_PAGE_REF with BIO_PAGE_REFFED with inverted logic
block: Fix bio_flagged() so that gcc can better optimise it
iomap: Don't get an reference on ZERO_PAGE for direct I/O block zeroing
iov_iter: Kill ITER_PIPE
cifs: Use generic_file_splice_read()
splice: Do splice read from a file without using ITER_PIPE
tty, proc, kernfs, random: Use direct_splice_read()
coda: Implement splice-read
overlayfs: Implement splice-read
shmem: Implement splice-read
splice: Make do_splice_to() generic and export it
splice: Clean up direct_splice_read() a bit
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r-- | block/bio.c | 29 | ||||
-rw-r--r-- | block/blk-map.c | 22 | ||||
-rw-r--r-- | block/blk.h | 12 | ||||
-rw-r--r-- | drivers/char/random.c | 4 | ||||
-rw-r--r-- | drivers/tty/tty_io.c | 4 | ||||
-rw-r--r-- | fs/cifs/cifsfs.c | 8 | ||||
-rw-r--r-- | fs/cifs/cifsfs.h | 3 | ||||
-rw-r--r-- | fs/cifs/file.c | 16 | ||||
-rw-r--r-- | fs/coda/file.c | 29 | ||||
-rw-r--r-- | fs/direct-io.c | 2 | ||||
-rw-r--r-- | fs/iomap/direct-io.c | 1 | ||||
-rw-r--r-- | fs/kernfs/file.c | 2 | ||||
-rw-r--r-- | fs/overlayfs/file.c | 23 | ||||
-rw-r--r-- | fs/proc/inode.c | 4 | ||||
-rw-r--r-- | fs/proc/proc_sysctl.c | 2 | ||||
-rw-r--r-- | fs/proc_namespace.c | 6 | ||||
-rw-r--r-- | fs/splice.c | 76 | ||||
-rw-r--r-- | include/linux/bio.h | 5 | ||||
-rw-r--r-- | include/linux/blk_types.h | 3 | ||||
-rw-r--r-- | include/linux/splice.h | 3 | ||||
-rw-r--r-- | include/linux/uio.h | 14 | ||||
-rw-r--r-- | lib/iov_iter.c | 429 | ||||
-rw-r--r-- | mm/filemap.c | 4 | ||||
-rw-r--r-- | mm/shmem.c | 134 |
24 files changed, 284 insertions, 551 deletions
diff --git a/block/bio.c b/block/bio.c index fd11614bba4dc5..fc98c1c723cac4 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1168,7 +1168,7 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty) bio_for_each_segment_all(bvec, bio, iter_all) { if (mark_dirty && !PageCompound(bvec->bv_page)) set_page_dirty_lock(bvec->bv_page); - put_page(bvec->bv_page); + bio_release_page(bio, bvec->bv_page); } } EXPORT_SYMBOL_GPL(__bio_release_pages); @@ -1190,7 +1190,6 @@ void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter) bio->bi_io_vec = (struct bio_vec *)iter->bvec; bio->bi_iter.bi_bvec_done = iter->iov_offset; bio->bi_iter.bi_size = size; - bio_set_flag(bio, BIO_NO_PAGE_REF); bio_set_flag(bio, BIO_CLONED); } @@ -1205,7 +1204,7 @@ static int bio_iov_add_page(struct bio *bio, struct page *page, } if (same_page) - put_page(page); + bio_release_page(bio, page); return 0; } @@ -1219,7 +1218,7 @@ static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page, queue_max_zone_append_sectors(q), &same_page) != len) return -EINVAL; if (same_page) - put_page(page); + bio_release_page(bio, page); return 0; } @@ -1230,10 +1229,10 @@ static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page, * @bio: bio to add pages to * @iter: iov iterator describing the region to be mapped * - * Pins pages from *iter and appends them to @bio's bvec array. The - * pages will have to be released using put_page() when done. - * For multi-segment *iter, this function only adds pages from the - * next non-empty segment of the iov iterator. + * Extracts pages from *iter and appends them to @bio's bvec array. The pages + * will have to be cleaned up in the way indicated by the BIO_PAGE_PINNED flag. + * For a multi-segment *iter, this function only adds pages from the next + * non-empty segment of the iov iterator. */ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) { @@ -1265,9 +1264,9 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) * result to ensure the bio's total size is correct. The remainder of * the iov data will be picked up in the next bio iteration. */ - size = iov_iter_get_pages(iter, pages, - UINT_MAX - bio->bi_iter.bi_size, - nr_pages, &offset, extraction_flags); + size = iov_iter_extract_pages(iter, &pages, + UINT_MAX - bio->bi_iter.bi_size, + nr_pages, extraction_flags, &offset); if (unlikely(size <= 0)) return size ? size : -EFAULT; @@ -1300,7 +1299,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) iov_iter_revert(iter, left); out: while (i < nr_pages) - put_page(pages[i++]); + bio_release_page(bio, pages[i++]); return ret; } @@ -1335,6 +1334,8 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) return 0; } + if (iov_iter_extract_will_pin(iter)) + bio_set_flag(bio, BIO_PAGE_PINNED); do { ret = __bio_iov_iter_get_pages(bio, iter); } while (!ret && iov_iter_count(iter) && !bio_full(bio, 0)); @@ -1488,8 +1489,8 @@ void bio_set_pages_dirty(struct bio *bio) * the BIO and re-dirty the pages in process context. * * It is expected that bio_check_pages_dirty() will wholly own the BIO from - * here on. It will run one put_page() against each page and will run one - * bio_put() against the BIO. + * here on. It will unpin each page and will run one bio_put() against the + * BIO. */ static void bio_dirty_fn(struct work_struct *work); diff --git a/block/blk-map.c b/block/blk-map.c index 04c55f1c492eb1..3551c3ff17cf63 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -281,21 +281,21 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, if (blk_queue_pci_p2pdma(rq->q)) extraction_flags |= ITER_ALLOW_P2PDMA; + if (iov_iter_extract_will_pin(iter)) + bio_set_flag(bio, BIO_PAGE_PINNED); while (iov_iter_count(iter)) { - struct page **pages, *stack_pages[UIO_FASTIOV]; + struct page *stack_pages[UIO_FASTIOV]; + struct page **pages = stack_pages; ssize_t bytes; size_t offs; int npages; - if (nr_vecs <= ARRAY_SIZE(stack_pages)) { - pages = stack_pages; - bytes = iov_iter_get_pages(iter, pages, LONG_MAX, - nr_vecs, &offs, extraction_flags); - } else { - bytes = iov_iter_get_pages_alloc(iter, &pages, - LONG_MAX, &offs, extraction_flags); - } + if (nr_vecs > ARRAY_SIZE(stack_pages)) + pages = NULL; + + bytes = iov_iter_extract_pages(iter, &pages, LONG_MAX, + nr_vecs, extraction_flags, &offs); if (unlikely(bytes <= 0)) { ret = bytes ? bytes : -EFAULT; goto out_unmap; @@ -317,7 +317,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, if (!bio_add_hw_page(rq->q, bio, page, n, offs, max_sectors, &same_page)) { if (same_page) - put_page(page); + bio_release_page(bio, page); break; } @@ -329,7 +329,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, * release the pages we didn't map into the bio, if any */ while (j < npages) - put_page(pages[j++]); + bio_release_page(bio, pages[j++]); if (pages != stack_pages) kvfree(pages); /* couldn't stuff something into bio? */ diff --git a/block/blk.h b/block/blk.h index cc4e8873dfdea1..d65d96994a949f 100644 --- a/block/blk.h +++ b/block/blk.h @@ -432,6 +432,18 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset, unsigned int max_sectors, bool *same_page); +/* + * Clean up a page appropriately, where the page may be pinned, may have a + * ref taken on it or neither. + */ +static inline void bio_release_page(struct bio *bio, struct page *page) +{ + if (bio_flagged(bio, BIO_PAGE_PINNED)) + unpin_user_page(page); + else if (bio_flagged(bio, BIO_PAGE_REFFED)) + put_page(page); +} + struct request_queue *blk_alloc_queue(int node_id); int disk_scan_partitions(struct gendisk *disk, fmode_t mode); diff --git a/drivers/char/random.c b/drivers/char/random.c index 253f2ddb891308..2b47b500126cd3 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1546,7 +1546,7 @@ const struct file_operations random_fops = { .compat_ioctl = compat_ptr_ioctl, .fasync = random_fasync, .llseek = noop_llseek, - .splice_read = generic_file_splice_read, + .splice_read = direct_splice_read, .splice_write = iter_file_splice_write, }; @@ -1557,7 +1557,7 @@ const struct file_operations urandom_fops = { .compat_ioctl = compat_ptr_ioctl, .fasync = random_fasync, .llseek = noop_llseek, - .splice_read = generic_file_splice_read, + .splice_read = direct_splice_read, .splice_write = iter_file_splice_write, }; diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 36fb945fdad489..9d117e579dfbf9 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -466,7 +466,7 @@ static const struct file_operations tty_fops = { .llseek = no_llseek, .read_iter = tty_read, .write_iter = tty_write, - .splice_read = generic_file_splice_read, + .splice_read = direct_splice_read, .splice_write = iter_file_splice_write, .poll = tty_poll, .unlocked_ioctl = tty_ioctl, @@ -481,7 +481,7 @@ static const struct file_operations console_fops = { .llseek = no_llseek, .read_iter = tty_read, .write_iter = redirected_tty_write, - .splice_read = generic_file_splice_read, + .splice_read = direct_splice_read, .splice_write = iter_file_splice_write, .poll = tty_poll, .unlocked_ioctl = tty_ioctl, diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index ac9034fce409d2..5fd67c13d44b39 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1362,7 +1362,7 @@ const struct file_operations cifs_file_ops = { .fsync = cifs_fsync, .flush = cifs_flush, .mmap = cifs_file_mmap, - .splice_read = cifs_splice_read, + .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, @@ -1382,7 +1382,7 @@ const struct file_operations cifs_file_strict_ops = { .fsync = cifs_strict_fsync, .flush = cifs_flush, .mmap = cifs_file_strict_mmap, - .splice_read = cifs_splice_read, + .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, @@ -1420,7 +1420,7 @@ const struct file_operations cifs_file_nobrl_ops = { .fsync = cifs_fsync, .flush = cifs_flush, .mmap = cifs_file_mmap, - .splice_read = cifs_splice_read, + .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, @@ -1438,7 +1438,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = { .fsync = cifs_strict_fsync, .flush = cifs_flush, .mmap = cifs_file_strict_mmap, - .splice_read = cifs_splice_read, + .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 415176b2cf321f..9321c161a27e21 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -100,9 +100,6 @@ extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to); extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from); extern ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from); extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from); -extern ssize_t cifs_splice_read(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags); extern int cifs_flock(struct file *pfile, int cmd, struct file_lock *plock); extern int cifs_lock(struct file *, int, struct file_lock *); extern int cifs_fsync(struct file *, loff_t, loff_t, int); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 6831a9949c430a..040bb30fb21024 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -5066,19 +5066,3 @@ const struct address_space_operations cifs_addr_ops_smallbuf = { .launder_folio = cifs_launder_folio, .migrate_folio = filemap_migrate_folio, }; - -/* - * Splice data from a file into a pipe. - */ -ssize_t cifs_splice_read(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags) -{ - if (unlikely(*ppos >= file_inode(in)->i_sb->s_maxbytes)) - return 0; - if (unlikely(!len)) - return 0; - if (in->f_flags & O_DIRECT) - return direct_splice_read(in, ppos, pipe, len, flags); - return filemap_splice_read(in, ppos, pipe, len, flags); -} diff --git a/fs/coda/file.c b/fs/coda/file.c index 3f3c81e6b1ab0c..12b26bd13564d9 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -23,6 +23,7 @@ #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/uio.h> +#include <linux/splice.h> #include <linux/coda.h> #include "coda_psdev.h" @@ -94,6 +95,32 @@ finish_write: return ret; } +static ssize_t +coda_file_splice_read(struct file *coda_file, loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, unsigned int flags) +{ + struct inode *coda_inode = file_inode(coda_file); + struct coda_file_info *cfi = coda_ftoc(coda_file); + struct file *in = cfi->cfi_container; + loff_t ki_pos = *ppos; + ssize_t ret; + + ret = venus_access_intent(coda_inode->i_sb, coda_i2f(coda_inode), + &cfi->cfi_access_intent, + len, ki_pos, CODA_ACCESS_TYPE_READ); + if (ret) + goto finish_read; + + ret = vfs_splice_read(in, ppos, pipe, len, flags); + +finish_read: + venus_access_intent(coda_inode->i_sb, coda_i2f(coda_inode), + &cfi->cfi_access_intent, + len, ki_pos, CODA_ACCESS_TYPE_READ_FINISH); + return ret; +} + static void coda_vm_open(struct vm_area_struct *vma) { @@ -302,5 +329,5 @@ const struct file_operations coda_file_operations = { .open = coda_open, .release = coda_release, .fsync = coda_fsync, - .splice_read = generic_file_splice_read, + .splice_read = coda_file_splice_read, }; diff --git a/fs/direct-io.c b/fs/direct-io.c index ab0d7ea89813a6..47b90c68b36963 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -403,6 +403,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, bio->bi_end_io = dio_bio_end_aio; else bio->bi_end_io = dio_bio_end_io; + /* for now require references for all pages */ + bio_set_flag(bio, BIO_PAGE_REFFED); sdio->bio = bio; sdio->logical_offset_in_bio = sdio->cur_page_fs_offset; } diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index f771001574d008..ceeb0a183cea5e 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -202,7 +202,6 @@ static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio, bio->bi_private = dio; bio->bi_end_io = iomap_dio_bio_end_io; - get_page(page); __bio_add_page(bio, page, len, 0); iomap_dio_submit_bio(iter, dio, bio, pos); } diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index e4a50e4ff0d234..9d23b8141db718 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -1011,7 +1011,7 @@ const struct file_operations kernfs_file_fops = { .release = kernfs_fop_release, .poll = kernfs_fop_poll, .fsync = noop_fsync, - .splice_read = generic_file_splice_read, + .splice_read = direct_splice_read, .splice_write = iter_file_splice_write, }; diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 7c04f033aadd75..86197882ff3512 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -419,6 +419,27 @@ out_unlock: return ret; } +static ssize_t ovl_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) +{ + const struct cred *old_cred; + struct fd real; + ssize_t ret; + + ret = ovl_real_fdget(in, &real); + if (ret) + return ret; + + old_cred = ovl_override_creds(file_inode(in)->i_sb); + ret = vfs_splice_read(real.file, ppos, pipe, len, flags); + revert_creds(old_cred); + ovl_file_accessed(in); + + fdput(real); + return ret; +} + /* * Calling iter_file_splice_write() directly from overlay's f_op may deadlock * due to lock order inversion between pipe->mutex in iter_file_splice_write() @@ -695,7 +716,7 @@ const struct file_operations ovl_file_operations = { .fallocate = ovl_fallocate, .fadvise = ovl_fadvise, .flush = ovl_flush, - .splice_read = generic_file_splice_read, + .splice_read = ovl_splice_read, .splice_write = ovl_splice_write, .copy_file_range = ovl_copy_file_range, diff --git a/fs/proc/inode.c b/fs/proc/inode.c index f495fdb391517a..711f1270646998 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -591,7 +591,7 @@ static const struct file_operations proc_iter_file_ops = { .llseek = proc_reg_llseek, .read_iter = proc_reg_read_iter, .write = proc_reg_write, - .splice_read = generic_file_splice_read, + .splice_read = direct_splice_read, .poll = proc_reg_poll, .unlocked_ioctl = proc_reg_unlocked_ioctl, .mmap = proc_reg_mmap, @@ -617,7 +617,7 @@ static const struct file_operations proc_reg_file_ops_compat = { static const struct file_operations proc_iter_file_ops_compat = { .llseek = proc_reg_llseek, .read_iter = proc_reg_read_iter, - .splice_read = generic_file_splice_read, + .splice_read = direct_splice_read, .write = proc_reg_write, .poll = proc_reg_poll, .unlocked_ioctl = proc_reg_unlocked_ioctl, diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 5851eb5bc7267e..e49f99657d1c8c 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -869,7 +869,7 @@ static const struct file_operations proc_sys_file_operations = { .poll = proc_sys_poll, .read_iter = proc_sys_read, .write_iter = proc_sys_write, - .splice_read = generic_file_splice_read, + .splice_read = direct_splice_read, .splice_write = iter_file_splice_write, .llseek = default_llseek, }; diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 846f9455ae226b..492abbbeff5e43 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -324,7 +324,7 @@ static int mountstats_open(struct inode *inode, struct file *file) const struct file_operations proc_mounts_operations = { .open = mounts_open, .read_iter = seq_read_iter, - .splice_read = generic_file_splice_read, + .splice_read = direct_splice_read, .llseek = seq_lseek, .release = mounts_release, .poll = mounts_poll, @@ -333,7 +333,7 @@ const struct file_operations proc_mounts_operations = { const struct file_operations proc_mountinfo_operations = { .open = mountinfo_open, .read_iter = seq_read_iter, - .splice_read = generic_file_splice_read, + .splice_read = direct_splice_read, .llseek = seq_lseek, .release = mounts_release, .poll = mounts_poll, @@ -342,7 +342,7 @@ const struct file_operations proc_mountinfo_operations = { const struct file_operations proc_mountstats_operations = { .open = mountstats_open, .read_iter = seq_read_iter, - .splice_read = generic_file_splice_read, + .splice_read = direct_splice_read, .llseek = seq_lseek, .release = mounts_release, }; diff --git a/fs/splice.c b/fs/splice.c index 2c3dec2b6dfaf1..b7fd9769451269 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -295,7 +295,7 @@ ssize_t direct_splice_read(struct file *in, loff_t *ppos, struct kiocb kiocb; struct page **pages; ssize_t ret; - size_t used, npages, chunk, remain, reclaim; + size_t used, npages, chunk, remain, keep = 0; int i; /* Work out how much data we can actually add into the pipe */ @@ -309,7 +309,7 @@ ssize_t direct_splice_read(struct file *in, loff_t *ppos, if (!bv) return -ENOMEM; - pages = (void *)(bv + npages); + pages = (struct page **)(bv + npages); npages = alloc_pages_bulk_array(GFP_USER, npages, pages); if (!npages) { kfree(bv); @@ -332,11 +332,8 @@ ssize_t direct_splice_read(struct file *in, loff_t *ppos, kiocb.ki_pos = *ppos; ret = call_read_iter(in, &kiocb, &to); - reclaim = npages * PAGE_SIZE; - remain = 0; if (ret > 0) { - reclaim -= ret; - remain = ret; + keep = DIV_ROUND_UP(ret, PAGE_SIZE); *ppos = kiocb.ki_pos; file_accessed(in); } else if (ret < 0) { @@ -349,14 +346,12 @@ ssize_t direct_splice_read(struct file *in, loff_t *ppos, } /* Free any pages that didn't get touched at all. */ - reclaim /= PAGE_SIZE; - if (reclaim) { - npages -= reclaim; - release_pages(pages + npages, reclaim); - } + if (keep < npages) + release_pages(pages + keep, npages - keep); /* Push the remaining pages into the pipe. */ - for (i = 0; i < npages; i++) { + remain = ret; + for (i = 0; i < keep; i++) { struct pipe_buffer *buf = pipe_head_buf(pipe); chunk = min_t(size_t, remain, PAGE_SIZE); @@ -392,29 +387,13 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { - struct iov_iter to; - struct kiocb kiocb; - int ret; - - iov_iter_pipe(&to, ITER_DEST, pipe, len); - init_sync_kiocb(&kiocb, in); - kiocb.ki_pos = *ppos; - ret = call_read_iter(in, &kiocb, &to); - if (ret > 0) { - *ppos = kiocb.ki_pos; - file_accessed(in); - } else if (ret < 0) { - /* free what was emitted */ - pipe_discard_from(pipe, to.start_head); - /* - * callers of ->splice_read() expect -EAGAIN on - * "can't put anything in there", rather than -EFAULT. - */ - if (ret == -EFAULT) - ret = -EAGAIN; - } - - return ret; + if (unlikely(*ppos >= file_inode(in)->i_sb->s_maxbytes)) + return 0; + if (unlikely(!len)) + return 0; + if (in->f_flags & O_DIRECT) + return direct_splice_read(in, ppos, pipe, len, flags); + return filemap_splice_read(in, ppos, pipe, len, flags); } EXPORT_SYMBOL(generic_file_splice_read); @@ -856,12 +835,24 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, return out->f_op->splice_write(pipe, out, ppos, len, flags); } -/* - * Attempt to initiate a splice from a file to a pipe. +/** + * vfs_splice_read - Read data from a file and splice it into a pipe + * @in: File to splice from + * @ppos: Input file offset + * @pipe: Pipe to splice to + * @len: Number of bytes to splice + * @flags: Splice modifier flags (SPLICE_F_*) + * + * Splice the requested amount of data from the input file to the pipe. This + * is synchronous as the caller must hold the pipe lock across the entire + * operation. + * + * If successful, it returns the amount of data spliced, 0 if it hit the EOF or + * a hole and a negative error code otherwise. */ -static long do_splice_to(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags) +long vfs_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) { unsigned int p_space; int ret; @@ -884,6 +875,7 @@ static long do_splice_to(struct file *in, loff_t *ppos, return warn_unsupported(in, "read"); return in->f_op->splice_read(in, ppos, pipe, len, flags); } +EXPORT_SYMBOL_GPL(vfs_splice_read); /** * splice_direct_to_actor - splices data directly between two non-pipes @@ -953,7 +945,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, size_t read_len; loff_t pos = sd->pos, prev_pos = pos; - ret = do_splice_to(in, &pos, pipe, len, flags); + ret = vfs_splice_read(in, &pos, pipe, len, flags); if (unlikely(ret <= 0)) goto out_release; @@ -1101,7 +1093,7 @@ long splice_file_to_pipe(struct file *in, pipe_lock(opipe); ret = wait_for_space(opipe, flags); if (!ret) - ret = do_splice_to(in, offset, opipe, len, flags); + ret = vfs_splice_read(in, offset, opipe, len, flags); pipe_unlock(opipe); if (ret > 0) wakeup_pipe_readers(opipe); diff --git a/include/linux/bio.h b/include/linux/bio.h index d766be7152e151..d8c30c791a9af8 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -229,7 +229,7 @@ static inline void bio_cnt_set(struct bio *bio, unsigned int count) static inline bool bio_flagged(struct bio *bio, unsigned int bit) { - return (bio->bi_flags & (1U << bit)) != 0; + return bio->bi_flags & (1U << bit); } static inline void bio_set_flag(struct bio *bio, unsigned int bit) @@ -488,7 +488,8 @@ void zero_fill_bio(struct bio *bio); static inline void bio_release_pages(struct bio *bio, bool mark_dirty) { - if (!bio_flagged(bio, BIO_NO_PAGE_REF)) + if (bio_flagged(bio, BIO_PAGE_REFFED) || + bio_flagged(bio, BIO_PAGE_PINNED)) __bio_release_pages(bio, mark_dirty); } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 99be590f952f6e..a0e339ff3d09a4 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -318,7 +318,8 @@ struct bio { * bio flags */ enum { - BIO_NO_PAGE_REF, /* don't put release vec pages */ + BIO_PAGE_PINNED, /* Unpin pages in bio_release_pages() */ + BIO_PAGE_REFFED, /* put pages in bio_release_pages() */ BIO_CLONED, /* doesn't own data */ BIO_BOUNCED, /* bio is a bounce bio */ BIO_QUIET, /* Make BIO Quiet */ diff --git a/include/linux/splice.h b/include/linux/splice.h index a55179fd60fc39..8f052c3dae953a 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -76,6 +76,9 @@ extern ssize_t splice_to_pipe(struct pipe_inode_info *, struct splice_pipe_desc *); extern ssize_t add_to_pipe(struct pipe_inode_info *, struct pipe_buffer *); +long vfs_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags); extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, splice_direct_actor *); extern long do_splice(struct file *in, loff_t *off_in, diff --git a/include/linux/uio.h b/include/linux/uio.h index ed35f4427a0ad4..ceab22e2c055db 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -11,7 +11,6 @@ #include <uapi/linux/uio.h> struct page; -struct pipe_inode_info; typedef unsigned int __bitwise iov_iter_extraction_t; @@ -25,7 +24,6 @@ enum iter_type { ITER_IOVEC, ITER_KVEC, ITER_BVEC, - ITER_PIPE, ITER_XARRAY, ITER_DISCARD, ITER_UBUF, @@ -73,7 +71,6 @@ struct iov_iter { const struct kvec *kvec; const struct bio_vec *bvec; struct xarray *xarray; - struct pipe_inode_info *pipe; void __user *ubuf; }; size_t count; @@ -81,10 +78,6 @@ struct iov_iter { }; union { unsigned long nr_segs; - struct { - unsigned int head; - unsigned int start_head; - }; loff_t xarray_start; }; }; @@ -132,11 +125,6 @@ static inline bool iov_iter_is_bvec(const struct iov_iter *i) return iov_iter_type(i) == ITER_BVEC; } -static inline bool iov_iter_is_pipe(const struct iov_iter *i) -{ - return iov_iter_type(i) == ITER_PIPE; -} - static inline bool iov_iter_is_discard(const struct iov_iter *i) { return iov_iter_type(i) == ITER_DISCARD; @@ -269,8 +257,6 @@ void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec unsigned long nr_segs, size_t count); void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec, unsigned long nr_segs, size_t count); -void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe, - size_t count); void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count); void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray, loff_t start, size_t count); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 3e6c9bcfa612c2..789b1c7d1254c1 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -14,8 +14,6 @@ #include <linux/scatterlist.h> #include <linux/instrumented.h> -#define PIPE_PARANOIA /* for now */ - /* covers ubuf and kbuf alike */ #define iterate_buf(i, n, base, len, off, __p, STEP) { \ size_t __maybe_unused off = 0; \ @@ -186,150 +184,6 @@ static int copyin(void *to, const void __user *from, size_t n) return res; } -#ifdef PIPE_PARANOIA -static bool sanity(const struct iov_iter *i) -{ - struct pipe_inode_info *pipe = i->pipe; - unsigned int p_head = pipe->head; - unsigned int p_tail = pipe->tail; - unsigned int p_occupancy = pipe_occupancy(p_head, p_tail); - unsigned int i_head = i->head; - unsigned int idx; - - if (i->last_offset) { - struct pipe_buffer *p; - if (unlikely(p_occupancy == 0)) - goto Bad; // pipe must be non-empty - if (unlikely(i_head != p_head - 1)) - goto Bad; // must be at the last buffer... - - p = pipe_buf(pipe, i_head); - if (unlikely(p->offset + p->len != abs(i->last_offset))) - goto Bad; // ... at the end of segment - } else { - if (i_head != p_head) - goto Bad; // must be right after the last buffer - } - return true; -Bad: - printk(KERN_ERR "idx = %d, offset = %d\n", i_head, i->last_offset); - printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n", - p_head, p_tail, pipe->ring_size); - for (idx = 0; idx < pipe->ring_size; idx++) - printk(KERN_ERR "[%p %p %d %d]\n", - pipe->bufs[idx].ops, - pipe->bufs[idx].page, - pipe->bufs[idx].offset, - pipe->bufs[idx].len); - WARN_ON(1); - return false; -} -#else -#define sanity(i) true -#endif - -static struct page *push_anon(struct pipe_inode_info *pipe, unsigned size) -{ - struct page *page = alloc_page(GFP_USER); - if (page) { - struct pipe_buffer *buf = pipe_buf(pipe, pipe->head++); - *buf = (struct pipe_buffer) { - .ops = &default_pipe_buf_ops, - .page = page, - .offset = 0, - .len = size - }; - } - return page; -} - -static void push_page(struct pipe_inode_info *pipe, struct page *page, - unsigned int offset, unsigned int size) -{ - struct pipe_buffer *buf = pipe_buf(pipe, pipe->head++); - *buf = (struct pipe_buffer) { - .ops = &page_cache_pipe_buf_ops, - .page = page, - .offset = offset, - .len = size - }; - get_page(page); -} - -static inline int last_offset(const struct pipe_buffer *buf) -{ - if (buf->ops == &default_pipe_buf_ops) - return buf->len; // buf->offset is 0 for those - else - return -(buf->offset + buf->len); -} - -static struct page *append_pipe(struct iov_iter *i, size_t size, - unsigned int *off) -{ - struct pipe_inode_info *pipe = i->pipe; - int offset = i->last_offset; - struct pipe_buffer *buf; - struct page *page; - - if (offset > 0 && offset < PAGE_SIZE) { - // some space in the last buffer; add to it - buf = pipe_buf(pipe, pipe->head - 1); - size = min_t(size_t, size, PAGE_SIZE - offset); - buf->len += size; - i->last_offset += size; - i->count -= size; - *off = offset; - return buf->page; - } - // OK, we need a new buffer - *off = 0; - size = min_t(size_t, size, PAGE_SIZE); - if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) - return NULL; - page = push_anon(pipe, size); - if (!page) - return NULL; - i->head = pipe->head - 1; - i->last_offset = size; - i->count -= size; - return page; -} - -static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes, - struct iov_iter *i) -{ - struct pipe_inode_info *pipe = i->pipe; - unsigned int head = pipe->head; - - if (unlikely(bytes > i->count)) - bytes = i->count; - - if (unlikely(!bytes)) - return 0; - - if (!sanity(i)) - return 0; - - if (offset && i->last_offset == -offset) { // could we merge it? - struct pipe_buffer *buf = pipe_buf(pipe, head - 1); - if (buf->page == page) { - buf->len += bytes; - i->last_offset -= bytes; - i->count -= bytes; - return bytes; - } - } - if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) - return 0; - - push_page(pipe, page, offset, bytes); - i->last_offset = -(offset + bytes); - i->head = head; - i->count -= bytes; - return bytes; -} - /* * fault_in_iov_iter_readable - fault in iov iterator for reading * @i: iterator @@ -433,46 +287,6 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction, } EXPORT_SYMBOL(iov_iter_init); -// returns the offset in partial buffer (if any) -static inline unsigned int pipe_npages(const struct iov_iter *i, int *npages) -{ - struct pipe_inode_info *pipe = i->pipe; - int used = pipe->head - pipe->tail; - int off = i->last_offset; - - *npages = max((int)pipe->max_usage - used, 0); - - if (off > 0 && off < PAGE_SIZE) { // anon and not full - (*npages)++; - return off; - } - return 0; -} - -static size_t copy_pipe_to_iter(const void *addr, size_t bytes, - struct iov_iter *i) -{ - unsigned int off, chunk; - - if (unlikely(bytes > i->count)) - bytes = i->count; - if (unlikely(!bytes)) - return 0; - - if (!sanity(i)) - return 0; - - for (size_t n = bytes; n; n -= chunk) { - struct page *page = append_pipe(i, n, &off); - chunk = min_t(size_t, n, PAGE_SIZE - off); - if (!page) - return bytes - n; - memcpy_to_page(page, off, addr, chunk); - addr += chunk; - } - return bytes; -} - static __wsum csum_and_memcpy(void *to, const void *from, size_t len, __wsum sum, size_t off) { @@ -480,44 +294,10 @@ static __wsum csum_and_memcpy(void *to, const void *from, size_t len, return csum_block_add(sum, next, off); } -static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes, - struct iov_iter *i, __wsum *sump) -{ - __wsum sum = *sump; - size_t off = 0; - unsigned int chunk, r; - - if (unlikely(bytes > i->count)) - bytes = i->count; - if (unlikely(!bytes)) - return 0; - - if (!sanity(i)) - return 0; - - while (bytes) { - struct page *page = append_pipe(i, bytes, &r); - char *p; - - if (!page) - break; - chunk = min_t(size_t, bytes, PAGE_SIZE - r); - p = kmap_local_page(page); - sum = csum_and_memcpy(p + r, addr + off, chunk, sum, off); - kunmap_local(p); - off += chunk; - bytes -= chunk; - } - *sump = sum; - return off; -} - size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { if (WARN_ON_ONCE(i->data_source)) return 0; - if (unlikely(iov_iter_is_pipe(i))) - return copy_pipe_to_iter(addr, bytes, i); if (user_backed_iter(i)) might_fault(); iterate_and_advance(i, bytes, base, len, off, @@ -539,42 +319,6 @@ static int copyout_mc(void __user *to, const void *from, size_t n) return n; } -static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes, - struct iov_iter *i) -{ - size_t xfer = 0; - unsigned int off, chunk; - - if (unlikely(bytes > i->count)) - bytes = i->count; - if (unlikely(!bytes)) - return 0; - - if (!sanity(i)) - return 0; - - while (bytes) { - struct page *page = append_pipe(i, bytes, &off); - unsigned long rem; - char *p; - - if (!page) - break; - chunk = min_t(size_t, bytes, PAGE_SIZE - off); - p = kmap_local_page(page); - rem = copy_mc_to_kernel(p + off, addr + xfer, chunk); - chunk -= rem; - kunmap_local(p); - xfer += chunk; - bytes -= chunk; - if (rem) { - iov_iter_revert(i, rem); - break; - } - } - return xfer; -} - /** * _copy_mc_to_iter - copy to iter with source memory error exception handling * @addr: source kernel address @@ -594,9 +338,8 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes, * alignment and poison alignment assumptions to avoid re-triggering * hardware exceptions. * - * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies. - * Compare to copy_to_iter() where only ITER_IOVEC attempts might return - * a short copy. + * * ITER_KVEC and ITER_BVEC can return short copies. Compare to + * copy_to_iter() where only ITER_IOVEC attempts might return a short copy. * * Return: number of bytes copied (may be %0) */ @@ -604,8 +347,6 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { if (WARN_ON_ONCE(i->data_source)) return 0; - if (unlikely(iov_iter_is_pipe(i))) - return copy_mc_pipe_to_iter(addr, bytes, i); if (user_backed_iter(i)) might_fault(); __iterate_and_advance(i, bytes, base, len, off, @@ -711,8 +452,6 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, return 0; if (WARN_ON_ONCE(i->data_source)) return 0; - if (unlikely(iov_iter_is_pipe(i))) - return copy_page_to_iter_pipe(page, offset, bytes, i); page += offset / PAGE_SIZE; // first subpage offset %= PAGE_SIZE; while (1) { @@ -761,36 +500,8 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, } EXPORT_SYMBOL(copy_page_from_iter); -static size_t pipe_zero(size_t bytes, struct iov_iter *i) -{ - unsigned int chunk, off; - - if (unlikely(bytes > i->count)) - bytes = i->count; - if (unlikely(!bytes)) - return 0; - - if (!sanity(i)) - return 0; - - for (size_t n = bytes; n; n -= chunk) { - struct page *page = append_pipe(i, n, &off); - char *p; - - if (!page) - return bytes - n; - chunk = min_t(size_t, n, PAGE_SIZE - off); - p = kmap_local_page(page); - memset(p + off, 0, chunk); - kunmap_local(p); - } - return bytes; -} - size_t iov_iter_zero(size_t bytes, struct iov_iter *i) { - if (unlikely(iov_iter_is_pipe(i))) - return pipe_zero(bytes, i); iterate_and_advance(i, bytes, base, len, count, clear_user(base, len), memset(base, 0, len) @@ -821,32 +532,6 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t byt } EXPORT_SYMBOL(copy_page_from_iter_atomic); -static void pipe_advance(struct iov_iter *i, size_t size) -{ - struct pipe_inode_info *pipe = i->pipe; - int off = i->last_offset; - - if (!off && !size) { - pipe_discard_from(pipe, i->start_head); // discard everything - return; - } - i->count -= size; - while (1) { - struct pipe_buffer *buf = pipe_buf(pipe, i->head); - if (off) /* make it relative to the beginning of buffer */ - size += abs(off) - buf->offset; - if (size <= buf->len) { - buf->len = size; - i->last_offset = last_offset(buf); - break; - } - size -= buf->len; - i->head++; - off = 0; - } - pipe_discard_from(pipe, i->head + 1); // discard everything past this one -} - static void iov_iter_bvec_advance(struct iov_iter *i, size_t size) { const struct bio_vec *bvec, *end; @@ -898,8 +583,6 @@ void iov_iter_advance(struct iov_iter *i, size_t size) iov_iter_iovec_advance(i, size); } else if (iov_iter_is_bvec(i)) { iov_iter_bvec_advance(i, size); - } else if (iov_iter_is_pipe(i)) { - pipe_advance(i, size); } else if (iov_iter_is_discard(i)) { i->count -= size; } @@ -913,26 +596,6 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) if (WARN_ON(unroll > MAX_RW_COUNT)) return; i->count += unroll; - if (unlikely(iov_iter_is_pipe(i))) { - struct pipe_inode_info *pipe = i->pipe; - unsigned int head = pipe->head; - - while (head > i->start_head) { - struct pipe_buffer *b = pipe_buf(pipe, --head); - if (unroll < b->len) { - b->len -= unroll; - i->last_offset = last_offset(b); - i->head = head; - return; - } - unroll -= b->len; - pipe_buf_release(pipe, b); - pipe->head--; - } - i->last_offset = 0; - i->head = head; - return; - } if (unlikely(iov_iter_is_discard(i))) return; if (unroll <= i->iov_offset) { @@ -1020,24 +683,6 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction, } EXPORT_SYMBOL(iov_iter_bvec); -void iov_iter_pipe(struct iov_iter *i, unsigned int direction, - struct pipe_inode_info *pipe, - size_t count) -{ - BUG_ON(direction != READ); - WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size)); - *i = (struct iov_iter){ - .iter_type = ITER_PIPE, - .data_source = false, - .pipe = pipe, - .head = pipe->head, - .start_head = pipe->head, - .last_offset = 0, - .count = count - }; -} -EXPORT_SYMBOL(iov_iter_pipe); - /** * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray * @i: The iterator to initialise. @@ -1163,19 +808,6 @@ bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask, if (iov_iter_is_bvec(i)) return iov_iter_aligned_bvec(i, addr_mask, len_mask); - if (iov_iter_is_pipe(i)) { - size_t size = i->count; - - if (size & len_mask) - return false; - if (size && i->last_offset > 0) { - if (i->last_offset & addr_mask) - return false; - } - - return true; - } - if (iov_iter_is_xarray(i)) { if (i->count & len_mask) return false; @@ -1246,14 +878,6 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) if (iov_iter_is_bvec(i)) return iov_iter_alignment_bvec(i); - if (iov_iter_is_pipe(i)) { - size_t size = i->count; - - if (size && i->last_offset > 0) - return size | i->last_offset; - return size; - } - if (iov_iter_is_xarray(i)) return (i->xarray_start + i->iov_offset) | i->count; @@ -1306,36 +930,6 @@ static int want_pages_array(struct page ***res, size_t size, return count; } -static ssize_t pipe_get_pages(struct iov_iter *i, - struct page ***pages, size_t maxsize, unsigned maxpages, - size_t *start) -{ - unsigned int npages, count, off, chunk; - struct page **p; - size_t left; - - if (!sanity(i)) - return -EFAULT; - - *start = off = pipe_npages(i, &npages); - if (!npages) - return -EFAULT; - count = want_pages_array(pages, maxsize, off, min(npages, maxpages)); - if (!count) - return -ENOMEM; - p = *pages; - for (npages = 0, left = maxsize ; npages < count; npages++, left -= chunk) { - struct page *page = append_pipe(i, left, &off); - if (!page) - break; - chunk = min_t(size_t, left, PAGE_SIZE - off); - get_page(*p++ = page); - } - if (!npages) - return -EFAULT; - return maxsize - left; -} - static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, pgoff_t index, unsigned int nr_pages) { @@ -1486,8 +1080,6 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, } return maxsize; } - if (iov_iter_is_pipe(i)) - return pipe_get_pages(i, pages, maxsize, maxpages, start); if (iov_iter_is_xarray(i)) return iter_xarray_get_pages(i, pages, maxsize, maxpages, start); return -EFAULT; @@ -1577,9 +1169,7 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, } sum = csum_shift(csstate->csum, csstate->off); - if (unlikely(iov_iter_is_pipe(i))) - bytes = csum_and_copy_to_pipe_iter(addr, bytes, i, &sum); - else iterate_and_advance(i, bytes, base, len, off, ({ + iterate_and_advance(i, bytes, base, len, off, ({ next = csum_and_copy_to_user(addr + off, base, len); sum = csum_block_add(sum, next, off); next ? 0 : len; @@ -1664,15 +1254,6 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) return iov_npages(i, maxpages); if (iov_iter_is_bvec(i)) return bvec_npages(i, maxpages); - if (iov_iter_is_pipe(i)) { - int npages; - - if (!sanity(i)) - return 0; - - pipe_npages(i, &npages); - return min(npages, maxpages); - } if (iov_iter_is_xarray(i)) { unsigned offset = (i->xarray_start + i->iov_offset) % PAGE_SIZE; int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE); @@ -1685,10 +1266,6 @@ EXPORT_SYMBOL(iov_iter_npages); const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) { *new = *old; - if (unlikely(iov_iter_is_pipe(new))) { - WARN_ON(1); - return NULL; - } if (iov_iter_is_bvec(new)) return new->bvec = kmemdup(new->bvec, new->nr_segs * sizeof(struct bio_vec), diff --git a/mm/filemap.c b/mm/filemap.c index 2723104cc06a12..470be06b609677 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2690,8 +2690,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter, if (unlikely(iocb->ki_pos >= i_size_read(inode))) break; - error = filemap_get_pages(iocb, iter->count, &fbatch, - iov_iter_is_pipe(iter)); + error = filemap_get_pages(iocb, iter->count, &fbatch, false); if (error < 0) break; @@ -2967,7 +2966,6 @@ out: return total_spliced ? total_spliced : error; } -EXPORT_SYMBOL(filemap_splice_read); static inline loff_t folio_seek_hole_data(struct xa_state *xas, struct address_space *mapping, struct folio *folio, diff --git a/mm/shmem.c b/mm/shmem.c index 448f393d8ab2b1..a0c268dcf7b855 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2719,6 +2719,138 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) return retval ? retval : error; } +static bool zero_pipe_buf_get(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + return true; +} + +static void zero_pipe_buf_release(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ +} + +static bool zero_pipe_buf_try_steal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + return false; +} + +static const struct pipe_buf_operations zero_pipe_buf_ops = { + .release = zero_pipe_buf_release, + .try_steal = zero_pipe_buf_try_steal, + .get = zero_pipe_buf_get, +}; + +static size_t splice_zeropage_into_pipe(struct pipe_inode_info *pipe, + loff_t fpos, size_t size) +{ + size_t offset = fpos & ~PAGE_MASK; + + size = min_t(size_t, size, PAGE_SIZE - offset); + + if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { + struct pipe_buffer *buf = pipe_head_buf(pipe); + + *buf = (struct pipe_buffer) { + .ops = &zero_pipe_buf_ops, + .page = ZERO_PAGE(0), + .offset = offset, + .len = size, + }; + pipe->head++; + } + + return size; +} + +static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, unsigned int flags) +{ + struct inode *inode = file_inode(in); + struct address_space *mapping = inode->i_mapping; + struct folio *folio = NULL; + size_t total_spliced = 0, used, npages, n, part; + loff_t isize; + int error = 0; + + /* Work out how much data we can actually add into the pipe */ + used = pipe_occupancy(pipe->head, pipe->tail); + npages = max_t(ssize_t, pipe->max_usage - used, 0); + len = min_t(size_t, len, npages * PAGE_SIZE); + + do { + if (*ppos >= i_size_read(inode)) + break; + + error = shmem_get_folio(inode, *ppos / PAGE_SIZE, &folio, SGP_READ); + if (error) { + if (error == -EINVAL) + error = 0; + break; + } + if (folio) { + folio_unlock(folio); + + if (folio_test_hwpoison(folio)) { + error = -EIO; + break; + } + } + + /* + * i_size must be checked after we know the pages are Uptodate. + * + * Checking i_size after the check allows us to calculate + * the correct value for "nr", which means the zero-filled + * part of the page is not copied back to userspace (unless + * another truncate extends the file - this is desired though). + */ + isize = i_size_read(inode); + if (unlikely(*ppos >= isize)) + break; + part = min_t(loff_t, isize - *ppos, len); + + if (folio) { + /* + * If users can be writing to this page using arbitrary + * virtual addresses, take care about potential aliasing + * before reading the page on the kernel side. + */ + if (mapping_writably_mapped(mapping)) + flush_dcache_folio(folio); + folio_mark_accessed(folio); + /* + * Ok, we have the page, and it's up-to-date, so we can + * now splice it into the pipe. + */ + n = splice_folio_into_pipe(pipe, folio, *ppos, part); + folio_put(folio); + folio = NULL; + } else { + n = splice_zeropage_into_pipe(pipe, *ppos, len); + } + + if (!n) + break; + len -= n; + total_spliced += n; + *ppos += n; + in->f_ra.prev_pos = *ppos; + if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + break; + + cond_resched(); + } while (len); + + if (folio) + folio_put(folio); + + file_accessed(in); + return total_spliced ? total_spliced : error; +} + static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence) { struct address_space *mapping = file->f_mapping; @@ -3938,7 +4070,7 @@ static const struct file_operations shmem_file_operations = { .read_iter = shmem_file_read_iter, .write_iter = generic_file_write_iter, .fsync = noop_fsync, - .splice_read = generic_file_splice_read, + .splice_read = shmem_file_splice_read, .splice_write = iter_file_splice_write, .fallocate = shmem_fallocate, #endif |