aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2023-04-03 07:18:20 -0600
committerJens Axboe <axboe@kernel.dk>2023-04-03 07:18:20 -0600
commitbcd83e4071cffe907d294d61d0f0d50f6eee2da0 (patch)
tree8fc6673e363515d19b926c0787ccc76f30b5115e
parent22c29a462e14228799ade6b916a1ee3cda623aa8 (diff)
parentd187b44bc9404581bad8d006d80937d1b3a2b0c0 (diff)
downloadmisc-bcd83e4071cffe907d294d61d0f0d50f6eee2da0.tar.gz
Merge branch 'for-6.4/splice' into for-next
* for-6.4/splice: block: convert bio_map_user_iov to use iov_iter_extract_pages block: Convert bio_iov_iter_get_pages to use iov_iter_extract_pages block: Add BIO_PAGE_PINNED and associated infrastructure block: Replace BIO_NO_PAGE_REF with BIO_PAGE_REFFED with inverted logic block: Fix bio_flagged() so that gcc can better optimise it iomap: Don't get an reference on ZERO_PAGE for direct I/O block zeroing iov_iter: Kill ITER_PIPE cifs: Use generic_file_splice_read() splice: Do splice read from a file without using ITER_PIPE tty, proc, kernfs, random: Use direct_splice_read() coda: Implement splice-read overlayfs: Implement splice-read shmem: Implement splice-read splice: Make do_splice_to() generic and export it splice: Clean up direct_splice_read() a bit Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--block/bio.c29
-rw-r--r--block/blk-map.c22
-rw-r--r--block/blk.h12
-rw-r--r--drivers/char/random.c4
-rw-r--r--drivers/tty/tty_io.c4
-rw-r--r--fs/cifs/cifsfs.c8
-rw-r--r--fs/cifs/cifsfs.h3
-rw-r--r--fs/cifs/file.c16
-rw-r--r--fs/coda/file.c29
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/iomap/direct-io.c1
-rw-r--r--fs/kernfs/file.c2
-rw-r--r--fs/overlayfs/file.c23
-rw-r--r--fs/proc/inode.c4
-rw-r--r--fs/proc/proc_sysctl.c2
-rw-r--r--fs/proc_namespace.c6
-rw-r--r--fs/splice.c76
-rw-r--r--include/linux/bio.h5
-rw-r--r--include/linux/blk_types.h3
-rw-r--r--include/linux/splice.h3
-rw-r--r--include/linux/uio.h14
-rw-r--r--lib/iov_iter.c429
-rw-r--r--mm/filemap.c4
-rw-r--r--mm/shmem.c134
24 files changed, 284 insertions, 551 deletions
diff --git a/block/bio.c b/block/bio.c
index fd11614bba4dc5..fc98c1c723cac4 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1168,7 +1168,7 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty)
bio_for_each_segment_all(bvec, bio, iter_all) {
if (mark_dirty && !PageCompound(bvec->bv_page))
set_page_dirty_lock(bvec->bv_page);
- put_page(bvec->bv_page);
+ bio_release_page(bio, bvec->bv_page);
}
}
EXPORT_SYMBOL_GPL(__bio_release_pages);
@@ -1190,7 +1190,6 @@ void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
bio->bi_io_vec = (struct bio_vec *)iter->bvec;
bio->bi_iter.bi_bvec_done = iter->iov_offset;
bio->bi_iter.bi_size = size;
- bio_set_flag(bio, BIO_NO_PAGE_REF);
bio_set_flag(bio, BIO_CLONED);
}
@@ -1205,7 +1204,7 @@ static int bio_iov_add_page(struct bio *bio, struct page *page,
}
if (same_page)
- put_page(page);
+ bio_release_page(bio, page);
return 0;
}
@@ -1219,7 +1218,7 @@ static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page,
queue_max_zone_append_sectors(q), &same_page) != len)
return -EINVAL;
if (same_page)
- put_page(page);
+ bio_release_page(bio, page);
return 0;
}
@@ -1230,10 +1229,10 @@ static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page,
* @bio: bio to add pages to
* @iter: iov iterator describing the region to be mapped
*
- * Pins pages from *iter and appends them to @bio's bvec array. The
- * pages will have to be released using put_page() when done.
- * For multi-segment *iter, this function only adds pages from the
- * next non-empty segment of the iov iterator.
+ * Extracts pages from *iter and appends them to @bio's bvec array. The pages
+ * will have to be cleaned up in the way indicated by the BIO_PAGE_PINNED flag.
+ * For a multi-segment *iter, this function only adds pages from the next
+ * non-empty segment of the iov iterator.
*/
static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
{
@@ -1265,9 +1264,9 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
* result to ensure the bio's total size is correct. The remainder of
* the iov data will be picked up in the next bio iteration.
*/
- size = iov_iter_get_pages(iter, pages,
- UINT_MAX - bio->bi_iter.bi_size,
- nr_pages, &offset, extraction_flags);
+ size = iov_iter_extract_pages(iter, &pages,
+ UINT_MAX - bio->bi_iter.bi_size,
+ nr_pages, extraction_flags, &offset);
if (unlikely(size <= 0))
return size ? size : -EFAULT;
@@ -1300,7 +1299,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
iov_iter_revert(iter, left);
out:
while (i < nr_pages)
- put_page(pages[i++]);
+ bio_release_page(bio, pages[i++]);
return ret;
}
@@ -1335,6 +1334,8 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
return 0;
}
+ if (iov_iter_extract_will_pin(iter))
+ bio_set_flag(bio, BIO_PAGE_PINNED);
do {
ret = __bio_iov_iter_get_pages(bio, iter);
} while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
@@ -1488,8 +1489,8 @@ void bio_set_pages_dirty(struct bio *bio)
* the BIO and re-dirty the pages in process context.
*
* It is expected that bio_check_pages_dirty() will wholly own the BIO from
- * here on. It will run one put_page() against each page and will run one
- * bio_put() against the BIO.
+ * here on. It will unpin each page and will run one bio_put() against the
+ * BIO.
*/
static void bio_dirty_fn(struct work_struct *work);
diff --git a/block/blk-map.c b/block/blk-map.c
index 04c55f1c492eb1..3551c3ff17cf63 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -281,21 +281,21 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
if (blk_queue_pci_p2pdma(rq->q))
extraction_flags |= ITER_ALLOW_P2PDMA;
+ if (iov_iter_extract_will_pin(iter))
+ bio_set_flag(bio, BIO_PAGE_PINNED);
while (iov_iter_count(iter)) {
- struct page **pages, *stack_pages[UIO_FASTIOV];
+ struct page *stack_pages[UIO_FASTIOV];
+ struct page **pages = stack_pages;
ssize_t bytes;
size_t offs;
int npages;
- if (nr_vecs <= ARRAY_SIZE(stack_pages)) {
- pages = stack_pages;
- bytes = iov_iter_get_pages(iter, pages, LONG_MAX,
- nr_vecs, &offs, extraction_flags);
- } else {
- bytes = iov_iter_get_pages_alloc(iter, &pages,
- LONG_MAX, &offs, extraction_flags);
- }
+ if (nr_vecs > ARRAY_SIZE(stack_pages))
+ pages = NULL;
+
+ bytes = iov_iter_extract_pages(iter, &pages, LONG_MAX,
+ nr_vecs, extraction_flags, &offs);
if (unlikely(bytes <= 0)) {
ret = bytes ? bytes : -EFAULT;
goto out_unmap;
@@ -317,7 +317,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
if (!bio_add_hw_page(rq->q, bio, page, n, offs,
max_sectors, &same_page)) {
if (same_page)
- put_page(page);
+ bio_release_page(bio, page);
break;
}
@@ -329,7 +329,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
* release the pages we didn't map into the bio, if any
*/
while (j < npages)
- put_page(pages[j++]);
+ bio_release_page(bio, pages[j++]);
if (pages != stack_pages)
kvfree(pages);
/* couldn't stuff something into bio? */
diff --git a/block/blk.h b/block/blk.h
index cc4e8873dfdea1..d65d96994a949f 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -432,6 +432,18 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
unsigned int max_sectors, bool *same_page);
+/*
+ * Clean up a page appropriately, where the page may be pinned, may have a
+ * ref taken on it or neither.
+ */
+static inline void bio_release_page(struct bio *bio, struct page *page)
+{
+ if (bio_flagged(bio, BIO_PAGE_PINNED))
+ unpin_user_page(page);
+ else if (bio_flagged(bio, BIO_PAGE_REFFED))
+ put_page(page);
+}
+
struct request_queue *blk_alloc_queue(int node_id);
int disk_scan_partitions(struct gendisk *disk, fmode_t mode);
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 253f2ddb891308..2b47b500126cd3 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1546,7 +1546,7 @@ const struct file_operations random_fops = {
.compat_ioctl = compat_ptr_ioctl,
.fasync = random_fasync,
.llseek = noop_llseek,
- .splice_read = generic_file_splice_read,
+ .splice_read = direct_splice_read,
.splice_write = iter_file_splice_write,
};
@@ -1557,7 +1557,7 @@ const struct file_operations urandom_fops = {
.compat_ioctl = compat_ptr_ioctl,
.fasync = random_fasync,
.llseek = noop_llseek,
- .splice_read = generic_file_splice_read,
+ .splice_read = direct_splice_read,
.splice_write = iter_file_splice_write,
};
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 36fb945fdad489..9d117e579dfbf9 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -466,7 +466,7 @@ static const struct file_operations tty_fops = {
.llseek = no_llseek,
.read_iter = tty_read,
.write_iter = tty_write,
- .splice_read = generic_file_splice_read,
+ .splice_read = direct_splice_read,
.splice_write = iter_file_splice_write,
.poll = tty_poll,
.unlocked_ioctl = tty_ioctl,
@@ -481,7 +481,7 @@ static const struct file_operations console_fops = {
.llseek = no_llseek,
.read_iter = tty_read,
.write_iter = redirected_tty_write,
- .splice_read = generic_file_splice_read,
+ .splice_read = direct_splice_read,
.splice_write = iter_file_splice_write,
.poll = tty_poll,
.unlocked_ioctl = tty_ioctl,
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index ac9034fce409d2..5fd67c13d44b39 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1362,7 +1362,7 @@ const struct file_operations cifs_file_ops = {
.fsync = cifs_fsync,
.flush = cifs_flush,
.mmap = cifs_file_mmap,
- .splice_read = cifs_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
@@ -1382,7 +1382,7 @@ const struct file_operations cifs_file_strict_ops = {
.fsync = cifs_strict_fsync,
.flush = cifs_flush,
.mmap = cifs_file_strict_mmap,
- .splice_read = cifs_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
@@ -1420,7 +1420,7 @@ const struct file_operations cifs_file_nobrl_ops = {
.fsync = cifs_fsync,
.flush = cifs_flush,
.mmap = cifs_file_mmap,
- .splice_read = cifs_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
@@ -1438,7 +1438,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
.fsync = cifs_strict_fsync,
.flush = cifs_flush,
.mmap = cifs_file_strict_mmap,
- .splice_read = cifs_splice_read,
+ .splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 415176b2cf321f..9321c161a27e21 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -100,9 +100,6 @@ extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to);
extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from);
extern ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from);
extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from);
-extern ssize_t cifs_splice_read(struct file *in, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t len,
- unsigned int flags);
extern int cifs_flock(struct file *pfile, int cmd, struct file_lock *plock);
extern int cifs_lock(struct file *, int, struct file_lock *);
extern int cifs_fsync(struct file *, loff_t, loff_t, int);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 6831a9949c430a..040bb30fb21024 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -5066,19 +5066,3 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
.launder_folio = cifs_launder_folio,
.migrate_folio = filemap_migrate_folio,
};
-
-/*
- * Splice data from a file into a pipe.
- */
-ssize_t cifs_splice_read(struct file *in, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t len,
- unsigned int flags)
-{
- if (unlikely(*ppos >= file_inode(in)->i_sb->s_maxbytes))
- return 0;
- if (unlikely(!len))
- return 0;
- if (in->f_flags & O_DIRECT)
- return direct_splice_read(in, ppos, pipe, len, flags);
- return filemap_splice_read(in, ppos, pipe, len, flags);
-}
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 3f3c81e6b1ab0c..12b26bd13564d9 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -23,6 +23,7 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/uio.h>
+#include <linux/splice.h>
#include <linux/coda.h>
#include "coda_psdev.h"
@@ -94,6 +95,32 @@ finish_write:
return ret;
}
+static ssize_t
+coda_file_splice_read(struct file *coda_file, loff_t *ppos,
+ struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags)
+{
+ struct inode *coda_inode = file_inode(coda_file);
+ struct coda_file_info *cfi = coda_ftoc(coda_file);
+ struct file *in = cfi->cfi_container;
+ loff_t ki_pos = *ppos;
+ ssize_t ret;
+
+ ret = venus_access_intent(coda_inode->i_sb, coda_i2f(coda_inode),
+ &cfi->cfi_access_intent,
+ len, ki_pos, CODA_ACCESS_TYPE_READ);
+ if (ret)
+ goto finish_read;
+
+ ret = vfs_splice_read(in, ppos, pipe, len, flags);
+
+finish_read:
+ venus_access_intent(coda_inode->i_sb, coda_i2f(coda_inode),
+ &cfi->cfi_access_intent,
+ len, ki_pos, CODA_ACCESS_TYPE_READ_FINISH);
+ return ret;
+}
+
static void
coda_vm_open(struct vm_area_struct *vma)
{
@@ -302,5 +329,5 @@ const struct file_operations coda_file_operations = {
.open = coda_open,
.release = coda_release,
.fsync = coda_fsync,
- .splice_read = generic_file_splice_read,
+ .splice_read = coda_file_splice_read,
};
diff --git a/fs/direct-io.c b/fs/direct-io.c
index ab0d7ea89813a6..47b90c68b36963 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -403,6 +403,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
bio->bi_end_io = dio_bio_end_aio;
else
bio->bi_end_io = dio_bio_end_io;
+ /* for now require references for all pages */
+ bio_set_flag(bio, BIO_PAGE_REFFED);
sdio->bio = bio;
sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
}
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index f771001574d008..ceeb0a183cea5e 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -202,7 +202,6 @@ static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;
- get_page(page);
__bio_add_page(bio, page, len, 0);
iomap_dio_submit_bio(iter, dio, bio, pos);
}
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index e4a50e4ff0d234..9d23b8141db718 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -1011,7 +1011,7 @@ const struct file_operations kernfs_file_fops = {
.release = kernfs_fop_release,
.poll = kernfs_fop_poll,
.fsync = noop_fsync,
- .splice_read = generic_file_splice_read,
+ .splice_read = direct_splice_read,
.splice_write = iter_file_splice_write,
};
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 7c04f033aadd75..86197882ff3512 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -419,6 +419,27 @@ out_unlock:
return ret;
}
+static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+{
+ const struct cred *old_cred;
+ struct fd real;
+ ssize_t ret;
+
+ ret = ovl_real_fdget(in, &real);
+ if (ret)
+ return ret;
+
+ old_cred = ovl_override_creds(file_inode(in)->i_sb);
+ ret = vfs_splice_read(real.file, ppos, pipe, len, flags);
+ revert_creds(old_cred);
+ ovl_file_accessed(in);
+
+ fdput(real);
+ return ret;
+}
+
/*
* Calling iter_file_splice_write() directly from overlay's f_op may deadlock
* due to lock order inversion between pipe->mutex in iter_file_splice_write()
@@ -695,7 +716,7 @@ const struct file_operations ovl_file_operations = {
.fallocate = ovl_fallocate,
.fadvise = ovl_fadvise,
.flush = ovl_flush,
- .splice_read = generic_file_splice_read,
+ .splice_read = ovl_splice_read,
.splice_write = ovl_splice_write,
.copy_file_range = ovl_copy_file_range,
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index f495fdb391517a..711f1270646998 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -591,7 +591,7 @@ static const struct file_operations proc_iter_file_ops = {
.llseek = proc_reg_llseek,
.read_iter = proc_reg_read_iter,
.write = proc_reg_write,
- .splice_read = generic_file_splice_read,
+ .splice_read = direct_splice_read,
.poll = proc_reg_poll,
.unlocked_ioctl = proc_reg_unlocked_ioctl,
.mmap = proc_reg_mmap,
@@ -617,7 +617,7 @@ static const struct file_operations proc_reg_file_ops_compat = {
static const struct file_operations proc_iter_file_ops_compat = {
.llseek = proc_reg_llseek,
.read_iter = proc_reg_read_iter,
- .splice_read = generic_file_splice_read,
+ .splice_read = direct_splice_read,
.write = proc_reg_write,
.poll = proc_reg_poll,
.unlocked_ioctl = proc_reg_unlocked_ioctl,
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 5851eb5bc7267e..e49f99657d1c8c 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -869,7 +869,7 @@ static const struct file_operations proc_sys_file_operations = {
.poll = proc_sys_poll,
.read_iter = proc_sys_read,
.write_iter = proc_sys_write,
- .splice_read = generic_file_splice_read,
+ .splice_read = direct_splice_read,
.splice_write = iter_file_splice_write,
.llseek = default_llseek,
};
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 846f9455ae226b..492abbbeff5e43 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -324,7 +324,7 @@ static int mountstats_open(struct inode *inode, struct file *file)
const struct file_operations proc_mounts_operations = {
.open = mounts_open,
.read_iter = seq_read_iter,
- .splice_read = generic_file_splice_read,
+ .splice_read = direct_splice_read,
.llseek = seq_lseek,
.release = mounts_release,
.poll = mounts_poll,
@@ -333,7 +333,7 @@ const struct file_operations proc_mounts_operations = {
const struct file_operations proc_mountinfo_operations = {
.open = mountinfo_open,
.read_iter = seq_read_iter,
- .splice_read = generic_file_splice_read,
+ .splice_read = direct_splice_read,
.llseek = seq_lseek,
.release = mounts_release,
.poll = mounts_poll,
@@ -342,7 +342,7 @@ const struct file_operations proc_mountinfo_operations = {
const struct file_operations proc_mountstats_operations = {
.open = mountstats_open,
.read_iter = seq_read_iter,
- .splice_read = generic_file_splice_read,
+ .splice_read = direct_splice_read,
.llseek = seq_lseek,
.release = mounts_release,
};
diff --git a/fs/splice.c b/fs/splice.c
index 2c3dec2b6dfaf1..b7fd9769451269 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -295,7 +295,7 @@ ssize_t direct_splice_read(struct file *in, loff_t *ppos,
struct kiocb kiocb;
struct page **pages;
ssize_t ret;
- size_t used, npages, chunk, remain, reclaim;
+ size_t used, npages, chunk, remain, keep = 0;
int i;
/* Work out how much data we can actually add into the pipe */
@@ -309,7 +309,7 @@ ssize_t direct_splice_read(struct file *in, loff_t *ppos,
if (!bv)
return -ENOMEM;
- pages = (void *)(bv + npages);
+ pages = (struct page **)(bv + npages);
npages = alloc_pages_bulk_array(GFP_USER, npages, pages);
if (!npages) {
kfree(bv);
@@ -332,11 +332,8 @@ ssize_t direct_splice_read(struct file *in, loff_t *ppos,
kiocb.ki_pos = *ppos;
ret = call_read_iter(in, &kiocb, &to);
- reclaim = npages * PAGE_SIZE;
- remain = 0;
if (ret > 0) {
- reclaim -= ret;
- remain = ret;
+ keep = DIV_ROUND_UP(ret, PAGE_SIZE);
*ppos = kiocb.ki_pos;
file_accessed(in);
} else if (ret < 0) {
@@ -349,14 +346,12 @@ ssize_t direct_splice_read(struct file *in, loff_t *ppos,
}
/* Free any pages that didn't get touched at all. */
- reclaim /= PAGE_SIZE;
- if (reclaim) {
- npages -= reclaim;
- release_pages(pages + npages, reclaim);
- }
+ if (keep < npages)
+ release_pages(pages + keep, npages - keep);
/* Push the remaining pages into the pipe. */
- for (i = 0; i < npages; i++) {
+ remain = ret;
+ for (i = 0; i < keep; i++) {
struct pipe_buffer *buf = pipe_head_buf(pipe);
chunk = min_t(size_t, remain, PAGE_SIZE);
@@ -392,29 +387,13 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
- struct iov_iter to;
- struct kiocb kiocb;
- int ret;
-
- iov_iter_pipe(&to, ITER_DEST, pipe, len);
- init_sync_kiocb(&kiocb, in);
- kiocb.ki_pos = *ppos;
- ret = call_read_iter(in, &kiocb, &to);
- if (ret > 0) {
- *ppos = kiocb.ki_pos;
- file_accessed(in);
- } else if (ret < 0) {
- /* free what was emitted */
- pipe_discard_from(pipe, to.start_head);
- /*
- * callers of ->splice_read() expect -EAGAIN on
- * "can't put anything in there", rather than -EFAULT.
- */
- if (ret == -EFAULT)
- ret = -EAGAIN;
- }
-
- return ret;
+ if (unlikely(*ppos >= file_inode(in)->i_sb->s_maxbytes))
+ return 0;
+ if (unlikely(!len))
+ return 0;
+ if (in->f_flags & O_DIRECT)
+ return direct_splice_read(in, ppos, pipe, len, flags);
+ return filemap_splice_read(in, ppos, pipe, len, flags);
}
EXPORT_SYMBOL(generic_file_splice_read);
@@ -856,12 +835,24 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
return out->f_op->splice_write(pipe, out, ppos, len, flags);
}
-/*
- * Attempt to initiate a splice from a file to a pipe.
+/**
+ * vfs_splice_read - Read data from a file and splice it into a pipe
+ * @in: File to splice from
+ * @ppos: Input file offset
+ * @pipe: Pipe to splice to
+ * @len: Number of bytes to splice
+ * @flags: Splice modifier flags (SPLICE_F_*)
+ *
+ * Splice the requested amount of data from the input file to the pipe. This
+ * is synchronous as the caller must hold the pipe lock across the entire
+ * operation.
+ *
+ * If successful, it returns the amount of data spliced, 0 if it hit the EOF or
+ * a hole and a negative error code otherwise.
*/
-static long do_splice_to(struct file *in, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t len,
- unsigned int flags)
+long vfs_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
{
unsigned int p_space;
int ret;
@@ -884,6 +875,7 @@ static long do_splice_to(struct file *in, loff_t *ppos,
return warn_unsupported(in, "read");
return in->f_op->splice_read(in, ppos, pipe, len, flags);
}
+EXPORT_SYMBOL_GPL(vfs_splice_read);
/**
* splice_direct_to_actor - splices data directly between two non-pipes
@@ -953,7 +945,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
size_t read_len;
loff_t pos = sd->pos, prev_pos = pos;
- ret = do_splice_to(in, &pos, pipe, len, flags);
+ ret = vfs_splice_read(in, &pos, pipe, len, flags);
if (unlikely(ret <= 0))
goto out_release;
@@ -1101,7 +1093,7 @@ long splice_file_to_pipe(struct file *in,
pipe_lock(opipe);
ret = wait_for_space(opipe, flags);
if (!ret)
- ret = do_splice_to(in, offset, opipe, len, flags);
+ ret = vfs_splice_read(in, offset, opipe, len, flags);
pipe_unlock(opipe);
if (ret > 0)
wakeup_pipe_readers(opipe);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d766be7152e151..d8c30c791a9af8 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -229,7 +229,7 @@ static inline void bio_cnt_set(struct bio *bio, unsigned int count)
static inline bool bio_flagged(struct bio *bio, unsigned int bit)
{
- return (bio->bi_flags & (1U << bit)) != 0;
+ return bio->bi_flags & (1U << bit);
}
static inline void bio_set_flag(struct bio *bio, unsigned int bit)
@@ -488,7 +488,8 @@ void zero_fill_bio(struct bio *bio);
static inline void bio_release_pages(struct bio *bio, bool mark_dirty)
{
- if (!bio_flagged(bio, BIO_NO_PAGE_REF))
+ if (bio_flagged(bio, BIO_PAGE_REFFED) ||
+ bio_flagged(bio, BIO_PAGE_PINNED))
__bio_release_pages(bio, mark_dirty);
}
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 99be590f952f6e..a0e339ff3d09a4 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -318,7 +318,8 @@ struct bio {
* bio flags
*/
enum {
- BIO_NO_PAGE_REF, /* don't put release vec pages */
+ BIO_PAGE_PINNED, /* Unpin pages in bio_release_pages() */
+ BIO_PAGE_REFFED, /* put pages in bio_release_pages() */
BIO_CLONED, /* doesn't own data */
BIO_BOUNCED, /* bio is a bounce bio */
BIO_QUIET, /* Make BIO Quiet */
diff --git a/include/linux/splice.h b/include/linux/splice.h
index a55179fd60fc39..8f052c3dae953a 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -76,6 +76,9 @@ extern ssize_t splice_to_pipe(struct pipe_inode_info *,
struct splice_pipe_desc *);
extern ssize_t add_to_pipe(struct pipe_inode_info *,
struct pipe_buffer *);
+long vfs_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags);
extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
splice_direct_actor *);
extern long do_splice(struct file *in, loff_t *off_in,
diff --git a/include/linux/uio.h b/include/linux/uio.h
index ed35f4427a0ad4..ceab22e2c055db 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -11,7 +11,6 @@
#include <uapi/linux/uio.h>
struct page;
-struct pipe_inode_info;
typedef unsigned int __bitwise iov_iter_extraction_t;
@@ -25,7 +24,6 @@ enum iter_type {
ITER_IOVEC,
ITER_KVEC,
ITER_BVEC,
- ITER_PIPE,
ITER_XARRAY,
ITER_DISCARD,
ITER_UBUF,
@@ -73,7 +71,6 @@ struct iov_iter {
const struct kvec *kvec;
const struct bio_vec *bvec;
struct xarray *xarray;
- struct pipe_inode_info *pipe;
void __user *ubuf;
};
size_t count;
@@ -81,10 +78,6 @@ struct iov_iter {
};
union {
unsigned long nr_segs;
- struct {
- unsigned int head;
- unsigned int start_head;
- };
loff_t xarray_start;
};
};
@@ -132,11 +125,6 @@ static inline bool iov_iter_is_bvec(const struct iov_iter *i)
return iov_iter_type(i) == ITER_BVEC;
}
-static inline bool iov_iter_is_pipe(const struct iov_iter *i)
-{
- return iov_iter_type(i) == ITER_PIPE;
-}
-
static inline bool iov_iter_is_discard(const struct iov_iter *i)
{
return iov_iter_type(i) == ITER_DISCARD;
@@ -269,8 +257,6 @@ void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec
unsigned long nr_segs, size_t count);
void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec,
unsigned long nr_segs, size_t count);
-void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe,
- size_t count);
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count);
void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray,
loff_t start, size_t count);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 3e6c9bcfa612c2..789b1c7d1254c1 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -14,8 +14,6 @@
#include <linux/scatterlist.h>
#include <linux/instrumented.h>
-#define PIPE_PARANOIA /* for now */
-
/* covers ubuf and kbuf alike */
#define iterate_buf(i, n, base, len, off, __p, STEP) { \
size_t __maybe_unused off = 0; \
@@ -186,150 +184,6 @@ static int copyin(void *to, const void __user *from, size_t n)
return res;
}
-#ifdef PIPE_PARANOIA
-static bool sanity(const struct iov_iter *i)
-{
- struct pipe_inode_info *pipe = i->pipe;
- unsigned int p_head = pipe->head;
- unsigned int p_tail = pipe->tail;
- unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
- unsigned int i_head = i->head;
- unsigned int idx;
-
- if (i->last_offset) {
- struct pipe_buffer *p;
- if (unlikely(p_occupancy == 0))
- goto Bad; // pipe must be non-empty
- if (unlikely(i_head != p_head - 1))
- goto Bad; // must be at the last buffer...
-
- p = pipe_buf(pipe, i_head);
- if (unlikely(p->offset + p->len != abs(i->last_offset)))
- goto Bad; // ... at the end of segment
- } else {
- if (i_head != p_head)
- goto Bad; // must be right after the last buffer
- }
- return true;
-Bad:
- printk(KERN_ERR "idx = %d, offset = %d\n", i_head, i->last_offset);
- printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
- p_head, p_tail, pipe->ring_size);
- for (idx = 0; idx < pipe->ring_size; idx++)
- printk(KERN_ERR "[%p %p %d %d]\n",
- pipe->bufs[idx].ops,
- pipe->bufs[idx].page,
- pipe->bufs[idx].offset,
- pipe->bufs[idx].len);
- WARN_ON(1);
- return false;
-}
-#else
-#define sanity(i) true
-#endif
-
-static struct page *push_anon(struct pipe_inode_info *pipe, unsigned size)
-{
- struct page *page = alloc_page(GFP_USER);
- if (page) {
- struct pipe_buffer *buf = pipe_buf(pipe, pipe->head++);
- *buf = (struct pipe_buffer) {
- .ops = &default_pipe_buf_ops,
- .page = page,
- .offset = 0,
- .len = size
- };
- }
- return page;
-}
-
-static void push_page(struct pipe_inode_info *pipe, struct page *page,
- unsigned int offset, unsigned int size)
-{
- struct pipe_buffer *buf = pipe_buf(pipe, pipe->head++);
- *buf = (struct pipe_buffer) {
- .ops = &page_cache_pipe_buf_ops,
- .page = page,
- .offset = offset,
- .len = size
- };
- get_page(page);
-}
-
-static inline int last_offset(const struct pipe_buffer *buf)
-{
- if (buf->ops == &default_pipe_buf_ops)
- return buf->len; // buf->offset is 0 for those
- else
- return -(buf->offset + buf->len);
-}
-
-static struct page *append_pipe(struct iov_iter *i, size_t size,
- unsigned int *off)
-{
- struct pipe_inode_info *pipe = i->pipe;
- int offset = i->last_offset;
- struct pipe_buffer *buf;
- struct page *page;
-
- if (offset > 0 && offset < PAGE_SIZE) {
- // some space in the last buffer; add to it
- buf = pipe_buf(pipe, pipe->head - 1);
- size = min_t(size_t, size, PAGE_SIZE - offset);
- buf->len += size;
- i->last_offset += size;
- i->count -= size;
- *off = offset;
- return buf->page;
- }
- // OK, we need a new buffer
- *off = 0;
- size = min_t(size_t, size, PAGE_SIZE);
- if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
- return NULL;
- page = push_anon(pipe, size);
- if (!page)
- return NULL;
- i->head = pipe->head - 1;
- i->last_offset = size;
- i->count -= size;
- return page;
-}
-
-static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
- struct iov_iter *i)
-{
- struct pipe_inode_info *pipe = i->pipe;
- unsigned int head = pipe->head;
-
- if (unlikely(bytes > i->count))
- bytes = i->count;
-
- if (unlikely(!bytes))
- return 0;
-
- if (!sanity(i))
- return 0;
-
- if (offset && i->last_offset == -offset) { // could we merge it?
- struct pipe_buffer *buf = pipe_buf(pipe, head - 1);
- if (buf->page == page) {
- buf->len += bytes;
- i->last_offset -= bytes;
- i->count -= bytes;
- return bytes;
- }
- }
- if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
- return 0;
-
- push_page(pipe, page, offset, bytes);
- i->last_offset = -(offset + bytes);
- i->head = head;
- i->count -= bytes;
- return bytes;
-}
-
/*
* fault_in_iov_iter_readable - fault in iov iterator for reading
* @i: iterator
@@ -433,46 +287,6 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction,
}
EXPORT_SYMBOL(iov_iter_init);
-// returns the offset in partial buffer (if any)
-static inline unsigned int pipe_npages(const struct iov_iter *i, int *npages)
-{
- struct pipe_inode_info *pipe = i->pipe;
- int used = pipe->head - pipe->tail;
- int off = i->last_offset;
-
- *npages = max((int)pipe->max_usage - used, 0);
-
- if (off > 0 && off < PAGE_SIZE) { // anon and not full
- (*npages)++;
- return off;
- }
- return 0;
-}
-
-static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
- struct iov_iter *i)
-{
- unsigned int off, chunk;
-
- if (unlikely(bytes > i->count))
- bytes = i->count;
- if (unlikely(!bytes))
- return 0;
-
- if (!sanity(i))
- return 0;
-
- for (size_t n = bytes; n; n -= chunk) {
- struct page *page = append_pipe(i, n, &off);
- chunk = min_t(size_t, n, PAGE_SIZE - off);
- if (!page)
- return bytes - n;
- memcpy_to_page(page, off, addr, chunk);
- addr += chunk;
- }
- return bytes;
-}
-
static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
__wsum sum, size_t off)
{
@@ -480,44 +294,10 @@ static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
return csum_block_add(sum, next, off);
}
-static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
- struct iov_iter *i, __wsum *sump)
-{
- __wsum sum = *sump;
- size_t off = 0;
- unsigned int chunk, r;
-
- if (unlikely(bytes > i->count))
- bytes = i->count;
- if (unlikely(!bytes))
- return 0;
-
- if (!sanity(i))
- return 0;
-
- while (bytes) {
- struct page *page = append_pipe(i, bytes, &r);
- char *p;
-
- if (!page)
- break;
- chunk = min_t(size_t, bytes, PAGE_SIZE - r);
- p = kmap_local_page(page);
- sum = csum_and_memcpy(p + r, addr + off, chunk, sum, off);
- kunmap_local(p);
- off += chunk;
- bytes -= chunk;
- }
- *sump = sum;
- return off;
-}
-
size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
if (WARN_ON_ONCE(i->data_source))
return 0;
- if (unlikely(iov_iter_is_pipe(i)))
- return copy_pipe_to_iter(addr, bytes, i);
if (user_backed_iter(i))
might_fault();
iterate_and_advance(i, bytes, base, len, off,
@@ -539,42 +319,6 @@ static int copyout_mc(void __user *to, const void *from, size_t n)
return n;
}
-static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
- struct iov_iter *i)
-{
- size_t xfer = 0;
- unsigned int off, chunk;
-
- if (unlikely(bytes > i->count))
- bytes = i->count;
- if (unlikely(!bytes))
- return 0;
-
- if (!sanity(i))
- return 0;
-
- while (bytes) {
- struct page *page = append_pipe(i, bytes, &off);
- unsigned long rem;
- char *p;
-
- if (!page)
- break;
- chunk = min_t(size_t, bytes, PAGE_SIZE - off);
- p = kmap_local_page(page);
- rem = copy_mc_to_kernel(p + off, addr + xfer, chunk);
- chunk -= rem;
- kunmap_local(p);
- xfer += chunk;
- bytes -= chunk;
- if (rem) {
- iov_iter_revert(i, rem);
- break;
- }
- }
- return xfer;
-}
-
/**
* _copy_mc_to_iter - copy to iter with source memory error exception handling
* @addr: source kernel address
@@ -594,9 +338,8 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
* alignment and poison alignment assumptions to avoid re-triggering
* hardware exceptions.
*
- * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
- * Compare to copy_to_iter() where only ITER_IOVEC attempts might return
- * a short copy.
+ * * ITER_KVEC and ITER_BVEC can return short copies. Compare to
+ * copy_to_iter() where only ITER_IOVEC attempts might return a short copy.
*
* Return: number of bytes copied (may be %0)
*/
@@ -604,8 +347,6 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
if (WARN_ON_ONCE(i->data_source))
return 0;
- if (unlikely(iov_iter_is_pipe(i)))
- return copy_mc_pipe_to_iter(addr, bytes, i);
if (user_backed_iter(i))
might_fault();
__iterate_and_advance(i, bytes, base, len, off,
@@ -711,8 +452,6 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
return 0;
if (WARN_ON_ONCE(i->data_source))
return 0;
- if (unlikely(iov_iter_is_pipe(i)))
- return copy_page_to_iter_pipe(page, offset, bytes, i);
page += offset / PAGE_SIZE; // first subpage
offset %= PAGE_SIZE;
while (1) {
@@ -761,36 +500,8 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
}
EXPORT_SYMBOL(copy_page_from_iter);
-static size_t pipe_zero(size_t bytes, struct iov_iter *i)
-{
- unsigned int chunk, off;
-
- if (unlikely(bytes > i->count))
- bytes = i->count;
- if (unlikely(!bytes))
- return 0;
-
- if (!sanity(i))
- return 0;
-
- for (size_t n = bytes; n; n -= chunk) {
- struct page *page = append_pipe(i, n, &off);
- char *p;
-
- if (!page)
- return bytes - n;
- chunk = min_t(size_t, n, PAGE_SIZE - off);
- p = kmap_local_page(page);
- memset(p + off, 0, chunk);
- kunmap_local(p);
- }
- return bytes;
-}
-
size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
- if (unlikely(iov_iter_is_pipe(i)))
- return pipe_zero(bytes, i);
iterate_and_advance(i, bytes, base, len, count,
clear_user(base, len),
memset(base, 0, len)
@@ -821,32 +532,6 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t byt
}
EXPORT_SYMBOL(copy_page_from_iter_atomic);
-static void pipe_advance(struct iov_iter *i, size_t size)
-{
- struct pipe_inode_info *pipe = i->pipe;
- int off = i->last_offset;
-
- if (!off && !size) {
- pipe_discard_from(pipe, i->start_head); // discard everything
- return;
- }
- i->count -= size;
- while (1) {
- struct pipe_buffer *buf = pipe_buf(pipe, i->head);
- if (off) /* make it relative to the beginning of buffer */
- size += abs(off) - buf->offset;
- if (size <= buf->len) {
- buf->len = size;
- i->last_offset = last_offset(buf);
- break;
- }
- size -= buf->len;
- i->head++;
- off = 0;
- }
- pipe_discard_from(pipe, i->head + 1); // discard everything past this one
-}
-
static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
{
const struct bio_vec *bvec, *end;
@@ -898,8 +583,6 @@ void iov_iter_advance(struct iov_iter *i, size_t size)
iov_iter_iovec_advance(i, size);
} else if (iov_iter_is_bvec(i)) {
iov_iter_bvec_advance(i, size);
- } else if (iov_iter_is_pipe(i)) {
- pipe_advance(i, size);
} else if (iov_iter_is_discard(i)) {
i->count -= size;
}
@@ -913,26 +596,6 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
if (WARN_ON(unroll > MAX_RW_COUNT))
return;
i->count += unroll;
- if (unlikely(iov_iter_is_pipe(i))) {
- struct pipe_inode_info *pipe = i->pipe;
- unsigned int head = pipe->head;
-
- while (head > i->start_head) {
- struct pipe_buffer *b = pipe_buf(pipe, --head);
- if (unroll < b->len) {
- b->len -= unroll;
- i->last_offset = last_offset(b);
- i->head = head;
- return;
- }
- unroll -= b->len;
- pipe_buf_release(pipe, b);
- pipe->head--;
- }
- i->last_offset = 0;
- i->head = head;
- return;
- }
if (unlikely(iov_iter_is_discard(i)))
return;
if (unroll <= i->iov_offset) {
@@ -1020,24 +683,6 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
}
EXPORT_SYMBOL(iov_iter_bvec);
-void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
- struct pipe_inode_info *pipe,
- size_t count)
-{
- BUG_ON(direction != READ);
- WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
- *i = (struct iov_iter){
- .iter_type = ITER_PIPE,
- .data_source = false,
- .pipe = pipe,
- .head = pipe->head,
- .start_head = pipe->head,
- .last_offset = 0,
- .count = count
- };
-}
-EXPORT_SYMBOL(iov_iter_pipe);
-
/**
* iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray
* @i: The iterator to initialise.
@@ -1163,19 +808,6 @@ bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask,
if (iov_iter_is_bvec(i))
return iov_iter_aligned_bvec(i, addr_mask, len_mask);
- if (iov_iter_is_pipe(i)) {
- size_t size = i->count;
-
- if (size & len_mask)
- return false;
- if (size && i->last_offset > 0) {
- if (i->last_offset & addr_mask)
- return false;
- }
-
- return true;
- }
-
if (iov_iter_is_xarray(i)) {
if (i->count & len_mask)
return false;
@@ -1246,14 +878,6 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
if (iov_iter_is_bvec(i))
return iov_iter_alignment_bvec(i);
- if (iov_iter_is_pipe(i)) {
- size_t size = i->count;
-
- if (size && i->last_offset > 0)
- return size | i->last_offset;
- return size;
- }
-
if (iov_iter_is_xarray(i))
return (i->xarray_start + i->iov_offset) | i->count;
@@ -1306,36 +930,6 @@ static int want_pages_array(struct page ***res, size_t size,
return count;
}
-static ssize_t pipe_get_pages(struct iov_iter *i,
- struct page ***pages, size_t maxsize, unsigned maxpages,
- size_t *start)
-{
- unsigned int npages, count, off, chunk;
- struct page **p;
- size_t left;
-
- if (!sanity(i))
- return -EFAULT;
-
- *start = off = pipe_npages(i, &npages);
- if (!npages)
- return -EFAULT;
- count = want_pages_array(pages, maxsize, off, min(npages, maxpages));
- if (!count)
- return -ENOMEM;
- p = *pages;
- for (npages = 0, left = maxsize ; npages < count; npages++, left -= chunk) {
- struct page *page = append_pipe(i, left, &off);
- if (!page)
- break;
- chunk = min_t(size_t, left, PAGE_SIZE - off);
- get_page(*p++ = page);
- }
- if (!npages)
- return -EFAULT;
- return maxsize - left;
-}
-
static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa,
pgoff_t index, unsigned int nr_pages)
{
@@ -1486,8 +1080,6 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i,
}
return maxsize;
}
- if (iov_iter_is_pipe(i))
- return pipe_get_pages(i, pages, maxsize, maxpages, start);
if (iov_iter_is_xarray(i))
return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
return -EFAULT;
@@ -1577,9 +1169,7 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
}
sum = csum_shift(csstate->csum, csstate->off);
- if (unlikely(iov_iter_is_pipe(i)))
- bytes = csum_and_copy_to_pipe_iter(addr, bytes, i, &sum);
- else iterate_and_advance(i, bytes, base, len, off, ({
+ iterate_and_advance(i, bytes, base, len, off, ({
next = csum_and_copy_to_user(addr + off, base, len);
sum = csum_block_add(sum, next, off);
next ? 0 : len;
@@ -1664,15 +1254,6 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
return iov_npages(i, maxpages);
if (iov_iter_is_bvec(i))
return bvec_npages(i, maxpages);
- if (iov_iter_is_pipe(i)) {
- int npages;
-
- if (!sanity(i))
- return 0;
-
- pipe_npages(i, &npages);
- return min(npages, maxpages);
- }
if (iov_iter_is_xarray(i)) {
unsigned offset = (i->xarray_start + i->iov_offset) % PAGE_SIZE;
int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE);
@@ -1685,10 +1266,6 @@ EXPORT_SYMBOL(iov_iter_npages);
const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
{
*new = *old;
- if (unlikely(iov_iter_is_pipe(new))) {
- WARN_ON(1);
- return NULL;
- }
if (iov_iter_is_bvec(new))
return new->bvec = kmemdup(new->bvec,
new->nr_segs * sizeof(struct bio_vec),
diff --git a/mm/filemap.c b/mm/filemap.c
index 2723104cc06a12..470be06b609677 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2690,8 +2690,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
if (unlikely(iocb->ki_pos >= i_size_read(inode)))
break;
- error = filemap_get_pages(iocb, iter->count, &fbatch,
- iov_iter_is_pipe(iter));
+ error = filemap_get_pages(iocb, iter->count, &fbatch, false);
if (error < 0)
break;
@@ -2967,7 +2966,6 @@ out:
return total_spliced ? total_spliced : error;
}
-EXPORT_SYMBOL(filemap_splice_read);
static inline loff_t folio_seek_hole_data(struct xa_state *xas,
struct address_space *mapping, struct folio *folio,
diff --git a/mm/shmem.c b/mm/shmem.c
index 448f393d8ab2b1..a0c268dcf7b855 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2719,6 +2719,138 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
return retval ? retval : error;
}
+static bool zero_pipe_buf_get(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+ return true;
+}
+
+static void zero_pipe_buf_release(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+}
+
+static bool zero_pipe_buf_try_steal(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+ return false;
+}
+
+static const struct pipe_buf_operations zero_pipe_buf_ops = {
+ .release = zero_pipe_buf_release,
+ .try_steal = zero_pipe_buf_try_steal,
+ .get = zero_pipe_buf_get,
+};
+
+static size_t splice_zeropage_into_pipe(struct pipe_inode_info *pipe,
+ loff_t fpos, size_t size)
+{
+ size_t offset = fpos & ~PAGE_MASK;
+
+ size = min_t(size_t, size, PAGE_SIZE - offset);
+
+ if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) {
+ struct pipe_buffer *buf = pipe_head_buf(pipe);
+
+ *buf = (struct pipe_buffer) {
+ .ops = &zero_pipe_buf_ops,
+ .page = ZERO_PAGE(0),
+ .offset = offset,
+ .len = size,
+ };
+ pipe->head++;
+ }
+
+ return size;
+}
+
+static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags)
+{
+ struct inode *inode = file_inode(in);
+ struct address_space *mapping = inode->i_mapping;
+ struct folio *folio = NULL;
+ size_t total_spliced = 0, used, npages, n, part;
+ loff_t isize;
+ int error = 0;
+
+ /* Work out how much data we can actually add into the pipe */
+ used = pipe_occupancy(pipe->head, pipe->tail);
+ npages = max_t(ssize_t, pipe->max_usage - used, 0);
+ len = min_t(size_t, len, npages * PAGE_SIZE);
+
+ do {
+ if (*ppos >= i_size_read(inode))
+ break;
+
+ error = shmem_get_folio(inode, *ppos / PAGE_SIZE, &folio, SGP_READ);
+ if (error) {
+ if (error == -EINVAL)
+ error = 0;
+ break;
+ }
+ if (folio) {
+ folio_unlock(folio);
+
+ if (folio_test_hwpoison(folio)) {
+ error = -EIO;
+ break;
+ }
+ }
+
+ /*
+ * i_size must be checked after we know the pages are Uptodate.
+ *
+ * Checking i_size after the check allows us to calculate
+ * the correct value for "nr", which means the zero-filled
+ * part of the page is not copied back to userspace (unless
+ * another truncate extends the file - this is desired though).
+ */
+ isize = i_size_read(inode);
+ if (unlikely(*ppos >= isize))
+ break;
+ part = min_t(loff_t, isize - *ppos, len);
+
+ if (folio) {
+ /*
+ * If users can be writing to this page using arbitrary
+ * virtual addresses, take care about potential aliasing
+ * before reading the page on the kernel side.
+ */
+ if (mapping_writably_mapped(mapping))
+ flush_dcache_folio(folio);
+ folio_mark_accessed(folio);
+ /*
+ * Ok, we have the page, and it's up-to-date, so we can
+ * now splice it into the pipe.
+ */
+ n = splice_folio_into_pipe(pipe, folio, *ppos, part);
+ folio_put(folio);
+ folio = NULL;
+ } else {
+ n = splice_zeropage_into_pipe(pipe, *ppos, len);
+ }
+
+ if (!n)
+ break;
+ len -= n;
+ total_spliced += n;
+ *ppos += n;
+ in->f_ra.prev_pos = *ppos;
+ if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
+ break;
+
+ cond_resched();
+ } while (len);
+
+ if (folio)
+ folio_put(folio);
+
+ file_accessed(in);
+ return total_spliced ? total_spliced : error;
+}
+
static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
{
struct address_space *mapping = file->f_mapping;
@@ -3938,7 +4070,7 @@ static const struct file_operations shmem_file_operations = {
.read_iter = shmem_file_read_iter,
.write_iter = generic_file_write_iter,
.fsync = noop_fsync,
- .splice_read = generic_file_splice_read,
+ .splice_read = shmem_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = shmem_fallocate,
#endif