diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2024-04-29 08:55:40 +1000 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2024-04-29 08:55:40 +1000 |
commit | 18a0d1e1c6f7bf9a862b50cb655867030853c30b (patch) | |
tree | 557b6f2f56ed50315da6ad2273baad75eaa37d29 | |
parent | 7426242e08afea9b7eec0d9565d2f002109ee1ae (diff) | |
parent | eea3260250ea3b874baba9cbe0344e678c9da563 (diff) | |
download | linux-next-history-18a0d1e1c6f7bf9a862b50cb655867030853c30b.tar.gz |
Merge branch 'vfs.all' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
Notice: this object is not reachable from any branch.
76 files changed, 828 insertions, 705 deletions
diff --git a/block/bdev.c b/block/bdev.c index da2a167a4d08b6..25458ed47d7588 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -76,7 +76,7 @@ static void bdev_write_inode(struct block_device *bdev) /* Kill _all_ buffers and pagecache , dirty or not.. */ static void kill_bdev(struct block_device *bdev) { - struct address_space *mapping = bdev->bd_inode->i_mapping; + struct address_space *mapping = bdev->bd_mapping; if (mapping_empty(mapping)) return; @@ -88,7 +88,7 @@ static void kill_bdev(struct block_device *bdev) /* Invalidate clean unused buffers and pagecache. */ void invalidate_bdev(struct block_device *bdev) { - struct address_space *mapping = bdev->bd_inode->i_mapping; + struct address_space *mapping = bdev->bd_mapping; if (mapping->nrpages) { invalidate_bh_lrus(); @@ -116,7 +116,7 @@ int truncate_bdev_range(struct block_device *bdev, blk_mode_t mode, goto invalidate; } - truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend); + truncate_inode_pages_range(bdev->bd_mapping, lstart, lend); if (!(mode & BLK_OPEN_EXCL)) bd_abort_claiming(bdev, truncate_bdev_range); return 0; @@ -126,7 +126,7 @@ invalidate: * Someone else has handle exclusively open. Try invalidating instead. * The 'end' argument is inclusive so the rounding is safe. 
*/ - return invalidate_inode_pages2_range(bdev->bd_inode->i_mapping, + return invalidate_inode_pages2_range(bdev->bd_mapping, lstart >> PAGE_SHIFT, lend >> PAGE_SHIFT); } @@ -192,7 +192,7 @@ int sync_blockdev_nowait(struct block_device *bdev) { if (!bdev) return 0; - return filemap_flush(bdev->bd_inode->i_mapping); + return filemap_flush(bdev->bd_mapping); } EXPORT_SYMBOL_GPL(sync_blockdev_nowait); @@ -204,13 +204,13 @@ int sync_blockdev(struct block_device *bdev) { if (!bdev) return 0; - return filemap_write_and_wait(bdev->bd_inode->i_mapping); + return filemap_write_and_wait(bdev->bd_mapping); } EXPORT_SYMBOL(sync_blockdev); int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend) { - return filemap_write_and_wait_range(bdev->bd_inode->i_mapping, + return filemap_write_and_wait_range(bdev->bd_mapping, lstart, lend); } EXPORT_SYMBOL(sync_blockdev_range); @@ -413,6 +413,7 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) mutex_init(&bdev->bd_holder_lock); bdev->bd_partno = partno; bdev->bd_inode = inode; + bdev->bd_mapping = &inode->i_data; bdev->bd_queue = disk->queue; if (partno) bdev->bd_has_submit_bio = disk->part0->bd_has_submit_bio; @@ -438,7 +439,7 @@ void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors) void bdev_add(struct block_device *bdev, dev_t dev) { if (bdev_stable_writes(bdev)) - mapping_set_stable_writes(bdev->bd_inode->i_mapping); + mapping_set_stable_writes(bdev->bd_mapping); bdev->bd_dev = dev; bdev->bd_inode->i_rdev = dev; bdev->bd_inode->i_ino = dev; @@ -912,12 +913,12 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder, disk_unblock_events(disk); bdev_file->f_flags |= O_LARGEFILE; - bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT; + bdev_file->f_mode |= FMODE_CAN_ODIRECT; if (bdev_nowait(bdev)) bdev_file->f_mode |= FMODE_NOWAIT; if (mode & BLK_OPEN_RESTRICT_WRITES) bdev_file->f_mode |= FMODE_WRITE_RESTRICTED; - bdev_file->f_mapping = 
bdev->bd_inode->i_mapping; + bdev_file->f_mapping = bdev->bd_mapping; bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping); bdev_file->private_data = holder; @@ -1260,6 +1261,18 @@ void bdev_statx_dioalign(struct inode *inode, struct kstat *stat) blkdev_put_no_open(bdev); } +bool disk_live(struct gendisk *disk) +{ + return !inode_unhashed(disk->part0->bd_inode); +} +EXPORT_SYMBOL_GPL(disk_live); + +unsigned int block_size(struct block_device *bdev) +{ + return 1 << bdev->bd_inode->i_blkbits; +} +EXPORT_SYMBOL_GPL(block_size); + static int __init setup_bdev_allow_write_mounted(char *str) { if (kstrtobool(str, &bdev_allow_write_mounted)) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index da0f4b2a8fa093..b008bcd4889c45 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -398,7 +398,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode, op = REQ_OP_ZONE_RESET; /* Invalidate the page cache, including dirty pages. */ - filemap_invalidate_lock(bdev->bd_inode->i_mapping); + filemap_invalidate_lock(bdev->bd_mapping); ret = blkdev_truncate_zone_range(bdev, mode, &zrange); if (ret) goto fail; @@ -420,7 +420,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode, fail: if (cmd == BLKRESETZONE) - filemap_invalidate_unlock(bdev->bd_inode->i_mapping); + filemap_invalidate_unlock(bdev->bd_mapping); return ret; } diff --git a/block/fops.c b/block/fops.c index 679d9b752fe828..040743a3b43d27 100644 --- a/block/fops.c +++ b/block/fops.c @@ -668,8 +668,8 @@ static ssize_t blkdev_buffered_write(struct kiocb *iocb, struct iov_iter *from) static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; - struct block_device *bdev = I_BDEV(file->f_mapping->host); - struct inode *bd_inode = bdev->bd_inode; + struct inode *bd_inode = bdev_file_inode(file); + struct block_device *bdev = I_BDEV(bd_inode); loff_t size = bdev_nr_bytes(bdev); size_t shorted = 0; ssize_t ret; @@ -863,6 
+863,7 @@ const struct file_operations def_blk_fops = { .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, .fallocate = blkdev_fallocate, + .fop_flags = FOP_BUFFER_RASYNC, }; static __init int blkdev_init(void) diff --git a/block/genhd.c b/block/genhd.c index bb29a68e1d6766..b294d56961fba4 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -745,7 +745,7 @@ void invalidate_disk(struct gendisk *disk) struct block_device *bdev = disk->part0; invalidate_bdev(bdev); - bdev->bd_inode->i_mapping->wb_err = 0; + bdev->bd_mapping->wb_err = 0; set_capacity(disk, 0); } EXPORT_SYMBOL(invalidate_disk); diff --git a/block/ioctl.c b/block/ioctl.c index f505f9c341eb08..3b31c09941dc2e 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -97,7 +97,6 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, { uint64_t range[2]; uint64_t start, len, end; - struct inode *inode = bdev->bd_inode; int err; if (!(mode & BLK_OPEN_WRITE)) @@ -121,13 +120,13 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, end > bdev_nr_bytes(bdev)) return -EINVAL; - filemap_invalidate_lock(inode->i_mapping); + filemap_invalidate_lock(bdev->bd_mapping); err = truncate_bdev_range(bdev, mode, start, start + len - 1); if (err) goto fail; err = blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL); fail: - filemap_invalidate_unlock(inode->i_mapping); + filemap_invalidate_unlock(bdev->bd_mapping); return err; } @@ -152,12 +151,12 @@ static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode, if (start + len > bdev_nr_bytes(bdev)) return -EINVAL; - filemap_invalidate_lock(bdev->bd_inode->i_mapping); + filemap_invalidate_lock(bdev->bd_mapping); err = truncate_bdev_range(bdev, mode, start, start + len - 1); if (!err) err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9, GFP_KERNEL); - filemap_invalidate_unlock(bdev->bd_inode->i_mapping); + filemap_invalidate_unlock(bdev->bd_mapping); return err; } @@ -167,7 +166,6 @@ 
static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, { uint64_t range[2]; uint64_t start, end, len; - struct inode *inode = bdev->bd_inode; int err; if (!(mode & BLK_OPEN_WRITE)) @@ -190,7 +188,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, return -EINVAL; /* Invalidate the page cache, including dirty pages */ - filemap_invalidate_lock(inode->i_mapping); + filemap_invalidate_lock(bdev->bd_mapping); err = truncate_bdev_range(bdev, mode, start, end); if (err) goto fail; @@ -199,7 +197,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, BLKDEV_ZERO_NOUNMAP); fail: - filemap_invalidate_unlock(inode->i_mapping); + filemap_invalidate_unlock(bdev->bd_mapping); return err; } diff --git a/block/partitions/core.c b/block/partitions/core.c index b11e88c82c8cfa..899f2093835fe5 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -704,7 +704,7 @@ EXPORT_SYMBOL_GPL(bdev_disk_changed); void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p) { - struct address_space *mapping = state->disk->part0->bd_inode->i_mapping; + struct address_space *mapping = state->disk->part0->bd_mapping; struct folio *folio; if (n >= get_capacity(state->disk)) { diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 47c126d37b59ab..eb61598247a94a 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -377,7 +377,7 @@ static const struct file_operations dax_fops = { .release = dax_release, .get_unmapped_area = dax_get_unmapped_area, .mmap = dax_mmap, - .mmap_supported_flags = MAP_SYNC, + .fop_flags = FOP_MMAP_SYNC, }; static void dev_dax_cdev_del(void *cdev) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 330bcd9ea4a9cc..707836a7d8b2af 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -171,7 +171,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, struct page *page; unsigned int i; - page = 
read_cache_page_gfp(bdev->bd_inode->i_mapping, + page = read_cache_page_gfp(bdev->bd_mapping, SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL); if (IS_ERR(page)) return "IO error"; diff --git a/drivers/md/dm-vdo/dm-vdo-target.c b/drivers/md/dm-vdo/dm-vdo-target.c index 5a4b0a927f560b..b423bec6458bbe 100644 --- a/drivers/md/dm-vdo/dm-vdo-target.c +++ b/drivers/md/dm-vdo/dm-vdo-target.c @@ -878,7 +878,7 @@ static int parse_device_config(int argc, char **argv, struct dm_target *ti, } if (config->version == 0) { - u64 device_size = i_size_read(config->owned_device->bdev->bd_inode); + u64 device_size = bdev_nr_bytes(config->owned_device->bdev); config->physical_blocks = device_size / VDO_BLOCK_SIZE; } @@ -1011,7 +1011,7 @@ static void vdo_status(struct dm_target *ti, status_type_t status_type, static block_count_t __must_check get_underlying_device_block_count(const struct vdo *vdo) { - return i_size_read(vdo_get_backing_device(vdo)->bd_inode) / VDO_BLOCK_SIZE; + return bdev_nr_bytes(vdo_get_backing_device(vdo)) / VDO_BLOCK_SIZE; } static int __must_check process_vdo_message_locked(struct vdo *vdo, unsigned int argc, diff --git a/drivers/md/dm-vdo/indexer/io-factory.c b/drivers/md/dm-vdo/indexer/io-factory.c index 515765d35794af..1bee9d63dc0a69 100644 --- a/drivers/md/dm-vdo/indexer/io-factory.c +++ b/drivers/md/dm-vdo/indexer/io-factory.c @@ -90,7 +90,7 @@ void uds_put_io_factory(struct io_factory *factory) size_t uds_get_writable_size(struct io_factory *factory) { - return i_size_read(factory->bdev->bd_inode); + return bdev_nr_bytes(factory->bdev); } /* Create a struct dm_bufio_client for an index region starting at offset. 
*/ diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c index caacdc0a381945..b06c8dd515624c 100644 --- a/drivers/mtd/devices/block2mtd.c +++ b/drivers/mtd/devices/block2mtd.c @@ -265,6 +265,7 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size, struct file *bdev_file; struct block_device *bdev; struct block2mtd_dev *dev; + loff_t size; char *name; if (!devname) @@ -291,7 +292,8 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size, goto err_free_block2mtd; } - if ((long)bdev->bd_inode->i_size % erase_size) { + size = bdev_nr_bytes(bdev); + if ((long)size % erase_size) { pr_err("erasesize must be a divisor of device size\n"); goto err_free_block2mtd; } @@ -309,7 +311,7 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size, dev->mtd.name = name; - dev->mtd.size = bdev->bd_inode->i_size & PAGE_MASK; + dev->mtd.size = size & PAGE_MASK; dev->mtd.erasesize = erase_size; dev->mtd.writesize = 1; dev->mtd.writebufsize = PAGE_SIZE; diff --git a/drivers/scsi/scsicam.c b/drivers/scsi/scsicam.c index e2c7d8ef205fcc..dd69342bbe7815 100644 --- a/drivers/scsi/scsicam.c +++ b/drivers/scsi/scsicam.c @@ -32,7 +32,7 @@ */ unsigned char *scsi_bios_ptable(struct block_device *dev) { - struct address_space *mapping = bdev_whole(dev)->bd_inode->i_mapping; + struct address_space *mapping = bdev_whole(dev)->bd_mapping; unsigned char *res = NULL; struct folio *folio; diff --git a/fs/aio.c b/fs/aio.c index 0f4f531c97800c..6ed5507cd33099 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -122,7 +122,7 @@ struct kioctx { unsigned long mmap_base; unsigned long mmap_size; - struct page **ring_pages; + struct folio **ring_folios; long nr_pages; struct rcu_work free_rwork; /* see free_ioctx() */ @@ -160,7 +160,7 @@ struct kioctx { spinlock_t completion_lock; } ____cacheline_aligned_in_smp; - struct page *internal_pages[AIO_RING_PAGES]; + struct folio *internal_folios[AIO_RING_PAGES]; struct file *aio_ring_file; unsigned id; 
@@ -334,19 +334,20 @@ static void aio_free_ring(struct kioctx *ctx) put_aio_ring_file(ctx); for (i = 0; i < ctx->nr_pages; i++) { - struct page *page; - pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i, - page_count(ctx->ring_pages[i])); - page = ctx->ring_pages[i]; - if (!page) + struct folio *folio = ctx->ring_folios[i]; + + if (!folio) continue; - ctx->ring_pages[i] = NULL; - put_page(page); + + pr_debug("pid(%d) [%d] folio->count=%d\n", current->pid, i, + folio_ref_count(folio)); + ctx->ring_folios[i] = NULL; + folio_put(folio); } - if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) { - kfree(ctx->ring_pages); - ctx->ring_pages = NULL; + if (ctx->ring_folios && ctx->ring_folios != ctx->internal_folios) { + kfree(ctx->ring_folios); + ctx->ring_folios = NULL; } } @@ -441,7 +442,7 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst, idx = src->index; if (idx < (pgoff_t)ctx->nr_pages) { /* Make sure the old folio hasn't already been changed */ - if (ctx->ring_pages[idx] != &src->page) + if (ctx->ring_folios[idx] != src) rc = -EAGAIN; } else rc = -EINVAL; @@ -465,8 +466,8 @@ static int aio_migrate_folio(struct address_space *mapping, struct folio *dst, */ spin_lock_irqsave(&ctx->completion_lock, flags); folio_migrate_copy(dst, src); - BUG_ON(ctx->ring_pages[idx] != &src->page); - ctx->ring_pages[idx] = &dst->page; + BUG_ON(ctx->ring_folios[idx] != src); + ctx->ring_folios[idx] = dst; spin_unlock_irqrestore(&ctx->completion_lock, flags); /* The old folio is no longer accessible. 
*/ @@ -516,28 +517,30 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events) nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event); - ctx->ring_pages = ctx->internal_pages; + ctx->ring_folios = ctx->internal_folios; if (nr_pages > AIO_RING_PAGES) { - ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *), - GFP_KERNEL); - if (!ctx->ring_pages) { + ctx->ring_folios = kcalloc(nr_pages, sizeof(struct folio *), + GFP_KERNEL); + if (!ctx->ring_folios) { put_aio_ring_file(ctx); return -ENOMEM; } } for (i = 0; i < nr_pages; i++) { - struct page *page; - page = find_or_create_page(file->f_mapping, - i, GFP_USER | __GFP_ZERO); - if (!page) + struct folio *folio; + + folio = __filemap_get_folio(file->f_mapping, i, + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, + GFP_USER | __GFP_ZERO); + if (IS_ERR(folio)) break; - pr_debug("pid(%d) page[%d]->count=%d\n", - current->pid, i, page_count(page)); - SetPageUptodate(page); - unlock_page(page); - ctx->ring_pages[i] = page; + pr_debug("pid(%d) [%d] folio->count=%d\n", current->pid, i, + folio_ref_count(folio)); + folio_end_read(folio, true); + + ctx->ring_folios[i] = folio; } ctx->nr_pages = i; @@ -570,7 +573,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events) ctx->user_id = ctx->mmap_base; ctx->nr_events = nr_events; /* trusted copy */ - ring = page_address(ctx->ring_pages[0]); + ring = folio_address(ctx->ring_folios[0]); ring->nr = nr_events; /* user copy */ ring->id = ~0U; ring->head = ring->tail = 0; @@ -578,7 +581,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events) ring->compat_features = AIO_RING_COMPAT_FEATURES; ring->incompat_features = AIO_RING_INCOMPAT_FEATURES; ring->header_length = sizeof(struct aio_ring); - flush_dcache_page(ctx->ring_pages[0]); + flush_dcache_folio(ctx->ring_folios[0]); return 0; } @@ -689,9 +692,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) /* While kioctx setup is in progress, * we 
are protected from page migration - * changes ring_pages by ->ring_lock. + * changes ring_folios by ->ring_lock. */ - ring = page_address(ctx->ring_pages[0]); + ring = folio_address(ctx->ring_folios[0]); ring->id = ctx->id; return 0; } @@ -1033,7 +1036,7 @@ static void user_refill_reqs_available(struct kioctx *ctx) * against ctx->completed_events below will make sure we do the * safe/right thing. */ - ring = page_address(ctx->ring_pages[0]); + ring = folio_address(ctx->ring_folios[0]); head = ring->head; refill_reqs_available(ctx, head, ctx->tail); @@ -1145,12 +1148,12 @@ static void aio_complete(struct aio_kiocb *iocb) if (++tail >= ctx->nr_events) tail = 0; - ev_page = page_address(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); + ev_page = folio_address(ctx->ring_folios[pos / AIO_EVENTS_PER_PAGE]); event = ev_page + pos % AIO_EVENTS_PER_PAGE; *event = iocb->ki_res; - flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); + flush_dcache_folio(ctx->ring_folios[pos / AIO_EVENTS_PER_PAGE]); pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb, (void __user *)(unsigned long)iocb->ki_res.obj, @@ -1163,10 +1166,10 @@ static void aio_complete(struct aio_kiocb *iocb) ctx->tail = tail; - ring = page_address(ctx->ring_pages[0]); + ring = folio_address(ctx->ring_folios[0]); head = ring->head; ring->tail = tail; - flush_dcache_page(ctx->ring_pages[0]); + flush_dcache_folio(ctx->ring_folios[0]); ctx->completed_events++; if (ctx->completed_events > 1) @@ -1238,8 +1241,8 @@ static long aio_read_events_ring(struct kioctx *ctx, sched_annotate_sleep(); mutex_lock(&ctx->ring_lock); - /* Access to ->ring_pages here is protected by ctx->ring_lock. */ - ring = page_address(ctx->ring_pages[0]); + /* Access to ->ring_folios here is protected by ctx->ring_lock. 
*/ + ring = folio_address(ctx->ring_folios[0]); head = ring->head; tail = ring->tail; @@ -1260,20 +1263,20 @@ static long aio_read_events_ring(struct kioctx *ctx, while (ret < nr) { long avail; struct io_event *ev; - struct page *page; + struct folio *folio; avail = (head <= tail ? tail : ctx->nr_events) - head; if (head == tail) break; pos = head + AIO_EVENTS_OFFSET; - page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]; + folio = ctx->ring_folios[pos / AIO_EVENTS_PER_PAGE]; pos %= AIO_EVENTS_PER_PAGE; avail = min(avail, nr - ret); avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos); - ev = page_address(page); + ev = folio_address(folio); copy_ret = copy_to_user(event + ret, ev + pos, sizeof(*ev) * avail); @@ -1287,9 +1290,9 @@ static long aio_read_events_ring(struct kioctx *ctx, head %= ctx->nr_events; } - ring = page_address(ctx->ring_pages[0]); + ring = folio_address(ctx->ring_folios[0]); ring->head = head; - flush_dcache_page(ctx->ring_pages[0]); + flush_dcache_folio(ctx->ring_folios[0]); pr_debug("%li h%u t%u\n", ret, head, tail); out: diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index afb57ea99431b2..062a98b86b58da 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -864,6 +864,9 @@ static int bch2_getattr(struct mnt_idmap *idmap, stat->blksize = block_bytes(c); stat->blocks = inode->v.i_blocks; + stat->subvol = inode->ei_subvol; + stat->result_mask |= STATX_SUBVOL; + if (request_mask & STATX_BTIME) { stat->result_mask |= STATX_BTIME; stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime); diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 5cf885b09986ac..5d2c470a49ac97 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -445,11 +445,6 @@ static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits) void bch2_bio_map(struct bio *bio, void *base, size_t); int bch2_bio_alloc_pages(struct bio *, size_t, gfp_t); -static inline sector_t bdev_sectors(struct block_device *bdev) -{ - return bdev->bd_inode->i_size >> 9; -} - 
#define closure_bio_submit(bio, cl) \ do { \ closure_get(cl); \ diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2d8733662dafa7..2cce4ddc4c539e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3657,7 +3657,7 @@ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev, struct btrfs_super_block *super; struct page *page; u64 bytenr, bytenr_orig; - struct address_space *mapping = bdev->bd_inode->i_mapping; + struct address_space *mapping = bdev->bd_mapping; int ret; bytenr_orig = btrfs_sb_offset(copy_num); @@ -3744,7 +3744,7 @@ static int write_dev_supers(struct btrfs_device *device, struct btrfs_super_block *sb, int max_mirrors) { struct btrfs_fs_info *fs_info = device->fs_info; - struct address_space *mapping = device->bdev->bd_inode->i_mapping; + struct address_space *mapping = device->bdev->bd_mapping; SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); int i; int errors = 0; @@ -3861,7 +3861,7 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors) device->commit_total_bytes) break; - page = find_get_page(device->bdev->bd_inode->i_mapping, + page = find_get_page(device->bdev->bd_mapping, bytenr >> PAGE_SHIFT); if (!page) { errors++; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0c7c1b42028e3d..e764ac3f22e236 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3896,8 +3896,7 @@ static int btrfs_file_open(struct inode *inode, struct file *filp) { int ret; - filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC | - FMODE_CAN_ODIRECT; + filp->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT; ret = fsverity_file_open(inode, filp); if (ret) @@ -4027,6 +4026,7 @@ const struct file_operations btrfs_file_operations = { .compat_ioctl = btrfs_compat_ioctl, #endif .remap_file_range = btrfs_remap_file_range, + .fop_flags = FOP_BUFFER_RASYNC | FOP_BUFFER_WASYNC, }; int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 
418c998012a1be..87ac1202ba2f78 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8591,6 +8591,9 @@ static int btrfs_getattr(struct mnt_idmap *idmap, generic_fillattr(idmap, request_mask, inode, stat); stat->dev = BTRFS_I(inode)->root->anon_dev; + stat->subvol = BTRFS_I(inode)->root->root_key.objectid; + stat->result_mask |= STATX_SUBVOL; + spin_lock(&BTRFS_I(inode)->lock); delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes; inode_bytes = inode_get_bytes(inode); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 48d13837840e02..4e92a01e40893e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1287,7 +1287,7 @@ static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev return ERR_PTR(-EINVAL); /* pull in the page with our super */ - page = read_cache_page_gfp(bdev->bd_inode->i_mapping, index, GFP_KERNEL); + page = read_cache_page_gfp(bdev->bd_mapping, index, GFP_KERNEL); if (IS_ERR(page)) return ERR_CAST(page); diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 4b52a8916dbb65..1d8e0f762918f9 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -118,7 +118,7 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones, return -ENOENT; } else if (full[0] && full[1]) { /* Compare two super blocks */ - struct address_space *mapping = bdev->bd_inode->i_mapping; + struct address_space *mapping = bdev->bd_mapping; struct page *page[BTRFS_NR_SB_LOG_ZONES]; struct btrfs_super_block *super[BTRFS_NR_SB_LOG_ZONES]; int i; diff --git a/fs/buffer.c b/fs/buffer.c index ed698caa8834b2..21998e6a023741 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1041,12 +1041,12 @@ static sector_t folio_init_buffers(struct folio *folio, static bool grow_dev_folio(struct block_device *bdev, sector_t block, pgoff_t index, unsigned size, gfp_t gfp) { - struct inode *inode = bdev->bd_inode; + struct address_space *mapping = bdev->bd_mapping; struct folio *folio; struct buffer_head *bh; sector_t end_block = 0; - folio = 
__filemap_get_folio(inode->i_mapping, index, + folio = __filemap_get_folio(mapping, index, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp); if (IS_ERR(folio)) return false; @@ -1080,10 +1080,10 @@ static bool grow_dev_folio(struct block_device *bdev, sector_t block, * lock to be atomic wrt __find_get_block(), which does not * run under the folio lock. */ - spin_lock(&inode->i_mapping->i_private_lock); + spin_lock(&mapping->i_private_lock); link_dev_buffers(folio, bh); end_block = folio_init_buffers(folio, bdev, size); - spin_unlock(&inode->i_mapping->i_private_lock); + spin_unlock(&mapping->i_private_lock); unlock: folio_unlock(folio); folio_put(folio); @@ -1486,7 +1486,7 @@ struct buffer_head *__bread_gfp(struct block_device *bdev, sector_t block, { struct buffer_head *bh; - gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS); + gfp |= mapping_gfp_constraint(bdev->bd_mapping, ~__GFP_FS); /* * Prefer looping in the allocator rather than here, at least that diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 9901057a15ba79..460690ca017436 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -183,7 +183,7 @@ static int next_buffer; static void *cramfs_blkdev_read(struct super_block *sb, unsigned int offset, unsigned int len) { - struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; + struct address_space *mapping = sb->s_bdev->bd_mapping; struct file_ra_state ra = {}; struct page *pages[BLKS_PER_BUF]; unsigned i, blocknr, buffer; diff --git a/fs/dcache.c b/fs/dcache.c index 71a8e943a0fa50..407095188f83a7 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -355,7 +355,7 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry) flags &= ~DCACHE_ENTRY_TYPE; WRITE_ONCE(dentry->d_flags, flags); dentry->d_inode = NULL; - if (dentry->d_flags & DCACHE_LRU_LIST) + if (flags & DCACHE_LRU_LIST) this_cpu_inc(nr_dentry_negative); } diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index a40da006543361..dc51df0b118d0a 100644 --- 
a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -14,7 +14,8 @@ #include <linux/module.h> #include <linux/fs.h> -#include <linux/mount.h> +#include <linux/fs_context.h> +#include <linux/fs_parser.h> #include <linux/pagemap.h> #include <linux/init.h> #include <linux/kobject.h> @@ -23,7 +24,6 @@ #include <linux/fsnotify.h> #include <linux/string.h> #include <linux/seq_file.h> -#include <linux/parser.h> #include <linux/magic.h> #include <linux/slab.h> #include <linux/security.h> @@ -77,7 +77,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb) return inode; } -struct debugfs_mount_opts { +struct debugfs_fs_info { kuid_t uid; kgid_t gid; umode_t mode; @@ -89,68 +89,51 @@ enum { Opt_uid, Opt_gid, Opt_mode, - Opt_err }; -static const match_table_t tokens = { - {Opt_uid, "uid=%u"}, - {Opt_gid, "gid=%u"}, - {Opt_mode, "mode=%o"}, - {Opt_err, NULL} +static const struct fs_parameter_spec debugfs_param_specs[] = { + fsparam_u32 ("gid", Opt_gid), + fsparam_u32oct ("mode", Opt_mode), + fsparam_u32 ("uid", Opt_uid), + {} }; -struct debugfs_fs_info { - struct debugfs_mount_opts mount_opts; -}; - -static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts) +static int debugfs_parse_param(struct fs_context *fc, struct fs_parameter *param) { - substring_t args[MAX_OPT_ARGS]; - int option; - int token; + struct debugfs_fs_info *opts = fc->s_fs_info; + struct fs_parse_result result; kuid_t uid; kgid_t gid; - char *p; - - opts->opts = 0; - opts->mode = DEBUGFS_DEFAULT_MODE; - - while ((p = strsep(&data, ",")) != NULL) { - if (!*p) - continue; - - token = match_token(p, tokens, args); - switch (token) { - case Opt_uid: - if (match_int(&args[0], &option)) - return -EINVAL; - uid = make_kuid(current_user_ns(), option); - if (!uid_valid(uid)) - return -EINVAL; - opts->uid = uid; - break; - case Opt_gid: - if (match_int(&args[0], &option)) - return -EINVAL; - gid = make_kgid(current_user_ns(), option); - if (!gid_valid(gid)) - return -EINVAL; - opts->gid = 
gid; - break; - case Opt_mode: - if (match_octal(&args[0], &option)) - return -EINVAL; - opts->mode = option & S_IALLUGO; - break; - /* - * We might like to report bad mount options here; - * but traditionally debugfs has ignored all mount options - */ - } - - opts->opts |= BIT(token); + int opt; + + opt = fs_parse(fc, debugfs_param_specs, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_uid: + uid = make_kuid(current_user_ns(), result.uint_32); + if (!uid_valid(uid)) + return invalf(fc, "Unknown uid"); + opts->uid = uid; + break; + case Opt_gid: + gid = make_kgid(current_user_ns(), result.uint_32); + if (!gid_valid(gid)) + return invalf(fc, "Unknown gid"); + opts->gid = gid; + break; + case Opt_mode: + opts->mode = result.uint_32 & S_IALLUGO; + break; + /* + * We might like to report bad mount options here; + * but traditionally debugfs has ignored all mount options + */ } + opts->opts |= BIT(opt); + return 0; } @@ -158,23 +141,22 @@ static void _debugfs_apply_options(struct super_block *sb, bool remount) { struct debugfs_fs_info *fsi = sb->s_fs_info; struct inode *inode = d_inode(sb->s_root); - struct debugfs_mount_opts *opts = &fsi->mount_opts; /* * On remount, only reset mode/uid/gid if they were provided as mount * options. 
*/ - if (!remount || opts->opts & BIT(Opt_mode)) { + if (!remount || fsi->opts & BIT(Opt_mode)) { inode->i_mode &= ~S_IALLUGO; - inode->i_mode |= opts->mode; + inode->i_mode |= fsi->mode; } - if (!remount || opts->opts & BIT(Opt_uid)) - inode->i_uid = opts->uid; + if (!remount || fsi->opts & BIT(Opt_uid)) + inode->i_uid = fsi->uid; - if (!remount || opts->opts & BIT(Opt_gid)) - inode->i_gid = opts->gid; + if (!remount || fsi->opts & BIT(Opt_gid)) + inode->i_gid = fsi->gid; } static void debugfs_apply_options(struct super_block *sb) @@ -187,35 +169,33 @@ static void debugfs_apply_options_remount(struct super_block *sb) _debugfs_apply_options(sb, true); } -static int debugfs_remount(struct super_block *sb, int *flags, char *data) +static int debugfs_reconfigure(struct fs_context *fc) { - int err; - struct debugfs_fs_info *fsi = sb->s_fs_info; + struct super_block *sb = fc->root->d_sb; + struct debugfs_fs_info *sb_opts = sb->s_fs_info; + struct debugfs_fs_info *new_opts = fc->s_fs_info; sync_filesystem(sb); - err = debugfs_parse_options(data, &fsi->mount_opts); - if (err) - goto fail; + /* structure copy of new mount options to sb */ + *sb_opts = *new_opts; debugfs_apply_options_remount(sb); -fail: - return err; + return 0; } static int debugfs_show_options(struct seq_file *m, struct dentry *root) { struct debugfs_fs_info *fsi = root->d_sb->s_fs_info; - struct debugfs_mount_opts *opts = &fsi->mount_opts; - if (!uid_eq(opts->uid, GLOBAL_ROOT_UID)) + if (!uid_eq(fsi->uid, GLOBAL_ROOT_UID)) seq_printf(m, ",uid=%u", - from_kuid_munged(&init_user_ns, opts->uid)); - if (!gid_eq(opts->gid, GLOBAL_ROOT_GID)) + from_kuid_munged(&init_user_ns, fsi->uid)); + if (!gid_eq(fsi->gid, GLOBAL_ROOT_GID)) seq_printf(m, ",gid=%u", - from_kgid_munged(&init_user_ns, opts->gid)); - if (opts->mode != DEBUGFS_DEFAULT_MODE) - seq_printf(m, ",mode=%o", opts->mode); + from_kgid_munged(&init_user_ns, fsi->gid)); + if (fsi->mode != DEBUGFS_DEFAULT_MODE) + seq_printf(m, ",mode=%o", fsi->mode); 
return 0; } @@ -229,7 +209,6 @@ static void debugfs_free_inode(struct inode *inode) static const struct super_operations debugfs_super_operations = { .statfs = simple_statfs, - .remount_fs = debugfs_remount, .show_options = debugfs_show_options, .free_inode = debugfs_free_inode, }; @@ -263,26 +242,14 @@ static const struct dentry_operations debugfs_dops = { .d_automount = debugfs_automount, }; -static int debug_fill_super(struct super_block *sb, void *data, int silent) +static int debugfs_fill_super(struct super_block *sb, struct fs_context *fc) { static const struct tree_descr debug_files[] = {{""}}; - struct debugfs_fs_info *fsi; int err; - fsi = kzalloc(sizeof(struct debugfs_fs_info), GFP_KERNEL); - sb->s_fs_info = fsi; - if (!fsi) { - err = -ENOMEM; - goto fail; - } - - err = debugfs_parse_options(data, &fsi->mount_opts); + err = simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); if (err) - goto fail; - - err = simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); - if (err) - goto fail; + return err; sb->s_op = &debugfs_super_operations; sb->s_d_op = &debugfs_dops; @@ -290,27 +257,48 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent) debugfs_apply_options(sb); return 0; - -fail: - kfree(fsi); - sb->s_fs_info = NULL; - return err; } -static struct dentry *debug_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) +static int debugfs_get_tree(struct fs_context *fc) { if (!(debugfs_allow & DEBUGFS_ALLOW_API)) - return ERR_PTR(-EPERM); + return -EPERM; + + return get_tree_single(fc, debugfs_fill_super); +} + +static void debugfs_free_fc(struct fs_context *fc) +{ + kfree(fc->s_fs_info); +} - return mount_single(fs_type, flags, data, debug_fill_super); +static const struct fs_context_operations debugfs_context_ops = { + .free = debugfs_free_fc, + .parse_param = debugfs_parse_param, + .get_tree = debugfs_get_tree, + .reconfigure = debugfs_reconfigure, +}; + +static int debugfs_init_fs_context(struct 
fs_context *fc) +{ + struct debugfs_fs_info *fsi; + + fsi = kzalloc(sizeof(struct debugfs_fs_info), GFP_KERNEL); + if (!fsi) + return -ENOMEM; + + fsi->mode = DEBUGFS_DEFAULT_MODE; + + fc->s_fs_info = fsi; + fc->ops = &debugfs_context_ops; + return 0; } static struct file_system_type debug_fs_type = { .owner = THIS_MODULE, .name = "debugfs", - .mount = debug_mount, + .init_fs_context = debugfs_init_fs_context, + .parameters = debugfs_param_specs, .kill_sb = kill_litter_super, }; MODULE_ALIAS_FS("debugfs"); diff --git a/fs/direct-io.c b/fs/direct-io.c index 62c97ff9e852a1..b0aafe640fa428 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1217,7 +1217,6 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, */ inode_dio_begin(inode); - retval = 0; sdio.blkbits = blkbits; sdio.blkfactor = i_blkbits - blkbits; sdio.block_in_file = offset >> blkbits; diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 3fe41964c0d8d9..7f9f68c00ef63c 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -300,9 +300,11 @@ write_tag_66_packet(char *signature, u8 cipher_code, * | Key Identifier Size | 1 or 2 bytes | * | Key Identifier | arbitrary | * | File Encryption Key Size | 1 or 2 bytes | + * | Cipher Code | 1 byte | * | File Encryption Key | arbitrary | + * | Checksum | 2 bytes | */ - data_len = (5 + ECRYPTFS_SIG_SIZE_HEX + crypt_stat->key_size); + data_len = (8 + ECRYPTFS_SIG_SIZE_HEX + crypt_stat->key_size); *packet = kmalloc(data_len, GFP_KERNEL); message = *packet; if (!message) { diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 3985f8c33f9553..ff4514e4626bdb 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -192,7 +192,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) (PAGE_SHIFT - inode->i_blkbits); if (!ra_has_index(&file->f_ra, index)) page_cache_sync_readahead( - sb->s_bdev->bd_inode->i_mapping, + sb->s_bdev->bd_mapping, &file->f_ra, file, index, 1); file->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT; diff 
--git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 5d8055161acdb5..da4a8245638364 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -206,7 +206,7 @@ static void ext4_journal_abort_handle(const char *caller, unsigned int line, static void ext4_check_bdev_write_error(struct super_block *sb) { - struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; + struct address_space *mapping = sb->s_bdev->bd_mapping; struct ext4_sb_info *sbi = EXT4_SB(sb); int err; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 54d6ff22585cf1..28c51b0cc4db91 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -885,8 +885,7 @@ static int ext4_file_open(struct inode *inode, struct file *filp) return ret; } - filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | - FMODE_DIO_PARALLEL_WRITE; + filp->f_mode |= FMODE_NOWAIT; return dquot_file_open(inode, filp); } @@ -938,7 +937,6 @@ const struct file_operations ext4_file_operations = { .compat_ioctl = ext4_compat_ioctl, #endif .mmap = ext4_file_mmap, - .mmap_supported_flags = MAP_SYNC, .open = ext4_file_open, .release = ext4_release_file, .fsync = ext4_sync_file, @@ -946,6 +944,8 @@ const struct file_operations ext4_file_operations = { .splice_read = ext4_file_splice_read, .splice_write = iter_file_splice_write, .fallocate = ext4_fallocate, + .fop_flags = FOP_MMAP_SYNC | FOP_BUFFER_RASYNC | + FOP_DIO_PARALLEL_WRITE, }; const struct inode_operations ext4_file_inode_operations = { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 044135796f2b6e..6e2bd802b50c08 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -244,7 +244,7 @@ static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb, struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block, blk_opf_t op_flags) { - gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_inode->i_mapping, + gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_mapping, ~__GFP_FS) | __GFP_MOVABLE; return __ext4_sb_bread_gfp(sb, block, op_flags, gfp); @@ -253,7 +253,7 @@ 
struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block, struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb, sector_t block) { - gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_inode->i_mapping, + gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_mapping, ~__GFP_FS); return __ext4_sb_bread_gfp(sb, block, 0, gfp); @@ -492,22 +492,6 @@ static void ext4_maybe_update_superblock(struct super_block *sb) schedule_work(&EXT4_SB(sb)->s_sb_upd_work); } -/* - * The del_gendisk() function uninitializes the disk-specific data - * structures, including the bdi structure, without telling anyone - * else. Once this happens, any attempt to call mark_buffer_dirty() - * (for example, by ext4_commit_super), will cause a kernel OOPS. - * This is a kludge to prevent these oops until we can put in a proper - * hook in del_gendisk() to inform the VFS and file system layers. - */ -static int block_device_ejected(struct super_block *sb) -{ - struct inode *bd_inode = sb->s_bdev->bd_inode; - struct backing_dev_info *bdi = inode_to_bdi(bd_inode); - - return bdi->dev == NULL; -} - static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) { struct super_block *sb = journal->j_private; @@ -1723,10 +1707,6 @@ static const struct constant_table ext4_param_dax[] = { {} }; -/* String parameter that allows empty argument */ -#define fsparam_string_empty(NAME, OPT) \ - __fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL) - /* * Mount option specification * We don't use fsparam_flag_no because of the way we set the @@ -5572,7 +5552,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) * used to detect the metadata async write error. 
*/ spin_lock_init(&sbi->s_bdev_wb_lock); - errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err, + errseq_check_and_advance(&sb->s_bdev->bd_mapping->wb_err, &sbi->s_bdev_wb_err); EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; ext4_orphan_cleanup(sb, es); @@ -6172,8 +6152,6 @@ static int ext4_commit_super(struct super_block *sb) if (!sbh) return -EINVAL; - if (block_device_ejected(sb)) - return -ENODEV; ext4_update_super(sb); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d382f8bc2fbe6f..151039a2889b05 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -568,7 +568,7 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) if (err) return err; - filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC; + filp->f_mode |= FMODE_NOWAIT; filp->f_mode |= FMODE_CAN_ODIRECT; return dquot_file_open(inode, filp); @@ -5091,4 +5091,5 @@ const struct file_operations f2fs_file_operations = { .splice_read = f2fs_file_splice_read, .splice_write = iter_file_splice_write, .fadvise = f2fs_file_fadvise, + .fop_flags = FOP_BUFFER_RASYNC, }; diff --git a/fs/fhandle.c b/fs/fhandle.c index 57a12614addfd4..8a7f86c2139a7f 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -36,7 +36,7 @@ static long do_sys_name_to_handle(const struct path *path, if (f_handle.handle_bytes > MAX_HANDLE_SZ) return -EINVAL; - handle = kzalloc(sizeof(struct file_handle) + f_handle.handle_bytes, + handle = kzalloc(struct_size(handle, f_handle, f_handle.handle_bytes), GFP_KERNEL); if (!handle) return -ENOMEM; @@ -71,7 +71,7 @@ static long do_sys_name_to_handle(const struct path *path, /* copy the mount id */ if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) || copy_to_user(ufh, handle, - sizeof(struct file_handle) + handle_bytes)) + struct_size(handle, f_handle, handle_bytes))) retval = -EFAULT; kfree(handle); return retval; @@ -192,7 +192,7 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, retval = -EINVAL; goto out_err; } - handle = kmalloc(sizeof(struct file_handle) + 
f_handle.handle_bytes, + handle = kmalloc(struct_size(handle, f_handle, f_handle.handle_bytes), GFP_KERNEL); if (!handle) { retval = -ENOMEM; diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 42e03b6b1cc7a8..fabe6077865806 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -17,7 +17,7 @@ #include <linux/slab.h> #include <linux/stat.h> #include <linux/vfs.h> -#include <linux/mount.h> +#include <linux/fs_context.h> #include "vxfs.h" #include "vxfs_extern.h" @@ -91,10 +91,10 @@ vxfs_statfs(struct dentry *dentry, struct kstatfs *bufp) return 0; } -static int vxfs_remount(struct super_block *sb, int *flags, char *data) +static int vxfs_reconfigure(struct fs_context *fc) { - sync_filesystem(sb); - *flags |= SB_RDONLY; + sync_filesystem(fc->root->d_sb); + fc->sb_flags |= SB_RDONLY; return 0; } @@ -120,24 +120,24 @@ static const struct super_operations vxfs_super_ops = { .evict_inode = vxfs_evict_inode, .put_super = vxfs_put_super, .statfs = vxfs_statfs, - .remount_fs = vxfs_remount, }; -static int vxfs_try_sb_magic(struct super_block *sbp, int silent, +static int vxfs_try_sb_magic(struct super_block *sbp, struct fs_context *fc, unsigned blk, __fs32 magic) { struct buffer_head *bp; struct vxfs_sb *rsbp; struct vxfs_sb_info *infp = VXFS_SBI(sbp); + int silent = fc->sb_flags & SB_SILENT; int rc = -ENOMEM; bp = sb_bread(sbp, blk); do { if (!bp || !buffer_mapped(bp)) { if (!silent) { - printk(KERN_WARNING - "vxfs: unable to read disk superblock at %u\n", - blk); + warnf(fc, + "vxfs: unable to read disk superblock at %u", + blk); } break; } @@ -146,9 +146,9 @@ static int vxfs_try_sb_magic(struct super_block *sbp, int silent, rsbp = (struct vxfs_sb *)bp->b_data; if (rsbp->vs_magic != magic) { if (!silent) - printk(KERN_NOTICE - "vxfs: WRONG superblock magic %08x at %u\n", - rsbp->vs_magic, blk); + infof(fc, + "vxfs: WRONG superblock magic %08x at %u", + rsbp->vs_magic, blk); break; } @@ -169,8 +169,7 @@ static int 
vxfs_try_sb_magic(struct super_block *sbp, int silent, /** * vxfs_fill_super - read superblock into memory and initialize filesystem * @sbp: VFS superblock (to fill) - * @dp: fs private mount data - * @silent: do not complain loudly when sth is wrong + * @fc: filesytem context * * Description: * We are called on the first mount of a filesystem to read the @@ -182,26 +181,27 @@ static int vxfs_try_sb_magic(struct super_block *sbp, int silent, * Locking: * We are under @sbp->s_lock. */ -static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent) +static int vxfs_fill_super(struct super_block *sbp, struct fs_context *fc) { struct vxfs_sb_info *infp; struct vxfs_sb *rsbp; u_long bsize; struct inode *root; int ret = -EINVAL; + int silent = fc->sb_flags & SB_SILENT; u32 j; sbp->s_flags |= SB_RDONLY; infp = kzalloc(sizeof(*infp), GFP_KERNEL); if (!infp) { - printk(KERN_WARNING "vxfs: unable to allocate incore superblock\n"); + warnf(fc, "vxfs: unable to allocate incore superblock"); return -ENOMEM; } bsize = sb_min_blocksize(sbp, BLOCK_SIZE); if (!bsize) { - printk(KERN_WARNING "vxfs: unable to set blocksize\n"); + warnf(fc, "vxfs: unable to set blocksize"); goto out; } @@ -210,24 +210,24 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent) sbp->s_time_min = 0; sbp->s_time_max = U32_MAX; - if (!vxfs_try_sb_magic(sbp, silent, 1, + if (!vxfs_try_sb_magic(sbp, fc, 1, (__force __fs32)cpu_to_le32(VXFS_SUPER_MAGIC))) { /* Unixware, x86 */ infp->byte_order = VXFS_BO_LE; - } else if (!vxfs_try_sb_magic(sbp, silent, 8, + } else if (!vxfs_try_sb_magic(sbp, fc, 8, (__force __fs32)cpu_to_be32(VXFS_SUPER_MAGIC))) { /* HP-UX, parisc */ infp->byte_order = VXFS_BO_BE; } else { if (!silent) - printk(KERN_NOTICE "vxfs: can't find superblock.\n"); + infof(fc, "vxfs: can't find superblock."); goto out; } rsbp = infp->vsi_raw; j = fs32_to_cpu(infp, rsbp->vs_version); if ((j < 2 || j > 4) && !silent) { - printk(KERN_NOTICE "vxfs: unsupported VxFS version 
(%d)\n", j); + infof(fc, "vxfs: unsupported VxFS version (%d)", j); goto out; } @@ -244,17 +244,17 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent) j = fs32_to_cpu(infp, rsbp->vs_bsize); if (!sb_set_blocksize(sbp, j)) { - printk(KERN_WARNING "vxfs: unable to set final block size\n"); + warnf(fc, "vxfs: unable to set final block size"); goto out; } if (vxfs_read_olt(sbp, bsize)) { - printk(KERN_WARNING "vxfs: unable to read olt\n"); + warnf(fc, "vxfs: unable to read olt"); goto out; } if (vxfs_read_fshead(sbp)) { - printk(KERN_WARNING "vxfs: unable to read fshead\n"); + warnf(fc, "vxfs: unable to read fshead"); goto out; } @@ -265,7 +265,7 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent) } sbp->s_root = d_make_root(root); if (!sbp->s_root) { - printk(KERN_WARNING "vxfs: unable to get root dentry.\n"); + warnf(fc, "vxfs: unable to get root dentry."); goto out_free_ilist; } @@ -284,18 +284,29 @@ out: /* * The usual module blurb. */ -static struct dentry *vxfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int vxfs_get_tree(struct fs_context *fc) { - return mount_bdev(fs_type, flags, dev_name, data, vxfs_fill_super); + return get_tree_bdev(fc, vxfs_fill_super); +} + +static const struct fs_context_operations vxfs_context_ops = { + .get_tree = vxfs_get_tree, + .reconfigure = vxfs_reconfigure, +}; + +static int vxfs_init_fs_context(struct fs_context *fc) +{ + fc->ops = &vxfs_context_ops; + + return 0; } static struct file_system_type vxfs_fs_type = { .owner = THIS_MODULE, .name = "vxfs", - .mount = vxfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, + .init_fs_context = vxfs_init_fs_context, }; MODULE_ALIAS_FS("vxfs"); /* makes mount -t vxfs autoload the module */ MODULE_ALIAS("vxfs"); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e4f17c53ddfcf3..92a5b8283528c0 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -166,8 +166,7 @@ 
static void wb_wakeup_delayed(struct bdi_writeback *wb) spin_unlock_irq(&wb->work_lock); } -static void finish_writeback_work(struct bdi_writeback *wb, - struct wb_writeback_work *work) +static void finish_writeback_work(struct wb_writeback_work *work) { struct wb_completion *done = work->done; @@ -196,7 +195,7 @@ static void wb_queue_work(struct bdi_writeback *wb, list_add_tail(&work->list, &wb->work_list); mod_delayed_work(bdi_wq, &wb->dwork, 0); } else - finish_writeback_work(wb, work); + finish_writeback_work(work); spin_unlock_irq(&wb->work_lock); } @@ -1561,7 +1560,8 @@ static void inode_sleep_on_writeback(struct inode *inode) * thread's back can have unexpected consequences. */ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, - struct writeback_control *wbc) + struct writeback_control *wbc, + unsigned long dirtied_before) { if (inode->i_state & I_FREEING) return; @@ -1594,7 +1594,8 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, * We didn't write back all the pages. nfs_writepages() * sometimes bales out without doing anything. */ - if (wbc->nr_to_write <= 0) { + if (wbc->nr_to_write <= 0 && + !inode_dirtied_after(inode, dirtied_before)) { /* Slice used up. Queue for next turn. 
*/ requeue_io(inode, wb); } else { @@ -1862,6 +1863,11 @@ static long writeback_sb_inodes(struct super_block *sb, unsigned long start_time = jiffies; long write_chunk; long total_wrote = 0; /* count both pages and inodes */ + unsigned long dirtied_before = jiffies; + + if (work->for_kupdate) + dirtied_before = jiffies - + msecs_to_jiffies(dirty_expire_interval * 10); while (!list_empty(&wb->b_io)) { struct inode *inode = wb_inode(wb->b_io.prev); @@ -1967,7 +1973,7 @@ static long writeback_sb_inodes(struct super_block *sb, spin_lock(&inode->i_lock); if (!(inode->i_state & I_DIRTY_ALL)) total_wrote++; - requeue_inode(inode, tmp_wb, &wbc); + requeue_inode(inode, tmp_wb, &wbc, dirtied_before); inode_sync_complete(inode); spin_unlock(&inode->i_lock); @@ -2069,6 +2075,7 @@ static long wb_writeback(struct bdi_writeback *wb, struct inode *inode; long progress; struct blk_plug plug; + bool queued = false; blk_start_plug(&plug); for (;;) { @@ -2098,21 +2105,24 @@ static long wb_writeback(struct bdi_writeback *wb, spin_lock(&wb->list_lock); - /* - * Kupdate and background works are special and we want to - * include all inodes that need writing. Livelock avoidance is - * handled by these works yielding to any other work so we are - * safe. - */ - if (work->for_kupdate) { - dirtied_before = jiffies - - msecs_to_jiffies(dirty_expire_interval * 10); - } else if (work->for_background) - dirtied_before = jiffies; - trace_writeback_start(wb, work); - if (list_empty(&wb->b_io)) + if (list_empty(&wb->b_io)) { + /* + * Kupdate and background works are special and we want + * to include all inodes that need writing. Livelock + * avoidance is handled by these works yielding to any + * other work so we are safe. 
+ */ + if (work->for_kupdate) { + dirtied_before = jiffies - + msecs_to_jiffies(dirty_expire_interval * + 10); + } else if (work->for_background) + dirtied_before = jiffies; + queue_io(wb, work, dirtied_before); + queued = true; + } if (work->sb) progress = writeback_sb_inodes(work->sb, wb, work); else @@ -2127,7 +2137,7 @@ static long wb_writeback(struct bdi_writeback *wb, * mean the overall work is done. So we keep looping as long * as made some progress on cleaning pages or inodes. */ - if (progress) { + if (progress || !queued) { spin_unlock(&wb->list_lock); continue; } @@ -2262,7 +2272,7 @@ static long wb_do_writeback(struct bdi_writeback *wb) while ((work = get_next_work_item(wb)) != NULL) { trace_writeback_exec(wb, work); wrote += wb_writeback(wb, work); - finish_writeback_work(wb, work); + finish_writeback_work(work); } /* @@ -2322,8 +2332,7 @@ void wb_workfn(struct work_struct *work) } /* - * Start writeback of `nr_pages' pages on this bdi. If `nr_pages' is zero, - * write back the whole world. + * Start writeback of all dirty pages on this bdi. 
*/ static void __wakeup_flusher_threads_bdi(struct backing_dev_info *bdi, enum wb_reason reason) @@ -2726,7 +2735,7 @@ EXPORT_SYMBOL(writeback_inodes_sb_nr); */ void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason) { - return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason); + writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason); } EXPORT_SYMBOL(writeback_inodes_sb); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 9f11fc1e79eb67..4ea6c8bfb4e6c5 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1267,7 +1267,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, mapping = gfs2_glock2aspace(gl); if (mapping) { mapping->a_ops = &gfs2_meta_aops; - mapping->host = s->s_bdev->bd_inode; + mapping->host = s->s_bdev->bd_mapping->host; mapping->flags = 0; mapping_set_gfp_mask(mapping, GFP_NOFS); mapping->i_private_data = NULL; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index cde7118599abbd..44cf99e44d4f40 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -114,7 +114,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) address_space_init_once(mapping); mapping->a_ops = &gfs2_rgrp_aops; - mapping->host = sb->s_bdev->bd_inode; + mapping->host = sb->s_bdev->bd_mapping->host; mapping->flags = 0; mapping_set_gfp_mask(mapping, GFP_NOFS); mapping->i_private_data = NULL; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 2f4e88552d3f28..412f295acebe2e 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -40,7 +40,7 @@ #include <linux/sched/mm.h> static const struct address_space_operations hugetlbfs_aops; -const struct file_operations hugetlbfs_file_operations; +static const struct file_operations hugetlbfs_file_operations; static const struct inode_operations hugetlbfs_dir_inode_operations; static const struct inode_operations hugetlbfs_inode_operations; @@ -1298,13 +1298,14 @@ static void init_once(void *foo) inode_init_once(&ei->vfs_inode); } -const struct file_operations 
hugetlbfs_file_operations = { +static const struct file_operations hugetlbfs_file_operations = { .read_iter = hugetlbfs_read_iter, .mmap = hugetlbfs_file_mmap, .fsync = noop_fsync, .get_unmapped_area = hugetlb_get_unmapped_area, .llseek = default_llseek, .fallocate = hugetlbfs_fallocate, + .fop_flags = FOP_HUGE_PAGES, }; static const struct inode_operations hugetlbfs_dir_inode_operations = { diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 4e8e41c8b3c0e4..41c8f0c68ef564 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -824,12 +824,11 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos, out_unlock: __iomap_put_folio(iter, pos, 0, folio); - iomap_write_failed(iter->inode, pos, len); return status; } -static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len, +static bool __iomap_write_end(struct inode *inode, loff_t pos, size_t len, size_t copied, struct folio *folio) { flush_dcache_folio(folio); @@ -846,14 +845,14 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len, * redo the whole thing. */ if (unlikely(copied < len && !folio_test_uptodate(folio))) - return 0; + return false; iomap_set_range_uptodate(folio, offset_in_folio(folio, pos), len); iomap_set_range_dirty(folio, offset_in_folio(folio, pos), copied); filemap_dirty_folio(inode->i_mapping, folio); - return copied; + return true; } -static size_t iomap_write_end_inline(const struct iomap_iter *iter, +static void iomap_write_end_inline(const struct iomap_iter *iter, struct folio *folio, loff_t pos, size_t copied) { const struct iomap *iomap = &iter->iomap; @@ -868,42 +867,32 @@ static size_t iomap_write_end_inline(const struct iomap_iter *iter, kunmap_local(addr); mark_inode_dirty(iter->inode); - return copied; } -/* Returns the number of bytes copied. May be 0. Cannot be an errno. 
*/ -static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, +/* + * Returns true if all copied bytes have been written to the pagecache, + * otherwise return false. + */ +static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, size_t copied, struct folio *folio) { const struct iomap *srcmap = iomap_iter_srcmap(iter); - loff_t old_size = iter->inode->i_size; - size_t ret; if (srcmap->type == IOMAP_INLINE) { - ret = iomap_write_end_inline(iter, folio, pos, copied); - } else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) { - ret = block_write_end(NULL, iter->inode->i_mapping, pos, len, - copied, &folio->page, NULL); - } else { - ret = __iomap_write_end(iter->inode, pos, len, copied, folio); + iomap_write_end_inline(iter, folio, pos, copied); + return true; } - /* - * Update the in-memory inode size after copying the data into the page - * cache. It's up to the file system to write the updated size to disk, - * preferably after I/O completion so that no stale data is exposed. 
- */ - if (pos + ret > old_size) { - i_size_write(iter->inode, pos + ret); - iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; + if (srcmap->flags & IOMAP_F_BUFFER_HEAD) { + size_t bh_written; + + bh_written = block_write_end(NULL, iter->inode->i_mapping, pos, + len, copied, &folio->page, NULL); + WARN_ON_ONCE(bh_written != copied && bh_written != 0); + return bh_written == copied; } - __iomap_put_folio(iter, pos, ret, folio); - if (old_size < pos) - pagecache_isize_extended(iter->inode, old_size, pos); - if (ret < len) - iomap_write_failed(iter->inode, pos + ret, len - ret); - return ret; + return __iomap_write_end(iter->inode, pos, len, copied, folio); } static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) @@ -911,16 +900,18 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) loff_t length = iomap_length(iter); size_t chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER; loff_t pos = iter->pos; - ssize_t written = 0; + ssize_t total_written = 0; long status = 0; struct address_space *mapping = iter->inode->i_mapping; unsigned int bdp_flags = (iter->flags & IOMAP_NOWAIT) ? BDP_ASYNC : 0; do { struct folio *folio; + loff_t old_size; size_t offset; /* Offset into folio */ size_t bytes; /* Bytes to write to folio */ size_t copied; /* Bytes copied from user */ + size_t written; /* Bytes have been written */ bytes = iov_iter_count(i); retry: @@ -950,8 +941,10 @@ retry: } status = iomap_write_begin(iter, pos, bytes, &folio); - if (unlikely(status)) + if (unlikely(status)) { + iomap_write_failed(iter->inode, pos, bytes); break; + } if (iter->iomap.flags & IOMAP_F_STALE) break; @@ -963,19 +956,37 @@ retry: flush_dcache_folio(folio); copied = copy_folio_from_iter_atomic(folio, offset, bytes, i); - status = iomap_write_end(iter, pos, bytes, copied, folio); + written = iomap_write_end(iter, pos, bytes, copied, folio) ? + copied : 0; + + /* + * Update the in-memory inode size after copying the data into + * the page cache. 
It's up to the file system to write the + * updated size to disk, preferably after I/O completion so that + * no stale data is exposed. Only once that's done can we + * unlock and release the folio. + */ + old_size = iter->inode->i_size; + if (pos + written > old_size) { + i_size_write(iter->inode, pos + written); + iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; + } + __iomap_put_folio(iter, pos, written, folio); - if (unlikely(copied != status)) - iov_iter_revert(i, copied - status); + if (old_size < pos) + pagecache_isize_extended(iter->inode, old_size, pos); cond_resched(); - if (unlikely(status == 0)) { + if (unlikely(written == 0)) { /* * A short copy made iomap_write_end() reject the * thing entirely. Might be memory poisoning * halfway through, might be a race with munmap, * might be severe memory pressure. */ + iomap_write_failed(iter->inode, pos, bytes); + iov_iter_revert(i, copied); + if (chunk > PAGE_SIZE) chunk /= 2; if (copied) { @@ -983,17 +994,17 @@ retry: goto retry; } } else { - pos += status; - written += status; - length -= status; + pos += written; + total_written += written; + length -= written; } } while (iov_iter_count(i) && length); if (status == -EAGAIN) { - iov_iter_revert(i, written); + iov_iter_revert(i, total_written); return -EAGAIN; } - return written ? written : status; + return total_written ? 
total_written : status; } ssize_t @@ -1322,6 +1333,7 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter) int status; size_t offset; size_t bytes = min_t(u64, SIZE_MAX, length); + bool ret; status = iomap_write_begin(iter, pos, bytes, &folio); if (unlikely(status)) @@ -1333,8 +1345,9 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter) if (bytes > folio_size(folio) - offset) bytes = folio_size(folio) - offset; - bytes = iomap_write_end(iter, pos, bytes, bytes, folio); - if (WARN_ON_ONCE(bytes == 0)) + ret = iomap_write_end(iter, pos, bytes, bytes, folio); + __iomap_put_folio(iter, pos, bytes, folio); + if (WARN_ON_ONCE(!ret)) return -EIO; cond_resched(); @@ -1383,6 +1396,7 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) int status; size_t offset; size_t bytes = min_t(u64, SIZE_MAX, length); + bool ret; status = iomap_write_begin(iter, pos, bytes, &folio); if (status) @@ -1397,8 +1411,9 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) folio_zero_range(folio, offset, bytes); folio_mark_accessed(folio); - bytes = iomap_write_end(iter, pos, bytes, bytes, folio); - if (WARN_ON_ONCE(bytes == 0)) + ret = iomap_write_end(iter, pos, bytes, bytes, folio); + __iomap_put_folio(iter, pos, bytes, folio); + if (WARN_ON_ONCE(!ret)) return -EIO; pos += bytes; @@ -1958,18 +1973,13 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, return error; } -static int iomap_do_writepage(struct folio *folio, - struct writeback_control *wbc, void *data) -{ - return iomap_writepage_map(data, wbc, folio); -} - int iomap_writepages(struct address_space *mapping, struct writeback_control *wbc, struct iomap_writepage_ctx *wpc, const struct iomap_writeback_ops *ops) { - int ret; + struct folio *folio = NULL; + int error; /* * Writeback from reclaim context should never happen except in the case @@ -1980,8 +1990,9 @@ iomap_writepages(struct address_space *mapping, struct writeback_control *wbc, return -EIO; wpc->ops = 
ops; - ret = write_cache_pages(mapping, wbc, iomap_do_writepage, wpc); - return iomap_submit_ioend(wpc, ret); + while ((folio = writeback_iter(mapping, wbc, folio, &error))) + error = iomap_writepage_map(wpc, wbc, folio); + return iomap_submit_ioend(wpc, error); } EXPORT_SYMBOL_GPL(iomap_writepages); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index b6c114c11b9787..03c4b9214f564a 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2009,7 +2009,7 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags) byte_count = (block_stop - block_start + 1) * journal->j_blocksize; - truncate_inode_pages_range(journal->j_dev->bd_inode->i_mapping, + truncate_inode_pages_range(journal->j_dev->bd_mapping, byte_start, byte_stop); if (flags & JBD2_JOURNAL_FLUSH_DISCARD) { diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index 00224f3a8d6e71..defb4162c3d5b5 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -1110,6 +1110,9 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname, return rc; request = PAD(sizeof(struct jffs2_raw_xattr) + strlen(xname) + 1 + size); + if (request > c->sector_size - c->cleanmarker_size) + return -ERANGE; + rc = jffs2_reserve_space(c, request, &length, ALLOC_NORMAL, JFFS2_SUMMARY_XATTR_SIZE); if (rc) { diff --git a/fs/libfs.c b/fs/libfs.c index 3a6f2cb364f8cb..b635ee5adbcced 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -295,6 +295,18 @@ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry) return 0; } +static int simple_offset_replace(struct offset_ctx *octx, struct dentry *dentry, + long offset) +{ + int ret; + + ret = mtree_store(&octx->mt, offset, dentry, GFP_KERNEL); + if (ret) + return ret; + offset_set(dentry, offset); + return 0; +} + /** * simple_offset_remove - Remove an entry to a directory's offset map * @octx: directory offset ctx to be updated @@ -346,12 +358,45 @@ int simple_offset_empty(struct dentry *dentry) } /** + * simple_offset_rename - handle directory offsets for 
rename + * @old_dir: parent directory of source entry + * @old_dentry: dentry of source entry + * @new_dir: parent_directory of destination entry + * @new_dentry: dentry of destination + * + * Caller provides appropriate serialization. + * + * User space expects the directory offset value of the replaced + * (new) directory entry to be unchanged after a rename. + * + * Returns zero on success, a negative errno value on failure. + */ +int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir); + struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir); + long new_offset = dentry2offset(new_dentry); + + simple_offset_remove(old_ctx, old_dentry); + + if (new_offset) { + offset_set(new_dentry, 0); + return simple_offset_replace(new_ctx, old_dentry, new_offset); + } + return simple_offset_add(new_ctx, old_dentry); +} + +/** * simple_offset_rename_exchange - exchange rename with directory offsets * @old_dir: parent of dentry being moved * @old_dentry: dentry being moved * @new_dir: destination parent * @new_dentry: destination dentry * + * This API preserves the directory offset values. Caller provides + * appropriate serialization. + * * Returns zero on success. Otherwise a negative errno is returned and the * rename is rolled back. 
*/ @@ -369,11 +414,11 @@ int simple_offset_rename_exchange(struct inode *old_dir, simple_offset_remove(old_ctx, old_dentry); simple_offset_remove(new_ctx, new_dentry); - ret = simple_offset_add(new_ctx, old_dentry); + ret = simple_offset_replace(new_ctx, old_dentry, new_index); if (ret) goto out_restore; - ret = simple_offset_add(old_ctx, new_dentry); + ret = simple_offset_replace(old_ctx, new_dentry, old_index); if (ret) { simple_offset_remove(new_ctx, old_dentry); goto out_restore; @@ -388,10 +433,8 @@ int simple_offset_rename_exchange(struct inode *old_dir, return 0; out_restore: - offset_set(old_dentry, old_index); - mtree_store(&old_ctx->mt, old_index, old_dentry, GFP_KERNEL); - offset_set(new_dentry, new_index); - mtree_store(&new_ctx->mt, new_index, new_dentry, GFP_KERNEL); + (void)simple_offset_replace(old_ctx, old_dentry, old_index); + (void)simple_offset_replace(new_ctx, new_dentry, new_index); return ret; } diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 7cbd2b9f4d115c..7f9a2d8aa420f1 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -20,11 +20,11 @@ #include <linux/mpage.h> #include <linux/vfs.h> #include <linux/writeback.h> +#include <linux/fs_context.h> static int minix_write_inode(struct inode *inode, struct writeback_control *wbc); static int minix_statfs(struct dentry *dentry, struct kstatfs *buf); -static int minix_remount (struct super_block * sb, int * flags, char * data); static void minix_evict_inode(struct inode *inode) { @@ -111,19 +111,19 @@ static const struct super_operations minix_sops = { .evict_inode = minix_evict_inode, .put_super = minix_put_super, .statfs = minix_statfs, - .remount_fs = minix_remount, }; -static int minix_remount (struct super_block * sb, int * flags, char * data) +static int minix_reconfigure(struct fs_context *fc) { - struct minix_sb_info * sbi = minix_sb(sb); struct minix_super_block * ms; + struct super_block *sb = fc->root->d_sb; + struct minix_sb_info * sbi = sb->s_fs_info; sync_filesystem(sb); ms 
= sbi->s_ms; - if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) + if ((bool)(fc->sb_flags & SB_RDONLY) == sb_rdonly(sb)) return 0; - if (*flags & SB_RDONLY) { + if (fc->sb_flags & SB_RDONLY) { if (ms->s_state & MINIX_VALID_FS || !(sbi->s_mount_state & MINIX_VALID_FS)) return 0; @@ -170,7 +170,7 @@ static bool minix_check_superblock(struct super_block *sb) return true; } -static int minix_fill_super(struct super_block *s, void *data, int silent) +static int minix_fill_super(struct super_block *s, struct fs_context *fc) { struct buffer_head *bh; struct buffer_head **map; @@ -180,6 +180,7 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) struct inode *root_inode; struct minix_sb_info *sbi; int ret = -EINVAL; + int silent = fc->sb_flags & SB_SILENT; sbi = kzalloc(sizeof(struct minix_sb_info), GFP_KERNEL); if (!sbi) @@ -371,6 +372,23 @@ out: return ret; } +static int minix_get_tree(struct fs_context *fc) +{ + return get_tree_bdev(fc, minix_fill_super); +} + +static const struct fs_context_operations minix_context_ops = { + .get_tree = minix_get_tree, + .reconfigure = minix_reconfigure, +}; + +static int minix_init_fs_context(struct fs_context *fc) +{ + fc->ops = &minix_context_ops; + + return 0; +} + static int minix_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; @@ -680,18 +698,12 @@ void minix_truncate(struct inode * inode) V2_minix_truncate(inode); } -static struct dentry *minix_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) -{ - return mount_bdev(fs_type, flags, dev_name, data, minix_fill_super); -} - static struct file_system_type minix_fs_type = { - .owner = THIS_MODULE, - .name = "minix", - .mount = minix_mount, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, + .owner = THIS_MODULE, + .name = "minix", + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, + .init_fs_context = minix_init_fs_context, }; MODULE_ALIAS_FS("minix"); diff 
--git a/fs/namei.c b/fs/namei.c index c5b2a25be7d048..cb5dde0e309f7a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2422,6 +2422,14 @@ static const char *path_init(struct nameidata *nd, unsigned flags) if (!f.file) return ERR_PTR(-EBADF); + if (flags & LOOKUP_LINKAT_EMPTY) { + if (f.file->f_cred != current_cred() && + !ns_capable(f.file->f_cred->user_ns, CAP_DAC_READ_SEARCH)) { + fdput(f); + return ERR_PTR(-ENOENT); + } + } + dentry = f.file->f_path.dentry; if (*s && unlikely(!d_can_lookup(dentry))) { @@ -4641,14 +4649,13 @@ int do_linkat(int olddfd, struct filename *old, int newdfd, goto out_putnames; } /* - * To use null names we require CAP_DAC_READ_SEARCH + * To use null names we require CAP_DAC_READ_SEARCH or + * that the open-time creds of the dfd matches current. * This ensures that not everyone will be able to create - * handlink using the passed filedescriptor. + * a hardlink using the passed file descriptor. */ - if (flags & AT_EMPTY_PATH && !capable(CAP_DAC_READ_SEARCH)) { - error = -ENOENT; - goto out_putnames; - } + if (flags & AT_EMPTY_PATH) + how |= LOOKUP_LINKAT_EMPTY; if (flags & AT_SYMLINK_FOLLOW) how |= LOOKUP_FOLLOW; diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index f1a01c191cf53a..8be471ce4f1950 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -60,7 +60,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, if (argv->v_nmembs == 0) return 0; - if (argv->v_size > PAGE_SIZE) + if ((size_t)argv->v_size > PAGE_SIZE) return -EINVAL; /* diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 4a0779e3ef7923..a7b527ea50d3ca 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -355,10 +355,10 @@ static struct inode *openprom_iget(struct super_block *sb, ino_t ino) return inode; } -static int openprom_remount(struct super_block *sb, int *flags, char *data) +static int openpromfs_reconfigure(struct fs_context *fc) { - sync_filesystem(sb); - *flags |= SB_NOATIME; + sync_filesystem(fc->root->d_sb); + fc->sb_flags 
|= SB_NOATIME; return 0; } @@ -366,7 +366,6 @@ static const struct super_operations openprom_sops = { .alloc_inode = openprom_alloc_inode, .free_inode = openprom_free_inode, .statfs = simple_statfs, - .remount_fs = openprom_remount, }; static int openprom_fill_super(struct super_block *s, struct fs_context *fc) @@ -415,6 +414,7 @@ static int openpromfs_get_tree(struct fs_context *fc) static const struct fs_context_operations openpromfs_context_ops = { .get_tree = openpromfs_get_tree, + .reconfigure = openpromfs_reconfigure, }; static int openpromfs_init_fs_context(struct fs_context *fc) diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c index 8bbe9486e3a622..395a00ed8ac75f 100644 --- a/fs/orangefs/dcache.c +++ b/fs/orangefs/dcache.c @@ -33,9 +33,7 @@ static int orangefs_revalidate_lookup(struct dentry *dentry) new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW; new_op->upcall.req.lookup.parent_refn = parent->refn; - strncpy(new_op->upcall.req.lookup.d_name, - dentry->d_name.name, - ORANGEFS_NAME_MAX - 1); + strscpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name); gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d interrupt flag [%d]\n", diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index c9dfd5c6a0970c..200558ec72f086 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -41,8 +41,7 @@ static int orangefs_create(struct mnt_idmap *idmap, fill_default_sys_attrs(new_op->upcall.req.create.attributes, ORANGEFS_TYPE_METAFILE, mode); - strncpy(new_op->upcall.req.create.d_name, - dentry->d_name.name, ORANGEFS_NAME_MAX - 1); + strscpy(new_op->upcall.req.create.d_name, dentry->d_name.name); ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); @@ -137,8 +136,7 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry, &parent->refn.khandle); new_op->upcall.req.lookup.parent_refn = parent->refn; - strncpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name, - ORANGEFS_NAME_MAX - 1); + 
strscpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name); gossip_debug(GOSSIP_NAME_DEBUG, "%s: doing lookup on %s under %pU,%d\n", @@ -192,8 +190,7 @@ static int orangefs_unlink(struct inode *dir, struct dentry *dentry) return -ENOMEM; new_op->upcall.req.remove.parent_refn = parent->refn; - strncpy(new_op->upcall.req.remove.d_name, dentry->d_name.name, - ORANGEFS_NAME_MAX - 1); + strscpy(new_op->upcall.req.remove.d_name, dentry->d_name.name); ret = service_operation(new_op, "orangefs_unlink", get_interruptible_flag(inode)); @@ -247,10 +244,8 @@ static int orangefs_symlink(struct mnt_idmap *idmap, ORANGEFS_TYPE_SYMLINK, mode); - strncpy(new_op->upcall.req.sym.entry_name, - dentry->d_name.name, - ORANGEFS_NAME_MAX - 1); - strncpy(new_op->upcall.req.sym.target, symname, ORANGEFS_NAME_MAX - 1); + strscpy(new_op->upcall.req.sym.entry_name, dentry->d_name.name); + strscpy(new_op->upcall.req.sym.target, symname); ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); @@ -324,8 +319,7 @@ static int orangefs_mkdir(struct mnt_idmap *idmap, struct inode *dir, fill_default_sys_attrs(new_op->upcall.req.mkdir.attributes, ORANGEFS_TYPE_DIRECTORY, mode); - strncpy(new_op->upcall.req.mkdir.d_name, - dentry->d_name.name, ORANGEFS_NAME_MAX - 1); + strscpy(new_op->upcall.req.mkdir.d_name, dentry->d_name.name); ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); @@ -405,12 +399,8 @@ static int orangefs_rename(struct mnt_idmap *idmap, new_op->upcall.req.rename.old_parent_refn = ORANGEFS_I(old_dir)->refn; new_op->upcall.req.rename.new_parent_refn = ORANGEFS_I(new_dir)->refn; - strncpy(new_op->upcall.req.rename.d_old_name, - old_dentry->d_name.name, - ORANGEFS_NAME_MAX - 1); - strncpy(new_op->upcall.req.rename.d_new_name, - new_dentry->d_name.name, - ORANGEFS_NAME_MAX - 1); + strscpy(new_op->upcall.req.rename.d_old_name, old_dentry->d_name.name); + strscpy(new_op->upcall.req.rename.d_new_name, new_dentry->d_name.name); ret = 
service_operation(new_op, "orangefs_rename", diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 34849b4a3243ca..fb4d09c2f531f0 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -253,9 +253,8 @@ int orangefs_remount(struct orangefs_sb_info_s *orangefs_sb) new_op = op_alloc(ORANGEFS_VFS_OP_FS_MOUNT); if (!new_op) return -ENOMEM; - strncpy(new_op->upcall.req.fs_mount.orangefs_config_server, - orangefs_sb->devname, - ORANGEFS_MAX_SERVER_ADDR_LEN); + strscpy(new_op->upcall.req.fs_mount.orangefs_config_server, + orangefs_sb->devname); gossip_debug(GOSSIP_SUPER_DEBUG, "Attempting ORANGEFS Remount via host %s\n", @@ -400,8 +399,7 @@ static int orangefs_unmount(int id, __s32 fs_id, const char *devname) return -ENOMEM; op->upcall.req.fs_umount.id = id; op->upcall.req.fs_umount.fs_id = fs_id; - strncpy(op->upcall.req.fs_umount.orangefs_config_server, - devname, ORANGEFS_MAX_SERVER_ADDR_LEN - 1); + strscpy(op->upcall.req.fs_umount.orangefs_config_server, devname); r = service_operation(op, "orangefs_fs_umount", 0); /* Not much to do about an error here. 
*/ if (r) @@ -494,9 +492,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst, if (!new_op) return ERR_PTR(-ENOMEM); - strncpy(new_op->upcall.req.fs_mount.orangefs_config_server, - devname, - ORANGEFS_MAX_SERVER_ADDR_LEN - 1); + strscpy(new_op->upcall.req.fs_mount.orangefs_config_server, devname); gossip_debug(GOSSIP_SUPER_DEBUG, "Attempting ORANGEFS Mount via host %s\n", @@ -543,9 +539,8 @@ struct dentry *orangefs_mount(struct file_system_type *fst, * on successful mount, store the devname and data * used */ - strncpy(ORANGEFS_SB(sb)->devname, - devname, - ORANGEFS_MAX_SERVER_ADDR_LEN - 1); + strscpy(ORANGEFS_SB(sb)->devname, devname); + /* mount_pending must be cleared */ ORANGEFS_SB(sb)->mount_pending = 0; diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c index 36dcc530ac286b..4860fcc4611bb7 100644 --- a/fs/overlayfs/params.c +++ b/fs/overlayfs/params.c @@ -139,10 +139,6 @@ static int ovl_verity_mode_def(void) return OVL_VERITY_OFF; } -#define fsparam_string_empty(NAME, OPT) \ - __fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL) - - const struct fs_parameter_spec ovl_parameter_spec[] = { fsparam_string_empty("lowerdir", Opt_lowerdir), fsparam_string("lowerdir+", Opt_lowerdir_add), diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 2ba31b6d68c077..52f0b75cbce2ca 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -135,6 +135,7 @@ EXPORT_SYMBOL_GPL(proc_create_net_data); * @parent: The parent directory in which to create. * @ops: The seq_file ops with which to read the file. * @write: The write method with which to 'modify' the file. + * @state_size: The size of the per-file private state to allocate. * @data: Data for retrieval by pde_data(). 
* * Create a network namespaced proc file in the @parent directory with the diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 405913f4faff99..d62fbef838b681 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -19,11 +19,11 @@ #include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/statfs.h> -#include <linux/parser.h> #include <linux/seq_file.h> -#include <linux/mount.h> #include <linux/crc32.h> #include <linux/mpage.h> +#include <linux/fs_parser.h> +#include <linux/fs_context.h> #include "qnx6.h" static const struct super_operations qnx6_sops; @@ -31,7 +31,7 @@ static const struct super_operations qnx6_sops; static void qnx6_put_super(struct super_block *sb); static struct inode *qnx6_alloc_inode(struct super_block *sb); static void qnx6_free_inode(struct inode *inode); -static int qnx6_remount(struct super_block *sb, int *flags, char *data); +static int qnx6_reconfigure(struct fs_context *fc); static int qnx6_statfs(struct dentry *dentry, struct kstatfs *buf); static int qnx6_show_options(struct seq_file *seq, struct dentry *root); @@ -40,7 +40,6 @@ static const struct super_operations qnx6_sops = { .free_inode = qnx6_free_inode, .put_super = qnx6_put_super, .statfs = qnx6_statfs, - .remount_fs = qnx6_remount, .show_options = qnx6_show_options, }; @@ -54,10 +53,12 @@ static int qnx6_show_options(struct seq_file *seq, struct dentry *root) return 0; } -static int qnx6_remount(struct super_block *sb, int *flags, char *data) +static int qnx6_reconfigure(struct fs_context *fc) { + struct super_block *sb = fc->root->d_sb; + sync_filesystem(sb); - *flags |= SB_RDONLY; + fc->sb_flags |= SB_RDONLY; return 0; } @@ -218,39 +219,36 @@ void qnx6_superblock_debug(struct qnx6_super_block *sb, struct super_block *s) #endif enum { - Opt_mmifs, - Opt_err + Opt_mmifs +}; + +struct qnx6_context { + unsigned long s_mount_opts; }; -static const match_table_t tokens = { - {Opt_mmifs, "mmi_fs"}, - {Opt_err, NULL} +static const struct fs_parameter_spec 
qnx6_param_spec[] = { + fsparam_flag ("mmi_fs", Opt_mmifs), + {} }; -static int qnx6_parse_options(char *options, struct super_block *sb) +static int qnx6_parse_param(struct fs_context *fc, struct fs_parameter *param) { - char *p; - struct qnx6_sb_info *sbi = QNX6_SB(sb); - substring_t args[MAX_OPT_ARGS]; - - if (!options) - return 1; - - while ((p = strsep(&options, ",")) != NULL) { - int token; - if (!*p) - continue; - - token = match_token(p, tokens, args); - switch (token) { - case Opt_mmifs: - set_opt(sbi->s_mount_opt, MMI_FS); - break; - default: - return 0; - } + struct qnx6_context *ctx = fc->fs_private; + struct fs_parse_result result; + int opt; + + opt = fs_parse(fc, qnx6_param_spec, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_mmifs: + ctx->s_mount_opts |= QNX6_MOUNT_MMI_FS; + break; + default: + return -EINVAL; } - return 1; + return 0; } static struct buffer_head *qnx6_check_first_superblock(struct super_block *s, @@ -293,22 +291,25 @@ static struct buffer_head *qnx6_check_first_superblock(struct super_block *s, static struct inode *qnx6_private_inode(struct super_block *s, struct qnx6_root_node *p); -static int qnx6_fill_super(struct super_block *s, void *data, int silent) +static int qnx6_fill_super(struct super_block *s, struct fs_context *fc) { struct buffer_head *bh1 = NULL, *bh2 = NULL; struct qnx6_super_block *sb1 = NULL, *sb2 = NULL; struct qnx6_sb_info *sbi; + struct qnx6_context *ctx = fc->fs_private; struct inode *root; const char *errmsg; struct qnx6_sb_info *qs; int ret = -EINVAL; u64 offset; int bootblock_offset = QNX6_BOOTBLOCK_SIZE; + int silent = fc->sb_flags & SB_SILENT; qs = kzalloc(sizeof(struct qnx6_sb_info), GFP_KERNEL); if (!qs) return -ENOMEM; s->s_fs_info = qs; + qs->s_mount_opt = ctx->s_mount_opts; /* Superblock always is 512 Byte long */ if (!sb_set_blocksize(s, QNX6_SUPERBLOCK_SIZE)) { @@ -316,12 +317,7 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent) goto outnobh; 
} - /* parse the mount-options */ - if (!qnx6_parse_options((char *) data, s)) { - pr_err("invalid mount options.\n"); - goto outnobh; - } - if (test_opt(s, MMI_FS)) { + if (qs->s_mount_opt == QNX6_MOUNT_MMI_FS) { sb1 = qnx6_mmi_fill_super(s, silent); if (sb1) goto mmi_success; @@ -632,18 +628,43 @@ static void destroy_inodecache(void) kmem_cache_destroy(qnx6_inode_cachep); } -static struct dentry *qnx6_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int qnx6_get_tree(struct fs_context *fc) +{ + return get_tree_bdev(fc, qnx6_fill_super); +} + +static void qnx6_free_fc(struct fs_context *fc) { - return mount_bdev(fs_type, flags, dev_name, data, qnx6_fill_super); + kfree(fc->fs_private); +} + +static const struct fs_context_operations qnx6_context_ops = { + .parse_param = qnx6_parse_param, + .get_tree = qnx6_get_tree, + .reconfigure = qnx6_reconfigure, + .free = qnx6_free_fc, +}; + +static int qnx6_init_fs_context(struct fs_context *fc) +{ + struct qnx6_context *ctx; + + ctx = kzalloc(sizeof(struct qnx6_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + fc->ops = &qnx6_context_ops; + fc->fs_private = ctx; + + return 0; } static struct file_system_type qnx6_fs_type = { - .owner = THIS_MODULE, - .name = "qnx6", - .mount = qnx6_mount, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, + .owner = THIS_MODULE, + .name = "qnx6", + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, + .init_fs_context = qnx6_init_fs_context, + .parameters = qnx6_param_spec, }; MODULE_ALIAS_FS("qnx6"); diff --git a/fs/read_write.c b/fs/read_write.c index 85c096f2c0d06e..0f862c0ee1d4a3 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1686,7 +1686,7 @@ int generic_write_checks_count(struct kiocb *iocb, loff_t *count) if ((iocb->ki_flags & IOCB_NOWAIT) && !((iocb->ki_flags & IOCB_DIRECT) || - (file->f_mode & FMODE_BUF_WASYNC))) + (file->f_op->fop_flags & FOP_BUFFER_WASYNC))) return -EINVAL; return 
generic_write_check_limits(iocb->ki_filp, iocb->ki_pos, count); diff --git a/fs/signalfd.c b/fs/signalfd.c index e20d1484c66333..4a5614442dbfa7 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -68,8 +68,7 @@ static __poll_t signalfd_poll(struct file *file, poll_table *wait) /* * Copied from copy_siginfo_to_user() in kernel/signal.c */ -static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, - kernel_siginfo_t const *kinfo) +static int signalfd_copyinfo(struct iov_iter *to, kernel_siginfo_t const *kinfo) { struct signalfd_siginfo new; @@ -146,10 +145,10 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, break; } - if (copy_to_user(uinfo, &new, sizeof(struct signalfd_siginfo))) + if (!copy_to_iter_full(&new, sizeof(struct signalfd_siginfo), to)) return -EFAULT; - return sizeof(*uinfo); + return sizeof(struct signalfd_siginfo); } static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info, @@ -199,28 +198,27 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info * error code. The "count" parameter must be at least the size of a * "struct signalfd_siginfo". 
*/ -static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count, - loff_t *ppos) +static ssize_t signalfd_read_iter(struct kiocb *iocb, struct iov_iter *to) { + struct file *file = iocb->ki_filp; struct signalfd_ctx *ctx = file->private_data; - struct signalfd_siginfo __user *siginfo; - int nonblock = file->f_flags & O_NONBLOCK; + size_t count = iov_iter_count(to); ssize_t ret, total = 0; kernel_siginfo_t info; + bool nonblock; count /= sizeof(struct signalfd_siginfo); if (!count) return -EINVAL; - siginfo = (struct signalfd_siginfo __user *) buf; + nonblock = file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT; do { ret = signalfd_dequeue(ctx, &info, nonblock); if (unlikely(ret <= 0)) break; - ret = signalfd_copyinfo(siginfo, &info); + ret = signalfd_copyinfo(to, &info); if (ret < 0) break; - siginfo++; total += ret; nonblock = 1; } while (--count); @@ -246,7 +244,7 @@ static const struct file_operations signalfd_fops = { #endif .release = signalfd_release, .poll = signalfd_poll, - .read = signalfd_read, + .read_iter = signalfd_read_iter, .llseek = noop_llseek, }; @@ -265,20 +263,34 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags) signotset(mask); if (ufd == -1) { + struct file *file; + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->sigmask = *mask; + ufd = get_unused_fd_flags(flags & O_CLOEXEC); + if (ufd < 0) { + kfree(ctx); + return ufd; + } + + file = anon_inode_getfile("[signalfd]", &signalfd_fops, ctx, + O_RDWR | (flags & O_NONBLOCK)); + if (IS_ERR(file)) { + put_unused_fd(ufd); + kfree(ctx); + return ufd; + } + file->f_mode |= FMODE_NOWAIT; + /* * When we call this, the initialization must be complete, since * anon_inode_getfd() will install the fd. 
*/ - ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx, - O_RDWR | (flags & (O_CLOEXEC | O_NONBLOCK))); - if (ufd < 0) - kfree(ctx); + fd_install(ufd, file); } else { struct fd f = fdget(ufd); if (!f.file) diff --git a/fs/stat.c b/fs/stat.c index 77cdc69eb4220b..70bd3e888cfa30 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -658,6 +658,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer) tmp.stx_mnt_id = stat->mnt_id; tmp.stx_dio_mem_align = stat->dio_mem_align; tmp.stx_dio_offset_align = stat->dio_offset_align; + tmp.stx_subvol = stat->subvol; return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0; } diff --git a/fs/timerfd.c b/fs/timerfd.c index e9c96a0c79f113..4bf2f8bfec112e 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -262,17 +262,18 @@ static __poll_t timerfd_poll(struct file *file, poll_table *wait) return events; } -static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, - loff_t *ppos) +static ssize_t timerfd_read_iter(struct kiocb *iocb, struct iov_iter *to) { + struct file *file = iocb->ki_filp; struct timerfd_ctx *ctx = file->private_data; ssize_t res; u64 ticks = 0; - if (count < sizeof(ticks)) + if (iov_iter_count(to) < sizeof(ticks)) return -EINVAL; + spin_lock_irq(&ctx->wqh.lock); - if (file->f_flags & O_NONBLOCK) + if (file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT) res = -EAGAIN; else res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); @@ -312,8 +313,11 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, ctx->ticks = 0; } spin_unlock_irq(&ctx->wqh.lock); - if (ticks) - res = put_user(ticks, (u64 __user *) buf) ? 
-EFAULT: sizeof(ticks); + if (ticks) { + res = copy_to_iter(&ticks, sizeof(ticks), to); + if (!res) + res = -EFAULT; + } return res; } @@ -384,7 +388,7 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg static const struct file_operations timerfd_fops = { .release = timerfd_release, .poll = timerfd_poll, - .read = timerfd_read, + .read_iter = timerfd_read_iter, .llseek = noop_llseek, .show_fdinfo = timerfd_show, .unlocked_ioctl = timerfd_ioctl, @@ -407,6 +411,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) { int ufd; struct timerfd_ctx *ctx; + struct file *file; /* Check the TFD_* constants for consistency. */ BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC); @@ -443,11 +448,22 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) ctx->moffs = ktime_mono_to_real(0); - ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, - O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); - if (ufd < 0) + ufd = get_unused_fd_flags(flags & TFD_SHARED_FCNTL_FLAGS); + if (ufd < 0) { + kfree(ctx); + return ufd; + } + + file = anon_inode_getfile("[timerfd]", &timerfd_fops, ctx, + O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); + if (IS_ERR(file)) { + put_unused_fd(ufd); kfree(ctx); + return PTR_ERR(file); + } + file->f_mode |= FMODE_NOWAIT; + fd_install(ufd, file); return ufd; } diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 5545e6bf7d26c4..abd0b477e28759 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -11,14 +11,14 @@ #include <linux/module.h> #include <linux/fs.h> -#include <linux/mount.h> +#include <linux/fs_context.h> +#include <linux/fs_parser.h> #include <linux/kobject.h> #include <linux/namei.h> #include <linux/tracefs.h> #include <linux/fsnotify.h> #include <linux/security.h> #include <linux/seq_file.h> -#include <linux/parser.h> #include <linux/magic.h> #include <linux/slab.h> #include "internal.h" @@ -231,7 +231,7 @@ struct inode *tracefs_get_inode(struct super_block *sb) return inode; } -struct tracefs_mount_opts { 
+struct tracefs_fs_info { kuid_t uid; kgid_t gid; umode_t mode; @@ -243,68 +243,51 @@ enum { Opt_uid, Opt_gid, Opt_mode, - Opt_err }; -static const match_table_t tokens = { - {Opt_uid, "uid=%u"}, - {Opt_gid, "gid=%u"}, - {Opt_mode, "mode=%o"}, - {Opt_err, NULL} +static const struct fs_parameter_spec tracefs_param_specs[] = { + fsparam_u32 ("gid", Opt_gid), + fsparam_u32oct ("mode", Opt_mode), + fsparam_u32 ("uid", Opt_uid), + {} }; -struct tracefs_fs_info { - struct tracefs_mount_opts mount_opts; -}; - -static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) +static int tracefs_parse_param(struct fs_context *fc, struct fs_parameter *param) { - substring_t args[MAX_OPT_ARGS]; - int option; - int token; + struct tracefs_fs_info *opts = fc->s_fs_info; + struct fs_parse_result result; kuid_t uid; kgid_t gid; - char *p; - - opts->opts = 0; - opts->mode = TRACEFS_DEFAULT_MODE; - - while ((p = strsep(&data, ",")) != NULL) { - if (!*p) - continue; - - token = match_token(p, tokens, args); - switch (token) { - case Opt_uid: - if (match_int(&args[0], &option)) - return -EINVAL; - uid = make_kuid(current_user_ns(), option); - if (!uid_valid(uid)) - return -EINVAL; - opts->uid = uid; - break; - case Opt_gid: - if (match_int(&args[0], &option)) - return -EINVAL; - gid = make_kgid(current_user_ns(), option); - if (!gid_valid(gid)) - return -EINVAL; - opts->gid = gid; - break; - case Opt_mode: - if (match_octal(&args[0], &option)) - return -EINVAL; - opts->mode = option & S_IALLUGO; - break; - /* - * We might like to report bad mount options here; - * but traditionally tracefs has ignored all mount options - */ - } - - opts->opts |= BIT(token); + int opt; + + opt = fs_parse(fc, tracefs_param_specs, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_uid: + uid = make_kuid(current_user_ns(), result.uint_32); + if (!uid_valid(uid)) + return invalf(fc, "Unknown uid"); + opts->uid = uid; + break; + case Opt_gid: + gid = 
make_kgid(current_user_ns(), result.uint_32); + if (!gid_valid(gid)) + return invalf(fc, "Unknown gid"); + opts->gid = gid; + break; + case Opt_mode: + opts->mode = result.uint_32 & S_IALLUGO; + break; + /* + * We might like to report bad mount options here; + * but traditionally tracefs has ignored all mount options + */ } + opts->opts |= BIT(opt); + return 0; } @@ -312,7 +295,6 @@ static int tracefs_apply_options(struct super_block *sb, bool remount) { struct tracefs_fs_info *fsi = sb->s_fs_info; struct inode *inode = d_inode(sb->s_root); - struct tracefs_mount_opts *opts = &fsi->mount_opts; umode_t tmp_mode; /* @@ -320,50 +302,46 @@ static int tracefs_apply_options(struct super_block *sb, bool remount) * options. */ - if (!remount || opts->opts & BIT(Opt_mode)) { + if (!remount || fsi->opts & BIT(Opt_mode)) { tmp_mode = READ_ONCE(inode->i_mode) & ~S_IALLUGO; - tmp_mode |= opts->mode; + tmp_mode |= fsi->mode; WRITE_ONCE(inode->i_mode, tmp_mode); } - if (!remount || opts->opts & BIT(Opt_uid)) - inode->i_uid = opts->uid; + if (!remount || fsi->opts & BIT(Opt_uid)) + inode->i_uid = fsi->uid; - if (!remount || opts->opts & BIT(Opt_gid)) - inode->i_gid = opts->gid; + if (!remount || fsi->opts & BIT(Opt_gid)) + inode->i_gid = fsi->gid; return 0; } -static int tracefs_remount(struct super_block *sb, int *flags, char *data) +static int tracefs_reconfigure(struct fs_context *fc) { - int err; - struct tracefs_fs_info *fsi = sb->s_fs_info; + struct super_block *sb = fc->root->d_sb; + struct tracefs_fs_info *sb_opts = sb->s_fs_info; + struct tracefs_fs_info *new_opts = fc->s_fs_info; sync_filesystem(sb); - err = tracefs_parse_options(data, &fsi->mount_opts); - if (err) - goto fail; + /* structure copy of new mount options to sb */ + *sb_opts = *new_opts; - tracefs_apply_options(sb, true); - -fail: - return err; + return tracefs_apply_options(sb, true); } static int tracefs_show_options(struct seq_file *m, struct dentry *root) { struct tracefs_fs_info *fsi = 
root->d_sb->s_fs_info; - struct tracefs_mount_opts *opts = &fsi->mount_opts; - if (!uid_eq(opts->uid, GLOBAL_ROOT_UID)) + if (!uid_eq(fsi->uid, GLOBAL_ROOT_UID)) seq_printf(m, ",uid=%u", - from_kuid_munged(&init_user_ns, opts->uid)); - if (!gid_eq(opts->gid, GLOBAL_ROOT_GID)) + from_kuid_munged(&init_user_ns, fsi->uid)); + if (!gid_eq(fsi->gid, GLOBAL_ROOT_GID)) seq_printf(m, ",gid=%u", - from_kgid_munged(&init_user_ns, opts->gid)); - if (opts->mode != TRACEFS_DEFAULT_MODE) - seq_printf(m, ",mode=%o", opts->mode); + from_kgid_munged(&init_user_ns, fsi->gid)); + if (fsi->mode != TRACEFS_DEFAULT_MODE) + seq_printf(m, ",mode=%o", fsi->mode); return 0; } @@ -373,7 +351,6 @@ static const struct super_operations tracefs_super_operations = { .free_inode = tracefs_free_inode, .drop_inode = generic_delete_inode, .statfs = simple_statfs, - .remount_fs = tracefs_remount, .show_options = tracefs_show_options, }; @@ -403,26 +380,14 @@ static const struct dentry_operations tracefs_dentry_operations = { .d_release = tracefs_d_release, }; -static int trace_fill_super(struct super_block *sb, void *data, int silent) +static int tracefs_fill_super(struct super_block *sb, struct fs_context *fc) { static const struct tree_descr trace_files[] = {{""}}; - struct tracefs_fs_info *fsi; int err; - fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL); - sb->s_fs_info = fsi; - if (!fsi) { - err = -ENOMEM; - goto fail; - } - - err = tracefs_parse_options(data, &fsi->mount_opts); - if (err) - goto fail; - - err = simple_fill_super(sb, TRACEFS_MAGIC, trace_files); + err = simple_fill_super(sb, TRACEFS_MAGIC, trace_files); if (err) - goto fail; + return err; sb->s_op = &tracefs_super_operations; sb->s_d_op = &tracefs_dentry_operations; @@ -430,24 +395,45 @@ static int trace_fill_super(struct super_block *sb, void *data, int silent) tracefs_apply_options(sb, false); return 0; +} -fail: - kfree(fsi); - sb->s_fs_info = NULL; - return err; +static int tracefs_get_tree(struct fs_context *fc) +{ + 
return get_tree_single(fc, tracefs_fill_super); } -static struct dentry *trace_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) +static void tracefs_free_fc(struct fs_context *fc) { - return mount_single(fs_type, flags, data, trace_fill_super); + kfree(fc->s_fs_info); +} + +static const struct fs_context_operations tracefs_context_ops = { + .free = tracefs_free_fc, + .parse_param = tracefs_parse_param, + .get_tree = tracefs_get_tree, + .reconfigure = tracefs_reconfigure, +}; + +static int tracefs_init_fs_context(struct fs_context *fc) +{ + struct tracefs_fs_info *fsi; + + fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL); + if (!fsi) + return -ENOMEM; + + fsi->mode = TRACEFS_DEFAULT_MODE; + + fc->s_fs_info = fsi; + fc->ops = &tracefs_context_ops; + return 0; } static struct file_system_type trace_fs_type = { .owner = THIS_MODULE, .name = "tracefs", - .mount = trace_mount, + .init_fs_context = tracefs_init_fs_context, + .parameters = tracefs_param_specs, .kill_sb = kill_litter_super, }; MODULE_ALIAS_FS("tracefs"); diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index d2c3879745e535..eee7320ab0b028 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -31,6 +31,7 @@ #include <linux/hugetlb.h> #include <linux/swapops.h> #include <linux/miscdevice.h> +#include <linux/uio.h> static int sysctl_unprivileged_userfaultfd __read_mostly; @@ -282,7 +283,7 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, /* * Verify the pagetables are still not ok after having reigstered into * the fault_pending_wqh to avoid userland having to UFFDIO_WAKE any - * userfault that has already been resolved, if userfaultfd_read and + * userfault that has already been resolved, if userfaultfd_read_iter and * UFFDIO_COPY|ZEROPAGE are being run simultaneously on two different * threads. 
*/ @@ -1184,34 +1185,34 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, return ret; } -static ssize_t userfaultfd_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t userfaultfd_read_iter(struct kiocb *iocb, struct iov_iter *to) { + struct file *file = iocb->ki_filp; struct userfaultfd_ctx *ctx = file->private_data; ssize_t _ret, ret = 0; struct uffd_msg msg; - int no_wait = file->f_flags & O_NONBLOCK; struct inode *inode = file_inode(file); + bool no_wait; if (!userfaultfd_is_initialized(ctx)) return -EINVAL; + no_wait = file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT; for (;;) { - if (count < sizeof(msg)) + if (iov_iter_count(to) < sizeof(msg)) return ret ? ret : -EINVAL; _ret = userfaultfd_ctx_read(ctx, no_wait, &msg, inode); if (_ret < 0) return ret ? ret : _ret; - if (copy_to_user((__u64 __user *) buf, &msg, sizeof(msg))) + _ret = !copy_to_iter_full(&msg, sizeof(msg), to); + if (_ret) return ret ? ret : -EFAULT; ret += sizeof(msg); - buf += sizeof(msg); - count -= sizeof(msg); /* * Allow to read more than one fault at time but only * block if waiting for the very first one. 
*/ - no_wait = O_NONBLOCK; + no_wait = true; } } @@ -2179,7 +2180,7 @@ static const struct file_operations userfaultfd_fops = { #endif .release = userfaultfd_release, .poll = userfaultfd_poll, - .read = userfaultfd_read, + .read_iter = userfaultfd_read_iter, .unlocked_ioctl = userfaultfd_ioctl, .compat_ioctl = compat_ptr_ioctl, .llseek = noop_llseek, @@ -2199,6 +2200,7 @@ static void init_once_userfaultfd_ctx(void *mem) static int new_userfaultfd(int flags) { struct userfaultfd_ctx *ctx; + struct file *file; int fd; BUG_ON(!current->mm); @@ -2222,16 +2224,26 @@ static int new_userfaultfd(int flags) init_rwsem(&ctx->map_changing_lock); atomic_set(&ctx->mmap_changing, 0); ctx->mm = current->mm; - /* prevent the mm struct to be freed */ - mmgrab(ctx->mm); + + fd = get_unused_fd_flags(flags & UFFD_SHARED_FCNTL_FLAGS); + if (fd < 0) + goto err_out; /* Create a new inode so that the LSM can block the creation. */ - fd = anon_inode_create_getfd("[userfaultfd]", &userfaultfd_fops, ctx, + file = anon_inode_create_getfile("[userfaultfd]", &userfaultfd_fops, ctx, O_RDONLY | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL); - if (fd < 0) { - mmdrop(ctx->mm); - kmem_cache_free(userfaultfd_ctx_cachep, ctx); + if (IS_ERR(file)) { + put_unused_fd(fd); + fd = PTR_ERR(file); + goto err_out; } + /* prevent the mm struct to be freed */ + mmgrab(ctx->mm); + file->f_mode |= FMODE_NOWAIT; + fd_install(fd, file); + return fd; +err_out: + kmem_cache_free(userfaultfd_ctx_cachep, ctx); return fd; } diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index d1d4158441bd96..b240ea5241dc9d 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1156,8 +1156,7 @@ xfs_file_open( { if (xfs_is_shutdown(XFS_M(inode->i_sb))) return -EIO; - file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC | - FMODE_DIO_PARALLEL_WRITE | FMODE_CAN_ODIRECT; + file->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT; return generic_file_open(inode, file); } @@ -1170,7 +1169,9 @@ xfs_dir_open( unsigned int mode; int 
error; - error = xfs_file_open(inode, file); + if (xfs_is_shutdown(ip->i_mount)) + return -EIO; + error = generic_file_open(inode, file); if (error) return error; @@ -1416,7 +1417,6 @@ const struct file_operations xfs_file_operations = { .compat_ioctl = xfs_file_compat_ioctl, #endif .mmap = xfs_file_mmap, - .mmap_supported_flags = MAP_SYNC, .open = xfs_file_open, .release = xfs_file_release, .fsync = xfs_file_fsync, @@ -1424,6 +1424,8 @@ const struct file_operations xfs_file_operations = { .fallocate = xfs_file_fallocate, .fadvise = xfs_file_fadvise, .remap_file_range = xfs_file_remap_range, + .fop_flags = FOP_MMAP_SYNC | FOP_BUFFER_RASYNC | + FOP_BUFFER_WASYNC | FOP_DIO_PARALLEL_WRITE, }; const struct file_operations xfs_dir_file_operations = { diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index cb1526ec44b5f6..6438c75cbb358f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -51,6 +51,7 @@ struct block_device { bool bd_has_submit_bio; dev_t bd_dev; struct inode *bd_inode; /* will die */ + struct address_space *bd_mapping; /* page cache */ atomic_t bd_openers; spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 69e7da33ca49a6..d020541cd33e1c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -213,11 +213,6 @@ struct gendisk { struct blk_independent_access_ranges *ia_ranges; }; -static inline bool disk_live(struct gendisk *disk) -{ - return !inode_unhashed(disk->part0->bd_inode); -} - /** * disk_openers - returns how many openers are there for a disk * @disk: disk to check @@ -1366,11 +1361,6 @@ static inline unsigned int blksize_bits(unsigned int size) return order_base_2(size >> SECTOR_SHIFT) + SECTOR_SHIFT; } -static inline unsigned int block_size(struct block_device *bdev) -{ - return 1 << bdev->bd_inode->i_blkbits; -} - int kblockd_schedule_work(struct work_struct *work); int kblockd_mod_delayed_work_on(int cpu, struct 
delayed_work *dwork, unsigned long delay); @@ -1538,6 +1528,8 @@ void blkdev_put_no_open(struct block_device *bdev); struct block_device *I_BDEV(struct inode *inode); struct block_device *file_bdev(struct file *bdev_file); +bool disk_live(struct gendisk *disk); +unsigned int block_size(struct block_device *bdev); #ifdef CONFIG_BLOCK void invalidate_bdev(struct block_device *bdev); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index a1c0bdd0cca66b..e022e40b099e33 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -364,7 +364,7 @@ static inline struct buffer_head *getblk_unmovable(struct block_device *bdev, { gfp_t gfp; - gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS); + gfp = mapping_gfp_constraint(bdev->bd_mapping, ~__GFP_FS); gfp |= __GFP_NOFAIL; return bdev_getblk(bdev, block, size, gfp); @@ -375,7 +375,7 @@ static inline struct buffer_head *__getblk(struct block_device *bdev, { gfp_t gfp; - gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS); + gfp = mapping_gfp_constraint(bdev->bd_mapping, ~__GFP_FS); gfp |= __GFP_MOVABLE | __GFP_NOFAIL; return bdev_getblk(bdev, block, size, gfp); diff --git a/include/linux/fs.h b/include/linux/fs.h index 0839b4f067164b..ff5fcd98c9e4d8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -112,23 +112,26 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, */ /* file is open for reading */ -#define FMODE_READ ((__force fmode_t)0x1) +#define FMODE_READ ((__force fmode_t)(1 << 0)) /* file is open for writing */ -#define FMODE_WRITE ((__force fmode_t)0x2) +#define FMODE_WRITE ((__force fmode_t)(1 << 1)) /* file is seekable */ -#define FMODE_LSEEK ((__force fmode_t)0x4) +#define FMODE_LSEEK ((__force fmode_t)(1 << 2)) /* file can be accessed using pread */ -#define FMODE_PREAD ((__force fmode_t)0x8) +#define FMODE_PREAD ((__force fmode_t)(1 << 3)) /* file can be accessed using pwrite */ -#define FMODE_PWRITE ((__force 
fmode_t)0x10) +#define FMODE_PWRITE ((__force fmode_t)(1 << 4)) /* File is opened for execution with sys_execve / sys_uselib */ -#define FMODE_EXEC ((__force fmode_t)0x20) +#define FMODE_EXEC ((__force fmode_t)(1 << 5)) /* File writes are restricted (block device specific) */ -#define FMODE_WRITE_RESTRICTED ((__force fmode_t)0x40) +#define FMODE_WRITE_RESTRICTED ((__force fmode_t)(1 << 6)) + +/* FMODE_* bits 7 to 8 */ + /* 32bit hashes as llseek() offset (for directories) */ -#define FMODE_32BITHASH ((__force fmode_t)0x200) +#define FMODE_32BITHASH ((__force fmode_t)(1 << 9)) /* 64bit hashes as llseek() offset (for directories) */ -#define FMODE_64BITHASH ((__force fmode_t)0x400) +#define FMODE_64BITHASH ((__force fmode_t)(1 << 10)) /* * Don't update ctime and mtime. @@ -136,60 +139,53 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, * Currently a special hack for the XFS open_by_handle ioctl, but we'll * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. */ -#define FMODE_NOCMTIME ((__force fmode_t)0x800) +#define FMODE_NOCMTIME ((__force fmode_t)(1 << 11)) /* Expect random access pattern */ -#define FMODE_RANDOM ((__force fmode_t)0x1000) +#define FMODE_RANDOM ((__force fmode_t)(1 << 12)) /* File is huge (eg. 
/dev/mem): treat loff_t as unsigned */ -#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000) +#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)(1 << 13)) /* File is opened with O_PATH; almost nothing can be done with it */ -#define FMODE_PATH ((__force fmode_t)0x4000) +#define FMODE_PATH ((__force fmode_t)(1 << 14)) /* File needs atomic accesses to f_pos */ -#define FMODE_ATOMIC_POS ((__force fmode_t)0x8000) +#define FMODE_ATOMIC_POS ((__force fmode_t)(1 << 15)) /* Write access to underlying fs */ -#define FMODE_WRITER ((__force fmode_t)0x10000) +#define FMODE_WRITER ((__force fmode_t)(1 << 16)) /* Has read method(s) */ -#define FMODE_CAN_READ ((__force fmode_t)0x20000) +#define FMODE_CAN_READ ((__force fmode_t)(1 << 17)) /* Has write method(s) */ -#define FMODE_CAN_WRITE ((__force fmode_t)0x40000) +#define FMODE_CAN_WRITE ((__force fmode_t)(1 << 18)) -#define FMODE_OPENED ((__force fmode_t)0x80000) -#define FMODE_CREATED ((__force fmode_t)0x100000) +#define FMODE_OPENED ((__force fmode_t)(1 << 19)) +#define FMODE_CREATED ((__force fmode_t)(1 << 20)) /* File is stream-like */ -#define FMODE_STREAM ((__force fmode_t)0x200000) +#define FMODE_STREAM ((__force fmode_t)(1 << 21)) /* File supports DIRECT IO */ -#define FMODE_CAN_ODIRECT ((__force fmode_t)0x400000) +#define FMODE_CAN_ODIRECT ((__force fmode_t)(1 << 22)) -#define FMODE_NOREUSE ((__force fmode_t)0x800000) +#define FMODE_NOREUSE ((__force fmode_t)(1 << 23)) -/* File supports non-exclusive O_DIRECT writes from multiple threads */ -#define FMODE_DIO_PARALLEL_WRITE ((__force fmode_t)0x1000000) +/* FMODE_* bit 24 */ /* File is embedded in backing_file object */ -#define FMODE_BACKING ((__force fmode_t)0x2000000) +#define FMODE_BACKING ((__force fmode_t)(1 << 25)) /* File was opened by fanotify and shouldn't generate fanotify events */ -#define FMODE_NONOTIFY ((__force fmode_t)0x4000000) +#define FMODE_NONOTIFY ((__force fmode_t)(1 << 26)) /* File is capable of returning -EAGAIN if I/O will block */ -#define 
FMODE_NOWAIT ((__force fmode_t)0x8000000) +#define FMODE_NOWAIT ((__force fmode_t)(1 << 27)) /* File represents mount that needs unmounting */ -#define FMODE_NEED_UNMOUNT ((__force fmode_t)0x10000000) +#define FMODE_NEED_UNMOUNT ((__force fmode_t)(1 << 28)) /* File does not contribute to nr_files count */ -#define FMODE_NOACCOUNT ((__force fmode_t)0x20000000) - -/* File supports async buffered reads */ -#define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000) - -/* File supports async nowait buffered writes */ -#define FMODE_BUF_WASYNC ((__force fmode_t)0x80000000) +#define FMODE_NOACCOUNT ((__force fmode_t)(1 << 29)) /* * Attribute flags. These should be or-ed together to figure out what @@ -1035,7 +1031,7 @@ struct file_handle { __u32 handle_bytes; int handle_type; /* file identifier */ - unsigned char f_handle[]; + unsigned char f_handle[] __counted_by(handle_bytes); }; static inline struct file *get_file(struct file *f) @@ -1997,8 +1993,11 @@ struct iov_iter; struct io_uring_cmd; struct offset_ctx; +typedef unsigned int __bitwise fop_flags_t; + struct file_operations { struct module *owner; + fop_flags_t fop_flags; loff_t (*llseek) (struct file *, loff_t, int); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); @@ -2011,7 +2010,6 @@ struct file_operations { long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); - unsigned long mmap_supported_flags; int (*open) (struct inode *, struct file *); int (*flush) (struct file *, fl_owner_t id); int (*release) (struct inode *, struct file *); @@ -2042,6 +2040,17 @@ struct file_operations { unsigned int poll_flags); } __randomize_layout; +/* Supports async buffered reads */ +#define FOP_BUFFER_RASYNC ((__force fop_flags_t)(1 << 0)) +/* Supports async buffered writes */ +#define FOP_BUFFER_WASYNC 
((__force fop_flags_t)(1 << 1)) +/* Supports synchronous page faults for mappings */ +#define FOP_MMAP_SYNC ((__force fop_flags_t)(1 << 2)) +/* Supports non-exclusive O_DIRECT writes from multiple threads */ +#define FOP_DIO_PARALLEL_WRITE ((__force fop_flags_t)(1 << 3)) +/* Contains huge pages */ +#define FOP_HUGE_PAGES ((__force fop_flags_t)(1 << 4)) + /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, int (*) (struct file *, struct dir_context *)); @@ -2248,7 +2257,13 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) + +#ifdef CONFIG_SWAP #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) +#else +#define IS_SWAPFILE(inode) ((void)(inode), 0U) +#endif + #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) #define IS_IMA(inode) ((inode)->i_flags & S_IMA) #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) @@ -3331,6 +3346,8 @@ void simple_offset_init(struct offset_ctx *octx); int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry); void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry); int simple_offset_empty(struct dentry *dentry); +int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry); int simple_offset_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h index 01542c4b87a2be..d3350979115f0a 100644 --- a/include/linux/fs_parser.h +++ b/include/linux/fs_parser.h @@ -132,4 +132,8 @@ static inline bool fs_validate_description(const char *name, #define fsparam_path(NAME, OPT) __fsparam(fs_param_is_path, NAME, OPT, 0, NULL) #define fsparam_fd(NAME, OPT) __fsparam(fs_param_is_fd, NAME, OPT, 0, NULL) +/* String parameter 
that allows empty argument */ +#define fsparam_string_empty(NAME, OPT) \ + __fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL) + #endif /* _LINUX_FS_PARSER_H */ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 68244bb3637a87..2b3c3a40476917 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -526,17 +526,13 @@ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode) return container_of(inode, struct hugetlbfs_inode_info, vfs_inode); } -extern const struct file_operations hugetlbfs_file_operations; extern const struct vm_operations_struct hugetlb_vm_ops; struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct, int creat_flags, int page_size_log); -static inline bool is_file_hugepages(struct file *file) +static inline bool is_file_hugepages(const struct file *file) { - if (file->f_op == &hugetlbfs_file_operations) - return true; - - return is_file_shm_hugepages(file); + return file->f_op->fop_flags & FOP_HUGE_PAGES; } static inline struct hstate *hstate_inode(struct inode *i) diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 9512fe33266841..b20c79891966ec 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1692,7 +1692,7 @@ static inline void jbd2_journal_abort_handle(handle_t *handle) static inline void jbd2_init_fs_dev_write_error(journal_t *journal) { - struct address_space *mapping = journal->j_fs_dev->bd_inode->i_mapping; + struct address_space *mapping = journal->j_fs_dev->bd_mapping; /* * Save the original wb_err value of client fs's bdev mapping which @@ -1703,7 +1703,7 @@ static inline void jbd2_init_fs_dev_write_error(journal_t *journal) static inline int jbd2_check_fs_dev_write_error(journal_t *journal) { - struct address_space *mapping = journal->j_fs_dev->bd_inode->i_mapping; + struct address_space *mapping = journal->j_fs_dev->bd_mapping; return errseq_check(&mapping->wb_err, READ_ONCE(journal->j_fs_dev_wb_err)); diff --git 
a/include/linux/namei.h b/include/linux/namei.h index 74e0cc14ebf86b..967aa9ea9f960d 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -44,6 +44,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT}; #define LOOKUP_BENEATH 0x080000 /* No escaping from starting point. */ #define LOOKUP_IN_ROOT 0x100000 /* Treat dirfd as fs root. */ #define LOOKUP_CACHED 0x200000 /* Only do cached lookup */ +#define LOOKUP_LINKAT_EMPTY 0x400000 /* Linkat request with empty path. */ /* LOOKUP_* flags which do scope-related checks based on the dirfd. */ #define LOOKUP_IS_SCOPED (LOOKUP_BENEATH | LOOKUP_IN_ROOT) diff --git a/include/linux/shm.h b/include/linux/shm.h index c55bef0538e584..1d3d3ae958fbd3 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -16,7 +16,6 @@ struct sysv_shm { long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr, unsigned long shmlba); -bool is_file_shm_hugepages(struct file *file); void exit_shm(struct task_struct *task); #define shm_init_task(task) INIT_LIST_HEAD(&(task)->sysvshm.shm_clist) #else @@ -30,10 +29,6 @@ static inline long do_shmat(int shmid, char __user *shmaddr, { return -ENOSYS; } -static inline bool is_file_shm_hugepages(struct file *file) -{ - return false; -} static inline void exit_shm(struct task_struct *task) { } diff --git a/include/linux/stat.h b/include/linux/stat.h index 52150570d37a53..bf92441dbad284 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -53,6 +53,7 @@ struct kstat { u32 dio_mem_align; u32 dio_offset_align; u64 change_cookie; + u64 subvol; }; /* These definitions are internal to the kernel for now. Mainly used by nfsd. 
*/ diff --git a/include/linux/uio.h b/include/linux/uio.h index 00cebe2b70de7e..7020adedfa08cf 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -206,6 +206,16 @@ size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) } static __always_inline __must_check +bool copy_to_iter_full(const void *addr, size_t bytes, struct iov_iter *i) +{ + size_t copied = copy_to_iter(addr, bytes, i); + if (likely(copied == bytes)) + return true; + iov_iter_revert(i, copied); + return false; +} + +static __always_inline __must_check bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) { size_t copied = copy_from_iter(addr, bytes, i); diff --git a/include/net/udp.h b/include/net/udp.h index 488a6d2babccf2..c4e05b14b648a4 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -379,14 +379,7 @@ static inline bool udp_skb_is_linear(struct sk_buff *skb) static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, struct iov_iter *to) { - int n; - - n = copy_to_iter(skb->data + off, len, to); - if (n == len) - return 0; - - iov_iter_revert(to, n); - return -EFAULT; + return copy_to_iter_full(skb->data + off, len, to) ? 
0 : -EFAULT; } /* diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 2f2ee82d55175d..67626d53531664 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -126,8 +126,9 @@ struct statx { __u64 stx_mnt_id; __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ + __u64 stx_subvol; /* Subvolume identifier */ /* 0xa0 */ - __u64 __spare3[12]; /* Spare space for future expansion */ + __u64 __spare3[11]; /* Spare space for future expansion */ /* 0x100 */ }; @@ -155,6 +156,7 @@ struct statx { #define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */ #define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ +#define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index d5edfb8444d78f..eef8a4f90d989d 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -471,7 +471,7 @@ static void io_prep_async_work(struct io_kiocb *req) /* don't serialize this request if the fs doesn't need it */ if (should_hash && (req->file->f_flags & O_DIRECT) && - (req->file->f_mode & FMODE_DIO_PARALLEL_WRITE)) + (req->file->f_op->fop_flags & FOP_DIO_PARALLEL_WRITE)) should_hash = false; if (should_hash || (ctx->flags & IORING_SETUP_IOPOLL)) io_wq_hash_work(&req->work, file_inode(req->file)); diff --git a/io_uring/rw.c b/io_uring/rw.c index c8d48287439e5a..2382116aa4b20e 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -683,7 +683,8 @@ static bool io_rw_should_retry(struct io_kiocb *req) * just use poll if we can, and don't attempt if the fs doesn't * support callback based unlocks */ - if (io_file_can_poll(req) || !(req->file->f_mode & FMODE_BUF_RASYNC)) + if (io_file_can_poll(req) || + !(req->file->f_op->fop_flags & FOP_BUFFER_RASYNC)) 
return false; wait->wait.func = io_async_buf_func; @@ -1029,10 +1030,10 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags) if (unlikely(!io_file_supports_nowait(req))) goto copy_iov; - /* File path supports NOWAIT for non-direct_IO only for block devices. */ + /* Check if we can support NOWAIT. */ if (!(kiocb->ki_flags & IOCB_DIRECT) && - !(kiocb->ki_filp->f_mode & FMODE_BUF_WASYNC) && - (req->flags & REQ_F_ISREG)) + !(req->file->f_op->fop_flags & FOP_BUFFER_WASYNC) && + (req->flags & REQ_F_ISREG)) goto copy_iov; kiocb->ki_flags |= IOCB_NOWAIT; diff --git a/ipc/shm.c b/ipc/shm.c index a89f001a8bf072..3e3071252dac65 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -662,8 +662,8 @@ static const struct file_operations shm_file_operations = { }; /* - * shm_file_operations_huge is now identical to shm_file_operations, - * but we keep it distinct for the sake of is_file_shm_hugepages(). + * shm_file_operations_huge is now identical to shm_file_operations + * except for fop_flags */ static const struct file_operations shm_file_operations_huge = { .mmap = shm_mmap, @@ -672,13 +672,9 @@ static const struct file_operations shm_file_operations_huge = { .get_unmapped_area = shm_get_unmapped_area, .llseek = noop_llseek, .fallocate = shm_fallocate, + .fop_flags = FOP_HUGE_PAGES, }; -bool is_file_shm_hugepages(struct file *file) -{ - return file->f_op == &shm_file_operations_huge; -} - static const struct vm_operations_struct shm_vm_ops = { .open = shm_open, /* callback for a new vm-area open */ .close = shm_close, /* callback for when the vm-area is released */ diff --git a/mm/mmap.c b/mm/mmap.c index 057270dbe3aa12..83b4682ec85cfa 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1304,7 +1304,9 @@ unsigned long do_mmap(struct file *file, unsigned long addr, if (!file_mmap_ok(file, inode, pgoff, len)) return -EOVERFLOW; - flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags; + flags_mask = LEGACY_MAP_MASK; + if (file->f_op->fop_flags & FOP_MMAP_SYNC) + flags_mask |= 
MAP_SYNC; switch (flags & MAP_TYPE) { case MAP_SHARED: diff --git a/mm/shmem.c b/mm/shmem.c index fa2a0ed97507d1..f5d60436b604af 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3466,8 +3466,7 @@ static int shmem_rename2(struct mnt_idmap *idmap, return error; } - simple_offset_remove(shmem_get_offset_ctx(old_dir), old_dentry); - error = simple_offset_add(shmem_get_offset_ctx(new_dir), old_dentry); + error = simple_offset_rename(old_dir, old_dentry, new_dir, new_dentry); if (error) return error; |