summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBen Hutchings <ben@decadent.org.uk>2020-04-24 16:47:04 +0100
committerBen Hutchings <ben@decadent.org.uk>2020-04-24 16:47:04 +0100
commitccc533e80bec43289c924129b12d979cdc18d8a1 (patch)
tree63511ceb4b1748ba82abcf8a05a8b2373ca604bd
parentd58cdcc6e1b8a6cdbf39f5274d1c336052d4e823 (diff)
downloadlinux-stable-queue-ccc533e80bec43289c924129b12d979cdc18d8a1.tar.gz
Add futex fixes requested by Jann Horn
-rw-r--r--queue-3.16/futex-fix-inode-life-time-issue.patch221
-rw-r--r--queue-3.16/futex-unbreak-futex-hashing.patch42
-rw-r--r--queue-3.16/series2
3 files changed, 265 insertions, 0 deletions
diff --git a/queue-3.16/futex-fix-inode-life-time-issue.patch b/queue-3.16/futex-fix-inode-life-time-issue.patch
new file mode 100644
index 00000000..0950b6ea
--- /dev/null
+++ b/queue-3.16/futex-fix-inode-life-time-issue.patch
@@ -0,0 +1,221 @@
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Wed, 4 Mar 2020 11:28:31 +0100
+Subject: futex: Fix inode life-time issue
+
+commit 8019ad13ef7f64be44d4f892af9c840179009254 upstream.
+
+As reported by Jann, ihold() does not in fact guarantee inode
+persistence. And instead of making it so, replace the usage of inode
+pointers with a per boot, machine wide, unique inode identifier.
+
+This sequence number is global, but shared (file backed) futexes are
+rare enough that this should not become a performance issue.
+
+Reported-by: Jann Horn <jannh@google.com>
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+[bwh: Backported to 3.16: Use atomic64_cmpxchg() instead of the
+ _relaxed() variant which we don't have]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ fs/inode.c | 1 +
+ include/linux/fs.h | 1 +
+ include/linux/futex.h | 17 +++++----
+ kernel/futex.c | 89 ++++++++++++++++++++++++++-----------------
+ 4 files changed, 65 insertions(+), 43 deletions(-)
+
+--- a/fs/inode.c
++++ b/fs/inode.c
+@@ -131,6 +131,7 @@ int inode_init_always(struct super_block
+ inode->i_sb = sb;
+ inode->i_blkbits = sb->s_blocksize_bits;
+ inode->i_flags = 0;
++ atomic64_set(&inode->i_sequence, 0);
+ atomic_set(&inode->i_count, 1);
+ inode->i_op = &empty_iops;
+ inode->i_fop = &empty_fops;
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -570,6 +570,7 @@ struct inode {
+ struct rcu_head i_rcu;
+ };
+ u64 i_version;
++ atomic64_t i_sequence; /* see futex */
+ atomic_t i_count;
+ atomic_t i_dio_count;
+ atomic_t i_writecount;
+--- a/include/linux/futex.h
++++ b/include/linux/futex.h
+@@ -39,23 +39,26 @@ handle_futex_death(u32 __user *uaddr, st
+
+ union futex_key {
+ struct {
++ u64 i_seq;
+ unsigned long pgoff;
+- struct inode *inode;
+- int offset;
++ unsigned int offset;
+ } shared;
+ struct {
++ union {
++ struct mm_struct *mm;
++ u64 __tmp;
++ };
+ unsigned long address;
+- struct mm_struct *mm;
+- int offset;
++ unsigned int offset;
+ } private;
+ struct {
++ u64 ptr;
+ unsigned long word;
+- void *ptr;
+- int offset;
++ unsigned int offset;
+ } both;
+ };
+
+-#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
++#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }
+
+ #ifdef CONFIG_FUTEX
+ extern void exit_robust_list(struct task_struct *curr);
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -338,7 +338,7 @@ static void get_futex_key_refs(union fut
+
+ switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
+ case FUT_OFF_INODE:
+- ihold(key->shared.inode); /* implies MB (B) */
++ smp_mb(); /* explicit smp_mb(); (B) */
+ break;
+ case FUT_OFF_MMSHARED:
+ futex_get_mm(key); /* implies MB (B) */
+@@ -362,7 +362,6 @@ static void drop_futex_key_refs(union fu
+
+ switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
+ case FUT_OFF_INODE:
+- iput(key->shared.inode);
+ break;
+ case FUT_OFF_MMSHARED:
+ mmdrop(key->private.mm);
+@@ -370,6 +369,46 @@ static void drop_futex_key_refs(union fu
+ }
+ }
+
++/*
++ * Generate a machine wide unique identifier for this inode.
++ *
++ * This relies on u64 not wrapping in the life-time of the machine; which with
++ * 1ns resolution means almost 585 years.
++ *
++ * This further relies on the fact that a well formed program will not unmap
++ * the file while it has a (shared) futex waiting on it. This mapping will have
++ * a file reference which pins the mount and inode.
++ *
++ * If for some reason an inode gets evicted and read back in again, it will get
++ * a new sequence number and will _NOT_ match, even though it is the exact same
++ * file.
++ *
++ * It is important that match_futex() will never have a false-positive, esp.
++ * for PI futexes that can mess up the state. The above argues that false-negatives
++ * are only possible for malformed programs.
++ */
++static u64 get_inode_sequence_number(struct inode *inode)
++{
++ static atomic64_t i_seq;
++ u64 old;
++
++ /* Does the inode already have a sequence number? */
++ old = atomic64_read(&inode->i_sequence);
++ if (likely(old))
++ return old;
++
++ for (;;) {
++ u64 new = atomic64_add_return(1, &i_seq);
++ if (WARN_ON_ONCE(!new))
++ continue;
++
++ old = atomic64_cmpxchg(&inode->i_sequence, 0, new);
++ if (old)
++ return old;
++ return new;
++ }
++}
++
+ /**
+ * get_futex_key() - Get parameters which are the keys for a futex
+ * @uaddr: virtual address of the futex
+@@ -382,9 +421,15 @@ static void drop_futex_key_refs(union fu
+ *
+ * The key words are stored in *key on success.
+ *
+- * For shared mappings, it's (page->index, file_inode(vma->vm_file),
+- * offset_within_page). For private mappings, it's (uaddr, current->mm).
+- * We can usually work out the index without swapping in the page.
++ * For shared mappings (when @fshared), the key is:
++ * ( inode->i_sequence, page->index, offset_within_page )
++ * [ also see get_inode_sequence_number() ]
++ *
++ * For private mappings (or when !@fshared), the key is:
++ * ( current->mm, address, 0 )
++ *
++ * This allows (cross process, where applicable) identification of the futex
++ * without keeping the page pinned for the duration of the FUTEX_WAIT.
+ *
+ * lock_page() might sleep, the caller should not hold a spinlock.
+ */
+@@ -545,8 +590,6 @@ again:
+ key->private.mm = mm;
+ key->private.address = address;
+
+- get_futex_key_refs(key); /* implies smp_mb(); (B) */
+-
+ } else {
+ struct inode *inode;
+
+@@ -578,40 +621,14 @@ again:
+ goto again;
+ }
+
+- /*
+- * Take a reference unless it is about to be freed. Previously
+- * this reference was taken by ihold under the page lock
+- * pinning the inode in place so i_lock was unnecessary. The
+- * only way for this check to fail is if the inode was
+- * truncated in parallel which is almost certainly an
+- * application bug. In such a case, just retry.
+- *
+- * We are not calling into get_futex_key_refs() in file-backed
+- * cases, therefore a successful atomic_inc return below will
+- * guarantee that get_futex_key() will still imply smp_mb(); (B).
+- */
+- if (!atomic_inc_not_zero(&inode->i_count)) {
+- rcu_read_unlock();
+- put_page(page_head);
+-
+- goto again;
+- }
+-
+- /* Should be impossible but lets be paranoid for now */
+- if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
+- err = -EFAULT;
+- rcu_read_unlock();
+- iput(inode);
+-
+- goto out;
+- }
+-
+ key->both.offset |= FUT_OFF_INODE; /* inode-based key */
+- key->shared.inode = inode;
++ key->shared.i_seq = get_inode_sequence_number(inode);
+ key->shared.pgoff = basepage_index(page);
+ rcu_read_unlock();
+ }
+
++ get_futex_key_refs(key); /* implies smp_mb(); (B) */
++
+ out:
+ put_page(page_head);
+ return err;
diff --git a/queue-3.16/futex-unbreak-futex-hashing.patch b/queue-3.16/futex-unbreak-futex-hashing.patch
new file mode 100644
index 00000000..0d1017f0
--- /dev/null
+++ b/queue-3.16/futex-unbreak-futex-hashing.patch
@@ -0,0 +1,42 @@
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 8 Mar 2020 19:07:17 +0100
+Subject: futex: Unbreak futex hashing
+
+commit 8d67743653dce5a0e7aa500fcccb237cde7ad88e upstream.
+
+The recent futex inode life time fix changed the ordering of the futex key
+union struct members, but forgot to adjust the hash function accordingly,
+
+As a result the hashing omits the leading 64bit and even hashes beyond the
+futex key causing a bad hash distribution which led to a ~100% performance
+regression.
+
+Hand in the futex key pointer instead of a random struct member and make
+the size calculation based of the struct offset.
+
+Fixes: 8019ad13ef7f ("futex: Fix inode life-time issue")
+Reported-by: Rong Chen <rong.a.chen@intel.com>
+Decoded-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Rong Chen <rong.a.chen@intel.com>
+Link: https://lkml.kernel.org/r/87h7yy90ve.fsf@nanos.tec.linutronix.de
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Cc: Jann Horn <jannh@google.com>
+---
+ kernel/futex.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -309,9 +309,9 @@ static inline int hb_waiters_pending(str
+ */
+ static struct futex_hash_bucket *hash_futex(union futex_key *key)
+ {
+- u32 hash = jhash2((u32*)&key->both.word,
+- (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
++ u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
+ key->both.offset);
++
+ return &futex_queues[hash & (futex_hashsize - 1)];
+ }
+
diff --git a/queue-3.16/series b/queue-3.16/series
index a676e4a8..3104b993 100644
--- a/queue-3.16/series
+++ b/queue-3.16/series
@@ -243,3 +243,5 @@ media-device-dynamically-allocate-struct-media_devnode.patch
media-fix-use-after-free-in-cdev_put-when-app-exits-after.patch
media-fix-media-devnode-ioctl-syscall-and-unregister-race.patch
slcan-don-t-transmit-uninitialized-stack-data-in-padding.patch
+futex-fix-inode-life-time-issue.patch
+futex-unbreak-futex-hashing.patch