1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
From 36ae378ece30da0dad87615f45f30e3062e0c3e9 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sashal@kernel.org>
Date: Mon, 26 Feb 2024 02:24:52 +0000
Subject: net: make SK_MEMORY_PCPU_RESERV tunable
From: Adam Li <adamli@os.amperecomputing.com>
[ Upstream commit 12a686c2e761f1f1f6e6e2117a9ab9c6de2ac8a7 ]
This patch adds /proc/sys/net/core/mem_pcpu_rsv sysctl file,
to make SK_MEMORY_PCPU_RESERV tunable.
Commit 3cd3399dd7a8 ("net: implement per-cpu reserves for
memory_allocated") introduced per-cpu forward alloc cache:
"Implement a per-cpu cache of +1/-1 MB, to reduce number
of changes to sk->sk_prot->memory_allocated, which
would otherwise be cause of false sharing."
sk_prot->memory_allocated points to global atomic variable:
atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp;
If increasing the per-cpu cache size from 1MB to e.g. 16MB,
changes to sk->sk_prot->memory_allocated can be further reduced.
Performance may be improved on system with many cores.
Signed-off-by: Adam Li <adamli@os.amperecomputing.com>
Reviewed-by: Christoph Lameter (Ampere) <cl@linux.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Stable-dep-of: 3584718cf2ec ("net: fix sk_memory_allocated_{add|sub} vs softirqs")
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
Documentation/admin-guide/sysctl/net.rst | 5 +++++
include/net/sock.h | 5 +++--
net/core/sock.c | 1 +
net/core/sysctl_net_core.c | 9 +++++++++
4 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index 6394f5dc2303d..e3894c928118c 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -205,6 +205,11 @@ Will increase power usage.
Default: 0 (off)
+mem_pcpu_rsv
+------------
+
+Per-cpu reserved forward alloc cache size in page units. Default 1MB per CPU.
+
rmem_default
------------
diff --git a/include/net/sock.h b/include/net/sock.h
index 60577751ea9e8..6ef6ce43a2edc 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1483,6 +1483,7 @@ sk_memory_allocated(const struct sock *sk)
/* 1 MB per cpu, in page units */
#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
+extern int sysctl_mem_pcpu_rsv;
static inline void
sk_memory_allocated_add(struct sock *sk, int amt)
@@ -1491,7 +1492,7 @@ sk_memory_allocated_add(struct sock *sk, int amt)
preempt_disable();
local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
- if (local_reserve >= SK_MEMORY_PCPU_RESERVE) {
+ if (local_reserve >= READ_ONCE(sysctl_mem_pcpu_rsv)) {
__this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
}
@@ -1505,7 +1506,7 @@ sk_memory_allocated_sub(struct sock *sk, int amt)
preempt_disable();
local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
- if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) {
+ if (local_reserve <= -READ_ONCE(sysctl_mem_pcpu_rsv)) {
__this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
}
diff --git a/net/core/sock.c b/net/core/sock.c
index c8803b95ea0da..550af616f5359 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -279,6 +279,7 @@ __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
EXPORT_SYMBOL(sysctl_rmem_max);
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
+int sysctl_mem_pcpu_rsv __read_mostly = SK_MEMORY_PCPU_RESERVE;
/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 5b1ce656baa1d..d281d5343ff4a 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -29,6 +29,7 @@ static int int_3600 = 3600;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
static int max_skb_frags = MAX_SKB_FRAGS;
+static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE;
static int net_msg_warn; /* Unused, but still a sysctl */
@@ -348,6 +349,14 @@ static struct ctl_table net_core_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_rcvbuf,
},
+ {
+ .procname = "mem_pcpu_rsv",
+ .data = &sysctl_mem_pcpu_rsv,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &min_mem_pcpu_rsv,
+ },
{
.procname = "dev_weight",
.data = &weight_p,
--
2.43.0
|