From efd402537673f9951992aea4ef0f5ff51d858f4b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Jan 2024 11:25:55 +0000 Subject: sock_diag: annotate data-races around sock_diag_handlers[family] __sock_diag_cmd() and sock_diag_bind() read sock_diag_handlers[family] without a lock held. Use READ_ONCE()/WRITE_ONCE() annotations to avoid potential issues. Fixes: 8ef874bfc729 ("sock_diag: Move the sock_ code to net/core/") Signed-off-by: Eric Dumazet Reviewed-by: Guillaume Nault Reviewed-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- net/core/sock_diag.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index b1e29e18d1d60..c53b731f2d672 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -193,7 +193,7 @@ int sock_diag_register(const struct sock_diag_handler *hndl) if (sock_diag_handlers[hndl->family]) err = -EBUSY; else - sock_diag_handlers[hndl->family] = hndl; + WRITE_ONCE(sock_diag_handlers[hndl->family], hndl); mutex_unlock(&sock_diag_table_mutex); return err; @@ -209,7 +209,7 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld) mutex_lock(&sock_diag_table_mutex); BUG_ON(sock_diag_handlers[family] != hnld); - sock_diag_handlers[family] = NULL; + WRITE_ONCE(sock_diag_handlers[family], NULL); mutex_unlock(&sock_diag_table_mutex); } EXPORT_SYMBOL_GPL(sock_diag_unregister); @@ -227,7 +227,7 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; req->sdiag_family = array_index_nospec(req->sdiag_family, AF_MAX); - if (sock_diag_handlers[req->sdiag_family] == NULL) + if (READ_ONCE(sock_diag_handlers[req->sdiag_family]) == NULL) sock_load_diag_module(req->sdiag_family, 0); mutex_lock(&sock_diag_table_mutex); @@ -286,12 +286,12 @@ static int sock_diag_bind(struct net *net, int group) switch (group) { case SKNLGRP_INET_TCP_DESTROY: case SKNLGRP_INET_UDP_DESTROY: - if (!sock_diag_handlers[AF_INET]) + if (!READ_ONCE(sock_diag_handlers[AF_INET])) sock_load_diag_module(AF_INET, 0); break; case SKNLGRP_INET6_TCP_DESTROY: case SKNLGRP_INET6_UDP_DESTROY: - if (!sock_diag_handlers[AF_INET6]) + if (!READ_ONCE(sock_diag_handlers[AF_INET6])) sock_load_diag_module(AF_INET6, 0); break; } -- cgit 1.2.3-korg From e50e10ae5d81ddb41547114bfdc5edc04422f082 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Jan 2024 11:25:56 +0000 Subject: inet_diag: annotate data-races around inet_diag_table[] inet_diag_lock_handler() reads inet_diag_table[proto] locklessly. Use READ_ONCE()/WRITE_ONCE() annotations to avoid potential issues. Fixes: d523a328fb02 ("[INET]: Fix inet_diag dead-lock regression") Signed-off-by: Eric Dumazet Reviewed-by: Guillaume Nault Reviewed-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- net/ipv4/inet_diag.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 8e6b6aa0579e1..9804e9608a5a0 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -57,7 +57,7 @@ static const struct inet_diag_handler *inet_diag_lock_handler(int proto) return ERR_PTR(-ENOENT); } - if (!inet_diag_table[proto]) + if (!READ_ONCE(inet_diag_table[proto])) sock_load_diag_module(AF_INET, proto); mutex_lock(&inet_diag_table_mutex); @@ -1503,7 +1503,7 @@ int inet_diag_register(const struct inet_diag_handler *h) mutex_lock(&inet_diag_table_mutex); err = -EEXIST; if (!inet_diag_table[type]) { - inet_diag_table[type] = h; + WRITE_ONCE(inet_diag_table[type], h); err = 0; } mutex_unlock(&inet_diag_table_mutex); @@ -1520,7 +1520,7 @@ void inet_diag_unregister(const struct inet_diag_handler *h) return; mutex_lock(&inet_diag_table_mutex); - inet_diag_table[type] = NULL; + WRITE_ONCE(inet_diag_table[type], NULL); mutex_unlock(&inet_diag_table_mutex); } EXPORT_SYMBOL_GPL(inet_diag_unregister); -- cgit 1.2.3-korg From db5914695a84a7b128ec2e4e9272e6e8091753e1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Jan 2024 11:25:57 +0000 Subject: inet_diag: add module pointer to "struct inet_diag_handler" Following patch is going to use RCU instead of inet_diag_table_mutex acquisition. This patch is a preparation, no change of behavior yet. Signed-off-by: Eric Dumazet Reviewed-by: Guillaume Nault Reviewed-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/inet_diag.h | 1 + net/dccp/diag.c | 1 + net/ipv4/raw_diag.c | 1 + net/ipv4/tcp_diag.c | 1 + net/ipv4/udp_diag.c | 2 ++ net/mptcp/mptcp_diag.c | 1 + net/sctp/diag.c | 1 + 7 files changed, 8 insertions(+) (limited to 'net') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 84abb30a3fbb1..a9033696b0aad 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -8,6 +8,7 @@ struct inet_hashinfo; struct inet_diag_handler { + struct module *owner; void (*dump)(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r); diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 8a82c5a2c5a8c..f5019d95c3ae5 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -58,6 +58,7 @@ static int dccp_diag_dump_one(struct netlink_callback *cb, } static const struct inet_diag_handler dccp_diag_handler = { + .owner = THIS_MODULE, .dump = dccp_diag_dump, .dump_one = dccp_diag_dump_one, .idiag_get_info = dccp_diag_get_info, diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index fe2140c8375c8..cc793bd8de258 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -213,6 +213,7 @@ static int raw_diag_destroy(struct sk_buff *in_skb, #endif static const struct inet_diag_handler raw_diag_handler = { + .owner = THIS_MODULE, .dump = raw_diag_dump, .dump_one = raw_diag_dump_one, .idiag_get_info = raw_diag_get_info, diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 4cbe4b44425a6..f428ecf9120f2 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -222,6 +222,7 @@ static int tcp_diag_destroy(struct sk_buff *in_skb, #endif static const struct inet_diag_handler tcp_diag_handler = { + .owner = THIS_MODULE, .dump = tcp_diag_dump, .dump_one = tcp_diag_dump_one, .idiag_get_info = tcp_diag_get_info, diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index dc41a22ee80e8..38cb3a28e4ed6 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -237,6 +237,7 @@ static int udplite_diag_destroy(struct sk_buff *in_skb, #endif static const struct inet_diag_handler udp_diag_handler = { + .owner = THIS_MODULE, .dump = udp_diag_dump, .dump_one = udp_diag_dump_one, .idiag_get_info = udp_diag_get_info, @@ -260,6 +261,7 @@ static int udplite_diag_dump_one(struct netlink_callback *cb, } static const struct inet_diag_handler udplite_diag_handler = { + .owner = THIS_MODULE, .dump = udplite_diag_dump, .dump_one = udplite_diag_dump_one, .idiag_get_info = udp_diag_get_info, diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c index 5409c2ea3f572..bd8ff5950c8d3 100644 --- a/net/mptcp/mptcp_diag.c +++ b/net/mptcp/mptcp_diag.c @@ -225,6 +225,7 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, } static const struct inet_diag_handler mptcp_diag_handler = { + .owner = THIS_MODULE, .dump = mptcp_diag_dump, .dump_one = mptcp_diag_dump_one, .idiag_get_info = mptcp_diag_get_info, diff --git a/net/sctp/diag.c b/net/sctp/diag.c index eb05131ff1dd6..23359e522273f 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -507,6 +507,7 @@ done: } static const struct inet_diag_handler sctp_diag_handler = { + .owner = THIS_MODULE, .dump = sctp_diag_dump, .dump_one = sctp_diag_dump_one, .idiag_get_info = sctp_diag_get_info, -- cgit 1.2.3-korg From 223f55196bbdb182a9b8de6108a0834b5e5e832e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Jan 2024 11:25:58 +0000 Subject: inet_diag: allow concurrent operations inet_diag_lock_handler() current implementation uses a mutex to protect inet_diag_table[] array against concurrent changes. This makes inet_diag dump serialized, thus less scalable than legacy /proc files. It is time to switch to full RCU protection. As a bonus, if a target is statically linked instead of being modular, inet_diag_lock_handler() & inet_diag_unlock_handler() reduce to reads only. Signed-off-by: Eric Dumazet Reviewed-by: Guillaume Nault Reviewed-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- net/ipv4/inet_diag.c | 80 ++++++++++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 9804e9608a5a0..abf7dc9827969 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -32,7 +32,7 @@ #include #include -static const struct inet_diag_handler **inet_diag_table; +static const struct inet_diag_handler __rcu **inet_diag_table; struct inet_diag_entry { const __be32 *saddr; @@ -48,28 +48,28 @@ struct inet_diag_entry { #endif }; -static DEFINE_MUTEX(inet_diag_table_mutex); - static const struct inet_diag_handler *inet_diag_lock_handler(int proto) { - if (proto < 0 || proto >= IPPROTO_MAX) { - mutex_lock(&inet_diag_table_mutex); - return ERR_PTR(-ENOENT); - } + const struct inet_diag_handler *handler; + + if (proto < 0 || proto >= IPPROTO_MAX) + return NULL; if (!READ_ONCE(inet_diag_table[proto])) sock_load_diag_module(AF_INET, proto); - mutex_lock(&inet_diag_table_mutex); - if (!inet_diag_table[proto]) - return ERR_PTR(-ENOENT); + rcu_read_lock(); + handler = rcu_dereference(inet_diag_table[proto]); + if (handler && !try_module_get(handler->owner)) + handler = NULL; + rcu_read_unlock(); - return inet_diag_table[proto]; + return handler; } static void inet_diag_unlock_handler(const struct inet_diag_handler *handler) { - mutex_unlock(&inet_diag_table_mutex); + module_put(handler->owner); } void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk) @@ -104,9 +104,12 @@ static size_t inet_sk_attr_size(struct sock *sk, const struct inet_diag_handler *handler; size_t aux = 0; - handler = inet_diag_table[req->sdiag_protocol]; + rcu_read_lock(); + handler = rcu_dereference(inet_diag_table[req->sdiag_protocol]); + DEBUG_NET_WARN_ON_ONCE(!handler); if (handler && handler->idiag_get_aux_size) aux = handler->idiag_get_aux_size(sk, net_admin); + rcu_read_unlock(); return nla_total_size(sizeof(struct tcp_info)) + nla_total_size(sizeof(struct inet_diag_msg)) @@ -244,10 +247,16 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct nlmsghdr *nlh; struct nlattr *attr; void *info = NULL; + int protocol; cb_data = cb->data; - handler = inet_diag_table[inet_diag_get_protocol(req, cb_data)]; - BUG_ON(!handler); + protocol = inet_diag_get_protocol(req, cb_data); + + /* inet_diag_lock_handler() made sure inet_diag_table[] is stable. */ + handler = rcu_dereference_protected(inet_diag_table[protocol], 1); + DEBUG_NET_WARN_ON_ONCE(!handler); + if (!handler) + return -ENXIO; nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags); @@ -605,9 +614,10 @@ static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb, protocol = inet_diag_get_protocol(req, &dump_data); handler = inet_diag_lock_handler(protocol); - if (IS_ERR(handler)) { - err = PTR_ERR(handler); - } else if (cmd == SOCK_DIAG_BY_FAMILY) { + if (!handler) + return -ENOENT; + + if (cmd == SOCK_DIAG_BY_FAMILY) { struct netlink_callback cb = { .nlh = nlh, .skb = in_skb, @@ -1259,12 +1269,12 @@ static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, again: prev_min_dump_alloc = cb->min_dump_alloc; handler = inet_diag_lock_handler(protocol); - if (!IS_ERR(handler)) + if (handler) { handler->dump(skb, cb, r); - else - err = PTR_ERR(handler); - inet_diag_unlock_handler(handler); - + inet_diag_unlock_handler(handler); + } else { + err = -ENOENT; + } /* The skb is not large enough to fit one sk info and * inet_sk_diag_fill() has requested for a larger skb. */ @@ -1457,10 +1467,9 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk) } handler = inet_diag_lock_handler(sk->sk_protocol); - if (IS_ERR(handler)) { - inet_diag_unlock_handler(handler); + if (!handler) { nlmsg_cancel(skb, nlh); - return PTR_ERR(handler); + return -ENOENT; } attr = handler->idiag_info_size @@ -1495,20 +1504,12 @@ static const struct sock_diag_handler inet6_diag_handler = { int inet_diag_register(const struct inet_diag_handler *h) { const __u16 type = h->idiag_type; - int err = -EINVAL; if (type >= IPPROTO_MAX) - goto out; + return -EINVAL; - mutex_lock(&inet_diag_table_mutex); - err = -EEXIST; - if (!inet_diag_table[type]) { - WRITE_ONCE(inet_diag_table[type], h); - err = 0; - } - mutex_unlock(&inet_diag_table_mutex); -out: - return err; + return !cmpxchg((const struct inet_diag_handler **)&inet_diag_table[type], + NULL, h) ? 0 : -EEXIST; } EXPORT_SYMBOL_GPL(inet_diag_register); @@ -1519,9 +1520,8 @@ void inet_diag_unregister(const struct inet_diag_handler *h) if (type >= IPPROTO_MAX) return; - mutex_lock(&inet_diag_table_mutex); - WRITE_ONCE(inet_diag_table[type], NULL); - mutex_unlock(&inet_diag_table_mutex); + xchg((const struct inet_diag_handler **)&inet_diag_table[type], + NULL); } EXPORT_SYMBOL_GPL(inet_diag_unregister); -- cgit 1.2.3-korg From 114b4bb1cc19239b272d52ebbe156053483fe2f8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Jan 2024 11:25:59 +0000 Subject: sock_diag: add module pointer to "struct sock_diag_handler" Following patch is going to use RCU instead of sock_diag_table_mutex acquisition. This patch is a preparation, no change of behavior yet. Signed-off-by: Eric Dumazet Reviewed-by: Guillaume Nault Reviewed-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/sock_diag.h | 1 + net/ipv4/inet_diag.c | 2 ++ net/netlink/diag.c | 1 + net/packet/diag.c | 1 + net/smc/smc_diag.c | 1 + net/tipc/diag.c | 1 + net/unix/diag.c | 1 + net/vmw_vsock/diag.c | 1 + net/xdp/xsk_diag.c | 1 + 9 files changed, 10 insertions(+) (limited to 'net') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 0b9ecd8cf9793..7c07754d711b9 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -13,6 +13,7 @@ struct nlmsghdr; struct sock; struct sock_diag_handler { + struct module *owner; __u8 family; int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh); int (*get_info)(struct sk_buff *skb, struct sock *sk); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index abf7dc9827969..52ce20691e4ef 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -1488,6 +1488,7 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk) } static const struct sock_diag_handler inet_diag_handler = { + .owner = THIS_MODULE, .family = AF_INET, .dump = inet_diag_handler_cmd, .get_info = inet_diag_handler_get_info, @@ -1495,6 +1496,7 @@ static const struct sock_diag_handler inet_diag_handler = { }; static const struct sock_diag_handler inet6_diag_handler = { + .owner = THIS_MODULE, .family = AF_INET6, .dump = inet_diag_handler_cmd, .get_info = inet_diag_handler_get_info, diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 1eeff9422856e..e12c90d5f6ad2 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -241,6 +241,7 @@ static int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) } static const struct sock_diag_handler netlink_diag_handler = { + .owner = THIS_MODULE, .family = AF_NETLINK, .dump = netlink_diag_handler_dump, }; diff --git a/net/packet/diag.c b/net/packet/diag.c index 9a7980e3309d6..b3bd2f6c2bf7b 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -245,6 +245,7 @@ static int packet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) } static const struct sock_diag_handler packet_diag_handler = { + .owner = THIS_MODULE, .family = AF_PACKET, .dump = packet_diag_handler_dump, }; diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 52f7c4f1e7670..32bad267fa3e2 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -255,6 +255,7 @@ static int smc_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) } static const struct sock_diag_handler smc_diag_handler = { + .owner = THIS_MODULE, .family = AF_SMC, .dump = smc_diag_handler_dump, }; diff --git a/net/tipc/diag.c b/net/tipc/diag.c index 18733451c9e0c..54dde8c4e4d46 100644 --- a/net/tipc/diag.c +++ b/net/tipc/diag.c @@ -95,6 +95,7 @@ static int tipc_sock_diag_handler_dump(struct sk_buff *skb, } static const struct sock_diag_handler tipc_sock_diag_handler = { + .owner = THIS_MODULE, .family = AF_TIPC, .dump = tipc_sock_diag_handler_dump, }; diff --git a/net/unix/diag.c b/net/unix/diag.c index bec09a3a1d44c..c3648b7065096 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -322,6 +322,7 @@ static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) } static const struct sock_diag_handler unix_diag_handler = { + .owner = THIS_MODULE, .family = AF_UNIX, .dump = unix_diag_handler_dump, }; diff --git a/net/vmw_vsock/diag.c b/net/vmw_vsock/diag.c index 2e29994f92ffa..ab87ef66c1e88 100644 --- a/net/vmw_vsock/diag.c +++ b/net/vmw_vsock/diag.c @@ -157,6 +157,7 @@ static int vsock_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) } static const struct sock_diag_handler vsock_diag_handler = { + .owner = THIS_MODULE, .family = AF_VSOCK, .dump = vsock_diag_handler_dump, }; diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c index 9f8955367275e..09dcea0cbbed9 100644 --- a/net/xdp/xsk_diag.c +++ b/net/xdp/xsk_diag.c @@ -194,6 +194,7 @@ static int xsk_diag_handler_dump(struct sk_buff *nlskb, struct nlmsghdr *hdr) } static const struct sock_diag_handler xsk_diag_handler = { + .owner = THIS_MODULE, .family = AF_XDP, .dump = xsk_diag_handler_dump, }; -- cgit 1.2.3-korg From 1d55a6974756cf3979efd2cc68bcece611a44053 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Jan 2024 11:26:00 +0000 Subject: sock_diag: allow concurrent operations sock_diag_broadcast_destroy_work() and __sock_diag_cmd() are currently using sock_diag_table_mutex to protect against concurrent sock_diag_handlers[] changes. This makes inet_diag dump serialized, thus less scalable than legacy /proc files. It is time to switch to full RCU protection. Signed-off-by: Eric Dumazet Reviewed-by: Guillaume Nault Reviewed-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- net/core/sock_diag.c | 73 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index c53b731f2d672..72009e1f4380d 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -16,7 +16,7 @@ #include #include -static const struct sock_diag_handler *sock_diag_handlers[AF_MAX]; +static const struct sock_diag_handler __rcu *sock_diag_handlers[AF_MAX]; static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); static DEFINE_MUTEX(sock_diag_table_mutex); static struct workqueue_struct *broadcast_wq; @@ -122,6 +122,24 @@ static size_t sock_diag_nlmsg_size(void) + nla_total_size_64bit(sizeof(struct tcp_info))); /* INET_DIAG_INFO */ } +static const struct sock_diag_handler *sock_diag_lock_handler(int family) +{ + const struct sock_diag_handler *handler; + + rcu_read_lock(); + handler = rcu_dereference(sock_diag_handlers[family]); + if (handler && !try_module_get(handler->owner)) + handler = NULL; + rcu_read_unlock(); + + return handler; +} + +static void sock_diag_unlock_handler(const struct sock_diag_handler *handler) +{ + module_put(handler->owner); +} + static void sock_diag_broadcast_destroy_work(struct work_struct *work) { struct broadcast_sk *bsk = @@ -138,12 +156,12 @@ static void sock_diag_broadcast_destroy_work(struct work_struct *work) if (!skb) goto out; - mutex_lock(&sock_diag_table_mutex); - hndl = sock_diag_handlers[sk->sk_family]; - if (hndl && hndl->get_info) - err = hndl->get_info(skb, sk); - mutex_unlock(&sock_diag_table_mutex); - + hndl = sock_diag_lock_handler(sk->sk_family); + if (hndl) { + if (hndl->get_info) + err = hndl->get_info(skb, sk); + sock_diag_unlock_handler(hndl); + } if (!err) nlmsg_multicast(sock_net(sk)->diag_nlsk, skb, 0, group, GFP_KERNEL); @@ -184,33 +202,26 @@ EXPORT_SYMBOL_GPL(sock_diag_unregister_inet_compat); int sock_diag_register(const struct sock_diag_handler *hndl) { - int err = 0; + int family = hndl->family; - if (hndl->family >= AF_MAX) + if (family >= AF_MAX) return -EINVAL; - mutex_lock(&sock_diag_table_mutex); - if (sock_diag_handlers[hndl->family]) - err = -EBUSY; - else - WRITE_ONCE(sock_diag_handlers[hndl->family], hndl); - mutex_unlock(&sock_diag_table_mutex); - - return err; + return !cmpxchg((const struct sock_diag_handler **) + &sock_diag_handlers[family], + NULL, hndl) ? 0 : -EBUSY; } EXPORT_SYMBOL_GPL(sock_diag_register); -void sock_diag_unregister(const struct sock_diag_handler *hnld) +void sock_diag_unregister(const struct sock_diag_handler *hndl) { - int family = hnld->family; + int family = hndl->family; if (family >= AF_MAX) return; - mutex_lock(&sock_diag_table_mutex); - BUG_ON(sock_diag_handlers[family] != hnld); - WRITE_ONCE(sock_diag_handlers[family], NULL); - mutex_unlock(&sock_diag_table_mutex); + xchg((const struct sock_diag_handler **)&sock_diag_handlers[family], + NULL); } EXPORT_SYMBOL_GPL(sock_diag_unregister); @@ -227,20 +238,20 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; req->sdiag_family = array_index_nospec(req->sdiag_family, AF_MAX); - if (READ_ONCE(sock_diag_handlers[req->sdiag_family]) == NULL) + if (!rcu_access_pointer(sock_diag_handlers[req->sdiag_family])) sock_load_diag_module(req->sdiag_family, 0); - mutex_lock(&sock_diag_table_mutex); - hndl = sock_diag_handlers[req->sdiag_family]; + hndl = sock_diag_lock_handler(req->sdiag_family); if (hndl == NULL) - err = -ENOENT; - else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY) + return -ENOENT; + + if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY) err = hndl->dump(skb, nlh); else if (nlh->nlmsg_type == SOCK_DESTROY && hndl->destroy) err = hndl->destroy(skb, nlh); else err = -EOPNOTSUPP; - mutex_unlock(&sock_diag_table_mutex); + sock_diag_unlock_handler(hndl); return err; } @@ -286,12 +297,12 @@ static int sock_diag_bind(struct net *net, int group) switch (group) { case SKNLGRP_INET_TCP_DESTROY: case SKNLGRP_INET_UDP_DESTROY: - if (!READ_ONCE(sock_diag_handlers[AF_INET])) + if (!rcu_access_pointer(sock_diag_handlers[AF_INET])) sock_load_diag_module(AF_INET, 0); break; case SKNLGRP_INET6_TCP_DESTROY: case SKNLGRP_INET6_UDP_DESTROY: - if (!READ_ONCE(sock_diag_handlers[AF_INET6])) + if (!rcu_access_pointer(sock_diag_handlers[AF_INET6])) sock_load_diag_module(AF_INET6, 0); break; } -- cgit 1.2.3-korg From 86e8921df05c6e9423ab74ab8d41022775d8b83a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Jan 2024 11:26:01 +0000 Subject: sock_diag: allow concurrent operation in sock_diag_rcv_msg() TCPDIAG_GETSOCK and DCCPDIAG_GETSOCK diag are serialized on sock_diag_table_mutex. This is to make sure inet_diag module is not unloaded while diag was ongoing. It is time to get rid of this mutex and use RCU protection, allowing full parallelism. Signed-off-by: Eric Dumazet Reviewed-by: Guillaume Nault Reviewed-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/linux/sock_diag.h | 9 +++++++-- net/core/sock_diag.c | 43 ++++++++++++++++++++++++++----------------- net/ipv4/inet_diag.c | 9 +++++++-- 3 files changed, 40 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 7c07754d711b9..110978dc9af1b 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -23,8 +23,13 @@ struct sock_diag_handler { int sock_diag_register(const struct sock_diag_handler *h); void sock_diag_unregister(const struct sock_diag_handler *h); -void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); -void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); +struct sock_diag_inet_compat { + struct module *owner; + int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh); +}; + +void sock_diag_register_inet_compat(const struct sock_diag_inet_compat *ptr); +void sock_diag_unregister_inet_compat(const struct sock_diag_inet_compat *ptr); u64 __sock_gen_cookie(struct sock *sk); diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 72009e1f4380d..5c3666431df49 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -17,8 +17,9 @@ #include static const struct sock_diag_handler __rcu *sock_diag_handlers[AF_MAX]; -static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); -static DEFINE_MUTEX(sock_diag_table_mutex); + +static struct sock_diag_inet_compat __rcu *inet_rcv_compat; + static struct workqueue_struct *broadcast_wq; DEFINE_COOKIE(sock_cookie); @@ -184,19 +185,20 @@ void sock_diag_broadcast_destroy(struct sock *sk) queue_work(broadcast_wq, &bsk->work); } -void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)) +void sock_diag_register_inet_compat(const struct sock_diag_inet_compat *ptr) { - mutex_lock(&sock_diag_table_mutex); - inet_rcv_compat = fn; - mutex_unlock(&sock_diag_table_mutex); + xchg((__force const struct sock_diag_inet_compat **)&inet_rcv_compat, + ptr); } EXPORT_SYMBOL_GPL(sock_diag_register_inet_compat); -void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)) +void sock_diag_unregister_inet_compat(const struct sock_diag_inet_compat *ptr) { - mutex_lock(&sock_diag_table_mutex); - inet_rcv_compat = NULL; - mutex_unlock(&sock_diag_table_mutex); + const struct sock_diag_inet_compat *old; + + old = xchg((__force const struct sock_diag_inet_compat **)&inet_rcv_compat, + NULL); + WARN_ON_ONCE(old != ptr); } EXPORT_SYMBOL_GPL(sock_diag_unregister_inet_compat); @@ -259,20 +261,27 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh) static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + const struct sock_diag_inet_compat *ptr; int ret; switch (nlh->nlmsg_type) { case TCPDIAG_GETSOCK: case DCCPDIAG_GETSOCK: - if (inet_rcv_compat == NULL) + + if (!rcu_access_pointer(inet_rcv_compat)) sock_load_diag_module(AF_INET, 0); - mutex_lock(&sock_diag_table_mutex); - if (inet_rcv_compat != NULL) - ret = inet_rcv_compat(skb, nlh); - else - ret = -EOPNOTSUPP; - mutex_unlock(&sock_diag_table_mutex); + rcu_read_lock(); + ptr = rcu_dereference(inet_rcv_compat); + if (ptr && !try_module_get(ptr->owner)) + ptr = NULL; + rcu_read_unlock(); + + ret = -EOPNOTSUPP; + if (ptr) { + ret = ptr->fn(skb, nlh); + module_put(ptr->owner); + } return ret; case SOCK_DIAG_BY_FAMILY: diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 52ce20691e4ef..2c2d8b9dd8e9b 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -1527,6 +1527,11 @@ void inet_diag_unregister(const struct inet_diag_handler *h) } EXPORT_SYMBOL_GPL(inet_diag_unregister); +static const struct sock_diag_inet_compat inet_diag_compat = { + .owner = THIS_MODULE, + .fn = inet_diag_rcv_msg_compat, +}; + static int __init inet_diag_init(void) { const int inet_diag_table_size = (IPPROTO_MAX * @@ -1545,7 +1550,7 @@ static int __init inet_diag_init(void) if (err) goto out_free_inet; - sock_diag_register_inet_compat(inet_diag_rcv_msg_compat); + sock_diag_register_inet_compat(&inet_diag_compat); out: return err; @@ -1560,7 +1565,7 @@ static void __exit inet_diag_exit(void) { sock_diag_unregister(&inet6_diag_handler); sock_diag_unregister(&inet_diag_handler); - sock_diag_unregister_inet_compat(inet_diag_rcv_msg_compat); + sock_diag_unregister_inet_compat(&inet_diag_compat); kfree(inet_diag_table); } -- cgit 1.2.3-korg From f44e64990beb41167bd7c313d90bcf7e290c3582 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Jan 2024 11:26:02 +0000 Subject: sock_diag: remove sock_diag_mutex sock_diag_rcv() is still serializing its operations using a mutex, for no good reason. This came with commit 0a9c73014415 ("[INET_DIAG]: Fix oops in netlink_rcv_skb"), but the root cause has been fixed with commit cd40b7d3983c ("[NET]: make netlink user -> kernel interface synchronious") Remove this mutex to let multiple threads run concurrently. Signed-off-by: Eric Dumazet Reviewed-by: Guillaume Nault Reviewed-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- net/core/sock_diag.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 5c3666431df49..6541228380252 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -292,13 +292,9 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, } } -static DEFINE_MUTEX(sock_diag_mutex); - static void sock_diag_rcv(struct sk_buff *skb) { - mutex_lock(&sock_diag_mutex); netlink_rcv_skb(skb, &sock_diag_rcv_msg); - mutex_unlock(&sock_diag_mutex); } static int sock_diag_bind(struct net *net, int group) -- cgit 1.2.3-korg From 622a08e8de9f9ad576fd56e3a2e97a84370a8100 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Jan 2024 11:26:03 +0000 Subject: inet_diag: skip over empty buckets After the removal of inet_diag_table_mutex, sock_diag_table_mutex and sock_diag_mutex, I was able so see spinlock contention from inet_diag_dump_icsk() when running 100 parallel invocations. It is time to skip over empty buckets. Signed-off-by: Eric Dumazet Reviewed-by: Guillaume Nault Reviewed-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- net/ipv4/inet_diag.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 2c2d8b9dd8e9b..7adace541fe29 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -1045,6 +1045,10 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, num = 0; ilb = &hashinfo->lhash2[i]; + if (hlist_nulls_empty(&ilb->nulls_head)) { + s_num = 0; + continue; + } spin_lock(&ilb->lock); sk_nulls_for_each(sk, node, &ilb->nulls_head) { struct inet_sock *inet = inet_sk(sk); @@ -1109,6 +1113,10 @@ resume_bind_walk: accum = 0; ibb = &hashinfo->bhash2[i]; + if (hlist_empty(&ibb->chain)) { + s_num = 0; + continue; + } spin_lock_bh(&ibb->lock); inet_bind_bucket_for_each(tb2, &ibb->chain) { if (!net_eq(ib2_net(tb2), net)) -- cgit 1.2.3-korg From 15b8b0be985592fd19ee4e661d13d291877b09c7 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 5 Jan 2024 22:55:45 -0800 Subject: net: filter: fix spelling mistakes Fix spelling errors as reported by codespell. Signed-off-by: Randy Dunlap Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240106065545.16855-1-rdunlap@infradead.org Signed-off-by: Alexei Starovoitov --- net/core/filter.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 24061f29c9dd2..8c9f67c81e22d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -777,7 +777,7 @@ jmp_rest: BPF_EMIT_JMP; break; - /* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */ + /* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */ case BPF_LDX | BPF_MSH | BPF_B: { struct sock_filter tmp = { .code = BPF_LD | BPF_ABS | BPF_B, @@ -803,7 +803,7 @@ jmp_rest: *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP); break; } - /* RET_K is remaped into 2 insns. RET_A case doesn't need an + /* RET_K is remapped into 2 insns. RET_A case doesn't need an * extra mov as BPF_REG_0 is already mapped into BPF_REG_A. */ case BPF_RET | BPF_A: @@ -2967,7 +2967,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, * * Then if B is non-zero AND there is no space allocate space and * compact A, B regions into page. If there is space shift ring to - * the rigth free'ing the next element in ring to place B, leaving + * the right free'ing the next element in ring to place B, leaving * A untouched except to reduce length. */ if (start != offset) { @@ -8226,7 +8226,7 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #if IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES) /* The nf_conn___init type is used in the NF_CONNTRACK kfuncs. The * kfuncs are defined in two different modules, and we want to be able - * to use them interchangably with the same BTF type ID. Because modules + * to use them interchangeably with the same BTF type ID. Because modules * can't de-duplicate BTF IDs between each other, we need the type to be * referenced in the vmlinux BTF or the verifier will get confused about * the different types. So we add this dummy type reference which will -- cgit 1.2.3-korg From b18afb6f42292a9154102fed7265ae747bbfbc57 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 15 Jan 2024 12:55:09 -0800 Subject: tcp: Move tcp_ns_to_ts() to tcp.h We will support arbitrary SYN Cookie with BPF. When BPF prog validates ACK and kfunc allocates a reqsk, we need to call tcp_ns_to_ts() to calculate an offset of TSval for later use: time t0 : Send SYN+ACK -> tsval = Initial TSval (Random Number) t1 : Recv ACK of 3WHS -> tsoff = TSecr - tcp_ns_to_ts(usec_ts_ok, tcp_clock_ns()) = Initial TSval - t1 t2 : Send ACK -> tsval = t2 + tsoff = Initial TSval + (t2 - t1) = Initial TSval + Time Delta (x) (x) Note that the time delta does not include the initial RTT from t0 to t1. Let's move tcp_ns_to_ts() to tcp.h. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240115205514.68364-2-kuniyu@amazon.com Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/net/tcp.h | 9 +++++++++ net/ipv4/syncookies.c | 9 --------- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index dd78a11810310..114000e71a468 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -577,6 +577,15 @@ static inline u32 tcp_cookie_time(void) return val; } +/* Convert one nsec 64bit timestamp to ts (ms or usec resolution) */ +static inline u64 tcp_ns_to_ts(bool usec_ts, u64 val) +{ + if (usec_ts) + return div_u64(val, NSEC_PER_USEC); + + return div_u64(val, NSEC_PER_MSEC); +} + u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, u16 *mssp); __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 61f1c96cfe63e..981944c228200 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -51,15 +51,6 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, count, &syncookie_secret[c]); } -/* Convert one nsec 64bit timestamp to ts (ms or usec resolution) */ -static u64 tcp_ns_to_ts(bool usec_ts, u64 val) -{ - if (usec_ts) - return div_u64(val, NSEC_PER_USEC); - - return div_u64(val, NSEC_PER_MSEC); -} - /* * when syncookies are in effect and tcp timestamps are enabled we encode * tcp options in the lower bits of the timestamp value that will be -- cgit 1.2.3-korg From 695751e31a63efd2bbe6779873adf1e4deb00cd5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 15 Jan 2024 12:55:12 -0800 Subject: bpf: tcp: Handle BPF SYN Cookie in cookie_v[46]_check(). We will support arbitrary SYN Cookie with BPF in the following patch. If BPF prog validates ACK and kfunc allocates a reqsk, it will be carried to cookie_[46]_check() as skb->sk. If skb->sk is not NULL, we call cookie_bpf_check(). Then, we clear skb->sk and skb->destructor, which are needed not to hold refcnt for reqsk and the listener. See the following patch for details. After that, we finish initialisation for the remaining fields with cookie_tcp_reqsk_init(). Note that the server side WScale is set only for non-BPF SYN Cookie. Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240115205514.68364-5-kuniyu@amazon.com Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/net/tcp.h | 20 ++++++++++++++++++++ net/ipv4/syncookies.c | 31 +++++++++++++++++++++++++++---- net/ipv6/syncookies.c | 13 +++++++++---- 3 files changed, 56 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 114000e71a468..dfe99a084a71f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -599,6 +599,26 @@ static inline bool cookie_ecn_ok(const struct net *net, const struct dst_entry * dst_feature(dst, RTAX_FEATURE_ECN); } +#if IS_ENABLED(CONFIG_BPF) +static inline bool cookie_bpf_ok(struct sk_buff *skb) +{ + return skb->sk; +} + +struct request_sock *cookie_bpf_check(struct sock *sk, struct sk_buff *skb); +#else +static inline bool cookie_bpf_ok(struct sk_buff *skb) +{ + return false; +} + +static inline struct request_sock *cookie_bpf_check(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + return NULL; +} +#endif + /* From net/ipv6/syncookies.c */ int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th); struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 981944c228200..be88bf586ff9f 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -295,6 +295,24 @@ static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb, return 0; } +#if IS_ENABLED(CONFIG_BPF) +struct request_sock *cookie_bpf_check(struct sock *sk, struct sk_buff *skb) +{ + struct request_sock *req = inet_reqsk(skb->sk); + + skb->sk = NULL; + skb->destructor = NULL; + + if (cookie_tcp_reqsk_init(sk, skb, req)) { + reqsk_free(req); + req = NULL; + } + + return req; +} +EXPORT_SYMBOL_GPL(cookie_bpf_check); +#endif + struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk, struct sk_buff *skb, struct tcp_options_received *tcp_opt, @@ -395,9 +413,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) !th->ack || th->rst) goto out; - req = cookie_tcp_check(net, sk, skb); - if (IS_ERR(req)) - goto out; + if (cookie_bpf_ok(skb)) { + req = cookie_bpf_check(sk, skb); + } else { + req = cookie_tcp_check(net, sk, skb); + if (IS_ERR(req)) + goto out; + } if (!req) goto out_drop; @@ -445,7 +467,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->wscale_ok, &rcv_wscale, dst_metric(&rt->dst, RTAX_INITRWND)); - ireq->rcv_wscale = rcv_wscale; + if (!req->syncookie) + ireq->rcv_wscale = rcv_wscale; ireq->ecn_ok &= cookie_ecn_ok(net, &rt->dst); ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index c8d2ca27220cc..6b9c692788196 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -182,9 +182,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) !th->ack || th->rst) goto out; - req = cookie_tcp_check(net, sk, skb); - if (IS_ERR(req)) - goto out; + if (cookie_bpf_ok(skb)) { + req = cookie_bpf_check(sk, skb); + } else { + req = cookie_tcp_check(net, sk, skb); + if (IS_ERR(req)) + goto out; + } if (!req) goto out_drop; @@ -247,7 +251,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->wscale_ok, &rcv_wscale, dst_metric(dst, RTAX_INITRWND)); - ireq->rcv_wscale = rcv_wscale; + if (!req->syncookie) + ireq->rcv_wscale = rcv_wscale; ireq->ecn_ok &= cookie_ecn_ok(net, dst); ret = tcp_get_cookie_sock(sk, skb, req, dst); -- cgit 1.2.3-korg From e472f88891abbc535a5e16a68a104073985f6061 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 15 Jan 2024 12:55:13 -0800 Subject: bpf: tcp: Support arbitrary SYN Cookie. This patch adds a new kfunc available at TC hook to support arbitrary SYN Cookie. The basic usage is as follows: struct bpf_tcp_req_attrs attrs = { .mss = mss, .wscale_ok = wscale_ok, .rcv_wscale = rcv_wscale, /* Server's WScale < 15 */ .snd_wscale = snd_wscale, /* Client's WScale < 15 */ .tstamp_ok = tstamp_ok, .rcv_tsval = tsval, .rcv_tsecr = tsecr, /* Server's Initial TSval */ .usec_ts_ok = usec_ts_ok, .sack_ok = sack_ok, .ecn_ok = ecn_ok, } skc = bpf_skc_lookup_tcp(...); sk = (struct sock *)bpf_skc_to_tcp_sock(skc); bpf_sk_assign_tcp_reqsk(skb, sk, attrs, sizeof(attrs)); bpf_sk_release(skc); bpf_sk_assign_tcp_reqsk() takes skb, a listener sk, and struct bpf_tcp_req_attrs and allocates reqsk and configures it. Then, bpf_sk_assign_tcp_reqsk() links reqsk with skb and the listener. The notable thing here is that we do not hold refcnt for both reqsk and listener. To differentiate that, we mark reqsk->syncookie, which is only used in TX for now. So, if reqsk->syncookie is 1 in RX, it means that the reqsk is allocated by kfunc. When skb is freed, sock_pfree() checks if reqsk->syncookie is 1, and in that case, we set NULL to reqsk->rsk_listener before calling reqsk_free() as reqsk does not hold a refcnt of the listener. When the TCP stack looks up a socket from the skb, we steal the listener from the reqsk in skb_steal_sock() and create a full sk in cookie_v[46]_check(). The refcnt of reqsk will finally be set to 1 in tcp_get_cookie_sock() after creating a full sk. Note that we can extend struct bpf_tcp_req_attrs in the future when we add a new attribute that is determined in 3WHS. Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240115205514.68364-6-kuniyu@amazon.com Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/net/tcp.h | 14 +++++++ net/core/filter.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++- net/core/sock.c | 14 ++++++- 3 files changed, 135 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index dfe99a084a71f..451dc13739705 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -600,6 +600,20 @@ static inline bool cookie_ecn_ok(const struct net *net, const struct dst_entry * } #if IS_ENABLED(CONFIG_BPF) +struct bpf_tcp_req_attrs { + u32 rcv_tsval; + u32 rcv_tsecr; + u16 mss; + u8 rcv_wscale; + u8 snd_wscale; + u8 ecn_ok; + u8 wscale_ok; + u8 sack_ok; + u8 tstamp_ok; + u8 usec_ts_ok; + u8 reserved[3]; +}; + static inline bool cookie_bpf_ok(struct sk_buff *skb) { return skb->sk; diff --git a/net/core/filter.c b/net/core/filter.c index 8c9f67c81e22d..6a7abbaa50b88 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11837,6 +11837,103 @@ __bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, return 0; } + +__bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct sk_buff *skb, struct sock *sk, + struct bpf_tcp_req_attrs *attrs, int attrs__sz) +{ +#if IS_ENABLED(CONFIG_SYN_COOKIES) + const struct request_sock_ops *ops; + struct inet_request_sock *ireq; + struct tcp_request_sock *treq; + struct request_sock *req; + struct net *net; + __u16 min_mss; + u32 tsoff = 0; + + if (attrs__sz != sizeof(*attrs) || + attrs->reserved[0] || attrs->reserved[1] || attrs->reserved[2]) + return -EINVAL; + + if (!skb_at_tc_ingress(skb)) + return -EINVAL; + + net = dev_net(skb->dev); + if (net != sock_net(sk)) + return -ENETUNREACH; + + switch (skb->protocol) { + case htons(ETH_P_IP): + ops = &tcp_request_sock_ops; + min_mss = 536; + break; +#if IS_BUILTIN(CONFIG_IPV6) + case htons(ETH_P_IPV6): + ops = &tcp6_request_sock_ops; + min_mss = IPV6_MIN_MTU - 60; + break; +#endif + default: + return -EINVAL; + } + + if (sk->sk_type != SOCK_STREAM || sk->sk_state != TCP_LISTEN || + sk_is_mptcp(sk)) + return -EINVAL; + + if (attrs->mss < min_mss) + return -EINVAL; + + if (attrs->wscale_ok) { + if (!READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) + return -EINVAL; + + if (attrs->snd_wscale > TCP_MAX_WSCALE || + attrs->rcv_wscale > TCP_MAX_WSCALE) + return -EINVAL; + } + + if (attrs->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack)) + return -EINVAL; + + if (attrs->tstamp_ok) { + if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps)) + return -EINVAL; + + tsoff = attrs->rcv_tsecr - tcp_ns_to_ts(attrs->usec_ts_ok, tcp_clock_ns()); + } + + req = inet_reqsk_alloc(ops, sk, false); + if (!req) + return -ENOMEM; + + ireq = inet_rsk(req); + treq = tcp_rsk(req); + + req->rsk_listener = sk; + req->syncookie = 1; + req->mss = attrs->mss; + req->ts_recent = attrs->rcv_tsval; + + ireq->snd_wscale = attrs->snd_wscale; + ireq->rcv_wscale = attrs->rcv_wscale; + ireq->tstamp_ok = !!attrs->tstamp_ok; + ireq->sack_ok = !!attrs->sack_ok; + ireq->wscale_ok = !!attrs->wscale_ok; + ireq->ecn_ok = !!attrs->ecn_ok; + + treq->req_usec_ts = !!attrs->usec_ts_ok; + treq->ts_off = tsoff; + + skb_orphan(skb); + skb->sk = req_to_sk(req); + skb->destructor = sock_pfree; + + return 0; +#else + return -EOPNOTSUPP; +#endif +} + __bpf_kfunc_end_defs(); int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, @@ -11865,6 +11962,10 @@ BTF_SET8_START(bpf_kfunc_check_set_sock_addr) BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path) BTF_SET8_END(bpf_kfunc_check_set_sock_addr) +BTF_SET8_START(bpf_kfunc_check_set_tcp_reqsk) +BTF_ID_FLAGS(func, bpf_sk_assign_tcp_reqsk, KF_TRUSTED_ARGS) +BTF_SET8_END(bpf_kfunc_check_set_tcp_reqsk) + static const struct btf_kfunc_id_set bpf_kfunc_set_skb = { .owner = THIS_MODULE, .set = &bpf_kfunc_check_set_skb, @@ -11880,6 +11981,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_sock_addr = { .set = &bpf_kfunc_check_set_sock_addr, }; +static const struct btf_kfunc_id_set bpf_kfunc_set_tcp_reqsk = { + .owner = THIS_MODULE, + .set = &bpf_kfunc_check_set_tcp_reqsk, +}; + static int __init bpf_kfunc_init(void) { int ret; @@ -11895,8 +12001,9 @@ static int __init bpf_kfunc_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); - return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - &bpf_kfunc_set_sock_addr); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + &bpf_kfunc_set_sock_addr); + return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_tcp_reqsk); } late_initcall(bpf_kfunc_init); diff --git a/net/core/sock.c b/net/core/sock.c index 158dbdebce6a3..147fb2656e6bb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2582,8 +2582,18 @@ EXPORT_SYMBOL(sock_efree); #ifdef CONFIG_INET void sock_pfree(struct sk_buff *skb) { - if (sk_is_refcounted(skb->sk)) - sock_gen_put(skb->sk); + struct sock *sk = skb->sk; + + if (!sk_is_refcounted(sk)) + return; + + if (sk->sk_state == TCP_NEW_SYN_RECV && inet_reqsk(sk)->syncookie) { + inet_reqsk(sk)->rsk_listener = NULL; + reqsk_free(inet_reqsk(sk)); + return; + } + + sock_gen_put(sk); } EXPORT_SYMBOL(sock_pfree); #endif /* CONFIG_INET */ -- cgit 1.2.3-korg From 4c5763ed996a61b51d721d0968d0df957826ea49 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Fri, 19 Jan 2024 14:49:54 -0800 Subject: bpf, net: introduce bpf_struct_ops_desc. Move some of members of bpf_struct_ops to bpf_struct_ops_desc. type_id is unavailabe in bpf_struct_ops anymore. Modules should get it from the btf received by kmod's init function. Cc: netdev@vger.kernel.org Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240119225005.668602-4-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 15 +++++--- kernel/bpf/bpf_struct_ops.c | 80 ++++++++++++++++++++++-------------------- kernel/bpf/verifier.c | 8 +++-- net/bpf/bpf_dummy_struct_ops.c | 11 +++++- net/ipv4/bpf_tcp_ca.c | 8 ++++- 5 files changed, 73 insertions(+), 49 deletions(-) (limited to 'net') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 377857b232c68..7fc95e7babab7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1673,18 +1673,23 @@ struct bpf_struct_ops { void (*unreg)(void *kdata); int (*update)(void *kdata, void *old_kdata); int (*validate)(void *kdata); - const struct btf_type *type; - const struct btf_type *value_type; + void *cfi_stubs; const char *name; struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS]; +}; + +struct bpf_struct_ops_desc { + struct bpf_struct_ops *st_ops; + + const struct btf_type *type; + const struct btf_type *value_type; u32 type_id; u32 value_id; - void *cfi_stubs; }; #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) #define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA)) -const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id); +const struct bpf_struct_ops_desc *bpf_struct_ops_find(u32 type_id); void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log); bool bpf_struct_ops_get(const void *kdata); void bpf_struct_ops_put(const void *kdata); @@ -1728,7 +1733,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); #endif #else -static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id) +static inline const struct bpf_struct_ops_desc *bpf_struct_ops_find(u32 type_id) { return NULL; } diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 5b3ebcb435d04..9774f7824e8be 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -32,7 +32,7 @@ struct bpf_struct_ops_value { struct bpf_struct_ops_map { struct bpf_map map; struct rcu_head rcu; - const struct bpf_struct_ops *st_ops; + const struct bpf_struct_ops_desc *st_ops_desc; /* protect map_update */ struct mutex lock; /* link has all the bpf_links that is populated @@ -92,9 +92,9 @@ enum { __NR_BPF_STRUCT_OPS_TYPE, }; -static struct bpf_struct_ops * const bpf_struct_ops[] = { +static struct bpf_struct_ops_desc bpf_struct_ops[] = { #define BPF_STRUCT_OPS_TYPE(_name) \ - [BPF_STRUCT_OPS_TYPE_##_name] = &bpf_##_name, + [BPF_STRUCT_OPS_TYPE_##_name] = { .st_ops = &bpf_##_name }, #include "bpf_struct_ops_types.h" #undef BPF_STRUCT_OPS_TYPE }; @@ -115,10 +115,11 @@ enum { IDX_MODULE_ID, }; -static void bpf_struct_ops_init_one(struct bpf_struct_ops *st_ops, - struct btf *btf, - struct bpf_verifier_log *log) +static void bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, + struct btf *btf, + struct bpf_verifier_log *log) { + struct bpf_struct_ops *st_ops = st_ops_desc->st_ops; const struct btf_member *member; const struct btf_type *t; s32 type_id, value_id; @@ -190,18 +191,18 @@ static void bpf_struct_ops_init_one(struct bpf_struct_ops *st_ops, pr_warn("Error in init bpf_struct_ops %s\n", st_ops->name); } else { - st_ops->type_id = type_id; - st_ops->type = t; - st_ops->value_id = value_id; - st_ops->value_type = btf_type_by_id(btf, - value_id); + st_ops_desc->type_id = type_id; + st_ops_desc->type = t; + st_ops_desc->value_id = value_id; + st_ops_desc->value_type = btf_type_by_id(btf, + value_id); } } } void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log) { - struct bpf_struct_ops *st_ops; + struct bpf_struct_ops_desc *st_ops_desc; u32 i; /* Ensure BTF type is emitted for "struct bpf_struct_ops_##_name" */ @@ -210,14 +211,14 @@ void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log) #undef BPF_STRUCT_OPS_TYPE for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) { - st_ops = bpf_struct_ops[i]; - bpf_struct_ops_init_one(st_ops, btf, log); + st_ops_desc = &bpf_struct_ops[i]; + bpf_struct_ops_desc_init(st_ops_desc, btf, log); } } extern struct btf *btf_vmlinux; -static const struct bpf_struct_ops * +static const struct bpf_struct_ops_desc * bpf_struct_ops_find_value(u32 value_id) { unsigned int i; @@ -226,14 +227,14 @@ bpf_struct_ops_find_value(u32 value_id) return NULL; for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) { - if (bpf_struct_ops[i]->value_id == value_id) - return bpf_struct_ops[i]; + if (bpf_struct_ops[i].value_id == value_id) + return &bpf_struct_ops[i]; } return NULL; } -const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id) +const struct bpf_struct_ops_desc *bpf_struct_ops_find(u32 type_id) { unsigned int i; @@ -241,8 +242,8 @@ const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id) return NULL; for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) { - if (bpf_struct_ops[i]->type_id == type_id) - return bpf_struct_ops[i]; + if (bpf_struct_ops[i].type_id == type_id) + return &bpf_struct_ops[i]; } return NULL; @@ -302,7 +303,7 @@ static void *bpf_struct_ops_map_lookup_elem(struct bpf_map *map, void *key) static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map) { - const struct btf_type *t = st_map->st_ops->type; + const struct btf_type *t = st_map->st_ops_desc->type; u32 i; for (i = 0; i < btf_type_vlen(t); i++) { @@ -382,11 +383,12 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, void *value, u64 flags) { struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; - const struct bpf_struct_ops *st_ops = st_map->st_ops; + const struct bpf_struct_ops_desc *st_ops_desc = st_map->st_ops_desc; + const struct bpf_struct_ops *st_ops = st_ops_desc->st_ops; struct bpf_struct_ops_value *uvalue, *kvalue; const struct btf_type *module_type; const struct btf_member *member; - const struct btf_type *t = st_ops->type; + const struct btf_type *t = st_ops_desc->type; struct bpf_tramp_links *tlinks; void *udata, *kdata; int prog_fd, err; @@ -399,7 +401,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, if (*(u32 *)key != 0) return -E2BIG; - err = check_zero_holes(st_ops->value_type, value); + err = check_zero_holes(st_ops_desc->value_type, value); if (err) return err; @@ -492,7 +494,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, } if (prog->type != BPF_PROG_TYPE_STRUCT_OPS || - prog->aux->attach_btf_id != st_ops->type_id || + prog->aux->attach_btf_id != st_ops_desc->type_id || prog->expected_attach_type != i) { bpf_prog_put(prog); err = -EINVAL; @@ -588,7 +590,7 @@ static long bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key) BPF_STRUCT_OPS_STATE_TOBEFREE); switch (prev_state) { case BPF_STRUCT_OPS_STATE_INUSE: - st_map->st_ops->unreg(&st_map->kvalue.data); + st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data); bpf_map_put(map); return 0; case BPF_STRUCT_OPS_STATE_TOBEFREE: @@ -669,22 +671,22 @@ static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr) static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) { - const struct bpf_struct_ops *st_ops; + const struct bpf_struct_ops_desc *st_ops_desc; size_t st_map_size; struct bpf_struct_ops_map *st_map; const struct btf_type *t, *vt; struct bpf_map *map; int ret; - st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id); - if (!st_ops) + st_ops_desc = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id); + if (!st_ops_desc) return ERR_PTR(-ENOTSUPP); - vt = st_ops->value_type; + vt = st_ops_desc->value_type; if (attr->value_size != vt->size) return ERR_PTR(-EINVAL); - t = st_ops->type; + t = st_ops_desc->type; st_map_size = sizeof(*st_map) + /* kvalue stores the @@ -696,7 +698,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) if (!st_map) return ERR_PTR(-ENOMEM); - st_map->st_ops = st_ops; + st_map->st_ops_desc = st_ops_desc; map = &st_map->map; ret = bpf_jit_charge_modmem(PAGE_SIZE); @@ -733,8 +735,8 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) static u64 bpf_struct_ops_map_mem_usage(const struct bpf_map *map) { struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; - const struct bpf_struct_ops *st_ops = st_map->st_ops; - const struct btf_type *vt = st_ops->value_type; + const struct bpf_struct_ops_desc *st_ops_desc = st_map->st_ops_desc; + const struct btf_type *vt = st_ops_desc->value_type; u64 usage; usage = sizeof(*st_map) + @@ -808,7 +810,7 @@ static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link) /* st_link->map can be NULL if * bpf_struct_ops_link_create() fails to register. */ - st_map->st_ops->unreg(&st_map->kvalue.data); + st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data); bpf_map_put(&st_map->map); } kfree(st_link); @@ -855,7 +857,7 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map if (!bpf_struct_ops_valid_to_reg(new_map)) return -EINVAL; - if (!st_map->st_ops->update) + if (!st_map->st_ops_desc->st_ops->update) return -EOPNOTSUPP; mutex_lock(&update_mutex); @@ -868,12 +870,12 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map old_st_map = container_of(old_map, struct bpf_struct_ops_map, map); /* The new and old struct_ops must be the same type. */ - if (st_map->st_ops != old_st_map->st_ops) { + if (st_map->st_ops_desc != old_st_map->st_ops_desc) { err = -EINVAL; goto err_out; } - err = st_map->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data); + err = st_map->st_ops_desc->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data); if (err) goto err_out; @@ -924,7 +926,7 @@ int bpf_struct_ops_link_create(union bpf_attr *attr) if (err) goto err_out; - err = st_map->st_ops->reg(st_map->kvalue.data); + err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data); if (err) { bpf_link_cleanup(&link_primer); link = NULL; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9e9cc132dd024..e279491118b7e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -20285,6 +20285,7 @@ static void print_verification_stats(struct bpf_verifier_env *env) static int check_struct_ops_btf_id(struct bpf_verifier_env *env) { const struct btf_type *t, *func_proto; + const struct bpf_struct_ops_desc *st_ops_desc; const struct bpf_struct_ops *st_ops; const struct btf_member *member; struct bpf_prog *prog = env->prog; @@ -20297,14 +20298,15 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env) } btf_id = prog->aux->attach_btf_id; - st_ops = bpf_struct_ops_find(btf_id); - if (!st_ops) { + st_ops_desc = bpf_struct_ops_find(btf_id); + if (!st_ops_desc) { verbose(env, "attach_btf_id %u is not a supported struct\n", btf_id); return -ENOTSUPP; } + st_ops = st_ops_desc->st_ops; - t = st_ops->type; + t = st_ops_desc->type; member_idx = prog->expected_attach_type; if (member_idx >= btf_type_vlen(t)) { verbose(env, "attach to invalid member idx %u of struct %s\n", diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index 8906f7bdf4a92..ba2c58dba2da5 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -22,6 +22,8 @@ struct bpf_dummy_ops_test_args { struct bpf_dummy_ops_state state; }; +static struct btf *bpf_dummy_ops_btf; + static struct bpf_dummy_ops_test_args * dummy_ops_init_args(const union bpf_attr *kattr, unsigned int nr) { @@ -90,9 +92,15 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, void *image = NULL; unsigned int op_idx; int prog_ret; + s32 type_id; int err; - if (prog->aux->attach_btf_id != st_ops->type_id) + type_id = btf_find_by_name_kind(bpf_dummy_ops_btf, + bpf_bpf_dummy_ops.name, + BTF_KIND_STRUCT); + if (type_id < 0) + return -EINVAL; + if (prog->aux->attach_btf_id != type_id) return -EOPNOTSUPP; func_proto = prog->aux->attach_func_proto; @@ -148,6 +156,7 @@ out: static int bpf_dummy_init(struct btf *btf) { + bpf_dummy_ops_btf = btf; return 0; } diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index ae8b15e6896fd..dffd8828079b6 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -20,6 +20,7 @@ static u32 unsupported_ops[] = { static const struct btf_type *tcp_sock_type; static u32 tcp_sock_id, sock_id; +static const struct btf_type *tcp_congestion_ops_type; static int bpf_tcp_ca_init(struct btf *btf) { @@ -36,6 +37,11 @@ static int bpf_tcp_ca_init(struct btf *btf) tcp_sock_id = type_id; tcp_sock_type = btf_type_by_id(btf, tcp_sock_id); + type_id = btf_find_by_name_kind(btf, "tcp_congestion_ops", BTF_KIND_STRUCT); + if (type_id < 0) + return -EINVAL; + tcp_congestion_ops_type = btf_type_by_id(btf, type_id); + return 0; } @@ -149,7 +155,7 @@ static u32 prog_ops_moff(const struct bpf_prog *prog) u32 midx; midx = prog->expected_attach_type; - t = bpf_tcp_congestion_ops.type; + t = tcp_congestion_ops_type; m = &btf_type_member(t)[midx]; return __btf_member_bit_offset(t, m) / 8; -- cgit 1.2.3-korg From f6be98d19985411ca1f3d53413d94d5b7f41c200 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Fri, 19 Jan 2024 14:50:02 -0800 Subject: bpf, net: switch to dynamic registration Replace the static list of struct_ops types with per-btf struct_ops_tab to enable dynamic registration. Both bpf_dummy_ops and bpf_tcp_ca now utilize the registration function instead of being listed in bpf_struct_ops_types.h. Cc: netdev@vger.kernel.org Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240119225005.668602-12-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 27 ++++++---- include/linux/btf.h | 12 +++++ kernel/bpf/bpf_struct_ops.c | 100 +++++--------------------------------- kernel/bpf/bpf_struct_ops_types.h | 12 ----- kernel/bpf/btf.c | 86 ++++++++++++++++++++++++++++++-- net/bpf/bpf_dummy_struct_ops.c | 11 ++++- net/ipv4/bpf_tcp_ca.c | 12 +++-- 7 files changed, 142 insertions(+), 118 deletions(-) delete mode 100644 kernel/bpf/bpf_struct_ops_types.h (limited to 'net') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7c178170f93fe..75b7f9b19c6ac 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1701,9 +1701,20 @@ struct bpf_struct_ops_common_value { }; #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) +/* This macro helps developer to register a struct_ops type and generate + * type information correctly. Developers should use this macro to register + * a struct_ops type instead of calling __register_bpf_struct_ops() directly. + */ +#define register_bpf_struct_ops(st_ops, type) \ + ({ \ + struct bpf_struct_ops_##type { \ + struct bpf_struct_ops_common_value common; \ + struct type data ____cacheline_aligned_in_smp; \ + }; \ + BTF_TYPE_EMIT(struct bpf_struct_ops_##type); \ + __register_bpf_struct_ops(st_ops); \ + }) #define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA)) -const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf *btf, u32 type_id); -void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log); bool bpf_struct_ops_get(const void *kdata); void bpf_struct_ops_put(const void *kdata); int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, @@ -1745,16 +1756,12 @@ struct bpf_dummy_ops { int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); #endif +int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, + struct btf *btf, + struct bpf_verifier_log *log); void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map); #else -static inline const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf *btf, u32 type_id) -{ - return NULL; -} -static inline void bpf_struct_ops_init(struct btf *btf, - struct bpf_verifier_log *log) -{ -} +#define register_bpf_struct_ops(st_ops, type) ({ (void *)(st_ops); 0; }) static inline bool bpf_try_module_get(const void *data, struct module *owner) { return try_module_get(owner); diff --git a/include/linux/btf.h b/include/linux/btf.h index 932af1680bb50..1ee8977b8c954 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -497,6 +497,18 @@ static inline void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id) struct bpf_verifier_log; +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) +struct bpf_struct_ops; +int __register_bpf_struct_ops(struct bpf_struct_ops *st_ops); +const struct bpf_struct_ops_desc *bpf_struct_ops_find_value(struct btf *btf, u32 value_id); +const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf *btf, u32 type_id); +#else +static inline const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf *btf, u32 type_id) +{ + return NULL; +} +#endif + #ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 30ab34fab0f8d..defc052e46226 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -62,35 +62,6 @@ static DEFINE_MUTEX(update_mutex); #define VALUE_PREFIX "bpf_struct_ops_" #define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1) -/* bpf_struct_ops_##_name (e.g. bpf_struct_ops_tcp_congestion_ops) is - * the map's value exposed to the userspace and its btf-type-id is - * stored at the map->btf_vmlinux_value_type_id. - * - */ -#define BPF_STRUCT_OPS_TYPE(_name) \ -extern struct bpf_struct_ops bpf_##_name; \ - \ -struct bpf_struct_ops_##_name { \ - struct bpf_struct_ops_common_value common; \ - struct _name data ____cacheline_aligned_in_smp; \ -}; -#include "bpf_struct_ops_types.h" -#undef BPF_STRUCT_OPS_TYPE - -enum { -#define BPF_STRUCT_OPS_TYPE(_name) BPF_STRUCT_OPS_TYPE_##_name, -#include "bpf_struct_ops_types.h" -#undef BPF_STRUCT_OPS_TYPE - __NR_BPF_STRUCT_OPS_TYPE, -}; - -static struct bpf_struct_ops_desc bpf_struct_ops[] = { -#define BPF_STRUCT_OPS_TYPE(_name) \ - [BPF_STRUCT_OPS_TYPE_##_name] = { .st_ops = &bpf_##_name }, -#include "bpf_struct_ops_types.h" -#undef BPF_STRUCT_OPS_TYPE -}; - const struct bpf_verifier_ops bpf_struct_ops_verifier_ops = { }; @@ -145,9 +116,9 @@ static bool is_valid_value_type(struct btf *btf, s32 value_id, return true; } -static void bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, - struct btf *btf, - struct bpf_verifier_log *log) +int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, + struct btf *btf, + struct bpf_verifier_log *log) { struct bpf_struct_ops *st_ops = st_ops_desc->st_ops; const struct btf_member *member; @@ -161,7 +132,7 @@ static void bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, sizeof(value_name)) { pr_warn("struct_ops name %s is too long\n", st_ops->name); - return; + return -EINVAL; } sprintf(value_name, "%s%s", VALUE_PREFIX, st_ops->name); @@ -170,13 +141,13 @@ static void bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, if (type_id < 0) { pr_warn("Cannot find struct %s in %s\n", st_ops->name, btf_get_name(btf)); - return; + return -EINVAL; } t = btf_type_by_id(btf, type_id); if (btf_type_vlen(t) > BPF_STRUCT_OPS_MAX_NR_MEMBERS) { pr_warn("Cannot support #%u members in struct %s\n", btf_type_vlen(t), st_ops->name); - return; + return -EINVAL; } value_id = btf_find_by_name_kind(btf, value_name, @@ -184,10 +155,10 @@ static void bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, if (value_id < 0) { pr_warn("Cannot find struct %s in %s\n", value_name, btf_get_name(btf)); - return; + return -EINVAL; } if (!is_valid_value_type(btf, value_id, t, value_name)) - return; + return -EINVAL; for_each_member(i, t, member) { const struct btf_type *func_proto; @@ -196,13 +167,13 @@ static void bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, if (!*mname) { pr_warn("anon member in struct %s is not supported\n", st_ops->name); - break; + return -EOPNOTSUPP; } if (__btf_member_bitfield_size(t, member)) { pr_warn("bit field member %s in struct %s is not supported\n", mname, st_ops->name); - break; + return -EOPNOTSUPP; } func_proto = btf_type_resolve_func_ptr(btf, @@ -214,7 +185,7 @@ static void bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, &st_ops->func_models[i])) { pr_warn("Error in parsing func ptr %s in struct %s\n", mname, st_ops->name); - break; + return -EINVAL; } } @@ -222,6 +193,7 @@ static void bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, if (st_ops->init(btf)) { pr_warn("Error in init bpf_struct_ops %s\n", st_ops->name); + return -EINVAL; } else { st_ops_desc->type_id = type_id; st_ops_desc->type = t; @@ -230,54 +202,8 @@ static void bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, value_id); } } -} -void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log) -{ - struct bpf_struct_ops_desc *st_ops_desc; - u32 i; - - /* Ensure BTF type is emitted for "struct bpf_struct_ops_##_name" */ -#define BPF_STRUCT_OPS_TYPE(_name) BTF_TYPE_EMIT(struct bpf_struct_ops_##_name); -#include "bpf_struct_ops_types.h" -#undef BPF_STRUCT_OPS_TYPE - - for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) { - st_ops_desc = &bpf_struct_ops[i]; - bpf_struct_ops_desc_init(st_ops_desc, btf, log); - } -} - -static const struct bpf_struct_ops_desc * -bpf_struct_ops_find_value(struct btf *btf, u32 value_id) -{ - unsigned int i; - - if (!value_id || !btf) - return NULL; - - for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) { - if (bpf_struct_ops[i].value_id == value_id) - return &bpf_struct_ops[i]; - } - - return NULL; -} - -const struct bpf_struct_ops_desc * -bpf_struct_ops_find(struct btf *btf, u32 type_id) -{ - unsigned int i; - - if (!type_id || !btf) - return NULL; - - for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) { - if (bpf_struct_ops[i].type_id == type_id) - return &bpf_struct_ops[i]; - } - - return NULL; + return 0; } static int bpf_struct_ops_map_get_next_key(struct bpf_map *map, void *key, diff --git a/kernel/bpf/bpf_struct_ops_types.h b/kernel/bpf/bpf_struct_ops_types.h deleted file mode 100644 index 5678a9ddf8178..0000000000000 --- a/kernel/bpf/bpf_struct_ops_types.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* internal file - do not include directly */ - -#ifdef CONFIG_BPF_JIT -#ifdef CONFIG_NET -BPF_STRUCT_OPS_TYPE(bpf_dummy_ops) -#endif -#ifdef CONFIG_INET -#include -BPF_STRUCT_OPS_TYPE(tcp_congestion_ops) -#endif -#endif diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 2e5e6ec5d965c..5c3e526a2dece 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -5972,8 +5973,6 @@ struct btf *btf_parse_vmlinux(void) /* btf_parse_vmlinux() runs under bpf_verifier_lock */ bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]); - bpf_struct_ops_init(btf, log); - refcount_set(&btf->refcnt, 1); err = btf_alloc_id(btf); @@ -8706,11 +8705,13 @@ bool btf_type_ids_nocast_alias(struct bpf_verifier_log *log, return !strncmp(reg_name, arg_name, cmp_len); } +#ifdef CONFIG_BPF_JIT static int -btf_add_struct_ops(struct btf *btf, struct bpf_struct_ops *st_ops) +btf_add_struct_ops(struct btf *btf, struct bpf_struct_ops *st_ops, + struct bpf_verifier_log *log) { struct btf_struct_ops_tab *tab, *new_tab; - int i; + int i, err; tab = btf->struct_ops_tab; if (!tab) { @@ -8740,7 +8741,84 @@ btf_add_struct_ops(struct btf *btf, struct bpf_struct_ops *st_ops) tab->ops[btf->struct_ops_tab->cnt].st_ops = st_ops; + err = bpf_struct_ops_desc_init(&tab->ops[btf->struct_ops_tab->cnt], btf, log); + if (err) + return err; + btf->struct_ops_tab->cnt++; return 0; } + +const struct bpf_struct_ops_desc * +bpf_struct_ops_find_value(struct btf *btf, u32 value_id) +{ + const struct bpf_struct_ops_desc *st_ops_list; + unsigned int i; + u32 cnt; + + if (!value_id) + return NULL; + if (!btf->struct_ops_tab) + return NULL; + + cnt = btf->struct_ops_tab->cnt; + st_ops_list = btf->struct_ops_tab->ops; + for (i = 0; i < cnt; i++) { + if (st_ops_list[i].value_id == value_id) + return &st_ops_list[i]; + } + + return NULL; +} + +const struct bpf_struct_ops_desc * +bpf_struct_ops_find(struct btf *btf, u32 type_id) +{ + const struct bpf_struct_ops_desc *st_ops_list; + unsigned int i; + u32 cnt; + + if (!type_id) + return NULL; + if (!btf->struct_ops_tab) + return NULL; + + cnt = btf->struct_ops_tab->cnt; + st_ops_list = btf->struct_ops_tab->ops; + for (i = 0; i < cnt; i++) { + if (st_ops_list[i].type_id == type_id) + return &st_ops_list[i]; + } + + return NULL; +} + +int __register_bpf_struct_ops(struct bpf_struct_ops *st_ops) +{ + struct bpf_verifier_log *log; + struct btf *btf; + int err = 0; + + btf = btf_get_module_btf(st_ops->owner); + if (!btf) + return -EINVAL; + + log = kzalloc(sizeof(*log), GFP_KERNEL | __GFP_NOWARN); + if (!log) { + err = -ENOMEM; + goto errout; + } + + log->level = BPF_LOG_KERNEL; + + err = btf_add_struct_ops(btf, st_ops, log); + +errout: + kfree(log); + btf_put(btf); + + return err; +} +EXPORT_SYMBOL_GPL(__register_bpf_struct_ops); +#endif diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index ba2c58dba2da5..02de71719aeda 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -7,7 +7,7 @@ #include #include -extern struct bpf_struct_ops bpf_bpf_dummy_ops; +static struct bpf_struct_ops bpf_bpf_dummy_ops; /* A common type for test_N with return value in bpf_dummy_ops */ typedef int (*dummy_ops_test_ret_fn)(struct bpf_dummy_ops_state *state, ...); @@ -256,7 +256,7 @@ static struct bpf_dummy_ops __bpf_bpf_dummy_ops = { .test_sleepable = bpf_dummy_test_sleepable, }; -struct bpf_struct_ops bpf_bpf_dummy_ops = { +static struct bpf_struct_ops bpf_bpf_dummy_ops = { .verifier_ops = &bpf_dummy_verifier_ops, .init = bpf_dummy_init, .check_member = bpf_dummy_ops_check_member, @@ -265,4 +265,11 @@ struct bpf_struct_ops bpf_bpf_dummy_ops = { .unreg = bpf_dummy_unreg, .name = "bpf_dummy_ops", .cfi_stubs = &__bpf_bpf_dummy_ops, + .owner = THIS_MODULE, }; + +static int __init bpf_dummy_struct_ops_init(void) +{ + return register_bpf_struct_ops(&bpf_bpf_dummy_ops, bpf_dummy_ops); +} +late_initcall(bpf_dummy_struct_ops_init); diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index dffd8828079b6..8e7716256d3c1 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -12,7 +12,7 @@ #include /* "extern" is to avoid sparse warning. It is only used in bpf_struct_ops.c. */ -extern struct bpf_struct_ops bpf_tcp_congestion_ops; +static struct bpf_struct_ops bpf_tcp_congestion_ops; static u32 unsupported_ops[] = { offsetof(struct tcp_congestion_ops, get_info), @@ -345,7 +345,7 @@ static struct tcp_congestion_ops __bpf_ops_tcp_congestion_ops = { .release = __bpf_tcp_ca_release, }; -struct bpf_struct_ops bpf_tcp_congestion_ops = { +static struct bpf_struct_ops bpf_tcp_congestion_ops = { .verifier_ops = &bpf_tcp_ca_verifier_ops, .reg = bpf_tcp_ca_reg, .unreg = bpf_tcp_ca_unreg, @@ -356,10 +356,16 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = { .validate = bpf_tcp_ca_validate, .name = "tcp_congestion_ops", .cfi_stubs = &__bpf_ops_tcp_congestion_ops, + .owner = THIS_MODULE, }; static int __init bpf_tcp_ca_kfunc_init(void) { - return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set); + int ret; + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set); + ret = ret ?: register_bpf_struct_ops(&bpf_tcp_congestion_ops, tcp_congestion_ops); + + return ret; } late_initcall(bpf_tcp_ca_kfunc_init); -- cgit 1.2.3-korg From a6348a7104e0dac7b9b4b7042c3c8c36b81d71e7 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 22 Jan 2024 10:19:54 -0800 Subject: net/ipv6: Remove unnecessary pr_debug() logs In the ipv6 system, we have some logs basically dumping the name of the function that is being called. This is not ideal, since ftrace give us "for free". Moreover, checkpatch is not happy when touching that code: WARNING: Unnecessary ftrace-like logging - prefer using ftrace Remove debug functions that only print the current function name. Signed-off-by: Breno Leitao Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240122181955.2391676-1-leitao@debian.org Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_fib.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 4fc2cae0d116c..fb41bec6b4b5d 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -751,8 +751,6 @@ static struct fib6_node *fib6_add_1(struct net *net, int bit; __be32 dir = 0; - RT6_TRACE("fib6_add_1\n"); - /* insert node in tree */ fn = root; @@ -1905,8 +1903,6 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, struct net *net = info->nl_net; bool notify_del = false; - RT6_TRACE("fib6_del_route\n"); - /* If the deleted route is the first in the node and it is not part of * a multipath route, then we need to replace it with the next route * in the node, if exists. -- cgit 1.2.3-korg From 20df28fb5bd8081a05ec34542bd45e4f3feeced5 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 22 Jan 2024 10:19:55 -0800 Subject: net/ipv6: resolve warning in ip6_fib.c In some configurations, the 'iter' variable in function fib6_repair_tree() is unused, resulting the following warning when compiled with W=1. net/ipv6/ip6_fib.c:1781:6: warning: variable 'iter' set but not used [-Wunused-but-set-variable] 1781 | int iter = 0; | ^ It is unclear what is the advantage of this RT6_TRACE() macro[1], since users can control pr_debug() in runtime, which is better than at compilation time. pr_debug() has no overhead when disabled. Remove the RT6_TRACE() in favor of simple pr_debug() helpers. [1] Link: https://lore.kernel.org/all/ZZwSEJv2HgI0cD4J@gmail.com/ Signed-off-by: Breno Leitao Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240122181955.2391676-2-leitao@debian.org Signed-off-by: Jakub Kicinski --- include/net/ip6_fib.h | 6 ------ net/ipv6/ip6_fib.c | 15 +++++++++------ net/ipv6/route.c | 8 ++++---- 3 files changed, 13 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 9ba6413fd2e3e..360b12e618507 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -30,12 +30,6 @@ #define RT6_DEBUG 2 -#if RT6_DEBUG >= 3 -#define RT6_TRACE(x...) pr_debug(x) -#else -#define RT6_TRACE(x...) do { ; } while (0) -#endif - struct rt6_info; struct fib6_info; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index fb41bec6b4b5d..38a0348b1d178 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1801,7 +1801,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net, lockdep_is_held(&table->tb6_lock)); struct fib6_info *new_fn_leaf; - RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter); + pr_debug("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter); iter++; WARN_ON(fn->fn_flags & RTN_RTINFO); @@ -1864,7 +1864,8 @@ static struct fib6_node *fib6_repair_tree(struct net *net, FOR_WALKERS(net, w) { if (!child) { if (w->node == fn) { - RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate); + pr_debug("W %p adjusted by delnode 1, s=%d/%d\n", + w, w->state, nstate); w->node = pn; w->state = nstate; } @@ -1872,10 +1873,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net, if (w->node == fn) { w->node = child; if (children&2) { - RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); + pr_debug("W %p adjusted by delnode 2, s=%d\n", + w, w->state); w->state = w->state >= FWS_R ? FWS_U : FWS_INIT; } else { - RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); + pr_debug("W %p adjusted by delnode 2, s=%d\n", + w, w->state); w->state = w->state >= FWS_C ? FWS_U : FWS_INIT; } } @@ -1951,7 +1954,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, read_lock(&net->ipv6.fib6_walker_lock); FOR_WALKERS(net, w) { if (w->state == FWS_C && w->leaf == rt) { - RT6_TRACE("walker %p adjusted by delroute\n", w); + pr_debug("walker %p adjusted by delroute\n", w); w->leaf = rcu_dereference_protected(rt->fib6_next, lockdep_is_held(&table->tb6_lock)); if (!w->leaf) @@ -2289,7 +2292,7 @@ static int fib6_age(struct fib6_info *rt, void *arg) if (rt->fib6_flags & RTF_EXPIRES && rt->expires) { if (time_after(now, rt->expires)) { - RT6_TRACE("expiring %p\n", rt); + pr_debug("expiring %p\n", rt); return -1; } gc_args->more++; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ea1dec8448fce..63b4c60565820 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2085,12 +2085,12 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket, */ if (!(rt->rt6i_flags & RTF_EXPIRES)) { if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) { - RT6_TRACE("aging clone %p\n", rt); + pr_debug("aging clone %p\n", rt); rt6_remove_exception(bucket, rt6_ex); return; } } else if (time_after(jiffies, rt->dst.expires)) { - RT6_TRACE("purging expired route %p\n", rt); + pr_debug("purging expired route %p\n", rt); rt6_remove_exception(bucket, rt6_ex); return; } @@ -2101,8 +2101,8 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket, neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); if (!(neigh && (neigh->flags & NTF_ROUTER))) { - RT6_TRACE("purging route %p via non-router but gateway\n", - rt); + pr_debug("purging route %p via non-router but gateway\n", + rt); rt6_remove_exception(bucket, rt6_ex); return; } -- cgit 1.2.3-korg From bbc1d24724e110b86a1a7c3c1724ce0d62cc1e2e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:04 -0800 Subject: bpf: Take into account BPF token when fetching helper protos Instead of performing unconditional system-wide bpf_capable() and perfmon_capable() calls inside bpf_base_func_proto() function (and other similar ones) to determine eligibility of a given BPF helper for a given program, use previously recorded BPF token during BPF_PROG_LOAD command handling to inform the decision. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20240124022127.2379740-8-andrii@kernel.org --- drivers/media/rc/bpf-lirc.c | 2 +- include/linux/bpf.h | 5 +++-- kernel/bpf/cgroup.c | 6 +++--- kernel/bpf/helpers.c | 6 +++--- kernel/bpf/syscall.c | 5 +++-- kernel/trace/bpf_trace.c | 2 +- net/core/filter.c | 32 ++++++++++++++++---------------- net/ipv4/bpf_tcp_ca.c | 2 +- net/netfilter/nf_bpf_link.c | 2 +- 9 files changed, 32 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c index fe17c7f98e810..6d07693c6b9f5 100644 --- a/drivers/media/rc/bpf-lirc.c +++ b/drivers/media/rc/bpf-lirc.c @@ -110,7 +110,7 @@ lirc_mode2_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_get_prandom_u32: return &bpf_get_prandom_u32_proto; case BPF_FUNC_trace_printk: - if (perfmon_capable()) + if (bpf_token_capable(prog->aux->token, CAP_PERFMON)) return bpf_get_trace_printk_proto(); fallthrough; default: diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d0bf37e3f166a..1325225bf6022 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2550,7 +2550,8 @@ int btf_find_next_decl_tag(const struct btf *btf, const struct btf_type *pt, struct bpf_prog *bpf_prog_by_id(u32 id); struct bpf_link *bpf_link_by_id(u32 id); -const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); +const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id, + const struct bpf_prog *prog); void bpf_task_storage_free(struct task_struct *task); void bpf_cgrp_storage_free(struct cgroup *cgroup); bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); @@ -2810,7 +2811,7 @@ static inline int btf_struct_access(struct bpf_verifier_log *log, } static inline const struct bpf_func_proto * -bpf_base_func_proto(enum bpf_func_id func_id) +bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { return NULL; } diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 491d20038cbe0..98e0e3835b28b 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1630,7 +1630,7 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_perf_event_output: return &bpf_event_output_data_proto; default: - return bpf_base_func_proto(func_id); + return bpf_base_func_proto(func_id, prog); } } @@ -2191,7 +2191,7 @@ sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_perf_event_output: return &bpf_event_output_data_proto; default: - return bpf_base_func_proto(func_id); + return bpf_base_func_proto(func_id, prog); } } @@ -2348,7 +2348,7 @@ cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_perf_event_output: return &bpf_event_output_data_proto; default: - return bpf_base_func_proto(func_id); + return bpf_base_func_proto(func_id, prog); } } diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index e04ca1af89272..bcb951a2ecf4b 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1680,7 +1680,7 @@ const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak; const struct bpf_func_proto bpf_task_pt_regs_proto __weak; const struct bpf_func_proto * -bpf_base_func_proto(enum bpf_func_id func_id) +bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_map_lookup_elem: @@ -1731,7 +1731,7 @@ bpf_base_func_proto(enum bpf_func_id func_id) break; } - if (!bpf_capable()) + if (!bpf_token_capable(prog->aux->token, CAP_BPF)) return NULL; switch (func_id) { @@ -1789,7 +1789,7 @@ bpf_base_func_proto(enum bpf_func_id func_id) break; } - if (!perfmon_capable()) + if (!bpf_token_capable(prog->aux->token, CAP_PERFMON)) return NULL; switch (func_id) { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 61b4bf4cc2870..f76408c957ced 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5772,7 +5772,7 @@ static const struct bpf_func_proto bpf_sys_bpf_proto = { const struct bpf_func_proto * __weak tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { - return bpf_base_func_proto(func_id); + return bpf_base_func_proto(func_id, prog); } BPF_CALL_1(bpf_sys_close, u32, fd) @@ -5822,7 +5822,8 @@ syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_sys_bpf: - return !perfmon_capable() ? NULL : &bpf_sys_bpf_proto; + return !bpf_token_capable(prog->aux->token, CAP_PERFMON) + ? NULL : &bpf_sys_bpf_proto; case BPF_FUNC_btf_find_by_name_kind: return &bpf_btf_find_by_name_kind_proto; case BPF_FUNC_sys_close: diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index c98c20abaf99d..64fdaf79d1136 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1629,7 +1629,7 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_trace_vprintk: return bpf_get_trace_vprintk_proto(); default: - return bpf_base_func_proto(func_id); + return bpf_base_func_proto(func_id, prog); } } diff --git a/net/core/filter.c b/net/core/filter.c index 6a7abbaa50b88..521bcd0f5e4d4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -87,7 +87,7 @@ #include "dev.h" static const struct bpf_func_proto * -bpf_sk_base_func_proto(enum bpf_func_id func_id); +bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len) { @@ -7862,7 +7862,7 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: - return bpf_base_func_proto(func_id); + return bpf_base_func_proto(func_id, prog); } } @@ -7955,7 +7955,7 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return NULL; } default: - return bpf_sk_base_func_proto(func_id); + return bpf_sk_base_func_proto(func_id, prog); } } @@ -7974,7 +7974,7 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_perf_event_output: return &bpf_skb_event_output_proto; default: - return bpf_sk_base_func_proto(func_id); + return bpf_sk_base_func_proto(func_id, prog); } } @@ -8161,7 +8161,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #endif #endif default: - return bpf_sk_base_func_proto(func_id); + return bpf_sk_base_func_proto(func_id, prog); } } @@ -8220,7 +8220,7 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #endif #endif default: - return bpf_sk_base_func_proto(func_id); + return bpf_sk_base_func_proto(func_id, prog); } #if IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES) @@ -8281,7 +8281,7 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_tcp_sock_proto; #endif /* CONFIG_INET */ default: - return bpf_sk_base_func_proto(func_id); + return bpf_sk_base_func_proto(func_id, prog); } } @@ -8323,7 +8323,7 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_cgroup_classid_curr_proto; #endif default: - return bpf_sk_base_func_proto(func_id); + return bpf_sk_base_func_proto(func_id, prog); } } @@ -8367,7 +8367,7 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_skc_lookup_tcp_proto; #endif default: - return bpf_sk_base_func_proto(func_id); + return bpf_sk_base_func_proto(func_id, prog); } } @@ -8378,7 +8378,7 @@ flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_skb_load_bytes: return &bpf_flow_dissector_load_bytes_proto; default: - return bpf_sk_base_func_proto(func_id); + return bpf_sk_base_func_proto(func_id, prog); } } @@ -8405,7 +8405,7 @@ lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_skb_under_cgroup: return &bpf_skb_under_cgroup_proto; default: - return bpf_sk_base_func_proto(func_id); + return bpf_sk_base_func_proto(func_id, prog); } } @@ -11236,7 +11236,7 @@ sk_reuseport_func_proto(enum bpf_func_id func_id, case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: - return bpf_base_func_proto(func_id); + return bpf_base_func_proto(func_id, prog); } } @@ -11418,7 +11418,7 @@ sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_sk_release: return &bpf_sk_release_proto; default: - return bpf_sk_base_func_proto(func_id); + return bpf_sk_base_func_proto(func_id, prog); } } @@ -11752,7 +11752,7 @@ const struct bpf_func_proto bpf_sock_from_file_proto = { }; static const struct bpf_func_proto * -bpf_sk_base_func_proto(enum bpf_func_id func_id) +bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { const struct bpf_func_proto *func; @@ -11781,10 +11781,10 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id) case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: - return bpf_base_func_proto(func_id); + return bpf_base_func_proto(func_id, prog); } - if (!perfmon_capable()) + if (!bpf_token_capable(prog->aux->token, CAP_PERFMON)) return NULL; return func; diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 8e7716256d3c1..834edc18463ac 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -197,7 +197,7 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: - return bpf_base_func_proto(func_id); + return bpf_base_func_proto(func_id, prog); } } diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c index 0e4beae421f83..5257d5e7eb09d 100644 --- a/net/netfilter/nf_bpf_link.c +++ b/net/netfilter/nf_bpf_link.c @@ -314,7 +314,7 @@ static bool nf_is_valid_access(int off, int size, enum bpf_access_type type, static const struct bpf_func_proto * bpf_nf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { - return bpf_base_func_proto(func_id); + return bpf_base_func_proto(func_id, prog); } const struct bpf_verifier_ops netfilter_verifier_ops = { -- cgit 1.2.3-korg From d79a3549754725bb90e58104417449edddf3da3d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:05 -0800 Subject: bpf: Consistently use BPF token throughout BPF verifier logic Remove remaining direct queries to perfmon_capable() and bpf_capable() in BPF verifier logic and instead use BPF token (if available) to make decisions about privileges. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20240124022127.2379740-9-andrii@kernel.org --- include/linux/bpf.h | 16 ++++++++-------- include/linux/filter.h | 2 +- kernel/bpf/arraymap.c | 2 +- kernel/bpf/core.c | 2 +- kernel/bpf/verifier.c | 13 ++++++------- net/core/filter.c | 4 ++-- 6 files changed, 19 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1325225bf6022..4e146e9708be3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2261,24 +2261,24 @@ extern int sysctl_unprivileged_bpf_disabled; bool bpf_token_capable(const struct bpf_token *token, int cap); -static inline bool bpf_allow_ptr_leaks(void) +static inline bool bpf_allow_ptr_leaks(const struct bpf_token *token) { - return perfmon_capable(); + return bpf_token_capable(token, CAP_PERFMON); } -static inline bool bpf_allow_uninit_stack(void) +static inline bool bpf_allow_uninit_stack(const struct bpf_token *token) { - return perfmon_capable(); + return bpf_token_capable(token, CAP_PERFMON); } -static inline bool bpf_bypass_spec_v1(void) +static inline bool bpf_bypass_spec_v1(const struct bpf_token *token) { - return cpu_mitigations_off() || perfmon_capable(); + return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); } -static inline bool bpf_bypass_spec_v4(void) +static inline bool bpf_bypass_spec_v4(const struct bpf_token *token) { - return cpu_mitigations_off() || perfmon_capable(); + return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); } int bpf_map_new_fd(struct bpf_map *map, int flags); diff --git a/include/linux/filter.h b/include/linux/filter.h index 35f067fd3840a..fee070b9826ee 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1140,7 +1140,7 @@ static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog) return false; if (!bpf_jit_harden) return false; - if (bpf_jit_harden == 1 && bpf_capable()) + if (bpf_jit_harden == 1 && bpf_token_capable(prog->aux->token, CAP_BPF)) return false; return true; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 0bdbbbeab1550..13358675ff2ed 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -82,7 +82,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; int numa_node = bpf_map_attr_numa_node(attr); u32 elem_size, index_mask, max_entries; - bool bypass_spec_v1 = bpf_bypass_spec_v1(); + bool bypass_spec_v1 = bpf_bypass_spec_v1(NULL); u64 array_size, mask64; struct bpf_array *array; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 00dccba297698..71c459a51d9e1 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -682,7 +682,7 @@ static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp) void bpf_prog_kallsyms_add(struct bpf_prog *fp) { if (!bpf_prog_kallsyms_candidate(fp) || - !bpf_capable()) + !bpf_token_capable(fp->aux->token, CAP_BPF)) return; bpf_prog_ksym_set_addr(fp); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f31868ba0c2d8..fe833e831cb64 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -20830,7 +20830,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 env->prog = *prog; env->ops = bpf_verifier_ops[env->prog->type]; env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel); - is_priv = bpf_capable(); + + env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token); + env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token); + env->bypass_spec_v1 = bpf_bypass_spec_v1(env->prog->aux->token); + env->bypass_spec_v4 = bpf_bypass_spec_v4(env->prog->aux->token); + env->bpf_capable = is_priv = bpf_token_capable(env->prog->aux->token, CAP_BPF); bpf_get_btf_vmlinux(); @@ -20862,12 +20867,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (attr->prog_flags & BPF_F_ANY_ALIGNMENT) env->strict_alignment = false; - env->allow_ptr_leaks = bpf_allow_ptr_leaks(); - env->allow_uninit_stack = bpf_allow_uninit_stack(); - env->bypass_spec_v1 = bpf_bypass_spec_v1(); - env->bypass_spec_v4 = bpf_bypass_spec_v4(); - env->bpf_capable = bpf_capable(); - if (is_priv) env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS; diff --git a/net/core/filter.c b/net/core/filter.c index 521bcd0f5e4d4..40121475e8d15 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8580,7 +8580,7 @@ static bool cg_skb_is_valid_access(int off, int size, return false; case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data_end): - if (!bpf_capable()) + if (!bpf_token_capable(prog->aux->token, CAP_BPF)) return false; break; } @@ -8592,7 +8592,7 @@ static bool cg_skb_is_valid_access(int off, int size, case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): break; case bpf_ctx_range(struct __sk_buff, tstamp): - if (!bpf_capable()) + if (!bpf_token_capable(prog->aux->token, CAP_BPF)) return false; break; default: -- cgit 1.2.3-korg From 88bf1b8f3c3196c175c9291605ec741756979862 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 22 Jan 2024 21:12:01 -0800 Subject: tipc: socket: remove Excess struct member kernel-doc warning Remove a kernel-doc description to squelch a warning: socket.c:143: warning: Excess struct member 'blocking_link' description in 'tipc_sock' Signed-off-by: Randy Dunlap Cc: Jon Maloy Cc: Ying Xue Cc: Jonathan Corbet Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240123051201.24701-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- net/tipc/socket.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index bb1118d02f953..7e4135db58163 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -80,7 +80,6 @@ struct sockaddr_pair { * @phdr: preformatted message header used when sending messages * @cong_links: list of congested links * @publications: list of publications for port - * @blocking_link: address of the congested link we are currently sleeping on * @pub_count: total # of publications port has made during its lifetime * @conn_timeout: the time we can wait for an unresponded setup request * @probe_unacked: probe has not received ack yet -- cgit 1.2.3-korg From 5ca1a5153a28dc8bcfeeafa983915b76af457929 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 22 Jan 2024 21:11:52 -0800 Subject: tipc: node: remove Excess struct member kernel-doc warnings Remove 2 kernel-doc descriptions to squelch warnings: node.c:150: warning: Excess struct member 'inputq' description in 'tipc_node' node.c:150: warning: Excess struct member 'namedq' description in 'tipc_node' Signed-off-by: Randy Dunlap Cc: Jon Maloy Cc: Ying Xue Cc: Jonathan Corbet Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240123051152.23684-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- net/tipc/node.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/tipc/node.c b/net/tipc/node.c index 3105abe97bb9c..c1e890a824347 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -86,8 +86,6 @@ struct tipc_bclink_entry { * @lock: rwlock governing access to structure * @net: the applicable net namespace * @hash: links to adjacent nodes in unsorted hash chain - * @inputq: pointer to input queue containing messages for msg event - * @namedq: pointer to name table input queue with name table messages * @active_links: bearer ids of active links, used as index into links[] array * @links: array containing references to all links to node * @bc_entry: broadcast link entry -- cgit 1.2.3-korg From 63b21caba17ef2df5982c7b75eb989100212b27b Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Fri, 19 Jan 2024 12:27:42 +0100 Subject: xfrm: introduce forwarding of ICMP Error messages This commit aligns with RFC 4301, Section 6, and addresses the requirement to forward unauthenticated ICMP error messages that do not match any xfrm policies. It utilizes the ICMP payload as an skb and performs a reverse lookup. If a policy match is found, forward the packet. The ICMP payload typically contains a partial IP packet that is likely responsible for the error message. The following error types will be forwarded: - IPv4 ICMP error types: ICMP_DEST_UNREACH & ICMP_TIME_EXCEEDED - IPv6 ICMPv6 error types: ICMPV6_DEST_UNREACH, ICMPV6_PKT_TOOBIG, ICMPV6_TIME_EXCEED To implement this feature, a reverse lookup has been added to the xfrm forward path, making use of the ICMP payload as the skb. To enable this functionality from user space, the XFRM_POLICY_ICMP flag should be added to the outgoing and forward policies, and the XFRM_STATE_ICMP flag should be set on incoming states. e.g. ip xfrm policy add flag icmp tmpl ip xfrm policy src 192.0.2.0/24 dst 192.0.1.0/25 dir out priority 2084302 ptype main flag icmp ip xfrm state add ...flag icmp ip xfrm state root@west:~#ip x s src 192.1.2.23 dst 192.1.2.45 proto esp spi 0xa7b76872 reqid 16389 mode tunnel replay-window 32 flag icmp af-unspec Changes since v5: - fix return values bool->int, feedback from Steffen Changes since v4: - split the series to only ICMP erorr forwarding Changes since v3: no code chage - add missing white spaces detected by checkpatch.pl Changes since v2: reviewed by Steffen Klassert - user consume_skb instead of kfree_skb for the inner skb - fixed newskb leaks in error paths - free the newskb once inner flow is decoded with change due to commit 7a0207094f1b ("xfrm: policy: replace session decode with flow dissector") - if xfrm_decode_session_reverse() on inner payload fails ignore. do not increment error counter Changes since v1: - Move IPv6 variable declaration inside IS_ENABLED(CONFIG_IPV6) Changes since RFC: - Fix calculation of ICMPv6 header length Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 140 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 1b7e751597277..b4850a8f14adf 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -3503,6 +3504,128 @@ static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int return 0; } +static bool icmp_err_packet(const struct flowi *fl, unsigned short family) +{ + const struct flowi4 *fl4 = &fl->u.ip4; + + if (family == AF_INET && + fl4->flowi4_proto == IPPROTO_ICMP && + (fl4->fl4_icmp_type == ICMP_DEST_UNREACH || + fl4->fl4_icmp_type == ICMP_TIME_EXCEEDED)) + return true; + +#if IS_ENABLED(CONFIG_IPV6) + if (family == AF_INET6) { + const struct flowi6 *fl6 = &fl->u.ip6; + + if (fl6->flowi6_proto == IPPROTO_ICMPV6 && + (fl6->fl6_icmp_type == ICMPV6_DEST_UNREACH || + fl6->fl6_icmp_type == ICMPV6_PKT_TOOBIG || + fl6->fl6_icmp_type == ICMPV6_TIME_EXCEED)) + return true; + } +#endif + return false; +} + +static bool xfrm_icmp_flow_decode(struct sk_buff *skb, unsigned short family, + const struct flowi *fl, struct flowi *fl1) +{ + bool ret = true; + struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); + int hl = family == AF_INET ? (sizeof(struct iphdr) + sizeof(struct icmphdr)) : + (sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr)); + + if (!newskb) + return true; + + if (!pskb_pull(newskb, hl)) + goto out; + + skb_reset_network_header(newskb); + + if (xfrm_decode_session_reverse(dev_net(skb->dev), newskb, fl1, family) < 0) + goto out; + + fl1->flowi_oif = fl->flowi_oif; + fl1->flowi_mark = fl->flowi_mark; + fl1->flowi_tos = fl->flowi_tos; + nf_nat_decode_session(newskb, fl1, family); + ret = false; + +out: + consume_skb(newskb); + return ret; +} + +static bool xfrm_selector_inner_icmp_match(struct sk_buff *skb, unsigned short family, + const struct xfrm_selector *sel, + const struct flowi *fl) +{ + bool ret = false; + + if (icmp_err_packet(fl, family)) { + struct flowi fl1; + + if (xfrm_icmp_flow_decode(skb, family, fl, &fl1)) + return ret; + + ret = xfrm_selector_match(sel, &fl1, family); + } + + return ret; +} + +static inline struct +xfrm_policy *xfrm_in_fwd_icmp(struct sk_buff *skb, + const struct flowi *fl, unsigned short family, + u32 if_id) +{ + struct xfrm_policy *pol = NULL; + + if (icmp_err_packet(fl, family)) { + struct flowi fl1; + struct net *net = dev_net(skb->dev); + + if (xfrm_icmp_flow_decode(skb, family, fl, &fl1)) + return pol; + + pol = xfrm_policy_lookup(net, &fl1, family, XFRM_POLICY_FWD, if_id); + } + + return pol; +} + +static inline struct +dst_entry *xfrm_out_fwd_icmp(struct sk_buff *skb, struct flowi *fl, + unsigned short family, struct dst_entry *dst) +{ + if (icmp_err_packet(fl, family)) { + struct net *net = dev_net(skb->dev); + struct dst_entry *dst2; + struct flowi fl1; + + if (xfrm_icmp_flow_decode(skb, family, fl, &fl1)) + return dst; + + dst_hold(dst); + + dst2 = xfrm_lookup(net, dst, &fl1, NULL, (XFRM_LOOKUP_QUEUE | XFRM_LOOKUP_ICMP)); + + if (IS_ERR(dst2)) + return dst; + + if (dst2->xfrm) { + dst_release(dst); + dst = dst2; + } else { + dst_release(dst2); + } + } + + return dst; +} + int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) { @@ -3549,9 +3672,17 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, for (i = sp->len - 1; i >= 0; i--) { struct xfrm_state *x = sp->xvec[i]; + int ret = 0; + if (!xfrm_selector_match(&x->sel, &fl, family)) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); - return 0; + ret = 1; + if (x->props.flags & XFRM_STATE_ICMP && + xfrm_selector_inner_icmp_match(skb, family, &x->sel, &fl)) + ret = 0; + if (ret) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); + return 0; + } } } } @@ -3574,6 +3705,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, return 0; } + if (!pol && dir == XFRM_POLICY_FWD) + pol = xfrm_in_fwd_icmp(skb, &fl, family, if_id); + if (!pol) { if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); @@ -3707,6 +3841,10 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) res = 0; dst = NULL; } + + if (dst && !dst->xfrm) + dst = xfrm_out_fwd_icmp(skb, &fl, family, dst); + skb_dst_set(skb, dst); return res; } -- cgit 1.2.3-korg From ab1e1a38de240057ed108075abd1309c02e2a2a4 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Wed, 24 Jan 2024 14:31:50 +0800 Subject: xfrm6_tunnel: Use KMEM_CACHE instead of kmem_cache_create Use the new KMEM_CACHE() macro instead of direct kmem_cache_create to simplify the creation of SLAB caches. Signed-off-by: Kunwu Chan Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_tunnel.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 1323f2f6928e2..0f3df26878a36 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -355,10 +355,7 @@ static int __init xfrm6_tunnel_init(void) { int rv; - xfrm6_tunnel_spi_kmem = kmem_cache_create("xfrm6_tunnel_spi", - sizeof(struct xfrm6_tunnel_spi), - 0, SLAB_HWCACHE_ALIGN, - NULL); + xfrm6_tunnel_spi_kmem = KMEM_CACHE(xfrm6_tunnel_spi, SLAB_HWCACHE_ALIGN); if (!xfrm6_tunnel_spi_kmem) return -ENOMEM; rv = register_pernet_subsys(&xfrm6_tunnel_net_ops); -- cgit 1.2.3-korg From 7f78840cf4d4ac9ab36ddf574a025a45835375d0 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Wed, 10 Jan 2024 08:42:06 +0300 Subject: wifi: wireless: avoid strlen() in cfg80211_michael_mic_failure() In 'cfg80211_michael_mic_failure()', avoid extra call to 'strlen()' by using the value returned by 'sprintf()'. Compile tested only. Signed-off-by: Dmitry Antipov Link: https://msgid.link/20240110054246.371651-1-dmantipov@yandex.ru Signed-off-by: Johannes Berg --- net/wireless/mlme.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index f635a8b6ca2ec..43ba7891e2a3e 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -241,12 +241,12 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, char *buf = kmalloc(128, gfp); if (buf) { - sprintf(buf, "MLME-MICHAELMICFAILURE.indication(" - "keyid=%d %scast addr=%pM)", key_id, - key_type == NL80211_KEYTYPE_GROUP ? "broad" : "uni", - addr); memset(&wrqu, 0, sizeof(wrqu)); - wrqu.data.length = strlen(buf); + wrqu.data.length = + sprintf(buf, "MLME-MICHAELMICFAILURE." + "indication(keyid=%d %scast addr=%pM)", + key_id, key_type == NL80211_KEYTYPE_GROUP + ? "broad" : "uni", addr); wireless_send_event(dev, IWEVCUSTOM, &wrqu, buf); kfree(buf); } -- cgit 1.2.3-korg From 4d1d6b3f45999b1ddde53831d639a67e2655285f Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Tue, 2 Jan 2024 21:35:32 +0200 Subject: wifi: cfg80211: add RNR with reporting AP information If the reporting AP is part of the same MLD, then an entry in the RNR is required in order to discover it again from the BSS generated from the per-STA profile in the Multi-Link Probe Response. We need this because we do not have a direct concept of an MLD AP and just do the lookup from one to the other on the fly if needed. As such, we need to ensure that this lookup will work both ways. Fixes: 2481b5da9c6b ("wifi: cfg80211: handle BSS data contained in ML probe responses") Signed-off-by: Benjamin Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240102213313.4cb3dbb1d84f.I7c74edec83c5d7598cdd578929fd0876d67aef7f@changeid [roll in off-by-one fix and test updates from Benjamin] Signed-off-by: Johannes Berg --- net/wireless/scan.c | 135 ++++++++++++++++++++++++++++++++++++++++++++-- net/wireless/tests/scan.c | 36 +++++++++++-- 2 files changed, 163 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 2249b1a89d1c4..01618c4059f48 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2617,6 +2617,103 @@ cfg80211_tbtt_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id, return 0; } +static struct element * +cfg80211_gen_reporter_rnr(struct cfg80211_bss *source_bss, bool is_mbssid, + bool same_mld, u8 link_id, u8 bss_change_count, + gfp_t gfp) +{ + const struct cfg80211_bss_ies *ies; + struct ieee80211_neighbor_ap_info ap_info; + struct ieee80211_tbtt_info_ge_11 tbtt_info; + u32 short_ssid; + const struct element *elem; + struct element *res; + + /* + * We only generate the RNR to permit ML lookups. For that we do not + * need an entry for the corresponding transmitting BSS, lets just skip + * it even though it would be easy to add. + */ + if (!same_mld) + return NULL; + + /* We could use tx_data->ies if we change cfg80211_calc_short_ssid */ + rcu_read_lock(); + ies = rcu_dereference(source_bss->ies); + + ap_info.tbtt_info_len = offsetofend(typeof(tbtt_info), mld_params); + ap_info.tbtt_info_hdr = + u8_encode_bits(IEEE80211_TBTT_INFO_TYPE_TBTT, + IEEE80211_AP_INFO_TBTT_HDR_TYPE) | + u8_encode_bits(0, IEEE80211_AP_INFO_TBTT_HDR_COUNT); + + ap_info.channel = ieee80211_frequency_to_channel(source_bss->channel->center_freq); + + /* operating class */ + elem = cfg80211_find_elem(WLAN_EID_SUPPORTED_REGULATORY_CLASSES, + ies->data, ies->len); + if (elem && elem->datalen >= 1) { + ap_info.op_class = elem->data[0]; + } else { + struct cfg80211_chan_def chandef; + + /* The AP is not providing us with anything to work with. So + * make up a somewhat reasonable operating class, but don't + * bother with it too much as no one will ever use the + * information. + */ + cfg80211_chandef_create(&chandef, source_bss->channel, + NL80211_CHAN_NO_HT); + + if (!ieee80211_chandef_to_operating_class(&chandef, + &ap_info.op_class)) + goto out_unlock; + } + + /* Just set TBTT offset and PSD 20 to invalid/unknown */ + tbtt_info.tbtt_offset = 255; + tbtt_info.psd_20 = IEEE80211_RNR_TBTT_PARAMS_PSD_RESERVED; + + memcpy(tbtt_info.bssid, source_bss->bssid, ETH_ALEN); + if (cfg80211_calc_short_ssid(ies, &elem, &short_ssid)) + goto out_unlock; + + rcu_read_unlock(); + + tbtt_info.short_ssid = cpu_to_le32(short_ssid); + + tbtt_info.bss_params = IEEE80211_RNR_TBTT_PARAMS_SAME_SSID; + + if (is_mbssid) { + tbtt_info.bss_params |= IEEE80211_RNR_TBTT_PARAMS_MULTI_BSSID; + tbtt_info.bss_params |= IEEE80211_RNR_TBTT_PARAMS_TRANSMITTED_BSSID; + } + + tbtt_info.mld_params.mld_id = 0; + tbtt_info.mld_params.params = + le16_encode_bits(link_id, IEEE80211_RNR_MLD_PARAMS_LINK_ID) | + le16_encode_bits(bss_change_count, + IEEE80211_RNR_MLD_PARAMS_BSS_CHANGE_COUNT); + + res = kzalloc(struct_size(res, data, + sizeof(ap_info) + ap_info.tbtt_info_len), + gfp); + if (!res) + return NULL; + + /* Copy the data */ + res->id = WLAN_EID_REDUCED_NEIGHBOR_REPORT; + res->datalen = sizeof(ap_info) + ap_info.tbtt_info_len; + memcpy(res->data, &ap_info, sizeof(ap_info)); + memcpy(res->data + sizeof(ap_info), &tbtt_info, ap_info.tbtt_info_len); + + return res; + +out_unlock: + rcu_read_unlock(); + return NULL; +} + static void cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy, struct cfg80211_inform_single_bss_data *tx_data, @@ -2630,13 +2727,14 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy, .source_bss = source_bss, .bss_source = BSS_SOURCE_STA_PROFILE, }; + struct element *reporter_rnr = NULL; struct ieee80211_multi_link_elem *ml_elem; struct cfg80211_mle *mle; u16 control; u8 ml_common_len; - u8 *new_ie; + u8 *new_ie = NULL; struct cfg80211_bss *bss; - int mld_id; + u8 mld_id, reporter_link_id, bss_change_count; u16 seen_links = 0; const u8 *pos; u8 i; @@ -2658,8 +2756,14 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy, ml_common_len = ml_elem->variable[0]; - /* length + MLD MAC address + link ID info + BSS Params Change Count */ - pos = ml_elem->variable + 1 + 6 + 1 + 1; + /* length + MLD MAC address */ + pos = ml_elem->variable + 1 + 6; + + reporter_link_id = pos[0]; + pos += 1; + + bss_change_count = pos[0]; + pos += 1; if (u16_get_bits(control, IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY)) pos += 2; @@ -2690,10 +2794,21 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy, if (!mle) return; + /* No point in doing anything if there is no per-STA profile */ + if (!mle->sta_prof[0]) + goto out; + new_ie = kmalloc(IEEE80211_MAX_DATA_LEN, gfp); if (!new_ie) goto out; + reporter_rnr = cfg80211_gen_reporter_rnr(source_bss, + u16_get_bits(control, + IEEE80211_MLC_BASIC_PRES_MLD_ID), + mld_id == 0, reporter_link_id, + bss_change_count, + gfp); + for (i = 0; i < ARRAY_SIZE(mle->sta_prof) && mle->sta_prof[i]; i++) { const struct ieee80211_neighbor_ap_info *ap_info; enum nl80211_band band; @@ -2803,7 +2918,16 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy, data.ielen += sizeof(*ml_elem) + ml_common_len; - /* TODO: Add an RNR containing only the reporting AP */ + if (reporter_rnr && (use_for & NL80211_BSS_USE_FOR_NORMAL)) { + if (data.ielen + sizeof(struct element) + + reporter_rnr->datalen > IEEE80211_MAX_DATA_LEN) + continue; + + memcpy(new_ie + data.ielen, reporter_rnr, + sizeof(struct element) + reporter_rnr->datalen); + data.ielen += sizeof(struct element) + + reporter_rnr->datalen; + } bss = cfg80211_inform_single_bss_data(wiphy, &data, gfp); if (!bss) @@ -2812,6 +2936,7 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy, } out: + kfree(reporter_rnr); kfree(new_ie); kfree(mle); } diff --git a/net/wireless/tests/scan.c b/net/wireless/tests/scan.c index 77854161cd22b..f9ea44aee9952 100644 --- a/net/wireless/tests/scan.c +++ b/net/wireless/tests/scan.c @@ -2,7 +2,7 @@ /* * KUnit tests for inform_bss functions * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2024 Intel Corporation */ #include #include @@ -406,9 +406,27 @@ static struct inform_bss_ml_sta_case { const char *desc; int mld_id; bool sta_prof_vendor_elems; + bool include_oper_class; } inform_bss_ml_sta_cases[] = { - { .desc = "no_mld_id", .mld_id = 0, .sta_prof_vendor_elems = false }, - { .desc = "mld_id_eq_1", .mld_id = 1, .sta_prof_vendor_elems = true }, + { + .desc = "zero_mld_id", + .mld_id = 0, + .sta_prof_vendor_elems = false, + }, { + .desc = "zero_mld_id_with_oper_class", + .mld_id = 0, + .sta_prof_vendor_elems = false, + .include_oper_class = true, + }, { + .desc = "mld_id_eq_1", + .mld_id = 1, + .sta_prof_vendor_elems = true, + }, { + .desc = "mld_id_eq_1_with_oper_class", + .mld_id = 1, + .sta_prof_vendor_elems = true, + .include_oper_class = true, + }, }; KUNIT_ARRAY_PARAM_DESC(inform_bss_ml_sta, inform_bss_ml_sta_cases, desc) @@ -515,6 +533,12 @@ static void test_inform_bss_ml_sta(struct kunit *test) skb_put_u8(input, 4); skb_put_data(input, "TEST", 4); + if (params->include_oper_class) { + skb_put_u8(input, WLAN_EID_SUPPORTED_REGULATORY_CLASSES); + skb_put_u8(input, 1); + skb_put_u8(input, 81); + } + skb_put_u8(input, WLAN_EID_REDUCED_NEIGHBOR_REPORT); skb_put_u8(input, sizeof(rnr)); skb_put_data(input, &rnr, sizeof(rnr)); @@ -582,15 +606,21 @@ static void test_inform_bss_ml_sta(struct kunit *test) KUNIT_EXPECT_EQ(test, ies->tsf, tsf + le64_to_cpu(sta_prof.tsf_offset)); /* Resulting length should be: * SSID (inherited) + RNR (inherited) + vendor element(s) + + * operating class (if requested) + + * generated RNR (if MLD ID == 0) + * MLE common info + MLE header and control */ if (params->sta_prof_vendor_elems) KUNIT_EXPECT_EQ(test, ies->len, 6 + 2 + sizeof(rnr) + 2 + 160 + 2 + 165 + + (params->include_oper_class ? 3 : 0) + + (!params->mld_id ? 22 : 0) + mle_basic_common_info.var_len + 5); else KUNIT_EXPECT_EQ(test, ies->len, 6 + 2 + sizeof(rnr) + 2 + 155 + + (params->include_oper_class ? 3 : 0) + + (!params->mld_id ? 22 : 0) + mle_basic_common_info.var_len + 5); rcu_read_unlock(); -- cgit 1.2.3-korg From 8f500fbc6c655976c8062b1f1e55bd0b3095d6c2 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Tue, 2 Jan 2024 21:35:34 +0200 Subject: wifi: mac80211: process and save negotiated TID to Link mapping request An MLD may send TID-to-Link mapping request frame to negotiate TID to link mapping with a peer MLD. Support handling negotiated TID-to-Link mapping request frame by parsing the frame, asking the driver whether it supports the received mapping or not, and sending a TID-to-Link mapping response to the AP MLD. Theoretically, links that became inactive due to the received TID-to-Link mapping request, can be selected to be activated but this would require tearing down the negotiated TID-to-Link mapping, which is still not supported. Signed-off-by: Ayala Beker Reviewed-by: Johannes Berg Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20240102213313.0bc1a24fcc9d.Ie72e47dc6f8c77d4a2f0947b775ef6367fe0edac@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 46 ++++++- net/mac80211/driver-ops.h | 19 +++ net/mac80211/ieee80211_i.h | 2 + net/mac80211/iface.c | 12 ++ net/mac80211/main.c | 3 +- net/mac80211/mlme.c | 335 ++++++++++++++++++++++++++++++++++++++++++--- net/mac80211/rx.c | 14 ++ net/mac80211/trace.h | 52 +++++++ 8 files changed, 465 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d400fe2e8668d..6490b92d5cc1b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -342,6 +342,7 @@ struct ieee80211_vif_chanctx_switch { * status changed. * @BSS_CHANGED_EHT_PUNCTURING: The channel puncturing bitmap changed. * @BSS_CHANGED_MLD_VALID_LINKS: MLD valid links status changed. + * @BSS_CHANGED_MLD_TTLM: TID to link mapping was changed */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -378,6 +379,7 @@ enum ieee80211_bss_change { BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31, BSS_CHANGED_EHT_PUNCTURING = BIT_ULL(32), BSS_CHANGED_MLD_VALID_LINKS = BIT_ULL(33), + BSS_CHANGED_MLD_TTLM = BIT_ULL(34), /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -1845,6 +1847,35 @@ struct ieee80211_vif_cfg { u8 ap_addr[ETH_ALEN] __aligned(2); }; +#define IEEE80211_TTLM_NUM_TIDS 8 + +/** + * struct ieee80211_neg_ttlm - negotiated TID to link map info + * + * @downlink: bitmap of active links per TID for downlink, or 0 if mapping for + * this TID is not included. + * @uplink: bitmap of active links per TID for uplink, or 0 if mapping for this + * TID is not included. + * @valid: info is valid or not. + */ +struct ieee80211_neg_ttlm { + u16 downlink[IEEE80211_TTLM_NUM_TIDS]; + u16 uplink[IEEE80211_TTLM_NUM_TIDS]; + bool valid; +}; + +/** + * enum ieee80211_neg_ttlm_res - return value for negotiated TTLM handling + * @NEG_TTLM_RES_ACCEPT: accept the request + * @NEG_TTLM_RES_REJECT: reject the request + * @NEG_TTLM_RES_SUGGEST_PREFERRED: reject and suggest a new mapping + */ +enum ieee80211_neg_ttlm_res { + NEG_TTLM_RES_ACCEPT, + NEG_TTLM_RES_REJECT, + NEG_TTLM_RES_SUGGEST_PREFERRED +}; + /** * struct ieee80211_vif - per-interface data * @@ -1863,6 +1894,11 @@ struct ieee80211_vif_cfg { * API calls meant for that purpose. * @dormant_links: bitmap of valid but disabled links, or 0 for non-MLO. * Must be a subset of valid_links. + * @suspended_links: subset of dormant_links representing links that are + * suspended. + * 0 for non-MLO. + * @neg_ttlm: negotiated TID to link mapping info. + * see &struct ieee80211_neg_ttlm. * @addr: address of this interface * @p2p: indicates whether this AP or STA interface is a p2p * interface, i.e. a GO or p2p-sta respectively @@ -1900,7 +1936,8 @@ struct ieee80211_vif { struct ieee80211_vif_cfg cfg; struct ieee80211_bss_conf bss_conf; struct ieee80211_bss_conf __rcu *link_conf[IEEE80211_MLD_MAX_NUM_LINKS]; - u16 valid_links, active_links, dormant_links; + u16 valid_links, active_links, dormant_links, suspended_links; + struct ieee80211_neg_ttlm neg_ttlm; u8 addr[ETH_ALEN] __aligned(2); bool p2p; @@ -4293,6 +4330,10 @@ struct ieee80211_prep_tx_info { * flow offloading for flows originating from the vif. * Note that the driver must not assume that the vif driver_data is valid * at this point, since the callback can be called during netdev teardown. + * @can_neg_ttlm: for managed interface, requests the driver to determine + * if the requested TID-To-Link mapping can be accepted or not. + * If it's not accepted the driver may suggest a preferred mapping and + * modify @ttlm parameter with the suggested TID-to-Link mapping. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -4673,6 +4714,9 @@ struct ieee80211_ops { struct net_device *dev, enum tc_setup_type type, void *type_data); + enum ieee80211_neg_ttlm_res + (*can_neg_ttlm)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + struct ieee80211_neg_ttlm *ttlm); }; /** diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index eb482fb8c3afe..e20c64edb880c 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1695,4 +1695,23 @@ int drv_change_sta_links(struct ieee80211_local *local, struct ieee80211_sta *sta, u16 old_links, u16 new_links); +static inline enum ieee80211_neg_ttlm_res +drv_can_neg_ttlm(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_neg_ttlm *neg_ttlm) +{ + enum ieee80211_neg_ttlm_res res = NEG_TTLM_RES_REJECT; + + might_sleep(); + if (!check_sdata_in_driver(sdata)) + return -EIO; + + trace_drv_can_neg_ttlm(local, sdata, neg_ttlm); + if (local->ops->can_neg_ttlm) + res = local->ops->can_neg_ttlm(&local->hw, &sdata->vif, + neg_ttlm); + trace_drv_neg_ttlm_res(local, sdata, res, neg_ttlm); + + return res; +} #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 0b2b53550bd99..38755f6f6fa0b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2603,6 +2603,8 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata, const struct ieee80211_eht_cap_elem *eht_cap_ie_elem, u8 eht_cap_len, struct link_sta_info *link_sta); +void ieee80211_process_neg_ttlm_req(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len); void ieee80211_check_wbrf_support(struct ieee80211_local *local); void ieee80211_add_wbrf(struct ieee80211_local *local, struct cfg80211_chan_def *chandef); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index e4e7c0b38cb6e..4a87d2d336ae2 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1546,6 +1546,18 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, default: break; } + } else if (ieee80211_is_action(mgmt->frame_control) && + mgmt->u.action.category == WLAN_CATEGORY_PROTECTED_EHT) { + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + switch (mgmt->u.action.u.ttlm_req.action_code) { + case WLAN_PROTECTED_EHT_ACTION_TTLM_REQ: + ieee80211_process_neg_ttlm_req(sdata, mgmt, + skb->len); + break; + default: + break; + } + } } else if (ieee80211_is_ext(mgmt->frame_control)) { if (sdata->vif.type == NL80211_IFTYPE_STATION) ieee80211_sta_rx_queued_ext(sdata, skb); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index f2ece77935739..13c417eda2810 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -208,7 +208,8 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) BSS_CHANGED_IBSS |\ BSS_CHANGED_ARP_FILTER |\ BSS_CHANGED_SSID |\ - BSS_CHANGED_MLD_VALID_LINKS) + BSS_CHANGED_MLD_VALID_LINKS |\ + BSS_CHANGED_MLD_TTLM) void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, u64 changed) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 073105deb4248..b56f7de3b53fa 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1318,8 +1318,6 @@ static void ieee80211_assoc_add_ml_elem(struct ieee80211_sub_if_data *sdata, cpu_to_le16(IEEE80211_MLC_BASIC_PRES_EML_CAPA); skb_put_data(skb, &eml_capa, sizeof(eml_capa)); } - /* need indication from userspace to support this */ - mld_capa_ops &= ~cpu_to_le16(IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP); skb_put_data(skb, &mld_capa_ops, sizeof(mld_capa_ops)); for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { @@ -5890,6 +5888,56 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, TU_TO_JIFFIES(delay)); } +static int ieee80211_ttlm_set_links(struct ieee80211_sub_if_data *sdata, + u16 active_links, u16 dormant_links, + u16 suspended_links) +{ + u64 changed = 0; + int ret; + + if (!active_links) { + ret = -EINVAL; + goto out; + } + + /* If there is an active negotiated TTLM, it should be discarded by + * the new negotiated/advertised TTLM. + */ + if (sdata->vif.neg_ttlm.valid) { + memset(&sdata->vif.neg_ttlm, 0, sizeof(sdata->vif.neg_ttlm)); + sdata->vif.suspended_links = 0; + changed = BSS_CHANGED_MLD_TTLM; + } + + if (sdata->vif.active_links != active_links) { + ret = ieee80211_set_active_links(&sdata->vif, active_links); + if (ret) { + sdata_info(sdata, "Failed to set TTLM active links\n"); + goto out; + } + } + + ret = ieee80211_vif_set_links(sdata, sdata->vif.valid_links, + dormant_links); + if (ret) { + sdata_info(sdata, "Failed to set TTLM dormant links\n"); + goto out; + } + + changed |= BSS_CHANGED_MLD_VALID_LINKS; + sdata->vif.suspended_links = suspended_links; + if (sdata->vif.suspended_links) + changed |= BSS_CHANGED_MLD_TTLM; + + ieee80211_vif_cfg_change_notify(sdata, changed); + +out: + if (ret) + ieee80211_disconnect(&sdata->vif, false); + + return ret; +} + static void ieee80211_tid_to_link_map_work(struct wiphy *wiphy, struct wiphy_work *work) { @@ -5897,30 +5945,19 @@ static void ieee80211_tid_to_link_map_work(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, u.mgd.ttlm_work.work); - int ret; new_active_links = sdata->u.mgd.ttlm_info.map & sdata->vif.valid_links; new_dormant_links = ~sdata->u.mgd.ttlm_info.map & sdata->vif.valid_links; - if (!new_active_links) { - ieee80211_disconnect(&sdata->vif, false); - return; - } ieee80211_vif_set_links(sdata, sdata->vif.valid_links, 0); - new_active_links = BIT(ffs(new_active_links) - 1); - ieee80211_set_active_links(&sdata->vif, new_active_links); - - ret = ieee80211_vif_set_links(sdata, sdata->vif.valid_links, - new_dormant_links); + if (ieee80211_ttlm_set_links(sdata, new_active_links, new_dormant_links, + 0)) + return; sdata->u.mgd.ttlm_info.active = true; sdata->u.mgd.ttlm_info.switch_time = 0; - - if (!ret) - ieee80211_vif_cfg_change_notify(sdata, - BSS_CHANGED_MLD_VALID_LINKS); } static u16 ieee80211_get_ttlm(u8 bm_size, u8 *data) @@ -6437,6 +6474,272 @@ free: kfree(elems); } +static void ieee80211_apply_neg_ttlm(struct ieee80211_sub_if_data *sdata, + struct ieee80211_neg_ttlm neg_ttlm) +{ + u16 new_active_links, new_dormant_links, new_suspended_links, map = 0; + u8 i; + + for (i = 0; i < IEEE80211_TTLM_NUM_TIDS; i++) + map |= neg_ttlm.downlink[i] | neg_ttlm.uplink[i]; + + /* If there is an active TTLM, unset previously suspended links */ + if (sdata->vif.neg_ttlm.valid) + sdata->vif.dormant_links &= ~sdata->vif.suspended_links; + + /* exclude links that are already disabled by advertised TTLM */ + new_active_links = + map & sdata->vif.valid_links & ~sdata->vif.dormant_links; + new_suspended_links = + (~map & sdata->vif.valid_links) & ~sdata->vif.dormant_links; + new_dormant_links = sdata->vif.dormant_links | new_suspended_links; + if (ieee80211_ttlm_set_links(sdata, new_active_links, + new_dormant_links, new_suspended_links)) + return; + + sdata->vif.neg_ttlm = neg_ttlm; + sdata->vif.neg_ttlm.valid = true; +} + +static void +ieee80211_neg_ttlm_add_suggested_map(struct sk_buff *skb, + struct ieee80211_neg_ttlm *neg_ttlm) +{ + u8 i, direction[IEEE80211_TTLM_MAX_CNT]; + + if (memcmp(neg_ttlm->downlink, neg_ttlm->uplink, + sizeof(neg_ttlm->downlink))) { + direction[0] = IEEE80211_TTLM_DIRECTION_DOWN; + direction[1] = IEEE80211_TTLM_DIRECTION_UP; + } else { + direction[0] = IEEE80211_TTLM_DIRECTION_BOTH; + } + + for (i = 0; i < ARRAY_SIZE(direction); i++) { + u8 tid, len, map_ind = 0, *len_pos, *map_ind_pos, *pos; + __le16 map; + + len = sizeof(struct ieee80211_ttlm_elem) + 1 + 1; + + pos = skb_put(skb, len + 2); + *pos++ = WLAN_EID_EXTENSION; + len_pos = pos++; + *pos++ = WLAN_EID_EXT_TID_TO_LINK_MAPPING; + *pos++ = direction[i]; + map_ind_pos = pos++; + for (tid = 0; tid < IEEE80211_TTLM_NUM_TIDS; tid++) { + map = direction[i] == IEEE80211_TTLM_DIRECTION_UP ? + cpu_to_le16(neg_ttlm->uplink[tid]) : + cpu_to_le16(neg_ttlm->downlink[tid]); + if (!map) + continue; + + len += 2; + map_ind |= BIT(tid); + skb_put_data(skb, &map, sizeof(map)); + } + + *map_ind_pos = map_ind; + *len_pos = len; + + if (direction[i] == IEEE80211_TTLM_DIRECTION_BOTH) + break; + } +} + +static void +ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, + enum ieee80211_neg_ttlm_res ttlm_res, + u8 dialog_token, + struct ieee80211_neg_ttlm *neg_ttlm) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_mgmt *mgmt; + struct sk_buff *skb; + int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.ttlm_res); + int ttlm_max_len = 2 + 1 + sizeof(struct ieee80211_ttlm_elem) + 1 + + 2 * 2 * IEEE80211_TTLM_NUM_TIDS; + + skb = dev_alloc_skb(local->tx_headroom + hdr_len + ttlm_max_len); + if (!skb) + return; + + skb_reserve(skb, local->tx_headroom); + mgmt = skb_put_zero(skb, hdr_len); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + memcpy(mgmt->da, sdata->vif.cfg.ap_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); + memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN); + + mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT; + mgmt->u.action.u.ttlm_res.action_code = + WLAN_PROTECTED_EHT_ACTION_TTLM_RES; + mgmt->u.action.u.ttlm_res.dialog_token = dialog_token; + switch (ttlm_res) { + default: + WARN_ON(1); + fallthrough; + case NEG_TTLM_RES_REJECT: + mgmt->u.action.u.ttlm_res.status_code = + WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING; + break; + case NEG_TTLM_RES_ACCEPT: + mgmt->u.action.u.ttlm_res.status_code = WLAN_STATUS_SUCCESS; + break; + case NEG_TTLM_RES_SUGGEST_PREFERRED: + mgmt->u.action.u.ttlm_res.status_code = + WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED; + ieee80211_neg_ttlm_add_suggested_map(skb, neg_ttlm); + break; + } + + ieee80211_tx_skb(sdata, skb); +} + +static int +ieee80211_parse_neg_ttlm(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_ttlm_elem *ttlm, + struct ieee80211_neg_ttlm *neg_ttlm, + u8 *direction) +{ + u8 control, link_map_presence, map_size, tid; + u8 *pos; + + /* The element size was already validated in + * ieee80211_tid_to_link_map_size_ok() + */ + pos = (void *)ttlm->optional; + + control = ttlm->control; + + /* mapping switch time and expected duration fields are not expected + * in case of negotiated TTLM + */ + if (control & (IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT | + IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT)) { + mlme_dbg(sdata, + "Invalid TTLM element in negotiated TTLM request\n"); + return -EINVAL; + } + + if (control & IEEE80211_TTLM_CONTROL_DEF_LINK_MAP) { + for (tid = 0; tid < IEEE80211_TTLM_NUM_TIDS; tid++) { + neg_ttlm->downlink[tid] = sdata->vif.valid_links; + neg_ttlm->uplink[tid] = sdata->vif.valid_links; + } + *direction = IEEE80211_TTLM_DIRECTION_BOTH; + return 0; + } + + *direction = u8_get_bits(control, IEEE80211_TTLM_CONTROL_DIRECTION); + if (*direction != IEEE80211_TTLM_DIRECTION_DOWN && + *direction != IEEE80211_TTLM_DIRECTION_UP && + *direction != IEEE80211_TTLM_DIRECTION_BOTH) + return -EINVAL; + + link_map_presence = *pos; + pos++; + + if (control & IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE) + map_size = 1; + else + map_size = 2; + + for (tid = 0; tid < IEEE80211_TTLM_NUM_TIDS; tid++) { + u16 map; + + if (link_map_presence & BIT(tid)) { + map = ieee80211_get_ttlm(map_size, pos); + if (!map) { + mlme_dbg(sdata, + "No active links for TID %d", tid); + return -EINVAL; + } + } else { + map = 0; + } + + switch (*direction) { + case IEEE80211_TTLM_DIRECTION_BOTH: + neg_ttlm->downlink[tid] = map; + neg_ttlm->uplink[tid] = map; + break; + case IEEE80211_TTLM_DIRECTION_DOWN: + neg_ttlm->downlink[tid] = map; + break; + case IEEE80211_TTLM_DIRECTION_UP: + neg_ttlm->uplink[tid] = map; + break; + default: + return -EINVAL; + } + pos += map_size; + } + return 0; +} + +void ieee80211_process_neg_ttlm_req(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len) +{ + u8 dialog_token, direction[IEEE80211_TTLM_MAX_CNT] = {}, i; + size_t ies_len; + enum ieee80211_neg_ttlm_res ttlm_res = NEG_TTLM_RES_ACCEPT; + struct ieee802_11_elems *elems = NULL; + struct ieee80211_neg_ttlm neg_ttlm = {}; + + BUILD_BUG_ON(ARRAY_SIZE(direction) != ARRAY_SIZE(elems->ttlm)); + + if (!ieee80211_vif_is_mld(&sdata->vif)) + return; + + dialog_token = mgmt->u.action.u.ttlm_req.dialog_token; + ies_len = len - offsetof(struct ieee80211_mgmt, + u.action.u.ttlm_req.variable); + elems = ieee802_11_parse_elems(mgmt->u.action.u.ttlm_req.variable, + ies_len, true, NULL); + if (!elems) { + ttlm_res = NEG_TTLM_RES_REJECT; + goto out; + } + + for (i = 0; i < elems->ttlm_num; i++) { + if (ieee80211_parse_neg_ttlm(sdata, elems->ttlm[i], + &neg_ttlm, &direction[i]) || + (direction[i] == IEEE80211_TTLM_DIRECTION_BOTH && + elems->ttlm_num != 1)) { + ttlm_res = NEG_TTLM_RES_REJECT; + goto out; + } + } + + if (!elems->ttlm_num || + (elems->ttlm_num == 2 && direction[0] == direction[1])) { + ttlm_res = NEG_TTLM_RES_REJECT; + goto out; + } + + for (i = 0; i < IEEE80211_TTLM_NUM_TIDS; i++) { + if ((neg_ttlm.downlink[i] && + (neg_ttlm.downlink[i] & ~sdata->vif.valid_links)) || + (neg_ttlm.uplink[i] && + (neg_ttlm.uplink[i] & ~sdata->vif.valid_links))) { + ttlm_res = NEG_TTLM_RES_REJECT; + goto out; + } + } + + ttlm_res = drv_can_neg_ttlm(sdata->local, sdata, &neg_ttlm); + + if (ttlm_res != NEG_TTLM_RES_ACCEPT) + goto out; + + ieee80211_apply_neg_ttlm(sdata, neg_ttlm); +out: + kfree(elems); + ieee80211_send_neg_ttlm_res(sdata, ttlm_res, dialog_token, &neg_ttlm); +} + void ieee80211_sta_rx_queued_ext(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 0bf72928ccfca..75eb3e55eaecf 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3763,6 +3763,20 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) break; } break; + case WLAN_CATEGORY_PROTECTED_EHT: + switch (mgmt->u.action.u.ttlm_req.action_code) { + case WLAN_PROTECTED_EHT_ACTION_TTLM_REQ: + if (sdata->vif.type != NL80211_IFTYPE_STATION) + break; + + if (len < offsetofend(typeof(*mgmt), + u.action.u.ttlm_req)) + goto invalid; + goto queue; + default: + break; + } + break; } return RX_CONTINUE; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 06835ed4c44f1..1c0c46b11c6d7 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -3088,6 +3088,58 @@ TRACE_EVENT(stop_queue, ) ); +TRACE_EVENT(drv_can_neg_ttlm, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_neg_ttlm *neg_ttlm), + + TP_ARGS(local, sdata, neg_ttlm), + + TP_STRUCT__entry(LOCAL_ENTRY + VIF_ENTRY + __array(u16, downlink, sizeof(u16) * 8) + __array(u16, uplink, sizeof(u16) * 8) + ), + + TP_fast_assign(LOCAL_ASSIGN; + VIF_ASSIGN; + memcpy(__entry->downlink, neg_ttlm->downlink, + sizeof(neg_ttlm->downlink)); + memcpy(__entry->uplink, neg_ttlm->uplink, + sizeof(neg_ttlm->uplink)); + ), + + TP_printk(LOCAL_PR_FMT ", " VIF_PR_FMT, LOCAL_PR_ARG, VIF_PR_ARG) +); + +TRACE_EVENT(drv_neg_ttlm_res, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + enum ieee80211_neg_ttlm_res res, + struct ieee80211_neg_ttlm *neg_ttlm), + + TP_ARGS(local, sdata, res, neg_ttlm), + + TP_STRUCT__entry(LOCAL_ENTRY + VIF_ENTRY + __field(u32, res) + __array(u16, downlink, sizeof(u16) * 8) + __array(u16, uplink, sizeof(u16) * 8) + ), + + TP_fast_assign(LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->res = res; + memcpy(__entry->downlink, neg_ttlm->downlink, + sizeof(neg_ttlm->downlink)); + memcpy(__entry->uplink, neg_ttlm->uplink, + sizeof(neg_ttlm->uplink)); + ), + + TP_printk(LOCAL_PR_FMT VIF_PR_FMT " response: %d\n ", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->res + ) +); #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- cgit 1.2.3-korg From f7660b3f584aadd25dde18aa1902488577a15863 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Tue, 2 Jan 2024 21:35:37 +0200 Subject: wifi: mac80211: add support for negotiated TTLM request Update neg_ttlm and active_links according to the new mapping, and send a negotiated TID-to-link map request with the new mapping. Signed-off-by: Ayala Beker Reviewed-by: Gregory Greenman Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240102213313.eeb385d771df.I2a5441c14421de884dbd93d1624ce7bb2c944833@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 37 +++++++++++++++ include/net/mac80211.h | 7 ++- net/mac80211/cfg.c | 12 +++++ net/mac80211/ieee80211_i.h | 8 ++++ net/mac80211/iface.c | 4 ++ net/mac80211/mlme.c | 114 +++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/rx.c | 8 ++++ 7 files changed, 188 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index eafa70e5ba948..f0c068446c792 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -5024,6 +5024,43 @@ static inline u16 ieee80211_mle_get_eml_cap(const u8 *data) return get_unaligned_le16(common); } +/** + * ieee80211_mle_get_mld_capa_op - returns the MLD capabilities and operations. + * @data: pointer to the multi link EHT IE + * + * The element is assumed to be of the correct type (BASIC) and big enough, + * this must be checked using ieee80211_mle_type_ok(). + * + * If the MLD capabilities and operations field is not present, 0 will be + * returned. + */ +static inline u16 ieee80211_mle_get_mld_capa_op(const u8 *data) +{ + const struct ieee80211_multi_link_elem *mle = (const void *)data; + u16 control = le16_to_cpu(mle->control); + const u8 *common = mle->variable; + + /* + * common points now at the beginning of + * ieee80211_mle_basic_common_info + */ + common += sizeof(struct ieee80211_mle_basic_common_info); + + if (!(control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP)) + return 0; + + if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA) + common += 2; + + return get_unaligned_le16(common); +} + /** * ieee80211_mle_size_ok - validate multi-link element size * @data: pointer to the element data diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 6490b92d5cc1b..84cc66dd93c14 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1810,9 +1810,11 @@ enum ieee80211_offload_flags { * @ps: power-save mode (STA only). This flag is NOT affected by * offchannel/dynamic_ps operations. * @aid: association ID number, valid only when @assoc is true - * @eml_cap: EML capabilities as described in P802.11be_D2.2 Figure 9-1002k. + * @eml_cap: EML capabilities as described in P802.11be_D4.1 Figure 9-1001j. * @eml_med_sync_delay: Medium Synchronization delay as described in - * P802.11be_D2.2 Figure 9-1002j. + * P802.11be_D4.1 Figure 9-1001i. + * @mld_capa_op: MLD Capabilities and Operations per P802.11be_D4.1 + * Figure 9-1001k * @arp_addr_list: List of IPv4 addresses for hardware ARP filtering. The * may filter ARP queries targeted for other addresses than listed here. * The driver must allow ARP queries targeted for all address listed here @@ -1837,6 +1839,7 @@ struct ieee80211_vif_cfg { u16 aid; u16 eml_cap; u16 eml_med_sync_delay; + u16 mld_capa_op; __be32 arp_addr_list[IEEE80211_BSS_ARP_ADDR_LIST_LEN]; int arp_addr_cnt; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 489dd97f51724..1e8da6372da29 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -4966,6 +4966,17 @@ static int ieee80211_set_hw_timestamp(struct wiphy *wiphy, return local->ops->set_hw_timestamp(&local->hw, &sdata->vif, hwts); } +static int +ieee80211_set_ttlm(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_ttlm_params *params) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + lockdep_assert_wiphy(sdata->local->hw.wiphy); + + return ieee80211_req_neg_ttlm(sdata, params); +} + const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -5078,4 +5089,5 @@ const struct cfg80211_ops mac80211_config_ops = { .mod_link_station = ieee80211_mod_link_station, .del_link_station = ieee80211_del_link_station, .set_hw_timestamp = ieee80211_set_hw_timestamp, + .set_ttlm = ieee80211_set_ttlm, }; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 38755f6f6fa0b..79a5067ce82be 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -579,6 +579,10 @@ struct ieee80211_if_managed { /* TID-to-link mapping support */ struct wiphy_delayed_work ttlm_work; struct ieee80211_adv_ttlm_info ttlm_info; + + /* dialog token enumerator for neg TTLM request */ + u8 dialog_token_alloc; + struct wiphy_delayed_work neg_ttlm_timeout_work; }; struct ieee80211_if_ibss { @@ -2605,6 +2609,10 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata, struct link_sta_info *link_sta); void ieee80211_process_neg_ttlm_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len); +void ieee80211_process_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len); +int ieee80211_req_neg_ttlm(struct ieee80211_sub_if_data *sdata, + struct cfg80211_ttlm_params *params); void ieee80211_check_wbrf_support(struct ieee80211_local *local); void ieee80211_add_wbrf(struct ieee80211_local *local, struct cfg80211_chan_def *chandef); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 4a87d2d336ae2..df314222c2c90 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1554,6 +1554,10 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, ieee80211_process_neg_ttlm_req(sdata, mgmt, skb->len); break; + case WLAN_PROTECTED_EHT_ACTION_TTLM_RES: + ieee80211_process_neg_ttlm_res(sdata, mgmt, + skb->len); + break; default: break; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index b56f7de3b53fa..5279af09fd530 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -46,6 +46,8 @@ #define IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS msecs_to_jiffies(100) #define IEEE80211_ADV_TTLM_ST_UNDERFLOW 0xff00 +#define IEEE80211_NEG_TTLM_REQ_TIMEOUT (HZ / 5) + static int max_nullfunc_tries = 2; module_param(max_nullfunc_tries, int, 0644); MODULE_PARM_DESC(max_nullfunc_tries, @@ -3087,6 +3089,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, memset(&sdata->u.mgd.ttlm_info, 0, sizeof(sdata->u.mgd.ttlm_info)); wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work); + wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + &ifmgd->neg_ttlm_timeout_work); ieee80211_vif_set_links(sdata, 0, 0); } @@ -4984,6 +4988,8 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, ieee80211_mle_get_eml_cap(eht_ml_elem->data + 1); sdata->vif.cfg.eml_med_sync_delay = ieee80211_mle_get_eml_med_sync_delay(eht_ml_elem->data + 1); + sdata->vif.cfg.mld_capa_op = + ieee80211_mle_get_mld_capa_op(eht_ml_elem->data + 1); } } @@ -6501,6 +6507,19 @@ static void ieee80211_apply_neg_ttlm(struct ieee80211_sub_if_data *sdata, sdata->vif.neg_ttlm.valid = true; } +static void ieee80211_neg_ttlm_timeout_work(struct wiphy *wiphy, + struct wiphy_work *work) +{ + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, + u.mgd.neg_ttlm_timeout_work.work); + + sdata_info(sdata, + "No negotiated TTLM response from AP, disconnecting.\n"); + + __ieee80211_disconnect(sdata); +} + static void ieee80211_neg_ttlm_add_suggested_map(struct sk_buff *skb, struct ieee80211_neg_ttlm *neg_ttlm) @@ -6547,6 +6566,74 @@ ieee80211_neg_ttlm_add_suggested_map(struct sk_buff *skb, } } +static void +ieee80211_send_neg_ttlm_req(struct ieee80211_sub_if_data *sdata, + struct ieee80211_neg_ttlm *neg_ttlm, + u8 dialog_token) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_mgmt *mgmt; + struct sk_buff *skb; + int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.ttlm_req); + int ttlm_max_len = 2 + 1 + sizeof(struct ieee80211_ttlm_elem) + 1 + + 2 * 2 * IEEE80211_TTLM_NUM_TIDS; + + skb = dev_alloc_skb(local->tx_headroom + hdr_len + ttlm_max_len); + if (!skb) + return; + + skb_reserve(skb, local->tx_headroom); + mgmt = skb_put_zero(skb, hdr_len); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + memcpy(mgmt->da, sdata->vif.cfg.ap_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); + memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN); + + mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT; + mgmt->u.action.u.ttlm_req.action_code = + WLAN_PROTECTED_EHT_ACTION_TTLM_REQ; + mgmt->u.action.u.ttlm_req.dialog_token = dialog_token; + ieee80211_neg_ttlm_add_suggested_map(skb, neg_ttlm); + ieee80211_tx_skb(sdata, skb); +} + +int ieee80211_req_neg_ttlm(struct ieee80211_sub_if_data *sdata, + struct cfg80211_ttlm_params *params) +{ + struct ieee80211_neg_ttlm neg_ttlm = {}; + u8 i; + + if (!ieee80211_vif_is_mld(&sdata->vif) || + !(sdata->vif.cfg.mld_capa_op & + IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP)) + return -EINVAL; + + for (i = 0; i < IEEE80211_TTLM_NUM_TIDS; i++) { + if ((params->dlink[i] & ~sdata->vif.valid_links) || + (params->ulink[i] & ~sdata->vif.valid_links)) + return -EINVAL; + + neg_ttlm.downlink[i] = params->dlink[i]; + neg_ttlm.uplink[i] = params->ulink[i]; + } + + if (drv_can_neg_ttlm(sdata->local, sdata, &neg_ttlm) != + NEG_TTLM_RES_ACCEPT) + return -EINVAL; + + ieee80211_apply_neg_ttlm(sdata, neg_ttlm); + sdata->u.mgd.dialog_token_alloc++; + ieee80211_send_neg_ttlm_req(sdata, &sdata->vif.neg_ttlm, + sdata->u.mgd.dialog_token_alloc); + wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + &sdata->u.mgd.neg_ttlm_timeout_work); + wiphy_delayed_work_queue(sdata->local->hw.wiphy, + &sdata->u.mgd.neg_ttlm_timeout_work, + IEEE80211_NEG_TTLM_REQ_TIMEOUT); + return 0; +} + static void ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, enum ieee80211_neg_ttlm_res ttlm_res, @@ -6740,6 +6827,29 @@ out: ieee80211_send_neg_ttlm_res(sdata, ttlm_res, dialog_token, &neg_ttlm); } +void ieee80211_process_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len) +{ + if (!ieee80211_vif_is_mld(&sdata->vif) || + mgmt->u.action.u.ttlm_req.dialog_token != + sdata->u.mgd.dialog_token_alloc) + return; + + wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + &sdata->u.mgd.neg_ttlm_timeout_work); + + /* MLD station sends a TID to link mapping request, mainly to handle + * BTM (BSS transition management) request, in which case it needs to + * restrict the active links set. + * In this case it's not expected that the MLD AP will reject the + * negotiated TTLM request. + * This can be better implemented in the future, to handle request + * rejections. + */ + if (mgmt->u.action.u.ttlm_res.status_code != WLAN_STATUS_SUCCESS) + __ieee80211_disconnect(sdata); +} + void ieee80211_sta_rx_queued_ext(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { @@ -7369,6 +7479,8 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ieee80211_sta_handle_tspec_ac_params_wk); wiphy_delayed_work_init(&ifmgd->ttlm_work, ieee80211_tid_to_link_map_work); + wiphy_delayed_work_init(&ifmgd->neg_ttlm_timeout_work, + ieee80211_neg_ttlm_timeout_work); ifmgd->flags = 0; ifmgd->powersave = sdata->wdev.ps; @@ -8459,6 +8571,8 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ml_reconf_work); wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work); + wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + &ifmgd->neg_ttlm_timeout_work); if (ifmgd->assoc_data) ieee80211_destroy_assoc_data(sdata, ASSOC_TIMEOUT); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 75eb3e55eaecf..615795c4b0526 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3773,6 +3773,14 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) u.action.u.ttlm_req)) goto invalid; goto queue; + case WLAN_PROTECTED_EHT_ACTION_TTLM_RES: + if (sdata->vif.type != NL80211_IFTYPE_STATION) + break; + + if (len < offsetofend(typeof(*mgmt), + u.action.u.ttlm_res)) + goto invalid; + goto queue; default: break; } -- cgit 1.2.3-korg From 2518e89d5b1913c360f8e4cd9fc6eda6146b8800 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 2 Jan 2024 21:35:39 +0200 Subject: wifi: cfg80211: add support for SPP A-MSDUs Add SPP (signaling and payload protected) AMSDU support. Since userspace has to build the RSNX element, add an extended feature flag to indicate that this is supported. In order to avoid downgrade/mismatch attacks, add a flag to the assoc command on the station side, so that we can be sure that the value of the flag comes from the same RSNX element that will be validated by the supplicant against the 4-way-handshake. If we just pulled the data out of a beacon/probe response, we could theoretically look an RSNX element from a different frame, with a different value for this flag, than the supplicant is using to validate in the 4-way-handshake. Note that this patch is only geared towards software crypto implementations or hardware ones that can perfectly implement SPP A-MSDUs, i.e. are able to switch the AAD construction on the fly for each TX/RX frame. For more limited hardware implementations, more capability advertisement would be required, e.g. if the hardware has no way to switch this on the fly but has only a global configuration that must apply to all stations. The driver could of course *reject* mismatches, but the supplicant must know so it can do things like not negotiating SPP A-MSDUs on a T-DLS link when connected to an AP that doesn't support it, or similar. Signed-off-by: Johannes Berg Signed-off-by: Daniel Gabay Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20240102213313.fadac8df7030.I9240aebcba1be49636a73c647ed0af862713fc6f@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 14 ++++++++++++++ net/wireless/nl80211.c | 24 +++++++++++++++++++++--- 3 files changed, 37 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index cf79656ce09ca..56bce924bec6d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3059,6 +3059,7 @@ struct cfg80211_assoc_link { * @CONNECT_REQ_MLO_SUPPORT: Userspace indicates support for handling MLD links. * Drivers shall disable MLO features for the current association if this * flag is not set. + * @ASSOC_REQ_SPP_AMSDU: SPP A-MSDUs will be used on this connection (if any) */ enum cfg80211_assoc_req_flags { ASSOC_REQ_DISABLE_HT = BIT(0), @@ -3068,6 +3069,7 @@ enum cfg80211_assoc_req_flags { ASSOC_REQ_DISABLE_HE = BIT(4), ASSOC_REQ_DISABLE_EHT = BIT(5), CONNECT_REQ_MLO_SUPPORT = BIT(6), + ASSOC_REQ_SPP_AMSDU = BIT(7), }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1ccdcae243722..3e239df3528f3 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2851,6 +2851,10 @@ enum nl80211_commands { * mapping is as defined in section 9.4.2.314 (TID-To-Link Mapping element) * in Draft P802.11be_D4.0. * + * @NL80211_ATTR_ASSOC_SPP_AMSDU: flag attribute used with + * %NL80211_CMD_ASSOCIATE indicating the SPP A-MSDUs + * are used on this connection + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3394,6 +3398,8 @@ enum nl80211_attrs { NL80211_ATTR_MLO_TTLM_DLINK, NL80211_ATTR_MLO_TTLM_ULINK, + NL80211_ATTR_ASSOC_SPP_AMSDU, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3534,6 +3540,7 @@ enum nl80211_iftype { * @NL80211_STA_FLAG_ASSOCIATED: station is associated; used with drivers * that support %NL80211_FEATURE_FULL_AP_CLIENT_STATE to transition a * previously added station into associated state + * @NL80211_STA_FLAG_SPP_AMSDU: station supports SPP A-MSDUs * @NL80211_STA_FLAG_MAX: highest station flag number currently defined * @__NL80211_STA_FLAG_AFTER_LAST: internal use */ @@ -3546,6 +3553,7 @@ enum nl80211_sta_flags { NL80211_STA_FLAG_AUTHENTICATED, NL80211_STA_FLAG_TDLS_PEER, NL80211_STA_FLAG_ASSOCIATED, + NL80211_STA_FLAG_SPP_AMSDU, /* keep last */ __NL80211_STA_FLAG_AFTER_LAST, @@ -6520,6 +6528,11 @@ enum nl80211_feature_flags { * DFS master on the same channel as described in FCC-594280 D01 * (Section B.3). This, for example, allows P2P GO and P2P clients to * operate on DFS channels as long as there's a concurrent BSS connection. + * + * @NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT: The driver has support for SPP + * (signaling and payload protected) A-MSDUs and this shall be advertised + * in the RSNXE. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -6594,6 +6607,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_OWE_OFFLOAD, NL80211_EXT_FEATURE_OWE_OFFLOAD_AP, NL80211_EXT_FEATURE_DFS_CONCURRENT, + NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b09700400d097..b267317aa33c6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -821,6 +821,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_BSS_DUMP_INCLUDE_USE_DATA] = { .type = NLA_FLAG }, [NL80211_ATTR_MLO_TTLM_DLINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8), [NL80211_ATTR_MLO_TTLM_ULINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8), + [NL80211_ATTR_ASSOC_SPP_AMSDU] = { .type = NLA_FLAG }, }; /* policy for the key attributes */ @@ -6874,7 +6875,7 @@ int cfg80211_check_station_change(struct wiphy *wiphy, return -EINVAL; /* When you run into this, adjust the code below for the new flag */ - BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7); + BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 8); switch (statype) { case CFG80211_STA_MESH_PEER_KERNEL: @@ -6934,6 +6935,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy, params->link_sta_params.he_capa || params->link_sta_params.eht_capa) return -EINVAL; + if (params->sta_flags_mask & BIT(NL80211_STA_FLAG_SPP_AMSDU)) + return -EINVAL; } if (statype != CFG80211_STA_AP_CLIENT && @@ -6957,7 +6960,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy, BIT(NL80211_STA_FLAG_ASSOCIATED) | BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | BIT(NL80211_STA_FLAG_WME) | - BIT(NL80211_STA_FLAG_MFP))) + BIT(NL80211_STA_FLAG_MFP) | + BIT(NL80211_STA_FLAG_SPP_AMSDU))) return -EINVAL; /* but authenticated/associated only if driver handles it */ @@ -7516,7 +7520,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) return -EINVAL; /* When you run into this, adjust the code below for the new flag */ - BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7); + BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 8); switch (dev->ieee80211_ptr->iftype) { case NL80211_IFTYPE_AP: @@ -7540,6 +7544,11 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.sta_flags_mask & auth_assoc) return -EINVAL; + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT) && + params.sta_flags_mask & BIT(NL80211_STA_FLAG_SPP_AMSDU)) + return -EINVAL; + /* Older userspace, or userspace wanting to be compatible with * !NL80211_FEATURE_FULL_AP_CLIENT_STATE, will not set the auth * and assoc flags in the mask, but assumes the station will be @@ -11102,6 +11111,15 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) sizeof(req.s1g_capa)); } + if (nla_get_flag(info->attrs[NL80211_ATTR_ASSOC_SPP_AMSDU])) { + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT)) { + GENL_SET_ERR_MSG(info, "SPP A-MSDUs not supported"); + return -EINVAL; + } + req.flags |= ASSOC_REQ_SPP_AMSDU; + } + req.link_id = nl80211_link_id_or_invalid(info->attrs); if (info->attrs[NL80211_ATTR_MLO_LINKS]) { -- cgit 1.2.3-korg From 3b220ed8b2172fd8edb22a62a5c3a9a9c9f2b6bd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 2 Jan 2024 21:35:40 +0200 Subject: wifi: mac80211: add support for SPP A-MSDUs If software crypto is used, simply add support for SPP A-MSDUs (and use it whenever enabled as required by the cfg80211 API). If hardware crypto is used, leave it up to the driver to set the NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT flag and then check sta->spp_amsdu or the IEEE80211_KEY_FLAG_SPP_AMSDU key flag. Signed-off-by: Johannes Berg Signed-off-by: Daniel Gabay Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20240102213313.b8ada4514e2b.I1ac25d5f158165b5a88062a5a5e4c4fbeecf9a5d@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 5 +++++ net/mac80211/cfg.c | 3 +++ net/mac80211/ieee80211_i.h | 1 + net/mac80211/key.c | 4 ++++ net/mac80211/main.c | 5 ++++- net/mac80211/mlme.c | 4 ++++ net/mac80211/wpa.c | 33 +++++++++++++++++++++++---------- 7 files changed, 44 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 84cc66dd93c14..8d6ae22c09bfb 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2081,6 +2081,8 @@ static inline bool lockdep_vif_wiphy_mutex_held(struct ieee80211_vif *vif) * @IEEE80211_KEY_FLAG_GENERATE_MMIE: This flag should be set by the driver * for a AES_CMAC key to indicate that it requires sequence number * generation only + * @IEEE80211_KEY_FLAG_SPP_AMSDU: SPP A-MSDUs can be used with this key + * (set by mac80211 from the sta->spp_amsdu flag) */ enum ieee80211_key_flags { IEEE80211_KEY_FLAG_GENERATE_IV_MGMT = BIT(0), @@ -2094,6 +2096,7 @@ enum ieee80211_key_flags { IEEE80211_KEY_FLAG_PUT_MIC_SPACE = BIT(8), IEEE80211_KEY_FLAG_NO_AUTO_TX = BIT(9), IEEE80211_KEY_FLAG_GENERATE_MMIE = BIT(10), + IEEE80211_KEY_FLAG_SPP_AMSDU = BIT(11), }; /** @@ -2392,6 +2395,7 @@ struct ieee80211_link_sta { * would be assigned to link[link_id] where link_id is the id assigned * by the AP. * @valid_links: bitmap of valid links, or 0 for non-MLO + * @spp_amsdu: indicates whether the STA uses SPP A-MSDU or not. */ struct ieee80211_sta { u8 addr[ETH_ALEN]; @@ -2405,6 +2409,7 @@ struct ieee80211_sta { bool tdls_initiator; bool mfp; bool mlo; + bool spp_amsdu; u8 max_amsdu_subframes; struct ieee80211_sta_aggregates *cur; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 1e8da6372da29..dd237081dbe33 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1942,6 +1942,9 @@ static int sta_apply_parameters(struct ieee80211_local *local, clear_sta_flag(sta, WLAN_STA_TDLS_PEER); } + if (mask & BIT(NL80211_STA_FLAG_SPP_AMSDU)) + sta->sta.spp_amsdu = set & BIT(NL80211_STA_FLAG_SPP_AMSDU); + /* mark TDLS channel switch support, if the AP allows it */ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && !sdata->deflink.u.mgd.tdls_chan_switch_prohibited && diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 79a5067ce82be..eb32174984c3e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -441,6 +441,7 @@ struct ieee80211_mgd_assoc_data { bool timeout_started; bool comeback; /* whether the AP has requested association comeback */ bool s1g; + bool spp_amsdu; unsigned int assoc_link_id; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index af74d7f9d94d0..a2cce62c97b70 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -925,6 +925,10 @@ int ieee80211_key_link(struct ieee80211_key *key, */ key->color = atomic_inc_return(&key_color); + /* keep this flag for easier access later */ + if (sta && sta->sta.spp_amsdu) + key->conf.flags |= IEEE80211_KEY_FLAG_SPP_AMSDU; + increment_tailroom_need_count(sdata); ret = ieee80211_key_replace(sdata, link, sta, pairwise, old_key, key); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 13c417eda2810..d48fa1147c14c 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -735,8 +735,11 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT); } - if (!ops->set_key) + if (!ops->set_key) { wiphy->flags |= WIPHY_FLAG_IBSS_RSN; + wiphy_ext_feature_set(wiphy, + NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT); + } wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_TXQS); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_RRM); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5279af09fd530..fba596d812806 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5132,6 +5132,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, if (WARN_ON(!sta)) goto out_err; + sta->sta.spp_amsdu = assoc_data->spp_amsdu; + if (ieee80211_vif_is_mld(&sdata->vif)) { for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { if (!assoc_data->link[link_id].bss) @@ -8216,6 +8218,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, assoc_data->wmm = bss->wmm_used && (local->hw.queues >= IEEE80211_NUM_ACS); + assoc_data->spp_amsdu = req->flags & ASSOC_REQ_SPP_AMSDU; + /* * IEEE802.11n does not allow TKIP/WEP as pairwise ciphers in HT mode. * We still associate in non-HT mode (11a/b/g) if any one of these diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 94dae7cb6dbd3..e40529b8c5c9e 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -315,7 +315,7 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) * Calculate AAD for CCMP/GCMP, returning qos_tid since we * need that in CCMP also for b_0. */ -static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad) +static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad, bool spp_amsdu) { struct ieee80211_hdr *hdr = (void *)skb->data; __le16 mask_fc; @@ -340,7 +340,14 @@ static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad) len_a += 6; if (ieee80211_is_data_qos(hdr->frame_control)) { - qos_tid = ieee80211_get_tid(hdr); + qos_tid = *ieee80211_get_qos_ctl(hdr); + + if (spp_amsdu) + qos_tid &= IEEE80211_QOS_CTL_TID_MASK | + IEEE80211_QOS_CTL_A_MSDU_PRESENT; + else + qos_tid &= IEEE80211_QOS_CTL_TID_MASK; + mask_fc &= ~cpu_to_le16(IEEE80211_FCTL_ORDER); len_a += 2; } else { @@ -369,10 +376,11 @@ static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad) return qos_tid; } -static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad) +static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad, + bool spp_amsdu) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - u8 qos_tid = ccmp_gcmp_aad(skb, aad); + u8 qos_tid = ccmp_gcmp_aad(skb, aad, spp_amsdu); /* In CCM, the initial vectors (IV) used for CTR mode encryption and CBC * mode authentication are not allowed to collide, yet both are derived @@ -479,7 +487,8 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb, return 0; pos += IEEE80211_CCMP_HDR_LEN; - ccmp_special_blocks(skb, pn, b_0, aad); + ccmp_special_blocks(skb, pn, b_0, aad, + key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU); return ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len, skb_put(skb, mic_len)); } @@ -557,7 +566,8 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, u8 aad[2 * AES_BLOCK_SIZE]; u8 b_0[AES_BLOCK_SIZE]; /* hardware didn't decrypt/verify MIC */ - ccmp_special_blocks(skb, pn, b_0, aad); + ccmp_special_blocks(skb, pn, b_0, aad, + key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU); if (ieee80211_aes_ccm_decrypt( key->u.ccmp.tfm, b_0, aad, @@ -581,7 +591,8 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, return RX_CONTINUE; } -static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad) +static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad, + bool spp_amsdu) { struct ieee80211_hdr *hdr = (void *)skb->data; @@ -591,7 +602,7 @@ static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad) j_0[14] = 0; j_0[AES_BLOCK_SIZE - 1] = 0x01; - ccmp_gcmp_aad(skb, aad); + ccmp_gcmp_aad(skb, aad, spp_amsdu); } static inline void gcmp_pn2hdr(u8 *hdr, const u8 *pn, int key_id) @@ -680,7 +691,8 @@ static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) return 0; pos += IEEE80211_GCMP_HDR_LEN; - gcmp_special_blocks(skb, pn, j_0, aad); + gcmp_special_blocks(skb, pn, j_0, aad, + key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU); return ieee80211_aes_gcm_encrypt(key->u.gcmp.tfm, j_0, aad, pos, len, skb_put(skb, IEEE80211_GCMP_MIC_LEN)); } @@ -753,7 +765,8 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) u8 aad[2 * AES_BLOCK_SIZE]; u8 j_0[AES_BLOCK_SIZE]; /* hardware didn't decrypt/verify MIC */ - gcmp_special_blocks(skb, pn, j_0, aad); + gcmp_special_blocks(skb, pn, j_0, aad, + key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU); if (ieee80211_aes_gcm_decrypt( key->u.gcmp.tfm, j_0, aad, -- cgit 1.2.3-korg From a8b652604e39ff73b8b7f91e7c29a18904eb8a7c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 2 Jan 2024 21:35:43 +0200 Subject: wifi: mac80211: take EML/MLD capa from assoc response The association response is more likely to be correct than a random scan result, which really also should be correct, but we generally prefer to take data from the association response, so do that here as well. Also reset the data so it doesn't hang around from an old connection to a non-MLO connection, drivers would hopefully not look at it, but less surprise this way. Signed-off-by: Johannes Berg Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20240102213313.1d10f1d1dbab.I545e955675e2269a52496a22ae7822d95b40235e@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index fba596d812806..576ba6b25db9f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3086,9 +3086,14 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, memset(sdata->vif.bss_conf.tx_pwr_env, 0, sizeof(sdata->vif.bss_conf.tx_pwr_env)); + sdata->vif.cfg.eml_cap = 0; + sdata->vif.cfg.eml_med_sync_delay = 0; + sdata->vif.cfg.mld_capa_op = 0; + memset(&sdata->u.mgd.ttlm_info, 0, sizeof(sdata->u.mgd.ttlm_info)); wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work); + wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->neg_ttlm_timeout_work); ieee80211_vif_set_links(sdata, 0, 0); @@ -4981,16 +4986,8 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, eht_ml_elem && ieee80211_mle_type_ok(eht_ml_elem->data + 1, IEEE80211_ML_CONTROL_TYPE_BASIC, - eht_ml_elem->datalen - 1)) { + eht_ml_elem->datalen - 1)) supports_mlo = true; - - sdata->vif.cfg.eml_cap = - ieee80211_mle_get_eml_cap(eht_ml_elem->data + 1); - sdata->vif.cfg.eml_med_sync_delay = - ieee80211_mle_get_eml_med_sync_delay(eht_ml_elem->data + 1); - sdata->vif.cfg.mld_capa_op = - ieee80211_mle_get_mld_capa_op(eht_ml_elem->data + 1); - } } /* Allow VHT if at least one channel on the sband supports 80 MHz */ @@ -5432,6 +5429,13 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, assoc_data->ap_addr); goto abandon_assoc; } + + sdata->vif.cfg.eml_cap = + ieee80211_mle_get_eml_cap((const void *)elems->ml_basic); + sdata->vif.cfg.eml_med_sync_delay = + ieee80211_mle_get_eml_med_sync_delay((const void *)elems->ml_basic); + sdata->vif.cfg.mld_capa_op = + ieee80211_mle_get_mld_capa_op((const void *)elems->ml_basic); } sdata->vif.cfg.aid = aid; -- cgit 1.2.3-korg From ccb964b4ab1663ce92f389b72c052fc47a0ffdb9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 2 Jan 2024 21:35:44 +0200 Subject: wifi: cfg80211: validate MLO connections better When going into an MLO connection, validate that the link IDs match what userspace indicated, and that the AP MLD addresses and capabilities are all matching between the links. Signed-off-by: Johannes Berg Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20240102213313.ff83c034cb9a.I9962db0bfa8c73b37b8d5b59a3fad7f02f2129ae@changeid [roll in extra fix from Miri to actually check the return value] Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 24 ++++++++ net/wireless/core.h | 3 +- net/wireless/mlme.c | 136 +++++++++++++++++++++++++++++++++++++++++----- net/wireless/nl80211.c | 3 +- net/wireless/sme.c | 3 +- 5 files changed, 152 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index f0c068446c792..a70388ae3a7b0 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4935,6 +4935,30 @@ static inline u8 ieee80211_mle_common_size(const u8 *data) return sizeof(*mle) + common + mle->variable[0]; } +/** + * ieee80211_mle_get_link_id - returns the link ID + * @data: the basic multi link element + * + * The element is assumed to be of the correct type (BASIC) and big enough, + * this must be checked using ieee80211_mle_type_ok(). + * + * If the BSS link ID can't be found, -1 will be returned + */ +static inline int ieee80211_mle_get_link_id(const u8 *data) +{ + const struct ieee80211_multi_link_elem *mle = (const void *)data; + u16 control = le16_to_cpu(mle->control); + const u8 *common = mle->variable; + + /* common points now at the beginning of ieee80211_mle_basic_common_info */ + common += sizeof(struct ieee80211_mle_basic_common_info); + + if (!(control & IEEE80211_MLC_BASIC_PRES_LINK_ID)) + return -1; + + return *common; +} + /** * ieee80211_mle_get_bss_param_ch_cnt - returns the BSS parameter change count * @mle: the basic multi link element diff --git a/net/wireless/core.h b/net/wireless/core.h index 13657a85cf61a..30434551b3774 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -362,7 +362,8 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, struct cfg80211_auth_request *req); int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct cfg80211_assoc_request *req); + struct cfg80211_assoc_request *req, + struct netlink_ext_ack *extack); int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *bssid, const u8 *ie, int ie_len, u16 reason, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 43ba7891e2a3e..4052041a19ead 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -4,7 +4,7 @@ * * Copyright (c) 2009, Jouni Malinen * Copyright (c) 2015 Intel Deutschland GmbH - * Copyright (C) 2019-2020, 2022-2023 Intel Corporation + * Copyright (C) 2019-2020, 2022-2024 Intel Corporation */ #include @@ -325,28 +325,136 @@ void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, p1[i] &= p2[i]; } -/* Note: caller must cfg80211_put_bss() regardless of result */ -int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct cfg80211_assoc_request *req) +static int +cfg80211_mlme_check_mlo_compat(const struct ieee80211_multi_link_elem *mle_a, + const struct ieee80211_multi_link_elem *mle_b, + struct netlink_ext_ack *extack) { - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err, i, j; + const struct ieee80211_mle_basic_common_info *common_a, *common_b; - lockdep_assert_wiphy(wdev->wiphy); + common_a = (const void *)mle_a->variable; + common_b = (const void *)mle_b->variable; + + if (memcmp(common_a->mld_mac_addr, common_b->mld_mac_addr, ETH_ALEN)) { + NL_SET_ERR_MSG(extack, "AP MLD address mismatch"); + return -EINVAL; + } + + if (ieee80211_mle_get_eml_med_sync_delay((const u8 *)mle_a) != + ieee80211_mle_get_eml_med_sync_delay((const u8 *)mle_b)) { + NL_SET_ERR_MSG(extack, "link EML medium sync delay mismatch"); + return -EINVAL; + } + + if (ieee80211_mle_get_eml_cap((const u8 *)mle_a) != + ieee80211_mle_get_eml_cap((const u8 *)mle_b)) { + NL_SET_ERR_MSG(extack, "link EML capabilities mismatch"); + return -EINVAL; + } + + if (ieee80211_mle_get_mld_capa_op((const u8 *)mle_a) != + ieee80211_mle_get_mld_capa_op((const u8 *)mle_b)) { + NL_SET_ERR_MSG(extack, "link MLD capabilities/ops mismatch"); + return -EINVAL; + } + + return 0; +} + +static int cfg80211_mlme_check_mlo(struct net_device *dev, + struct cfg80211_assoc_request *req, + struct netlink_ext_ack *extack) +{ + const struct ieee80211_multi_link_elem *mles[ARRAY_SIZE(req->links)] = {}; + int i; + + if (req->link_id < 0) + return 0; + + if (!req->links[req->link_id].bss) { + NL_SET_ERR_MSG(extack, "no BSS for assoc link"); + return -EINVAL; + } + + rcu_read_lock(); + for (i = 0; i < ARRAY_SIZE(req->links); i++) { + const struct cfg80211_bss_ies *ies; + const struct element *ml; - for (i = 1; i < ARRAY_SIZE(req->links); i++) { if (!req->links[i].bss) continue; - for (j = 0; j < i; j++) { - if (req->links[i].bss == req->links[j].bss) - return -EINVAL; + + if (ether_addr_equal(req->links[i].bss->bssid, dev->dev_addr)) { + NL_SET_ERR_MSG(extack, "BSSID must not be our address"); + req->links[i].error = -EINVAL; + goto error; } - if (ether_addr_equal(req->links[i].bss->bssid, dev->dev_addr)) - return -EINVAL; + ies = rcu_dereference(req->links[i].bss->ies); + ml = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_MULTI_LINK, + ies->data, ies->len); + if (!ml) { + NL_SET_ERR_MSG(extack, "MLO BSS w/o ML element"); + req->links[i].error = -EINVAL; + goto error; + } + + if (!ieee80211_mle_type_ok(ml->data + 1, + IEEE80211_ML_CONTROL_TYPE_BASIC, + ml->datalen - 1)) { + NL_SET_ERR_MSG(extack, "BSS with invalid ML element"); + req->links[i].error = -EINVAL; + goto error; + } + + mles[i] = (const void *)(ml->data + 1); + + if (ieee80211_mle_get_link_id((const u8 *)mles[i]) != i) { + NL_SET_ERR_MSG(extack, "link ID mismatch"); + req->links[i].error = -EINVAL; + goto error; + } + } + + if (WARN_ON(!mles[req->link_id])) + goto error; + + for (i = 0; i < ARRAY_SIZE(req->links); i++) { + if (i == req->link_id || !req->links[i].bss) + continue; + + if (WARN_ON(!mles[i])) + goto error; + + if (cfg80211_mlme_check_mlo_compat(mles[req->link_id], mles[i], + extack)) { + req->links[i].error = -EINVAL; + goto error; + } } + rcu_read_unlock(); + return 0; +error: + rcu_read_unlock(); + return -EINVAL; +} + +/* Note: caller must cfg80211_put_bss() regardless of result */ +int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_assoc_request *req, + struct netlink_ext_ack *extack) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + int err; + + lockdep_assert_wiphy(wdev->wiphy); + + err = cfg80211_mlme_check_mlo(dev, req, extack); + if (err) + return err; + if (wdev->connected && (!req->prev_bssid || !ether_addr_equal(wdev->u.client.connected_addr, req->prev_bssid))) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b267317aa33c6..0809f721f0459 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11245,7 +11245,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) struct nlattr *link; int rem = 0; - err = cfg80211_mlme_assoc(rdev, dev, &req); + err = cfg80211_mlme_assoc(rdev, dev, &req, + info->extack); if (!err && info->attrs[NL80211_ATTR_SOCKET_OWNER]) { dev->ieee80211_ptr->conn_owner_nlportid = diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 195c8532734bd..82e3ce42206cc 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -209,7 +209,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev, if (!req.bss) { err = -ENOENT; } else { - err = cfg80211_mlme_assoc(rdev, wdev->netdev, &req); + err = cfg80211_mlme_assoc(rdev, wdev->netdev, + &req, NULL); cfg80211_put_bss(&rdev->wiphy, req.bss); } -- cgit 1.2.3-korg From d1155f2873cfd387ffb7f5423edbfb2bb22eaf32 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Jan 2024 18:17:40 +0200 Subject: wifi: mac80211: simplify ieee80211_config_bw() prototype The only user of this function passes a lot of pointers directly from the parsed elements, so it's simpler to just pass the entire elements parsing struct. This also shows that the ht_cap is actually unused. Signed-off-by: Johannes Berg Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20240111181514.f0653cd5e7dd.I8bd5ee848074029a9f0495c95e4339546ad8fe15@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 576ba6b25db9f..6fa69ad3ad4f8 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -489,15 +489,15 @@ out: } static int ieee80211_config_bw(struct ieee80211_link_data *link, - const struct ieee80211_ht_cap *ht_cap, - const struct ieee80211_vht_cap *vht_cap, - const struct ieee80211_ht_operation *ht_oper, - const struct ieee80211_vht_operation *vht_oper, - const struct ieee80211_he_operation *he_oper, - const struct ieee80211_eht_operation *eht_oper, - const struct ieee80211_s1g_oper_ie *s1g_oper, + struct ieee802_11_elems *elems, const u8 *bssid, u64 *changed) { + const struct ieee80211_vht_cap *vht_cap = elems->vht_cap_elem; + const struct ieee80211_ht_operation *ht_oper = elems->ht_operation; + const struct ieee80211_vht_operation *vht_oper = elems->vht_operation; + const struct ieee80211_he_operation *he_oper = elems->he_operation; + const struct ieee80211_eht_operation *eht_oper = elems->eht_operation; + const struct ieee80211_s1g_oper_ie *s1g_oper = elems->s1g_oper; struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -6433,11 +6433,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, changed |= ieee80211_recalc_twt_req(sdata, sband, link, link_sta, elems); - if (ieee80211_config_bw(link, elems->ht_cap_elem, - elems->vht_cap_elem, elems->ht_operation, - elems->vht_operation, elems->he_operation, - elems->eht_operation, - elems->s1g_oper, bssid, &changed)) { + if (ieee80211_config_bw(link, elems, bssid, &changed)) { sdata_info(sdata, "failed to follow AP %pM bandwidth change, disconnect\n", bssid); -- cgit 1.2.3-korg From f73ef56c941248ae8b1e19862e11a7bb517b9af1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Jan 2024 18:17:41 +0200 Subject: wifi: mac80211: remove extra element parsing We already parse all the BSS elements into elems, there's really no need to separately find EHT/ML again. Remove the extra code. Signed-off-by: Johannes Berg Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20240111181514.c4a55da9f778.I112b1ef00904c4183ac7644800f8daa8a4449875@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 6fa69ad3ad4f8..45be270eaab7a 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4962,32 +4962,12 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, (IEEE80211_CONN_DISABLE_HE | IEEE80211_CONN_DISABLE_EHT)) && he_oper) { - const struct cfg80211_bss_ies *cbss_ies; - const struct element *eht_ml_elem; - const u8 *eht_oper_ie; - - cbss_ies = rcu_dereference(cbss->ies); - eht_oper_ie = cfg80211_find_ext_ie(WLAN_EID_EXT_EHT_OPERATION, - cbss_ies->data, cbss_ies->len); - if (eht_oper_ie && eht_oper_ie[1] >= - 1 + sizeof(struct ieee80211_eht_operation)) - eht_oper = (void *)(eht_oper_ie + 3); - else - eht_oper = NULL; + eht_oper = elems->eht_operation; if (!ieee80211_verify_sta_eht_mcs_support(sdata, sband, eht_oper)) *conn_flags |= IEEE80211_CONN_DISABLE_EHT; - eht_ml_elem = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_MULTI_LINK, - cbss_ies->data, cbss_ies->len); - - /* data + 1 / datalen - 1 since it's an extended element */ - if (!(*conn_flags & IEEE80211_CONN_DISABLE_EHT) && - eht_ml_elem && - ieee80211_mle_type_ok(eht_ml_elem->data + 1, - IEEE80211_ML_CONTROL_TYPE_BASIC, - eht_ml_elem->datalen - 1)) - supports_mlo = true; + supports_mlo = elems->ml_basic; } /* Allow VHT if at least one channel on the sband supports 80 MHz */ -- cgit 1.2.3-korg From 6593c7aec7fa15d59af4fbefdbb3d46d27d6e3ed Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Jan 2024 18:17:42 +0200 Subject: wifi: mac80211: simplify HE capability access For verifying the required HE capabilities are supported locally, we access the HE capability element of the AP. Simplify that access, we've already parsed and validated it when parsing elements. Signed-off-by: Johannes Berg Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20240111181514.2ef62b43caeb.I8baa604dd3f3399e08b86c99395a2c6a1185d35d@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 32 +++++--------------------------- 1 file changed, 5 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 45be270eaab7a..5b1bc84760d5e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4547,41 +4547,17 @@ static u8 ieee80211_max_rx_chains(struct ieee80211_link_data *link, static bool ieee80211_verify_peer_he_mcs_support(struct ieee80211_sub_if_data *sdata, - const struct cfg80211_bss_ies *ies, + const struct ieee80211_he_cap_elem *he_cap, const struct ieee80211_he_operation *he_op) { - const struct element *he_cap_elem; - const struct ieee80211_he_cap_elem *he_cap; struct ieee80211_he_mcs_nss_supp *he_mcs_nss_supp; u16 mcs_80_map_tx, mcs_80_map_rx; u16 ap_min_req_set; - int mcs_nss_size; int nss; - he_cap_elem = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_CAPABILITY, - ies->data, ies->len); - - if (!he_cap_elem) + if (!he_cap) return false; - /* invalid HE IE */ - if (he_cap_elem->datalen < 1 + sizeof(*he_cap)) { - sdata_info(sdata, - "Invalid HE elem, Disable HE\n"); - return false; - } - - /* skip one byte ext_tag_id */ - he_cap = (void *)(he_cap_elem->data + 1); - mcs_nss_size = ieee80211_he_mcs_nss_size(he_cap); - - /* invalid HE IE */ - if (he_cap_elem->datalen < 1 + sizeof(*he_cap) + mcs_nss_size) { - sdata_info(sdata, - "Invalid HE elem with nss size, Disable HE\n"); - return false; - } - /* mcs_nss is right after he_cap info */ he_mcs_nss_supp = (void *)(he_cap + 1); @@ -4946,7 +4922,9 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, } } - if (!ieee80211_verify_peer_he_mcs_support(sdata, ies, he_oper) || + if (!ieee80211_verify_peer_he_mcs_support(sdata, + (void *)elems->he_cap, + he_oper) || !ieee80211_verify_sta_he_mcs_support(sdata, sband, he_oper)) *conn_flags |= IEEE80211_CONN_DISABLE_HE | IEEE80211_CONN_DISABLE_EHT; -- cgit 1.2.3-korg From f04d2c247e0407ff8219395bef11ddc0837b7397 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Jan 2024 18:17:43 +0200 Subject: wifi: mac80211: disallow drivers with HT wider than HE To simplify the code in the next patch, disallow drivers supporting 40 MHz in HT but not HE, since we'd otherwise have to track local maximum bandwidth per mode there. Signed-off-by: Johannes Berg Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20240111181514.da15fe3214d2.I4df51ad2f4c844615c168bf9bdb498925b3c77d4@changeid Signed-off-by: Johannes Berg --- net/mac80211/main.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index d48fa1147c14c..e05bcc35bc1e0 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1119,8 +1119,26 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) supp_vht = supp_vht || sband->vht_cap.vht_supported; for_each_sband_iftype_data(sband, i, iftd) { + u8 he_40_mhz_cap; + supp_he = supp_he || iftd->he_cap.has_he; supp_eht = supp_eht || iftd->eht_cap.has_eht; + + if (sband->band == NL80211_BAND_2GHZ) + he_40_mhz_cap = + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G; + else + he_40_mhz_cap = + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G; + + /* currently no support for HE client where HT has 40 MHz but not HT */ + if (iftd->he_cap.has_he && + iftd->types_mask & (BIT(NL80211_IFTYPE_STATION) | + BIT(NL80211_IFTYPE_P2P_CLIENT)) && + sband->ht_cap.ht_supported && + sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 && + !(iftd->he_cap.he_cap_elem.phy_cap_info[0] & he_40_mhz_cap)) + return -EINVAL; } /* HT, VHT, HE require QoS, thus >= 4 queues */ -- cgit 1.2.3-korg From bc8a0fac8677f729ab17176023be1a2653e8b9c6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Jan 2024 18:17:45 +0200 Subject: wifi: mac80211: don't set bss_conf in parsing When parsing 6 GHz operation, don't set the bss_conf values. We only commit to that later in association, so move the code there. Also clear it later. While at it, handle IEEE80211_6GHZ_CTRL_REG_VLP_AP. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240111181514.c2da4bc515e8.I219ca40e15c0fbaff0e7c3e83ca4b92ecbc1f8ae@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 27 +++++++++++++++++++++++++++ net/mac80211/util.c | 21 +-------------------- 2 files changed, 28 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5b1bc84760d5e..99188bd847997 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3081,6 +3081,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, memset(ifmgd->tx_tspec, 0, sizeof(ifmgd->tx_tspec)); wiphy_delayed_work_cancel(local->hw.wiphy, &ifmgd->tx_tspec_wk); + sdata->vif.bss_conf.power_type = IEEE80211_REG_UNSET_AP; sdata->vif.bss_conf.pwr_reduction = 0; sdata->vif.bss_conf.tx_pwr_env_num = 0; memset(sdata->vif.bss_conf.tx_pwr_env, 0, @@ -4236,12 +4237,38 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, if (elems->he_operation && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) && elems->he_cap) { + const struct ieee80211_he_6ghz_oper *he_6ghz_oper; + ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, elems->he_cap, elems->he_cap_len, elems->he_6ghz_capa, link_sta); + he_6ghz_oper = ieee80211_he_6ghz_oper(elems->he_operation); + + if (is_6ghz && he_6ghz_oper) { + switch (u8_get_bits(he_6ghz_oper->control, + IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) { + case IEEE80211_6GHZ_CTRL_REG_LPI_AP: + bss_conf->power_type = IEEE80211_REG_LPI_AP; + break; + case IEEE80211_6GHZ_CTRL_REG_SP_AP: + bss_conf->power_type = IEEE80211_REG_SP_AP; + break; + case IEEE80211_6GHZ_CTRL_REG_VLP_AP: + bss_conf->power_type = IEEE80211_REG_VLP_AP; + break; + default: + bss_conf->power_type = IEEE80211_REG_UNSET_AP; + break; + } + } else if (is_6ghz) { + link_info(link, + "HE 6 GHz operation missing (on %d MHz), expect issues\n", + bss_conf->chandef.chan->center_freq); + } + bss_conf->he_support = link_sta->pub->he_cap.has_he; if (elems->rsnx && elems->rsnx_len && (elems->rsnx[0] & WLAN_RSNX_CAPA_PROTECTED_TWT) && diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 643c54855be6c..685b55a053f3f 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3845,7 +3845,6 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, const struct ieee80211_sta_eht_cap *eht_cap; struct cfg80211_chan_def he_chandef = *chandef; const struct ieee80211_he_6ghz_oper *he_6ghz_oper; - struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; bool support_80_80, support_160, support_320; u8 he_phy_cap, eht_phy_cap; u32 freq; @@ -3881,13 +3880,8 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, eht_oper = NULL; he_6ghz_oper = ieee80211_he_6ghz_oper(he_oper); - - if (!he_6ghz_oper) { - sdata_info(sdata, - "HE 6GHz operation missing (on %d MHz), expect issues\n", - chandef->chan->center_freq); + if (!he_6ghz_oper) return false; - } /* * The EHT operation IE does not contain the primary channel so the @@ -3898,19 +3892,6 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, NL80211_BAND_6GHZ); he_chandef.chan = ieee80211_get_channel(sdata->local->hw.wiphy, freq); - switch (u8_get_bits(he_6ghz_oper->control, - IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) { - case IEEE80211_6GHZ_CTRL_REG_LPI_AP: - bss_conf->power_type = IEEE80211_REG_LPI_AP; - break; - case IEEE80211_6GHZ_CTRL_REG_SP_AP: - bss_conf->power_type = IEEE80211_REG_SP_AP; - break; - default: - bss_conf->power_type = IEEE80211_REG_UNSET_AP; - break; - } - if (!eht_oper || !(eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT)) { switch (u8_get_bits(he_6ghz_oper->control, -- cgit 1.2.3-korg From e10322810ce0d0d4a5a319458c4e1e052c6fe9be Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Thu, 11 Jan 2024 18:17:46 +0200 Subject: wifi: mac80211: use deflink and fix typo in link ID check This does not change anything effectively, but it is closer to what the code is trying to achieve here. i.e. select the link data if it is an MLD and fall back to using the deflink otherwise. Fixes: 0f99f0878350 ("wifi: mac80211: Print local link address during authentication") Signed-off-by: Benjamin Berg Reviewed-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://msgid.link/20240111181514.4c4b1c40eb3c.I2771621dee328c618536596b7e56232df42a79c8@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 99188bd847997..cc9a8eaffa6b5 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7869,10 +7869,10 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, if (err) goto err_clear; - if (req->link_id > 0) + if (req->link_id >= 0) link = sdata_dereference(sdata->link[req->link_id], sdata); else - link = sdata_dereference(sdata->link[0], sdata); + link = &sdata->deflink; if (WARN_ON(!link)) { err = -ENOLINK; -- cgit 1.2.3-korg From d60277ac3fc9c9f7887ff813c98d0a71ac2abde2 Mon Sep 17 00:00:00 2001 From: Michael-CY Lee Date: Tue, 23 Jan 2024 13:47:52 +0800 Subject: wifi: mac80211: apply duration for SW scan This patch makes duration in scan request be applicable when using SW scan, but only accepts durations greater than the default value for the following reasons: 1. Most APs have a beacoon interval of 100ms. 2. Sending and receiving probe require some delay. 3. Setting channel to HW also requires some delays Signed-off-by: Michael-CY Lee Link: https://msgid.link/20240123054752.22833-1-michael-cy.lee@mediatek.com Signed-off-by: Johannes Berg --- net/mac80211/scan.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 645355e5f1bc7..8adcb23262f55 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -671,7 +671,10 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, * After sending probe requests, wait for probe responses * on the channel. */ - *next_delay = IEEE80211_CHANNEL_TIME; + *next_delay = msecs_to_jiffies(scan_req->duration) > + IEEE80211_PROBE_DELAY + IEEE80211_CHANNEL_TIME ? + msecs_to_jiffies(scan_req->duration) - IEEE80211_PROBE_DELAY : + IEEE80211_CHANNEL_TIME; local->next_scan_state = SCAN_DECISION; } @@ -994,7 +997,10 @@ set_channel: */ if ((chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR)) || !scan_req->n_ssids) { - *next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; + *next_delay = msecs_to_jiffies(scan_req->duration) > + IEEE80211_PASSIVE_CHANNEL_TIME ? + msecs_to_jiffies(scan_req->duration) : + IEEE80211_PASSIVE_CHANNEL_TIME; local->next_scan_state = SCAN_DECISION; if (scan_req->n_ssids) set_bit(SCAN_BEACON_WAIT, &local->scanning); -- cgit 1.2.3-korg From a923ff876f4b6133a093482a6d465cde3bc2e65c Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Thu, 25 Jan 2024 14:55:47 -0800 Subject: Revert "nl80211/cfg80211: Specify band specific min RSSI thresholds with sched scan" This *mostly* reverts commit 1e1b11b6a111 ("nl80211/cfg80211: Specify band specific min RSSI thresholds with sched scan"). During the review of a new patch [1] it was observed that the functionality being modified was not actually being used by any in-tree driver. Further research determined that the functionality was originally introduced to support a new Android interface, but that interface was subsequently abandoned. Since the functionality has apparently never been used, remove it. However, to mantain the sanctity of the UABI, keep the nl80211.h assignments, but clearly mark them as obsolete. Cc: Lin Ma Cc: Vamsi Krishna Link: https://lore.kernel.org/linux-wireless/20240119151201.8670-1-linma@zju.edu.cn/ [1] Signed-off-by: Jeff Johnson Link: https://msgid.link/20240125-for-next-v1-1-fd79e01c6c09@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 ------- include/uapi/linux/nl80211.h | 16 ++++--------- net/wireless/nl80211.c | 55 -------------------------------------------- 3 files changed, 4 insertions(+), 75 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 56bce924bec6d..51b9e6fa12f89 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2695,19 +2695,11 @@ static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask) * @bssid: BSSID to be matched; may be all-zero BSSID in case of SSID match * or no match (RSSI only) * @rssi_thold: don't report scan results below this threshold (in s32 dBm) - * @per_band_rssi_thold: Minimum rssi threshold for each band to be applied - * for filtering out scan results received. Drivers advertise this support - * of band specific rssi based filtering through the feature capability - * %NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD. These band - * specific rssi thresholds take precedence over rssi_thold, if specified. - * If not specified for any band, it will be assigned with rssi_thold of - * corresponding matchset. */ struct cfg80211_match_set { struct cfg80211_ssid ssid; u8 bssid[ETH_ALEN]; s32 rssi_thold; - s32 per_band_rssi_thold[NUM_NL80211_BANDS]; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 3e239df3528f3..853ac538a686f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4463,14 +4463,7 @@ enum nl80211_reg_rule_attr { * value as specified by &struct nl80211_bss_select_rssi_adjust. * @NL80211_SCHED_SCAN_MATCH_ATTR_BSSID: BSSID to be used for matching * (this cannot be used together with SSID). - * @NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI: Nested attribute that carries the - * band specific minimum rssi thresholds for the bands defined in - * enum nl80211_band. The minimum rssi threshold value(s32) specific to a - * band shall be encapsulated in attribute with type value equals to one - * of the NL80211_BAND_* defined in enum nl80211_band. For example, the - * minimum rssi threshold value for 2.4GHZ band shall be encapsulated - * within an attribute of type NL80211_BAND_2GHZ. And one or more of such - * attributes will be nested within this attribute. + * @NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI: Obsolete * @NL80211_SCHED_SCAN_MATCH_ATTR_MAX: highest scheduled scan filter * attribute number currently defined * @__NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST: internal use @@ -4483,7 +4476,7 @@ enum nl80211_sched_scan_match_attr { NL80211_SCHED_SCAN_MATCH_ATTR_RELATIVE_RSSI, NL80211_SCHED_SCAN_MATCH_ATTR_RSSI_ADJUST, NL80211_SCHED_SCAN_MATCH_ATTR_BSSID, - NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI, + NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI, /* obsolete */ /* keep last */ __NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST, @@ -6418,8 +6411,7 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_AP_PMKSA_CACHING: Driver/device supports PMKSA caching * (set/del PMKSA operations) in AP mode. * - * @NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD: Driver supports - * filtering of sched scan results using band specific RSSI thresholds. + * @NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD: Obsolete * * @NL80211_EXT_FEATURE_STA_TX_PWR: This driver supports controlling tx power * to a station. @@ -6574,7 +6566,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER, NL80211_EXT_FEATURE_AIRTIME_FAIRNESS, NL80211_EXT_FEATURE_AP_PMKSA_CACHING, - NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD, + NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD, /* obsolete */ NL80211_EXT_FEATURE_EXT_KEY_ID, NL80211_EXT_FEATURE_STA_TX_PWR, NL80211_EXT_FEATURE_SAE_OFFLOAD, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0809f721f0459..e4f41f86e295d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -906,23 +906,12 @@ nl80211_rekey_policy[NUM_NL80211_REKEY_DATA] = { [NL80211_REKEY_DATA_AKM] = { .type = NLA_U32 }, }; -static const struct nla_policy -nl80211_match_band_rssi_policy[NUM_NL80211_BANDS] = { - [NL80211_BAND_2GHZ] = { .type = NLA_S32 }, - [NL80211_BAND_5GHZ] = { .type = NLA_S32 }, - [NL80211_BAND_6GHZ] = { .type = NLA_S32 }, - [NL80211_BAND_60GHZ] = { .type = NLA_S32 }, - [NL80211_BAND_LC] = { .type = NLA_S32 }, -}; - static const struct nla_policy nl80211_match_policy[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1] = { [NL80211_SCHED_SCAN_MATCH_ATTR_SSID] = { .type = NLA_BINARY, .len = IEEE80211_MAX_SSID_LEN }, [NL80211_SCHED_SCAN_MATCH_ATTR_BSSID] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN), [NL80211_SCHED_SCAN_MATCH_ATTR_RSSI] = { .type = NLA_U32 }, - [NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI] = - NLA_POLICY_NESTED(nl80211_match_band_rssi_policy), }; static const struct nla_policy @@ -9490,41 +9479,6 @@ nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans, return 0; } -static int -nl80211_parse_sched_scan_per_band_rssi(struct wiphy *wiphy, - struct cfg80211_match_set *match_sets, - struct nlattr *tb_band_rssi, - s32 rssi_thold) -{ - struct nlattr *attr; - int i, tmp, ret = 0; - - if (!wiphy_ext_feature_isset(wiphy, - NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD)) { - if (tb_band_rssi) - ret = -EOPNOTSUPP; - else - for (i = 0; i < NUM_NL80211_BANDS; i++) - match_sets->per_band_rssi_thold[i] = - NL80211_SCAN_RSSI_THOLD_OFF; - return ret; - } - - for (i = 0; i < NUM_NL80211_BANDS; i++) - match_sets->per_band_rssi_thold[i] = rssi_thold; - - nla_for_each_nested(attr, tb_band_rssi, tmp) { - enum nl80211_band band = nla_type(attr); - - if (band < 0 || band >= NUM_NL80211_BANDS) - return -EINVAL; - - match_sets->per_band_rssi_thold[band] = nla_get_s32(attr); - } - - return 0; -} - static struct cfg80211_sched_scan_request * nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, struct nlattr **attrs, int max_match_sets) @@ -9799,15 +9753,6 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, if (rssi) request->match_sets[i].rssi_thold = nla_get_s32(rssi); - - /* Parse per band RSSI attribute */ - err = nl80211_parse_sched_scan_per_band_rssi(wiphy, - &request->match_sets[i], - tb[NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI], - request->match_sets[i].rssi_thold); - if (err) - goto out_free; - i++; } -- cgit 1.2.3-korg From 31e03207119a535d0b0e3b3a7f91983aeb2cb14d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Jan 2024 09:08:52 -0800 Subject: af_unix: Annotate data-race of gc_in_progress in wait_for_unix_gc(). gc_in_progress is changed under spin_lock(&unix_gc_lock), but wait_for_unix_gc() reads it locklessly. Let's use READ_ONCE(). Fixes: 5f23b734963e ("net: Fix soft lockups/OOM issues w/ unix garbage collector") Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240123170856.41348-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/unix/garbage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 2405f0f9af31c..af3d2221cf6ad 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -198,7 +198,7 @@ void wait_for_unix_gc(void) if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC && !READ_ONCE(gc_in_progress)) unix_gc(); - wait_event(unix_gc_wait, gc_in_progress == false); + wait_event(unix_gc_wait, !READ_ONCE(gc_in_progress)); } /* The external entry point: unix_gc() */ -- cgit 1.2.3-korg From 97af84a6bba2ab2b9c704c08e67de3b5ea551bb2 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Jan 2024 09:08:53 -0800 Subject: af_unix: Do not use atomic ops for unix_sk(sk)->inflight. When touching unix_sk(sk)->inflight, we are always under spin_lock(&unix_gc_lock). Let's convert unix_sk(sk)->inflight to the normal unsigned long. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240123170856.41348-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/af_unix.h | 2 +- net/unix/af_unix.c | 4 ++-- net/unix/garbage.c | 17 ++++++++--------- net/unix/scm.c | 8 +++++--- 4 files changed, 16 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 49c4640027d8a..ac38b63db554f 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -61,7 +61,7 @@ struct unix_sock { struct mutex iolock, bindlock; struct sock *peer; struct list_head link; - atomic_long_t inflight; + unsigned long inflight; spinlock_t lock; unsigned long gc_flags; #define UNIX_GC_CANDIDATE 0 diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ac1f2bc18fc96..1e9378036dcc3 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -993,11 +993,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; sk->sk_destruct = unix_sock_destructor; - u = unix_sk(sk); + u = unix_sk(sk); + u->inflight = 0; u->path.dentry = NULL; u->path.mnt = NULL; spin_lock_init(&u->lock); - atomic_long_set(&u->inflight, 0); INIT_LIST_HEAD(&u->link); mutex_init(&u->iolock); /* single task reading lock */ mutex_init(&u->bindlock); /* single task binding lock */ diff --git a/net/unix/garbage.c b/net/unix/garbage.c index af3d2221cf6ad..051f96a3ab029 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -166,17 +166,18 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *), static void dec_inflight(struct unix_sock *usk) { - atomic_long_dec(&usk->inflight); + usk->inflight--; } static void inc_inflight(struct unix_sock *usk) { - atomic_long_inc(&usk->inflight); + usk->inflight++; } static void inc_inflight_move_tail(struct unix_sock *u) { - atomic_long_inc(&u->inflight); + u->inflight++; + /* If this still might be part of a cycle, move it to the end * of the list, so that it's checked even if it was already * passed over @@ -237,14 +238,12 @@ void unix_gc(void) */ list_for_each_entry_safe(u, next, &gc_inflight_list, link) { long total_refs; - long inflight_refs; total_refs = file_count(u->sk.sk_socket->file); - inflight_refs = atomic_long_read(&u->inflight); - BUG_ON(inflight_refs < 1); - BUG_ON(total_refs < inflight_refs); - if (total_refs == inflight_refs) { + BUG_ON(!u->inflight); + BUG_ON(total_refs < u->inflight); + if (total_refs == u->inflight) { list_move_tail(&u->link, &gc_candidates); __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); @@ -271,7 +270,7 @@ void unix_gc(void) /* Move cursor to after the current position. */ list_move(&cursor, &u->link); - if (atomic_long_read(&u->inflight) > 0) { + if (u->inflight) { list_move_tail(&u->link, ¬_cycle_list); __clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); scan_children(&u->sk, inc_inflight_move_tail, NULL); diff --git a/net/unix/scm.c b/net/unix/scm.c index 822ce0d0d7915..e92f2fad64105 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -53,12 +53,13 @@ void unix_inflight(struct user_struct *user, struct file *fp) if (s) { struct unix_sock *u = unix_sk(s); - if (atomic_long_inc_return(&u->inflight) == 1) { + if (!u->inflight) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); } else { BUG_ON(list_empty(&u->link)); } + u->inflight++; /* Paired with READ_ONCE() in wait_for_unix_gc() */ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1); } @@ -75,10 +76,11 @@ void unix_notinflight(struct user_struct *user, struct file *fp) if (s) { struct unix_sock *u = unix_sk(s); - BUG_ON(!atomic_long_read(&u->inflight)); + BUG_ON(!u->inflight); BUG_ON(list_empty(&u->link)); - if (atomic_long_dec_and_test(&u->inflight)) + u->inflight--; + if (!u->inflight) list_del_init(&u->link); /* Paired with READ_ONCE() in wait_for_unix_gc() */ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1); -- cgit 1.2.3-korg From 5b17307bd0789edea0675d524a2b277b93bbde62 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Jan 2024 09:08:54 -0800 Subject: af_unix: Return struct unix_sock from unix_get_socket(). Currently, unix_get_socket() returns struct sock, but after calling it, we always cast it to unix_sk(). Let's return struct unix_sock from unix_get_socket(). Signed-off-by: Kuniyuki Iwashima Acked-by: Pavel Begunkov Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240123170856.41348-4-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/af_unix.h | 2 +- net/unix/garbage.c | 19 +++++++------------ net/unix/scm.c | 19 +++++++------------ 3 files changed, 15 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index ac38b63db554f..2c98ef95017b2 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -14,7 +14,7 @@ void unix_destruct_scm(struct sk_buff *skb); void io_uring_destruct_scm(struct sk_buff *skb); void unix_gc(void); void wait_for_unix_gc(void); -struct sock *unix_get_socket(struct file *filp); +struct unix_sock *unix_get_socket(struct file *filp); struct sock *unix_peer_get(struct sock *sk); #define UNIX_HASH_MOD (256 - 1) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 051f96a3ab029..d5ef46a75f1f4 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -105,20 +105,15 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), while (nfd--) { /* Get the socket the fd matches if it indeed does so */ - struct sock *sk = unix_get_socket(*fp++); + struct unix_sock *u = unix_get_socket(*fp++); - if (sk) { - struct unix_sock *u = unix_sk(sk); + /* Ignore non-candidates, they could have been added + * to the queues after starting the garbage collection + */ + if (u && test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) { + hit = true; - /* Ignore non-candidates, they could - * have been added to the queues after - * starting the garbage collection - */ - if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) { - hit = true; - - func(u); - } + func(u); } } if (hit && hitlist != NULL) { diff --git a/net/unix/scm.c b/net/unix/scm.c index e92f2fad64105..b5ae5ab167773 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -21,9 +21,8 @@ EXPORT_SYMBOL(gc_inflight_list); DEFINE_SPINLOCK(unix_gc_lock); EXPORT_SYMBOL(unix_gc_lock); -struct sock *unix_get_socket(struct file *filp) +struct unix_sock *unix_get_socket(struct file *filp) { - struct sock *u_sock = NULL; struct inode *inode = file_inode(filp); /* Socket ? */ @@ -34,10 +33,10 @@ struct sock *unix_get_socket(struct file *filp) /* PF_UNIX ? */ if (s && ops && ops->family == PF_UNIX) - u_sock = s; + return unix_sk(s); } - return u_sock; + return NULL; } EXPORT_SYMBOL(unix_get_socket); @@ -46,13 +45,11 @@ EXPORT_SYMBOL(unix_get_socket); */ void unix_inflight(struct user_struct *user, struct file *fp) { - struct sock *s = unix_get_socket(fp); + struct unix_sock *u = unix_get_socket(fp); spin_lock(&unix_gc_lock); - if (s) { - struct unix_sock *u = unix_sk(s); - + if (u) { if (!u->inflight) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); @@ -69,13 +66,11 @@ void unix_inflight(struct user_struct *user, struct file *fp) void unix_notinflight(struct user_struct *user, struct file *fp) { - struct sock *s = unix_get_socket(fp); + struct unix_sock *u = unix_get_socket(fp); spin_lock(&unix_gc_lock); - if (s) { - struct unix_sock *u = unix_sk(s); - + if (u) { BUG_ON(!u->inflight); BUG_ON(list_empty(&u->link)); -- cgit 1.2.3-korg From 8b90a9f819dc2a06baae4ec1a64d875e53b824ec Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Jan 2024 09:08:55 -0800 Subject: af_unix: Run GC on only one CPU. If more than 16000 inflight AF_UNIX sockets exist and the garbage collector is not running, unix_(dgram|stream)_sendmsg() call unix_gc(). Also, they wait for unix_gc() to complete. In unix_gc(), all inflight AF_UNIX sockets are traversed at least once, and more if they are the GC candidate. Thus, sendmsg() significantly slows down with too many inflight AF_UNIX sockets. There is a small window to invoke multiple unix_gc() instances, which will then be blocked by the same spinlock except for one. Let's convert unix_gc() to use struct work so that it will not consume CPUs unnecessarily. Note WRITE_ONCE(gc_in_progress, true) is moved before running GC. If we leave the WRITE_ONCE() as is and use the following test to call flush_work(), a process might not call it. CPU 0 CPU 1 --- --- start work and call __unix_gc() if (work_pending(&unix_gc_work) || <-- false READ_ONCE(gc_in_progress)) <-- false flush_work(); <-- missed! WRITE_ONCE(gc_in_progress, true) Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240123170856.41348-5-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/unix/garbage.c | 54 +++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/unix/garbage.c b/net/unix/garbage.c index d5ef46a75f1f4..3f599db59f9dc 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -86,7 +86,6 @@ /* Internal data structures and random procedures: */ static LIST_HEAD(gc_candidates); -static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait); static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), struct sk_buff_head *hitlist) @@ -182,23 +181,8 @@ static void inc_inflight_move_tail(struct unix_sock *u) } static bool gc_in_progress; -#define UNIX_INFLIGHT_TRIGGER_GC 16000 - -void wait_for_unix_gc(void) -{ - /* If number of inflight sockets is insane, - * force a garbage collect right now. - * Paired with the WRITE_ONCE() in unix_inflight(), - * unix_notinflight() and gc_in_progress(). - */ - if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC && - !READ_ONCE(gc_in_progress)) - unix_gc(); - wait_event(unix_gc_wait, !READ_ONCE(gc_in_progress)); -} -/* The external entry point: unix_gc() */ -void unix_gc(void) +static void __unix_gc(struct work_struct *work) { struct sk_buff *next_skb, *skb; struct unix_sock *u; @@ -209,13 +193,6 @@ void unix_gc(void) spin_lock(&unix_gc_lock); - /* Avoid a recursive GC. */ - if (gc_in_progress) - goto out; - - /* Paired with READ_ONCE() in wait_for_unix_gc(). */ - WRITE_ONCE(gc_in_progress, true); - /* First, select candidates for garbage collection. Only * in-flight sockets are considered, and from those only ones * which don't have any external reference. @@ -322,8 +299,31 @@ void unix_gc(void) /* Paired with READ_ONCE() in wait_for_unix_gc(). */ WRITE_ONCE(gc_in_progress, false); - wake_up(&unix_gc_wait); - - out: spin_unlock(&unix_gc_lock); } + +static DECLARE_WORK(unix_gc_work, __unix_gc); + +void unix_gc(void) +{ + WRITE_ONCE(gc_in_progress, true); + queue_work(system_unbound_wq, &unix_gc_work); +} + +#define UNIX_INFLIGHT_TRIGGER_GC 16000 + +void wait_for_unix_gc(void) +{ + /* If number of inflight sockets is insane, + * force a garbage collect right now. + * + * Paired with the WRITE_ONCE() in unix_inflight(), + * unix_notinflight(), and __unix_gc(). + */ + if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC && + !READ_ONCE(gc_in_progress)) + unix_gc(); + + if (READ_ONCE(gc_in_progress)) + flush_work(&unix_gc_work); +} -- cgit 1.2.3-korg From d9f21b3613337b55cc9d4a6ead484dca68475143 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Jan 2024 09:08:56 -0800 Subject: af_unix: Try to run GC async. If more than 16000 inflight AF_UNIX sockets exist and the garbage collector is not running, unix_(dgram|stream)_sendmsg() call unix_gc(). Also, they wait for unix_gc() to complete. In unix_gc(), all inflight AF_UNIX sockets are traversed at least once, and more if they are the GC candidate. Thus, sendmsg() significantly slows down with too many inflight AF_UNIX sockets. However, if a process sends data with no AF_UNIX FD, the sendmsg() call does not need to wait for GC. After this change, only the process that meets the condition below will be blocked under such a situation. 1) cmsg contains AF_UNIX socket 2) more than 32 AF_UNIX sent by the same user are still inflight Note that even a sendmsg() call that does not meet the condition but has AF_UNIX FD will be blocked later in unix_scm_to_skb() by the spinlock, but we allow that as a bonus for sane users. The results below are the time spent in unix_dgram_sendmsg() sending 1 byte of data with no FD 4096 times on a host where 32K inflight AF_UNIX sockets exist. Without series: the sane sendmsg() needs to wait gc unreasonably. $ sudo /usr/share/bcc/tools/funclatency -p 11165 unix_dgram_sendmsg Tracing 1 functions for "unix_dgram_sendmsg"... Hit Ctrl-C to end. ^C nsecs : count distribution [...] 524288 -> 1048575 : 0 | | 1048576 -> 2097151 : 3881 |****************************************| 2097152 -> 4194303 : 214 |** | 4194304 -> 8388607 : 1 | | avg = 1825567 nsecs, total: 7477526027 nsecs, count: 4096 With series: the sane sendmsg() can finish much faster. $ sudo /usr/share/bcc/tools/funclatency -p 8702 unix_dgram_sendmsg Tracing 1 functions for "unix_dgram_sendmsg"... Hit Ctrl-C to end. ^C nsecs : count distribution [...] 128 -> 255 : 0 | | 256 -> 511 : 4092 |****************************************| 512 -> 1023 : 2 | | 1024 -> 2047 : 0 | | 2048 -> 4095 : 0 | | 4096 -> 8191 : 1 | | 8192 -> 16383 : 1 | | avg = 410 nsecs, total: 1680510 nsecs, count: 4096 Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240123170856.41348-6-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/af_unix.h | 12 ++++++++++-- include/net/scm.h | 1 + net/core/scm.c | 5 +++++ net/unix/af_unix.c | 6 ++++-- net/unix/garbage.c | 10 +++++++++- 5 files changed, 29 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 2c98ef95017b2..f045bbd9017d1 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -8,13 +8,21 @@ #include #include +#if IS_ENABLED(CONFIG_UNIX) +struct unix_sock *unix_get_socket(struct file *filp); +#else +static inline struct unix_sock *unix_get_socket(struct file *filp) +{ + return NULL; +} +#endif + void unix_inflight(struct user_struct *user, struct file *fp); void unix_notinflight(struct user_struct *user, struct file *fp); void unix_destruct_scm(struct sk_buff *skb); void io_uring_destruct_scm(struct sk_buff *skb); void unix_gc(void); -void wait_for_unix_gc(void); -struct unix_sock *unix_get_socket(struct file *filp); +void wait_for_unix_gc(struct scm_fp_list *fpl); struct sock *unix_peer_get(struct sock *sk); #define UNIX_HASH_MOD (256 - 1) diff --git a/include/net/scm.h b/include/net/scm.h index cf68acec4d70a..92276a2c55436 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -25,6 +25,7 @@ struct scm_creds { struct scm_fp_list { short count; + short count_unix; short max; struct user_struct *user; struct file *fp[SCM_MAX_FD]; diff --git a/net/core/scm.c b/net/core/scm.c index d0e0852a24d5f..9cd4b0a01cd60 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -36,6 +36,7 @@ #include #include #include +#include /* @@ -85,6 +86,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) return -ENOMEM; *fplp = fpl; fpl->count = 0; + fpl->count_unix = 0; fpl->max = SCM_MAX_FD; fpl->user = NULL; } @@ -109,6 +111,9 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) fput(file); return -EINVAL; } + if (unix_get_socket(file)) + fpl->count_unix++; + *fpp++ = file; fpl->count++; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 1e9378036dcc3..1720419d93d68 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1923,11 +1923,12 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, long timeo; int err; - wait_for_unix_gc(); err = scm_send(sock, msg, &scm, false); if (err < 0) return err; + wait_for_unix_gc(scm.fp); + err = -EOPNOTSUPP; if (msg->msg_flags&MSG_OOB) goto out; @@ -2199,11 +2200,12 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, bool fds_sent = false; int data_len; - wait_for_unix_gc(); err = scm_send(sock, msg, &scm, false); if (err < 0) return err; + wait_for_unix_gc(scm.fp); + err = -EOPNOTSUPP; if (msg->msg_flags & MSG_OOB) { #if IS_ENABLED(CONFIG_AF_UNIX_OOB) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 3f599db59f9dc..4046c606f0e6e 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -311,8 +311,9 @@ void unix_gc(void) } #define UNIX_INFLIGHT_TRIGGER_GC 16000 +#define UNIX_INFLIGHT_SANE_USER (SCM_MAX_FD * 8) -void wait_for_unix_gc(void) +void wait_for_unix_gc(struct scm_fp_list *fpl) { /* If number of inflight sockets is insane, * force a garbage collect right now. @@ -324,6 +325,13 @@ void wait_for_unix_gc(void) !READ_ONCE(gc_in_progress)) unix_gc(); + /* Penalise users who want to send AF_UNIX sockets + * but whose sockets have not been received yet. + */ + if (!fpl || !fpl->count_unix || + READ_ONCE(fpl->user->unix_inflight) < UNIX_INFLIGHT_SANE_USER) + return; + if (READ_ONCE(gc_in_progress)) flush_work(&unix_gc_work); } -- cgit 1.2.3-korg From df3fc228dead6298ac9e985a270b8521e41471e6 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Fri, 12 Jan 2024 17:37:46 +0100 Subject: batman-adv: Start new development cycle This version will contain all the (major or even only minor) changes for Linux 6.9. The version number isn't a semantic version number with major and minor information. It is just encoding the year of the expected publishing as Linux -rc1 and the number of published versions this year (starting at 0). Signed-off-by: Simon Wunderlich --- net/batman-adv/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 870dcd7f17869..8ca854a75a323 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -13,7 +13,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2024.0" +#define BATADV_SOURCE_VERSION "2024.1" #endif /* B.A.T.M.A.N. parameters */ -- cgit 1.2.3-korg From ffc15626c861f811f9778914be004fcf43810a91 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 2 Jan 2024 07:27:45 +0100 Subject: batman-adv: Return directly after a failed batadv_dat_select_candidates() in batadv_dat_forward_data() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kfree() function was called in one case by the batadv_dat_forward_data() function during error handling even if the passed variable contained a null pointer. This issue was detected by using the Coccinelle software. * Thus return directly after a batadv_dat_select_candidates() call failed at the beginning. * Delete the label “out” which became unnecessary with this refactoring. Signed-off-by: Markus Elfring Acked-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/distributed-arp-table.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 28a939d560906..4c7e855343245 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -684,7 +684,7 @@ static bool batadv_dat_forward_data(struct batadv_priv *bat_priv, cand = batadv_dat_select_candidates(bat_priv, ip, vid); if (!cand) - goto out; + return ret; batadv_dbg(BATADV_DBG_DAT, bat_priv, "DHT_SEND for %pI4\n", &ip); @@ -728,7 +728,6 @@ free_orig: batadv_orig_node_put(cand[i].orig_node); } -out: kfree(cand); return ret; } -- cgit 1.2.3-korg From 5593e9abf1cf2bf096366d8c7fd933bc69d561ce Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 2 Jan 2024 07:52:21 +0100 Subject: batman-adv: Improve exception handling in batadv_throw_uevent() The kfree() function was called in up to three cases by the batadv_throw_uevent() function during error handling even if the passed variable contained a null pointer. This issue was detected by using the Coccinelle software. * Thus adjust jump targets. * Reorder kfree() calls at the end. Signed-off-by: Markus Elfring Acked-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/main.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 5fc754b0b3f7f..75119f1ffcccf 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -691,29 +691,31 @@ int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type, "%s%s", BATADV_UEV_TYPE_VAR, batadv_uev_type_str[type]); if (!uevent_env[0]) - goto out; + goto report_error; uevent_env[1] = kasprintf(GFP_ATOMIC, "%s%s", BATADV_UEV_ACTION_VAR, batadv_uev_action_str[action]); if (!uevent_env[1]) - goto out; + goto free_first_env; /* If the event is DEL, ignore the data field */ if (action != BATADV_UEV_DEL) { uevent_env[2] = kasprintf(GFP_ATOMIC, "%s%s", BATADV_UEV_DATA_VAR, data); if (!uevent_env[2]) - goto out; + goto free_second_env; } ret = kobject_uevent_env(bat_kobj, KOBJ_CHANGE, uevent_env); -out: - kfree(uevent_env[0]); - kfree(uevent_env[1]); kfree(uevent_env[2]); +free_second_env: + kfree(uevent_env[1]); +free_first_env: + kfree(uevent_env[0]); if (ret) +report_error: batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Impossible to send uevent for (%s,%s,%s) event (err: %d)\n", batadv_uev_type_str[type], -- cgit 1.2.3-korg From db60ad8b21cee0394cb0a1092d9f9190d310562c Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 22 Jan 2024 14:03:38 +0100 Subject: batman-adv: Drop usage of export.h The linux/export.h include was introduced in commit 9bcb94c8617e ("batman-adv: Introduce missing headers for genetlink restructure") to have access to THIS_MODULE. But with commit 5b20755b7780 ("init: move THIS_MODULE from to "), it was moved and the include for export.h is no longer needed. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/netlink.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 1f7ed9d4f6fdf..0954757f0b8b8 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include -- cgit 1.2.3-korg From 0efc7e541fd51ed11a328c43ccf10a12f1727746 Mon Sep 17 00:00:00 2001 From: Alessandro Marcolini Date: Thu, 25 Jan 2024 17:59:42 +0100 Subject: taprio: validate TCA_TAPRIO_ATTR_FLAGS through policy instead of open-coding As of now, the field TCA_TAPRIO_ATTR_FLAGS is being validated by manually checking its value, using the function taprio_flags_valid(). With this patch, the field will be validated through the netlink policy NLA_POLICY_MASK, where the mask is defined by TAPRIO_SUPPORTED_FLAGS. The mutual exclusivity of the two flags TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD and TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST is still checked manually. Changes since RFC: - fixed reversed xmas tree - use NL_SET_ERR_MSG_MOD() for both invalid configuration Changes since v1: - Changed NL_SET_ERR_MSG_MOD to NL_SET_ERR_MSG_ATTR when wrong flags issued - Changed __u32 to u32 Changes since v2: - Added the missing parameter for NL_SET_ERR_MSG_ATTR (sorry again for the noise) Signed-off-by: Alessandro Marcolini Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/sched/sch_taprio.c | 72 ++++++++++++++++++-------------------------------- 1 file changed, 26 insertions(+), 46 deletions(-) (limited to 'net') diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 31a8252bd09c9..827cb683efbff 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -40,6 +40,8 @@ static struct static_key_false taprio_have_working_mqprio; #define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) #define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD) +#define TAPRIO_SUPPORTED_FLAGS \ + (TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST | TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD) #define TAPRIO_FLAGS_INVALID U32_MAX struct sched_entry { @@ -408,19 +410,6 @@ static bool is_valid_interval(struct sk_buff *skb, struct Qdisc *sch) return entry; } -static bool taprio_flags_valid(u32 flags) -{ - /* Make sure no other flag bits are set. */ - if (flags & ~(TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST | - TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)) - return false; - /* txtime-assist and full offload are mutually exclusive */ - if ((flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) && - (flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)) - return false; - return true; -} - /* This returns the tstamp value set by TCP in terms of the set clock. */ static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb) { @@ -1031,7 +1020,8 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = NLA_POLICY_FULL_RANGE_SIGNED(NLA_S64, &taprio_cycle_time_range), [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 }, - [TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 }, + [TCA_TAPRIO_ATTR_FLAGS] = + NLA_POLICY_MASK(NLA_U32, TAPRIO_SUPPORTED_FLAGS), [TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 }, [TCA_TAPRIO_ATTR_TC_ENTRY] = { .type = NLA_NESTED }, }; @@ -1815,33 +1805,6 @@ static int taprio_mqprio_cmp(const struct net_device *dev, return 0; } -/* The semantics of the 'flags' argument in relation to 'change()' - * requests, are interpreted following two rules (which are applied in - * this order): (1) an omitted 'flags' argument is interpreted as - * zero; (2) the 'flags' of a "running" taprio instance cannot be - * changed. - */ -static int taprio_new_flags(const struct nlattr *attr, u32 old, - struct netlink_ext_ack *extack) -{ - u32 new = 0; - - if (attr) - new = nla_get_u32(attr); - - if (old != TAPRIO_FLAGS_INVALID && old != new) { - NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported"); - return -EOPNOTSUPP; - } - - if (!taprio_flags_valid(new)) { - NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid"); - return -EINVAL; - } - - return new; -} - static int taprio_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { @@ -1852,6 +1815,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, struct net_device *dev = qdisc_dev(sch); struct tc_mqprio_qopt *mqprio = NULL; unsigned long flags; + u32 taprio_flags; ktime_t start; int i, err; @@ -1863,12 +1827,28 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_TAPRIO_ATTR_PRIOMAP]) mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]); - err = taprio_new_flags(tb[TCA_TAPRIO_ATTR_FLAGS], - q->flags, extack); - if (err < 0) - return err; + /* The semantics of the 'flags' argument in relation to 'change()' + * requests, are interpreted following two rules (which are applied in + * this order): (1) an omitted 'flags' argument is interpreted as + * zero; (2) the 'flags' of a "running" taprio instance cannot be + * changed. + */ + taprio_flags = tb[TCA_TAPRIO_ATTR_FLAGS] ? nla_get_u32(tb[TCA_TAPRIO_ATTR_FLAGS]) : 0; - q->flags = err; + /* txtime-assist and full offload are mutually exclusive */ + if ((taprio_flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) && + (taprio_flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)) { + NL_SET_ERR_MSG_ATTR(extack, tb[TCA_TAPRIO_ATTR_FLAGS], + "TXTIME_ASSIST and FULL_OFFLOAD are mutually exclusive"); + return -EINVAL; + } + + if (q->flags != TAPRIO_FLAGS_INVALID && q->flags != taprio_flags) { + NL_SET_ERR_MSG_MOD(extack, + "Changing 'flags' of a running schedule is not supported"); + return -EOPNOTSUPP; + } + q->flags = taprio_flags; err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags); if (err < 0) -- cgit 1.2.3-korg From 6aa89bf8ac9a1f5e420c0f00bdc0101994544736 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 25 Jan 2024 11:34:19 -0800 Subject: net: fill in MODULE_DESCRIPTION()s for ieee802154 W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to ieee802154 modules. Signed-off-by: Breno Leitao Acked-by: Miquel Raynal Acked-by: Stefan Schmidt Signed-off-by: David S. Miller --- net/ieee802154/6lowpan/core.c | 1 + net/ieee802154/socket.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 2c087b7f17c56..e643f52663f9b 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -280,5 +280,6 @@ static void __exit lowpan_cleanup_module(void) module_init(lowpan_init_module); module_exit(lowpan_cleanup_module); +MODULE_DESCRIPTION("IPv6 over Low power Wireless Personal Area Network IEEE 802.15.4 core"); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("lowpan"); diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index 00302e8b9615b..990a83455dcfb 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -1137,4 +1137,5 @@ module_init(af_ieee802154_init); module_exit(af_ieee802154_remove); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IEEE 802.15.4 socket interface"); MODULE_ALIAS_NETPROTO(PF_IEEE802154); -- cgit 1.2.3-korg From 723de3ebef03bc14bd72531f00f9094337654009 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 26 Jan 2024 12:14:49 -0800 Subject: net: free altname using an RCU callback We had to add another synchronize_rcu() in recent fix. Bite the bullet and add an rcu_head to netdev_name_node, free from RCU. Note that name_node does not hold any reference on dev to which it points, but there must be a synchronize_rcu() on device removal path, so we should be fine. Signed-off-by: Jakub Kicinski Reviewed-by: Jiri Pirko Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 27 ++++++++++++++++----------- net/core/dev.h | 1 + 2 files changed, 17 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index cb2dab0feee0a..b53b9c94de400 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -341,13 +341,22 @@ int netdev_name_node_alt_create(struct net_device *dev, const char *name) return 0; } -static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node) +static void netdev_name_node_alt_free(struct rcu_head *head) { - list_del(&name_node->list); + struct netdev_name_node *name_node = + container_of(head, struct netdev_name_node, rcu); + kfree(name_node->name); netdev_name_node_free(name_node); } +static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node) +{ + netdev_name_node_del(name_node); + list_del(&name_node->list); + call_rcu(&name_node->rcu, netdev_name_node_alt_free); +} + int netdev_name_node_alt_destroy(struct net_device *dev, const char *name) { struct netdev_name_node *name_node; @@ -362,10 +371,7 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name) if (name_node == dev->name_node || name_node->dev != dev) return -EINVAL; - netdev_name_node_del(name_node); - synchronize_rcu(); __netdev_name_node_alt_destroy(name_node); - return 0; } @@ -373,8 +379,10 @@ static void netdev_name_node_alt_flush(struct net_device *dev) { struct netdev_name_node *name_node, *tmp; - list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list) - __netdev_name_node_alt_destroy(name_node); + list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list) { + list_del(&name_node->list); + netdev_name_node_alt_free(&name_node->rcu); + } } /* Device list insertion */ @@ -11576,11 +11584,8 @@ static void __net_exit default_device_exit_net(struct net *net) snprintf(fb_name, IFNAMSIZ, "dev%%d"); netdev_for_each_altname_safe(dev, name_node, tmp) - if (netdev_name_in_use(&init_net, name_node->name)) { - netdev_name_node_del(name_node); - synchronize_rcu(); + if (netdev_name_in_use(&init_net, name_node->name)) __netdev_name_node_alt_destroy(name_node); - } err = dev_change_net_namespace(dev, &init_net, fb_name); if (err) { diff --git a/net/core/dev.h b/net/core/dev.h index 7480b4c842980..a43dfe3de50ee 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -56,6 +56,7 @@ struct netdev_name_node { struct list_head list; struct net_device *dev; const char *name; + struct rcu_head rcu; }; int netdev_get_name(struct net *net, char *name, int ifindex); -- cgit 1.2.3-korg From da5141bbe0c2693d85f14a89ee991921904f4d0c Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 21 Dec 2023 14:31:58 +0100 Subject: netfilter: nf_tables: Introduce NFT_TABLE_F_PERSIST This companion flag to NFT_TABLE_F_OWNER requests the kernel to keep the table around after the process has exited. It marks such table as orphaned (by dropping OWNER flag but keeping PERSIST flag in place), which opens it for other processes to manipulate. For the sake of simplicity, PERSIST flag may not be altered though. Signed-off-by: Phil Sutter Signed-off-by: Florian Westphal --- include/uapi/linux/netfilter/nf_tables.h | 5 ++++- net/netfilter/nf_tables_api.c | 7 +++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index fbce238abdc16..3fee994721cd6 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -180,13 +180,16 @@ enum nft_hook_attributes { * * @NFT_TABLE_F_DORMANT: this table is not active * @NFT_TABLE_F_OWNER: this table is owned by a process + * @NFT_TABLE_F_PERSIST: this table shall outlive its owner */ enum nft_table_flags { NFT_TABLE_F_DORMANT = 0x1, NFT_TABLE_F_OWNER = 0x2, + NFT_TABLE_F_PERSIST = 0x4, }; #define NFT_TABLE_F_MASK (NFT_TABLE_F_DORMANT | \ - NFT_TABLE_F_OWNER) + NFT_TABLE_F_OWNER | \ + NFT_TABLE_F_PERSIST) /** * enum nft_table_attributes - nf_tables table netlink attributes diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c537104411e7d..6a96f0003faa9 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1219,6 +1219,9 @@ static int nf_tables_updtable(struct nft_ctx *ctx) flags & NFT_TABLE_F_OWNER)) return -EOPNOTSUPP; + if ((flags ^ ctx->table->flags) & NFT_TABLE_F_PERSIST) + return -EOPNOTSUPP; + /* No dormant off/on/off/on games in single transaction */ if (ctx->table->flags & __NFT_TABLE_F_UPDATE) return -EINVAL; @@ -11345,6 +11348,10 @@ again: list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_has_owner(table) && n->portid == table->nlpid) { + if (table->flags & NFT_TABLE_F_PERSIST) { + table->flags &= ~NFT_TABLE_F_OWNER; + continue; + } __nft_release_hook(net, table); list_del_rcu(&table->list); to_delete[deleted++] = table; -- cgit 1.2.3-korg From 31bf508be656a429a17e3adb31e9acae5c1a6299 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 21 Dec 2023 14:31:59 +0100 Subject: netfilter: nf_tables: Implement table adoption support Allow a new process to take ownership of a previously owned table, useful mostly for firewall management services restarting or suspending when idle. By extending __NFT_TABLE_F_UPDATE, the on/off/on check in nf_tables_updtable() also covers table adoption, although it is actually not needed: Table adoption is irreversible because nf_tables_updtable() rejects attempts to drop NFT_TABLE_F_OWNER so table->nlpid setting can happen just once within the transaction. If the transaction commences, table's nlpid and flags fields are already set and no further action is required. If it aborts, the table returns to orphaned state. Signed-off-by: Phil Sutter Signed-off-by: Florian Westphal --- include/net/netfilter/nf_tables.h | 6 ++++++ net/netfilter/nf_tables_api.c | 19 ++++++++++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 4e1ea18eb5f05..ac7c94d3648e0 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1271,6 +1271,12 @@ static inline bool nft_table_has_owner(const struct nft_table *table) return table->flags & NFT_TABLE_F_OWNER; } +static inline bool nft_table_is_orphan(const struct nft_table *table) +{ + return (table->flags & (NFT_TABLE_F_OWNER | NFT_TABLE_F_PERSIST)) == + NFT_TABLE_F_PERSIST; +} + static inline bool nft_base_chain_netdev(int family, u32 hooknum) { return family == NFPROTO_NETDEV || diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6a96f0003faa9..b0e0d039897ee 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1194,8 +1194,10 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table) #define __NFT_TABLE_F_INTERNAL (NFT_TABLE_F_MASK + 1) #define __NFT_TABLE_F_WAS_DORMANT (__NFT_TABLE_F_INTERNAL << 0) #define __NFT_TABLE_F_WAS_AWAKEN (__NFT_TABLE_F_INTERNAL << 1) +#define __NFT_TABLE_F_WAS_ORPHAN (__NFT_TABLE_F_INTERNAL << 2) #define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \ - __NFT_TABLE_F_WAS_AWAKEN) + __NFT_TABLE_F_WAS_AWAKEN | \ + __NFT_TABLE_F_WAS_ORPHAN) static int nf_tables_updtable(struct nft_ctx *ctx) { @@ -1215,8 +1217,8 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if ((nft_table_has_owner(ctx->table) && !(flags & NFT_TABLE_F_OWNER)) || - (!nft_table_has_owner(ctx->table) && - flags & NFT_TABLE_F_OWNER)) + (flags & NFT_TABLE_F_OWNER && + !nft_table_is_orphan(ctx->table))) return -EOPNOTSUPP; if ((flags ^ ctx->table->flags) & NFT_TABLE_F_PERSIST) @@ -1248,6 +1250,13 @@ static int nf_tables_updtable(struct nft_ctx *ctx) } } + if ((flags & NFT_TABLE_F_OWNER) && + !nft_table_has_owner(ctx->table)) { + ctx->table->nlpid = ctx->portid; + ctx->table->flags |= NFT_TABLE_F_OWNER | + __NFT_TABLE_F_WAS_ORPHAN; + } + nft_trans_table_update(trans) = true; nft_trans_commit_list_add_tail(ctx->net, trans); @@ -10423,6 +10432,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) } else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) { trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT; } + if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_ORPHAN) { + trans->ctx.table->flags &= ~NFT_TABLE_F_OWNER; + trans->ctx.table->nlpid = 0; + } trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE; nft_trans_destroy(trans); } else { -- cgit 1.2.3-korg From a128885ace60ff3da326dab7208fd7a04ce0b138 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 4 Jan 2024 13:26:09 +0100 Subject: netfilter: nf_tables: pass flags to set backend selection routine No need to refetch the flag from the netlink attribute, pass the existing flags variable which already provide validated flags. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b0e0d039897ee..7f25a04e4b81b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4247,23 +4247,18 @@ static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags) * given, in that case the amount of memory per element is used. */ static const struct nft_set_ops * -nft_select_set_ops(const struct nft_ctx *ctx, - const struct nlattr * const nla[], +nft_select_set_ops(const struct nft_ctx *ctx, u32 flags, const struct nft_set_desc *desc) { struct nftables_pernet *nft_net = nft_pernet(ctx->net); const struct nft_set_ops *ops, *bops; struct nft_set_estimate est, best; const struct nft_set_type *type; - u32 flags = 0; int i; lockdep_assert_held(&nft_net->commit_mutex); lockdep_nfnl_nft_mutex_not_held(); - if (nla[NFTA_SET_FLAGS] != NULL) - flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); - bops = NULL; best.size = ~0; best.lookup = ~0; @@ -5149,7 +5144,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) return -ENOENT; - ops = nft_select_set_ops(&ctx, nla, &desc); + ops = nft_select_set_ops(&ctx, flags, &desc); if (IS_ERR(ops)) return PTR_ERR(ops); -- cgit 1.2.3-korg From 2ae6e9a03dadee5b2749d391d2e6df3056c5b6dd Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Tue, 23 Jan 2024 10:46:31 +0800 Subject: netfilter: nf_conncount: Use KMEM_CACHE instead of kmem_cache_create() Use the new KMEM_CACHE() macro instead of direct kmem_cache_create to simplify the creation of SLAB caches. Signed-off-by: Kunwu Chan Signed-off-by: Florian Westphal --- net/netfilter/nf_conncount.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 5d8ed6c90b7ef..8715617b02fe6 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -605,15 +605,11 @@ static int __init nf_conncount_modinit(void) for (i = 0; i < CONNCOUNT_SLOTS; ++i) spin_lock_init(&nf_conncount_locks[i]); - conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple", - sizeof(struct nf_conncount_tuple), - 0, 0, NULL); + conncount_conn_cachep = KMEM_CACHE(nf_conncount_tuple, 0); if (!conncount_conn_cachep) return -ENOMEM; - conncount_rb_cachep = kmem_cache_create("nf_conncount_rb", - sizeof(struct nf_conncount_rb), - 0, 0, NULL); + conncount_rb_cachep = KMEM_CACHE(nf_conncount_rb, 0); if (!conncount_rb_cachep) { kmem_cache_destroy(conncount_conn_cachep); return -ENOMEM; -- cgit 1.2.3-korg From d5f9142fb96d4386ff4d06745bbd8f8c73b3791b Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Wed, 17 Jan 2024 15:20:45 +0800 Subject: ipvs: Simplify the allocation of ip_vs_conn slab caches Use the new KMEM_CACHE() macro instead of direct kmem_cache_create to simplify the creation of SLAB caches. Signed-off-by: Kunwu Chan Acked-by: Simon Horman Signed-off-by: Florian Westphal --- net/netfilter/ipvs/ip_vs_conn.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index a743db0738872..98d7dbe3d7876 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1511,9 +1511,7 @@ int __init ip_vs_conn_init(void) return -ENOMEM; /* Allocate ip_vs_conn slab cache */ - ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn", - sizeof(struct ip_vs_conn), 0, - SLAB_HWCACHE_ALIGN, NULL); + ip_vs_conn_cachep = KMEM_CACHE(ip_vs_conn, SLAB_HWCACHE_ALIGN); if (!ip_vs_conn_cachep) { kvfree(ip_vs_conn_tab); return -ENOMEM; -- cgit 1.2.3-korg From 4654467dc7e111e84f43ed1b70322873ae77e7be Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 23 Jan 2024 16:42:48 +0100 Subject: netfilter: arptables: allow xtables-nft only builds Allows to build kernel that supports the arptables mangle target via nftables' compat infra but without the arptables get/setsockopt interface or the old arptables filter interpreter. IOW, setting IP_NF_ARPFILTER=n will break arptables-legacy, but arptables-nft will continue to work as long as nftables compat support is enabled. Signed-off-by: Florian Westphal Reviewed-by: Phil Sutter --- net/ipv4/netfilter/Kconfig | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index f71a7e9a7de6d..070475392236f 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -309,36 +309,34 @@ endif # IP_NF_IPTABLES # ARP tables config IP_NF_ARPTABLES - tristate "ARP tables support" - select NETFILTER_XTABLES - select NETFILTER_FAMILY_ARP - depends on NETFILTER_ADVANCED - help - arptables is a general, extensible packet identification framework. - The ARP packet filtering and mangling (manipulation)subsystems - use this: say Y or M here if you want to use either of those. - - To compile it as a module, choose M here. If unsure, say N. + tristate -if IP_NF_ARPTABLES +config NFT_COMPAT_ARP + tristate + depends on NF_TABLES_ARP && NFT_COMPAT + default m if NFT_COMPAT=m + default y if NFT_COMPAT=y config IP_NF_ARPFILTER - tristate "ARP packet filtering" + tristate "arptables-legacy packet filtering support" + select IP_NF_ARPTABLES help ARP packet filtering defines a table `filter', which has a series of rules for simple ARP packet filtering at local input and - local output. On a bridge, you can also specify filtering rules - for forwarded ARP packets. See the man page for arptables(8). + local output. This is only needed for arptables-legacy(8). + Neither arptables-nft nor nftables need this to work. To compile it as a module, choose M here. If unsure, say N. config IP_NF_ARP_MANGLE tristate "ARP payload mangling" + depends on IP_NF_ARPTABLES || NFT_COMPAT_ARP help Allows altering the ARP packet payload: source and destination hardware and network addresses. -endif # IP_NF_ARPTABLES + This option is needed by both arptables-legacy and arptables-nft. + It is not used by nftables. endmenu -- cgit 1.2.3-korg From a9525c7f6219cee9284c0031c5930e8d41384677 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 24 Jan 2024 10:21:11 +0100 Subject: netfilter: xtables: allow xtables-nft only builds Add hidden IP(6)_NF_IPTABLES_LEGACY symbol. When any of the "old" builtin tables are enabled the "old" iptables interface will be supported. To disable the old set/getsockopt interface the existing options for the builtin tables need to be turned off: CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_FILTER is not set CONFIG_IP_NF_NAT is not set CONFIG_IP_NF_MANGLE is not set CONFIG_IP_NF_RAW is not set CONFIG_IP_NF_SECURITY is not set Same for CONFIG_IP6_NF_ variants. This allows to build a kernel that only supports ip(6)tables-nft (iptables-over-nftables api). In the future the _LEGACY symbol will become visible and the select statements will be turned into 'depends on', but for now be on safe side so "make oldconfig" won't break things. Signed-off-by: Florian Westphal --- net/ipv4/netfilter/Kconfig | 15 ++++++++++++--- net/ipv4/netfilter/Makefile | 2 +- net/ipv6/netfilter/Kconfig | 20 ++++++++++++++------ net/ipv6/netfilter/Makefile | 2 +- net/netfilter/Kconfig | 12 ++++++------ 5 files changed, 34 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 070475392236f..7835230872818 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -10,6 +10,10 @@ config NF_DEFRAG_IPV4 tristate default n +# old sockopt interface and eval loop +config IP_NF_IPTABLES_LEGACY + tristate + config NF_SOCKET_IPV4 tristate "IPv4 socket lookup support" help @@ -152,7 +156,7 @@ config IP_NF_MATCH_ECN config IP_NF_MATCH_RPFILTER tristate '"rpfilter" reverse path filter match support' depends on NETFILTER_ADVANCED - depends on IP_NF_MANGLE || IP_NF_RAW + depends on IP_NF_MANGLE || IP_NF_RAW || NFT_COMPAT help This option allows you to match packets whose replies would go out via the interface the packet came in. @@ -173,6 +177,7 @@ config IP_NF_MATCH_TTL config IP_NF_FILTER tristate "Packet filtering" default m if NETFILTER_ADVANCED=n + select IP_NF_IPTABLES_LEGACY help Packet filtering defines a table `filter', which has a series of rules for simple packet filtering at local input, forwarding and @@ -182,7 +187,7 @@ config IP_NF_FILTER config IP_NF_TARGET_REJECT tristate "REJECT target support" - depends on IP_NF_FILTER + depends on IP_NF_FILTER || NFT_COMPAT select NF_REJECT_IPV4 default m if NETFILTER_ADVANCED=n help @@ -212,6 +217,7 @@ config IP_NF_NAT default m if NETFILTER_ADVANCED=n select NF_NAT select NETFILTER_XT_NAT + select IP6_NF_IPTABLES_LEGACY help This enables the `nat' table in iptables. This allows masquerading, port forwarding and other forms of full Network Address Port @@ -252,6 +258,7 @@ endif # IP_NF_NAT config IP_NF_MANGLE tristate "Packet mangling" default m if NETFILTER_ADVANCED=n + select IP_NF_IPTABLES_LEGACY help This option adds a `mangle' table to iptables: see the man page for iptables(8). This table is used for various packet alterations @@ -261,7 +268,7 @@ config IP_NF_MANGLE config IP_NF_TARGET_ECN tristate "ECN target support" - depends on IP_NF_MANGLE + depends on IP_NF_MANGLE || NFT_COMPAT depends on NETFILTER_ADVANCED help This option adds a `ECN' target, which can be used in the iptables mangle @@ -286,6 +293,7 @@ config IP_NF_TARGET_TTL # raw + specific targets config IP_NF_RAW tristate 'raw table support (required for NOTRACK/TRACE)' + select IP_NF_IPTABLES_LEGACY help This option adds a `raw' table to iptables. This table is the very first in the netfilter framework and hooks in at the PREROUTING @@ -299,6 +307,7 @@ config IP_NF_SECURITY tristate "Security table" depends on SECURITY depends on NETFILTER_ADVANCED + select IP_NF_IPTABLES_LEGACY help This option adds a `security' table to iptables, for use with Mandatory Access Control (MAC) policy. diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 5a26f9de1ab92..85502d4dfbb4d 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -25,7 +25,7 @@ obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o # generic IP tables -obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o +obj-$(CONFIG_IP_NF_IPTABLES_LEGACY) += ip_tables.o # the three instances of ip_tables obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 0ba62f4868f97..f3c8e2d918e13 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -6,6 +6,10 @@ menu "IPv6: Netfilter Configuration" depends on INET && IPV6 && NETFILTER +# old sockopt interface and eval loop +config IP6_NF_IPTABLES_LEGACY + tristate + config NF_SOCKET_IPV6 tristate "IPv6 socket lookup support" help @@ -147,7 +151,7 @@ config IP6_NF_MATCH_MH config IP6_NF_MATCH_RPFILTER tristate '"rpfilter" reverse path filter match support' depends on NETFILTER_ADVANCED - depends on IP6_NF_MANGLE || IP6_NF_RAW + depends on IP6_NF_MANGLE || IP6_NF_RAW || NFT_COMPAT help This option allows you to match packets whose replies would go out via the interface the packet came in. @@ -186,6 +190,8 @@ config IP6_NF_TARGET_HL config IP6_NF_FILTER tristate "Packet filtering" default m if NETFILTER_ADVANCED=n + select IP6_NF_IPTABLES_LEGACY + tristate help Packet filtering defines a table `filter', which has a series of rules for simple packet filtering at local input, forwarding and @@ -195,7 +201,7 @@ config IP6_NF_FILTER config IP6_NF_TARGET_REJECT tristate "REJECT target support" - depends on IP6_NF_FILTER + depends on IP6_NF_FILTER || NFT_COMPAT select NF_REJECT_IPV6 default m if NETFILTER_ADVANCED=n help @@ -221,6 +227,7 @@ config IP6_NF_TARGET_SYNPROXY config IP6_NF_MANGLE tristate "Packet mangling" default m if NETFILTER_ADVANCED=n + select IP6_NF_IPTABLES_LEGACY help This option adds a `mangle' table to iptables: see the man page for iptables(8). This table is used for various packet alterations @@ -230,6 +237,7 @@ config IP6_NF_MANGLE config IP6_NF_RAW tristate 'raw table support (required for TRACE)' + select IP6_NF_IPTABLES_LEGACY help This option adds a `raw' table to ip6tables. This table is the very first in the netfilter framework and hooks in at the PREROUTING @@ -243,6 +251,7 @@ config IP6_NF_SECURITY tristate "Security table" depends on SECURITY depends on NETFILTER_ADVANCED + select IP6_NF_IPTABLES_LEGACY help This option adds a `security' table to iptables, for use with Mandatory Access Control (MAC) policy. @@ -254,6 +263,7 @@ config IP6_NF_NAT depends on NF_CONNTRACK depends on NETFILTER_ADVANCED select NF_NAT + select IP6_NF_IPTABLES_LEGACY select NETFILTER_XT_NAT help This enables the `nat' table in ip6tables. This allows masquerading, @@ -262,25 +272,23 @@ config IP6_NF_NAT To compile it as a module, choose M here. If unsure, say N. -if IP6_NF_NAT - config IP6_NF_TARGET_MASQUERADE tristate "MASQUERADE target support" select NETFILTER_XT_TARGET_MASQUERADE + depends on IP6_NF_NAT help This is a backwards-compat option for the user's convenience (e.g. when running oldconfig). It selects NETFILTER_XT_TARGET_MASQUERADE. config IP6_NF_TARGET_NPT tristate "NPT (Network Prefix translation) target support" + depends on IP6_NF_NAT || NFT_COMPAT help This option adds the `SNPT' and `DNPT' target, which perform stateless IPv6-to-IPv6 Network Prefix Translation per RFC 6296. To compile it as a module, choose M here. If unsure, say N. -endif # IP6_NF_NAT - endif # IP6_NF_IPTABLES endmenu diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index b8d6dc9aeeb6f..66ce6fa5b2f52 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -4,7 +4,7 @@ # # Link order matters here. -obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o +obj-$(CONFIG_IP6_NF_IPTABLES_LEGACY) += ip6_tables.o obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 441d1f1341100..df2dc21304efb 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -818,7 +818,7 @@ config NETFILTER_XT_TARGET_AUDIT config NETFILTER_XT_TARGET_CHECKSUM tristate "CHECKSUM target support" - depends on IP_NF_MANGLE || IP6_NF_MANGLE + depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT depends on NETFILTER_ADVANCED help This option adds a `CHECKSUM' target, which can be used in the iptables mangle @@ -869,7 +869,7 @@ config NETFILTER_XT_TARGET_CONNSECMARK config NETFILTER_XT_TARGET_CT tristate '"CT" target support' depends on NF_CONNTRACK - depends on IP_NF_RAW || IP6_NF_RAW + depends on IP_NF_RAW || IP6_NF_RAW || NFT_COMPAT depends on NETFILTER_ADVANCED help This options adds a `CT' target, which allows to specify initial @@ -880,7 +880,7 @@ config NETFILTER_XT_TARGET_CT config NETFILTER_XT_TARGET_DSCP tristate '"DSCP" and "TOS" target support' - depends on IP_NF_MANGLE || IP6_NF_MANGLE + depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT depends on NETFILTER_ADVANCED help This option adds a `DSCP' target, which allows you to manipulate @@ -896,7 +896,7 @@ config NETFILTER_XT_TARGET_DSCP config NETFILTER_XT_TARGET_HL tristate '"HL" hoplimit target support' - depends on IP_NF_MANGLE || IP6_NF_MANGLE + depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT depends on NETFILTER_ADVANCED help This option adds the "HL" (for IPv6) and "TTL" (for IPv4) @@ -1080,7 +1080,7 @@ config NETFILTER_XT_TARGET_TPROXY depends on NETFILTER_ADVANCED depends on IPV6 || IPV6=n depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n - depends on IP_NF_MANGLE + depends on IP_NF_MANGLE || NFT_COMPAT select NF_DEFRAG_IPV4 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n select NF_TPROXY_IPV4 @@ -1147,7 +1147,7 @@ config NETFILTER_XT_TARGET_TCPMSS config NETFILTER_XT_TARGET_TCPOPTSTRIP tristate '"TCPOPTSTRIP" target support' - depends on IP_NF_MANGLE || IP6_NF_MANGLE + depends on IP_NF_MANGLE || IP6_NF_MANGLE || NFT_COMPAT depends on NETFILTER_ADVANCED help This option adds a "TCPOPTSTRIP" target, which allows you to strip -- cgit 1.2.3-korg From 7ad269787b6615ca56bb161063331991fce51abf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 24 Jan 2024 10:21:12 +0100 Subject: netfilter: ebtables: allow xtables-nft only builds Same patch as previous one, but for ebtables. To build a kernel that only supports ebtables-nft, the builtin tables need to be disabled, i.e.: CONFIG_BRIDGE_EBT_BROUTE=n CONFIG_BRIDGE_EBT_T_FILTER=n CONFIG_BRIDGE_EBT_T_NAT=n The ebtables specific extensions can then be used nftables' NFT_COMPAT interface. Signed-off-by: Florian Westphal --- net/bridge/netfilter/Kconfig | 7 +++++++ net/bridge/netfilter/Makefile | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 7f304a19ac1bf..104c0125e32e8 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -39,6 +39,10 @@ config NF_CONNTRACK_BRIDGE To compile it as a module, choose M here. If unsure, say N. +# old sockopt interface and eval loop +config BRIDGE_NF_EBTABLES_LEGACY + tristate + menuconfig BRIDGE_NF_EBTABLES tristate "Ethernet Bridge tables (ebtables) support" depends on BRIDGE && NETFILTER && NETFILTER_XTABLES @@ -55,6 +59,7 @@ if BRIDGE_NF_EBTABLES # config BRIDGE_EBT_BROUTE tristate "ebt: broute table support" + select BRIDGE_NF_EBTABLES_LEGACY help The ebtables broute table is used to define rules that decide between bridging and routing frames, giving Linux the functionality of a @@ -65,6 +70,7 @@ config BRIDGE_EBT_BROUTE config BRIDGE_EBT_T_FILTER tristate "ebt: filter table support" + select BRIDGE_NF_EBTABLES_LEGACY help The ebtables filter table is used to define frame filtering rules at local input, forwarding and local output. See the man page for @@ -74,6 +80,7 @@ config BRIDGE_EBT_T_FILTER config BRIDGE_EBT_T_NAT tristate "ebt: nat table support" + select BRIDGE_NF_EBTABLES_LEGACY help The ebtables nat table is used to define rules that alter the MAC source address (MAC SNAT) or the MAC destination address (MAC DNAT). diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile index 1c9ce49ab6513..b9a1303da9771 100644 --- a/net/bridge/netfilter/Makefile +++ b/net/bridge/netfilter/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_NFT_BRIDGE_REJECT) += nft_reject_bridge.o # connection tracking obj-$(CONFIG_NF_CONNTRACK_BRIDGE) += nf_conntrack_bridge.o -obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o +obj-$(CONFIG_BRIDGE_NF_EBTABLES_LEGACY) += ebtables.o # tables obj-$(CONFIG_BRIDGE_EBT_BROUTE) += ebtable_broute.o -- cgit 1.2.3-korg From 795a7dfbc3d95e4c7c09569f319f026f8c7f5a9c Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Fri, 26 Jan 2024 12:05:19 +0800 Subject: net: tcp: accept old ack during closing For now, the packet with an old ack is not accepted if we are in FIN_WAIT1 state, which can cause retransmission. Taking the following case as an example: Client Server | | FIN_WAIT1(Send FIN, seq=10) FIN_WAIT1(Send FIN, seq=20, ack=10) | | | Send ACK(seq=21, ack=11) Recv ACK(seq=21, ack=11) | Recv FIN(seq=20, ack=10) In the case above, simultaneous close is happening, and the FIN and ACK packet that send from the server is out of order. Then, the FIN will be dropped by the client, as it has an old ack. Then, the server has to retransmit the FIN, which can cause delay if the server has set the SO_LINGER on the socket. Old ack is accepted in the ESTABLISHED and TIME_WAIT state, and I think it should be better to keep the same logic. In this commit, we accept old ack in FIN_WAIT1/FIN_WAIT2/CLOSING/LAST_ACK states. Maybe we should limit it to FIN_WAIT1 for now? Signed-off-by: Menglong Dong Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240126040519.1846345-1-menglong8.dong@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_input.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index df7b13f0e5e0d..2d20edf652e6c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6699,17 +6699,21 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) return 0; /* step 5: check the ACK field */ - acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | - FLAG_UPDATE_TS_RECENT | - FLAG_NO_CHALLENGE_ACK) > 0; + reason = tcp_ack(sk, skb, FLAG_SLOWPATH | + FLAG_UPDATE_TS_RECENT | + FLAG_NO_CHALLENGE_ACK); - if (!acceptable) { + if ((int)reason <= 0) { if (sk->sk_state == TCP_SYN_RECV) return 1; /* send one RST */ - tcp_send_challenge_ack(sk); - SKB_DR_SET(reason, TCP_OLD_ACK); - goto discard; + /* accept old ack during closing */ + if ((int)reason < 0) { + tcp_send_challenge_ack(sk); + reason = -reason; + goto discard; + } } + SKB_DR_SET(reason, NOT_SPECIFIED); switch (sk->sk_state) { case TCP_SYN_RECV: tp->delivered++; /* SYN-ACK delivery isn't tracked in tcp_ack */ -- cgit 1.2.3-korg From 047a7d261be652e0a8c756ac75936cc0dc537fc6 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Wed, 24 Jan 2024 15:58:01 +0800 Subject: net: rds: Simplify the allocation of slab caches in rds_conn_init Use the new KMEM_CACHE() macro instead of direct kmem_cache_create to simplify the creation of SLAB caches. Signed-off-by: Kunwu Chan Link: https://lore.kernel.org/r/20240124075801.471330-1-chentao@kylinos.cn Signed-off-by: Jakub Kicinski --- net/rds/connection.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/rds/connection.c b/net/rds/connection.c index b4cc699c5fad3..c749c5525b40f 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -829,9 +829,7 @@ int rds_conn_init(void) if (ret) return ret; - rds_conn_slab = kmem_cache_create("rds_connection", - sizeof(struct rds_connection), - 0, 0, NULL); + rds_conn_slab = KMEM_CACHE(rds_connection, 0); if (!rds_conn_slab) { rds_loop_net_exit(); return -ENOMEM; -- cgit 1.2.3-korg From 6a571895116e688b2f54990cc537634337990b08 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 20 Jan 2024 08:02:20 +0100 Subject: xdp: Remove usage of the deprecated ida_simple_xx() API ida_alloc() and ida_free() should be preferred to the deprecated ida_simple_get() and ida_simple_remove(). Note that the upper limit of ida_simple_get() is exclusive, but the one of ida_alloc_range() is inclusive. So a -1 has been added when needed. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/8e889d18a6c881b09db4650d4b30a62d76f4fe77.1705734073.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- net/core/xdp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/xdp.c b/net/core/xdp.c index 4869c1c2d8f3d..27b585f3fa81a 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -75,7 +75,7 @@ static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu) xa = container_of(rcu, struct xdp_mem_allocator, rcu); /* Allow this ID to be reused */ - ida_simple_remove(&mem_id_pool, xa->mem.id); + ida_free(&mem_id_pool, xa->mem.id); kfree(xa); } @@ -242,7 +242,7 @@ static int __mem_id_cyclic_get(gfp_t gfp) int id; again: - id = ida_simple_get(&mem_id_pool, mem_id_next, MEM_ID_MAX, gfp); + id = ida_alloc_range(&mem_id_pool, mem_id_next, MEM_ID_MAX - 1, gfp); if (id < 0) { if (id == -ENOSPC) { /* Cyclic allocator, reset next id */ @@ -317,7 +317,7 @@ static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem, /* Insert allocator into ID lookup table */ ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node); if (IS_ERR(ptr)) { - ida_simple_remove(&mem_id_pool, mem->id); + ida_free(&mem_id_pool, mem->id); mem->id = 0; errno = PTR_ERR(ptr); goto err; -- cgit 1.2.3-korg From d80a52335374e484a4ff2afdc9af843e73273945 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 27 Jan 2024 14:25:09 +0100 Subject: ethtool: replace struct ethtool_eee with a new struct ethtool_keee on kernel side In order to pass EEE link modes beyond bit 32 to userspace we have to complement the 32 bit bitmaps in struct ethtool_eee with linkmode bitmaps. Therefore, similar to ethtool_link_settings and ethtool_link_ksettings, add a struct ethtool_keee. In a first step it's an identical copy of ethtool_eee. This patch simply does a s/ethtool_eee/ethtool_keee/g for all users. No functional change intended. Suggested-by: Andrew Lunn Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 10 +++++----- drivers/net/dsa/b53/b53_priv.h | 6 +++--- drivers/net/dsa/bcm_sf2.c | 2 +- drivers/net/dsa/microchip/ksz_common.c | 4 ++-- drivers/net/dsa/mt7530.c | 4 ++-- drivers/net/dsa/mv88e6xxx/chip.c | 4 ++-- drivers/net/dsa/qca/qca8k-common.c | 4 ++-- drivers/net/dsa/qca/qca8k.h | 4 ++-- drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c | 4 ++-- drivers/net/ethernet/broadcom/asp2/bcmasp.h | 2 +- drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c | 8 ++++---- drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 4 ++-- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++---- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 6 +++--- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 8 ++++---- drivers/net/ethernet/broadcom/genet/bcmgenet.h | 2 +- drivers/net/ethernet/broadcom/tg3.c | 10 +++++----- drivers/net/ethernet/broadcom/tg3.h | 2 +- drivers/net/ethernet/engleder/tsnep_main.c | 6 +++--- drivers/net/ethernet/freescale/enetc/enetc.c | 4 ++-- drivers/net/ethernet/freescale/fec.h | 2 +- drivers/net/ethernet/freescale/fec_main.c | 10 +++++----- drivers/net/ethernet/freescale/gianfar.c | 4 ++-- drivers/net/ethernet/intel/e1000e/ethtool.c | 6 +++--- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 6 +++--- drivers/net/ethernet/intel/igb/igb_ethtool.c | 8 ++++---- drivers/net/ethernet/intel/igc/igc.h | 2 +- drivers/net/ethernet/intel/igc/igc_ethtool.c | 8 ++++---- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 10 +++++----- drivers/net/ethernet/marvell/mvneta.c | 4 ++-- drivers/net/ethernet/microchip/lan743x_ethtool.c | 4 ++-- drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 4 ++-- drivers/net/ethernet/realtek/r8169_main.c | 4 ++-- drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c | 4 ++-- drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 4 ++-- drivers/net/ethernet/ti/am65-cpsw-ethtool.c | 4 ++-- drivers/net/ethernet/ti/cpsw_ethtool.c | 4 ++-- drivers/net/ethernet/ti/cpsw_priv.h | 4 ++-- drivers/net/ethernet/ti/icssg/icssg_ethtool.c | 4 ++-- drivers/net/phy/marvell.c | 2 +- drivers/net/phy/phy-c45.c | 8 ++++---- drivers/net/phy/phy.c | 8 ++++---- drivers/net/phy/phylink.c | 8 ++++---- drivers/net/usb/ax88179_178a.c | 10 +++++----- drivers/net/usb/lan78xx.c | 4 ++-- drivers/net/usb/r8152.c | 14 +++++++------- include/linux/ethtool.h | 16 ++++++++++++++-- include/linux/phy.h | 8 ++++---- include/linux/phylink.h | 4 ++-- include/net/dsa.h | 4 ++-- net/dsa/user.c | 4 ++-- net/ethtool/eee.c | 10 +++++----- net/ethtool/ioctl.c | 6 +++--- 54 files changed, 159 insertions(+), 147 deletions(-) (limited to 'net') diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 0d628b35fd5c1..adc93abf4551a 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1257,7 +1257,7 @@ static void b53_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phydev) { struct b53_device *dev = ds->priv; - struct ethtool_eee *p = &dev->ports[port].eee; + struct ethtool_keee *p = &dev->ports[port].eee; u8 rgmii_ctrl = 0, reg = 0, off; bool tx_pause = false; bool rx_pause = false; @@ -2224,10 +2224,10 @@ int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy) } EXPORT_SYMBOL(b53_eee_init); -int b53_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) +int b53_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e) { struct b53_device *dev = ds->priv; - struct ethtool_eee *p = &dev->ports[port].eee; + struct ethtool_keee *p = &dev->ports[port].eee; u16 reg; if (is5325(dev) || is5365(dev)) @@ -2241,10 +2241,10 @@ int b53_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) } EXPORT_SYMBOL(b53_get_mac_eee); -int b53_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) +int b53_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e) { struct b53_device *dev = ds->priv; - struct ethtool_eee *p = &dev->ports[port].eee; + struct ethtool_keee *p = &dev->ports[port].eee; if (is5325(dev) || is5365(dev)) return -EOPNOTSUPP; diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index fdcfd5081c284..c26a03755e833 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -95,7 +95,7 @@ struct b53_pcs { struct b53_port { u16 vlan_ctl_mask; - struct ethtool_eee eee; + struct ethtool_keee eee; }; struct b53_vlan { @@ -397,7 +397,7 @@ void b53_disable_port(struct dsa_switch *ds, int port); void b53_brcm_hdr_setup(struct dsa_switch *ds, int port); void b53_eee_enable_set(struct dsa_switch *ds, int port, bool enable); int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy); -int b53_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e); -int b53_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e); +int b53_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e); +int b53_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e); #endif diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 4a52ccbe393f7..bc77ee9e6d0aa 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -835,7 +835,7 @@ static void bcm_sf2_sw_mac_link_up(struct dsa_switch *ds, int port, bool tx_pause, bool rx_pause) { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - struct ethtool_eee *p = &priv->dev->ports[port].eee; + struct ethtool_keee *p = &priv->dev->ports[port].eee; u32 reg_rgmii_ctrl = 0; u32 reg, offset; diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 245dfb7a7a315..a7b5ddb8656ed 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -2852,7 +2852,7 @@ static int ksz_validate_eee(struct dsa_switch *ds, int port) } static int ksz_get_mac_eee(struct dsa_switch *ds, int port, - struct ethtool_eee *e) + struct ethtool_keee *e) { int ret; @@ -2872,7 +2872,7 @@ static int ksz_get_mac_eee(struct dsa_switch *ds, int port, } static int ksz_set_mac_eee(struct dsa_switch *ds, int port, - struct ethtool_eee *e) + struct ethtool_keee *e) { struct ksz_device *dev = ds->priv; int ret; diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 68be38ae66e05..98a73a62f2ee2 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -3048,7 +3048,7 @@ mt753x_setup(struct dsa_switch *ds) } static int mt753x_get_mac_eee(struct dsa_switch *ds, int port, - struct ethtool_eee *e) + struct ethtool_keee *e) { struct mt7530_priv *priv = ds->priv; u32 eeecr = mt7530_read(priv, MT7530_PMEEECR_P(port)); @@ -3060,7 +3060,7 @@ static int mt753x_get_mac_eee(struct dsa_switch *ds, int port, } static int mt753x_set_mac_eee(struct dsa_switch *ds, int port, - struct ethtool_eee *e) + struct ethtool_keee *e) { struct mt7530_priv *priv = ds->priv; u32 set, mask = LPI_THRESH_MASK | LPI_MODE_EN; diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 383b3c4d6f599..8b0079b8e0e03 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1451,14 +1451,14 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, } static int mv88e6xxx_get_mac_eee(struct dsa_switch *ds, int port, - struct ethtool_eee *e) + struct ethtool_keee *e) { /* Nothing to do on the port's MAC */ return 0; } static int mv88e6xxx_set_mac_eee(struct dsa_switch *ds, int port, - struct ethtool_eee *e) + struct ethtool_keee *e) { /* Nothing to do on the port's MAC */ return 0; diff --git a/drivers/net/dsa/qca/qca8k-common.c b/drivers/net/dsa/qca/qca8k-common.c index 2358cd399c7eb..7f80035c5441b 100644 --- a/drivers/net/dsa/qca/qca8k-common.c +++ b/drivers/net/dsa/qca/qca8k-common.c @@ -534,7 +534,7 @@ int qca8k_get_sset_count(struct dsa_switch *ds, int port, int sset) } int qca8k_set_mac_eee(struct dsa_switch *ds, int port, - struct ethtool_eee *eee) + struct ethtool_keee *eee) { u32 lpi_en = QCA8K_REG_EEE_CTRL_LPI_EN(port); struct qca8k_priv *priv = ds->priv; @@ -558,7 +558,7 @@ exit: } int qca8k_get_mac_eee(struct dsa_switch *ds, int port, - struct ethtool_eee *e) + struct ethtool_keee *e) { /* Nothing to do on the port's MAC */ return 0; diff --git a/drivers/net/dsa/qca/qca8k.h b/drivers/net/dsa/qca/qca8k.h index c8785c36c54e1..2184d8d2d5a93 100644 --- a/drivers/net/dsa/qca/qca8k.h +++ b/drivers/net/dsa/qca/qca8k.h @@ -518,8 +518,8 @@ void qca8k_get_ethtool_stats(struct dsa_switch *ds, int port, int qca8k_get_sset_count(struct dsa_switch *ds, int port, int sset); /* Common eee function */ -int qca8k_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *eee); -int qca8k_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e); +int qca8k_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *eee); +int qca8k_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e); /* Common bridge function */ void qca8k_port_stp_state_set(struct dsa_switch *ds, int port, u8 state); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index 18a6c8d99fa0a..be865776de55e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -697,7 +697,7 @@ static u32 eee_mask_to_ethtool_mask(u32 speed) return rate; } -static int aq_ethtool_get_eee(struct net_device *ndev, struct ethtool_eee *eee) +static int aq_ethtool_get_eee(struct net_device *ndev, struct ethtool_keee *eee) { struct aq_nic_s *aq_nic = netdev_priv(ndev); u32 rate, supported_rates; @@ -729,7 +729,7 @@ static int aq_ethtool_get_eee(struct net_device *ndev, struct ethtool_eee *eee) return 0; } -static int aq_ethtool_set_eee(struct net_device *ndev, struct ethtool_eee *eee) +static int aq_ethtool_set_eee(struct net_device *ndev, struct ethtool_keee *eee) { struct aq_nic_s *aq_nic = netdev_priv(ndev); u32 rate, supported_rates; diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.h b/drivers/net/ethernet/broadcom/asp2/bcmasp.h index ec90add6b03e2..312bf9b6576ef 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp.h +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.h @@ -337,7 +337,7 @@ struct bcmasp_intf { int wol_irq; unsigned int wol_irq_enabled:1; - struct ethtool_eee eee; + struct ethtool_keee eee; }; #define NUM_NET_FILTERS 32 diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c index ce6a3d56fb239..2851bed153e61 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_ethtool.c @@ -363,10 +363,10 @@ void bcmasp_eee_enable_set(struct bcmasp_intf *intf, bool enable) intf->eee.eee_active = enable; } -static int bcmasp_get_eee(struct net_device *dev, struct ethtool_eee *e) +static int bcmasp_get_eee(struct net_device *dev, struct ethtool_keee *e) { struct bcmasp_intf *intf = netdev_priv(dev); - struct ethtool_eee *p = &intf->eee; + struct ethtool_keee *p = &intf->eee; if (!dev->phydev) return -ENODEV; @@ -379,10 +379,10 @@ static int bcmasp_get_eee(struct net_device *dev, struct ethtool_eee *e) return phy_ethtool_get_eee(dev->phydev, e); } -static int bcmasp_set_eee(struct net_device *dev, struct ethtool_eee *e) +static int bcmasp_set_eee(struct net_device *dev, struct ethtool_keee *e) { struct bcmasp_intf *intf = netdev_priv(dev); - struct ethtool_eee *p = &intf->eee; + struct ethtool_keee *p = &intf->eee; int ret; if (!dev->phydev) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 0bc7690cdee16..12d2785eeb8a2 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -2108,7 +2108,7 @@ static u32 bnx2x_adv_to_eee(u32 modes, u32 shift) return eee_adv << shift; } -static int bnx2x_get_eee(struct net_device *dev, struct ethtool_eee *edata) +static int bnx2x_get_eee(struct net_device *dev, struct ethtool_keee *edata) { struct bnx2x *bp = netdev_priv(dev); u32 eee_cfg; @@ -2141,7 +2141,7 @@ static int bnx2x_get_eee(struct net_device *dev, struct ethtool_eee *edata) return 0; } -static int bnx2x_set_eee(struct net_device *dev, struct ethtool_eee *edata) +static int bnx2x_set_eee(struct net_device *dev, struct ethtool_keee *edata) { struct bnx2x *bp = netdev_priv(dev); u32 eee_cfg; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 39845d556bafc..d7626c26f9a9b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10621,7 +10621,7 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp) bp->phy_flags = resp->flags | (le16_to_cpu(resp->flags2) << 8); if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_EEE_SUPPORTED) { - struct ethtool_eee *eee = &bp->eee; + struct ethtool_keee *eee = &bp->eee; u16 fw_speeds = le16_to_cpu(resp->supported_speeds_eee_mode); eee->supported = _bnxt_fw_to_ethtool_adv_spds(fw_speeds, 0); @@ -10766,7 +10766,7 @@ int bnxt_update_link(struct bnxt *bp, bool chng_link_state) link_info->module_status = resp->module_status; if (bp->phy_flags & BNXT_PHY_FL_EEE_CAP) { - struct ethtool_eee *eee = &bp->eee; + struct ethtool_keee *eee = &bp->eee; u16 fw_speeds; eee->eee_active = 0; @@ -10957,7 +10957,7 @@ int bnxt_hwrm_set_pause(struct bnxt *bp) static void bnxt_hwrm_set_eee(struct bnxt *bp, struct hwrm_port_phy_cfg_input *req) { - struct ethtool_eee *eee = &bp->eee; + struct ethtool_keee *eee = &bp->eee; if (eee->eee_enabled) { u16 eee_speeds; @@ -11322,7 +11322,7 @@ static void bnxt_get_wol_settings(struct bnxt *bp) static bool bnxt_eee_config_ok(struct bnxt *bp) { - struct ethtool_eee *eee = &bp->eee; + struct ethtool_keee *eee = &bp->eee; struct bnxt_link_info *link_info = &bp->link_info; if (!(bp->phy_flags & BNXT_PHY_FL_EEE_CAP)) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 47338b48ca203..b2cb3e77559dc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2442,7 +2442,7 @@ struct bnxt { */ struct mutex link_lock; struct bnxt_link_info link_info; - struct ethtool_eee eee; + struct ethtool_keee eee; u32 lpi_tmr_lo; u32 lpi_tmr_hi; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index dc4ca706b0e29..d6a8577d68af9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -3884,10 +3884,10 @@ static int bnxt_set_eeprom(struct net_device *dev, eeprom->len); } -static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata) +static int bnxt_set_eee(struct net_device *dev, struct ethtool_keee *edata) { struct bnxt *bp = netdev_priv(dev); - struct ethtool_eee *eee = &bp->eee; + struct ethtool_keee *eee = &bp->eee; struct bnxt_link_info *link_info = &bp->link_info; u32 advertising; int rc = 0; @@ -3942,7 +3942,7 @@ eee_exit: return rc; } -static int bnxt_get_eee(struct net_device *dev, struct ethtool_eee *edata) +static int bnxt_get_eee(struct net_device *dev, struct ethtool_keee *edata) { struct bnxt *bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 2d7ae71287b14..051c31fb17c2d 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1317,10 +1317,10 @@ void bcmgenet_eee_enable_set(struct net_device *dev, bool enable, priv->eee.tx_lpi_enabled = tx_lpi_enabled; } -static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_eee *e) +static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_keee *e) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct ethtool_eee *p = &priv->eee; + struct ethtool_keee *p = &priv->eee; if (GENET_IS_V1(priv)) return -EOPNOTSUPP; @@ -1336,10 +1336,10 @@ static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_eee *e) return phy_ethtool_get_eee(dev->phydev, e); } -static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) +static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_keee *e) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct ethtool_eee *p = &priv->eee; + struct ethtool_keee *p = &priv->eee; if (GENET_IS_V1(priv)) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 1985c0ec4da2a..7523b60b3c1c0 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -645,7 +645,7 @@ struct bcmgenet_priv { struct bcmgenet_mib_counters mib; - struct ethtool_eee eee; + struct ethtool_keee eee; }; #define GENET_IO_MACRO(name, offset) \ diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 04964bbe08cf3..11054177c2ece 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -2338,10 +2338,10 @@ static void tg3_phy_apply_otp(struct tg3 *tp) tg3_phy_toggle_auxctl_smdsp(tp, false); } -static void tg3_eee_pull_config(struct tg3 *tp, struct ethtool_eee *eee) +static void tg3_eee_pull_config(struct tg3 *tp, struct ethtool_keee *eee) { u32 val; - struct ethtool_eee *dest = &tp->eee; + struct ethtool_keee *dest = &tp->eee; if (!(tp->phy_flags & TG3_PHYFLG_EEE_CAP)) return; @@ -4618,7 +4618,7 @@ static int tg3_init_5401phy_dsp(struct tg3 *tp) static bool tg3_phy_eee_config_ok(struct tg3 *tp) { - struct ethtool_eee eee; + struct ethtool_keee eee; if (!(tp->phy_flags & TG3_PHYFLG_EEE_CAP)) return true; @@ -14180,7 +14180,7 @@ static int tg3_set_coalesce(struct net_device *dev, return 0; } -static int tg3_set_eee(struct net_device *dev, struct ethtool_eee *edata) +static int tg3_set_eee(struct net_device *dev, struct ethtool_keee *edata) { struct tg3 *tp = netdev_priv(dev); @@ -14217,7 +14217,7 @@ static int tg3_set_eee(struct net_device *dev, struct ethtool_eee *edata) return 0; } -static int tg3_get_eee(struct net_device *dev, struct ethtool_eee *edata) +static int tg3_get_eee(struct net_device *dev, struct ethtool_keee *edata) { struct tg3 *tp = netdev_priv(dev); diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index 5016475e50054..cf1b2b123c7ee 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -3419,7 +3419,7 @@ struct tg3 { unsigned int irq_cnt; struct ethtool_coalesce coal; - struct ethtool_eee eee; + struct ethtool_keee eee; /* firmware info */ const char *fw_needed; diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index ae0b8b37b9bf1..eb64118f5b188 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -240,7 +240,7 @@ static int tsnep_phy_loopback(struct tsnep_adapter *adapter, bool enable) static int tsnep_phy_open(struct tsnep_adapter *adapter) { struct phy_device *phydev; - struct ethtool_eee ethtool_eee; + struct ethtool_keee ethtool_keee; int retval; retval = phy_connect_direct(adapter->netdev, adapter->phydev, @@ -259,8 +259,8 @@ static int tsnep_phy_open(struct tsnep_adapter *adapter) phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT); /* disable EEE autoneg, EEE not supported by TSNEP */ - memset(ðtool_eee, 0, sizeof(ethtool_eee)); - phy_ethtool_set_eee(adapter->phydev, ðtool_eee); + memset(ðtool_keee, 0, sizeof(ethtool_keee)); + phy_ethtool_set_eee(adapter->phydev, ðtool_keee); adapter->phydev->irq = PHY_MAC_INTERRUPT; phy_start(adapter->phydev); diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index bfdbdab443ae0..9f07f4947b631 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -2402,7 +2402,7 @@ static void enetc_clear_interrupts(struct enetc_ndev_priv *priv) static int enetc_phylink_connect(struct net_device *ndev) { struct enetc_ndev_priv *priv = netdev_priv(ndev); - struct ethtool_eee edata; + struct ethtool_keee edata; int err; if (!priv->phylink) { @@ -2418,7 +2418,7 @@ static int enetc_phylink_connect(struct net_device *ndev) } /* disable EEE autoneg, until ENETC driver supports it */ - memset(&edata, 0, sizeof(struct ethtool_eee)); + memset(&edata, 0, sizeof(struct ethtool_keee)); phylink_ethtool_set_eee(priv->phylink, &edata); phylink_start(priv->phylink); diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index a8fbcada6b01f..a19cb2a786fd2 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -672,7 +672,7 @@ struct fec_enet_private { unsigned int itr_clk_rate; /* tx lpi eee mode */ - struct ethtool_eee eee; + struct ethtool_keee eee; unsigned int clk_ref_rate; /* ptp clock period in ns*/ diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 432523b2c7892..63707e0651419 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3122,7 +3122,7 @@ static int fec_enet_us_to_tx_cycle(struct net_device *ndev, int us) static int fec_enet_eee_mode_set(struct net_device *ndev, bool enable) { struct fec_enet_private *fep = netdev_priv(ndev); - struct ethtool_eee *p = &fep->eee; + struct ethtool_keee *p = &fep->eee; unsigned int sleep_cycle, wake_cycle; int ret = 0; @@ -3149,10 +3149,10 @@ static int fec_enet_eee_mode_set(struct net_device *ndev, bool enable) } static int -fec_enet_get_eee(struct net_device *ndev, struct ethtool_eee *edata) +fec_enet_get_eee(struct net_device *ndev, struct ethtool_keee *edata) { struct fec_enet_private *fep = netdev_priv(ndev); - struct ethtool_eee *p = &fep->eee; + struct ethtool_keee *p = &fep->eee; if (!(fep->quirks & FEC_QUIRK_HAS_EEE)) return -EOPNOTSUPP; @@ -3169,10 +3169,10 @@ fec_enet_get_eee(struct net_device *ndev, struct ethtool_eee *edata) } static int -fec_enet_set_eee(struct net_device *ndev, struct ethtool_eee *edata) +fec_enet_set_eee(struct net_device *ndev, struct ethtool_keee *edata) { struct fec_enet_private *fep = netdev_priv(ndev); - struct ethtool_eee *p = &fep->eee; + struct ethtool_keee *p = &fep->eee; int ret = 0; if (!(fep->quirks & FEC_QUIRK_HAS_EEE)) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index e3dfbd7a42361..a811238c018de 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -1649,7 +1649,7 @@ static int init_phy(struct net_device *dev) struct gfar_private *priv = netdev_priv(dev); phy_interface_t interface = priv->interface; struct phy_device *phydev; - struct ethtool_eee edata; + struct ethtool_keee edata; linkmode_set_bit_array(phy_10_100_features_array, ARRAY_SIZE(phy_10_100_features_array), @@ -1681,7 +1681,7 @@ static int init_phy(struct net_device *dev) phy_support_asym_pause(phydev); /* disable EEE autoneg, EEE not supported by eTSEC */ - memset(&edata, 0, sizeof(struct ethtool_eee)); + memset(&edata, 0, sizeof(struct ethtool_keee)); phy_ethtool_set_eee(phydev, &edata); return 0; diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index fc0f98ea61332..343f54b2b6ae2 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -2186,7 +2186,7 @@ static int e1000_get_rxnfc(struct net_device *netdev, } } -static int e1000e_get_eee(struct net_device *netdev, struct ethtool_eee *edata) +static int e1000e_get_eee(struct net_device *netdev, struct ethtool_keee *edata) { struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -2262,11 +2262,11 @@ release: return ret_val; } -static int e1000e_set_eee(struct net_device *netdev, struct ethtool_eee *edata) +static int e1000e_set_eee(struct net_device *netdev, struct ethtool_keee *edata) { struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - struct ethtool_eee eee_curr; + struct ethtool_keee eee_curr; s32 ret_val; ret_val = e1000e_get_eee(netdev, &eee_curr); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index c841779713f66..9dfda3c48af35 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -5644,7 +5644,7 @@ static int i40e_get_module_eeprom(struct net_device *netdev, return 0; } -static int i40e_get_eee(struct net_device *netdev, struct ethtool_eee *edata) +static int i40e_get_eee(struct net_device *netdev, struct ethtool_keee *edata) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_aq_get_phy_abilities_resp phy_cfg; @@ -5682,7 +5682,7 @@ static int i40e_get_eee(struct net_device *netdev, struct ethtool_eee *edata) } static int i40e_is_eee_param_supported(struct net_device *netdev, - struct ethtool_eee *edata) + struct ethtool_keee *edata) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; @@ -5709,7 +5709,7 @@ static int i40e_is_eee_param_supported(struct net_device *netdev, return 0; } -static int i40e_set_eee(struct net_device *netdev, struct ethtool_eee *edata) +static int i40e_set_eee(struct net_device *netdev, struct ethtool_keee *edata) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_aq_get_phy_abilities_resp abilities; diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index b66199c9bb3a9..778d1e6cfc45d 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -3027,7 +3027,7 @@ static int igb_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) return ret; } -static int igb_get_eee(struct net_device *netdev, struct ethtool_eee *edata) +static int igb_get_eee(struct net_device *netdev, struct ethtool_keee *edata) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -3106,11 +3106,11 @@ static int igb_get_eee(struct net_device *netdev, struct ethtool_eee *edata) } static int igb_set_eee(struct net_device *netdev, - struct ethtool_eee *edata) + struct ethtool_keee *edata) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - struct ethtool_eee eee_curr; + struct ethtool_keee eee_curr; bool adv1g_eee = true, adv100m_eee = true; s32 ret_val; @@ -3118,7 +3118,7 @@ static int igb_set_eee(struct net_device *netdev, (hw->phy.media_type != e1000_media_type_copper)) return -EOPNOTSUPP; - memset(&eee_curr, 0, sizeof(struct ethtool_eee)); + memset(&eee_curr, 0, sizeof(struct ethtool_keee)); ret_val = igb_get_eee(netdev, &eee_curr); if (ret_val) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 45430e246e9cb..75f7c5ba65e09 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -168,7 +168,7 @@ struct igc_ring { struct igc_adapter { struct net_device *netdev; - struct ethtool_eee eee; + struct ethtool_keee eee; u16 eee_advert; unsigned long state; diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index b95d2c86e803e..f2dcfe920f4f3 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1623,7 +1623,7 @@ static int igc_ethtool_set_priv_flags(struct net_device *netdev, u32 priv_flags) } static int igc_ethtool_get_eee(struct net_device *netdev, - struct ethtool_eee *edata) + struct ethtool_keee *edata) { struct igc_adapter *adapter = netdev_priv(netdev); struct igc_hw *hw = &adapter->hw; @@ -1664,14 +1664,14 @@ static int igc_ethtool_get_eee(struct net_device *netdev, } static int igc_ethtool_set_eee(struct net_device *netdev, - struct ethtool_eee *edata) + struct ethtool_keee *edata) { struct igc_adapter *adapter = netdev_priv(netdev); struct igc_hw *hw = &adapter->hw; - struct ethtool_eee eee_curr; + struct ethtool_keee eee_curr; s32 ret_val; - memset(&eee_curr, 0, sizeof(struct ethtool_eee)); + memset(&eee_curr, 0, sizeof(struct ethtool_keee)); ret_val = igc_ethtool_get_eee(netdev, &eee_curr); if (ret_val) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 9a63457712c7b..0aa73519a29d3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -3425,7 +3425,7 @@ static const struct { }; static int -ixgbe_get_eee_fw(struct ixgbe_adapter *adapter, struct ethtool_eee *edata) +ixgbe_get_eee_fw(struct ixgbe_adapter *adapter, struct ethtool_keee *edata) { u32 info[FW_PHY_ACT_DATA_COUNT] = { 0 }; struct ixgbe_hw *hw = &adapter->hw; @@ -3462,7 +3462,7 @@ ixgbe_get_eee_fw(struct ixgbe_adapter *adapter, struct ethtool_eee *edata) return 0; } -static int ixgbe_get_eee(struct net_device *netdev, struct ethtool_eee *edata) +static int ixgbe_get_eee(struct net_device *netdev, struct ethtool_keee *edata) { struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; @@ -3476,17 +3476,17 @@ static int ixgbe_get_eee(struct net_device *netdev, struct ethtool_eee *edata) return -EOPNOTSUPP; } -static int ixgbe_set_eee(struct net_device *netdev, struct ethtool_eee *edata) +static int ixgbe_set_eee(struct net_device *netdev, struct ethtool_keee *edata) { struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; - struct ethtool_eee eee_data; + struct ethtool_keee eee_data; s32 ret_val; if (!(adapter->flags2 & IXGBE_FLAG2_EEE_CAPABLE)) return -EOPNOTSUPP; - memset(&eee_data, 0, sizeof(struct ethtool_eee)); + memset(&eee_data, 0, sizeof(struct ethtool_keee)); ret_val = ixgbe_get_eee(netdev, &eee_data); if (ret_val) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index a641b3534ca3c..40a5f1431e4e8 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -5097,7 +5097,7 @@ static int mvneta_ethtool_set_wol(struct net_device *dev, } static int mvneta_ethtool_get_eee(struct net_device *dev, - struct ethtool_eee *eee) + struct ethtool_keee *eee) { struct mvneta_port *pp = netdev_priv(dev); u32 lpi_ctl0; @@ -5113,7 +5113,7 @@ static int mvneta_ethtool_get_eee(struct net_device *dev, } static int mvneta_ethtool_set_eee(struct net_device *dev, - struct ethtool_eee *eee) + struct ethtool_keee *eee) { struct mvneta_port *pp = netdev_priv(dev); u32 lpi_ctl0; diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c b/drivers/net/ethernet/microchip/lan743x_ethtool.c index a2b3f4433ca8f..8a6ae171e375b 100644 --- a/drivers/net/ethernet/microchip/lan743x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c @@ -1055,7 +1055,7 @@ static int lan743x_ethtool_get_ts_info(struct net_device *netdev, } static int lan743x_ethtool_get_eee(struct net_device *netdev, - struct ethtool_eee *eee) + struct ethtool_keee *eee) { struct lan743x_adapter *adapter = netdev_priv(netdev); struct phy_device *phydev = netdev->phydev; @@ -1092,7 +1092,7 @@ static int lan743x_ethtool_get_eee(struct net_device *netdev, } static int lan743x_ethtool_set_eee(struct net_device *netdev, - struct ethtool_eee *eee) + struct ethtool_keee *eee) { struct lan743x_adapter *adapter; struct phy_device *phydev; diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 0e240b5ab8d48..77491fb640393 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -1776,7 +1776,7 @@ static int qede_get_tunable(struct net_device *dev, return 0; } -static int qede_get_eee(struct net_device *dev, struct ethtool_eee *edata) +static int qede_get_eee(struct net_device *dev, struct ethtool_keee *edata) { struct qede_dev *edev = netdev_priv(dev); struct qed_link_output current_link; @@ -1810,7 +1810,7 @@ static int qede_get_eee(struct net_device *dev, struct ethtool_eee *edata) return 0; } -static int qede_set_eee(struct net_device *dev, struct ethtool_eee *edata) +static int qede_set_eee(struct net_device *dev, struct ethtool_keee *edata) { struct qede_dev *edev = netdev_priv(dev); struct qed_link_output current_link; diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index dd73df6b17b06..3d30d44997918 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1974,7 +1974,7 @@ static int rtl_set_coalesce(struct net_device *dev, return 0; } -static int rtl8169_get_eee(struct net_device *dev, struct ethtool_eee *data) +static int rtl8169_get_eee(struct net_device *dev, struct ethtool_keee *data) { struct rtl8169_private *tp = netdev_priv(dev); @@ -1984,7 +1984,7 @@ static int rtl8169_get_eee(struct net_device *dev, struct ethtool_eee *data) return phy_ethtool_get_eee(tp->phydev, data); } -static int rtl8169_set_eee(struct net_device *dev, struct ethtool_eee *data) +static int rtl8169_set_eee(struct net_device *dev, struct ethtool_keee *data) { struct rtl8169_private *tp = netdev_priv(dev); int ret; diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c index 8ba017ec98493..d93b628b70468 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c @@ -133,7 +133,7 @@ static const struct sxgbe_stats sxgbe_gstrings_stats[] = { #define SXGBE_STATS_LEN ARRAY_SIZE(sxgbe_gstrings_stats) static int sxgbe_get_eee(struct net_device *dev, - struct ethtool_eee *edata) + struct ethtool_keee *edata) { struct sxgbe_priv_data *priv = netdev_priv(dev); @@ -148,7 +148,7 @@ static int sxgbe_get_eee(struct net_device *dev, } static int sxgbe_set_eee(struct net_device *dev, - struct ethtool_eee *edata) + struct ethtool_keee *edata) { struct sxgbe_priv_data *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 42d27b97dd1d0..bbecb3b89535c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -852,7 +852,7 @@ static int stmmac_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) } static int stmmac_ethtool_op_get_eee(struct net_device *dev, - struct ethtool_eee *edata) + struct ethtool_keee *edata) { struct stmmac_priv *priv = netdev_priv(dev); @@ -868,7 +868,7 @@ static int stmmac_ethtool_op_get_eee(struct net_device *dev, } static int stmmac_ethtool_op_set_eee(struct net_device *dev, - struct ethtool_eee *edata) + struct ethtool_keee *edata) { struct stmmac_priv *priv = netdev_priv(dev); int ret; diff --git a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c index 35fceba01ea45..d6ce2c9f0a8d8 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c +++ b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c @@ -514,14 +514,14 @@ am65_cpsw_set_link_ksettings(struct net_device *ndev, return phylink_ethtool_ksettings_set(salve->phylink, ecmd); } -static int am65_cpsw_get_eee(struct net_device *ndev, struct ethtool_eee *edata) +static int am65_cpsw_get_eee(struct net_device *ndev, struct ethtool_keee *edata) { struct am65_cpsw_slave_data *salve = am65_ndev_to_slave(ndev); return phylink_ethtool_get_eee(salve->phylink, edata); } -static int am65_cpsw_set_eee(struct net_device *ndev, struct ethtool_eee *edata) +static int am65_cpsw_set_eee(struct net_device *ndev, struct ethtool_keee *edata) { struct am65_cpsw_slave_data *salve = am65_ndev_to_slave(ndev); diff --git a/drivers/net/ethernet/ti/cpsw_ethtool.c b/drivers/net/ethernet/ti/cpsw_ethtool.c index a557a477d0393..f7b283353ba26 100644 --- a/drivers/net/ethernet/ti/cpsw_ethtool.c +++ b/drivers/net/ethernet/ti/cpsw_ethtool.c @@ -422,7 +422,7 @@ int cpsw_set_link_ksettings(struct net_device *ndev, return phy_ethtool_ksettings_set(cpsw->slaves[slave_no].phy, ecmd); } -int cpsw_get_eee(struct net_device *ndev, struct ethtool_eee *edata) +int cpsw_get_eee(struct net_device *ndev, struct ethtool_keee *edata) { struct cpsw_priv *priv = netdev_priv(ndev); struct cpsw_common *cpsw = priv->cpsw; @@ -434,7 +434,7 @@ int cpsw_get_eee(struct net_device *ndev, struct ethtool_eee *edata) return -EOPNOTSUPP; } -int cpsw_set_eee(struct net_device *ndev, struct ethtool_eee *edata) +int cpsw_set_eee(struct net_device *ndev, struct ethtool_keee *edata) { struct cpsw_priv *priv = netdev_priv(ndev); struct cpsw_common *cpsw = priv->cpsw; diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h index 0e27c433098d8..7efa72502c86c 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.h +++ b/drivers/net/ethernet/ti/cpsw_priv.h @@ -496,8 +496,8 @@ int cpsw_get_link_ksettings(struct net_device *ndev, struct ethtool_link_ksettings *ecmd); int cpsw_set_link_ksettings(struct net_device *ndev, const struct ethtool_link_ksettings *ecmd); -int cpsw_get_eee(struct net_device *ndev, struct ethtool_eee *edata); -int cpsw_set_eee(struct net_device *ndev, struct ethtool_eee *edata); +int cpsw_get_eee(struct net_device *ndev, struct ethtool_keee *edata); +int cpsw_set_eee(struct net_device *ndev, struct ethtool_keee *edata); int cpsw_nway_reset(struct net_device *ndev); void cpsw_get_ringparam(struct net_device *ndev, struct ethtool_ringparam *ering, diff --git a/drivers/net/ethernet/ti/icssg/icssg_ethtool.c b/drivers/net/ethernet/ti/icssg/icssg_ethtool.c index a27ec1dcc8d56..9a7dd7efcf699 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_ethtool.c +++ b/drivers/net/ethernet/ti/icssg/icssg_ethtool.c @@ -45,7 +45,7 @@ static int emac_set_link_ksettings(struct net_device *ndev, return phy_ethtool_set_link_ksettings(ndev, ecmd); } -static int emac_get_eee(struct net_device *ndev, struct ethtool_eee *edata) +static int emac_get_eee(struct net_device *ndev, struct ethtool_keee *edata) { if (!ndev->phydev) return -EOPNOTSUPP; @@ -53,7 +53,7 @@ static int emac_get_eee(struct net_device *ndev, struct ethtool_eee *edata) return phy_ethtool_get_eee(ndev->phydev, edata); } -static int emac_set_eee(struct net_device *ndev, struct ethtool_eee *edata) +static int emac_set_eee(struct net_device *ndev, struct ethtool_keee *edata) { if (!ndev->phydev) return -EOPNOTSUPP; diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index eba652a4c1d88..1faa22f583666 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1461,7 +1461,7 @@ static int m88e1540_get_fld(struct phy_device *phydev, u8 *msecs) static int m88e1540_set_fld(struct phy_device *phydev, const u8 *msecs) { - struct ethtool_eee eee; + struct ethtool_keee eee; int val, ret; if (*msecs == ETHTOOL_PHY_FAST_LINK_DOWN_OFF) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 747d14bf152c3..adee5e712871f 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -1443,13 +1443,13 @@ EXPORT_SYMBOL(genphy_c45_eee_is_active); /** * genphy_c45_ethtool_get_eee - get EEE supported and status * @phydev: target phy_device struct - * @data: ethtool_eee data + * @data: ethtool_keee data * * Description: it reports the Supported/Advertisement/LP Advertisement * capabilities. */ int genphy_c45_ethtool_get_eee(struct phy_device *phydev, - struct ethtool_eee *data) + struct ethtool_keee *data) { __ETHTOOL_DECLARE_LINK_MODE_MASK(adv) = {}; __ETHTOOL_DECLARE_LINK_MODE_MASK(lp) = {}; @@ -1481,7 +1481,7 @@ EXPORT_SYMBOL(genphy_c45_ethtool_get_eee); /** * genphy_c45_ethtool_set_eee - set EEE supported and status * @phydev: target phy_device struct - * @data: ethtool_eee data + * @data: ethtool_keee data * * Description: sets the Supported/Advertisement/LP Advertisement * capabilities. If eee_enabled is false, no links modes are @@ -1490,7 +1490,7 @@ EXPORT_SYMBOL(genphy_c45_ethtool_get_eee); * non-destructive way. */ int genphy_c45_ethtool_set_eee(struct phy_device *phydev, - struct ethtool_eee *data) + struct ethtool_keee *data) { int ret; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 3376e58e2b88e..3b9531143be1c 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1632,12 +1632,12 @@ EXPORT_SYMBOL(phy_get_eee_err); /** * phy_ethtool_get_eee - get EEE supported and status * @phydev: target phy_device struct - * @data: ethtool_eee data + * @data: ethtool_keee data * * Description: it reportes the Supported/Advertisement/LP Advertisement * capabilities. */ -int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_eee *data) +int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_keee *data) { int ret; @@ -1655,11 +1655,11 @@ EXPORT_SYMBOL(phy_ethtool_get_eee); /** * phy_ethtool_set_eee - set EEE supported and status * @phydev: target phy_device struct - * @data: ethtool_eee data + * @data: ethtool_keee data * * Description: it is to program the Advertisement EEE register. */ -int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data) +int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_keee *data) { int ret; diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index ed0b4ccaa6a66..503fd7c405235 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -2764,9 +2764,9 @@ EXPORT_SYMBOL_GPL(phylink_init_eee); /** * phylink_ethtool_get_eee() - read the energy efficient ethernet parameters * @pl: a pointer to a &struct phylink returned from phylink_create() - * @eee: a pointer to a &struct ethtool_eee for the read parameters + * @eee: a pointer to a &struct ethtool_keee for the read parameters */ -int phylink_ethtool_get_eee(struct phylink *pl, struct ethtool_eee *eee) +int phylink_ethtool_get_eee(struct phylink *pl, struct ethtool_keee *eee) { int ret = -EOPNOTSUPP; @@ -2782,9 +2782,9 @@ EXPORT_SYMBOL_GPL(phylink_ethtool_get_eee); /** * phylink_ethtool_set_eee() - set the energy efficient ethernet parameters * @pl: a pointer to a &struct phylink returned from phylink_create() - * @eee: a pointer to a &struct ethtool_eee for the desired parameters + * @eee: a pointer to a &struct ethtool_keee for the desired parameters */ -int phylink_ethtool_set_eee(struct phylink *pl, struct ethtool_eee *eee) +int phylink_ethtool_set_eee(struct phylink *pl, struct ethtool_keee *eee) { int ret = -EOPNOTSUPP; diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index d837c18874161..3922a9afdd1fe 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -667,7 +667,7 @@ static int ax88179_set_link_ksettings(struct net_device *net, } static int -ax88179_ethtool_get_eee(struct usbnet *dev, struct ethtool_eee *data) +ax88179_ethtool_get_eee(struct usbnet *dev, struct ethtool_keee *data) { int val; @@ -696,7 +696,7 @@ ax88179_ethtool_get_eee(struct usbnet *dev, struct ethtool_eee *data) } static int -ax88179_ethtool_set_eee(struct usbnet *dev, struct ethtool_eee *data) +ax88179_ethtool_set_eee(struct usbnet *dev, struct ethtool_keee *data) { u16 tmp16 = ethtool_adv_to_mmd_eee_adv_t(data->advertised); @@ -807,7 +807,7 @@ static void ax88179_enable_eee(struct usbnet *dev) GMII_PHY_PAGE_SELECT, 2, &tmp16); } -static int ax88179_get_eee(struct net_device *net, struct ethtool_eee *edata) +static int ax88179_get_eee(struct net_device *net, struct ethtool_keee *edata) { struct usbnet *dev = netdev_priv(net); struct ax88179_data *priv = dev->driver_priv; @@ -818,7 +818,7 @@ static int ax88179_get_eee(struct net_device *net, struct ethtool_eee *edata) return ax88179_ethtool_get_eee(dev, edata); } -static int ax88179_set_eee(struct net_device *net, struct ethtool_eee *edata) +static int ax88179_set_eee(struct net_device *net, struct ethtool_keee *edata) { struct usbnet *dev = netdev_priv(net); struct ax88179_data *priv = dev->driver_priv; @@ -1587,7 +1587,7 @@ static int ax88179_reset(struct usbnet *dev) u16 *tmp16; u8 *tmp; struct ax88179_data *ax179_data = dev->driver_priv; - struct ethtool_eee eee_data; + struct ethtool_keee eee_data; tmp16 = (u16 *)buf; tmp = (u8 *)buf; diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index a6d653ff552a2..106282612bc25 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1673,7 +1673,7 @@ static int lan78xx_set_wol(struct net_device *netdev, return ret; } -static int lan78xx_get_eee(struct net_device *net, struct ethtool_eee *edata) +static int lan78xx_get_eee(struct net_device *net, struct ethtool_keee *edata) { struct lan78xx_net *dev = netdev_priv(net); struct phy_device *phydev = net->phydev; @@ -1709,7 +1709,7 @@ exit: return ret; } -static int lan78xx_set_eee(struct net_device *net, struct ethtool_eee *edata) +static int lan78xx_set_eee(struct net_device *net, struct ethtool_keee *edata) { struct lan78xx_net *dev = netdev_priv(net); int ret; diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 0d0672d2a6545..dc163b766a890 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -891,8 +891,8 @@ struct r8152 { void (*up)(struct r8152 *tp); void (*down)(struct r8152 *tp); void (*unload)(struct r8152 *tp); - int (*eee_get)(struct r8152 *tp, struct ethtool_eee *eee); - int (*eee_set)(struct r8152 *tp, struct ethtool_eee *eee); + int (*eee_get)(struct r8152 *tp, struct ethtool_keee *eee); + int (*eee_set)(struct r8152 *tp, struct ethtool_keee *eee); bool (*in_nway)(struct r8152 *tp); void (*hw_phy_cfg)(struct r8152 *tp); void (*autosuspend_en)(struct r8152 *tp, bool enable); @@ -8922,7 +8922,7 @@ static void rtl8152_get_strings(struct net_device *dev, u32 stringset, u8 *data) } } -static int r8152_get_eee(struct r8152 *tp, struct ethtool_eee *eee) +static int r8152_get_eee(struct r8152 *tp, struct ethtool_keee *eee) { u32 lp, adv, supported = 0; u16 val; @@ -8945,7 +8945,7 @@ static int r8152_get_eee(struct r8152 *tp, struct ethtool_eee *eee) return 0; } -static int r8152_set_eee(struct r8152 *tp, struct ethtool_eee *eee) +static int r8152_set_eee(struct r8152 *tp, struct ethtool_keee *eee) { u16 val = ethtool_adv_to_mmd_eee_adv_t(eee->advertised); @@ -8957,7 +8957,7 @@ static int r8152_set_eee(struct r8152 *tp, struct ethtool_eee *eee) return 0; } -static int r8153_get_eee(struct r8152 *tp, struct ethtool_eee *eee) +static int r8153_get_eee(struct r8152 *tp, struct ethtool_keee *eee) { u32 lp, adv, supported = 0; u16 val; @@ -8981,7 +8981,7 @@ static int r8153_get_eee(struct r8152 *tp, struct ethtool_eee *eee) } static int -rtl_ethtool_get_eee(struct net_device *net, struct ethtool_eee *edata) +rtl_ethtool_get_eee(struct net_device *net, struct ethtool_keee *edata) { struct r8152 *tp = netdev_priv(net); int ret; @@ -9008,7 +9008,7 @@ out: } static int -rtl_ethtool_set_eee(struct net_device *net, struct ethtool_eee *edata) +rtl_ethtool_set_eee(struct net_device *net, struct ethtool_keee *edata) { struct r8152 *tp = netdev_priv(net); int ret; diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 325e0778e9371..a850bab8489f2 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -222,6 +222,18 @@ extern int __ethtool_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *link_ksettings); +struct ethtool_keee { + u32 cmd; + u32 supported; + u32 advertised; + u32 lp_advertised; + u32 eee_active; + u32 eee_enabled; + u32 tx_lpi_enabled; + u32 tx_lpi_timer; + u32 reserved[2]; +}; + struct kernel_ethtool_coalesce { u8 use_cqe_mode_tx; u8 use_cqe_mode_rx; @@ -892,8 +904,8 @@ struct ethtool_ops { struct ethtool_modinfo *); int (*get_module_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *); - int (*get_eee)(struct net_device *, struct ethtool_eee *); - int (*set_eee)(struct net_device *, struct ethtool_eee *); + int (*get_eee)(struct net_device *dev, struct ethtool_keee *eee); + int (*set_eee)(struct net_device *dev, struct ethtool_keee *eee); int (*get_tunable)(struct net_device *, const struct ethtool_tunable *, void *); int (*set_tunable)(struct net_device *, diff --git a/include/linux/phy.h b/include/linux/phy.h index c9994a59ca2e6..a66f07d3f5f49 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1908,9 +1908,9 @@ int genphy_c45_plca_get_status(struct phy_device *phydev, int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv, unsigned long *lp, bool *is_enabled); int genphy_c45_ethtool_get_eee(struct phy_device *phydev, - struct ethtool_eee *data); + struct ethtool_keee *data); int genphy_c45_ethtool_set_eee(struct phy_device *phydev, - struct ethtool_eee *data); + struct ethtool_keee *data); int genphy_c45_write_eee_adv(struct phy_device *phydev, unsigned long *adv); int genphy_c45_an_config_eee_aneg(struct phy_device *phydev); int genphy_c45_read_eee_adv(struct phy_device *phydev, unsigned long *adv); @@ -1988,8 +1988,8 @@ int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask); int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable); int phy_get_eee_err(struct phy_device *phydev); -int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data); -int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_eee *data); +int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_keee *data); +int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_keee *data); int phy_ethtool_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); void phy_ethtool_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); diff --git a/include/linux/phylink.h b/include/linux/phylink.h index d589f89c612c6..6ba411732a0d2 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -584,8 +584,8 @@ int phylink_ethtool_set_pauseparam(struct phylink *, struct ethtool_pauseparam *); int phylink_get_eee_err(struct phylink *); int phylink_init_eee(struct phylink *, bool); -int phylink_ethtool_get_eee(struct phylink *, struct ethtool_eee *); -int phylink_ethtool_set_eee(struct phylink *, struct ethtool_eee *); +int phylink_ethtool_get_eee(struct phylink *link, struct ethtool_keee *eee); +int phylink_ethtool_set_eee(struct phylink *link, struct ethtool_keee *eee); int phylink_mii_ioctl(struct phylink *, struct ifreq *, int); int phylink_speed_down(struct phylink *pl, bool sync); int phylink_speed_up(struct phylink *pl); diff --git a/include/net/dsa.h b/include/net/dsa.h index 82135fbdb1e68..7c0da9effe4e9 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -991,9 +991,9 @@ struct dsa_switch_ops { * Port's MAC EEE settings */ int (*set_mac_eee)(struct dsa_switch *ds, int port, - struct ethtool_eee *e); + struct ethtool_keee *e); int (*get_mac_eee)(struct dsa_switch *ds, int port, - struct ethtool_eee *e); + struct ethtool_keee *e); /* EEPROM access */ int (*get_eeprom_len)(struct dsa_switch *ds); diff --git a/net/dsa/user.c b/net/dsa/user.c index b15e71cc342c7..e03da3a4ac380 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -1222,7 +1222,7 @@ static int dsa_user_set_wol(struct net_device *dev, struct ethtool_wolinfo *w) return ret; } -static int dsa_user_set_eee(struct net_device *dev, struct ethtool_eee *e) +static int dsa_user_set_eee(struct net_device *dev, struct ethtool_keee *e) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; @@ -1242,7 +1242,7 @@ static int dsa_user_set_eee(struct net_device *dev, struct ethtool_eee *e) return phylink_ethtool_set_eee(dp->pl, e); } -static int dsa_user_get_eee(struct net_device *dev, struct ethtool_eee *e) +static int dsa_user_get_eee(struct net_device *dev, struct ethtool_keee *e) { struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; diff --git a/net/ethtool/eee.c b/net/ethtool/eee.c index 2853394d06a89..21b0e845a531b 100644 --- a/net/ethtool/eee.c +++ b/net/ethtool/eee.c @@ -5,7 +5,7 @@ #include "bitset.h" #define EEE_MODES_COUNT \ - (sizeof_field(struct ethtool_eee, supported) * BITS_PER_BYTE) + (sizeof_field(struct ethtool_keee, supported) * BITS_PER_BYTE) struct eee_req_info { struct ethnl_req_info base; @@ -13,7 +13,7 @@ struct eee_req_info { struct eee_reply_data { struct ethnl_reply_data base; - struct ethtool_eee eee; + struct ethtool_keee eee; }; #define EEE_REPDATA(__reply_base) \ @@ -48,7 +48,7 @@ static int eee_reply_size(const struct ethnl_req_info *req_base, { bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; const struct eee_reply_data *data = EEE_REPDATA(reply_base); - const struct ethtool_eee *eee = &data->eee; + const struct ethtool_keee *eee = &data->eee; int len = 0; int ret; @@ -84,7 +84,7 @@ static int eee_fill_reply(struct sk_buff *skb, { bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; const struct eee_reply_data *data = EEE_REPDATA(reply_base); - const struct ethtool_eee *eee = &data->eee; + const struct ethtool_keee *eee = &data->eee; int ret; ret = ethnl_put_bitset32(skb, ETHTOOL_A_EEE_MODES_OURS, @@ -132,7 +132,7 @@ ethnl_set_eee(struct ethnl_req_info *req_info, struct genl_info *info) { struct net_device *dev = req_info->dev; struct nlattr **tb = info->attrs; - struct ethtool_eee eee = {}; + struct ethtool_keee eee = {}; bool mod = false; int ret; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 7519b0818b913..b02ca72f438e5 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1510,13 +1510,13 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) static int ethtool_get_eee(struct net_device *dev, char __user *useraddr) { - struct ethtool_eee edata; + struct ethtool_keee edata; int rc; if (!dev->ethtool_ops->get_eee) return -EOPNOTSUPP; - memset(&edata, 0, sizeof(struct ethtool_eee)); + memset(&edata, 0, sizeof(struct ethtool_keee)); edata.cmd = ETHTOOL_GEEE; rc = dev->ethtool_ops->get_eee(dev, &edata); @@ -1531,7 +1531,7 @@ static int ethtool_get_eee(struct net_device *dev, char __user *useraddr) static int ethtool_set_eee(struct net_device *dev, char __user *useraddr) { - struct ethtool_eee edata; + struct ethtool_keee edata; int ret; if (!dev->ethtool_ops->set_eee) -- cgit 1.2.3-korg From 0b3100bc8fa7a05804737dba96075ea8d45316ce Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 27 Jan 2024 14:26:06 +0100 Subject: ethtool: switch back from ethtool_keee to ethtool_eee for ioctl In order to later extend struct ethtool_keee, we have to decouple it from the userspace format represented by struct ethtool_eee. Therefore switch back to struct ethtool_eee, representing the userspace format, and add conversion between ethtool_eee and ethtool_keee. Struct ethtool_keee will be changed in follow-up patches, therefore don't do a *keee = *eee here. Member cmd isn't copied, because it's not used, and we'll remove it in the next patch of this series. In addition omit setting cmd to ETHTOOL_GEEE in the ioctl response, userspace ethtool isn't interested in it. Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/ethtool/ioctl.c | 48 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index b02ca72f438e5..46c29b369aeb0 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1508,22 +1508,50 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) return 0; } +static void eee_to_keee(struct ethtool_keee *keee, + const struct ethtool_eee *eee) +{ + memset(keee, 0, sizeof(*keee)); + + keee->supported = eee->supported; + keee->advertised = eee->advertised; + keee->lp_advertised = eee->lp_advertised; + keee->eee_active = eee->eee_active; + keee->eee_enabled = eee->eee_enabled; + keee->tx_lpi_enabled = eee->tx_lpi_enabled; + keee->tx_lpi_timer = eee->tx_lpi_timer; +} + +static void keee_to_eee(struct ethtool_eee *eee, + const struct ethtool_keee *keee) +{ + memset(eee, 0, sizeof(*eee)); + + eee->supported = keee->supported; + eee->advertised = keee->advertised; + eee->lp_advertised = keee->lp_advertised; + eee->eee_active = keee->eee_active; + eee->eee_enabled = keee->eee_enabled; + eee->tx_lpi_enabled = keee->tx_lpi_enabled; + eee->tx_lpi_timer = keee->tx_lpi_timer; +} + static int ethtool_get_eee(struct net_device *dev, char __user *useraddr) { - struct ethtool_keee edata; + struct ethtool_keee keee; + struct ethtool_eee eee; int rc; if (!dev->ethtool_ops->get_eee) return -EOPNOTSUPP; - memset(&edata, 0, sizeof(struct ethtool_keee)); - edata.cmd = ETHTOOL_GEEE; - rc = dev->ethtool_ops->get_eee(dev, &edata); - + memset(&keee, 0, sizeof(keee)); + rc = dev->ethtool_ops->get_eee(dev, &keee); if (rc) return rc; - if (copy_to_user(useraddr, &edata, sizeof(edata))) + keee_to_eee(&eee, &keee); + if (copy_to_user(useraddr, &eee, sizeof(eee))) return -EFAULT; return 0; @@ -1531,16 +1559,18 @@ static int ethtool_get_eee(struct net_device *dev, char __user *useraddr) static int ethtool_set_eee(struct net_device *dev, char __user *useraddr) { - struct ethtool_keee edata; + struct ethtool_keee keee; + struct ethtool_eee eee; int ret; if (!dev->ethtool_ops->set_eee) return -EOPNOTSUPP; - if (copy_from_user(&edata, useraddr, sizeof(edata))) + if (copy_from_user(&eee, useraddr, sizeof(eee))) return -EFAULT; - ret = dev->ethtool_ops->set_eee(dev, &edata); + eee_to_keee(&keee, &eee); + ret = dev->ethtool_ops->set_eee(dev, &keee); if (!ret) ethtool_notify(dev, ETHTOOL_MSG_EEE_NTF, NULL); return ret; -- cgit 1.2.3-korg From 285cc15cc555b4f05ebf2556bc6e85a6d36b790a Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 27 Jan 2024 14:26:50 +0100 Subject: ethtool: adjust struct ethtool_keee to kernel needs This patch changes the following in struct ethtool_keee - remove member cmd, it's not needed on kernel side - remove reserved fields - switch the semantically boolean members to type bool We don't have to change any user of the boolean members due to the implicit casting from/to bool. A small change is needed where a pointer to bool members is used, in addition remove few now unneeded double negations. Reviewed-by: Andrew Lunn Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- include/linux/ethtool.h | 8 +++----- net/ethtool/eee.c | 12 ++++++------ 2 files changed, 9 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index a850bab8489f2..14549cb9e2b22 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -223,15 +223,13 @@ __ethtool_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *link_ksettings); struct ethtool_keee { - u32 cmd; u32 supported; u32 advertised; u32 lp_advertised; - u32 eee_active; - u32 eee_enabled; - u32 tx_lpi_enabled; u32 tx_lpi_timer; - u32 reserved[2]; + bool tx_lpi_enabled; + bool eee_active; + bool eee_enabled; }; struct kernel_ethtool_coalesce { diff --git a/net/ethtool/eee.c b/net/ethtool/eee.c index 21b0e845a531b..ac9f694ffe205 100644 --- a/net/ethtool/eee.c +++ b/net/ethtool/eee.c @@ -98,10 +98,10 @@ static int eee_fill_reply(struct sk_buff *skb, if (ret < 0) return ret; - if (nla_put_u8(skb, ETHTOOL_A_EEE_ACTIVE, !!eee->eee_active) || - nla_put_u8(skb, ETHTOOL_A_EEE_ENABLED, !!eee->eee_enabled) || + if (nla_put_u8(skb, ETHTOOL_A_EEE_ACTIVE, eee->eee_active) || + nla_put_u8(skb, ETHTOOL_A_EEE_ENABLED, eee->eee_enabled) || nla_put_u8(skb, ETHTOOL_A_EEE_TX_LPI_ENABLED, - !!eee->tx_lpi_enabled) || + eee->tx_lpi_enabled) || nla_put_u32(skb, ETHTOOL_A_EEE_TX_LPI_TIMER, eee->tx_lpi_timer)) return -EMSGSIZE; @@ -145,9 +145,9 @@ ethnl_set_eee(struct ethnl_req_info *req_info, struct genl_info *info) link_mode_names, info->extack, &mod); if (ret < 0) return ret; - ethnl_update_bool32(&eee.eee_enabled, tb[ETHTOOL_A_EEE_ENABLED], &mod); - ethnl_update_bool32(&eee.tx_lpi_enabled, - tb[ETHTOOL_A_EEE_TX_LPI_ENABLED], &mod); + ethnl_update_bool(&eee.eee_enabled, tb[ETHTOOL_A_EEE_ENABLED], &mod); + ethnl_update_bool(&eee.tx_lpi_enabled, tb[ETHTOOL_A_EEE_TX_LPI_ENABLED], + &mod); ethnl_update_u32(&eee.tx_lpi_timer, tb[ETHTOOL_A_EEE_TX_LPI_TIMER], &mod); if (!mod) -- cgit 1.2.3-korg From 1d756ff13da6a2222ac4387511f2a0e2e83ce670 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 27 Jan 2024 14:28:47 +0100 Subject: ethtool: add suffix _u32 to legacy bitmap members of struct ethtool_keee This is in preparation of using the existing names for linkmode bitmaps. Suggested-by: Andrew Lunn Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- .../net/ethernet/aquantia/atlantic/aq_ethtool.c | 8 +++---- .../net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 8 +++---- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 12 +++++----- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 14 +++++------ drivers/net/ethernet/broadcom/tg3.c | 22 ++++++++--------- drivers/net/ethernet/intel/e1000e/ethtool.c | 10 ++++---- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 10 ++++---- drivers/net/ethernet/intel/igb/igb_ethtool.c | 20 ++++++++-------- drivers/net/ethernet/intel/igc/igc_ethtool.c | 12 +++++----- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 18 +++++++------- drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 28 +++++++++++----------- drivers/net/phy/phy-c45.c | 12 +++++----- drivers/net/usb/ax88179_178a.c | 10 ++++---- drivers/net/usb/r8152.c | 14 +++++------ include/linux/ethtool.h | 6 ++--- net/ethtool/eee.c | 16 ++++++------- net/ethtool/ioctl.c | 12 +++++----- 17 files changed, 116 insertions(+), 116 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index be865776de55e..0bd1a0a1ae6af 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -713,14 +713,14 @@ static int aq_ethtool_get_eee(struct net_device *ndev, struct ethtool_keee *eee) if (err < 0) return err; - eee->supported = eee_mask_to_ethtool_mask(supported_rates); + eee->supported_u32 = eee_mask_to_ethtool_mask(supported_rates); if (aq_nic->aq_nic_cfg.eee_speeds) - eee->advertised = eee->supported; + eee->advertised_u32 = eee->supported_u32; - eee->lp_advertised = eee_mask_to_ethtool_mask(rate); + eee->lp_advertised_u32 = eee_mask_to_ethtool_mask(rate); - eee->eee_enabled = !!eee->advertised; + eee->eee_enabled = !!eee->advertised_u32; eee->tx_lpi_enabled = eee->eee_enabled; if ((supported_rates & rate) & AQ_NIC_RATE_EEE_MSK) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 12d2785eeb8a2..5f0e1759d0788 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -2120,14 +2120,14 @@ static int bnx2x_get_eee(struct net_device *dev, struct ethtool_keee *edata) eee_cfg = bp->link_vars.eee_status; - edata->supported = + edata->supported_u32 = bnx2x_eee_to_adv((eee_cfg & SHMEM_EEE_SUPPORTED_MASK) >> SHMEM_EEE_SUPPORTED_SHIFT); - edata->advertised = + edata->advertised_u32 = bnx2x_eee_to_adv((eee_cfg & SHMEM_EEE_ADV_STATUS_MASK) >> SHMEM_EEE_ADV_STATUS_SHIFT); - edata->lp_advertised = + edata->lp_advertised_u32 = bnx2x_eee_to_adv((eee_cfg & SHMEM_EEE_LP_ADV_STATUS_MASK) >> SHMEM_EEE_LP_ADV_STATUS_SHIFT); @@ -2162,7 +2162,7 @@ static int bnx2x_set_eee(struct net_device *dev, struct ethtool_keee *edata) return -EOPNOTSUPP; } - advertised = bnx2x_adv_to_eee(edata->advertised, + advertised = bnx2x_adv_to_eee(edata->advertised_u32, SHMEM_EEE_ADV_STATUS_SHIFT); if ((advertised != (eee_cfg & SHMEM_EEE_ADV_STATUS_MASK))) { DP(BNX2X_MSG_ETHTOOL, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d7626c26f9a9b..fde32b32fa81c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10624,7 +10624,7 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp) struct ethtool_keee *eee = &bp->eee; u16 fw_speeds = le16_to_cpu(resp->supported_speeds_eee_mode); - eee->supported = _bnxt_fw_to_ethtool_adv_spds(fw_speeds, 0); + eee->supported_u32 = _bnxt_fw_to_ethtool_adv_spds(fw_speeds, 0); bp->lpi_tmr_lo = le32_to_cpu(resp->tx_lpi_timer_low) & PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_LOW_MASK; bp->lpi_tmr_hi = le32_to_cpu(resp->valid_tx_lpi_timer_high) & @@ -10775,7 +10775,7 @@ int bnxt_update_link(struct bnxt *bp, bool chng_link_state) eee->eee_active = 1; fw_speeds = le16_to_cpu( resp->link_partner_adv_eee_link_speed_mask); - eee->lp_advertised = + eee->lp_advertised_u32 = _bnxt_fw_to_ethtool_adv_spds(fw_speeds, 0); } @@ -10786,7 +10786,7 @@ int bnxt_update_link(struct bnxt *bp, bool chng_link_state) eee->eee_enabled = 1; fw_speeds = le16_to_cpu(resp->adv_eee_link_speed_mask); - eee->advertised = + eee->advertised_u32 = _bnxt_fw_to_ethtool_adv_spds(fw_speeds, 0); if (resp->eee_config_phy_addr & @@ -10969,7 +10969,7 @@ static void bnxt_hwrm_set_eee(struct bnxt *bp, flags |= PORT_PHY_CFG_REQ_FLAGS_EEE_TX_LPI_DISABLE; req->flags |= cpu_to_le32(flags); - eee_speeds = bnxt_get_fw_auto_link_speeds(eee->advertised); + eee_speeds = bnxt_get_fw_auto_link_speeds(eee->advertised_u32); req->eee_link_speed_mask = cpu_to_le16(eee_speeds); req->tx_lpi_timer = cpu_to_le32(eee->tx_lpi_timer); } else { @@ -11336,8 +11336,8 @@ static bool bnxt_eee_config_ok(struct bnxt *bp) eee->eee_enabled = 0; return false; } - if (eee->advertised & ~advertising) { - eee->advertised = advertising & eee->supported; + if (eee->advertised_u32 & ~advertising) { + eee->advertised_u32 = advertising & eee->supported_u32; return false; } } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index d6a8577d68af9..481b835a7703a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -3919,16 +3919,16 @@ static int bnxt_set_eee(struct net_device *dev, struct ethtool_keee *edata) edata->tx_lpi_timer = eee->tx_lpi_timer; } } - if (!edata->advertised) { - edata->advertised = advertising & eee->supported; - } else if (edata->advertised & ~advertising) { + if (!edata->advertised_u32) { + edata->advertised_u32 = advertising & eee->supported_u32; + } else if (edata->advertised_u32 & ~advertising) { netdev_warn(dev, "EEE advertised %x must be a subset of autoneg advertised speeds %x\n", - edata->advertised, advertising); + edata->advertised_u32, advertising); rc = -EINVAL; goto eee_exit; } - eee->advertised = edata->advertised; + eee->advertised_u32 = edata->advertised_u32; eee->tx_lpi_enabled = edata->tx_lpi_enabled; eee->tx_lpi_timer = edata->tx_lpi_timer; eee_ok: @@ -3954,12 +3954,12 @@ static int bnxt_get_eee(struct net_device *dev, struct ethtool_keee *edata) /* Preserve tx_lpi_timer so that the last value will be used * by default when it is re-enabled. */ - edata->advertised = 0; + edata->advertised_u32 = 0; edata->tx_lpi_enabled = 0; } if (!bp->eee.eee_active) - edata->lp_advertised = 0; + edata->lp_advertised_u32 = 0; return 0; } diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 11054177c2ece..f644a91317c91 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -2362,13 +2362,13 @@ static void tg3_eee_pull_config(struct tg3 *tp, struct ethtool_keee *eee) /* Pull lp advertised settings */ if (tg3_phy_cl45_read(tp, MDIO_MMD_AN, MDIO_AN_EEE_LPABLE, &val)) return; - dest->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(val); + dest->lp_advertised_u32 = mmd_eee_adv_to_ethtool_adv_t(val); /* Pull advertised and eee_enabled settings */ if (tg3_phy_cl45_read(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, &val)) return; dest->eee_enabled = !!val; - dest->advertised = mmd_eee_adv_to_ethtool_adv_t(val); + dest->advertised_u32 = mmd_eee_adv_to_ethtool_adv_t(val); /* Pull tx_lpi_enabled */ val = tr32(TG3_CPMU_EEE_MODE); @@ -4364,9 +4364,9 @@ static int tg3_phy_autoneg_cfg(struct tg3 *tp, u32 advertise, u32 flowctrl) if (!tp->eee.eee_enabled) { val = 0; - tp->eee.advertised = 0; + tp->eee.advertised_u32 = 0; } else { - tp->eee.advertised = advertise & + tp->eee.advertised_u32 = advertise & (ADVERTISED_100baseT_Full | ADVERTISED_1000baseT_Full); } @@ -4626,13 +4626,13 @@ static bool tg3_phy_eee_config_ok(struct tg3 *tp) tg3_eee_pull_config(tp, &eee); if (tp->eee.eee_enabled) { - if (tp->eee.advertised != eee.advertised || + if (tp->eee.advertised_u32 != eee.advertised_u32 || tp->eee.tx_lpi_timer != eee.tx_lpi_timer || tp->eee.tx_lpi_enabled != eee.tx_lpi_enabled) return false; } else { /* EEE is disabled but we're advertising */ - if (eee.advertised) + if (eee.advertised_u32) return false; } @@ -14189,7 +14189,7 @@ static int tg3_set_eee(struct net_device *dev, struct ethtool_keee *edata) return -EOPNOTSUPP; } - if (edata->advertised != tp->eee.advertised) { + if (edata->advertised_u32 != tp->eee.advertised_u32) { netdev_warn(tp->dev, "Direct manipulation of EEE advertisement is not supported\n"); return -EINVAL; @@ -15655,10 +15655,10 @@ static int tg3_phy_probe(struct tg3 *tp) tg3_chip_rev_id(tp) != CHIPREV_ID_57765_A0))) { tp->phy_flags |= TG3_PHYFLG_EEE_CAP; - tp->eee.supported = SUPPORTED_100baseT_Full | - SUPPORTED_1000baseT_Full; - tp->eee.advertised = ADVERTISED_100baseT_Full | - ADVERTISED_1000baseT_Full; + tp->eee.supported_u32 = SUPPORTED_100baseT_Full | + SUPPORTED_1000baseT_Full; + tp->eee.advertised_u32 = ADVERTISED_100baseT_Full | + ADVERTISED_1000baseT_Full; tp->eee.eee_enabled = 1; tp->eee.tx_lpi_enabled = 1; tp->eee.tx_lpi_timer = TG3_CPMU_DBTMR1_LNKIDLE_2047US; diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index 343f54b2b6ae2..ff243ae71b780 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -2223,16 +2223,16 @@ static int e1000e_get_eee(struct net_device *netdev, struct ethtool_keee *edata) ret_val = e1000_read_emi_reg_locked(hw, cap_addr, &phy_data); if (ret_val) goto release; - edata->supported = mmd_eee_cap_to_ethtool_sup_t(phy_data); + edata->supported_u32 = mmd_eee_cap_to_ethtool_sup_t(phy_data); /* EEE Advertised */ - edata->advertised = mmd_eee_adv_to_ethtool_adv_t(adapter->eee_advert); + edata->advertised_u32 = mmd_eee_adv_to_ethtool_adv_t(adapter->eee_advert); /* EEE Link Partner Advertised */ ret_val = e1000_read_emi_reg_locked(hw, lpa_addr, &phy_data); if (ret_val) goto release; - edata->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data); + edata->lp_advertised_u32 = mmd_eee_adv_to_ethtool_adv_t(phy_data); /* EEE PCS Status */ ret_val = e1000_read_emi_reg_locked(hw, pcs_stat_addr, &phy_data); @@ -2283,12 +2283,12 @@ static int e1000e_set_eee(struct net_device *netdev, struct ethtool_keee *edata) return -EINVAL; } - if (edata->advertised & ~(ADVERTISE_100_FULL | ADVERTISE_1000_FULL)) { + if (edata->advertised_u32 & ~(ADVERTISE_100_FULL | ADVERTISE_1000_FULL)) { e_err("EEE advertisement supports only 100TX and/or 1000T full-duplex\n"); return -EINVAL; } - adapter->eee_advert = ethtool_adv_to_mmd_eee_adv_t(edata->advertised); + adapter->eee_advert = ethtool_adv_to_mmd_eee_adv_t(edata->advertised_u32); hw->dev_spec.ich8lan.eee_disable = !edata->eee_enabled; diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 9dfda3c48af35..1b5473358e1a0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -5664,16 +5664,16 @@ static int i40e_get_eee(struct net_device *netdev, struct ethtool_keee *edata) if (phy_cfg.eee_capability == 0) return -EOPNOTSUPP; - edata->supported = SUPPORTED_Autoneg; - edata->lp_advertised = edata->supported; + edata->supported_u32 = SUPPORTED_Autoneg; + edata->lp_advertised_u32 = edata->supported_u32; /* Get current configuration */ status = i40e_aq_get_phy_capabilities(hw, false, false, &phy_cfg, NULL); if (status) return -EAGAIN; - edata->advertised = phy_cfg.eee_capability ? SUPPORTED_Autoneg : 0U; - edata->eee_enabled = !!edata->advertised; + edata->advertised_u32 = phy_cfg.eee_capability ? SUPPORTED_Autoneg : 0U; + edata->eee_enabled = !!edata->advertised_u32; edata->tx_lpi_enabled = pf->stats.tx_lpi_status; edata->eee_active = pf->stats.tx_lpi_status && pf->stats.rx_lpi_status; @@ -5691,7 +5691,7 @@ static int i40e_is_eee_param_supported(struct net_device *netdev, u32 value; const char *name; } param[] = { - {edata->advertised & ~SUPPORTED_Autoneg, "advertise"}, + {edata->advertised_u32 & ~SUPPORTED_Autoneg, "advertise"}, {edata->tx_lpi_timer, "tx-timer"}, {edata->tx_lpi_enabled != pf->stats.tx_lpi_status, "tx-lpi"} }; diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 778d1e6cfc45d..b87b23d2151c0 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -3038,10 +3038,10 @@ static int igb_get_eee(struct net_device *netdev, struct ethtool_keee *edata) (hw->phy.media_type != e1000_media_type_copper)) return -EOPNOTSUPP; - edata->supported = (SUPPORTED_1000baseT_Full | - SUPPORTED_100baseT_Full); + edata->supported_u32 = (SUPPORTED_1000baseT_Full | + SUPPORTED_100baseT_Full); if (!hw->dev_spec._82575.eee_disable) - edata->advertised = + edata->advertised_u32 = mmd_eee_adv_to_ethtool_adv_t(adapter->eee_advert); /* The IPCNFG and EEER registers are not supported on I354. */ @@ -3068,7 +3068,7 @@ static int igb_get_eee(struct net_device *netdev, struct ethtool_keee *edata) if (ret_val) return -ENODATA; - edata->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data); + edata->lp_advertised_u32 = mmd_eee_adv_to_ethtool_adv_t(phy_data); break; case e1000_i354: case e1000_i210: @@ -3079,7 +3079,7 @@ static int igb_get_eee(struct net_device *netdev, struct ethtool_keee *edata) if (ret_val) return -ENODATA; - edata->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data); + edata->lp_advertised_u32 = mmd_eee_adv_to_ethtool_adv_t(phy_data); break; default: @@ -3099,7 +3099,7 @@ static int igb_get_eee(struct net_device *netdev, struct ethtool_keee *edata) edata->eee_enabled = false; edata->eee_active = false; edata->tx_lpi_enabled = false; - edata->advertised &= ~edata->advertised; + edata->advertised_u32 &= ~edata->advertised_u32; } return 0; @@ -3138,14 +3138,14 @@ static int igb_set_eee(struct net_device *netdev, return -EINVAL; } - if (!edata->advertised || (edata->advertised & + if (!edata->advertised_u32 || (edata->advertised_u32 & ~(ADVERTISE_100_FULL | ADVERTISE_1000_FULL))) { dev_err(&adapter->pdev->dev, "EEE Advertisement supports only 100Tx and/or 100T full duplex\n"); return -EINVAL; } - adv100m_eee = !!(edata->advertised & ADVERTISE_100_FULL); - adv1g_eee = !!(edata->advertised & ADVERTISE_1000_FULL); + adv100m_eee = !!(edata->advertised_u32 & ADVERTISE_100_FULL); + adv1g_eee = !!(edata->advertised_u32 & ADVERTISE_1000_FULL); } else if (!edata->eee_enabled) { dev_err(&adapter->pdev->dev, @@ -3153,7 +3153,7 @@ static int igb_set_eee(struct net_device *netdev, return -EINVAL; } - adapter->eee_advert = ethtool_adv_to_mmd_eee_adv_t(edata->advertised); + adapter->eee_advert = ethtool_adv_to_mmd_eee_adv_t(edata->advertised_u32); if (hw->dev_spec._82575.eee_disable != !edata->eee_enabled) { hw->dev_spec._82575.eee_disable = !edata->eee_enabled; adapter->flags |= IGB_FLAG_EEE; diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index f2dcfe920f4f3..7f844e9674219 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1630,11 +1630,11 @@ static int igc_ethtool_get_eee(struct net_device *netdev, u32 eeer; if (hw->dev_spec._base.eee_enable) - edata->advertised = + edata->advertised_u32 = mmd_eee_adv_to_ethtool_adv_t(adapter->eee_advert); *edata = adapter->eee; - edata->supported = SUPPORTED_Autoneg; + edata->supported_u32 = SUPPORTED_Autoneg; eeer = rd32(IGC_EEER); @@ -1647,8 +1647,8 @@ static int igc_ethtool_get_eee(struct net_device *netdev, edata->eee_enabled = hw->dev_spec._base.eee_enable; - edata->advertised = SUPPORTED_Autoneg; - edata->lp_advertised = SUPPORTED_Autoneg; + edata->advertised_u32 = SUPPORTED_Autoneg; + edata->lp_advertised_u32 = SUPPORTED_Autoneg; /* Report correct negotiated EEE status for devices that * wrongly report EEE at half-duplex @@ -1657,7 +1657,7 @@ static int igc_ethtool_get_eee(struct net_device *netdev, edata->eee_enabled = false; edata->eee_active = false; edata->tx_lpi_enabled = false; - edata->advertised &= ~edata->advertised; + edata->advertised_u32 &= ~edata->advertised_u32; } return 0; @@ -1699,7 +1699,7 @@ static int igc_ethtool_set_eee(struct net_device *netdev, return -EINVAL; } - adapter->eee_advert = ethtool_adv_to_mmd_eee_adv_t(edata->advertised); + adapter->eee_advert = ethtool_adv_to_mmd_eee_adv_t(edata->advertised_u32); if (hw->dev_spec._base.eee_enable != edata->eee_enabled) { hw->dev_spec._base.eee_enable = edata->eee_enabled; adapter->flags |= IGC_FLAG_EEE; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 0aa73519a29d3..ca69a8221793d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -3436,27 +3436,27 @@ ixgbe_get_eee_fw(struct ixgbe_adapter *adapter, struct ethtool_keee *edata) if (rc) return rc; - edata->lp_advertised = 0; + edata->lp_advertised_u32 = 0; for (i = 0; i < ARRAY_SIZE(ixgbe_lp_map); ++i) { if (info[0] & ixgbe_lp_map[i].lp_advertised) - edata->lp_advertised |= ixgbe_lp_map[i].mac_speed; + edata->lp_advertised_u32 |= ixgbe_lp_map[i].mac_speed; } - edata->supported = 0; + edata->supported_u32 = 0; for (i = 0; i < ARRAY_SIZE(ixgbe_ls_map); ++i) { if (hw->phy.eee_speeds_supported & ixgbe_ls_map[i].mac_speed) - edata->supported |= ixgbe_ls_map[i].supported; + edata->supported_u32 |= ixgbe_ls_map[i].supported; } - edata->advertised = 0; + edata->advertised_u32 = 0; for (i = 0; i < ARRAY_SIZE(ixgbe_ls_map); ++i) { if (hw->phy.eee_speeds_advertised & ixgbe_ls_map[i].mac_speed) - edata->advertised |= ixgbe_ls_map[i].supported; + edata->advertised_u32 |= ixgbe_ls_map[i].supported; } - edata->eee_enabled = !!edata->advertised; + edata->eee_enabled = !!edata->advertised_u32; edata->tx_lpi_enabled = edata->eee_enabled; - if (edata->advertised & edata->lp_advertised) + if (edata->advertised_u32 & edata->lp_advertised_u32) edata->eee_active = true; return 0; @@ -3504,7 +3504,7 @@ static int ixgbe_set_eee(struct net_device *netdev, struct ethtool_keee *edata) return -EINVAL; } - if (eee_data.advertised != edata->advertised) { + if (eee_data.advertised_u32 != edata->advertised_u32) { e_err(drv, "Setting EEE advertised speeds is not supported\n"); return -EINVAL; diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 77491fb640393..dfa15619fd782 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -1790,17 +1790,17 @@ static int qede_get_eee(struct net_device *dev, struct ethtool_keee *edata) } if (current_link.eee.adv_caps & QED_EEE_1G_ADV) - edata->advertised = ADVERTISED_1000baseT_Full; + edata->advertised_u32 = ADVERTISED_1000baseT_Full; if (current_link.eee.adv_caps & QED_EEE_10G_ADV) - edata->advertised |= ADVERTISED_10000baseT_Full; + edata->advertised_u32 |= ADVERTISED_10000baseT_Full; if (current_link.sup_caps & QED_EEE_1G_ADV) - edata->supported = ADVERTISED_1000baseT_Full; + edata->supported_u32 = ADVERTISED_1000baseT_Full; if (current_link.sup_caps & QED_EEE_10G_ADV) - edata->supported |= ADVERTISED_10000baseT_Full; + edata->supported_u32 |= ADVERTISED_10000baseT_Full; if (current_link.eee.lp_adv_caps & QED_EEE_1G_ADV) - edata->lp_advertised = ADVERTISED_1000baseT_Full; + edata->lp_advertised_u32 = ADVERTISED_1000baseT_Full; if (current_link.eee.lp_adv_caps & QED_EEE_10G_ADV) - edata->lp_advertised |= ADVERTISED_10000baseT_Full; + edata->lp_advertised_u32 |= ADVERTISED_10000baseT_Full; edata->tx_lpi_timer = current_link.eee.tx_lpi_timer; edata->eee_enabled = current_link.eee.enable; @@ -1832,20 +1832,20 @@ static int qede_set_eee(struct net_device *dev, struct ethtool_keee *edata) memset(¶ms, 0, sizeof(params)); params.override_flags |= QED_LINK_OVERRIDE_EEE_CONFIG; - if (!(edata->advertised & (ADVERTISED_1000baseT_Full | - ADVERTISED_10000baseT_Full)) || - ((edata->advertised & (ADVERTISED_1000baseT_Full | - ADVERTISED_10000baseT_Full)) != - edata->advertised)) { + if (!(edata->advertised_u32 & (ADVERTISED_1000baseT_Full | + ADVERTISED_10000baseT_Full)) || + ((edata->advertised_u32 & (ADVERTISED_1000baseT_Full | + ADVERTISED_10000baseT_Full)) != + edata->advertised_u32)) { DP_VERBOSE(edev, QED_MSG_DEBUG, "Invalid advertised capabilities %d\n", - edata->advertised); + edata->advertised_u32); return -EINVAL; } - if (edata->advertised & ADVERTISED_1000baseT_Full) + if (edata->advertised_u32 & ADVERTISED_1000baseT_Full) params.eee.adv_caps = QED_EEE_1G_ADV; - if (edata->advertised & ADVERTISED_10000baseT_Full) + if (edata->advertised_u32 & ADVERTISED_10000baseT_Full) params.eee.adv_caps |= QED_EEE_10G_ADV; params.eee.enable = edata->eee_enabled; params.eee.tx_lpi_enable = edata->tx_lpi_enabled; diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index adee5e712871f..99c84af257468 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -1463,12 +1463,12 @@ int genphy_c45_ethtool_get_eee(struct phy_device *phydev, data->eee_enabled = is_enabled; data->eee_active = ret; - if (!ethtool_convert_link_mode_to_legacy_u32(&data->supported, + if (!ethtool_convert_link_mode_to_legacy_u32(&data->supported_u32, phydev->supported_eee)) overflow = true; - if (!ethtool_convert_link_mode_to_legacy_u32(&data->advertised, adv)) + if (!ethtool_convert_link_mode_to_legacy_u32(&data->advertised_u32, adv)) overflow = true; - if (!ethtool_convert_link_mode_to_legacy_u32(&data->lp_advertised, lp)) + if (!ethtool_convert_link_mode_to_legacy_u32(&data->lp_advertised_u32, lp)) overflow = true; if (overflow) @@ -1495,11 +1495,11 @@ int genphy_c45_ethtool_set_eee(struct phy_device *phydev, int ret; if (data->eee_enabled) { - if (data->advertised) { + if (data->advertised_u32) { __ETHTOOL_DECLARE_LINK_MODE_MASK(adv); ethtool_convert_legacy_u32_to_link_mode(adv, - data->advertised); + data->advertised_u32); linkmode_andnot(adv, adv, phydev->supported_eee); if (!linkmode_empty(adv)) { phydev_warn(phydev, "At least some EEE link modes are not supported.\n"); @@ -1507,7 +1507,7 @@ int genphy_c45_ethtool_set_eee(struct phy_device *phydev, } ethtool_convert_legacy_u32_to_link_mode(phydev->advertising_eee, - data->advertised); + data->advertised_u32); } else { linkmode_copy(phydev->advertising_eee, phydev->supported_eee); diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 3922a9afdd1fe..d6168eaa286fe 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -676,21 +676,21 @@ ax88179_ethtool_get_eee(struct usbnet *dev, struct ethtool_keee *data) MDIO_MMD_PCS); if (val < 0) return val; - data->supported = mmd_eee_cap_to_ethtool_sup_t(val); + data->supported_u32 = mmd_eee_cap_to_ethtool_sup_t(val); /* Get advertisement EEE */ val = ax88179_phy_read_mmd_indirect(dev, MDIO_AN_EEE_ADV, MDIO_MMD_AN); if (val < 0) return val; - data->advertised = mmd_eee_adv_to_ethtool_adv_t(val); + data->advertised_u32 = mmd_eee_adv_to_ethtool_adv_t(val); /* Get LP advertisement EEE */ val = ax88179_phy_read_mmd_indirect(dev, MDIO_AN_EEE_LPABLE, MDIO_MMD_AN); if (val < 0) return val; - data->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(val); + data->lp_advertised_u32 = mmd_eee_adv_to_ethtool_adv_t(val); return 0; } @@ -698,7 +698,7 @@ ax88179_ethtool_get_eee(struct usbnet *dev, struct ethtool_keee *data) static int ax88179_ethtool_set_eee(struct usbnet *dev, struct ethtool_keee *data) { - u16 tmp16 = ethtool_adv_to_mmd_eee_adv_t(data->advertised); + u16 tmp16 = ethtool_adv_to_mmd_eee_adv_t(data->advertised_u32); return ax88179_phy_write_mmd_indirect(dev, MDIO_AN_EEE_ADV, MDIO_MMD_AN, tmp16); @@ -1663,7 +1663,7 @@ static int ax88179_reset(struct usbnet *dev) ax88179_disable_eee(dev); ax88179_ethtool_get_eee(dev, &eee_data); - eee_data.advertised = 0; + eee_data.advertised_u32 = 0; ax88179_ethtool_set_eee(dev, &eee_data); /* Restart autoneg */ diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index dc163b766a890..3d806b3ff4258 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -8938,16 +8938,16 @@ static int r8152_get_eee(struct r8152 *tp, struct ethtool_keee *eee) eee->eee_enabled = tp->eee_en; eee->eee_active = !!(supported & adv & lp); - eee->supported = supported; - eee->advertised = tp->eee_adv; - eee->lp_advertised = lp; + eee->supported_u32 = supported; + eee->advertised_u32 = tp->eee_adv; + eee->lp_advertised_u32 = lp; return 0; } static int r8152_set_eee(struct r8152 *tp, struct ethtool_keee *eee) { - u16 val = ethtool_adv_to_mmd_eee_adv_t(eee->advertised); + u16 val = ethtool_adv_to_mmd_eee_adv_t(eee->advertised_u32); tp->eee_en = eee->eee_enabled; tp->eee_adv = val; @@ -8973,9 +8973,9 @@ static int r8153_get_eee(struct r8152 *tp, struct ethtool_keee *eee) eee->eee_enabled = tp->eee_en; eee->eee_active = !!(supported & adv & lp); - eee->supported = supported; - eee->advertised = tp->eee_adv; - eee->lp_advertised = lp; + eee->supported_u32 = supported; + eee->advertised_u32 = tp->eee_adv; + eee->lp_advertised_u32 = lp; return 0; } diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 14549cb9e2b22..89807c30f5a7a 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -223,9 +223,9 @@ __ethtool_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *link_ksettings); struct ethtool_keee { - u32 supported; - u32 advertised; - u32 lp_advertised; + u32 supported_u32; + u32 advertised_u32; + u32 lp_advertised_u32; u32 tx_lpi_timer; bool tx_lpi_enabled; bool eee_active; diff --git a/net/ethtool/eee.c b/net/ethtool/eee.c index ac9f694ffe205..ca56f28173d50 100644 --- a/net/ethtool/eee.c +++ b/net/ethtool/eee.c @@ -5,7 +5,7 @@ #include "bitset.h" #define EEE_MODES_COUNT \ - (sizeof_field(struct ethtool_keee, supported) * BITS_PER_BYTE) + (sizeof_field(struct ethtool_keee, supported_u32) * BITS_PER_BYTE) struct eee_req_info { struct ethnl_req_info base; @@ -52,19 +52,19 @@ static int eee_reply_size(const struct ethnl_req_info *req_base, int len = 0; int ret; - BUILD_BUG_ON(sizeof(eee->advertised) * BITS_PER_BYTE != + BUILD_BUG_ON(sizeof(eee->advertised_u32) * BITS_PER_BYTE != EEE_MODES_COUNT); - BUILD_BUG_ON(sizeof(eee->lp_advertised) * BITS_PER_BYTE != + BUILD_BUG_ON(sizeof(eee->lp_advertised_u32) * BITS_PER_BYTE != EEE_MODES_COUNT); /* MODES_OURS */ - ret = ethnl_bitset32_size(&eee->advertised, &eee->supported, + ret = ethnl_bitset32_size(&eee->advertised_u32, &eee->supported_u32, EEE_MODES_COUNT, link_mode_names, compact); if (ret < 0) return ret; len += ret; /* MODES_PEERS */ - ret = ethnl_bitset32_size(&eee->lp_advertised, NULL, + ret = ethnl_bitset32_size(&eee->lp_advertised_u32, NULL, EEE_MODES_COUNT, link_mode_names, compact); if (ret < 0) return ret; @@ -88,12 +88,12 @@ static int eee_fill_reply(struct sk_buff *skb, int ret; ret = ethnl_put_bitset32(skb, ETHTOOL_A_EEE_MODES_OURS, - &eee->advertised, &eee->supported, + &eee->advertised_u32, &eee->supported_u32, EEE_MODES_COUNT, link_mode_names, compact); if (ret < 0) return ret; ret = ethnl_put_bitset32(skb, ETHTOOL_A_EEE_MODES_PEER, - &eee->lp_advertised, NULL, EEE_MODES_COUNT, + &eee->lp_advertised_u32, NULL, EEE_MODES_COUNT, link_mode_names, compact); if (ret < 0) return ret; @@ -140,7 +140,7 @@ ethnl_set_eee(struct ethnl_req_info *req_info, struct genl_info *info) if (ret < 0) return ret; - ret = ethnl_update_bitset32(&eee.advertised, EEE_MODES_COUNT, + ret = ethnl_update_bitset32(&eee.advertised_u32, EEE_MODES_COUNT, tb[ETHTOOL_A_EEE_MODES_OURS], link_mode_names, info->extack, &mod); if (ret < 0) diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 46c29b369aeb0..5b2ca72e32037 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1513,9 +1513,9 @@ static void eee_to_keee(struct ethtool_keee *keee, { memset(keee, 0, sizeof(*keee)); - keee->supported = eee->supported; - keee->advertised = eee->advertised; - keee->lp_advertised = eee->lp_advertised; + keee->supported_u32 = eee->supported; + keee->advertised_u32 = eee->advertised; + keee->lp_advertised_u32 = eee->lp_advertised; keee->eee_active = eee->eee_active; keee->eee_enabled = eee->eee_enabled; keee->tx_lpi_enabled = eee->tx_lpi_enabled; @@ -1527,9 +1527,9 @@ static void keee_to_eee(struct ethtool_eee *eee, { memset(eee, 0, sizeof(*eee)); - eee->supported = keee->supported; - eee->advertised = keee->advertised; - eee->lp_advertised = keee->lp_advertised; + eee->supported = keee->supported_u32; + eee->advertised = keee->advertised_u32; + eee->lp_advertised = keee->lp_advertised_u32; eee->eee_active = keee->eee_active; eee->eee_enabled = keee->eee_enabled; eee->tx_lpi_enabled = keee->tx_lpi_enabled; -- cgit 1.2.3-korg From 1f069de63602e8d39d7d9fd6195f65235316f79a Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 27 Jan 2024 14:29:33 +0100 Subject: ethtool: add linkmode bitmap support to struct ethtool_keee Add linkmode bitmap members to struct ethtool_keee, but keep the legacy u32 bitmaps for compatibility with existing drivers. Use linkmode "supported" not being empty as indicator that a user wants to use the linkmode bitmap members instead of the legacy bitmaps. Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/ethtool.h | 3 +++ net/ethtool/common.c | 5 +++++ net/ethtool/common.h | 1 + net/ethtool/eee.c | 49 +++++++++++++++++++++++++++++++++++-------------- net/ethtool/ioctl.c | 27 ++++++++++++++++++++++++--- 5 files changed, 68 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 89807c30f5a7a..b90c33607594b 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -223,6 +223,9 @@ __ethtool_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *link_ksettings); struct ethtool_keee { + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised); + __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertised); u32 supported_u32; u32 advertised_u32; u32 lp_advertised_u32; diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 6b2a360dcdf06..ce486cec346c8 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -712,3 +712,8 @@ ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size) } } EXPORT_SYMBOL_GPL(ethtool_forced_speed_maps_init); + +bool ethtool_eee_use_linkmodes(const struct ethtool_keee *eee) +{ + return !linkmode_empty(eee->supported); +} diff --git a/net/ethtool/common.h b/net/ethtool/common.h index 28b8aaaf9bcb3..0f2b5f7eacee7 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -55,5 +55,6 @@ int ethtool_get_module_eeprom_call(struct net_device *dev, struct ethtool_eeprom *ee, u8 *data); bool __ethtool_dev_mm_supported(struct net_device *dev); +bool ethtool_eee_use_linkmodes(const struct ethtool_keee *eee); #endif /* _ETHTOOL_COMMON_H */ diff --git a/net/ethtool/eee.c b/net/ethtool/eee.c index ca56f28173d50..db6faa18fe413 100644 --- a/net/ethtool/eee.c +++ b/net/ethtool/eee.c @@ -30,6 +30,7 @@ static int eee_prepare_data(const struct ethnl_req_info *req_base, { struct eee_reply_data *data = EEE_REPDATA(reply_base); struct net_device *dev = reply_base->dev; + struct ethtool_keee *eee = &data->eee; int ret; if (!dev->ethtool_ops->get_eee) @@ -37,9 +38,18 @@ static int eee_prepare_data(const struct ethnl_req_info *req_base, ret = ethnl_ops_begin(dev); if (ret < 0) return ret; - ret = dev->ethtool_ops->get_eee(dev, &data->eee); + ret = dev->ethtool_ops->get_eee(dev, eee); ethnl_ops_complete(dev); + if (!ret && !ethtool_eee_use_linkmodes(eee)) { + ethtool_convert_legacy_u32_to_link_mode(eee->supported, + eee->supported_u32); + ethtool_convert_legacy_u32_to_link_mode(eee->advertised, + eee->advertised_u32); + ethtool_convert_legacy_u32_to_link_mode(eee->lp_advertised, + eee->lp_advertised_u32); + } + return ret; } @@ -58,14 +68,16 @@ static int eee_reply_size(const struct ethnl_req_info *req_base, EEE_MODES_COUNT); /* MODES_OURS */ - ret = ethnl_bitset32_size(&eee->advertised_u32, &eee->supported_u32, - EEE_MODES_COUNT, link_mode_names, compact); + ret = ethnl_bitset_size(eee->advertised, eee->supported, + __ETHTOOL_LINK_MODE_MASK_NBITS, + link_mode_names, compact); if (ret < 0) return ret; len += ret; /* MODES_PEERS */ - ret = ethnl_bitset32_size(&eee->lp_advertised_u32, NULL, - EEE_MODES_COUNT, link_mode_names, compact); + ret = ethnl_bitset_size(eee->lp_advertised, NULL, + __ETHTOOL_LINK_MODE_MASK_NBITS, + link_mode_names, compact); if (ret < 0) return ret; len += ret; @@ -87,14 +99,16 @@ static int eee_fill_reply(struct sk_buff *skb, const struct ethtool_keee *eee = &data->eee; int ret; - ret = ethnl_put_bitset32(skb, ETHTOOL_A_EEE_MODES_OURS, - &eee->advertised_u32, &eee->supported_u32, - EEE_MODES_COUNT, link_mode_names, compact); + ret = ethnl_put_bitset(skb, ETHTOOL_A_EEE_MODES_OURS, + eee->advertised, eee->supported, + __ETHTOOL_LINK_MODE_MASK_NBITS, + link_mode_names, compact); if (ret < 0) return ret; - ret = ethnl_put_bitset32(skb, ETHTOOL_A_EEE_MODES_PEER, - &eee->lp_advertised_u32, NULL, EEE_MODES_COUNT, - link_mode_names, compact); + ret = ethnl_put_bitset(skb, ETHTOOL_A_EEE_MODES_PEER, + eee->lp_advertised, NULL, + __ETHTOOL_LINK_MODE_MASK_NBITS, + link_mode_names, compact); if (ret < 0) return ret; @@ -140,9 +154,16 @@ ethnl_set_eee(struct ethnl_req_info *req_info, struct genl_info *info) if (ret < 0) return ret; - ret = ethnl_update_bitset32(&eee.advertised_u32, EEE_MODES_COUNT, - tb[ETHTOOL_A_EEE_MODES_OURS], - link_mode_names, info->extack, &mod); + if (ethtool_eee_use_linkmodes(&eee)) { + ret = ethnl_update_bitset(eee.advertised, + __ETHTOOL_LINK_MODE_MASK_NBITS, + tb[ETHTOOL_A_EEE_MODES_OURS], + link_mode_names, info->extack, &mod); + } else { + ret = ethnl_update_bitset32(&eee.advertised_u32, EEE_MODES_COUNT, + tb[ETHTOOL_A_EEE_MODES_OURS], + link_mode_names, info->extack, &mod); + } if (ret < 0) return ret; ethnl_update_bool(&eee.eee_enabled, tb[ETHTOOL_A_EEE_ENABLED], &mod); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 5b2ca72e32037..1763e8b697e14 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1520,6 +1520,13 @@ static void eee_to_keee(struct ethtool_keee *keee, keee->eee_enabled = eee->eee_enabled; keee->tx_lpi_enabled = eee->tx_lpi_enabled; keee->tx_lpi_timer = eee->tx_lpi_timer; + + ethtool_convert_legacy_u32_to_link_mode(keee->supported, + eee->supported); + ethtool_convert_legacy_u32_to_link_mode(keee->advertised, + eee->advertised); + ethtool_convert_legacy_u32_to_link_mode(keee->lp_advertised, + eee->lp_advertised); } static void keee_to_eee(struct ethtool_eee *eee, @@ -1527,13 +1534,27 @@ static void keee_to_eee(struct ethtool_eee *eee, { memset(eee, 0, sizeof(*eee)); - eee->supported = keee->supported_u32; - eee->advertised = keee->advertised_u32; - eee->lp_advertised = keee->lp_advertised_u32; eee->eee_active = keee->eee_active; eee->eee_enabled = keee->eee_enabled; eee->tx_lpi_enabled = keee->tx_lpi_enabled; eee->tx_lpi_timer = keee->tx_lpi_timer; + + if (ethtool_eee_use_linkmodes(keee)) { + bool overflow; + + overflow = !ethtool_convert_link_mode_to_legacy_u32(&eee->supported, + keee->supported); + ethtool_convert_link_mode_to_legacy_u32(&eee->advertised, + keee->advertised); + ethtool_convert_link_mode_to_legacy_u32(&eee->lp_advertised, + keee->lp_advertised); + if (overflow) + pr_warn("Ethtool ioctl interface doesn't support passing EEE linkmodes beyond bit 32\n"); + } else { + eee->supported = keee->supported_u32; + eee->advertised = keee->advertised_u32; + eee->lp_advertised = keee->lp_advertised_u32; + } } static int ethtool_get_eee(struct net_device *dev, char __user *useraddr) -- cgit 1.2.3-korg From 57f2c6350f2d8d835def0a613a3d37c28d146102 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Tue, 30 Jan 2024 17:22:55 +0800 Subject: net: ipv4: Simplify the allocation of slab caches in inet_initpeers commit 0a31bd5f2bbb ("KMEM_CACHE(): simplify slab cache creation") introduces a new macro. Use the new KMEM_CACHE() macro instead of direct kmem_cache_create to simplify the creation of SLAB caches. Signed-off-by: Kunwu Chan Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240130092255.73078-1-chentao@kylinos.cn Signed-off-by: Jakub Kicinski --- net/ipv4/inetpeer.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index e9fed83e9b3cc..5bd7599634517 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -81,10 +81,7 @@ void __init inet_initpeers(void) inet_peer_threshold = clamp_val(nr_entries, 4096, 65536 + 128); - peer_cachep = kmem_cache_create("inet_peer_cache", - sizeof(struct inet_peer), - 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, - NULL); + peer_cachep = KMEM_CACHE(inet_peer, SLAB_HWCACHE_ALIGN | SLAB_PANIC); } /* Called with rcu_read_lock() or base->lock held */ -- cgit 1.2.3-korg From 2dc23b6f852bc7816d7ab421979d95223e894be3 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Tue, 30 Jan 2024 17:25:36 +0800 Subject: net: bridge: Use KMEM_CACHE instead of kmem_cache_create commit 0a31bd5f2bbb ("KMEM_CACHE(): simplify slab cache creation") introduces a new macro. Use the new KMEM_CACHE() macro instead of direct kmem_cache_create to simplify the creation of SLAB caches. Signed-off-by: Kunwu Chan Acked-by: Nikolay Aleksandrov Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240130092536.73623-1-chentao@kylinos.cn Signed-off-by: Jakub Kicinski --- net/bridge/br_fdb.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index c622de5eccd0a..c77591e638417 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -35,10 +35,7 @@ static struct kmem_cache *br_fdb_cache __read_mostly; int __init br_fdb_init(void) { - br_fdb_cache = kmem_cache_create("bridge_fdb_cache", - sizeof(struct net_bridge_fdb_entry), - 0, - SLAB_HWCACHE_ALIGN, NULL); + br_fdb_cache = KMEM_CACHE(net_bridge_fdb_entry, SLAB_HWCACHE_ALIGN); if (!br_fdb_cache) return -ENOMEM; -- cgit 1.2.3-korg From d0f6dc26346863e1f4a23117f5468614e54df064 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 29 Jan 2024 11:04:33 -0800 Subject: af_unix: Replace BUG_ON() with WARN_ON_ONCE(). This is a prep patch for the last patch in this series so that checkpatch will not warn about BUG_ON(). Signed-off-by: Kuniyuki Iwashima Acked-by: Jens Axboe Link: https://lore.kernel.org/r/20240129190435.57228-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/unix/garbage.c | 8 ++++---- net/unix/scm.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 4046c606f0e6e..af676bb8fb677 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -145,7 +145,7 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *), /* An embryo cannot be in-flight, so it's safe * to use the list link. */ - BUG_ON(!list_empty(&u->link)); + WARN_ON_ONCE(!list_empty(&u->link)); list_add_tail(&u->link, &embryos); } spin_unlock(&x->sk_receive_queue.lock); @@ -213,8 +213,8 @@ static void __unix_gc(struct work_struct *work) total_refs = file_count(u->sk.sk_socket->file); - BUG_ON(!u->inflight); - BUG_ON(total_refs < u->inflight); + WARN_ON_ONCE(!u->inflight); + WARN_ON_ONCE(total_refs < u->inflight); if (total_refs == u->inflight) { list_move_tail(&u->link, &gc_candidates); __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); @@ -294,7 +294,7 @@ static void __unix_gc(struct work_struct *work) list_move_tail(&u->link, &gc_inflight_list); /* All candidates should have been detached by now. */ - BUG_ON(!list_empty(&gc_candidates)); + WARN_ON_ONCE(!list_empty(&gc_candidates)); /* Paired with READ_ONCE() in wait_for_unix_gc(). */ WRITE_ONCE(gc_in_progress, false); diff --git a/net/unix/scm.c b/net/unix/scm.c index b5ae5ab167773..505e56cf02a21 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -51,10 +51,10 @@ void unix_inflight(struct user_struct *user, struct file *fp) if (u) { if (!u->inflight) { - BUG_ON(!list_empty(&u->link)); + WARN_ON_ONCE(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); } else { - BUG_ON(list_empty(&u->link)); + WARN_ON_ONCE(list_empty(&u->link)); } u->inflight++; /* Paired with READ_ONCE() in wait_for_unix_gc() */ @@ -71,8 +71,8 @@ void unix_notinflight(struct user_struct *user, struct file *fp) spin_lock(&unix_gc_lock); if (u) { - BUG_ON(!u->inflight); - BUG_ON(list_empty(&u->link)); + WARN_ON_ONCE(!u->inflight); + WARN_ON_ONCE(list_empty(&u->link)); u->inflight--; if (!u->inflight) -- cgit 1.2.3-korg From 11498715f266a3fb4caabba9dd575636cbcaa8f1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 29 Jan 2024 11:04:34 -0800 Subject: af_unix: Remove io_uring code for GC. Since commit 705318a99a13 ("io_uring/af_unix: disable sending io_uring over sockets"), io_uring's unix socket cannot be passed via SCM_RIGHTS, so it does not contribute to cyclic reference and no longer be candidate for garbage collection. Also, commit 6e5e6d274956 ("io_uring: drop any code related to SCM_RIGHTS") cleaned up SCM_RIGHTS code in io_uring. Let's do it in AF_UNIX as well by reverting commit 0091bfc81741 ("io_uring/af_unix: defer registered files gc to io_uring release") and commit 10369080454d ("net: reclaim skb->scm_io_uring bit"). Signed-off-by: Kuniyuki Iwashima Acked-by: Jens Axboe Link: https://lore.kernel.org/r/20240129190435.57228-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/af_unix.h | 1 - net/unix/garbage.c | 25 ++----------------------- net/unix/scm.c | 6 ------ 3 files changed, 2 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index f045bbd9017d1..9e39b2ec45249 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -20,7 +20,6 @@ static inline struct unix_sock *unix_get_socket(struct file *filp) void unix_inflight(struct user_struct *user, struct file *fp); void unix_notinflight(struct user_struct *user, struct file *fp); void unix_destruct_scm(struct sk_buff *skb); -void io_uring_destruct_scm(struct sk_buff *skb); void unix_gc(void); void wait_for_unix_gc(struct scm_fp_list *fpl); struct sock *unix_peer_get(struct sock *sk); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index af676bb8fb677..ce5b5f87b16e2 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -184,12 +184,10 @@ static bool gc_in_progress; static void __unix_gc(struct work_struct *work) { - struct sk_buff *next_skb, *skb; - struct unix_sock *u; - struct unix_sock *next; struct sk_buff_head hitlist; - struct list_head cursor; + struct unix_sock *u, *next; LIST_HEAD(not_cycle_list); + struct list_head cursor; spin_lock(&unix_gc_lock); @@ -269,30 +267,11 @@ static void __unix_gc(struct work_struct *work) spin_unlock(&unix_gc_lock); - /* We need io_uring to clean its registered files, ignore all io_uring - * originated skbs. It's fine as io_uring doesn't keep references to - * other io_uring instances and so killing all other files in the cycle - * will put all io_uring references forcing it to go through normal - * release.path eventually putting registered files. - */ - skb_queue_walk_safe(&hitlist, skb, next_skb) { - if (skb->destructor == io_uring_destruct_scm) { - __skb_unlink(skb, &hitlist); - skb_queue_tail(&skb->sk->sk_receive_queue, skb); - } - } - /* Here we are. Hitlist is filled. Die. */ __skb_queue_purge(&hitlist); spin_lock(&unix_gc_lock); - /* There could be io_uring registered files, just push them back to - * the inflight list - */ - list_for_each_entry_safe(u, next, &gc_candidates, link) - list_move_tail(&u->link, &gc_inflight_list); - /* All candidates should have been detached by now. */ WARN_ON_ONCE(!list_empty(&gc_candidates)); diff --git a/net/unix/scm.c b/net/unix/scm.c index 505e56cf02a21..db65b0ab59479 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -148,9 +148,3 @@ void unix_destruct_scm(struct sk_buff *skb) sock_wfree(skb); } EXPORT_SYMBOL(unix_destruct_scm); - -void io_uring_destruct_scm(struct sk_buff *skb) -{ - unix_destruct_scm(skb); -} -EXPORT_SYMBOL(io_uring_destruct_scm); -- cgit 1.2.3-korg From 99a7a5b9943ea2d05fb0dee38e4ae2290477ed83 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 29 Jan 2024 11:04:35 -0800 Subject: af_unix: Remove CONFIG_UNIX_SCM. Originally, the code related to garbage collection was all in garbage.c. Commit f4e65870e5ce ("net: split out functions related to registering inflight socket files") moved some functions to scm.c for io_uring and added CONFIG_UNIX_SCM just in case AF_UNIX was built as module. However, since commit 97154bcf4d1b ("af_unix: Kconfig: make CONFIG_UNIX bool"), AF_UNIX is no longer built separately. Also, io_uring does not support SCM_RIGHTS now. Let's move the functions back to garbage.c Signed-off-by: Kuniyuki Iwashima Acked-by: Jens Axboe Link: https://lore.kernel.org/r/20240129190435.57228-4-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/af_unix.h | 7 ++- net/Makefile | 2 +- net/unix/Kconfig | 5 -- net/unix/Makefile | 2 - net/unix/af_unix.c | 63 ++++++++++++++++++++- net/unix/garbage.c | 73 +++++++++++++++++++++++- net/unix/scm.c | 150 -------------------------------------------------- net/unix/scm.h | 10 ---- 8 files changed, 137 insertions(+), 175 deletions(-) delete mode 100644 net/unix/scm.c delete mode 100644 net/unix/scm.h (limited to 'net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 9e39b2ec45249..54e346152eb10 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -17,19 +17,20 @@ static inline struct unix_sock *unix_get_socket(struct file *filp) } #endif +extern spinlock_t unix_gc_lock; +extern unsigned int unix_tot_inflight; + void unix_inflight(struct user_struct *user, struct file *fp); void unix_notinflight(struct user_struct *user, struct file *fp); -void unix_destruct_scm(struct sk_buff *skb); void unix_gc(void); void wait_for_unix_gc(struct scm_fp_list *fpl); + struct sock *unix_peer_get(struct sock *sk); #define UNIX_HASH_MOD (256 - 1) #define UNIX_HASH_SIZE (256 * 2) #define UNIX_HASH_BITS 8 -extern unsigned int unix_tot_inflight; - struct unix_address { refcount_t refcnt; int len; diff --git a/net/Makefile b/net/Makefile index b06b5539e7a6b..65bb8c72a35e9 100644 --- a/net/Makefile +++ b/net/Makefile @@ -17,7 +17,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_INET) += ipv4/ obj-$(CONFIG_TLS) += tls/ obj-$(CONFIG_XFRM) += xfrm/ -obj-$(CONFIG_UNIX_SCM) += unix/ +obj-$(CONFIG_UNIX) += unix/ obj-y += ipv6/ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ diff --git a/net/unix/Kconfig b/net/unix/Kconfig index 28b232f281ab1..8b5d04210d7cf 100644 --- a/net/unix/Kconfig +++ b/net/unix/Kconfig @@ -16,11 +16,6 @@ config UNIX Say Y unless you know what you are doing. -config UNIX_SCM - bool - depends on UNIX - default y - config AF_UNIX_OOB bool depends on UNIX diff --git a/net/unix/Makefile b/net/unix/Makefile index 20491825b4d0d..4ddd125c4642c 100644 --- a/net/unix/Makefile +++ b/net/unix/Makefile @@ -11,5 +11,3 @@ unix-$(CONFIG_BPF_SYSCALL) += unix_bpf.o obj-$(CONFIG_UNIX_DIAG) += unix_diag.o unix_diag-y := diag.o - -obj-$(CONFIG_UNIX_SCM) += scm.o diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 1720419d93d68..1cfbc586adb41 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -118,8 +118,6 @@ #include #include -#include "scm.h" - static atomic_long_t unix_nr_socks; static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2]; static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2]; @@ -1790,6 +1788,52 @@ out: return err; } +/* The "user->unix_inflight" variable is protected by the garbage + * collection lock, and we just read it locklessly here. If you go + * over the limit, there might be a tiny race in actually noticing + * it across threads. Tough. + */ +static inline bool too_many_unix_fds(struct task_struct *p) +{ + struct user_struct *user = current_user(); + + if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE))) + return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); + return false; +} + +static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) +{ + int i; + + if (too_many_unix_fds(current)) + return -ETOOMANYREFS; + + /* Need to duplicate file references for the sake of garbage + * collection. Otherwise a socket in the fps might become a + * candidate for GC while the skb is not yet queued. + */ + UNIXCB(skb).fp = scm_fp_dup(scm->fp); + if (!UNIXCB(skb).fp) + return -ENOMEM; + + for (i = scm->fp->count - 1; i >= 0; i--) + unix_inflight(scm->fp->user, scm->fp->fp[i]); + + return 0; +} + +static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) +{ + int i; + + scm->fp = UNIXCB(skb).fp; + UNIXCB(skb).fp = NULL; + + for (i = scm->fp->count - 1; i >= 0; i--) + unix_notinflight(scm->fp->user, scm->fp->fp[i]); +} + static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) { scm->fp = scm_fp_dup(UNIXCB(skb).fp); @@ -1837,6 +1881,21 @@ static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) spin_unlock(&unix_gc_lock); } +static void unix_destruct_scm(struct sk_buff *skb) +{ + struct scm_cookie scm; + + memset(&scm, 0, sizeof(scm)); + scm.pid = UNIXCB(skb).pid; + if (UNIXCB(skb).fp) + unix_detach_fds(&scm, skb); + + /* Alas, it calls VFS */ + /* So fscking what? fput() had been SMP-safe since the last Summer */ + scm_destroy(&scm); + sock_wfree(skb); +} + static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) { int err = 0; diff --git a/net/unix/garbage.c b/net/unix/garbage.c index ce5b5f87b16e2..9b8473dd79a4b 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -81,11 +81,80 @@ #include #include -#include "scm.h" +struct unix_sock *unix_get_socket(struct file *filp) +{ + struct inode *inode = file_inode(filp); + + /* Socket ? */ + if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) { + struct socket *sock = SOCKET_I(inode); + const struct proto_ops *ops; + struct sock *sk = sock->sk; -/* Internal data structures and random procedures: */ + ops = READ_ONCE(sock->ops); + /* PF_UNIX ? */ + if (sk && ops && ops->family == PF_UNIX) + return unix_sk(sk); + } + + return NULL; +} + +DEFINE_SPINLOCK(unix_gc_lock); +unsigned int unix_tot_inflight; static LIST_HEAD(gc_candidates); +static LIST_HEAD(gc_inflight_list); + +/* Keep the number of times in flight count for the file + * descriptor if it is for an AF_UNIX socket. + */ +void unix_inflight(struct user_struct *user, struct file *filp) +{ + struct unix_sock *u = unix_get_socket(filp); + + spin_lock(&unix_gc_lock); + + if (u) { + if (!u->inflight) { + WARN_ON_ONCE(!list_empty(&u->link)); + list_add_tail(&u->link, &gc_inflight_list); + } else { + WARN_ON_ONCE(list_empty(&u->link)); + } + u->inflight++; + + /* Paired with READ_ONCE() in wait_for_unix_gc() */ + WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1); + } + + WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1); + + spin_unlock(&unix_gc_lock); +} + +void unix_notinflight(struct user_struct *user, struct file *filp) +{ + struct unix_sock *u = unix_get_socket(filp); + + spin_lock(&unix_gc_lock); + + if (u) { + WARN_ON_ONCE(!u->inflight); + WARN_ON_ONCE(list_empty(&u->link)); + + u->inflight--; + if (!u->inflight) + list_del_init(&u->link); + + /* Paired with READ_ONCE() in wait_for_unix_gc() */ + WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1); + } + + WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1); + + spin_unlock(&unix_gc_lock); +} static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), struct sk_buff_head *hitlist) diff --git a/net/unix/scm.c b/net/unix/scm.c deleted file mode 100644 index db65b0ab59479..0000000000000 --- a/net/unix/scm.c +++ /dev/null @@ -1,150 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "scm.h" - -unsigned int unix_tot_inflight; -EXPORT_SYMBOL(unix_tot_inflight); - -LIST_HEAD(gc_inflight_list); -EXPORT_SYMBOL(gc_inflight_list); - -DEFINE_SPINLOCK(unix_gc_lock); -EXPORT_SYMBOL(unix_gc_lock); - -struct unix_sock *unix_get_socket(struct file *filp) -{ - struct inode *inode = file_inode(filp); - - /* Socket ? */ - if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) { - struct socket *sock = SOCKET_I(inode); - const struct proto_ops *ops = READ_ONCE(sock->ops); - struct sock *s = sock->sk; - - /* PF_UNIX ? */ - if (s && ops && ops->family == PF_UNIX) - return unix_sk(s); - } - - return NULL; -} -EXPORT_SYMBOL(unix_get_socket); - -/* Keep the number of times in flight count for the file - * descriptor if it is for an AF_UNIX socket. - */ -void unix_inflight(struct user_struct *user, struct file *fp) -{ - struct unix_sock *u = unix_get_socket(fp); - - spin_lock(&unix_gc_lock); - - if (u) { - if (!u->inflight) { - WARN_ON_ONCE(!list_empty(&u->link)); - list_add_tail(&u->link, &gc_inflight_list); - } else { - WARN_ON_ONCE(list_empty(&u->link)); - } - u->inflight++; - /* Paired with READ_ONCE() in wait_for_unix_gc() */ - WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1); - } - WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1); - spin_unlock(&unix_gc_lock); -} - -void unix_notinflight(struct user_struct *user, struct file *fp) -{ - struct unix_sock *u = unix_get_socket(fp); - - spin_lock(&unix_gc_lock); - - if (u) { - WARN_ON_ONCE(!u->inflight); - WARN_ON_ONCE(list_empty(&u->link)); - - u->inflight--; - if (!u->inflight) - list_del_init(&u->link); - /* Paired with READ_ONCE() in wait_for_unix_gc() */ - WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1); - } - WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1); - spin_unlock(&unix_gc_lock); -} - -/* - * The "user->unix_inflight" variable is protected by the garbage - * collection lock, and we just read it locklessly here. If you go - * over the limit, there might be a tiny race in actually noticing - * it across threads. Tough. - */ -static inline bool too_many_unix_fds(struct task_struct *p) -{ - struct user_struct *user = current_user(); - - if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE))) - return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); - return false; -} - -int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) -{ - int i; - - if (too_many_unix_fds(current)) - return -ETOOMANYREFS; - - /* - * Need to duplicate file references for the sake of garbage - * collection. Otherwise a socket in the fps might become a - * candidate for GC while the skb is not yet queued. - */ - UNIXCB(skb).fp = scm_fp_dup(scm->fp); - if (!UNIXCB(skb).fp) - return -ENOMEM; - - for (i = scm->fp->count - 1; i >= 0; i--) - unix_inflight(scm->fp->user, scm->fp->fp[i]); - return 0; -} -EXPORT_SYMBOL(unix_attach_fds); - -void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) -{ - int i; - - scm->fp = UNIXCB(skb).fp; - UNIXCB(skb).fp = NULL; - - for (i = scm->fp->count-1; i >= 0; i--) - unix_notinflight(scm->fp->user, scm->fp->fp[i]); -} -EXPORT_SYMBOL(unix_detach_fds); - -void unix_destruct_scm(struct sk_buff *skb) -{ - struct scm_cookie scm; - - memset(&scm, 0, sizeof(scm)); - scm.pid = UNIXCB(skb).pid; - if (UNIXCB(skb).fp) - unix_detach_fds(&scm, skb); - - /* Alas, it calls VFS */ - /* So fscking what? fput() had been SMP-safe since the last Summer */ - scm_destroy(&scm); - sock_wfree(skb); -} -EXPORT_SYMBOL(unix_destruct_scm); diff --git a/net/unix/scm.h b/net/unix/scm.h deleted file mode 100644 index 5a255a477f160..0000000000000 --- a/net/unix/scm.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef NET_UNIX_SCM_H -#define NET_UNIX_SCM_H - -extern struct list_head gc_inflight_list; -extern spinlock_t unix_gc_lock; - -int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb); -void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb); - -#endif -- cgit 1.2.3-korg From 6f3189f38a3e995232e028a4c341164c4aca1b20 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Sun, 28 Jan 2024 18:24:08 -0700 Subject: bpf: treewide: Annotate BPF kfuncs in BTF This commit marks kfuncs as such inside the .BTF_ids section. The upshot of these annotations is that we'll be able to automatically generate kfunc prototypes for downstream users. The process is as follows: 1. In source, use BTF_KFUNCS_START/END macro pair to mark kfuncs 2. During build, pahole injects into BTF a "bpf_kfunc" BTF_DECL_TAG for each function inside BTF_KFUNCS sets 3. At runtime, vmlinux or module BTF is made available in sysfs 4. At runtime, bpftool (or similar) can look at provided BTF and generate appropriate prototypes for functions with "bpf_kfunc" tag To ensure future kfunc are similarly tagged, we now also return error inside kfunc registration for untagged kfuncs. For vmlinux kfuncs, we also WARN(), as initcall machinery does not handle errors. Signed-off-by: Daniel Xu Acked-by: Benjamin Tissoires Link: https://lore.kernel.org/r/e55150ceecbf0a5d961e608941165c0bee7bc943.1706491398.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- Documentation/bpf/kfuncs.rst | 8 ++++---- drivers/hid/bpf/hid_bpf_dispatch.c | 8 ++++---- fs/verity/measure.c | 4 ++-- kernel/bpf/btf.c | 8 ++++++++ kernel/bpf/cpumask.c | 4 ++-- kernel/bpf/helpers.c | 8 ++++---- kernel/bpf/map_iter.c | 4 ++-- kernel/cgroup/rstat.c | 4 ++-- kernel/trace/bpf_trace.c | 8 ++++---- net/bpf/test_run.c | 8 ++++---- net/core/filter.c | 20 ++++++++++---------- net/core/xdp.c | 4 ++-- net/ipv4/bpf_tcp_ca.c | 4 ++-- net/ipv4/fou_bpf.c | 4 ++-- net/ipv4/tcp_bbr.c | 4 ++-- net/ipv4/tcp_cubic.c | 4 ++-- net/ipv4/tcp_dctcp.c | 4 ++-- net/netfilter/nf_conntrack_bpf.c | 4 ++-- net/netfilter/nf_nat_bpf.c | 4 ++-- net/xfrm/xfrm_interface_bpf.c | 4 ++-- net/xfrm/xfrm_state_bpf.c | 4 ++-- .../testing/selftests/bpf/bpf_testmod/bpf_testmod.c | 8 ++++---- 22 files changed, 70 insertions(+), 62 deletions(-) (limited to 'net') diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index 7985c6615f3c2..a8f5782bd8331 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -177,10 +177,10 @@ In addition to kfuncs' arguments, verifier may need more information about the type of kfunc(s) being registered with the BPF subsystem. To do so, we define flags on a set of kfuncs as follows:: - BTF_SET8_START(bpf_task_set) + BTF_KFUNCS_START(bpf_task_set) BTF_ID_FLAGS(func, bpf_get_task_pid, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_put_pid, KF_RELEASE) - BTF_SET8_END(bpf_task_set) + BTF_KFUNCS_END(bpf_task_set) This set encodes the BTF ID of each kfunc listed above, and encodes the flags along with it. Ofcourse, it is also allowed to specify no flags. @@ -347,10 +347,10 @@ Once the kfunc is prepared for use, the final step to making it visible is registering it with the BPF subsystem. Registration is done per BPF program type. An example is shown below:: - BTF_SET8_START(bpf_task_set) + BTF_KFUNCS_START(bpf_task_set) BTF_ID_FLAGS(func, bpf_get_task_pid, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_put_pid, KF_RELEASE) - BTF_SET8_END(bpf_task_set) + BTF_KFUNCS_END(bpf_task_set) static const struct btf_kfunc_id_set bpf_task_kfunc_set = { .owner = THIS_MODULE, diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index d9ef45fcaeab1..02c441aaa2175 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -172,9 +172,9 @@ hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t rdwr * The following set contains all functions we agree BPF programs * can use. */ -BTF_SET8_START(hid_bpf_kfunc_ids) +BTF_KFUNCS_START(hid_bpf_kfunc_ids) BTF_ID_FLAGS(func, hid_bpf_get_data, KF_RET_NULL) -BTF_SET8_END(hid_bpf_kfunc_ids) +BTF_KFUNCS_END(hid_bpf_kfunc_ids) static const struct btf_kfunc_id_set hid_bpf_kfunc_set = { .owner = THIS_MODULE, @@ -440,12 +440,12 @@ static const struct btf_kfunc_id_set hid_bpf_fmodret_set = { }; /* for syscall HID-BPF */ -BTF_SET8_START(hid_bpf_syscall_kfunc_ids) +BTF_KFUNCS_START(hid_bpf_syscall_kfunc_ids) BTF_ID_FLAGS(func, hid_bpf_attach_prog) BTF_ID_FLAGS(func, hid_bpf_allocate_context, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, hid_bpf_release_context, KF_RELEASE) BTF_ID_FLAGS(func, hid_bpf_hw_request) -BTF_SET8_END(hid_bpf_syscall_kfunc_ids) +BTF_KFUNCS_END(hid_bpf_syscall_kfunc_ids) static const struct btf_kfunc_id_set hid_bpf_syscall_kfunc_set = { .owner = THIS_MODULE, diff --git a/fs/verity/measure.c b/fs/verity/measure.c index bf7a5f4cccaf0..3969d54158d12 100644 --- a/fs/verity/measure.c +++ b/fs/verity/measure.c @@ -159,9 +159,9 @@ __bpf_kfunc int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr_ker __bpf_kfunc_end_defs(); -BTF_SET8_START(fsverity_set_ids) +BTF_KFUNCS_START(fsverity_set_ids) BTF_ID_FLAGS(func, bpf_get_fsverity_digest, KF_TRUSTED_ARGS) -BTF_SET8_END(fsverity_set_ids) +BTF_KFUNCS_END(fsverity_set_ids) static int bpf_get_fsverity_digest_filter(const struct bpf_prog *prog, u32 kfunc_id) { diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index c8c6e6cf18e7f..ef380e5469521 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -8124,6 +8124,14 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, { enum btf_kfunc_hook hook; + /* All kfuncs need to be tagged as such in BTF. + * WARN() for initcall registrations that do not check errors. + */ + if (!(kset->set->flags & BTF_SET8_KFUNCS)) { + WARN_ON(!kset->owner); + return -EINVAL; + } + hook = bpf_prog_type_to_kfunc_hook(prog_type); return __register_btf_kfunc_id_set(hook, kset); } diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c index 2e73533a3811c..dad0fb1c8e876 100644 --- a/kernel/bpf/cpumask.c +++ b/kernel/bpf/cpumask.c @@ -424,7 +424,7 @@ __bpf_kfunc u32 bpf_cpumask_weight(const struct cpumask *cpumask) __bpf_kfunc_end_defs(); -BTF_SET8_START(cpumask_kfunc_btf_ids) +BTF_KFUNCS_START(cpumask_kfunc_btf_ids) BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) @@ -450,7 +450,7 @@ BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_any_distribute, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_any_and_distribute, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_weight, KF_RCU) -BTF_SET8_END(cpumask_kfunc_btf_ids) +BTF_KFUNCS_END(cpumask_kfunc_btf_ids) static const struct btf_kfunc_id_set cpumask_kfunc_set = { .owner = THIS_MODULE, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index bcb951a2ecf4b..4db1c658254c1 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2544,7 +2544,7 @@ __bpf_kfunc void bpf_throw(u64 cookie) __bpf_kfunc_end_defs(); -BTF_SET8_START(generic_btf_ids) +BTF_KFUNCS_START(generic_btf_ids) #ifdef CONFIG_KEXEC_CORE BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) #endif @@ -2573,7 +2573,7 @@ BTF_ID_FLAGS(func, bpf_task_get_cgroup1, KF_ACQUIRE | KF_RCU | KF_RET_NULL) #endif BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_throw) -BTF_SET8_END(generic_btf_ids) +BTF_KFUNCS_END(generic_btf_ids) static const struct btf_kfunc_id_set generic_kfunc_set = { .owner = THIS_MODULE, @@ -2589,7 +2589,7 @@ BTF_ID(struct, cgroup) BTF_ID(func, bpf_cgroup_release_dtor) #endif -BTF_SET8_START(common_btf_ids) +BTF_KFUNCS_START(common_btf_ids) BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx) BTF_ID_FLAGS(func, bpf_rdonly_cast) BTF_ID_FLAGS(func, bpf_rcu_read_lock) @@ -2618,7 +2618,7 @@ BTF_ID_FLAGS(func, bpf_dynptr_is_null) BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly) BTF_ID_FLAGS(func, bpf_dynptr_size) BTF_ID_FLAGS(func, bpf_dynptr_clone) -BTF_SET8_END(common_btf_ids) +BTF_KFUNCS_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { .owner = THIS_MODULE, diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c index 6abd7c5df4b39..9575314f40a69 100644 --- a/kernel/bpf/map_iter.c +++ b/kernel/bpf/map_iter.c @@ -213,9 +213,9 @@ __bpf_kfunc s64 bpf_map_sum_elem_count(const struct bpf_map *map) __bpf_kfunc_end_defs(); -BTF_SET8_START(bpf_map_iter_kfunc_ids) +BTF_KFUNCS_START(bpf_map_iter_kfunc_ids) BTF_ID_FLAGS(func, bpf_map_sum_elem_count, KF_TRUSTED_ARGS) -BTF_SET8_END(bpf_map_iter_kfunc_ids) +BTF_KFUNCS_END(bpf_map_iter_kfunc_ids) static const struct btf_kfunc_id_set bpf_map_iter_kfunc_set = { .owner = THIS_MODULE, diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index a8350d2d63e6b..07e2284bb4997 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -562,10 +562,10 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq) } /* Add bpf kfuncs for cgroup_rstat_updated() and cgroup_rstat_flush() */ -BTF_SET8_START(bpf_rstat_kfunc_ids) +BTF_KFUNCS_START(bpf_rstat_kfunc_ids) BTF_ID_FLAGS(func, cgroup_rstat_updated) BTF_ID_FLAGS(func, cgroup_rstat_flush, KF_SLEEPABLE) -BTF_SET8_END(bpf_rstat_kfunc_ids) +BTF_KFUNCS_END(bpf_rstat_kfunc_ids) static const struct btf_kfunc_id_set bpf_rstat_kfunc_set = { .owner = THIS_MODULE, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 64fdaf79d1136..241ddf5e38953 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1412,14 +1412,14 @@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, __bpf_kfunc_end_defs(); -BTF_SET8_START(key_sig_kfunc_set) +BTF_KFUNCS_START(key_sig_kfunc_set) BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_lookup_system_key, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_key_put, KF_RELEASE) #ifdef CONFIG_SYSTEM_DATA_VERIFICATION BTF_ID_FLAGS(func, bpf_verify_pkcs7_signature, KF_SLEEPABLE) #endif -BTF_SET8_END(key_sig_kfunc_set) +BTF_KFUNCS_END(key_sig_kfunc_set) static const struct btf_kfunc_id_set bpf_key_sig_kfunc_set = { .owner = THIS_MODULE, @@ -1475,9 +1475,9 @@ __bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str, __bpf_kfunc_end_defs(); -BTF_SET8_START(fs_kfunc_set_ids) +BTF_KFUNCS_START(fs_kfunc_set_ids) BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS) -BTF_SET8_END(fs_kfunc_set_ids) +BTF_KFUNCS_END(fs_kfunc_set_ids) static int bpf_get_file_xattr_filter(const struct bpf_prog *prog, u32 kfunc_id) { diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index dfd9193740178..5535f9adc6589 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -617,21 +617,21 @@ CFI_NOSEAL(bpf_kfunc_call_memb_release_dtor); __bpf_kfunc_end_defs(); -BTF_SET8_START(bpf_test_modify_return_ids) +BTF_KFUNCS_START(bpf_test_modify_return_ids) BTF_ID_FLAGS(func, bpf_modify_return_test) BTF_ID_FLAGS(func, bpf_modify_return_test2) BTF_ID_FLAGS(func, bpf_fentry_test1, KF_SLEEPABLE) -BTF_SET8_END(bpf_test_modify_return_ids) +BTF_KFUNCS_END(bpf_test_modify_return_ids) static const struct btf_kfunc_id_set bpf_test_modify_return_set = { .owner = THIS_MODULE, .set = &bpf_test_modify_return_ids, }; -BTF_SET8_START(test_sk_check_kfunc_ids) +BTF_KFUNCS_START(test_sk_check_kfunc_ids) BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_kfunc_call_memb_release, KF_RELEASE) -BTF_SET8_END(test_sk_check_kfunc_ids) +BTF_KFUNCS_END(test_sk_check_kfunc_ids) static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, u32 size, u32 headroom, u32 tailroom) diff --git a/net/core/filter.c b/net/core/filter.c index 358870408a51e..524adf1fa6d01 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11982,21 +11982,21 @@ int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, return 0; } -BTF_SET8_START(bpf_kfunc_check_set_skb) +BTF_KFUNCS_START(bpf_kfunc_check_set_skb) BTF_ID_FLAGS(func, bpf_dynptr_from_skb) -BTF_SET8_END(bpf_kfunc_check_set_skb) +BTF_KFUNCS_END(bpf_kfunc_check_set_skb) -BTF_SET8_START(bpf_kfunc_check_set_xdp) +BTF_KFUNCS_START(bpf_kfunc_check_set_xdp) BTF_ID_FLAGS(func, bpf_dynptr_from_xdp) -BTF_SET8_END(bpf_kfunc_check_set_xdp) +BTF_KFUNCS_END(bpf_kfunc_check_set_xdp) -BTF_SET8_START(bpf_kfunc_check_set_sock_addr) +BTF_KFUNCS_START(bpf_kfunc_check_set_sock_addr) BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path) -BTF_SET8_END(bpf_kfunc_check_set_sock_addr) +BTF_KFUNCS_END(bpf_kfunc_check_set_sock_addr) -BTF_SET8_START(bpf_kfunc_check_set_tcp_reqsk) +BTF_KFUNCS_START(bpf_kfunc_check_set_tcp_reqsk) BTF_ID_FLAGS(func, bpf_sk_assign_tcp_reqsk, KF_TRUSTED_ARGS) -BTF_SET8_END(bpf_kfunc_check_set_tcp_reqsk) +BTF_KFUNCS_END(bpf_kfunc_check_set_tcp_reqsk) static const struct btf_kfunc_id_set bpf_kfunc_set_skb = { .owner = THIS_MODULE, @@ -12075,9 +12075,9 @@ __bpf_kfunc int bpf_sock_destroy(struct sock_common *sock) __bpf_kfunc_end_defs(); -BTF_SET8_START(bpf_sk_iter_kfunc_ids) +BTF_KFUNCS_START(bpf_sk_iter_kfunc_ids) BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS) -BTF_SET8_END(bpf_sk_iter_kfunc_ids) +BTF_KFUNCS_END(bpf_sk_iter_kfunc_ids) static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id) { diff --git a/net/core/xdp.c b/net/core/xdp.c index 4869c1c2d8f3d..034fb80f3fbe9 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -771,11 +771,11 @@ __bpf_kfunc int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, __bpf_kfunc_end_defs(); -BTF_SET8_START(xdp_metadata_kfunc_ids) +BTF_KFUNCS_START(xdp_metadata_kfunc_ids) #define XDP_METADATA_KFUNC(_, __, name, ___) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS) XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC -BTF_SET8_END(xdp_metadata_kfunc_ids) +BTF_KFUNCS_END(xdp_metadata_kfunc_ids) static const struct btf_kfunc_id_set xdp_metadata_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 834edc18463ac..7f518ea5f4ac7 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -201,13 +201,13 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, } } -BTF_SET8_START(bpf_tcp_ca_check_kfunc_ids) +BTF_KFUNCS_START(bpf_tcp_ca_check_kfunc_ids) BTF_ID_FLAGS(func, tcp_reno_ssthresh) BTF_ID_FLAGS(func, tcp_reno_cong_avoid) BTF_ID_FLAGS(func, tcp_reno_undo_cwnd) BTF_ID_FLAGS(func, tcp_slow_start) BTF_ID_FLAGS(func, tcp_cong_avoid_ai) -BTF_SET8_END(bpf_tcp_ca_check_kfunc_ids) +BTF_KFUNCS_END(bpf_tcp_ca_check_kfunc_ids) static const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/ipv4/fou_bpf.c b/net/ipv4/fou_bpf.c index 4da03bf45c9b7..06e5572f296f1 100644 --- a/net/ipv4/fou_bpf.c +++ b/net/ipv4/fou_bpf.c @@ -100,10 +100,10 @@ __bpf_kfunc int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx, __bpf_kfunc_end_defs(); -BTF_SET8_START(fou_kfunc_set) +BTF_KFUNCS_START(fou_kfunc_set) BTF_ID_FLAGS(func, bpf_skb_set_fou_encap) BTF_ID_FLAGS(func, bpf_skb_get_fou_encap) -BTF_SET8_END(fou_kfunc_set) +BTF_KFUNCS_END(fou_kfunc_set) static const struct btf_kfunc_id_set fou_bpf_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 22358032dd484..05dc2d05bc7cb 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -1155,7 +1155,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { .set_state = bbr_set_state, }; -BTF_SET8_START(tcp_bbr_check_kfunc_ids) +BTF_KFUNCS_START(tcp_bbr_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE BTF_ID_FLAGS(func, bbr_init) @@ -1168,7 +1168,7 @@ BTF_ID_FLAGS(func, bbr_min_tso_segs) BTF_ID_FLAGS(func, bbr_set_state) #endif #endif -BTF_SET8_END(tcp_bbr_check_kfunc_ids) +BTF_KFUNCS_END(tcp_bbr_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 0fd78ecb67e75..44869ea089e34 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -485,7 +485,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = { .name = "cubic", }; -BTF_SET8_START(tcp_cubic_check_kfunc_ids) +BTF_KFUNCS_START(tcp_cubic_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE BTF_ID_FLAGS(func, cubictcp_init) @@ -496,7 +496,7 @@ BTF_ID_FLAGS(func, cubictcp_cwnd_event) BTF_ID_FLAGS(func, cubictcp_acked) #endif #endif -BTF_SET8_END(tcp_cubic_check_kfunc_ids) +BTF_KFUNCS_END(tcp_cubic_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index bb23bb5b387a0..e33fbe4933e42 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -260,7 +260,7 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = { .name = "dctcp-reno", }; -BTF_SET8_START(tcp_dctcp_check_kfunc_ids) +BTF_KFUNCS_START(tcp_dctcp_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE BTF_ID_FLAGS(func, dctcp_init) @@ -271,7 +271,7 @@ BTF_ID_FLAGS(func, dctcp_cwnd_undo) BTF_ID_FLAGS(func, dctcp_state) #endif #endif -BTF_SET8_END(tcp_dctcp_check_kfunc_ids) +BTF_KFUNCS_END(tcp_dctcp_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index 475358ec82129..d2492d050fe60 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -467,7 +467,7 @@ __bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status) __bpf_kfunc_end_defs(); -BTF_SET8_START(nf_ct_kfunc_set) +BTF_KFUNCS_START(nf_ct_kfunc_set) BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_xdp_ct_lookup, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_skb_ct_alloc, KF_ACQUIRE | KF_RET_NULL) @@ -478,7 +478,7 @@ BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS) -BTF_SET8_END(nf_ct_kfunc_set) +BTF_KFUNCS_END(nf_ct_kfunc_set) static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/netfilter/nf_nat_bpf.c b/net/netfilter/nf_nat_bpf.c index 6e3b2f58855fc..481be15609b16 100644 --- a/net/netfilter/nf_nat_bpf.c +++ b/net/netfilter/nf_nat_bpf.c @@ -54,9 +54,9 @@ __bpf_kfunc int bpf_ct_set_nat_info(struct nf_conn___init *nfct, __bpf_kfunc_end_defs(); -BTF_SET8_START(nf_nat_kfunc_set) +BTF_KFUNCS_START(nf_nat_kfunc_set) BTF_ID_FLAGS(func, bpf_ct_set_nat_info, KF_TRUSTED_ARGS) -BTF_SET8_END(nf_nat_kfunc_set) +BTF_KFUNCS_END(nf_nat_kfunc_set) static const struct btf_kfunc_id_set nf_bpf_nat_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/xfrm/xfrm_interface_bpf.c b/net/xfrm/xfrm_interface_bpf.c index 7d5e920141e9b..5ea15037ebd10 100644 --- a/net/xfrm/xfrm_interface_bpf.c +++ b/net/xfrm/xfrm_interface_bpf.c @@ -93,10 +93,10 @@ __bpf_kfunc int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx, const struct bp __bpf_kfunc_end_defs(); -BTF_SET8_START(xfrm_ifc_kfunc_set) +BTF_KFUNCS_START(xfrm_ifc_kfunc_set) BTF_ID_FLAGS(func, bpf_skb_get_xfrm_info) BTF_ID_FLAGS(func, bpf_skb_set_xfrm_info) -BTF_SET8_END(xfrm_ifc_kfunc_set) +BTF_KFUNCS_END(xfrm_ifc_kfunc_set) static const struct btf_kfunc_id_set xfrm_interface_kfunc_set = { .owner = THIS_MODULE, diff --git a/net/xfrm/xfrm_state_bpf.c b/net/xfrm/xfrm_state_bpf.c index 9e20d4a377f7e..2248eda741f8e 100644 --- a/net/xfrm/xfrm_state_bpf.c +++ b/net/xfrm/xfrm_state_bpf.c @@ -117,10 +117,10 @@ __bpf_kfunc void bpf_xdp_xfrm_state_release(struct xfrm_state *x) __bpf_kfunc_end_defs(); -BTF_SET8_START(xfrm_state_kfunc_set) +BTF_KFUNCS_START(xfrm_state_kfunc_set) BTF_ID_FLAGS(func, bpf_xdp_get_xfrm_state, KF_RET_NULL | KF_ACQUIRE) BTF_ID_FLAGS(func, bpf_xdp_xfrm_state_release, KF_RELEASE) -BTF_SET8_END(xfrm_state_kfunc_set) +BTF_KFUNCS_END(xfrm_state_kfunc_set) static const struct btf_kfunc_id_set xfrm_state_xdp_kfunc_set = { .owner = THIS_MODULE, diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 6f163a0f1c94c..4754c662b39ff 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -343,12 +343,12 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .write = bpf_testmod_test_write, }; -BTF_SET8_START(bpf_testmod_common_kfunc_ids) +BTF_KFUNCS_START(bpf_testmod_common_kfunc_ids) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_kfunc_common_test) -BTF_SET8_END(bpf_testmod_common_kfunc_ids) +BTF_KFUNCS_END(bpf_testmod_common_kfunc_ids) static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = { .owner = THIS_MODULE, @@ -494,7 +494,7 @@ __bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused return arg; } -BTF_SET8_START(bpf_testmod_check_kfunc_ids) +BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids) BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc) BTF_ID_FLAGS(func, bpf_kfunc_call_test1) BTF_ID_FLAGS(func, bpf_kfunc_call_test2) @@ -520,7 +520,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU) BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE) BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg) BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset) -BTF_SET8_END(bpf_testmod_check_kfunc_ids) +BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids) static int bpf_testmod_ops_init(struct btf *btf) { -- cgit 1.2.3-korg From fa33b35f86b8010938e474643ef4f88b62a9b262 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Wed, 31 Jan 2024 16:45:49 +0800 Subject: sctp: Simplify the allocation of slab caches commit 0a31bd5f2bbb ("KMEM_CACHE(): simplify slab cache creation") introduces a new macro. Use the new KMEM_CACHE() macro instead of direct kmem_cache_create to simplify the creation of SLAB caches. Signed-off-by: Kunwu Chan Reviewed-by: Jiri Pirko Acked-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/protocol.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 94c6dd53cd62d..e849f368ed913 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1495,17 +1495,11 @@ static __init int sctp_init(void) /* Allocate bind_bucket and chunk caches. */ status = -ENOBUFS; - sctp_bucket_cachep = kmem_cache_create("sctp_bind_bucket", - sizeof(struct sctp_bind_bucket), - 0, SLAB_HWCACHE_ALIGN, - NULL); + sctp_bucket_cachep = KMEM_CACHE(sctp_bind_bucket, SLAB_HWCACHE_ALIGN); if (!sctp_bucket_cachep) goto out; - sctp_chunk_cachep = kmem_cache_create("sctp_chunk", - sizeof(struct sctp_chunk), - 0, SLAB_HWCACHE_ALIGN, - NULL); + sctp_chunk_cachep = KMEM_CACHE(sctp_chunk, SLAB_HWCACHE_ALIGN); if (!sctp_chunk_cachep) goto err_chunk_cachep; -- cgit 1.2.3-korg From 20ea9327c2fd545d6b96e998727bcd724290694d Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Wed, 31 Jan 2024 17:08:51 +0800 Subject: net: dccp: Simplify the allocation of slab caches in dccp_ackvec_init Use the new KMEM_CACHE() macro instead of direct kmem_cache_create to simplify the creation of SLAB caches. Signed-off-by: Kunwu Chan Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/dccp/ackvec.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index c4bbac99740d8..1cba001bb4c8e 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -376,15 +376,11 @@ EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_cleanup); int __init dccp_ackvec_init(void) { - dccp_ackvec_slab = kmem_cache_create("dccp_ackvec", - sizeof(struct dccp_ackvec), 0, - SLAB_HWCACHE_ALIGN, NULL); + dccp_ackvec_slab = KMEM_CACHE(dccp_ackvec, SLAB_HWCACHE_ALIGN); if (dccp_ackvec_slab == NULL) goto out_err; - dccp_ackvec_record_slab = kmem_cache_create("dccp_ackvec_record", - sizeof(struct dccp_ackvec_record), - 0, SLAB_HWCACHE_ALIGN, NULL); + dccp_ackvec_record_slab = KMEM_CACHE(dccp_ackvec_record, SLAB_HWCACHE_ALIGN); if (dccp_ackvec_record_slab == NULL) goto out_destroy_slab; -- cgit 1.2.3-korg From 45d43937a44c806b8649323b8f5d9f42ae838b0e Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Mon, 29 Jan 2024 22:09:19 +0100 Subject: wifi: cfg80211: add a kunit test for 6 GHz colocated AP parsing Test a few things around parsing of 6 GHz colocated APs to e.g. ensure that we are not going to scan for a disabled (affiliated) AP. Signed-off-by: Benjamin Berg Link: https://msgid.link/20240129220918.079dc50ab43b.Ide898d9f1d4c26d7e774d6fd0ec57766967d6572@changeid Signed-off-by: Johannes Berg --- net/wireless/core.h | 44 ++++++++++++ net/wireless/scan.c | 49 ++----------- net/wireless/tests/scan.c | 179 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 230 insertions(+), 42 deletions(-) (limited to 'net') diff --git a/net/wireless/core.h b/net/wireless/core.h index 30434551b3774..debf63e6c61fc 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -550,9 +550,53 @@ int cfg80211_remove_virtual_intf(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); void cfg80211_wdev_release_link_bsses(struct wireless_dev *wdev, u16 link_mask); +/** + * struct cfg80211_colocated_ap - colocated AP information + * + * @list: linked list to all colocated APs + * @bssid: BSSID of the reported AP + * @ssid: SSID of the reported AP + * @ssid_len: length of the ssid + * @center_freq: frequency the reported AP is on + * @unsolicited_probe: the reported AP is part of an ESS, where all the APs + * that operate in the same channel as the reported AP and that might be + * detected by a STA receiving this frame, are transmitting unsolicited + * Probe Response frames every 20 TUs + * @oct_recommended: OCT is recommended to exchange MMPDUs with the reported AP + * @same_ssid: the reported AP has the same SSID as the reporting AP + * @multi_bss: the reported AP is part of a multiple BSSID set + * @transmitted_bssid: the reported AP is the transmitting BSSID + * @colocated_ess: all the APs that share the same ESS as the reported AP are + * colocated and can be discovered via legacy bands. + * @short_ssid_valid: short_ssid is valid and can be used + * @short_ssid: the short SSID for this SSID + * @psd_20: The 20MHz PSD EIRP of the primary 20MHz channel for the reported AP + */ +struct cfg80211_colocated_ap { + struct list_head list; + u8 bssid[ETH_ALEN]; + u8 ssid[IEEE80211_MAX_SSID_LEN]; + size_t ssid_len; + u32 short_ssid; + u32 center_freq; + u8 unsolicited_probe:1, + oct_recommended:1, + same_ssid:1, + multi_bss:1, + transmitted_bssid:1, + colocated_ess:1, + short_ssid_valid:1; + s8 psd_20; +}; + #if IS_ENABLED(CONFIG_CFG80211_KUNIT_TEST) #define EXPORT_SYMBOL_IF_CFG80211_KUNIT(sym) EXPORT_SYMBOL_IF_KUNIT(sym) #define VISIBLE_IF_CFG80211_KUNIT +void cfg80211_free_coloc_ap_list(struct list_head *coloc_ap_list); + +int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies, + struct list_head *list); + size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, const u8 *subie, size_t subie_len, u8 *new_ie, size_t new_ie_len); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 01618c4059f48..c78b10e1a1674 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -77,45 +77,6 @@ MODULE_PARM_DESC(bss_entries_limit, #define IEEE80211_SCAN_RESULT_EXPIRE (30 * HZ) -/** - * struct cfg80211_colocated_ap - colocated AP information - * - * @list: linked list to all colocated aPS - * @bssid: BSSID of the reported AP - * @ssid: SSID of the reported AP - * @ssid_len: length of the ssid - * @center_freq: frequency the reported AP is on - * @unsolicited_probe: the reported AP is part of an ESS, where all the APs - * that operate in the same channel as the reported AP and that might be - * detected by a STA receiving this frame, are transmitting unsolicited - * Probe Response frames every 20 TUs - * @oct_recommended: OCT is recommended to exchange MMPDUs with the reported AP - * @same_ssid: the reported AP has the same SSID as the reporting AP - * @multi_bss: the reported AP is part of a multiple BSSID set - * @transmitted_bssid: the reported AP is the transmitting BSSID - * @colocated_ess: all the APs that share the same ESS as the reported AP are - * colocated and can be discovered via legacy bands. - * @short_ssid_valid: short_ssid is valid and can be used - * @short_ssid: the short SSID for this SSID - * @psd_20: The 20MHz PSD EIRP of the primary 20MHz channel for the reported AP - */ -struct cfg80211_colocated_ap { - struct list_head list; - u8 bssid[ETH_ALEN]; - u8 ssid[IEEE80211_MAX_SSID_LEN]; - size_t ssid_len; - u32 short_ssid; - u32 center_freq; - u8 unsolicited_probe:1, - oct_recommended:1, - same_ssid:1, - multi_bss:1, - transmitted_bssid:1, - colocated_ess:1, - short_ssid_valid:1; - s8 psd_20; -}; - static void bss_free(struct cfg80211_internal_bss *bss) { struct cfg80211_bss_ies *ies; @@ -566,7 +527,8 @@ static int cfg80211_calc_short_ssid(const struct cfg80211_bss_ies *ies, return 0; } -static void cfg80211_free_coloc_ap_list(struct list_head *coloc_ap_list) +VISIBLE_IF_CFG80211_KUNIT void +cfg80211_free_coloc_ap_list(struct list_head *coloc_ap_list) { struct cfg80211_colocated_ap *ap, *tmp_ap; @@ -575,6 +537,7 @@ static void cfg80211_free_coloc_ap_list(struct list_head *coloc_ap_list) kfree(ap); } } +EXPORT_SYMBOL_IF_KUNIT(cfg80211_free_coloc_ap_list); static int cfg80211_parse_ap_info(struct cfg80211_colocated_ap *entry, const u8 *pos, u8 length, @@ -648,8 +611,9 @@ static int cfg80211_parse_ap_info(struct cfg80211_colocated_ap *entry, return 0; } -static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies, - struct list_head *list) +VISIBLE_IF_CFG80211_KUNIT int +cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies, + struct list_head *list) { struct ieee80211_neighbor_ap_info *ap_info; const struct element *elem, *ssid_elem; @@ -746,6 +710,7 @@ error: list_splice_tail(&ap_list, list); return n_coloc; } +EXPORT_SYMBOL_IF_KUNIT(cfg80211_parse_colocated_ap); static void cfg80211_scan_req_add_chan(struct cfg80211_scan_request *request, struct ieee80211_channel *chan, diff --git a/net/wireless/tests/scan.c b/net/wireless/tests/scan.c index f9ea44aee9952..b40a0c06a6bbc 100644 --- a/net/wireless/tests/scan.c +++ b/net/wireless/tests/scan.c @@ -628,6 +628,172 @@ static void test_inform_bss_ml_sta(struct kunit *test) cfg80211_put_bss(wiphy, link_bss); } +static struct cfg80211_parse_colocated_ap_case { + const char *desc; + u8 op_class; + u8 channel; + struct ieee80211_neighbor_ap_info info; + union { + struct ieee80211_tbtt_info_ge_11 tbtt_long; + struct ieee80211_tbtt_info_7_8_9 tbtt_short; + }; + bool add_junk; + bool same_ssid; + bool valid; +} cfg80211_parse_colocated_ap_cases[] = { + { + .desc = "wrong_band", + .info.op_class = 81, + .info.channel = 11, + .tbtt_long = { + .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 }, + .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP, + }, + .valid = false, + }, + { + .desc = "wrong_type", + /* IEEE80211_AP_INFO_TBTT_HDR_TYPE is in the least significant bits */ + .info.tbtt_info_hdr = IEEE80211_TBTT_INFO_TYPE_MLD, + .tbtt_long = { + .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 }, + .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP, + }, + .valid = false, + }, + { + .desc = "colocated_invalid_len_short", + .info.tbtt_info_len = 6, + .tbtt_short = { + .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 }, + .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP | + IEEE80211_RNR_TBTT_PARAMS_SAME_SSID, + }, + .valid = false, + }, + { + .desc = "colocated_invalid_len_short_mld", + .info.tbtt_info_len = 10, + .tbtt_long = { + .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 }, + .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP, + }, + .valid = false, + }, + { + .desc = "colocated_non_mld", + .info.tbtt_info_len = sizeof(struct ieee80211_tbtt_info_7_8_9), + .tbtt_short = { + .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 }, + .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP | + IEEE80211_RNR_TBTT_PARAMS_SAME_SSID, + }, + .same_ssid = true, + .valid = true, + }, + { + .desc = "colocated_non_mld_invalid_bssid", + .info.tbtt_info_len = sizeof(struct ieee80211_tbtt_info_7_8_9), + .tbtt_short = { + .bssid = { 0xff, 0x11, 0x22, 0x33, 0x44, 0x55 }, + .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP | + IEEE80211_RNR_TBTT_PARAMS_SAME_SSID, + }, + .same_ssid = true, + .valid = false, + }, + { + .desc = "colocated_mld", + .tbtt_long = { + .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 }, + .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP, + }, + .valid = true, + }, + { + .desc = "colocated_mld", + .tbtt_long = { + .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 }, + .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP, + }, + .add_junk = true, + .valid = false, + }, + { + .desc = "colocated_disabled_mld", + .tbtt_long = { + .bssid = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 }, + .bss_params = IEEE80211_RNR_TBTT_PARAMS_COLOC_AP, + .mld_params.params = cpu_to_le16(IEEE80211_RNR_MLD_PARAMS_DISABLED_LINK), + }, + .valid = false, + }, +}; +KUNIT_ARRAY_PARAM_DESC(cfg80211_parse_colocated_ap, cfg80211_parse_colocated_ap_cases, desc) + +static void test_cfg80211_parse_colocated_ap(struct kunit *test) +{ + const struct cfg80211_parse_colocated_ap_case *params = test->param_value; + struct sk_buff *input = kunit_zalloc_skb(test, 1024, GFP_KERNEL); + struct cfg80211_bss_ies *ies; + struct ieee80211_neighbor_ap_info info; + LIST_HEAD(coloc_ap_list); + int count; + + KUNIT_ASSERT_NOT_NULL(test, input); + + info = params->info; + + /* Reasonable values for a colocated AP */ + if (!info.tbtt_info_len) + info.tbtt_info_len = sizeof(params->tbtt_long); + if (!info.op_class) + info.op_class = 131; + if (!info.channel) + info.channel = 33; + /* Zero is the correct default for .btt_info_hdr (one entry, TBTT type) */ + + skb_put_u8(input, WLAN_EID_SSID); + skb_put_u8(input, 4); + skb_put_data(input, "TEST", 4); + + skb_put_u8(input, WLAN_EID_REDUCED_NEIGHBOR_REPORT); + skb_put_u8(input, sizeof(info) + info.tbtt_info_len + (params->add_junk ? 3 : 0)); + skb_put_data(input, &info, sizeof(info)); + skb_put_data(input, ¶ms->tbtt_long, info.tbtt_info_len); + + if (params->add_junk) + skb_put_data(input, "123", 3); + + ies = kunit_kzalloc(test, struct_size(ies, data, input->len), GFP_KERNEL); + ies->len = input->len; + memcpy(ies->data, input->data, input->len); + + count = cfg80211_parse_colocated_ap(ies, &coloc_ap_list); + + KUNIT_EXPECT_EQ(test, count, params->valid); + KUNIT_EXPECT_EQ(test, list_count_nodes(&coloc_ap_list), params->valid); + + if (params->valid && !list_empty(&coloc_ap_list)) { + struct cfg80211_colocated_ap *ap; + + ap = list_first_entry(&coloc_ap_list, typeof(*ap), list); + if (info.tbtt_info_len <= sizeof(params->tbtt_short)) + KUNIT_EXPECT_MEMEQ(test, ap->bssid, params->tbtt_short.bssid, ETH_ALEN); + else + KUNIT_EXPECT_MEMEQ(test, ap->bssid, params->tbtt_long.bssid, ETH_ALEN); + + if (params->same_ssid) { + KUNIT_EXPECT_EQ(test, ap->ssid_len, 4); + KUNIT_EXPECT_MEMEQ(test, ap->ssid, "TEST", 4); + } else { + KUNIT_EXPECT_EQ(test, ap->ssid_len, 0); + } + } + + cfg80211_free_coloc_ap_list(&coloc_ap_list); +} + static struct kunit_case gen_new_ie_test_cases[] = { KUNIT_CASE_PARAM(test_gen_new_ie, gen_new_ie_gen_params), KUNIT_CASE(test_gen_new_ie_malformed), @@ -653,3 +819,16 @@ static struct kunit_suite inform_bss = { }; kunit_test_suite(inform_bss); + +static struct kunit_case scan_6ghz_cases[] = { + KUNIT_CASE_PARAM(test_cfg80211_parse_colocated_ap, + cfg80211_parse_colocated_ap_gen_params), + {} +}; + +static struct kunit_suite scan_6ghz = { + .name = "cfg80211-scan-6ghz", + .test_cases = scan_6ghz_cases, +}; + +kunit_test_suite(scan_6ghz); -- cgit 1.2.3-korg From cfbb2add482ab86a9d183dbb9ac00d18c1389cc7 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Mon, 29 Jan 2024 22:09:20 +0100 Subject: wifi: cfg80211: tests: verify BSS use flags of NSTR links Extend the test to pass an RNR appropriate for an NSTR and verify that use_for as well as cannot_use_reasons are set correctly. Signed-off-by: Benjamin Berg Link: https://msgid.link/20240129220918.b00aee4c4c9f.I942fddf51cabaab761de3865b4e06cce831a46ef@changeid Signed-off-by: Johannes Berg --- net/wireless/tests/scan.c | 68 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 56 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/wireless/tests/scan.c b/net/wireless/tests/scan.c index b40a0c06a6bbc..9f458be716595 100644 --- a/net/wireless/tests/scan.c +++ b/net/wireless/tests/scan.c @@ -407,6 +407,7 @@ static struct inform_bss_ml_sta_case { int mld_id; bool sta_prof_vendor_elems; bool include_oper_class; + bool nstr; } inform_bss_ml_sta_cases[] = { { .desc = "zero_mld_id", @@ -426,6 +427,10 @@ static struct inform_bss_ml_sta_case { .mld_id = 1, .sta_prof_vendor_elems = true, .include_oper_class = true, + }, { + .desc = "nstr", + .mld_id = 0, + .nstr = true, }, }; KUNIT_ARRAY_PARAM_DESC(inform_bss_ml_sta, inform_bss_ml_sta_cases, desc) @@ -458,7 +463,7 @@ static void test_inform_bss_ml_sta(struct kunit *test) struct { struct ieee80211_neighbor_ap_info info; struct ieee80211_tbtt_info_ge_11 ap; - } __packed rnr = { + } __packed rnr_normal = { .info = { .tbtt_info_hdr = u8_encode_bits(0, IEEE80211_AP_INFO_TBTT_HDR_COUNT), .tbtt_info_len = sizeof(struct ieee80211_tbtt_info_ge_11), @@ -477,6 +482,28 @@ static void test_inform_bss_ml_sta(struct kunit *test) IEEE80211_RNR_MLD_PARAMS_LINK_ID), } }; + struct { + struct ieee80211_neighbor_ap_info info; + struct ieee80211_rnr_mld_params mld_params; + } __packed rnr_nstr = { + .info = { + .tbtt_info_hdr = + u8_encode_bits(0, IEEE80211_AP_INFO_TBTT_HDR_COUNT) | + u8_encode_bits(IEEE80211_TBTT_INFO_TYPE_MLD, + IEEE80211_AP_INFO_TBTT_HDR_TYPE), + .tbtt_info_len = sizeof(struct ieee80211_rnr_mld_params), + .op_class = 81, + .channel = 11, + }, + .mld_params = { + .mld_id = params->mld_id, + .params = + le16_encode_bits(link_id, + IEEE80211_RNR_MLD_PARAMS_LINK_ID), + } + }; + size_t rnr_len = params->nstr ? sizeof(rnr_nstr) : sizeof(rnr_normal); + void *rnr = params->nstr ? (void *)&rnr_nstr : (void *)&rnr_normal; struct { __le16 control; u8 var_len; @@ -516,7 +543,7 @@ static void test_inform_bss_ml_sta(struct kunit *test) u16_encode_bits(link_id, IEEE80211_MLE_STA_CONTROL_LINK_ID)), .var_len = sizeof(sta_prof) - 2 - 2, - .bssid = { *rnr.ap.bssid }, + .bssid = { *rnr_normal.ap.bssid }, .beacon_int = cpu_to_le16(101), .tsf_offset = cpu_to_le64(-123ll), .capabilities = cpu_to_le16(0xdead), @@ -540,8 +567,8 @@ static void test_inform_bss_ml_sta(struct kunit *test) } skb_put_u8(input, WLAN_EID_REDUCED_NEIGHBOR_REPORT); - skb_put_u8(input, sizeof(rnr)); - skb_put_data(input, &rnr, sizeof(rnr)); + skb_put_u8(input, rnr_len); + skb_put_data(input, rnr, rnr_len); /* build a multi-link element */ skb_put_u8(input, WLAN_EID_EXTENSION); @@ -587,9 +614,10 @@ static void test_inform_bss_ml_sta(struct kunit *test) KUNIT_EXPECT_EQ(test, ctx.inform_bss_count, 2); /* Check link_bss *****************************************************/ - link_bss = cfg80211_get_bss(wiphy, NULL, sta_prof.bssid, NULL, 0, - IEEE80211_BSS_TYPE_ANY, - IEEE80211_PRIVACY_ANY); + link_bss = __cfg80211_get_bss(wiphy, NULL, sta_prof.bssid, NULL, 0, + IEEE80211_BSS_TYPE_ANY, + IEEE80211_PRIVACY_ANY, + 0); KUNIT_ASSERT_NOT_NULL(test, link_bss); KUNIT_EXPECT_EQ(test, link_bss->signal, 0); KUNIT_EXPECT_EQ(test, link_bss->beacon_interval, @@ -600,6 +628,22 @@ static void test_inform_bss_ml_sta(struct kunit *test) KUNIT_EXPECT_PTR_EQ(test, link_bss->channel, ieee80211_get_channel_khz(wiphy, MHZ_TO_KHZ(2462))); + /* Test wiphy does not set WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY */ + if (params->nstr) { + KUNIT_EXPECT_EQ(test, link_bss->use_for, 0); + KUNIT_EXPECT_EQ(test, link_bss->cannot_use_reasons, + NL80211_BSS_CANNOT_USE_NSTR_NONPRIMARY); + KUNIT_EXPECT_NULL(test, + cfg80211_get_bss(wiphy, NULL, sta_prof.bssid, + NULL, 0, + IEEE80211_BSS_TYPE_ANY, + IEEE80211_PRIVACY_ANY)); + } else { + KUNIT_EXPECT_EQ(test, link_bss->use_for, + NL80211_BSS_USE_FOR_ALL); + KUNIT_EXPECT_EQ(test, link_bss->cannot_use_reasons, 0); + } + rcu_read_lock(); ies = rcu_dereference(link_bss->ies); KUNIT_EXPECT_NOT_NULL(test, ies); @@ -607,20 +651,20 @@ static void test_inform_bss_ml_sta(struct kunit *test) /* Resulting length should be: * SSID (inherited) + RNR (inherited) + vendor element(s) + * operating class (if requested) + - * generated RNR (if MLD ID == 0) + + * generated RNR (if MLD ID == 0 and not NSTR) + * MLE common info + MLE header and control */ if (params->sta_prof_vendor_elems) KUNIT_EXPECT_EQ(test, ies->len, - 6 + 2 + sizeof(rnr) + 2 + 160 + 2 + 165 + + 6 + 2 + rnr_len + 2 + 160 + 2 + 165 + (params->include_oper_class ? 3 : 0) + - (!params->mld_id ? 22 : 0) + + (!params->mld_id && !params->nstr ? 22 : 0) + mle_basic_common_info.var_len + 5); else KUNIT_EXPECT_EQ(test, ies->len, - 6 + 2 + sizeof(rnr) + 2 + 155 + + 6 + 2 + rnr_len + 2 + 155 + (params->include_oper_class ? 3 : 0) + - (!params->mld_id ? 22 : 0) + + (!params->mld_id && !params->nstr ? 22 : 0) + mle_basic_common_info.var_len + 5); rcu_read_unlock(); -- cgit 1.2.3-korg From f455f5ad500a993b65ae1c4c59639eff558e1688 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:54:35 +0100 Subject: wifi: mac80211: trace SMPS requests from driver Even if there are a lot of possible ways drivers might call this, at least knowing when they do and with what settings can be useful. Add tracing for it. Link: https://msgid.link/20240129195435.b20d2ead2013.I8213e65c274451d523a3397519ac578c3ed2df4d@changeid [removed link-id contortions as suggested by Jeff] Signed-off-by: Johannes Berg --- net/mac80211/ht.c | 4 +++- net/mac80211/trace.h | 30 +++++++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 749f4ecab9903..bccc99a003832 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation * Copyright 2017 Intel Deutschland GmbH - * Copyright(c) 2020-2023 Intel Corporation + * Copyright(c) 2020-2024 Intel Corporation */ #include @@ -603,6 +603,8 @@ void ieee80211_request_smps(struct ieee80211_vif *vif, unsigned int link_id, if (WARN_ON(!link)) goto out; + trace_api_request_smps(sdata->local, sdata, link, smps_mode); + if (link->u.mgd.driver_smps_mode == smps_mode) goto out; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 1c0c46b11c6d7..e2dde3e77c301 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -2,7 +2,7 @@ /* * Portions of this file * Copyright(c) 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2023 Intel Corporation + * Copyright (C) 2018 - 2024 Intel Corporation */ #if !defined(__MAC80211_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ) @@ -3035,6 +3035,34 @@ TRACE_EVENT(api_radar_detected, ) ); +TRACE_EVENT(api_request_smps, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_link_data *link, + enum ieee80211_smps_mode smps_mode), + + TP_ARGS(local, sdata, link, smps_mode), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(int, link_id) + __field(u32, smps_mode) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->link_id = link->link_id, + __entry->smps_mode = smps_mode; + ), + + TP_printk( + LOCAL_PR_FMT " " VIF_PR_FMT " link:%d, smps_mode:%d", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->link_id, __entry->smps_mode + ) +); + /* * Tracing for internal functions * (which may also be called in response to driver calls) -- cgit 1.2.3-korg From 392d3dfdfd68f4be6964d4fc8f3979a7a859e6f2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:57:39 +0100 Subject: wifi: mac80211: clean up FILS discovery change flags handling It doesn't make sense to return BSS change flags in an int, as they're a bigger type. For this particular function it still works OK, but clean it up to avoid future errors (or copying this code in a broken way.) Reviewed-by: Jeff Johnson Link: https://msgid.link/20240129195739.e340a7d5e7c6.I1dfcca32d43dce903494a2c474844491682671b4@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index dd237081dbe33..2830ddac45b2e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -953,7 +953,8 @@ ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, static int ieee80211_set_fils_discovery(struct ieee80211_sub_if_data *sdata, struct cfg80211_fils_discovery *params, struct ieee80211_link_data *link, - struct ieee80211_bss_conf *link_conf) + struct ieee80211_bss_conf *link_conf, + u64 *changed) { struct fils_discovery_data *new, *old = NULL; struct ieee80211_fils_discovery *fd; @@ -980,7 +981,8 @@ static int ieee80211_set_fils_discovery(struct ieee80211_sub_if_data *sdata, RCU_INIT_POINTER(link->u.ap.fils_discovery, NULL); } - return BSS_CHANGED_FILS_DISCOVERY; + *changed |= BSS_CHANGED_FILS_DISCOVERY; + return 0; } static int @@ -1443,10 +1445,9 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, goto error; err = ieee80211_set_fils_discovery(sdata, ¶ms->fils_discovery, - link, link_conf); + link, link_conf, &changed); if (err < 0) goto error; - changed |= err; err = ieee80211_set_unsol_bcast_probe_resp(sdata, ¶ms->unsol_bcast_probe_resp, @@ -1518,10 +1519,9 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, return err; err = ieee80211_set_fils_discovery(sdata, ¶ms->fils_discovery, - link, link_conf); + link, link_conf, &changed); if (err < 0) return err; - changed |= err; err = ieee80211_set_unsol_bcast_probe_resp(sdata, ¶ms->unsol_bcast_probe_resp, -- cgit 1.2.3-korg From 57d1b4632e03e5f938972055c45b0f2e475e6929 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:54:21 +0100 Subject: wifi: nl80211: move WPA version validation to policy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For a contiguous mask (starting with bit 0) of allowed values in a bitmap, it's equivalent to check "!(val & ~mask)" and "val ∈ [0, mask]". Use that to move the WPA versions check to the policy, for better error reporting. Reviewed-by: Jeff Johnson Link: https://msgid.link/20240129195421.e8cae9866ccb.I2539b395e3476307d702c6867e51a937e52e57a0@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e4f41f86e295d..68c20409eca65 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5,7 +5,7 @@ * Copyright 2006-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #include @@ -581,7 +581,11 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG }, [NL80211_ATTR_STATUS_CODE] = { .type = NLA_U16 }, [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, - [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, + [NL80211_ATTR_WPA_VERSIONS] = + NLA_POLICY_RANGE(NLA_U32, 0, + NL80211_WPA_VERSION_1 | + NL80211_WPA_VERSION_2 | + NL80211_WPA_VERSION_3), [NL80211_ATTR_PID] = { .type = NLA_U32 }, [NL80211_ATTR_4ADDR] = { .type = NLA_U8 }, [NL80211_ATTR_PMKID] = NLA_POLICY_EXACT_LEN_WARN(WLAN_PMKID_LEN), @@ -10604,13 +10608,6 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) return res; } -static bool nl80211_valid_wpa_versions(u32 wpa_versions) -{ - return !(wpa_versions & ~(NL80211_WPA_VERSION_1 | - NL80211_WPA_VERSION_2 | - NL80211_WPA_VERSION_3)); -} - static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -10836,12 +10833,9 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, return -EINVAL; } - if (info->attrs[NL80211_ATTR_WPA_VERSIONS]) { + if (info->attrs[NL80211_ATTR_WPA_VERSIONS]) settings->wpa_versions = nla_get_u32(info->attrs[NL80211_ATTR_WPA_VERSIONS]); - if (!nl80211_valid_wpa_versions(settings->wpa_versions)) - return -EINVAL; - } if (info->attrs[NL80211_ATTR_AKM_SUITES]) { void *data; -- cgit 1.2.3-korg From b341590e77d89d8812051fdd2354ff04e12f211b Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Mon, 29 Jan 2024 20:00:53 +0100 Subject: wifi: mac80211: don't allow deactivation of all links The set_active_links API is intended for link switching, so switching to no links at all is not supported. Add a warning to check that. Signed-off-by: Miri Korenblit Link: https://msgid.link/20240129200054.e3c113f94508.Ia35f927f914bf98dd8f9350dd4f78b1d901b1c1d@changeid Signed-off-by: Johannes Berg --- net/mac80211/link.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/mac80211/link.c b/net/mac80211/link.c index d4f86955afa6d..c0d05efcf6f8c 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -444,6 +444,9 @@ int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links) lockdep_assert_wiphy(local->hw.wiphy); + if (WARN_ON(!active_links)) + return -EINVAL; + if (!drv_can_activate_links(local, sdata, active_links)) return -EINVAL; @@ -472,6 +475,9 @@ void ieee80211_set_active_links_async(struct ieee80211_vif *vif, { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + if (WARN_ON(!active_links)) + return; + if (!ieee80211_sdata_running(sdata)) return; -- cgit 1.2.3-korg From 241a94abcf465ba9363d93168da5ddd47002930f Mon Sep 17 00:00:00 2001 From: Michal Koutný Date: Thu, 1 Feb 2024 14:09:41 +0100 Subject: net/sched: Add module aliases for cls_,sch_,act_ modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No functional change intended, aliases will be used in followup commits. Note for backporters: you may need to add aliases also for modules that are already removed in mainline kernel but still in your version. Patches were generated with the help of Coccinelle scripts like: cat >scripts/coccinelle/misc/tcf_alias.cocci </dev/null | \ sed 's/char module_alias = "\([^"]*\)";/MODULE_ALIAS_NET_CLS("\1");/' And analogously for: static struct tc_action_ops ops = { .kind = K, static struct Qdisc_ops ops = { .id = K, (Someone familiar would be able to fit those into one .cocci file without sed post processing.) Signed-off-by: Michal Koutný Acked-by: Jamal Hadi Salim Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240201130943.19536-3-mkoutny@suse.com Signed-off-by: Jakub Kicinski --- net/sched/act_bpf.c | 1 + net/sched/act_connmark.c | 1 + net/sched/act_csum.c | 1 + net/sched/act_ct.c | 1 + net/sched/act_ctinfo.c | 1 + net/sched/act_gact.c | 1 + net/sched/act_gate.c | 1 + net/sched/act_ife.c | 1 + net/sched/act_mirred.c | 1 + net/sched/act_mpls.c | 1 + net/sched/act_nat.c | 1 + net/sched/act_pedit.c | 1 + net/sched/act_police.c | 1 + net/sched/act_sample.c | 1 + net/sched/act_simple.c | 1 + net/sched/act_skbedit.c | 1 + net/sched/act_skbmod.c | 1 + net/sched/act_tunnel_key.c | 1 + net/sched/act_vlan.c | 1 + net/sched/cls_basic.c | 1 + net/sched/cls_bpf.c | 1 + net/sched/cls_cgroup.c | 1 + net/sched/cls_flow.c | 1 + net/sched/cls_flower.c | 1 + net/sched/cls_fw.c | 1 + net/sched/cls_matchall.c | 1 + net/sched/cls_route.c | 1 + net/sched/cls_u32.c | 1 + net/sched/sch_cake.c | 1 + net/sched/sch_cbs.c | 1 + net/sched/sch_choke.c | 1 + net/sched/sch_codel.c | 1 + net/sched/sch_drr.c | 1 + net/sched/sch_etf.c | 1 + net/sched/sch_ets.c | 1 + net/sched/sch_fq.c | 1 + net/sched/sch_fq_codel.c | 1 + net/sched/sch_gred.c | 1 + net/sched/sch_hfsc.c | 1 + net/sched/sch_hhf.c | 1 + net/sched/sch_htb.c | 1 + net/sched/sch_ingress.c | 2 ++ net/sched/sch_mqprio.c | 1 + net/sched/sch_multiq.c | 1 + net/sched/sch_netem.c | 1 + net/sched/sch_pie.c | 1 + net/sched/sch_plug.c | 1 + net/sched/sch_prio.c | 1 + net/sched/sch_qfq.c | 1 + net/sched/sch_red.c | 1 + net/sched/sch_sfb.c | 1 + net/sched/sch_sfq.c | 1 + net/sched/sch_skbprio.c | 1 + net/sched/sch_taprio.c | 1 + net/sched/sch_tbf.c | 1 + 55 files changed, 56 insertions(+) (limited to 'net') diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 6cfee6658103d..0e3cf11ae5fc0 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -401,6 +401,7 @@ static struct tc_action_ops act_bpf_ops __read_mostly = { .init = tcf_bpf_init, .size = sizeof(struct tcf_bpf), }; +MODULE_ALIAS_NET_ACT("bpf"); static __net_init int bpf_init_net(struct net *net) { diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index f8762756657d3..0fce631e7c911 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -242,6 +242,7 @@ static struct tc_action_ops act_connmark_ops = { .cleanup = tcf_connmark_cleanup, .size = sizeof(struct tcf_connmark_info), }; +MODULE_ALIAS_NET_ACT("connmark"); static __net_init int connmark_init_net(struct net *net) { diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 7f8b1f2f2ed92..5cc8e407e7911 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -709,6 +709,7 @@ static struct tc_action_ops act_csum_ops = { .offload_act_setup = tcf_csum_offload_act_setup, .size = sizeof(struct tcf_csum), }; +MODULE_ALIAS_NET_ACT("csum"); static __net_init int csum_init_net(struct net *net) { diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 6124d8b128d16..baac083fd8f10 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1600,6 +1600,7 @@ static struct tc_action_ops act_ct_ops = { .offload_act_setup = tcf_ct_offload_act_setup, .size = sizeof(struct tcf_ct), }; +MODULE_ALIAS_NET_ACT("ct"); static __net_init int ct_init_net(struct net *net) { diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index e620f9a84afe5..5dd41a012110e 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -363,6 +363,7 @@ static struct tc_action_ops act_ctinfo_ops = { .cleanup= tcf_ctinfo_cleanup, .size = sizeof(struct tcf_ctinfo), }; +MODULE_ALIAS_NET_ACT("ctinfo"); static __net_init int ctinfo_init_net(struct net *net) { diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 4af3b7ec249fa..e949280eb800d 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -296,6 +296,7 @@ static struct tc_action_ops act_gact_ops = { .offload_act_setup = tcf_gact_offload_act_setup, .size = sizeof(struct tcf_gact), }; +MODULE_ALIAS_NET_ACT("gact"); static __net_init int gact_init_net(struct net *net) { diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index c681cd011afd0..1dd74125398a0 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -645,6 +645,7 @@ static struct tc_action_ops act_gate_ops = { .offload_act_setup = tcf_gate_offload_act_setup, .size = sizeof(struct tcf_gate), }; +MODULE_ALIAS_NET_ACT("gate"); static __net_init int gate_init_net(struct net *net) { diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 0e867d13beb5d..107c6d83dc5c4 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -889,6 +889,7 @@ static struct tc_action_ops act_ife_ops = { .init = tcf_ife_init, .size = sizeof(struct tcf_ife_info), }; +MODULE_ALIAS_NET_ACT("ife"); static __net_init int ife_init_net(struct net *net) { diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 12386f590b0f6..93a96e9d8d900 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -643,6 +643,7 @@ static struct tc_action_ops act_mirred_ops = { .size = sizeof(struct tcf_mirred), .get_dev = tcf_mirred_get_dev, }; +MODULE_ALIAS_NET_ACT("mirred"); static __net_init int mirred_init_net(struct net *net) { diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index 34b8edb6cc77a..44a37a71ae923 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -452,6 +452,7 @@ static struct tc_action_ops act_mpls_ops = { .offload_act_setup = tcf_mpls_offload_act_setup, .size = sizeof(struct tcf_mpls), }; +MODULE_ALIAS_NET_ACT("mpls"); static __net_init int mpls_init_net(struct net *net) { diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index a180e724634ed..d541f553805fa 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -324,6 +324,7 @@ static struct tc_action_ops act_nat_ops = { .cleanup = tcf_nat_cleanup, .size = sizeof(struct tcf_nat), }; +MODULE_ALIAS_NET_ACT("nat"); static __net_init int nat_init_net(struct net *net) { diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 2ef22969f274d..df5a02d5f919c 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -620,6 +620,7 @@ static struct tc_action_ops act_pedit_ops = { .offload_act_setup = tcf_pedit_offload_act_setup, .size = sizeof(struct tcf_pedit), }; +MODULE_ALIAS_NET_ACT("pedit"); static __net_init int pedit_init_net(struct net *net) { diff --git a/net/sched/act_police.c b/net/sched/act_police.c index e119b4a3db9ff..8555125ed34d8 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -502,6 +502,7 @@ static struct tc_action_ops act_police_ops = { .offload_act_setup = tcf_police_offload_act_setup, .size = sizeof(struct tcf_police), }; +MODULE_ALIAS_NET_ACT("police"); static __net_init int police_init_net(struct net *net) { diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index c5c61efe6db42..a69b53d54039e 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -316,6 +316,7 @@ static struct tc_action_ops act_sample_ops = { .offload_act_setup = tcf_sample_offload_act_setup, .size = sizeof(struct tcf_sample), }; +MODULE_ALIAS_NET_ACT("sample"); static __net_init int sample_init_net(struct net *net) { diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 0a3e928882955..f3abe05459895 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -209,6 +209,7 @@ static struct tc_action_ops act_simp_ops = { .init = tcf_simp_init, .size = sizeof(struct tcf_defact), }; +MODULE_ALIAS_NET_ACT("simple"); static __net_init int simp_init_net(struct net *net) { diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 754f78b35bb81..1f1d9ce3e968a 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -426,6 +426,7 @@ static struct tc_action_ops act_skbedit_ops = { .offload_act_setup = tcf_skbedit_offload_act_setup, .size = sizeof(struct tcf_skbedit), }; +MODULE_ALIAS_NET_ACT("skbedit"); static __net_init int skbedit_init_net(struct net *net) { diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index bcb673ab0008d..39945b139c481 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -287,6 +287,7 @@ static struct tc_action_ops act_skbmod_ops = { .cleanup = tcf_skbmod_cleanup, .size = sizeof(struct tcf_skbmod), }; +MODULE_ALIAS_NET_ACT("skbmod"); static __net_init int skbmod_init_net(struct net *net) { diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 300b08aa82831..1536f8b16f1b2 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -842,6 +842,7 @@ static struct tc_action_ops act_tunnel_key_ops = { .offload_act_setup = tcf_tunnel_key_offload_act_setup, .size = sizeof(struct tcf_tunnel_key), }; +MODULE_ALIAS_NET_ACT("tunnel_key"); static __net_init int tunnel_key_init_net(struct net *net) { diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 836183011a7c9..22f4b1e8ade9f 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -427,6 +427,7 @@ static struct tc_action_ops act_vlan_ops = { .offload_act_setup = tcf_vlan_offload_act_setup, .size = sizeof(struct tcf_vlan), }; +MODULE_ALIAS_NET_ACT("vlan"); static __net_init int vlan_init_net(struct net *net) { diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index a1f56931330ca..ecfaa4f9a04ea 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -328,6 +328,7 @@ static struct tcf_proto_ops cls_basic_ops __read_mostly = { .bind_class = basic_bind_class, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_CLS("basic"); static int __init init_basic(void) { diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 382c7a71f81f2..5e83e890f6a4b 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -693,6 +693,7 @@ static struct tcf_proto_ops cls_bpf_ops __read_mostly = { .dump = cls_bpf_dump, .bind_class = cls_bpf_bind_class, }; +MODULE_ALIAS_NET_CLS("bpf"); static int __init cls_bpf_init_mod(void) { diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 7ee8dbf49ed0d..424252982d6ae 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -209,6 +209,7 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = { .dump = cls_cgroup_dump, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_CLS("cgroup"); static int __init init_cgroup_cls(void) { diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 6ab317b48d6c3..5502998aace74 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -702,6 +702,7 @@ static struct tcf_proto_ops cls_flow_ops __read_mostly = { .walk = flow_walk, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_CLS("flow"); static int __init cls_flow_init(void) { diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index efb9d2811b73d..bfedc3d4423d7 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -3656,6 +3656,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { .owner = THIS_MODULE, .flags = TCF_PROTO_OPS_DOIT_UNLOCKED, }; +MODULE_ALIAS_NET_CLS("flower"); static int __init cls_fl_init(void) { diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index afc534ee0a186..cdddc86952284 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -433,6 +433,7 @@ static struct tcf_proto_ops cls_fw_ops __read_mostly = { .bind_class = fw_bind_class, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_CLS("fw"); static int __init init_fw(void) { diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index c4ed11df62548..9f1e62ca508d0 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -398,6 +398,7 @@ static struct tcf_proto_ops cls_mall_ops __read_mostly = { .bind_class = mall_bind_class, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_CLS("matchall"); static int __init cls_mall_init(void) { diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 12a505db41832..b9c58c040c305 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -671,6 +671,7 @@ static struct tcf_proto_ops cls_route4_ops __read_mostly = { .bind_class = route4_bind_class, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_CLS("route"); static int __init init_route4(void) { diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 289e1755c26be..9412d88a99bc1 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -1453,6 +1453,7 @@ static struct tcf_proto_ops cls_u32_ops __read_mostly = { .bind_class = u32_bind_class, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_CLS("u32"); static int __init init_u32(void) { diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 9cff99558694d..edee926ccde8b 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -3103,6 +3103,7 @@ static struct Qdisc_ops cake_qdisc_ops __read_mostly = { .dump_stats = cake_dump_stats, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("cake"); static int __init cake_module_init(void) { diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index beece8e82c233..69001eff03155 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -546,6 +546,7 @@ static struct Qdisc_ops cbs_qdisc_ops __read_mostly = { .dump = cbs_dump, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("cbs"); static struct notifier_block cbs_device_notifier = { .notifier_call = cbs_dev_notifier, diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index ae1da08e268f1..ea108030c6b41 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -498,6 +498,7 @@ static struct Qdisc_ops choke_qdisc_ops __read_mostly = { .dump_stats = choke_dump_stats, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("choke"); static int __init choke_module_init(void) { diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index d7a4874543de5..61904d3a593b7 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -287,6 +287,7 @@ static struct Qdisc_ops codel_qdisc_ops __read_mostly = { .dump_stats = codel_dump_stats, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("codel"); static int __init codel_module_init(void) { diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 097740a9afeaf..c69b999fae171 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -481,6 +481,7 @@ static struct Qdisc_ops drr_qdisc_ops __read_mostly = { .destroy = drr_destroy_qdisc, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("drr"); static int __init drr_init(void) { diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c index 4808159a54660..2e4bef713b6ab 100644 --- a/net/sched/sch_etf.c +++ b/net/sched/sch_etf.c @@ -500,6 +500,7 @@ static struct Qdisc_ops etf_qdisc_ops __read_mostly = { .dump = etf_dump, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("etf"); static int __init etf_module_init(void) { diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index f7c88495946b0..835b4460b4485 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -812,6 +812,7 @@ static struct Qdisc_ops ets_qdisc_ops __read_mostly = { .dump = ets_qdisc_dump, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("ets"); static int __init ets_init(void) { diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 3a31c47fea9bd..cdf23ff16f40b 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -1264,6 +1264,7 @@ static struct Qdisc_ops fq_qdisc_ops __read_mostly = { .dump_stats = fq_dump_stats, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("fq"); static int __init fq_module_init(void) { diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 8c4fee0634366..79f9d6de6c852 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -717,6 +717,7 @@ static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = { .dump_stats = fq_codel_dump_stats, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("fq_codel"); static int __init fq_codel_module_init(void) { diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 8c61eb3dc9431..79ba9dc702541 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -930,6 +930,7 @@ static struct Qdisc_ops gred_qdisc_ops __read_mostly = { .dump = gred_dump, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("gred"); static int __init gred_module_init(void) { diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 16c45da4036a7..4e626df742d7a 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1679,6 +1679,7 @@ static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = { .priv_size = sizeof(struct hfsc_sched), .owner = THIS_MODULE }; +MODULE_ALIAS_NET_SCH("hfsc"); static int __init hfsc_init(void) diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index d26cd436cbe31..3f906df1435b2 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -702,6 +702,7 @@ static struct Qdisc_ops hhf_qdisc_ops __read_mostly = { .dump_stats = hhf_dump_stats, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("hhf"); static int __init hhf_module_init(void) { diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 7349233eaa9b6..93e6fb56f3b58 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -2166,6 +2166,7 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = { .dump = htb_dump, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("htb"); static int __init htb_module_init(void) { diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 5fa9eaa79bfc9..48a800131e99d 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -168,6 +168,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { .ingress_block_get = ingress_ingress_block_get, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("ingress"); struct clsact_sched_data { struct tcf_block *ingress_block; @@ -344,6 +345,7 @@ static struct Qdisc_ops clsact_qdisc_ops __read_mostly = { .egress_block_get = clsact_egress_block_get, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("clsact"); static int __init ingress_module_init(void) { diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 43e53ee00a56a..225353fbb3f15 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -774,6 +774,7 @@ static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { .dump = mqprio_dump, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("mqprio"); static int __init mqprio_module_init(void) { diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index d66d5f0ec0805..79e93a19d5fab 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -395,6 +395,7 @@ static struct Qdisc_ops multiq_qdisc_ops __read_mostly = { .dump = multiq_dump, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("multiq"); static int __init multiq_module_init(void) { diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index fa678eb885284..edc72962ae63a 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -1293,6 +1293,7 @@ static struct Qdisc_ops netem_qdisc_ops __read_mostly = { .dump = netem_dump, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("netem"); static int __init netem_module_init(void) diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 2da6250ec3463..1764059b06353 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -556,6 +556,7 @@ static struct Qdisc_ops pie_qdisc_ops __read_mostly = { .dump_stats = pie_dump_stats, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("pie"); static int __init pie_module_init(void) { diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c index 992f0c8d79886..cefb65201e178 100644 --- a/net/sched/sch_plug.c +++ b/net/sched/sch_plug.c @@ -213,6 +213,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = { .reset = qdisc_reset_queue, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("plug"); static int __init plug_module_init(void) { diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 8ecdd3ef6f8ea..cc30f7a32f1a7 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -418,6 +418,7 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = { .dump = prio_dump, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("prio"); static int __init prio_module_init(void) { diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 48a604c320c76..d584c0c25899c 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1521,6 +1521,7 @@ static struct Qdisc_ops qfq_qdisc_ops __read_mostly = { .destroy = qfq_destroy_qdisc, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("qfq"); static int __init qfq_init(void) { diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 607b6c8b3a9bf..b5f096588fae6 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -548,6 +548,7 @@ static struct Qdisc_ops red_qdisc_ops __read_mostly = { .dump_stats = red_dump_stats, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("red"); static int __init red_module_init(void) { diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 1871a1c0224d4..b717e15a3a17b 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -709,6 +709,7 @@ static struct Qdisc_ops sfb_qdisc_ops __read_mostly = { .dump_stats = sfb_dump_stats, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("sfb"); static int __init sfb_module_init(void) { diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index eb77558fa367e..e66f4afb920d2 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -925,6 +925,7 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = { .dump = sfq_dump, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("sfq"); static int __init sfq_module_init(void) { diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c index 28beb11762d8a..b4dd626c309c3 100644 --- a/net/sched/sch_skbprio.c +++ b/net/sched/sch_skbprio.c @@ -292,6 +292,7 @@ static struct Qdisc_ops skbprio_qdisc_ops __read_mostly = { .destroy = skbprio_destroy, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("skbprio"); static int __init skbprio_module_init(void) { diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 827cb683efbff..c5de70efdc866 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -2528,6 +2528,7 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly = { .dump_stats = taprio_dump_stats, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("taprio"); static struct notifier_block taprio_device_notifier = { .notifier_call = taprio_dev_notifier, diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index dd6b1a723bf72..f1d09183ae632 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -608,6 +608,7 @@ static struct Qdisc_ops tbf_qdisc_ops __read_mostly = { .dump = tbf_dump, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("tbf"); static int __init tbf_module_init(void) { -- cgit 1.2.3-korg From 2c15a5aee2f32e341d1585fa1867eece76a1edb8 Mon Sep 17 00:00:00 2001 From: Michal Koutný Date: Thu, 1 Feb 2024 14:09:42 +0100 Subject: net/sched: Load modules via their alias MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cls_,sch_,act_ modules may be loaded lazily during network configuration but without user's awareness and control. Switch the lazy loading from canonical module names to a module alias. This allows finer control over lazy loading, the precedent from commit 7f78e0351394 ("fs: Limit sys_mount to only request filesystem modules.") explains it already: Using aliases means user space can control the policy of which filesystem^W net/sched modules are auto-loaded by editing /etc/modprobe.d/*.conf with blacklist and alias directives. Allowing simple, safe, well understood work-arounds to known problematic software. By default, nothing changes. However, if a specific module is blacklisted (its canonical name), it won't be modprobe'd when requested under its alias (i.e. kernel auto-loading). It would appear as if the given module was unknown. The module can still be loaded under its canonical name, which is an explicit (privileged) user action. Signed-off-by: Michal Koutný Acked-by: Jamal Hadi Salim Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240201130943.19536-4-mkoutny@suse.com Signed-off-by: Jakub Kicinski --- net/sched/act_api.c | 2 +- net/sched/cls_api.c | 2 +- net/sched/sch_api.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 3e30d7260493e..9ee622fb1160f 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1363,7 +1363,7 @@ struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, u32 flags, if (rtnl_held) rtnl_unlock(); - request_module("act_%s", act_name); + request_module(NET_ACT_ALIAS_PREFIX "%s", act_name); if (rtnl_held) rtnl_lock(); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index ff3d396a65aac..ca5676b2668e1 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -257,7 +257,7 @@ tcf_proto_lookup_ops(const char *kind, bool rtnl_held, #ifdef CONFIG_MODULES if (rtnl_held) rtnl_unlock(); - request_module("cls_%s", kind); + request_module(NET_CLS_ALIAS_PREFIX "%s", kind); if (rtnl_held) rtnl_lock(); ops = __tcf_proto_lookup_ops(kind); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 36b025cc4fd26..9d928f6a473ab 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -228,7 +228,7 @@ int qdisc_set_default(const char *name) if (!ops) { /* Not found, drop lock and try to load module */ write_unlock(&qdisc_mod_lock); - request_module("sch_%s", name); + request_module(NET_SCH_ALIAS_PREFIX "%s", name); write_lock(&qdisc_mod_lock); ops = qdisc_lookup_default(name); @@ -1275,7 +1275,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, * go away in the mean time. */ rtnl_unlock(); - request_module("sch_%s", name); + request_module(NET_SCH_ALIAS_PREFIX "%s", name); rtnl_lock(); ops = qdisc_lookup_ops(kind); if (ops != NULL) { -- cgit 1.2.3-korg From 6cff015817890a97311a10d3482906cfb67f14bf Mon Sep 17 00:00:00 2001 From: Michal Koutný Date: Thu, 1 Feb 2024 14:09:43 +0100 Subject: net/sched: Remove alias of sch_clsact MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The module sch_ingress stands out among net/sched modules because it provides multiple act/sch functionalities in a single .ko. They have aliases to make autoloading work for any of the provided functionalities. Since the autoloading was changed to uniformly request any functionality under its alias, the non-systemic aliases can be removed now (i.e. assuming the alias were only used to ensure autoloading). Signed-off-by: Michal Koutný Acked-by: Jamal Hadi Salim Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240201130943.19536-5-mkoutny@suse.com Signed-off-by: Jakub Kicinski --- net/sched/sch_ingress.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 48a800131e99d..c2ef9dcf91d2d 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -370,6 +370,5 @@ static void __exit ingress_module_exit(void) module_init(ingress_module_init); module_exit(ingress_module_exit); -MODULE_ALIAS("sch_clsact"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Ingress and clsact based ingress and egress qdiscs"); -- cgit 1.2.3-korg From dfd2ee086a63c730022cb095576a8b3a5a752109 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 1 Feb 2024 17:30:31 +0000 Subject: ipv6: make addrconf_wq single threaded Both addrconf_verify_work() and addrconf_dad_work() acquire rtnl, there is no point trying to have one thread per cpu. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240201173031.3654257-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 733ace18806c6..d63f5d063f073 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -7349,7 +7349,8 @@ int __init addrconf_init(void) if (err < 0) goto out_addrlabel; - addrconf_wq = create_workqueue("ipv6_addrconf"); + /* All works using addrconf_wq need to lock rtnl. */ + addrconf_wq = create_singlethread_workqueue("ipv6_addrconf"); if (!addrconf_wq) { err = -ENOMEM; goto out_nowq; -- cgit 1.2.3-korg From 679dd27b4ef33d4f596cbf450a3b2742fc54962a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 4 Feb 2024 11:03:39 +0100 Subject: wifi: cfg80211: fix kunit exports These can only be exported if cfg80211's kunit is enabled, since they're otherwise static. kunit itself can be enabled even if cfg80211's kunit isn't. Fix that by using the right macro. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202402040534.6AEKtZ7Y-lkp@intel.com/ Fixes: 45d43937a44c ("wifi: cfg80211: add a kunit test for 6 GHz colocated AP parsing") Signed-off-by: Johannes Berg --- net/wireless/scan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index c78b10e1a1674..6dd9df347771e 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -537,7 +537,7 @@ cfg80211_free_coloc_ap_list(struct list_head *coloc_ap_list) kfree(ap); } } -EXPORT_SYMBOL_IF_KUNIT(cfg80211_free_coloc_ap_list); +EXPORT_SYMBOL_IF_CFG80211_KUNIT(cfg80211_free_coloc_ap_list); static int cfg80211_parse_ap_info(struct cfg80211_colocated_ap *entry, const u8 *pos, u8 length, @@ -710,7 +710,7 @@ error: list_splice_tail(&ap_list, list); return n_coloc; } -EXPORT_SYMBOL_IF_KUNIT(cfg80211_parse_colocated_ap); +EXPORT_SYMBOL_IF_CFG80211_KUNIT(cfg80211_parse_colocated_ap); static void cfg80211_scan_req_add_chan(struct cfg80211_scan_request *request, struct ieee80211_channel *chan, -- cgit 1.2.3-korg From ffabe98cb576097b77d404d39e8b3df03caa986a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Feb 2024 10:11:06 +0000 Subject: net: make dev_unreg_count global We can use a global dev_unreg_count counter instead of a per netns one. As a bonus we can factorize the changes done on it for bulk device removals. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 1 + include/net/net_namespace.h | 2 -- net/core/dev.c | 12 +++++++++--- net/core/rtnetlink.c | 11 +---------- 4 files changed, 11 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 410529fca18b2..21780608cf47c 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -47,6 +47,7 @@ extern int rtnl_lock_killable(void); extern bool refcount_dec_and_rtnl_lock(refcount_t *r); extern wait_queue_head_t netdev_unregistering_wq; +extern atomic_t dev_unreg_count; extern struct rw_semaphore pernet_ops_rwsem; extern struct rw_semaphore net_rwsem; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 13b3a4e29fdb3..cd0c2eedbb5e9 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -67,8 +67,6 @@ struct net { */ spinlock_t rules_mod_lock; - atomic_t dev_unreg_count; - unsigned int dev_base_seq; /* protected by rtnl_mutex */ u32 ifindex; diff --git a/net/core/dev.c b/net/core/dev.c index b53b9c94de400..27ba057d06c49 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9698,11 +9698,11 @@ static void dev_index_release(struct net *net, int ifindex) /* Delayed registration/unregisteration */ LIST_HEAD(net_todo_list); DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); +atomic_t dev_unreg_count = ATOMIC_INIT(0); static void net_set_todo(struct net_device *dev) { list_add_tail(&dev->todo_list, &net_todo_list); - atomic_inc(&dev_net(dev)->dev_unreg_count); } static netdev_features_t netdev_sync_upper_features(struct net_device *lower, @@ -10529,6 +10529,7 @@ void netdev_run_todo(void) { struct net_device *dev, *tmp; struct list_head list; + int cnt; #ifdef CONFIG_LOCKDEP struct list_head unlink_list; @@ -10565,6 +10566,7 @@ void netdev_run_todo(void) linkwatch_sync_dev(dev); } + cnt = 0; while (!list_empty(&list)) { dev = netdev_wait_allrefs_any(&list); list_del(&dev->todo_list); @@ -10582,12 +10584,13 @@ void netdev_run_todo(void) if (dev->needs_free_netdev) free_netdev(dev); - if (atomic_dec_and_test(&dev_net(dev)->dev_unreg_count)) - wake_up(&netdev_unregistering_wq); + cnt++; /* Free network device */ kobject_put(&dev->dev.kobj); } + if (cnt && atomic_sub_and_test(cnt, &dev_unreg_count)) + wake_up(&netdev_unregistering_wq); } /* Convert net_device_stats to rtnl_link_stats64. rtnl_link_stats64 has @@ -11034,6 +11037,7 @@ void unregister_netdevice_many_notify(struct list_head *head, { struct net_device *dev, *tmp; LIST_HEAD(close_head); + int cnt = 0; BUG_ON(dev_boot_phase); ASSERT_RTNL(); @@ -11130,7 +11134,9 @@ void unregister_netdevice_many_notify(struct list_head *head, list_for_each_entry(dev, head, unreg_list) { netdev_put(dev, &dev->dev_registered_tracker); net_set_todo(dev); + cnt++; } + atomic_add(cnt, &dev_unreg_count); list_del(head); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f6f29eb03ec27..31f433950c8dc 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -483,24 +483,15 @@ EXPORT_SYMBOL_GPL(__rtnl_link_unregister); */ static void rtnl_lock_unregistering_all(void) { - struct net *net; - bool unregistering; DEFINE_WAIT_FUNC(wait, woken_wake_function); add_wait_queue(&netdev_unregistering_wq, &wait); for (;;) { - unregistering = false; rtnl_lock(); /* We held write locked pernet_ops_rwsem, and parallel * setup_net() and cleanup_net() are not possible. */ - for_each_net(net) { - if (atomic_read(&net->dev_unreg_count) > 0) { - unregistering = true; - break; - } - } - if (!unregistering) + if (!atomic_read(&dev_unreg_count)) break; __rtnl_unlock(); -- cgit 1.2.3-korg From 1c09d7cbb57abcea66148923cef717cc7ab35704 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 2 Feb 2024 12:40:07 +0100 Subject: mptcp: annotate access for msk keys Both the local and the remote key follow the same locking schema, put in place the proper ONCE accessors. Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/options.c | 12 ++++++------ net/mptcp/protocol.c | 2 +- net/mptcp/protocol.h | 6 ++++-- net/mptcp/subflow.c | 10 ++++++---- 4 files changed, 17 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/mptcp/options.c b/net/mptcp/options.c index d2527d189a799..5e2b130d86804 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -689,8 +689,8 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * opts->suboptions |= OPTION_MPTCP_ADD_ADDR; if (!echo) { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDRTX); - opts->ahmac = add_addr_generate_hmac(msk->local_key, - msk->remote_key, + opts->ahmac = add_addr_generate_hmac(READ_ONCE(msk->local_key), + READ_ONCE(msk->remote_key), &opts->addr); } else { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADDTX); @@ -792,7 +792,7 @@ static bool mptcp_established_options_fastclose(struct sock *sk, *size = TCPOLEN_MPTCP_FASTCLOSE; opts->suboptions |= OPTION_MPTCP_FASTCLOSE; - opts->rcvr_key = msk->remote_key; + opts->rcvr_key = READ_ONCE(msk->remote_key); pr_debug("FASTCLOSE key=%llu", opts->rcvr_key); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX); @@ -1099,8 +1099,8 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk, if (mp_opt->echo) return true; - hmac = add_addr_generate_hmac(msk->remote_key, - msk->local_key, + hmac = add_addr_generate_hmac(READ_ONCE(msk->remote_key), + READ_ONCE(msk->local_key), &mp_opt->addr); pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n", @@ -1147,7 +1147,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) if (unlikely(mp_opt.suboptions != OPTION_MPTCP_DSS)) { if ((mp_opt.suboptions & OPTION_MPTCP_FASTCLOSE) && - msk->local_key == mp_opt.rcvr_key) { + READ_ONCE(msk->local_key) == mp_opt.rcvr_key) { WRITE_ONCE(msk->rcv_fastclose, true); mptcp_schedule_work((struct sock *)msk); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSERX); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 028e8b473626f..874f019c5093a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3193,7 +3193,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, __mptcp_init_sock(nsk); msk = mptcp_sk(nsk); - msk->local_key = subflow_req->local_key; + WRITE_ONCE(msk->local_key, subflow_req->local_key); msk->token = subflow_req->token; msk->in_accept_queue = 1; WRITE_ONCE(msk->fully_established, false); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 3517f2d24a226..13b8cf8ec7044 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -260,8 +260,10 @@ struct mptcp_data_frag { struct mptcp_sock { /* inet_connection_sock must be the first member */ struct inet_connection_sock sk; - u64 local_key; - u64 remote_key; + u64 local_key; /* protected by the first subflow socket lock + * lockless access read + */ + u64 remote_key; /* same as above */ u64 write_seq; u64 bytes_sent; u64 snd_nxt; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 0dcb721c89d19..d60b83511302b 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -75,7 +75,8 @@ static void subflow_req_create_thmac(struct mptcp_subflow_request_sock *subflow_ get_random_bytes(&subflow_req->local_nonce, sizeof(u32)); - subflow_generate_hmac(msk->local_key, msk->remote_key, + subflow_generate_hmac(READ_ONCE(msk->local_key), + READ_ONCE(msk->remote_key), subflow_req->local_nonce, subflow_req->remote_nonce, hmac); @@ -694,7 +695,8 @@ static bool subflow_hmac_valid(const struct request_sock *req, if (!msk) return false; - subflow_generate_hmac(msk->remote_key, msk->local_key, + subflow_generate_hmac(READ_ONCE(msk->remote_key), + READ_ONCE(msk->local_key), subflow_req->remote_nonce, subflow_req->local_nonce, hmac); @@ -1530,8 +1532,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id, &flags, &ifindex); subflow->remote_key_valid = 1; - subflow->remote_key = msk->remote_key; - subflow->local_key = msk->local_key; + subflow->remote_key = READ_ONCE(msk->remote_key); + subflow->local_key = READ_ONCE(msk->local_key); subflow->token = msk->token; mptcp_info2sockaddr(loc, &addr, ssk->sk_family); -- cgit 1.2.3-korg From d440a4e27acdede686b974b62a6b2b2bd7914437 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 2 Feb 2024 12:40:08 +0100 Subject: mptcp: annotate lockless access for the tx path The mptcp-level TX path info (write_seq, bytes_sent, snd_nxt) are under the msk socket lock protection, and are accessed lockless in a few spots. Always mark the write operations with WRITE_ONCE, read operations outside the lock with READ_ONCE and drop the annotation for read under such lock. To simplify the annotations move mptcp_pending_data_fin_ack() from __mptcp_data_acked() to __mptcp_clean_una(), under the msk socket lock, where such call would belong. Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/options.c | 2 +- net/mptcp/protocol.c | 15 +++++++-------- net/mptcp/protocol.h | 2 +- 3 files changed, 9 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 5e2b130d86804..9b31a0a06265d 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1060,7 +1060,7 @@ static void ack_update_msk(struct mptcp_sock *msk, msk->wnd_end = new_wnd_end; /* this assumes mptcp_incoming_options() is invoked after tcp_ack() */ - if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt))) + if (after64(msk->wnd_end, snd_nxt)) __mptcp_check_push(sk, ssk); if (after64(new_snd_una, old_snd_una)) { diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 874f019c5093a..b1c24ac3630ff 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1033,13 +1033,15 @@ static void __mptcp_clean_una(struct sock *sk) msk->recovery = false; out: - if (snd_una == READ_ONCE(msk->snd_nxt) && - snd_una == READ_ONCE(msk->write_seq)) { + if (snd_una == msk->snd_nxt && snd_una == msk->write_seq) { if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk)) mptcp_stop_rtx_timer(sk); } else { mptcp_reset_rtx_timer(sk); } + + if (mptcp_pending_data_fin_ack(sk)) + mptcp_schedule_work(sk); } static void __mptcp_clean_una_wakeup(struct sock *sk) @@ -1499,7 +1501,7 @@ static void mptcp_update_post_push(struct mptcp_sock *msk, */ if (likely(after64(snd_nxt_new, msk->snd_nxt))) { msk->bytes_sent += snd_nxt_new - msk->snd_nxt; - msk->snd_nxt = snd_nxt_new; + WRITE_ONCE(msk->snd_nxt, snd_nxt_new); } } @@ -3200,8 +3202,8 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD) WRITE_ONCE(msk->csum_enabled, true); - msk->write_seq = subflow_req->idsn + 1; - msk->snd_nxt = msk->write_seq; + WRITE_ONCE(msk->write_seq, subflow_req->idsn + 1); + WRITE_ONCE(msk->snd_nxt, msk->write_seq); msk->snd_una = msk->write_seq; msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd; msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq; @@ -3303,9 +3305,6 @@ void __mptcp_data_acked(struct sock *sk) __mptcp_clean_una(sk); else __set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->cb_flags); - - if (mptcp_pending_data_fin_ack(sk)) - mptcp_schedule_work(sk); } void __mptcp_check_push(struct sock *sk, struct sock *ssk) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 13b8cf8ec7044..421dede93e2b7 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -402,7 +402,7 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - if (msk->snd_una == READ_ONCE(msk->snd_nxt)) + if (msk->snd_una == msk->snd_nxt) return NULL; return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list); -- cgit 1.2.3-korg From 9426ce476a705d13eb086d4be7c817856417816f Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 2 Feb 2024 12:40:09 +0100 Subject: mptcp: annotate lockless access for RX path fields The following fields: - ack_seq - snd_una - wnd_end - rmem_fwd_alloc are protected by the data lock end accessed lockless in a few spots. Ensure ONCE annotation for write (under such lock) and for lockless read. Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/options.c | 6 +++--- net/mptcp/protocol.c | 19 +++++++++++-------- 2 files changed, 14 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 9b31a0a06265d..801a3525230d0 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1030,7 +1030,7 @@ u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq) static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una) { msk->bytes_acked += new_snd_una - msk->snd_una; - msk->snd_una = new_snd_una; + WRITE_ONCE(msk->snd_una, new_snd_una); } static void ack_update_msk(struct mptcp_sock *msk, @@ -1057,7 +1057,7 @@ static void ack_update_msk(struct mptcp_sock *msk, new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd; if (after64(new_wnd_end, msk->wnd_end)) - msk->wnd_end = new_wnd_end; + WRITE_ONCE(msk->wnd_end, new_wnd_end); /* this assumes mptcp_incoming_options() is invoked after tcp_ack() */ if (after64(msk->wnd_end, snd_nxt)) @@ -1071,7 +1071,7 @@ static void ack_update_msk(struct mptcp_sock *msk, trace_ack_update_msk(mp_opt->data_ack, old_snd_una, new_snd_una, - new_wnd_end, msk->wnd_end); + new_wnd_end, READ_ONCE(msk->wnd_end)); } bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b1c24ac3630ff..90804e7fb7f6f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -410,6 +410,7 @@ static void mptcp_close_wake_up(struct sock *sk) sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); } +/* called under the msk socket lock */ static bool mptcp_pending_data_fin_ack(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -441,16 +442,17 @@ static void mptcp_check_data_fin_ack(struct sock *sk) } } +/* can be called with no lock acquired */ static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq) { struct mptcp_sock *msk = mptcp_sk(sk); if (READ_ONCE(msk->rcv_data_fin) && - ((1 << sk->sk_state) & + ((1 << inet_sk_state_load(sk)) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2))) { u64 rcv_data_fin_seq = READ_ONCE(msk->rcv_data_fin_seq); - if (msk->ack_seq == rcv_data_fin_seq) { + if (READ_ONCE(msk->ack_seq) == rcv_data_fin_seq) { if (seq) *seq = rcv_data_fin_seq; @@ -748,7 +750,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk) __skb_queue_tail(&sk->sk_receive_queue, skb); } msk->bytes_received += end_seq - msk->ack_seq; - msk->ack_seq = end_seq; + WRITE_ONCE(msk->ack_seq, end_seq); moved = true; } return moved; @@ -985,6 +987,7 @@ static void dfrag_clear(struct sock *sk, struct mptcp_data_frag *dfrag) put_page(dfrag->page); } +/* called under both the msk socket lock and the data lock */ static void __mptcp_clean_una(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -2110,7 +2113,7 @@ static unsigned int mptcp_inq_hint(const struct sock *sk) skb = skb_peek(&msk->receive_queue); if (skb) { - u64 hint_val = msk->ack_seq - MPTCP_SKB_CB(skb)->map_seq; + u64 hint_val = READ_ONCE(msk->ack_seq) - MPTCP_SKB_CB(skb)->map_seq; if (hint_val >= INT_MAX) return INT_MAX; @@ -2754,7 +2757,7 @@ static void __mptcp_init_sock(struct sock *sk) __skb_queue_head_init(&msk->receive_queue); msk->out_of_order_queue = RB_ROOT; msk->first_pending = NULL; - msk->rmem_fwd_alloc = 0; + WRITE_ONCE(msk->rmem_fwd_alloc, 0); WRITE_ONCE(msk->rmem_released, 0); msk->timer_ival = TCP_RTO_MIN; msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO; @@ -2970,7 +2973,7 @@ static void __mptcp_destroy_sock(struct sock *sk) sk->sk_prot->destroy(sk); - WARN_ON_ONCE(msk->rmem_fwd_alloc); + WARN_ON_ONCE(READ_ONCE(msk->rmem_fwd_alloc)); WARN_ON_ONCE(msk->rmem_released); sk_stream_kill_queues(sk); xfrm_sk_free_policy(sk); @@ -3204,8 +3207,8 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, WRITE_ONCE(msk->write_seq, subflow_req->idsn + 1); WRITE_ONCE(msk->snd_nxt, msk->write_seq); - msk->snd_una = msk->write_seq; - msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd; + WRITE_ONCE(msk->snd_una, msk->write_seq); + WRITE_ONCE(msk->wnd_end, msk->snd_nxt + req->rsk_rcv_wnd); msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq; mptcp_init_sched(msk, mptcp_sk(sk)->sched); -- cgit 1.2.3-korg From b9f4554356f60c6ab33ef6604d1aa94475449b1f Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 2 Feb 2024 12:40:10 +0100 Subject: mptcp: annotate lockless access for token The token field is manipulated under the msk socket lock and accessed lockless in a few spots, add proper ONCE annotation Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/pm.c | 2 +- net/mptcp/pm_netlink.c | 10 +++++----- net/mptcp/protocol.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 4ae19113b8eb2..53e0b08b11232 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -77,7 +77,7 @@ void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int { struct mptcp_pm_data *pm = &msk->pm; - pr_debug("msk=%p, token=%u side=%d", msk, msk->token, server_side); + pr_debug("msk=%p, token=%u side=%d", msk, READ_ONCE(msk->token), server_side); WRITE_ONCE(pm->server_side, server_side); mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 287a60381eae6..d9ad459592193 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1997,7 +1997,7 @@ static int mptcp_event_put_token_and_ssk(struct sk_buff *skb, const struct mptcp_subflow_context *sf; u8 sk_err; - if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token)) + if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token))) return -EMSGSIZE; if (mptcp_event_add_subflow(skb, ssk)) @@ -2055,7 +2055,7 @@ static int mptcp_event_created(struct sk_buff *skb, const struct mptcp_sock *msk, const struct sock *ssk) { - int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token); + int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token)); if (err) return err; @@ -2083,7 +2083,7 @@ void mptcp_event_addr_removed(const struct mptcp_sock *msk, uint8_t id) if (!nlh) goto nla_put_failure; - if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token)) + if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token))) goto nla_put_failure; if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, id)) @@ -2118,7 +2118,7 @@ void mptcp_event_addr_announced(const struct sock *ssk, if (!nlh) goto nla_put_failure; - if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token)) + if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token))) goto nla_put_failure; if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id)) @@ -2234,7 +2234,7 @@ void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, goto nla_put_failure; break; case MPTCP_EVENT_CLOSED: - if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token) < 0) + if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token)) < 0) goto nla_put_failure; break; case MPTCP_EVENT_ANNOUNCED: diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 90804e7fb7f6f..4478ddd3b5fa7 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3199,7 +3199,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, msk = mptcp_sk(nsk); WRITE_ONCE(msk->local_key, subflow_req->local_key); - msk->token = subflow_req->token; + WRITE_ONCE(msk->token, subflow_req->token); msk->in_accept_queue = 1; WRITE_ONCE(msk->fully_established, false); if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD) -- cgit 1.2.3-korg From 28e5c138050670e813017102c1ac842a6a0aa297 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 2 Feb 2024 12:40:11 +0100 Subject: mptcp: annotate lockless accesses around read-mostly fields The following MPTCP socket fields: - can_ack - fully_established - rcv_data_fin - snd_data_fin_enable - rcv_fastclose - use_64bit_ack are accessed without any lock, add the appropriate annotation. The schema is safe as each field can change its value at most once in the whole mptcp socket life cycle. Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 14 +++++++------- net/mptcp/sockopt.c | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 4478ddd3b5fa7..ad39f54b3a81b 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3149,16 +3149,16 @@ static int mptcp_disconnect(struct sock *sk, int flags) msk->cb_flags = 0; msk->push_pending = 0; msk->recovery = false; - msk->can_ack = false; - msk->fully_established = false; - msk->rcv_data_fin = false; - msk->snd_data_fin_enable = false; - msk->rcv_fastclose = false; - msk->use_64bit_ack = false; - msk->bytes_consumed = 0; + WRITE_ONCE(msk->can_ack, false); + WRITE_ONCE(msk->fully_established, false); + WRITE_ONCE(msk->rcv_data_fin, false); + WRITE_ONCE(msk->snd_data_fin_enable, false); + WRITE_ONCE(msk->rcv_fastclose, false); + WRITE_ONCE(msk->use_64bit_ack, false); WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); mptcp_pm_data_reset(msk); mptcp_ca_reset(sk); + msk->bytes_consumed = 0; msk->bytes_acked = 0; msk->bytes_received = 0; msk->bytes_sent = 0; diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index c40f1428e6020..da37e4541a5d7 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -942,7 +942,7 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) mptcp_data_unlock(sk); slow = lock_sock_fast(sk); - info->mptcpi_csum_enabled = msk->csum_enabled; + info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); info->mptcpi_token = msk->token; info->mptcpi_write_seq = msk->write_seq; info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits; -- cgit 1.2.3-korg From 0cd216d769fbd161c06f5a702bf7a951f276f558 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 2 Feb 2024 18:20:41 +0200 Subject: net: dsa: reindent arguments of dsa_user_vlan_for_each() These got misaligned after commit 6ca80638b90c ("net: dsa: Use conduit and user terms"). Signed-off-by: Vladimir Oltean Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/user.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/dsa/user.c b/net/dsa/user.c index e03da3a4ac380..ba2e53b5e16a0 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -210,7 +210,7 @@ static int dsa_user_sync_uc(struct net_device *dev, return 0; return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering, - &ctx); + &ctx); } static int dsa_user_unsync_uc(struct net_device *dev, @@ -230,7 +230,7 @@ static int dsa_user_unsync_uc(struct net_device *dev, return 0; return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering, - &ctx); + &ctx); } static int dsa_user_sync_mc(struct net_device *dev, @@ -250,7 +250,7 @@ static int dsa_user_sync_mc(struct net_device *dev, return 0; return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering, - &ctx); + &ctx); } static int dsa_user_unsync_mc(struct net_device *dev, @@ -270,7 +270,7 @@ static int dsa_user_unsync_mc(struct net_device *dev, return 0; return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering, - &ctx); + &ctx); } void dsa_user_sync_ha(struct net_device *dev) -- cgit 1.2.3-korg From fd2bc4195d5107f88c1b90e1ec935888ccbfc5c0 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 3 Oct 2023 20:57:20 +0300 Subject: xfrm: generalize xdo_dev_state_update_curlft to allow statistics update In order to allow drivers to fill all statistics, change the name of xdo_dev_state_update_curlft to be xdo_dev_state_update_stats. Acked-by: Steffen Klassert Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- Documentation/networking/xfrm_device.rst | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 7 ++++--- include/linux/netdevice.h | 2 +- include/net/xfrm.h | 11 ++++------- net/xfrm/xfrm_state.c | 4 ++-- net/xfrm/xfrm_user.c | 2 +- 6 files changed, 14 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/Documentation/networking/xfrm_device.rst b/Documentation/networking/xfrm_device.rst index 535077cbeb07d..bfea9d8579ede 100644 --- a/Documentation/networking/xfrm_device.rst +++ b/Documentation/networking/xfrm_device.rst @@ -71,9 +71,9 @@ Callbacks to implement bool (*xdo_dev_offload_ok) (struct sk_buff *skb, struct xfrm_state *x); void (*xdo_dev_state_advance_esn) (struct xfrm_state *x); + void (*xdo_dev_state_update_stats) (struct xfrm_state *x); /* Solely packet offload callbacks */ - void (*xdo_dev_state_update_curlft) (struct xfrm_state *x); int (*xdo_dev_policy_add) (struct xfrm_policy *x, struct netlink_ext_ack *extack); void (*xdo_dev_policy_delete) (struct xfrm_policy *x); void (*xdo_dev_policy_free) (struct xfrm_policy *x); @@ -191,6 +191,6 @@ xdo_dev_policy_free() on any remaining offloaded states. Outcome of HW handling packets, the XFRM core can't count hard, soft limits. The HW/driver are responsible to perform it and provide accurate data when -xdo_dev_state_update_curlft() is called. In case of one of these limits +xdo_dev_state_update_stats() is called. In case of one of these limits occuried, the driver needs to call to xfrm_state_check_expire() to make sure that XFRM performs rekeying sequence. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 05612d9c6080c..f160522fbe757 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -984,7 +984,7 @@ static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x) queue_work(sa_entry->ipsec->wq, &work->work); } -static void mlx5e_xfrm_update_curlft(struct xfrm_state *x) +static void mlx5e_xfrm_update_stats(struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule; @@ -993,7 +993,8 @@ static void mlx5e_xfrm_update_curlft(struct xfrm_state *x) lockdep_assert(lockdep_is_held(&x->lock) || lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex)); - if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) + if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ || + x->xso.type != XFRM_DEV_OFFLOAD_PACKET) return; mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse); @@ -1156,7 +1157,7 @@ static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = { .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok, .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state, - .xdo_dev_state_update_curlft = mlx5e_xfrm_update_curlft, + .xdo_dev_state_update_stats = mlx5e_xfrm_update_stats, .xdo_dev_policy_add = mlx5e_xfrm_add_policy, .xdo_dev_policy_delete = mlx5e_xfrm_del_policy, .xdo_dev_policy_free = mlx5e_xfrm_free_policy, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 118c40258d07b..9538576dbebcd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1062,7 +1062,7 @@ struct xfrmdev_ops { bool (*xdo_dev_offload_ok) (struct sk_buff *skb, struct xfrm_state *x); void (*xdo_dev_state_advance_esn) (struct xfrm_state *x); - void (*xdo_dev_state_update_curlft) (struct xfrm_state *x); + void (*xdo_dev_state_update_stats) (struct xfrm_state *x); int (*xdo_dev_policy_add) (struct xfrm_policy *x, struct netlink_ext_ack *extack); void (*xdo_dev_policy_delete) (struct xfrm_policy *x); void (*xdo_dev_policy_free) (struct xfrm_policy *x); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 1d107241b9018..4ca2f3205190c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1578,21 +1578,18 @@ struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi, unsigned short family); int xfrm_state_check_expire(struct xfrm_state *x); #ifdef CONFIG_XFRM_OFFLOAD -static inline void xfrm_dev_state_update_curlft(struct xfrm_state *x) +static inline void xfrm_dev_state_update_stats(struct xfrm_state *x) { struct xfrm_dev_offload *xdo = &x->xso; struct net_device *dev = xdo->dev; - if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) - return; - if (dev && dev->xfrmdev_ops && - dev->xfrmdev_ops->xdo_dev_state_update_curlft) - dev->xfrmdev_ops->xdo_dev_state_update_curlft(x); + dev->xfrmdev_ops->xdo_dev_state_update_stats) + dev->xfrmdev_ops->xdo_dev_state_update_stats(x); } #else -static inline void xfrm_dev_state_update_curlft(struct xfrm_state *x) {} +static inline void xfrm_dev_state_update_stats(struct xfrm_state *x) {} #endif void xfrm_state_insert(struct xfrm_state *x); int xfrm_state_add(struct xfrm_state *x); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index bda5327bf34df..d8701b2d0d57b 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -570,7 +570,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me) int err = 0; spin_lock(&x->lock); - xfrm_dev_state_update_curlft(x); + xfrm_dev_state_update_stats(x); if (x->km.state == XFRM_STATE_DEAD) goto out; @@ -1935,7 +1935,7 @@ EXPORT_SYMBOL(xfrm_state_update); int xfrm_state_check_expire(struct xfrm_state *x) { - xfrm_dev_state_update_curlft(x); + xfrm_dev_state_update_stats(x); if (!READ_ONCE(x->curlft.use_time)) WRITE_ONCE(x->curlft.use_time, ktime_get_real_seconds()); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ad01997c3aa9d..dc4f9b8d7cb0f 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -902,7 +902,7 @@ static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) memcpy(&p->sel, &x->sel, sizeof(p->sel)); memcpy(&p->lft, &x->lft, sizeof(p->lft)); if (x->xso.dev) - xfrm_dev_state_update_curlft(x); + xfrm_dev_state_update_stats(x); memcpy(&p->curlft, &x->curlft, sizeof(p->curlft)); put_unaligned(x->stats.replay_window, &p->stats.replay_window); put_unaligned(x->stats.replay, &p->stats.replay); -- cgit 1.2.3-korg From f9f221c98fd83df518fbb2f5ad33980cfedfe1bf Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 4 Oct 2023 14:11:48 +0300 Subject: xfrm: get global statistics from the offloaded device Iterate over all SAs in order to fill global IPsec statistics. Acked-by: Steffen Klassert Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 3 ++- include/net/xfrm.h | 3 +++ net/xfrm/xfrm_proc.c | 1 + net/xfrm/xfrm_state.c | 13 +++++++++++++ 4 files changed, 19 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index f160522fbe757..46cce4e38d846 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -991,7 +991,8 @@ static void mlx5e_xfrm_update_stats(struct xfrm_state *x) u64 packets, bytes, lastuse; lockdep_assert(lockdep_is_held(&x->lock) || - lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex)); + lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex) || + lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_state_lock)); if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ || x->xso.type != XFRM_DEV_OFFLOAD_PACKET) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4ca2f3205190c..57c743b7e4fef 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -51,8 +51,10 @@ #ifdef CONFIG_XFRM_STATISTICS #define XFRM_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.xfrm_statistics, field) +#define XFRM_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.xfrm_statistics, field, val) #else #define XFRM_INC_STATS(net, field) ((void)(net)) +#define XFRM_ADD_STATS(net, field, val) ((void)(net)) #endif @@ -1577,6 +1579,7 @@ struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id, struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi, unsigned short family); int xfrm_state_check_expire(struct xfrm_state *x); +void xfrm_state_update_stats(struct net *net); #ifdef CONFIG_XFRM_OFFLOAD static inline void xfrm_dev_state_update_stats(struct xfrm_state *x) { diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index fee9b5cf37a7c..5f9bf8e5c9338 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -52,6 +52,7 @@ static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) memset(buff, 0, sizeof(unsigned long) * LINUX_MIB_XFRMMAX); + xfrm_state_update_stats(net); snmp_get_cpu_field_batch(buff, xfrm_mib_list, net->mib.xfrm_statistics); for (i = 0; xfrm_mib_list[i].name; i++) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index d8701b2d0d57b..0c306473a79d1 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1957,6 +1957,19 @@ int xfrm_state_check_expire(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_check_expire); +void xfrm_state_update_stats(struct net *net) +{ + struct xfrm_state *x; + int i; + + spin_lock_bh(&net->xfrm.xfrm_state_lock); + for (i = 0; i <= net->xfrm.state_hmask; i++) { + hlist_for_each_entry(x, net->xfrm.state_bydst + i, bydst) + xfrm_dev_state_update_stats(x); + } + spin_unlock_bh(&net->xfrm.xfrm_state_lock); +} + struct xfrm_state * xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) -- cgit 1.2.3-korg From 2863d665ea41282379f108e4da6c8a2366ba66db Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Mon, 5 Feb 2024 13:35:50 +0100 Subject: xsk: support redirect to any socket bound to the same umem Add support for directing a packet to any socket bound to the same umem. This makes it possible to use the XDP program to select what socket the packet should be received on. The user can populate the XSKMAP with various sockets and as long as they share the same umem, the XDP program can pick any one of them. Suggested-by: Yuval El-Hanany Signed-off-by: Magnus Karlsson Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20240205123553.22180-2-magnus.karlsson@gmail.com Signed-off-by: Alexei Starovoitov --- net/xdp/xsk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 1eadfac03cc41..a339e9a1b5575 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -313,10 +313,13 @@ static bool xsk_is_bound(struct xdp_sock *xs) static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) { + struct net_device *dev = xdp->rxq->dev; + u32 qid = xdp->rxq->queue_index; + if (!xsk_is_bound(xs)) return -ENXIO; - if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) + if (!dev->_rx[qid].pool || xs->umem != dev->_rx[qid].pool->umem) return -EINVAL; if (len > xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) { -- cgit 1.2.3-korg From 03ba6dc035c60991033529e630bd1552b2bca4d7 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 2 Feb 2024 17:37:46 +0100 Subject: net: dst: Make dst_destroy() static and return void. Since commit 52df157f17e56 ("xfrm: take refcnt of dst when creating struct xfrm_dst bundle") dst_destroy() returns only NULL and no caller cares about the return value. There are no in in-tree users of dst_destroy() outside of the file. Make dst_destroy() static and return void. Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240202163746.2489150-1-bigeasy@linutronix.de Signed-off-by: Paolo Abeni --- include/net/dst.h | 1 - net/core/dst.c | 6 ++---- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/dst.h b/include/net/dst.h index f5dfc8fb7b379..0aa331bd2fdb7 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -390,7 +390,6 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, void dst_init(struct dst_entry *dst, struct dst_ops *ops, struct net_device *dev, int initial_obsolete, unsigned short flags); -struct dst_entry *dst_destroy(struct dst_entry *dst); void dst_dev_put(struct dst_entry *dst); static inline void dst_confirm(struct dst_entry *dst) diff --git a/net/core/dst.c b/net/core/dst.c index 6838d3212c374..95f533844f17f 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -96,7 +96,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, } EXPORT_SYMBOL(dst_alloc); -struct dst_entry *dst_destroy(struct dst_entry * dst) +static void dst_destroy(struct dst_entry *dst) { struct dst_entry *child = NULL; @@ -126,15 +126,13 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) dst = child; if (dst) dst_release_immediate(dst); - return NULL; } -EXPORT_SYMBOL(dst_destroy); static void dst_destroy_rcu(struct rcu_head *head) { struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); - dst = dst_destroy(dst); + dst_destroy(dst); } /* Operations to mark dst as DEAD and clean up the net device referenced -- cgit 1.2.3-korg From 06e6bc1b7aafa309c38d9d6623e8be3846647207 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 2 Feb 2024 15:11:09 -0500 Subject: tipc: rename the module name diag to tipc_diag It is not appropriate for TIPC to use "diag" as its diag module name while the other protocols are using "$(protoname)_diag" like tcp_diag, udp_diag and sctp_diag etc. So this patch is to rename diag.ko to tipc_diag.ko in tipc's Makefile. Signed-off-by: Xin Long Reviewed-by: Tung Nguyen Link: https://lore.kernel.org/r/d909edeef072da1810bd5869fdbbfe84411efdb2.1706904669.git.lucien.xin@gmail.com Signed-off-by: Paolo Abeni --- net/tipc/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/tipc/Makefile b/net/tipc/Makefile index ee49a9f1dd4fe..18e1636aa036c 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -18,5 +18,5 @@ tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o tipc-$(CONFIG_SYSCTL) += sysctl.o tipc-$(CONFIG_TIPC_CRYPTO) += crypto.o - -obj-$(CONFIG_TIPC_DIAG) += diag.o +obj-$(CONFIG_TIPC_DIAG) += tipc_diag.o +tipc_diag-y += diag.o -- cgit 1.2.3-korg From d6f4aac19ad44998c6b1cde0334ad76900136ca4 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 3 Feb 2024 08:51:03 +0100 Subject: nfc: hci: Introduce nfc_llc_del_engine() to reduce code duplication Add a new helper to avoid code duplication between nfc_llc_exit() and nfc_llc_unregister(). Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- net/nfc/hci/llc.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c index 2140f67246441..480c17f372a53 100644 --- a/net/nfc/hci/llc.c +++ b/net/nfc/hci/llc.c @@ -30,15 +30,19 @@ exit: return r; } +static void nfc_llc_del_engine(struct nfc_llc_engine *llc_engine) +{ + list_del(&llc_engine->entry); + kfree(llc_engine->name); + kfree(llc_engine); +} + void nfc_llc_exit(void) { struct nfc_llc_engine *llc_engine, *n; - list_for_each_entry_safe(llc_engine, n, &llc_engines, entry) { - list_del(&llc_engine->entry); - kfree(llc_engine->name); - kfree(llc_engine); - } + list_for_each_entry_safe(llc_engine, n, &llc_engines, entry) + nfc_llc_del_engine(llc_engine); } int nfc_llc_register(const char *name, const struct nfc_llc_ops *ops) @@ -82,9 +86,7 @@ void nfc_llc_unregister(const char *name) if (llc_engine == NULL) return; - list_del(&llc_engine->entry); - kfree(llc_engine->name); - kfree(llc_engine); + nfc_llc_del_engine(llc_engine); } struct nfc_llc *nfc_llc_allocate(const char *name, struct nfc_hci_dev *hdev, -- cgit 1.2.3-korg From 83cdd8db75085f3e415f420d6022f9c813ea50af Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 3 Feb 2024 08:51:04 +0100 Subject: nfc: hci: Save a few bytes of memory when registering a 'nfc_llc' engine nfc_llc_register() calls pass a string literal as the 'name' parameter. So kstrdup_const() can be used instead of kfree() to avoid a memory allocation in such cases. Signed-off-by: Christophe JAILLET Signed-off-by: Paolo Abeni --- net/nfc/hci/llc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c index 480c17f372a53..ba91284f4086e 100644 --- a/net/nfc/hci/llc.c +++ b/net/nfc/hci/llc.c @@ -33,7 +33,7 @@ exit: static void nfc_llc_del_engine(struct nfc_llc_engine *llc_engine) { list_del(&llc_engine->entry); - kfree(llc_engine->name); + kfree_const(llc_engine->name); kfree(llc_engine); } @@ -53,7 +53,7 @@ int nfc_llc_register(const char *name, const struct nfc_llc_ops *ops) if (llc_engine == NULL) return -ENOMEM; - llc_engine->name = kstrdup(name, GFP_KERNEL); + llc_engine->name = kstrdup_const(name, GFP_KERNEL); if (llc_engine->name == NULL) { kfree(llc_engine); return -ENOMEM; -- cgit 1.2.3-korg From 23c5ae6d467520987dbc8682c3ae6ea0e80a5f27 Mon Sep 17 00:00:00 2001 From: George Guo Date: Sun, 4 Feb 2024 10:35:31 +0800 Subject: netlabel: cleanup struct netlbl_lsm_catmap Simplify the code from macro NETLBL_CATMAP_MAPTYPE to u64, and fix warning "Macros with complex values should be enclosed in parentheses" on "#define NETLBL_CATMAP_BIT (NETLBL_CATMAP_MAPTYPE)0x01", which is modified to "#define NETLBL_CATMAP_BIT ((u64)0x01)". Signed-off-by: George Guo Acked-by: Paul Moore Signed-off-by: David S. Miller --- include/net/netlabel.h | 7 +++---- net/netlabel/netlabel_kapi.c | 8 ++++---- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 43ae503376851..f3ab0b8a4b18a 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -145,15 +145,14 @@ struct netlbl_lsm_cache { * processing. * */ -#define NETLBL_CATMAP_MAPTYPE u64 #define NETLBL_CATMAP_MAPCNT 4 -#define NETLBL_CATMAP_MAPSIZE (sizeof(NETLBL_CATMAP_MAPTYPE) * 8) +#define NETLBL_CATMAP_MAPSIZE (sizeof(u64) * 8) #define NETLBL_CATMAP_SIZE (NETLBL_CATMAP_MAPSIZE * \ NETLBL_CATMAP_MAPCNT) -#define NETLBL_CATMAP_BIT (NETLBL_CATMAP_MAPTYPE)0x01 +#define NETLBL_CATMAP_BIT ((u64)0x01) struct netlbl_lsm_catmap { u32 startbit; - NETLBL_CATMAP_MAPTYPE bitmap[NETLBL_CATMAP_MAPCNT]; + u64 bitmap[NETLBL_CATMAP_MAPCNT]; struct netlbl_lsm_catmap *next; }; diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 27511c90a26f4..7b844581ebeea 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -610,7 +610,7 @@ int netlbl_catmap_walk(struct netlbl_lsm_catmap *catmap, u32 offset) struct netlbl_lsm_catmap *iter; u32 idx; u32 bit; - NETLBL_CATMAP_MAPTYPE bitmap; + u64 bitmap; iter = _netlbl_catmap_getnode(&catmap, offset, _CM_F_WALK, 0); if (iter == NULL) @@ -666,8 +666,8 @@ int netlbl_catmap_walkrng(struct netlbl_lsm_catmap *catmap, u32 offset) struct netlbl_lsm_catmap *prev = NULL; u32 idx; u32 bit; - NETLBL_CATMAP_MAPTYPE bitmask; - NETLBL_CATMAP_MAPTYPE bitmap; + u64 bitmask; + u64 bitmap; iter = _netlbl_catmap_getnode(&catmap, offset, _CM_F_WALK, 0); if (iter == NULL) @@ -857,7 +857,7 @@ int netlbl_catmap_setlong(struct netlbl_lsm_catmap **catmap, offset -= iter->startbit; idx = offset / NETLBL_CATMAP_MAPSIZE; - iter->bitmap[idx] |= (NETLBL_CATMAP_MAPTYPE)bitmap + iter->bitmap[idx] |= (u64)bitmap << (offset % NETLBL_CATMAP_MAPSIZE); return 0; -- cgit 1.2.3-korg From 59d6bccebe5c747d6f4dcbc4862ad7b52571b3fc Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 4 Feb 2024 22:12:26 +0900 Subject: net: tipc: remove redundant 'bool' from CONFIG_TIPC_{MEDIA_UDP,CRYPTO} The 'bool' is already specified for these options. The second 'bool' under the help message is redundant. While I am here, I moved 'default y' above, as it is common to place the help text last. Signed-off-by: Masahiro Yamada Reviewed-by: Randy Dunlap Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/tipc/Kconfig | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index be1c4003d67d2..bb0d71eb02a69 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -32,16 +32,17 @@ config TIPC_MEDIA_UDP bool "IP/UDP media type support" depends on TIPC select NET_UDP_TUNNEL + default y help Saying Y here will enable support for running TIPC over IP/UDP - bool - default y + config TIPC_CRYPTO bool "TIPC encryption support" depends on TIPC select CRYPTO select CRYPTO_AES select CRYPTO_GCM + default y help Saying Y here will enable support for TIPC encryption. All TIPC messages will be encrypted/decrypted by using the currently most @@ -49,8 +50,6 @@ config TIPC_CRYPTO entering the TIPC stack. Key setting from user-space is performed via netlink by a user program (e.g. the iproute2 'tipc' tool). - bool - default y config TIPC_DIAG tristate "TIPC: socket monitoring interface" -- cgit 1.2.3-korg From d160c66cda0ac8614adc53a5b5b0e6d6f1a05a5b Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Mon, 5 Feb 2024 12:30:22 +0200 Subject: net: Do not return value from init_dummy_netdev() init_dummy_netdev() always returns zero and all the callers do not check the returned value. Set the function to not return value, as it is not really used today. Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Reviewed-by: Jiri Pirko Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240205103022.440946-1-amcohen@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 +- net/core/dev.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 118c40258d07b..1845dd5043b44 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3198,7 +3198,7 @@ static inline void unregister_netdevice(struct net_device *dev) int netdev_refcnt_read(const struct net_device *dev); void free_netdev(struct net_device *dev); void netdev_freemem(struct net_device *dev); -int init_dummy_netdev(struct net_device *dev); +void init_dummy_netdev(struct net_device *dev); struct net_device *netdev_get_xmit_slave(struct net_device *dev, struct sk_buff *skb, diff --git a/net/core/dev.c b/net/core/dev.c index 27ba057d06c49..e52e2888cccd3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10345,7 +10345,7 @@ EXPORT_SYMBOL(register_netdevice); * that need to tie several hardware interfaces to a single NAPI * poll scheduler due to HW limitations. */ -int init_dummy_netdev(struct net_device *dev) +void init_dummy_netdev(struct net_device *dev) { /* Clear everything. Note we don't initialize spinlocks * are they aren't supposed to be taken by any of the @@ -10373,8 +10373,6 @@ int init_dummy_netdev(struct net_device *dev) * because users of this 'device' dont need to change * its refcount. */ - - return 0; } EXPORT_SYMBOL_GPL(init_dummy_netdev); -- cgit 1.2.3-korg From b27696cd8fcc0ace1b2dfd81b7bff824a30b1fd1 Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Mon, 5 Feb 2024 11:33:17 +0800 Subject: net/smc: change the term virtual ISM to Emulated-ISM According to latest release of SMCv2.1[1], the term 'virtual ISM' has been changed to 'Emulated-ISM' to avoid the ambiguity of the word 'virtual' in different contexts. So the names or comments in the code need be modified accordingly. [1] https://www.ibm.com/support/pages/node/7112343 Signed-off-by: Wen Gu Reviewed-by: Simon Horman Reviewed-by: Wenjia Zhang Link: https://lore.kernel.org/r/20240205033317.127269-1-guwen@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 22 +++++++++++----------- net/smc/smc.h | 4 ++-- net/smc/smc_clc.c | 6 +++--- net/smc/smc_clc.h | 2 +- net/smc/smc_core.c | 4 ++-- net/smc/smc_ism.h | 10 +++++----- 6 files changed, 24 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index a2cb30af46cb1..66763c74ab767 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1045,7 +1045,7 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc, int rc = SMC_CLC_DECL_NOSMCDDEV; struct smcd_dev *smcd; int i = 1, entry = 1; - bool is_virtual; + bool is_emulated; u16 chid; if (smcd_indicated(ini->smc_type_v1)) @@ -1057,12 +1057,12 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc, chid = smc_ism_get_chid(smcd); if (!smc_find_ism_v2_is_unique_chid(chid, ini, i)) continue; - is_virtual = __smc_ism_is_virtual(chid); + is_emulated = __smc_ism_is_emulated(chid); if (!smc_pnet_is_pnetid_set(smcd->pnetid) || smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) { - if (is_virtual && entry == SMCD_CLC_MAX_V2_GID_ENTRIES) + if (is_emulated && entry == SMCD_CLC_MAX_V2_GID_ENTRIES) /* It's the last GID-CHID entry left in CLC - * Proposal SMC-Dv2 extension, but a virtual + * Proposal SMC-Dv2 extension, but an Emulated- * ISM device will take two entries. So give * up it and try the next potential ISM device. */ @@ -1072,7 +1072,7 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc, ini->is_smcd = true; rc = 0; i++; - entry = is_virtual ? entry + 2 : entry + 1; + entry = is_emulated ? entry + 2 : entry + 1; if (entry > SMCD_CLC_MAX_V2_GID_ENTRIES) break; } @@ -1413,10 +1413,10 @@ static int smc_connect_ism(struct smc_sock *smc, if (rc) return rc; - if (__smc_ism_is_virtual(ini->ism_chid[ini->ism_selected])) + if (__smc_ism_is_emulated(ini->ism_chid[ini->ism_selected])) ini->ism_peer_gid[ini->ism_selected].gid_ext = ntohll(aclc->d1.gid_ext); - /* for non-virtual ISM devices, peer gid_ext remains 0. */ + /* for non-Emulated-ISM devices, peer gid_ext remains 0. */ } ini->ism_peer_gid[ini->ism_selected].gid = ntohll(aclc->d0.gid); @@ -2117,10 +2117,10 @@ static void smc_check_ism_v2_match(struct smc_init_info *ini, if (smc_ism_get_chid(smcd) == proposed_chid && !smc_ism_cantalk(proposed_gid, ISM_RESERVED_VLANID, smcd)) { ini->ism_peer_gid[*matches].gid = proposed_gid->gid; - if (__smc_ism_is_virtual(proposed_chid)) + if (__smc_ism_is_emulated(proposed_chid)) ini->ism_peer_gid[*matches].gid_ext = proposed_gid->gid_ext; - /* non-virtual ISM's peer gid_ext remains 0. */ + /* non-Emulated-ISM's peer gid_ext remains 0. */ ini->ism_dev[*matches] = smcd; (*matches)++; break; @@ -2170,10 +2170,10 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, smcd_gid.gid = ntohll(smcd_v2_ext->gidchid[i].gid); smcd_gid.gid_ext = 0; chid = ntohs(smcd_v2_ext->gidchid[i].chid); - if (__smc_ism_is_virtual(chid)) { + if (__smc_ism_is_emulated(chid)) { if ((i + 1) == smc_v2_ext->hdr.ism_gid_cnt || chid != ntohs(smcd_v2_ext->gidchid[i + 1].chid)) - /* each virtual ISM device takes two GID-CHID + /* each Emulated-ISM device takes two GID-CHID * entries and CHID of the second entry repeats * that of the first entry. * diff --git a/net/smc/smc.h b/net/smc/smc.h index df64efd2dee82..18c8b7870198b 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -56,11 +56,11 @@ enum smc_state { /* possible states of an SMC socket */ }; enum smc_supplemental_features { - SMC_SPF_VIRT_ISM_DEV = 0, + SMC_SPF_EMULATED_ISM_DEV = 0, }; #define SMC_FEATURE_MASK \ - (BIT(SMC_SPF_VIRT_ISM_DEV)) + (BIT(SMC_SPF_EMULATED_ISM_DEV)) struct smc_link_group; diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 9a13709bea1cb..e55026c7529cf 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -952,8 +952,8 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) gidchids[entry].chid = htons(smc_ism_get_chid(ini->ism_dev[i])); gidchids[entry].gid = htonll(smcd_gid.gid); - if (smc_ism_is_virtual(smcd)) { - /* a virtual ISM device takes two + if (smc_ism_is_emulated(smcd)) { + /* an Emulated-ISM device takes two * entries. CHID of the second entry * repeats that of the first entry. */ @@ -1055,7 +1055,7 @@ smcd_clc_prep_confirm_accept(struct smc_connection *conn, clc->d1.chid = htons(chid); if (eid && eid[0]) memcpy(clc->d1.eid, eid, SMC_MAX_EID_LEN); - if (__smc_ism_is_virtual(chid)) + if (__smc_ism_is_emulated(chid)) clc->d1.gid_ext = htonll(smcd_gid.gid_ext); len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2; if (first_contact) { diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index a9f9bdd26dcd8..7cc7070b9772d 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -175,7 +175,7 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */ #define SMCD_CLC_MAX_V2_GID_ENTRIES 8 /* max # of CHID-GID entries in CLC * proposal SMC-Dv2 extension. * each ISM device takes one entry and - * each virtual ISM takes two entries. + * each Emulated-ISM takes two entries */ struct smc_clc_msg_proposal_area { diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index e4c858411207a..9b84d5897aa58 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1535,7 +1535,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, struct smcd_gid *peer_gid, list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) { if ((!peer_gid->gid || (lgr->peer_gid.gid == peer_gid->gid && - !smc_ism_is_virtual(dev) ? 1 : + !smc_ism_is_emulated(dev) ? 1 : lgr->peer_gid.gid_ext == peer_gid->gid_ext)) && (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) { if (peer_gid->gid) /* peer triggered termination */ @@ -1881,7 +1881,7 @@ static bool smcd_lgr_match(struct smc_link_group *lgr, lgr->smcd != smcismdev) return false; - if (smc_ism_is_virtual(smcismdev) && + if (smc_ism_is_emulated(smcismdev) && lgr->peer_gid.gid_ext != peer_gid->gid_ext) return false; diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h index ffff40c30a06c..165cd013404bc 100644 --- a/net/smc/smc_ism.h +++ b/net/smc/smc_ism.h @@ -15,7 +15,7 @@ #include "smc.h" -#define SMC_VIRTUAL_ISM_CHID_MASK 0xFF00 +#define SMC_EMULATED_ISM_CHID_MASK 0xFF00 #define SMC_ISM_IDENT_MASK 0x00FFFF struct smcd_dev_list { /* List of SMCD devices */ @@ -66,10 +66,10 @@ static inline int smc_ism_write(struct smcd_dev *smcd, u64 dmb_tok, return rc < 0 ? rc : 0; } -static inline bool __smc_ism_is_virtual(u16 chid) +static inline bool __smc_ism_is_emulated(u16 chid) { /* CHIDs in range of 0xFF00 to 0xFFFF are reserved - * for virtual ISM device. + * for Emulated-ISM device. * * loopback-ism: 0xFFFF * virtio-ism: 0xFF00 ~ 0xFFFE @@ -77,11 +77,11 @@ static inline bool __smc_ism_is_virtual(u16 chid) return ((chid & 0xFF00) == 0xFF00); } -static inline bool smc_ism_is_virtual(struct smcd_dev *smcd) +static inline bool smc_ism_is_emulated(struct smcd_dev *smcd) { u16 chid = smcd->ops->get_chid(smcd); - return __smc_ism_is_virtual(chid); + return __smc_ism_is_emulated(chid); } #endif -- cgit 1.2.3-korg From fd4f101edbd9f99567ab2adb1f2169579ede7c13 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:42:57 +0000 Subject: net: add exit_batch_rtnl() method Many (struct pernet_operations)->exit_batch() methods have to acquire rtnl. In presence of rtnl mutex pressure, this makes cleanup_net() very slow. This patch adds a new exit_batch_rtnl() method to reduce number of rtnl acquisitions from cleanup_net(). exit_batch_rtnl() handlers are called while rtnl is locked, and devices to be killed can be queued in a list provided as their second argument. A single unregister_netdevice_many() is called right before rtnl is released. exit_batch_rtnl() handlers are called before ->exit() and ->exit_batch() handlers. Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/net_namespace.h | 3 +++ net/core/net_namespace.c | 31 ++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index cd0c2eedbb5e9..20c34bd7a0778 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -448,6 +448,9 @@ struct pernet_operations { void (*pre_exit)(struct net *net); void (*exit)(struct net *net); void (*exit_batch)(struct list_head *net_exit_list); + /* Following method is called with RTNL held. */ + void (*exit_batch_rtnl)(struct list_head *net_exit_list, + struct list_head *dev_kill_list); unsigned int *id; size_t size; }; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 72799533426b6..233ec0cdd0111 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -318,8 +318,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) { /* Must be called with pernet_ops_rwsem held */ const struct pernet_operations *ops, *saved_ops; - int error = 0; LIST_HEAD(net_exit_list); + LIST_HEAD(dev_kill_list); + int error = 0; refcount_set(&net->ns.count, 1); ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt"); @@ -357,6 +358,15 @@ out_undo: synchronize_rcu(); + ops = saved_ops; + rtnl_lock(); + list_for_each_entry_continue_reverse(ops, &pernet_list, list) { + if (ops->exit_batch_rtnl) + ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list); + } + unregister_netdevice_many(&dev_kill_list); + rtnl_unlock(); + ops = saved_ops; list_for_each_entry_continue_reverse(ops, &pernet_list, list) ops_exit_list(ops, &net_exit_list); @@ -573,6 +583,7 @@ static void cleanup_net(struct work_struct *work) struct net *net, *tmp, *last; struct llist_node *net_kill_list; LIST_HEAD(net_exit_list); + LIST_HEAD(dev_kill_list); /* Atomically snapshot the list of namespaces to cleanup */ net_kill_list = llist_del_all(&cleanup_list); @@ -613,6 +624,14 @@ static void cleanup_net(struct work_struct *work) */ synchronize_rcu(); + rtnl_lock(); + list_for_each_entry_reverse(ops, &pernet_list, list) { + if (ops->exit_batch_rtnl) + ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list); + } + unregister_netdevice_many(&dev_kill_list); + rtnl_unlock(); + /* Run all of the network namespace exit methods */ list_for_each_entry_reverse(ops, &pernet_list, list) ops_exit_list(ops, &net_exit_list); @@ -1193,7 +1212,17 @@ static void free_exit_list(struct pernet_operations *ops, struct list_head *net_ { ops_pre_exit_list(ops, net_exit_list); synchronize_rcu(); + + if (ops->exit_batch_rtnl) { + LIST_HEAD(dev_kill_list); + + rtnl_lock(); + ops->exit_batch_rtnl(net_exit_list, &dev_kill_list); + unregister_netdevice_many(&dev_kill_list); + rtnl_unlock(); + } ops_exit_list(ops, net_exit_list); + ops_free_list(ops, net_exit_list); } -- cgit 1.2.3-korg From a7ec2512ad7b23340059f59f3fd710cab056791a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:42:59 +0000 Subject: nexthop: convert nexthop_net_exit_batch to exit_batch_rtnl method exit_batch_rtnl() is called while RTNL is held. This saves one rtnl_lock()/rtnl_unlock() pair. We also need to create nexthop_net_exit() to make sure net->nexthop.devhash is not freed too soon, otherwise we will not be able to unregister netdev from exit_batch_rtnl() methods. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index bbff68b5b5d4a..7270a8631406c 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -3737,16 +3737,20 @@ out: } EXPORT_SYMBOL(nexthop_res_grp_activity_update); -static void __net_exit nexthop_net_exit_batch(struct list_head *net_list) +static void __net_exit nexthop_net_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { struct net *net; - rtnl_lock(); - list_for_each_entry(net, net_list, exit_list) { + ASSERT_RTNL(); + list_for_each_entry(net, net_list, exit_list) flush_all_nexthops(net); - kfree(net->nexthop.devhash); - } - rtnl_unlock(); +} + +static void __net_exit nexthop_net_exit(struct net *net) +{ + kfree(net->nexthop.devhash); + net->nexthop.devhash = NULL; } static int __net_init nexthop_net_init(struct net *net) @@ -3764,7 +3768,8 @@ static int __net_init nexthop_net_init(struct net *net) static struct pernet_operations nexthop_net_ops = { .init = nexthop_net_init, - .exit_batch = nexthop_net_exit_batch, + .exit = nexthop_net_exit, + .exit_batch_rtnl = nexthop_net_exit_batch_rtnl, }; static int __init nexthop_init(void) -- cgit 1.2.3-korg From 70f16ea2e4f673fc769fd13c00c20a32b4fe238a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:04 +0000 Subject: ipv4: add __unregister_nexthop_notifier() unregister_nexthop_notifier() assumes the caller does not hold rtnl. We need in the following patch to use it from a context already holding rtnl. Add __unregister_nexthop_notifier(). unregister_nexthop_notifier() becomes a wrapper. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-9-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/nexthop.h | 1 + net/ipv4/nexthop.c | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/net/nexthop.h b/include/net/nexthop.h index d92046a4a0782..6647ad509faa0 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -218,6 +218,7 @@ struct nh_notifier_info { int register_nexthop_notifier(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); +int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap); void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index, diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 7270a8631406c..70509da4f0806 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -3631,17 +3631,24 @@ unlock: } EXPORT_SYMBOL(register_nexthop_notifier); -int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) +int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) { int err; - rtnl_lock(); err = blocking_notifier_chain_unregister(&net->nexthop.notifier_chain, nb); - if (err) - goto unlock; - nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL); -unlock: + if (!err) + nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL); + return err; +} +EXPORT_SYMBOL(__unregister_nexthop_notifier); + +int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) +{ + int err; + + rtnl_lock(); + err = __unregister_nexthop_notifier(net, nb); rtnl_unlock(); return err; } -- cgit 1.2.3-korg From bc50c535c3a011605f4e0d219431a8e42249e71e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:06 +0000 Subject: ip6_gre: use exit_batch_rtnl() method exit_batch_rtnl() is called while RTNL is held, and devices to be unregistered can be queued in the dev_kill_list. This saves one rtnl_lock()/rtnl_unlock() pair and one unregister_netdevice_many() call. Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-11-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_gre.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 070d87abf7c02..428f03e9da45a 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1632,21 +1632,19 @@ err_alloc_dev: return err; } -static void __net_exit ip6gre_exit_batch_net(struct list_head *net_list) +static void __net_exit ip6gre_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { struct net *net; - LIST_HEAD(list); - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(net, net_list, exit_list) - ip6gre_destroy_tunnels(net, &list); - unregister_netdevice_many(&list); - rtnl_unlock(); + ip6gre_destroy_tunnels(net, dev_to_kill); } static struct pernet_operations ip6gre_net_ops = { .init = ip6gre_init_net, - .exit_batch = ip6gre_exit_batch_net, + .exit_batch_rtnl = ip6gre_exit_batch_rtnl, .id = &ip6gre_net_id, .size = sizeof(struct ip6gre_net), }; -- cgit 1.2.3-korg From a1fab9aff5c05589ece2893a8d312f16eb0ff5e6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:07 +0000 Subject: ip6_tunnel: use exit_batch_rtnl() method exit_batch_rtnl() is called while RTNL is held, and devices to be unregistered can be queued in the dev_kill_list. This saves one rtnl_lock()/rtnl_unlock() pair and one unregister_netdevice_many() call. Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-12-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_tunnel.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9bbabf750a21e..bfb0a6c601c11 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -2282,21 +2282,19 @@ err_alloc_dev: return err; } -static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list) +static void __net_exit ip6_tnl_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { struct net *net; - LIST_HEAD(list); - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(net, net_list, exit_list) - ip6_tnl_destroy_tunnels(net, &list); - unregister_netdevice_many(&list); - rtnl_unlock(); + ip6_tnl_destroy_tunnels(net, dev_to_kill); } static struct pernet_operations ip6_tnl_net_ops = { .init = ip6_tnl_init_net, - .exit_batch = ip6_tnl_exit_batch_net, + .exit_batch_rtnl = ip6_tnl_exit_batch_rtnl, .id = &ip6_tnl_net_id, .size = sizeof(struct ip6_tnl_net), }; -- cgit 1.2.3-korg From 7a99f3c1994b2c3add235090d8721bfbb2b95320 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:08 +0000 Subject: ip6_vti: use exit_batch_rtnl() method exit_batch_rtnl() is called while RTNL is held, and devices to be unregistered can be queued in the dev_kill_list. This saves one rtnl_lock()/rtnl_unlock() pair and one unregister_netdevice_many() call. Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-13-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_vti.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index e550240c85e1c..cfe1b1ad4d85d 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1174,24 +1174,22 @@ err_alloc_dev: return err; } -static void __net_exit vti6_exit_batch_net(struct list_head *net_list) +static void __net_exit vti6_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { struct vti6_net *ip6n; struct net *net; - LIST_HEAD(list); - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(net, net_list, exit_list) { ip6n = net_generic(net, vti6_net_id); - vti6_destroy_tunnels(ip6n, &list); + vti6_destroy_tunnels(ip6n, dev_to_kill); } - unregister_netdevice_many(&list); - rtnl_unlock(); } static struct pernet_operations vti6_net_ops = { .init = vti6_init_net, - .exit_batch = vti6_exit_batch_net, + .exit_batch_rtnl = vti6_exit_batch_rtnl, .id = &vti6_net_id, .size = sizeof(struct vti6_net), }; -- cgit 1.2.3-korg From de02deab27fd63063e056811d0473ad5b9a56e5e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:09 +0000 Subject: sit: use exit_batch_rtnl() method exit_batch_rtnl() is called while RTNL is held, and devices to be unregistered can be queued in the dev_kill_list. This saves one rtnl_lock()/rtnl_unlock() pair and one unregister_netdevice_many() call. Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-14-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/sit.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index cc24cefdb85c0..61b2b71fa8bed 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1875,22 +1875,19 @@ err_alloc_dev: return err; } -static void __net_exit sit_exit_batch_net(struct list_head *net_list) +static void __net_exit sit_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { - LIST_HEAD(list); struct net *net; - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(net, net_list, exit_list) - sit_destroy_tunnels(net, &list); - - unregister_netdevice_many(&list); - rtnl_unlock(); + sit_destroy_tunnels(net, dev_to_kill); } static struct pernet_operations sit_net_ops = { .init = sit_init_net, - .exit_batch = sit_exit_batch_net, + .exit_batch_rtnl = sit_exit_batch_rtnl, .id = &sit_net_id, .size = sizeof(struct sit_net), }; -- cgit 1.2.3-korg From 9b5b36374ed6953f3efcc82e7cb4c353b9869faf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:10 +0000 Subject: ip_tunnel: use exit_batch_rtnl() method exit_batch_rtnl() is called while RTNL is held, and devices to be unregistered can be queued in the dev_kill_list. This saves one rtnl_lock()/rtnl_unlock() pair and one unregister_netdevice_many() call. This patch takes care of ipip, ip_vti, and ip_gre tunnels. Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-15-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ip_tunnels.h | 3 ++- net/ipv4/ip_gre.c | 24 +++++++++++++++--------- net/ipv4/ip_tunnel.c | 10 ++++------ net/ipv4/ip_vti.c | 8 +++++--- net/ipv4/ipip.c | 8 +++++--- 5 files changed, 31 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 2d746f4c9a0a4..5cd64bb2104df 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -284,7 +284,8 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname); void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id, - struct rtnl_link_ops *ops); + struct rtnl_link_ops *ops, + struct list_head *dev_to_kill); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 5169c3c72cffe..aad5125b7a65e 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1025,14 +1025,16 @@ static int __net_init ipgre_init_net(struct net *net) return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL); } -static void __net_exit ipgre_exit_batch_net(struct list_head *list_net) +static void __net_exit ipgre_exit_batch_rtnl(struct list_head *list_net, + struct list_head *dev_to_kill) { - ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops); + ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops, + dev_to_kill); } static struct pernet_operations ipgre_net_ops = { .init = ipgre_init_net, - .exit_batch = ipgre_exit_batch_net, + .exit_batch_rtnl = ipgre_exit_batch_rtnl, .id = &ipgre_net_id, .size = sizeof(struct ip_tunnel_net), }; @@ -1697,14 +1699,16 @@ static int __net_init ipgre_tap_init_net(struct net *net) return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0"); } -static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net) +static void __net_exit ipgre_tap_exit_batch_rtnl(struct list_head *list_net, + struct list_head *dev_to_kill) { - ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops); + ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops, + dev_to_kill); } static struct pernet_operations ipgre_tap_net_ops = { .init = ipgre_tap_init_net, - .exit_batch = ipgre_tap_exit_batch_net, + .exit_batch_rtnl = ipgre_tap_exit_batch_rtnl, .id = &gre_tap_net_id, .size = sizeof(struct ip_tunnel_net), }; @@ -1715,14 +1719,16 @@ static int __net_init erspan_init_net(struct net *net) &erspan_link_ops, "erspan0"); } -static void __net_exit erspan_exit_batch_net(struct list_head *net_list) +static void __net_exit erspan_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { - ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops); + ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops, + dev_to_kill); } static struct pernet_operations erspan_net_ops = { .init = erspan_init_net, - .exit_batch = erspan_exit_batch_net, + .exit_batch_rtnl = erspan_exit_batch_rtnl, .id = &erspan_net_id, .size = sizeof(struct ip_tunnel_net), }; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index beeae624c412d..00da0b80320fb 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -1130,19 +1130,17 @@ static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn, } void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id, - struct rtnl_link_ops *ops) + struct rtnl_link_ops *ops, + struct list_head *dev_to_kill) { struct ip_tunnel_net *itn; struct net *net; - LIST_HEAD(list); - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(net, net_list, exit_list) { itn = net_generic(net, id); - ip_tunnel_destroy(net, itn, &list, ops); + ip_tunnel_destroy(net, itn, dev_to_kill, ops); } - unregister_netdevice_many(&list); - rtnl_unlock(); } EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 9ab9b3ebe0cd1..fb1f52d213112 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -510,14 +510,16 @@ static int __net_init vti_init_net(struct net *net) return 0; } -static void __net_exit vti_exit_batch_net(struct list_head *list_net) +static void __net_exit vti_exit_batch_rtnl(struct list_head *list_net, + struct list_head *dev_to_kill) { - ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops); + ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops, + dev_to_kill); } static struct pernet_operations vti_net_ops = { .init = vti_init_net, - .exit_batch = vti_exit_batch_net, + .exit_batch_rtnl = vti_exit_batch_rtnl, .id = &vti_net_id, .size = sizeof(struct ip_tunnel_net), }; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 27b8f83c6ea20..0151eea06cc50 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -592,14 +592,16 @@ static int __net_init ipip_init_net(struct net *net) return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0"); } -static void __net_exit ipip_exit_batch_net(struct list_head *list_net) +static void __net_exit ipip_exit_batch_rtnl(struct list_head *list_net, + struct list_head *dev_to_kill) { - ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops); + ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops, + dev_to_kill); } static struct pernet_operations ipip_net_ops = { .init = ipip_init_net, - .exit_batch = ipip_exit_batch_net, + .exit_batch_rtnl = ipip_exit_batch_rtnl, .id = &ipip_net_id, .size = sizeof(struct ip_tunnel_net), }; -- cgit 1.2.3-korg From 806b67850787bae3df1bc2e5a30e8c658c579d80 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:11 +0000 Subject: bridge: use exit_batch_rtnl() method exit_batch_rtnl() is called while RTNL is held, and devices to be unregistered can be queued in the dev_kill_list. This saves one rtnl_lock()/rtnl_unlock() pair per netns and one unregister_netdevice_many() call. Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-16-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/bridge/br.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/bridge/br.c b/net/bridge/br.c index ac19b797dbece..2cab878e0a39c 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -356,26 +356,21 @@ void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on) clear_bit(opt, &br->options); } -static void __net_exit br_net_exit_batch(struct list_head *net_list) +static void __net_exit br_net_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { struct net_device *dev; struct net *net; - LIST_HEAD(list); - - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(net, net_list, exit_list) for_each_netdev(net, dev) if (netif_is_bridge_master(dev)) - br_dev_delete(dev, &list); - - unregister_netdevice_many(&list); - - rtnl_unlock(); + br_dev_delete(dev, dev_to_kill); } static struct pernet_operations br_net_ops = { - .exit_batch = br_net_exit_batch, + .exit_batch_rtnl = br_net_exit_batch_rtnl, }; static const struct stp_proto br_stp_proto = { -- cgit 1.2.3-korg From 8962daccc2d32812fe24bd21496c036eb4f454b0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:12 +0000 Subject: xfrm: interface: use exit_batch_rtnl() method exit_batch_rtnl() is called while RTNL is held, and devices to be unregistered can be queued in the dev_kill_list. This saves one rtnl_lock()/rtnl_unlock() pair per netns and one unregister_netdevice_many() call. Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-17-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/xfrm/xfrm_interface_core.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index 21d50d75c2608..dafefef3cf51a 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -957,12 +957,12 @@ static struct rtnl_link_ops xfrmi_link_ops __read_mostly = { .get_link_net = xfrmi_get_link_net, }; -static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list) +static void __net_exit xfrmi_exit_batch_rtnl(struct list_head *net_exit_list, + struct list_head *dev_to_kill) { struct net *net; - LIST_HEAD(list); - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(net, net_exit_list, exit_list) { struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); struct xfrm_if __rcu **xip; @@ -973,18 +973,16 @@ static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list) for (xip = &xfrmn->xfrmi[i]; (xi = rtnl_dereference(*xip)) != NULL; xip = &xi->next) - unregister_netdevice_queue(xi->dev, &list); + unregister_netdevice_queue(xi->dev, dev_to_kill); } xi = rtnl_dereference(xfrmn->collect_md_xfrmi); if (xi) - unregister_netdevice_queue(xi->dev, &list); + unregister_netdevice_queue(xi->dev, dev_to_kill); } - unregister_netdevice_many(&list); - rtnl_unlock(); } static struct pernet_operations xfrmi_net_ops = { - .exit_batch = xfrmi_exit_batch_net, + .exit_batch_rtnl = xfrmi_exit_batch_rtnl, .id = &xfrmi_net_id, .size = sizeof(struct xfrmi_net), }; -- cgit 1.2.3-korg From 2fd53eb04c492eb9a2b06f994b36e5cf34ba7541 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Jan 2024 16:49:07 +0100 Subject: wifi: mac80211: remove unused MAX_MSG_LEN define This got unused when the tracing was converted to dynamic strings, so the define can be removed. Signed-off-by: Johannes Berg --- net/mac80211/trace_msg.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h index c9dbe9aab7bdc..aea4ce55c5aca 100644 --- a/net/mac80211/trace_msg.h +++ b/net/mac80211/trace_msg.h @@ -16,8 +16,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM mac80211_msg -#define MAX_MSG_LEN 120 - DECLARE_EVENT_CLASS(mac80211_msg_event, TP_PROTO(struct va_format *vaf), -- cgit 1.2.3-korg From efa2cce6e272b36c7f9687385ef4fd538cc3bf51 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Jan 2024 16:49:04 +0100 Subject: wifi: mac80211: remove extra shadowing variable Not sure how this happened or how nothing complained, but this variable already exists in the outer function scope with the same value (and the SKB isn't changed either.) Remove the extra one. Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 615795c4b0526..76798e8057f79 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -5214,7 +5214,6 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, */ if (!status->link_valid && pubsta->mlo) { - struct ieee80211_hdr *hdr = (void *)skb->data; struct link_sta_info *link_sta; link_sta = link_sta_info_get_bss(rx.sdata, -- cgit 1.2.3-korg From 61f0261131c8dc2beeb6b34781a54788221081e9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:34:35 +0100 Subject: wifi: mac80211: clean up band switch in duration Most devices now do duration calculations, so we don't hit this code at all any more. Clearly the approach of warning at compile time here when new bands are added didn't work, the new bands were just added with "TODO". Clean it up, it won't matter for new bands since they'll just not have any need to calculate durations in software. While at it, also clean up and unify the code a bit. Link: https://msgid.link/20240129194108.70a97bd69265.Icdd8b0ac60a382244466510090eb0f5868151f39@changeid Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e448ab3384489..7b33942ea51f4 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -133,6 +133,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, mrate = sband->bitrates[0].bitrate; for (i = 0; i < sband->n_bitrates; i++) { struct ieee80211_rate *r = &sband->bitrates[i]; + u32 flag; if (r->bitrate > txrate->bitrate) break; @@ -145,28 +146,24 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, switch (sband->band) { case NL80211_BAND_2GHZ: - case NL80211_BAND_LC: { - u32 flag; + case NL80211_BAND_LC: if (tx->sdata->deflink.operating_11g_mode) flag = IEEE80211_RATE_MANDATORY_G; else flag = IEEE80211_RATE_MANDATORY_B; - if (r->flags & flag) - mrate = r->bitrate; break; - } case NL80211_BAND_5GHZ: case NL80211_BAND_6GHZ: - if (r->flags & IEEE80211_RATE_MANDATORY_A) - mrate = r->bitrate; + flag = IEEE80211_RATE_MANDATORY_A; break; - case NL80211_BAND_S1GHZ: - case NL80211_BAND_60GHZ: - /* TODO, for now fall through */ - case NUM_NL80211_BANDS: + default: + flag = 0; WARN_ON(1); break; } + + if (r->flags & flag) + mrate = r->bitrate; } if (rate == -1) { /* No matching basic rate found; use highest suitable mandatory -- cgit 1.2.3-korg From 310c8387c63830bc375827242e0f9fa689f82e21 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:34:36 +0100 Subject: wifi: mac80211: clean up connection process Rewrite the station-side connection handling. The connection flags (IEEE80211_DISABLE_*) are rather confusing, and they're not always maintained well. Additionally, for wider-bandwidth OFDMA support we need to know the precise bandwidth of the AP, which is currently somewhat difficult. Rewrite this to have a 'mode' (S1G/legacy/HT/...) and a limit on the bandwidth. This is not entirely clean because some of those modes aren't completely sequenced (as this assumes in some places), e.g. VHT doesn't exist on 2.4 GHz, but HE does. However, it still simplifies things and gives us a good idea what we're operating as, so we can parse elements accordingly etc. This leaves a FIXME for puncturing, this is addressed in a later patch. Reviewed-by: Ilan Peer Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240129194108.9451722c0110.I3e61f4cfe9da89008e1854160093c76a1e69dc2a@changeid Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 2 +- net/mac80211/debug.h | 18 +- net/mac80211/ibss.c | 15 +- net/mac80211/ieee80211_i.h | 60 +- net/mac80211/mesh.c | 21 +- net/mac80211/mlme.c | 2279 +++++++++++++++++++++++--------------------- net/mac80211/spectmgmt.c | 21 +- net/mac80211/tdls.c | 8 +- net/mac80211/tests/elems.c | 1 + net/mac80211/util.c | 164 +++- 10 files changed, 1430 insertions(+), 1159 deletions(-) (limited to 'net') diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index ef4c2cebc0808..70ba5dc4b2837 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -382,7 +382,7 @@ _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, /* downgrade chandef up to max_bw */ min_def = ctx->conf.def; while (min_def.width > max_bw) - ieee80211_chandef_downgrade(&min_def); + ieee80211_chandef_downgrade(&min_def, NULL); if (cfg80211_chandef_identical(&ctx->conf.min_def, &min_def)) return 0; diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h index d49894df23513..49da401c53408 100644 --- a/net/mac80211/debug.h +++ b/net/mac80211/debug.h @@ -152,16 +152,17 @@ do { \ else \ _sdata_err((link)->sdata, fmt, ##__VA_ARGS__); \ } while (0) -#define link_dbg(link, fmt, ...) \ +#define _link_id_dbg(print, sdata, link_id, fmt, ...) \ do { \ - if (ieee80211_vif_is_mld(&(link)->sdata->vif)) \ - _sdata_dbg(1, (link)->sdata, "[link %d] " fmt, \ - (link)->link_id, \ - ##__VA_ARGS__); \ + if (ieee80211_vif_is_mld(&(sdata)->vif)) \ + _sdata_dbg(print, sdata, "[link %d] " fmt, \ + link_id, ##__VA_ARGS__); \ else \ - _sdata_dbg(1, (link)->sdata, fmt, \ - ##__VA_ARGS__); \ + _sdata_dbg(1, sdata, fmt, ##__VA_ARGS__); \ } while (0) +#define link_dbg(link, fmt, ...) \ + _link_id_dbg(1, (link)->sdata, (link)->link_id, \ + fmt, ##__VA_ARGS__) #define ht_dbg(sdata, fmt, ...) \ _sdata_dbg(MAC80211_HT_DEBUG, \ @@ -226,6 +227,9 @@ do { \ #define mlme_dbg(sdata, fmt, ...) \ _sdata_dbg(MAC80211_MLME_DEBUG, \ sdata, fmt, ##__VA_ARGS__) +#define mlme_link_id_dbg(sdata, link_id, fmt, ...) \ + _link_id_dbg(MAC80211_MLME_DEBUG, sdata, link_id, \ + fmt, ##__VA_ARGS__) #define mlme_dbg_ratelimited(sdata, fmt, ...) \ _sdata_dbg(MAC80211_MLME_DEBUG && net_ratelimit(), \ diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 8f2b445a5ec3b..c23f46218af73 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -757,21 +757,22 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; enum nl80211_channel_type ch_type; int err; - ieee80211_conn_flags_t conn_flags; + struct ieee80211_conn_settings conn = { + .mode = IEEE80211_CONN_MODE_HT, + .bw_limit = IEEE80211_CONN_BW_LIMIT_40, + }; u32 vht_cap_info = 0; lockdep_assert_wiphy(sdata->local->hw.wiphy); - conn_flags = IEEE80211_CONN_DISABLE_VHT; - switch (ifibss->chandef.width) { case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: case NL80211_CHAN_WIDTH_20_NOHT: - conn_flags |= IEEE80211_CONN_DISABLE_HT; + conn.mode = IEEE80211_CONN_MODE_LEGACY; fallthrough; case NL80211_CHAN_WIDTH_20: - conn_flags |= IEEE80211_CONN_DISABLE_40MHZ; + conn.bw_limit = IEEE80211_CONN_BW_LIMIT_20; break; default: break; @@ -783,8 +784,8 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, memset(¶ms, 0, sizeof(params)); err = ieee80211_parse_ch_switch_ie(sdata, elems, ifibss->chandef.chan->band, - vht_cap_info, - conn_flags, ifibss->bssid, &csa_ie); + vht_cap_info, &conn, + ifibss->bssid, &csa_ie); /* can't switch to destination channel, fail */ if (err < 0) goto disconnect; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index eb32174984c3e..112029f5a7df2 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -370,19 +370,32 @@ enum ieee80211_sta_flags { IEEE80211_STA_ENABLE_RRM = BIT(15), }; -typedef u32 __bitwise ieee80211_conn_flags_t; - -enum ieee80211_conn_flags { - IEEE80211_CONN_DISABLE_HT = (__force ieee80211_conn_flags_t)BIT(0), - IEEE80211_CONN_DISABLE_40MHZ = (__force ieee80211_conn_flags_t)BIT(1), - IEEE80211_CONN_DISABLE_VHT = (__force ieee80211_conn_flags_t)BIT(2), - IEEE80211_CONN_DISABLE_80P80MHZ = (__force ieee80211_conn_flags_t)BIT(3), - IEEE80211_CONN_DISABLE_160MHZ = (__force ieee80211_conn_flags_t)BIT(4), - IEEE80211_CONN_DISABLE_HE = (__force ieee80211_conn_flags_t)BIT(5), - IEEE80211_CONN_DISABLE_EHT = (__force ieee80211_conn_flags_t)BIT(6), - IEEE80211_CONN_DISABLE_320MHZ = (__force ieee80211_conn_flags_t)BIT(7), +enum ieee80211_conn_mode { + IEEE80211_CONN_MODE_S1G, + IEEE80211_CONN_MODE_LEGACY, + IEEE80211_CONN_MODE_HT, + IEEE80211_CONN_MODE_VHT, + IEEE80211_CONN_MODE_HE, + IEEE80211_CONN_MODE_EHT, }; +#define IEEE80211_CONN_MODE_HIGHEST IEEE80211_CONN_MODE_EHT + +enum ieee80211_conn_bw_limit { + IEEE80211_CONN_BW_LIMIT_20, + IEEE80211_CONN_BW_LIMIT_40, + IEEE80211_CONN_BW_LIMIT_80, + IEEE80211_CONN_BW_LIMIT_160, /* also 80+80 */ + IEEE80211_CONN_BW_LIMIT_320, +}; + +struct ieee80211_conn_settings { + enum ieee80211_conn_mode mode; + enum ieee80211_conn_bw_limit bw_limit; +}; + +extern const struct ieee80211_conn_settings ieee80211_conn_settings_unlimited; + struct ieee80211_mgd_auth_data { struct cfg80211_bss *bss; unsigned long timeout; @@ -416,7 +429,7 @@ struct ieee80211_mgd_assoc_data { size_t elems_len; u8 *elems; /* pointing to inside ie[] below */ - ieee80211_conn_flags_t conn_flags; + struct ieee80211_conn_settings conn; u16 status; @@ -943,7 +956,7 @@ struct ieee80211_link_data_managed { enum ieee80211_smps_mode req_smps, /* requested smps mode */ driver_smps_mode; /* smps mode request */ - ieee80211_conn_flags_t conn_flags; + struct ieee80211_conn_settings conn; s16 p2p_noa_index; @@ -2171,9 +2184,8 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, * @elems: parsed 802.11 elements received with the frame * @current_band: indicates the current band * @vht_cap_info: VHT capabilities of the transmitter - * @conn_flags: contains information about own capabilities and restrictions - * to decide which channel switch announcements can be accepted, using - * flags from &enum ieee80211_conn_flags. + * @conn: contains information about own capabilities and restrictions + * to decide which channel switch announcements can be accepted * @bssid: the currently connected bssid (for reporting) * @csa_ie: parsed 802.11 csa elements on count, mode, chandef and mesh ttl. * All of them will be filled with if success only. @@ -2183,7 +2195,8 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, enum nl80211_band current_band, u32 vht_cap_info, - ieee80211_conn_flags_t conn_flags, u8 *bssid, + struct ieee80211_conn_settings *conn, + u8 *bssid, struct ieee80211_csa_ie *csa_ie); /* Suspend/resume and hw reconfiguration */ @@ -2207,6 +2220,9 @@ static inline int __ieee80211_resume(struct ieee80211_hw *hw) /* utility functions/constants */ extern const void *const mac80211_wiphy_privid; /* for wiphy privid */ +const char *ieee80211_conn_mode_str(enum ieee80211_conn_mode mode); +enum ieee80211_conn_bw_limit +ieee80211_min_bw_limit_from_chandef(struct cfg80211_chan_def *chandef); int ieee80211_frame_duration(enum nl80211_band band, size_t len, int rate, int erp, int short_preamble); void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata, @@ -2248,6 +2264,7 @@ static inline void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, /** * struct ieee80211_elems_parse_params - element parsing parameters + * @mode: connection mode for parsing * @start: pointer to the elements * @len: length of the elements * @action: %true if the elements came from an action frame @@ -2265,6 +2282,7 @@ static inline void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, * for EHT capabilities parsing) */ struct ieee80211_elems_parse_params { + enum ieee80211_conn_mode mode; const u8 *start; size_t len; bool action; @@ -2284,6 +2302,7 @@ ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, struct cfg80211_bss *bss) { struct ieee80211_elems_parse_params params = { + .mode = IEEE80211_CONN_MODE_HIGHEST, .start = start, .len = len, .action = action, @@ -2459,9 +2478,9 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, const struct cfg80211_chan_def *chandef); u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype); -u8 *ieee80211_ie_build_he_cap(ieee80211_conn_flags_t disable_flags, u8 *pos, +u8 *ieee80211_ie_build_he_cap(const struct ieee80211_conn_settings *conn, const struct ieee80211_sta_he_cap *he_cap, - u8 *end); + u8 *pos, u8 *end); void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps_mode, struct sk_buff *skb); @@ -2501,7 +2520,8 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, struct cfg80211_chan_def *chandef); bool ieee80211_chandef_s1g_oper(const struct ieee80211_s1g_oper_ie *oper, struct cfg80211_chan_def *chandef); -ieee80211_conn_flags_t ieee80211_chandef_downgrade(struct cfg80211_chan_def *c); +void ieee80211_chandef_downgrade(struct cfg80211_chan_def *chandef, + struct ieee80211_conn_settings *conn); int __must_check ieee80211_link_use_channel(struct ieee80211_link_data *link, diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index fccbcde3359af..6b48914b39fd4 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -586,7 +586,7 @@ int mesh_add_he_cap_ie(struct ieee80211_sub_if_data *sdata, return -ENOMEM; pos = skb_put(skb, ie_len); - ieee80211_ie_build_he_cap(0, pos, he_cap, pos + ie_len); + ieee80211_ie_build_he_cap(NULL, he_cap, pos, pos + ie_len); return 0; } @@ -1292,7 +1292,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_supported_band *sband; int err; - ieee80211_conn_flags_t conn_flags = 0; + struct ieee80211_conn_settings conn = ieee80211_conn_settings_unlimited; u32 vht_cap_info = 0; lockdep_assert_wiphy(sdata->local->hw.wiphy); @@ -1303,13 +1303,16 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, switch (sdata->vif.bss_conf.chandef.width) { case NL80211_CHAN_WIDTH_20_NOHT: - conn_flags |= IEEE80211_CONN_DISABLE_HT; - fallthrough; + conn.mode = IEEE80211_CONN_MODE_LEGACY; + conn.bw_limit = IEEE80211_CONN_BW_LIMIT_20; + break; case NL80211_CHAN_WIDTH_20: - conn_flags |= IEEE80211_CONN_DISABLE_40MHZ; - fallthrough; + conn.mode = IEEE80211_CONN_MODE_HT; + conn.bw_limit = IEEE80211_CONN_BW_LIMIT_20; + break; case NL80211_CHAN_WIDTH_40: - conn_flags |= IEEE80211_CONN_DISABLE_VHT; + conn.mode = IEEE80211_CONN_MODE_HT; + conn.bw_limit = IEEE80211_CONN_BW_LIMIT_40; break; default: break; @@ -1321,8 +1324,8 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, memset(¶ms, 0, sizeof(params)); err = ieee80211_parse_ch_switch_ie(sdata, elems, sband->band, - vht_cap_info, - conn_flags, sdata->vif.addr, + vht_cap_info, &conn, + sdata->vif.addr, &csa_ie); if (err < 0) return false; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e5d0988513dd0..549a06c9a2e56 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -157,8 +157,7 @@ ieee80211_handle_puncturing_bitmap(struct ieee80211_link_data *link, !(bitmap && ieee80211_hw_check(&local->hw, DISALLOW_PUNCTURING))) break; - link->u.mgd.conn_flags |= - ieee80211_chandef_downgrade(chandef); + ieee80211_chandef_downgrade(chandef, &link->u.mgd.conn); *changed |= BSS_CHANGED_BANDWIDTH; } @@ -225,77 +224,84 @@ static int ecw2cw(int ecw) return (1 << ecw) - 1; } -static ieee80211_conn_flags_t -ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, - struct ieee80211_link_data *link, - ieee80211_conn_flags_t conn_flags, - struct ieee80211_supported_band *sband, - struct ieee80211_channel *channel, - u32 vht_cap_info, - const struct ieee80211_ht_operation *ht_oper, - const struct ieee80211_vht_operation *vht_oper, - const struct ieee80211_he_operation *he_oper, - const struct ieee80211_eht_operation *eht_oper, - const struct ieee80211_s1g_oper_ie *s1g_oper, - struct cfg80211_chan_def *chandef, bool tracking) +static enum ieee80211_conn_mode +ieee80211_determine_ap_chan(struct ieee80211_sub_if_data *sdata, + struct ieee80211_channel *channel, + u32 vht_cap_info, + const struct ieee802_11_elems *elems, + bool ignore_ht_channel_mismatch, + const struct ieee80211_conn_settings *conn, + struct cfg80211_chan_def *chandef) { + const struct ieee80211_ht_operation *ht_oper = elems->ht_operation; + const struct ieee80211_vht_operation *vht_oper = elems->vht_operation; + const struct ieee80211_he_operation *he_oper = elems->he_operation; + const struct ieee80211_eht_operation *eht_oper = elems->eht_operation; + struct ieee80211_supported_band *sband = + sdata->local->hw.wiphy->bands[channel->band]; struct cfg80211_chan_def vht_chandef; - struct ieee80211_sta_ht_cap sta_ht_cap; - ieee80211_conn_flags_t ret; + bool no_vht = false; u32 ht_cfreq; - memset(chandef, 0, sizeof(struct cfg80211_chan_def)); - chandef->chan = channel; - chandef->width = NL80211_CHAN_WIDTH_20_NOHT; - chandef->center_freq1 = channel->center_freq; - chandef->freq1_offset = channel->freq_offset; + *chandef = (struct cfg80211_chan_def) { + .chan = channel, + .width = NL80211_CHAN_WIDTH_20_NOHT, + .center_freq1 = channel->center_freq, + .freq1_offset = channel->freq_offset, + }; - if (channel->band == NL80211_BAND_6GHZ) { - if (!ieee80211_chandef_he_6ghz_oper(sdata, he_oper, eht_oper, - chandef)) { - mlme_dbg(sdata, - "bad 6 GHz operation, disabling HT/VHT/HE/EHT\n"); - ret = IEEE80211_CONN_DISABLE_HT | - IEEE80211_CONN_DISABLE_VHT | - IEEE80211_CONN_DISABLE_HE | - IEEE80211_CONN_DISABLE_EHT; - } else { - ret = 0; - } - vht_chandef = *chandef; - goto out; - } else if (sband->band == NL80211_BAND_S1GHZ) { - if (!ieee80211_chandef_s1g_oper(s1g_oper, chandef)) { + /* get special S1G case out of the way */ + if (sband->band == NL80211_BAND_S1GHZ) { + if (!ieee80211_chandef_s1g_oper(elems->s1g_oper, chandef)) { sdata_info(sdata, "Missing S1G Operation Element? Trying operating == primary\n"); chandef->width = ieee80211_s1g_channel_width(channel); } - ret = IEEE80211_CONN_DISABLE_HT | IEEE80211_CONN_DISABLE_40MHZ | - IEEE80211_CONN_DISABLE_VHT | - IEEE80211_CONN_DISABLE_80P80MHZ | - IEEE80211_CONN_DISABLE_160MHZ; - goto out; + return IEEE80211_CONN_MODE_S1G; } - memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap)); - ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); + /* get special 6 GHz case out of the way */ + if (sband->band == NL80211_BAND_6GHZ) { + enum ieee80211_conn_mode mode = IEEE80211_CONN_MODE_EHT; - if (!ht_oper || !sta_ht_cap.ht_supported) { - mlme_dbg(sdata, "HT operation missing / HT not supported\n"); - ret = IEEE80211_CONN_DISABLE_HT | - IEEE80211_CONN_DISABLE_VHT | - IEEE80211_CONN_DISABLE_HE | - IEEE80211_CONN_DISABLE_EHT; - goto out; + /* this is an error */ + if (conn->mode < IEEE80211_CONN_MODE_HE) + return IEEE80211_CONN_MODE_LEGACY; + + if (!elems->he_6ghz_capa || !elems->he_cap) { + sdata_info(sdata, + "HE 6 GHz AP is missing HE/HE 6 GHz band capability\n"); + return IEEE80211_CONN_MODE_LEGACY; + } + + if (!eht_oper || !elems->eht_cap) { + eht_oper = NULL; + mode = IEEE80211_CONN_MODE_HE; + } + + if (!ieee80211_chandef_he_6ghz_oper(sdata, he_oper, + eht_oper, chandef)) { + sdata_info(sdata, "bad HE/EHT 6 GHz operation\n"); + return IEEE80211_CONN_MODE_LEGACY; + } + + return mode; } + /* now we have the progression HT, VHT, ... */ + if (conn->mode < IEEE80211_CONN_MODE_HT) + return IEEE80211_CONN_MODE_LEGACY; + + if (!ht_oper || !elems->ht_cap_elem) + return IEEE80211_CONN_MODE_LEGACY; + chandef->width = NL80211_CHAN_WIDTH_20; ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan, channel->band); /* check that channel matches the right operating channel */ - if (!tracking && channel->center_freq != ht_cfreq) { + if (!ignore_ht_channel_mismatch && channel->center_freq != ht_cfreq) { /* * It's possible that some APs are confused here; * Netgear WNDR3700 sometimes reports 4 higher than @@ -307,36 +313,22 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n", channel->center_freq, ht_cfreq, ht_oper->primary_chan, channel->band); - ret = IEEE80211_CONN_DISABLE_HT | - IEEE80211_CONN_DISABLE_VHT | - IEEE80211_CONN_DISABLE_HE | - IEEE80211_CONN_DISABLE_EHT; - goto out; + return IEEE80211_CONN_MODE_LEGACY; } - /* check 40 MHz support, if we have it */ - if (sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) { - ieee80211_chandef_ht_oper(ht_oper, chandef); - } else { - mlme_dbg(sdata, "40 MHz not supported\n"); - /* 40 MHz (and 80 MHz) must be supported for VHT */ - ret = IEEE80211_CONN_DISABLE_VHT; - /* also mark 40 MHz disabled */ - ret |= IEEE80211_CONN_DISABLE_40MHZ; - goto out; - } + ieee80211_chandef_ht_oper(ht_oper, chandef); - if (!vht_oper || !sband->vht_cap.vht_supported) { - mlme_dbg(sdata, "VHT operation missing / VHT not supported\n"); - ret = IEEE80211_CONN_DISABLE_VHT; - goto out; - } + if (conn->mode < IEEE80211_CONN_MODE_VHT) + return IEEE80211_CONN_MODE_HT; vht_chandef = *chandef; - if (!(conn_flags & IEEE80211_CONN_DISABLE_HE) && - he_oper && - (le32_to_cpu(he_oper->he_oper_params) & - IEEE80211_HE_OPERATION_VHT_OPER_INFO)) { + + /* + * having he_cap/he_oper parsed out implies we're at + * least operating as HE STA + */ + if (elems->he_cap && he_oper && + he_oper->he_oper_params & cpu_to_le32(IEEE80211_HE_OPERATION_VHT_OPER_INFO)) { struct ieee80211_vht_operation he_oper_vht_cap; /* @@ -349,52 +341,56 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, if (!ieee80211_chandef_vht_oper(&sdata->local->hw, vht_cap_info, &he_oper_vht_cap, ht_oper, &vht_chandef)) { - if (!(conn_flags & IEEE80211_CONN_DISABLE_HE)) - sdata_info(sdata, - "HE AP VHT information is invalid, disabling HE\n"); - ret = IEEE80211_CONN_DISABLE_HE | IEEE80211_CONN_DISABLE_EHT; - goto out; + sdata_info(sdata, + "HE AP VHT information is invalid, disabling HE\n"); + /* this will cause us to re-parse as VHT STA */ + return IEEE80211_CONN_MODE_VHT; } + } else if (!vht_oper || !elems->vht_cap_elem) { + if (sband->band == NL80211_BAND_5GHZ) { + sdata_info(sdata, + "VHT information is missing, disabling VHT\n"); + return IEEE80211_CONN_MODE_HT; + } + no_vht = true; + } else if (sband->band == NL80211_BAND_2GHZ) { + no_vht = true; } else if (!ieee80211_chandef_vht_oper(&sdata->local->hw, vht_cap_info, vht_oper, ht_oper, &vht_chandef)) { - if (!(conn_flags & IEEE80211_CONN_DISABLE_VHT)) - sdata_info(sdata, - "AP VHT information is invalid, disabling VHT\n"); - ret = IEEE80211_CONN_DISABLE_VHT; - goto out; + sdata_info(sdata, + "AP VHT information is invalid, disabling VHT\n"); + return IEEE80211_CONN_MODE_HT; } - if (!cfg80211_chandef_valid(&vht_chandef)) { - if (!(conn_flags & IEEE80211_CONN_DISABLE_VHT)) - sdata_info(sdata, - "AP VHT information is invalid, disabling VHT\n"); - ret = IEEE80211_CONN_DISABLE_VHT; - goto out; + if (!cfg80211_chandef_compatible(chandef, &vht_chandef)) { + sdata_info(sdata, + "AP VHT information doesn't match HT, disabling VHT\n"); + return IEEE80211_CONN_MODE_HT; } - if (cfg80211_chandef_identical(chandef, &vht_chandef)) { - ret = 0; - goto out; - } + *chandef = vht_chandef; - if (!cfg80211_chandef_compatible(chandef, &vht_chandef)) { - if (!(conn_flags & IEEE80211_CONN_DISABLE_VHT)) - sdata_info(sdata, - "AP VHT information doesn't match HT, disabling VHT\n"); - ret = IEEE80211_CONN_DISABLE_VHT; - goto out; + /* stick to current max mode if we or the AP don't have HE */ + if (conn->mode < IEEE80211_CONN_MODE_HE || + !elems->he_operation || !elems->he_cap) { + if (no_vht) + return IEEE80211_CONN_MODE_HT; + return IEEE80211_CONN_MODE_VHT; } - *chandef = vht_chandef; + /* stick to HE if we or the AP don't have EHT */ + if (conn->mode < IEEE80211_CONN_MODE_EHT || + !eht_oper || !elems->eht_cap) + return IEEE80211_CONN_MODE_HE; /* * handle the case that the EHT operation indicates that it holds EHT * operation information (in case that the channel width differs from * the channel width reported in HT/VHT/HE). */ - if (eht_oper && (eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT)) { + if (eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT) { struct cfg80211_chan_def eht_chandef = *chandef; ieee80211_chandef_eht_oper((const void *)eht_oper->optional, @@ -403,271 +399,629 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, false, &eht_chandef); if (!cfg80211_chandef_valid(&eht_chandef)) { - if (!(conn_flags & IEEE80211_CONN_DISABLE_EHT)) - sdata_info(sdata, - "AP EHT information is invalid, disabling EHT\n"); - ret = IEEE80211_CONN_DISABLE_EHT; - goto out; + sdata_info(sdata, + "AP EHT information is invalid, disabling EHT\n"); + return IEEE80211_CONN_MODE_HE; } if (!cfg80211_chandef_compatible(chandef, &eht_chandef)) { - if (!(conn_flags & IEEE80211_CONN_DISABLE_EHT)) - sdata_info(sdata, - "AP EHT information is incompatible, disabling EHT\n"); - ret = IEEE80211_CONN_DISABLE_EHT; - goto out; + sdata_info(sdata, + "AP EHT information doesn't match HT/VHT/HE, disabling EHT\n"); + return IEEE80211_CONN_MODE_HE; } *chandef = eht_chandef; } - ret = 0; - -out: - /* - * When tracking the current AP, don't do any further checks if the - * new chandef is identical to the one we're currently using for the - * connection. This keeps us from playing ping-pong with regulatory, - * without it the following can happen (for example): - * - connect to an AP with 80 MHz, world regdom allows 80 MHz - * - AP advertises regdom US - * - CRDA loads regdom US with 80 MHz prohibited (old database) - * - the code below detects an unsupported channel, downgrades, and - * we disconnect from the AP in the caller - * - disconnect causes CRDA to reload world regdomain and the game - * starts anew. - * (see https://bugzilla.kernel.org/show_bug.cgi?id=70881) - * - * It seems possible that there are still scenarios with CSA or real - * bandwidth changes where a this could happen, but those cases are - * less common and wouldn't completely prevent using the AP. - */ - if (tracking && - cfg80211_chandef_identical(chandef, &link->conf->chandef)) - return ret; - - /* don't print the message below for VHT mismatch if VHT is disabled */ - if (ret & IEEE80211_CONN_DISABLE_VHT) - vht_chandef = *chandef; - - /* - * Ignore the DISABLED flag when we're already connected and only - * tracking the APs beacon for bandwidth changes - otherwise we - * might get disconnected here if we connect to an AP, update our - * regulatory information based on the AP's country IE and the - * information we have is wrong/outdated and disables the channel - * that we're actually using for the connection to the AP. - */ - while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, - tracking ? 0 : - IEEE80211_CHAN_DISABLED)) { - if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) { - ret = IEEE80211_CONN_DISABLE_HT | - IEEE80211_CONN_DISABLE_VHT | - IEEE80211_CONN_DISABLE_HE | - IEEE80211_CONN_DISABLE_EHT; - break; - } - - ret |= ieee80211_chandef_downgrade(chandef); - } - - if (!he_oper || !cfg80211_chandef_usable(sdata->wdev.wiphy, chandef, - IEEE80211_CHAN_NO_HE)) - ret |= IEEE80211_CONN_DISABLE_HE | IEEE80211_CONN_DISABLE_EHT; - - if (!eht_oper || !cfg80211_chandef_usable(sdata->wdev.wiphy, chandef, - IEEE80211_CHAN_NO_EHT)) - ret |= IEEE80211_CONN_DISABLE_EHT; - - if (chandef->width != vht_chandef.width && !tracking) - sdata_info(sdata, - "capabilities/regulatory prevented using AP HT/VHT configuration, downgraded\n"); - - WARN_ON_ONCE(!cfg80211_chandef_valid(chandef)); - return ret; + return IEEE80211_CONN_MODE_EHT; } -static int ieee80211_config_bw(struct ieee80211_link_data *link, - struct ieee802_11_elems *elems, - const u8 *bssid, u64 *changed) +static bool +ieee80211_verify_peer_he_mcs_support(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_he_cap_elem *he_cap, + const struct ieee80211_he_operation *he_op) { - const struct ieee80211_vht_cap *vht_cap = elems->vht_cap_elem; - const struct ieee80211_ht_operation *ht_oper = elems->ht_operation; - const struct ieee80211_vht_operation *vht_oper = elems->vht_operation; - const struct ieee80211_he_operation *he_oper = elems->he_operation; - const struct ieee80211_eht_operation *eht_oper = elems->eht_operation; - const struct ieee80211_s1g_oper_ie *s1g_oper = elems->s1g_oper; - struct ieee80211_sub_if_data *sdata = link->sdata; - struct ieee80211_local *local = sdata->local; - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct ieee80211_channel *chan = link->conf->chandef.chan; - struct ieee80211_supported_band *sband = - local->hw.wiphy->bands[chan->band]; - struct cfg80211_chan_def chandef; - u16 ht_opmode; - ieee80211_conn_flags_t flags; - u32 vht_cap_info = 0; - int ret; - - /* if HT was/is disabled, don't track any bandwidth changes */ - if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT || !ht_oper) - return 0; + struct ieee80211_he_mcs_nss_supp *he_mcs_nss_supp; + u16 mcs_80_map_tx, mcs_80_map_rx; + u16 ap_min_req_set; + int nss; - /* don't check VHT if we associated as non-VHT station */ - if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT) - vht_oper = NULL; + if (!he_cap) + return false; - /* don't check HE if we associated as non-HE station */ - if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE || - !ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif)) { - he_oper = NULL; - eht_oper = NULL; - } + /* mcs_nss is right after he_cap info */ + he_mcs_nss_supp = (void *)(he_cap + 1); - /* don't check EHT if we associated as non-EHT station */ - if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_EHT || - !ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif)) - eht_oper = NULL; + mcs_80_map_tx = le16_to_cpu(he_mcs_nss_supp->tx_mcs_80); + mcs_80_map_rx = le16_to_cpu(he_mcs_nss_supp->rx_mcs_80); - /* - * if bss configuration changed store the new one - - * this may be applicable even if channel is identical + /* P802.11-REVme/D0.3 + * 27.1.1 Introduction to the HE PHY + * ... + * An HE STA shall support the following features: + * ... + * Single spatial stream HE-MCSs 0 to 7 (transmit and receive) in all + * supported channel widths for HE SU PPDUs */ - ht_opmode = le16_to_cpu(ht_oper->operation_mode); - if (link->conf->ht_operation_mode != ht_opmode) { - *changed |= BSS_CHANGED_HT; - link->conf->ht_operation_mode = ht_opmode; + if ((mcs_80_map_tx & 0x3) == IEEE80211_HE_MCS_NOT_SUPPORTED || + (mcs_80_map_rx & 0x3) == IEEE80211_HE_MCS_NOT_SUPPORTED) { + sdata_info(sdata, + "Missing mandatory rates for 1 Nss, rx 0x%x, tx 0x%x, disable HE\n", + mcs_80_map_tx, mcs_80_map_rx); + return false; } - if (vht_cap) - vht_cap_info = le32_to_cpu(vht_cap->vht_cap_info); + if (!he_op) + return true; - /* calculate new channel (type) based on HT/VHT/HE operation IEs */ - flags = ieee80211_determine_chantype(sdata, link, - link->u.mgd.conn_flags, - sband, chan, vht_cap_info, - ht_oper, vht_oper, - he_oper, eht_oper, - s1g_oper, &chandef, true); + ap_min_req_set = le16_to_cpu(he_op->he_mcs_nss_set); /* - * Downgrade the new channel if we associated with restricted - * capabilities. For example, if we associated as a 20 MHz STA - * to a 40 MHz AP (due to regulatory, capabilities or config - * reasons) then switching to a 40 MHz channel now won't do us - * any good -- we couldn't use it with the AP. + * Apparently iPhone 13 (at least iOS version 15.3.1) sets this to all + * zeroes, which is nonsense, and completely inconsistent with itself + * (it doesn't have 8 streams). Accept the settings in this case anyway. */ - if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_80P80MHZ && - chandef.width == NL80211_CHAN_WIDTH_80P80) - flags |= ieee80211_chandef_downgrade(&chandef); - if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_160MHZ && - chandef.width == NL80211_CHAN_WIDTH_160) - flags |= ieee80211_chandef_downgrade(&chandef); - if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_40MHZ && - chandef.width > NL80211_CHAN_WIDTH_20) - flags |= ieee80211_chandef_downgrade(&chandef); - - if (cfg80211_chandef_identical(&chandef, &link->conf->chandef)) - return 0; + if (!ap_min_req_set) + return true; - link_info(link, - "AP %pM changed bandwidth, new config is %d.%03d MHz, width %d (%d.%03d/%d MHz)\n", - link->u.mgd.bssid, chandef.chan->center_freq, - chandef.chan->freq_offset, chandef.width, - chandef.center_freq1, chandef.freq1_offset, - chandef.center_freq2); - - if (flags != (link->u.mgd.conn_flags & - (IEEE80211_CONN_DISABLE_HT | - IEEE80211_CONN_DISABLE_VHT | - IEEE80211_CONN_DISABLE_HE | - IEEE80211_CONN_DISABLE_EHT | - IEEE80211_CONN_DISABLE_40MHZ | - IEEE80211_CONN_DISABLE_80P80MHZ | - IEEE80211_CONN_DISABLE_160MHZ | - IEEE80211_CONN_DISABLE_320MHZ)) || - !cfg80211_chandef_valid(&chandef)) { - sdata_info(sdata, - "AP %pM changed caps/bw in a way we can't support (0x%x/0x%x) - disconnect\n", - link->u.mgd.bssid, flags, ifmgd->flags); - return -EINVAL; - } + /* make sure the AP is consistent with itself + * + * P802.11-REVme/D0.3 + * 26.17.1 Basic HE BSS operation + * + * A STA that is operating in an HE BSS shall be able to receive and + * transmit at each of the tuple values indicated by the + * Basic HE-MCS And NSS Set field of the HE Operation parameter of the + * MLME-START.request primitive and shall be able to receive at each of + * the tuple values indicated by the Supported HE-MCS and + * NSS Set field in the HE Capabilities parameter of the MLMESTART.request + * primitive + */ + for (nss = 8; nss > 0; nss--) { + u8 ap_op_val = (ap_min_req_set >> (2 * (nss - 1))) & 3; + u8 ap_rx_val; + u8 ap_tx_val; - ret = ieee80211_link_change_bandwidth(link, &chandef, changed); + if (ap_op_val == IEEE80211_HE_MCS_NOT_SUPPORTED) + continue; - if (ret) { - sdata_info(sdata, - "AP %pM changed bandwidth to incompatible one - disconnect\n", - link->u.mgd.bssid); - return ret; + ap_rx_val = (mcs_80_map_rx >> (2 * (nss - 1))) & 3; + ap_tx_val = (mcs_80_map_tx >> (2 * (nss - 1))) & 3; + + if (ap_rx_val == IEEE80211_HE_MCS_NOT_SUPPORTED || + ap_tx_val == IEEE80211_HE_MCS_NOT_SUPPORTED || + ap_rx_val < ap_op_val || ap_tx_val < ap_op_val) { + sdata_info(sdata, + "Invalid rates for %d Nss, rx %d, tx %d oper %d, disable HE\n", + nss, ap_rx_val, ap_rx_val, ap_op_val); + return false; + } } - cfg80211_schedule_channels_check(&sdata->wdev); - return 0; + return true; } -/* frame sending functions */ - -static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, u8 ap_ht_param, - struct ieee80211_supported_band *sband, - struct ieee80211_channel *channel, - enum ieee80211_smps_mode smps, - ieee80211_conn_flags_t conn_flags) +static bool +ieee80211_verify_sta_he_mcs_support(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + const struct ieee80211_he_operation *he_op) { - u8 *pos; - u32 flags = channel->flags; - u16 cap; - struct ieee80211_sta_ht_cap ht_cap; - - BUILD_BUG_ON(sizeof(ht_cap) != sizeof(sband->ht_cap)); - - memcpy(&ht_cap, &sband->ht_cap, sizeof(ht_cap)); - ieee80211_apply_htcap_overrides(sdata, &ht_cap); + const struct ieee80211_sta_he_cap *sta_he_cap = + ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); + u16 ap_min_req_set; + int i; - /* determine capability flags */ - cap = ht_cap.cap; + if (!sta_he_cap || !he_op) + return false; - switch (ap_ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { - case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: - if (flags & IEEE80211_CHAN_NO_HT40PLUS) { - cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; - cap &= ~IEEE80211_HT_CAP_SGI_40; - } - break; - case IEEE80211_HT_PARAM_CHA_SEC_BELOW: - if (flags & IEEE80211_CHAN_NO_HT40MINUS) { - cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; - cap &= ~IEEE80211_HT_CAP_SGI_40; - } - break; - } + ap_min_req_set = le16_to_cpu(he_op->he_mcs_nss_set); /* - * If 40 MHz was disabled associate as though we weren't - * capable of 40 MHz -- some broken APs will never fall - * back to trying to transmit in 20 MHz. + * Apparently iPhone 13 (at least iOS version 15.3.1) sets this to all + * zeroes, which is nonsense, and completely inconsistent with itself + * (it doesn't have 8 streams). Accept the settings in this case anyway. */ - if (conn_flags & IEEE80211_CONN_DISABLE_40MHZ) { - cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; - cap &= ~IEEE80211_HT_CAP_SGI_40; - } + if (!ap_min_req_set) + return true; - /* set SM PS mode properly */ - cap &= ~IEEE80211_HT_CAP_SM_PS; - switch (smps) { - case IEEE80211_SMPS_AUTOMATIC: - case IEEE80211_SMPS_NUM_MODES: - WARN_ON(1); - fallthrough; - case IEEE80211_SMPS_OFF: - cap |= WLAN_HT_CAP_SM_PS_DISABLED << - IEEE80211_HT_CAP_SM_PS_SHIFT; - break; - case IEEE80211_SMPS_STATIC: - cap |= WLAN_HT_CAP_SM_PS_STATIC << - IEEE80211_HT_CAP_SM_PS_SHIFT; + /* Need to go over for 80MHz, 160MHz and for 80+80 */ + for (i = 0; i < 3; i++) { + const struct ieee80211_he_mcs_nss_supp *sta_mcs_nss_supp = + &sta_he_cap->he_mcs_nss_supp; + u16 sta_mcs_map_rx = + le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i]); + u16 sta_mcs_map_tx = + le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i + 1]); + u8 nss; + bool verified = true; + + /* + * For each band there is a maximum of 8 spatial streams + * possible. Each of the sta_mcs_map_* is a 16-bit struct built + * of 2 bits per NSS (1-8), with the values defined in enum + * ieee80211_he_mcs_support. Need to make sure STA TX and RX + * capabilities aren't less than the AP's minimum requirements + * for this HE BSS per SS. + * It is enough to find one such band that meets the reqs. + */ + for (nss = 8; nss > 0; nss--) { + u8 sta_rx_val = (sta_mcs_map_rx >> (2 * (nss - 1))) & 3; + u8 sta_tx_val = (sta_mcs_map_tx >> (2 * (nss - 1))) & 3; + u8 ap_val = (ap_min_req_set >> (2 * (nss - 1))) & 3; + + if (ap_val == IEEE80211_HE_MCS_NOT_SUPPORTED) + continue; + + /* + * Make sure the HE AP doesn't require MCSs that aren't + * supported by the client as required by spec + * + * P802.11-REVme/D0.3 + * 26.17.1 Basic HE BSS operation + * + * An HE STA shall not attempt to join * (MLME-JOIN.request primitive) + * a BSS, unless it supports (i.e., is able to both transmit and + * receive using) all of the tuples in the basic + * HE-MCS and NSS set. + */ + if (sta_rx_val == IEEE80211_HE_MCS_NOT_SUPPORTED || + sta_tx_val == IEEE80211_HE_MCS_NOT_SUPPORTED || + (ap_val > sta_rx_val) || (ap_val > sta_tx_val)) { + verified = false; + break; + } + } + + if (verified) + return true; + } + + /* If here, STA doesn't meet AP's HE min requirements */ + return false; +} + +static u8 +ieee80211_get_eht_cap_mcs_nss(const struct ieee80211_sta_he_cap *sta_he_cap, + const struct ieee80211_sta_eht_cap *sta_eht_cap, + unsigned int idx, int bw) +{ + u8 he_phy_cap0 = sta_he_cap->he_cap_elem.phy_cap_info[0]; + u8 eht_phy_cap0 = sta_eht_cap->eht_cap_elem.phy_cap_info[0]; + + /* handle us being a 20 MHz-only EHT STA - with four values + * for MCS 0-7, 8-9, 10-11, 12-13. + */ + if (!(he_phy_cap0 & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK_ALL)) + return sta_eht_cap->eht_mcs_nss_supp.only_20mhz.rx_tx_max_nss[idx]; + + /* the others have MCS 0-9 together, rather than separately from 0-7 */ + if (idx > 0) + idx--; + + switch (bw) { + case 0: + return sta_eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_max_nss[idx]; + case 1: + if (!(he_phy_cap0 & + (IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G | + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G))) + return 0xff; /* pass check */ + return sta_eht_cap->eht_mcs_nss_supp.bw._160.rx_tx_max_nss[idx]; + case 2: + if (!(eht_phy_cap0 & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ)) + return 0xff; /* pass check */ + return sta_eht_cap->eht_mcs_nss_supp.bw._320.rx_tx_max_nss[idx]; + } + + WARN_ON(1); + return 0; +} + +static bool +ieee80211_verify_sta_eht_mcs_support(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + const struct ieee80211_eht_operation *eht_op) +{ + const struct ieee80211_sta_he_cap *sta_he_cap = + ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); + const struct ieee80211_sta_eht_cap *sta_eht_cap = + ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif); + const struct ieee80211_eht_mcs_nss_supp_20mhz_only *req; + unsigned int i; + + if (!sta_he_cap || !sta_eht_cap || !eht_op) + return false; + + req = &eht_op->basic_mcs_nss; + + for (i = 0; i < ARRAY_SIZE(req->rx_tx_max_nss); i++) { + u8 req_rx_nss, req_tx_nss; + unsigned int bw; + + req_rx_nss = u8_get_bits(req->rx_tx_max_nss[i], + IEEE80211_EHT_MCS_NSS_RX); + req_tx_nss = u8_get_bits(req->rx_tx_max_nss[i], + IEEE80211_EHT_MCS_NSS_TX); + + for (bw = 0; bw < 3; bw++) { + u8 have, have_rx_nss, have_tx_nss; + + have = ieee80211_get_eht_cap_mcs_nss(sta_he_cap, + sta_eht_cap, + i, bw); + have_rx_nss = u8_get_bits(have, + IEEE80211_EHT_MCS_NSS_RX); + have_tx_nss = u8_get_bits(have, + IEEE80211_EHT_MCS_NSS_TX); + + if (req_rx_nss > have_rx_nss || + req_tx_nss > have_tx_nss) + return false; + } + } + + return true; +} + +static struct ieee802_11_elems * +ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, + struct ieee80211_conn_settings *conn, + struct cfg80211_bss *cbss, int link_id, + struct cfg80211_chan_def *chandef) +{ + struct ieee80211_local *local = sdata->local; + const struct cfg80211_bss_ies *ies = rcu_dereference(cbss->ies); + struct ieee80211_bss *bss = (void *)cbss->priv; + struct ieee80211_channel *channel = cbss->channel; + struct ieee80211_elems_parse_params parse_params = { + .link_id = -1, + .from_ap = true, + .start = ies->data, + .len = ies->len, + .mode = conn->mode, + }; + struct ieee802_11_elems *elems; + struct ieee80211_supported_band *sband; + struct cfg80211_chan_def ap_chandef; + enum ieee80211_conn_mode ap_mode; + int ret; + +again: + elems = ieee802_11_parse_elems_full(&parse_params); + if (!elems) + return ERR_PTR(-ENOMEM); + + ap_mode = ieee80211_determine_ap_chan(sdata, channel, bss->vht_cap_info, + elems, false, conn, &ap_chandef); + + mlme_link_id_dbg(sdata, link_id, "determined AP %pM to be %s\n", + cbss->bssid, ieee80211_conn_mode_str(ap_mode)); + + /* this should be impossible since parsing depends on our mode */ + if (WARN_ON(ap_mode > conn->mode)) { + ret = -EINVAL; + goto free; + } + + sband = sdata->local->hw.wiphy->bands[channel->band]; + + switch (channel->band) { + case NL80211_BAND_S1GHZ: + if (WARN_ON(ap_mode != IEEE80211_CONN_MODE_S1G)) { + ret = -EINVAL; + goto free; + } + return elems; + case NL80211_BAND_6GHZ: + if (ap_mode < IEEE80211_CONN_MODE_HE) { + sdata_info(sdata, + "Rejecting non-HE 6/7 GHz connection"); + ret = -EINVAL; + goto free; + } + break; + default: + if (WARN_ON(ap_mode == IEEE80211_CONN_MODE_S1G)) { + ret = -EINVAL; + goto free; + } + } + + switch (ap_mode) { + case IEEE80211_CONN_MODE_S1G: + WARN_ON(1); + ret = -EINVAL; + goto free; + case IEEE80211_CONN_MODE_LEGACY: + conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20; + break; + case IEEE80211_CONN_MODE_HT: + conn->bw_limit = min_t(enum ieee80211_conn_bw_limit, + conn->bw_limit, + IEEE80211_CONN_BW_LIMIT_40); + break; + case IEEE80211_CONN_MODE_VHT: + case IEEE80211_CONN_MODE_HE: + conn->bw_limit = min_t(enum ieee80211_conn_bw_limit, + conn->bw_limit, + IEEE80211_CONN_BW_LIMIT_160); + break; + case IEEE80211_CONN_MODE_EHT: + conn->bw_limit = min_t(enum ieee80211_conn_bw_limit, + conn->bw_limit, + IEEE80211_CONN_BW_LIMIT_320); + break; + } + + conn->mode = ap_mode; + *chandef = ap_chandef; + + while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, + IEEE80211_CHAN_DISABLED)) { + if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) { + ret = -EINVAL; + goto free; + } + + ieee80211_chandef_downgrade(chandef, conn); + } + + if (conn->mode >= IEEE80211_CONN_MODE_HE && + !cfg80211_chandef_usable(sdata->wdev.wiphy, chandef, + IEEE80211_CHAN_NO_HE)) { + conn->mode = IEEE80211_CONN_MODE_VHT; + conn->bw_limit = min_t(enum ieee80211_conn_bw_limit, + conn->bw_limit, + IEEE80211_CONN_BW_LIMIT_160); + } + + if (conn->mode >= IEEE80211_CONN_MODE_EHT && + !cfg80211_chandef_usable(sdata->wdev.wiphy, chandef, + IEEE80211_CHAN_NO_EHT)) { + conn->mode = IEEE80211_CONN_MODE_HE; + conn->bw_limit = min_t(enum ieee80211_conn_bw_limit, + conn->bw_limit, + IEEE80211_CONN_BW_LIMIT_160); + } + + if (chandef->width != ap_chandef.width || ap_mode != conn->mode) + sdata_info(sdata, + "regulatory prevented using AP config, downgraded\n"); + + if (conn->mode >= IEEE80211_CONN_MODE_HE && + (!ieee80211_verify_peer_he_mcs_support(sdata, (void *)elems->he_cap, + elems->he_operation) || + !ieee80211_verify_sta_he_mcs_support(sdata, sband, + elems->he_operation))) { + conn->mode = IEEE80211_CONN_MODE_VHT; + sdata_info(sdata, "required MCSes not supported, disabling HE\n"); + } + + if (conn->mode >= IEEE80211_CONN_MODE_EHT && + !ieee80211_verify_sta_eht_mcs_support(sdata, sband, + elems->eht_operation)) { + conn->mode = IEEE80211_CONN_MODE_HE; + conn->bw_limit = min_t(enum ieee80211_conn_bw_limit, + conn->bw_limit, + IEEE80211_CONN_BW_LIMIT_160); + sdata_info(sdata, "required MCSes not supported, disabling EHT\n"); + } + + if (conn->mode >= IEEE80211_CONN_MODE_EHT) { + const struct ieee80211_eht_operation *eht_oper; + + eht_oper = elems->eht_operation; + + if (WARN_ON_ONCE(!eht_oper)) { + ret = -EINVAL; + goto free; + } + + if (eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT && + eht_oper->params & IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT) { + const struct ieee80211_eht_operation_info *info = + (void *)eht_oper->optional; + const u8 *disable_subchannel_bitmap = info->optional; + u16 bitmap; + + bitmap = get_unaligned_le16(disable_subchannel_bitmap); + if (!cfg80211_valid_disable_subchannel_bitmap(&bitmap, + &ap_chandef) || + (bitmap && + ieee80211_hw_check(&local->hw, DISALLOW_PUNCTURING))) { + conn->mode = IEEE80211_CONN_MODE_HE; + conn->bw_limit = min_t(enum ieee80211_conn_bw_limit, + conn->bw_limit, + IEEE80211_CONN_BW_LIMIT_160); + sdata_info(sdata, + "AP has invalid/unsupported puncturing, disabling EHT\n"); + } + /* FIXME: store puncturing bitmap */ + } + } + + /* the mode can only decrease, so this must terminate */ + if (ap_mode != conn->mode) + goto again; + + mlme_link_id_dbg(sdata, link_id, + "connecting with %s mode, max bandwidth %d MHz\n", + ieee80211_conn_mode_str(conn->mode), + 20 * (1 << conn->bw_limit)); + + if (WARN_ON_ONCE(!cfg80211_chandef_valid(chandef))) { + ret = -EINVAL; + goto free; + } + + return elems; +free: + kfree(elems); + return ERR_PTR(ret); +} + +static int ieee80211_config_bw(struct ieee80211_link_data *link, + struct ieee802_11_elems *elems, + u64 *changed) +{ + struct ieee80211_channel *channel = link->conf->chandef.chan; + struct ieee80211_sub_if_data *sdata = link->sdata; + struct cfg80211_chan_def ap_chandef; + enum ieee80211_conn_mode ap_mode; + u32 vht_cap_info = 0; + u16 ht_opmode; + int ret; + + /* don't track any bandwidth changes in legacy/S1G modes */ + if (link->u.mgd.conn.mode == IEEE80211_CONN_MODE_LEGACY || + link->u.mgd.conn.mode == IEEE80211_CONN_MODE_S1G) + return 0; + + if (elems->vht_cap_elem) + vht_cap_info = le32_to_cpu(elems->vht_cap_elem->vht_cap_info); + + ap_mode = ieee80211_determine_ap_chan(sdata, channel, vht_cap_info, + elems, true, &link->u.mgd.conn, + &ap_chandef); + + if (ap_mode != link->u.mgd.conn.mode) { + link_info(link, + "AP appears to change mode (expected %s, found %s), disconnect\n", + ieee80211_conn_mode_str(link->u.mgd.conn.mode), + ieee80211_conn_mode_str(ap_mode)); + return -EINVAL; + } + + /* + * if HT operation mode changed store the new one - + * this may be applicable even if channel is identical + */ + if (elems->ht_operation) { + ht_opmode = le16_to_cpu(elems->ht_operation->operation_mode); + if (link->conf->ht_operation_mode != ht_opmode) { + *changed |= BSS_CHANGED_HT; + link->conf->ht_operation_mode = ht_opmode; + } + } + + /* + * Downgrade the new channel if we associated with restricted + * bandwidth capabilities. For example, if we associated as a + * 20 MHz STA to a 40 MHz AP (due to regulatory, capabilities + * or config reasons) then switching to a 40 MHz channel now + * won't do us any good -- we couldn't use it with the AP. + */ + while (link->u.mgd.conn.bw_limit < + ieee80211_min_bw_limit_from_chandef(&ap_chandef)) + ieee80211_chandef_downgrade(&ap_chandef, NULL); + + if (cfg80211_chandef_identical(&ap_chandef, &link->conf->chandef)) + return 0; + + link_info(link, + "AP %pM changed bandwidth, new config is %d.%03d MHz, width %d (%d.%03d/%d MHz)\n", + link->u.mgd.bssid, ap_chandef.chan->center_freq, + ap_chandef.chan->freq_offset, ap_chandef.width, + ap_chandef.center_freq1, ap_chandef.freq1_offset, + ap_chandef.center_freq2); + + if (!cfg80211_chandef_valid(&ap_chandef)) { + sdata_info(sdata, + "AP %pM changed caps/bw in a way we can't support - disconnect\n", + link->u.mgd.bssid); + return -EINVAL; + } + + /* + * We're tracking the current AP here, so don't do any further checks + * here. This keeps us from playing ping-pong with regulatory, without + * it the following can happen (for example): + * - connect to an AP with 80 MHz, world regdom allows 80 MHz + * - AP advertises regdom US + * - CRDA loads regdom US with 80 MHz prohibited (old database) + * - we detect an unsupported channel and disconnect + * - disconnect causes CRDA to reload world regdomain and the game + * starts anew. + * (see https://bugzilla.kernel.org/show_bug.cgi?id=70881) + * + * It seems possible that there are still scenarios with CSA or real + * bandwidth changes where a this could happen, but those cases are + * less common and wouldn't completely prevent using the AP. + */ + + ret = ieee80211_link_change_bandwidth(link, &ap_chandef, changed); + if (ret) { + sdata_info(sdata, + "AP %pM changed bandwidth to incompatible one - disconnect\n", + link->u.mgd.bssid); + return ret; + } + + cfg80211_schedule_channels_check(&sdata->wdev); + return 0; +} + +/* frame sending functions */ + +static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, u8 ap_ht_param, + struct ieee80211_supported_band *sband, + struct ieee80211_channel *channel, + enum ieee80211_smps_mode smps, + const struct ieee80211_conn_settings *conn) +{ + u8 *pos; + u32 flags = channel->flags; + u16 cap; + struct ieee80211_sta_ht_cap ht_cap; + + BUILD_BUG_ON(sizeof(ht_cap) != sizeof(sband->ht_cap)); + + memcpy(&ht_cap, &sband->ht_cap, sizeof(ht_cap)); + ieee80211_apply_htcap_overrides(sdata, &ht_cap); + + /* determine capability flags */ + cap = ht_cap.cap; + + switch (ap_ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { + case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: + if (flags & IEEE80211_CHAN_NO_HT40PLUS) { + cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; + cap &= ~IEEE80211_HT_CAP_SGI_40; + } + break; + case IEEE80211_HT_PARAM_CHA_SEC_BELOW: + if (flags & IEEE80211_CHAN_NO_HT40MINUS) { + cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; + cap &= ~IEEE80211_HT_CAP_SGI_40; + } + break; + } + + /* + * If 40 MHz was disabled associate as though we weren't + * capable of 40 MHz -- some broken APs will never fall + * back to trying to transmit in 20 MHz. + */ + if (conn->bw_limit <= IEEE80211_CONN_BW_LIMIT_20) { + cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; + cap &= ~IEEE80211_HT_CAP_SGI_40; + } + + /* set SM PS mode properly */ + cap &= ~IEEE80211_HT_CAP_SM_PS; + switch (smps) { + case IEEE80211_SMPS_AUTOMATIC: + case IEEE80211_SMPS_NUM_MODES: + WARN_ON(1); + fallthrough; + case IEEE80211_SMPS_OFF: + cap |= WLAN_HT_CAP_SM_PS_DISABLED << + IEEE80211_HT_CAP_SM_PS_SHIFT; + break; + case IEEE80211_SMPS_STATIC: + cap |= WLAN_HT_CAP_SM_PS_STATIC << + IEEE80211_HT_CAP_SM_PS_SHIFT; break; case IEEE80211_SMPS_DYNAMIC: cap |= WLAN_HT_CAP_SM_PS_DYNAMIC << @@ -688,7 +1042,7 @@ static bool ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, struct ieee80211_supported_band *sband, struct ieee80211_vht_cap *ap_vht_cap, - ieee80211_conn_flags_t conn_flags) + const struct ieee80211_conn_settings *conn) { struct ieee80211_local *local = sdata->local; u8 *pos; @@ -705,16 +1059,7 @@ static bool ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata, /* determine capability flags */ cap = vht_cap.cap; - if (conn_flags & IEEE80211_CONN_DISABLE_80P80MHZ) { - u32 bw = cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; - - cap &= ~IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; - if (bw == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ || - bw == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) - cap |= IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ; - } - - if (conn_flags & IEEE80211_CONN_DISABLE_160MHZ) { + if (conn->bw_limit <= IEEE80211_CONN_BW_LIMIT_80) { cap &= ~IEEE80211_VHT_CAP_SHORT_GI_160; cap &= ~IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; } @@ -778,7 +1123,7 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, struct ieee80211_supported_band *sband, enum ieee80211_smps_mode smps_mode, - ieee80211_conn_flags_t conn_flags) + const struct ieee80211_conn_settings *conn) { u8 *pos, *pre_he_pos; const struct ieee80211_sta_he_cap *he_cap; @@ -796,8 +1141,7 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata, he_cap->he_cap_elem.phy_cap_info); pos = skb_put(skb, he_cap_size); pre_he_pos = pos; - pos = ieee80211_ie_build_he_cap(conn_flags, - pos, he_cap, pos + he_cap_size); + pos = ieee80211_ie_build_he_cap(conn, he_cap, pos, pos + he_cap_size); /* trim excess if any */ skb_trim(skb, skb->len - (pre_he_pos + he_cap_size - pos)); @@ -1135,11 +1479,11 @@ static size_t ieee80211_assoc_link_elems(struct ieee80211_sub_if_data *sdata, offset); if (sband->band != NL80211_BAND_6GHZ && - !(assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_HT)) { + assoc_data->link[link_id].conn.mode >= IEEE80211_CONN_MODE_HT) { ieee80211_add_ht_ie(sdata, skb, assoc_data->link[link_id].ap_ht_param, sband, chan, smps_mode, - assoc_data->link[link_id].conn_flags); + &assoc_data->link[link_id].conn); ADD_PRESENT_ELEM(WLAN_EID_HT_CAPABILITY); } @@ -1149,36 +1493,25 @@ static size_t ieee80211_assoc_link_elems(struct ieee80211_sub_if_data *sdata, offset); if (sband->band != NL80211_BAND_6GHZ && - !(assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_VHT)) { + assoc_data->link[link_id].conn.mode >= IEEE80211_CONN_MODE_VHT) { bool mu_mimo_owner = ieee80211_add_vht_ie(sdata, skb, sband, &assoc_data->link[link_id].ap_vht_cap, - assoc_data->link[link_id].conn_flags); + &assoc_data->link[link_id].conn); if (link) link->conf->mu_mimo_owner = mu_mimo_owner; ADD_PRESENT_ELEM(WLAN_EID_VHT_CAPABILITY); } - /* - * If AP doesn't support HT, mark HE and EHT as disabled. - * If on the 5GHz band, make sure it supports VHT. - */ - if (assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_HT || - (sband->band == NL80211_BAND_5GHZ && - assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_VHT)) - assoc_data->link[link_id].conn_flags |= - IEEE80211_CONN_DISABLE_HE | - IEEE80211_CONN_DISABLE_EHT; - /* if present, add any custom IEs that go before HE */ offset = ieee80211_add_before_he_elems(skb, extra_elems, extra_elems_len, offset); - if (!(assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_HE)) { + if (assoc_data->link[link_id].conn.mode >= IEEE80211_CONN_MODE_HE) { ieee80211_add_he_ie(sdata, skb, sband, smps_mode, - assoc_data->link[link_id].conn_flags); + &assoc_data->link[link_id].conn); ADD_PRESENT_EXT_ELEM(WLAN_EID_EXT_HE_CAPABILITY); } @@ -1187,7 +1520,7 @@ static size_t ieee80211_assoc_link_elems(struct ieee80211_sub_if_data *sdata, * calling ieee80211_assoc_add_ml_elem(), so add this one if * we're going to put it after the ML element */ - if (!(assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_EHT)) + if (assoc_data->link[link_id].conn.mode >= IEEE80211_CONN_MODE_EHT) ADD_PRESENT_EXT_ELEM(WLAN_EID_EXT_EHT_CAPABILITY); if (link_id == assoc_data->assoc_link_id) @@ -1197,7 +1530,7 @@ static size_t ieee80211_assoc_link_elems(struct ieee80211_sub_if_data *sdata, /* crash if somebody gets it wrong */ present_elems = NULL; - if (!(assoc_data->link[link_id].conn_flags & IEEE80211_CONN_DISABLE_EHT)) + if (assoc_data->link[link_id].conn.mode >= IEEE80211_CONN_MODE_EHT) ieee80211_add_eht_ie(sdata, skb, sband); if (sband->band == NL80211_BAND_S1GHZ) { @@ -1208,9 +1541,6 @@ static size_t ieee80211_assoc_link_elems(struct ieee80211_sub_if_data *sdata, if (iftd && iftd->vendor_elems.data && iftd->vendor_elems.len) skb_put_data(skb, iftd->vendor_elems.data, iftd->vendor_elems.len); - if (link) - link->u.mgd.conn_flags = assoc_data->link[link_id].conn_flags; - return offset; } @@ -1499,7 +1829,7 @@ static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) /* Set MBSSID support for HE AP if needed */ if (ieee80211_hw_check(&local->hw, SUPPORTS_ONLY_HE_MULTI_BSSID) && - !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) && + link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HE && ext_capa && ext_capa->datalen >= 3) ext_capa->data[2] |= WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT; @@ -1544,7 +1874,7 @@ static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) * for some reason check it and want it to be set, set the bit for all * pre-EHT connections as we used to do. */ - if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_EHT) + if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_EHT) capab |= WLAN_CAPABILITY_ESS; /* add the elements for the assoc (main) link */ @@ -1876,7 +2206,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, bss = (void *)cbss->priv; res = ieee80211_parse_ch_switch_ie(sdata, elems, current_band, bss->vht_cap_info, - link->u.mgd.conn_flags, + &link->u.mgd.conn, link->u.mgd.bssid, &csa_ie); if (!res) { @@ -3057,8 +3387,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, sdata->deflink.u.mgd.tracking_signal_avg = false; sdata->deflink.u.mgd.disable_wmm_tracking = false; + sdata->vif.bss_conf.eht_puncturing = 0; + ifmgd->flags = 0; - sdata->deflink.u.mgd.conn_flags = 0; for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) { struct ieee80211_link_data *link; @@ -3526,7 +3857,6 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, sta_info_destroy_addr(sdata, auth_data->ap_addr); /* other links are destroyed */ - sdata->deflink.u.mgd.conn_flags = 0; eth_zero_addr(sdata->deflink.u.mgd.bssid); ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_BSSID); @@ -3564,7 +3894,6 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, del_timer_sync(&sdata->u.mgd.timer); sta_info_destroy_addr(sdata, assoc_data->ap_addr); - sdata->deflink.u.mgd.conn_flags = 0; eth_zero_addr(sdata->deflink.u.mgd.bssid); ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_BSSID); @@ -4014,11 +4343,13 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, struct ieee80211_local *local = sdata->local; unsigned int link_id = link->link_id; struct ieee80211_elems_parse_params parse_params = { + .mode = link->u.mgd.conn.mode, .start = elem_start, .len = elem_len, .link_id = link_id == assoc_data->assoc_link_id ? -1 : link_id, .from_ap = true, }; + bool is_5ghz = cbss->channel->band == NL80211_BAND_5GHZ; bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ; bool is_s1g = cbss->channel->band == NL80211_BAND_S1GHZ; const struct cfg80211_bss_ies *bss_ies = NULL; @@ -4094,9 +4425,9 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, */ if (!is_6ghz && ((assoc_data->wmm && !elems->wmm_param) || - (!(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT) && + (link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT && (!elems->ht_cap_elem || !elems->ht_operation)) || - (!(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT) && + (link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT && (!elems->vht_cap_elem || !elems->vht_operation)))) { const struct cfg80211_bss_ies *ies; struct ieee802_11_elems *bss_elems; @@ -4133,25 +4464,25 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, * have to include the IEs in the (re)association response. */ if (!elems->ht_cap_elem && bss_elems->ht_cap_elem && - !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT)) { + link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT) { elems->ht_cap_elem = bss_elems->ht_cap_elem; sdata_info(sdata, "AP bug: HT capability missing from AssocResp\n"); } if (!elems->ht_operation && bss_elems->ht_operation && - !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT)) { + link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT) { elems->ht_operation = bss_elems->ht_operation; sdata_info(sdata, "AP bug: HT operation missing from AssocResp\n"); } if (!elems->vht_cap_elem && bss_elems->vht_cap_elem && - !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)) { + link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT) { elems->vht_cap_elem = bss_elems->vht_cap_elem; sdata_info(sdata, "AP bug: VHT capa missing from AssocResp\n"); } if (!elems->vht_operation && bss_elems->vht_operation && - !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)) { + link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT) { elems->vht_operation = bss_elems->vht_operation; sdata_info(sdata, "AP bug: VHT operation missing from AssocResp\n"); @@ -4164,7 +4495,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, * We previously checked these in the beacon/probe response, so * they should be present here. This is just a safety net. */ - if (!is_6ghz && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT) && + if (!is_6ghz && link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT && (!elems->wmm_param || !elems->ht_cap_elem || !elems->ht_operation)) { sdata_info(sdata, "HT AP is missing WMM params or HT capability/operation\n"); @@ -4172,7 +4503,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, goto out; } - if (!is_6ghz && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT) && + if (is_5ghz && link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT && (!elems->vht_cap_elem || !elems->vht_operation)) { sdata_info(sdata, "VHT AP is missing VHT capability/operation\n"); @@ -4180,36 +4511,20 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, goto out; } - if (is_6ghz && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) && - !elems->he_6ghz_capa) { - sdata_info(sdata, - "HE 6 GHz AP is missing HE 6 GHz band capability\n"); - ret = false; - goto out; - } - if (WARN_ON(!link->conf->chandef.chan)) { ret = false; goto out; } sband = local->hw.wiphy->bands[link->conf->chandef.chan->band]; - if (!(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) && - (!elems->he_cap || !elems->he_operation)) { - sdata_info(sdata, - "HE AP is missing HE capability/operation\n"); - ret = false; - goto out; - } - /* Set up internal HT/VHT capabilities */ - if (elems->ht_cap_elem && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT)) + if (elems->ht_cap_elem && link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT) ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, elems->ht_cap_elem, link_sta); if (elems->vht_cap_elem && - !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT)) { + link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT) { const struct ieee80211_vht_cap *bss_vht_cap = NULL; const struct cfg80211_bss_ies *ies; @@ -4236,7 +4551,8 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, rcu_read_unlock(); } - if (elems->he_operation && !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) && + if (elems->he_operation && + link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HE && elems->he_cap) { const struct ieee80211_he_6ghz_oper *he_6ghz_oper; @@ -4283,7 +4599,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, link_sta, elems); if (elems->eht_operation && elems->eht_cap && - !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_EHT)) { + link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_EHT) { ieee80211_eht_cap_ie_to_sta_eht_cap(sdata, sband, elems->he_cap, elems->he_cap_len, @@ -4490,548 +4806,394 @@ static u8 ieee80211_max_rx_chains(struct ieee80211_link_data *link, bool support_160; u8 chains = 1; - if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT) - return chains; - - ht_cap_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_HT_CAPABILITY); - if (ht_cap_elem && ht_cap_elem->datalen >= sizeof(*ht_cap)) { - ht_cap = (void *)ht_cap_elem->data; - chains = ieee80211_mcs_to_chains(&ht_cap->mcs); - /* - * TODO: use "Tx Maximum Number Spatial Streams Supported" and - * "Tx Unequal Modulation Supported" fields. - */ - } - - if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_VHT) - return chains; - - vht_cap_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_VHT_CAPABILITY); - if (vht_cap_elem && vht_cap_elem->datalen >= sizeof(*vht_cap)) { - u8 nss; - u16 tx_mcs_map; - - vht_cap = (void *)vht_cap_elem->data; - tx_mcs_map = le16_to_cpu(vht_cap->supp_mcs.tx_mcs_map); - for (nss = 8; nss > 0; nss--) { - if (((tx_mcs_map >> (2 * (nss - 1))) & 3) != - IEEE80211_VHT_MCS_NOT_SUPPORTED) - break; - } - /* TODO: use "Tx Highest Supported Long GI Data Rate" field? */ - chains = max(chains, nss); - } - - if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) - return chains; - - ies = rcu_dereference(cbss->ies); - he_cap_elem = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_CAPABILITY, - ies->data, ies->len); - - if (!he_cap_elem || he_cap_elem->datalen < sizeof(*he_cap)) - return chains; - - /* skip one byte ext_tag_id */ - he_cap = (void *)(he_cap_elem->data + 1); - mcs_nss_size = ieee80211_he_mcs_nss_size(he_cap); - - /* invalid HE IE */ - if (he_cap_elem->datalen < 1 + mcs_nss_size + sizeof(*he_cap)) - return chains; - - /* mcs_nss is right after he_cap info */ - he_mcs_nss_supp = (void *)(he_cap + 1); - - mcs_80_map = le16_to_cpu(he_mcs_nss_supp->tx_mcs_80); - - for (i = 7; i >= 0; i--) { - u8 mcs_80 = mcs_80_map >> (2 * i) & 3; - - if (mcs_80 != IEEE80211_VHT_MCS_NOT_SUPPORTED) { - chains = max_t(u8, chains, i + 1); - break; - } - } - - support_160 = he_cap->phy_cap_info[0] & - IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G; - - if (!support_160) - return chains; - - mcs_160_map = le16_to_cpu(he_mcs_nss_supp->tx_mcs_160); - for (i = 7; i >= 0; i--) { - u8 mcs_160 = mcs_160_map >> (2 * i) & 3; - - if (mcs_160 != IEEE80211_VHT_MCS_NOT_SUPPORTED) { - chains = max_t(u8, chains, i + 1); - break; - } - } - - return chains; -} - -static bool -ieee80211_verify_peer_he_mcs_support(struct ieee80211_sub_if_data *sdata, - const struct ieee80211_he_cap_elem *he_cap, - const struct ieee80211_he_operation *he_op) -{ - struct ieee80211_he_mcs_nss_supp *he_mcs_nss_supp; - u16 mcs_80_map_tx, mcs_80_map_rx; - u16 ap_min_req_set; - int nss; - - if (!he_cap) - return false; - - /* mcs_nss is right after he_cap info */ - he_mcs_nss_supp = (void *)(he_cap + 1); - - mcs_80_map_tx = le16_to_cpu(he_mcs_nss_supp->tx_mcs_80); - mcs_80_map_rx = le16_to_cpu(he_mcs_nss_supp->rx_mcs_80); - - /* P802.11-REVme/D0.3 - * 27.1.1 Introduction to the HE PHY - * ... - * An HE STA shall support the following features: - * ... - * Single spatial stream HE-MCSs 0 to 7 (transmit and receive) in all - * supported channel widths for HE SU PPDUs - */ - if ((mcs_80_map_tx & 0x3) == IEEE80211_HE_MCS_NOT_SUPPORTED || - (mcs_80_map_rx & 0x3) == IEEE80211_HE_MCS_NOT_SUPPORTED) { - sdata_info(sdata, - "Missing mandatory rates for 1 Nss, rx 0x%x, tx 0x%x, disable HE\n", - mcs_80_map_tx, mcs_80_map_rx); - return false; - } - - if (!he_op) - return true; - - ap_min_req_set = le16_to_cpu(he_op->he_mcs_nss_set); - - /* - * Apparently iPhone 13 (at least iOS version 15.3.1) sets this to all - * zeroes, which is nonsense, and completely inconsistent with itself - * (it doesn't have 8 streams). Accept the settings in this case anyway. - */ - if (!ap_min_req_set) - return true; - - /* make sure the AP is consistent with itself - * - * P802.11-REVme/D0.3 - * 26.17.1 Basic HE BSS operation - * - * A STA that is operating in an HE BSS shall be able to receive and - * transmit at each of the tuple values indicated by the - * Basic HE-MCS And NSS Set field of the HE Operation parameter of the - * MLME-START.request primitive and shall be able to receive at each of - * the tuple values indicated by the Supported HE-MCS and - * NSS Set field in the HE Capabilities parameter of the MLMESTART.request - * primitive - */ - for (nss = 8; nss > 0; nss--) { - u8 ap_op_val = (ap_min_req_set >> (2 * (nss - 1))) & 3; - u8 ap_rx_val; - u8 ap_tx_val; + if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_HT) + return chains; - if (ap_op_val == IEEE80211_HE_MCS_NOT_SUPPORTED) - continue; + ht_cap_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_HT_CAPABILITY); + if (ht_cap_elem && ht_cap_elem->datalen >= sizeof(*ht_cap)) { + ht_cap = (void *)ht_cap_elem->data; + chains = ieee80211_mcs_to_chains(&ht_cap->mcs); + /* + * TODO: use "Tx Maximum Number Spatial Streams Supported" and + * "Tx Unequal Modulation Supported" fields. + */ + } - ap_rx_val = (mcs_80_map_rx >> (2 * (nss - 1))) & 3; - ap_tx_val = (mcs_80_map_tx >> (2 * (nss - 1))) & 3; + if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_VHT) + return chains; - if (ap_rx_val == IEEE80211_HE_MCS_NOT_SUPPORTED || - ap_tx_val == IEEE80211_HE_MCS_NOT_SUPPORTED || - ap_rx_val < ap_op_val || ap_tx_val < ap_op_val) { - sdata_info(sdata, - "Invalid rates for %d Nss, rx %d, tx %d oper %d, disable HE\n", - nss, ap_rx_val, ap_rx_val, ap_op_val); - return false; + vht_cap_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_VHT_CAPABILITY); + if (vht_cap_elem && vht_cap_elem->datalen >= sizeof(*vht_cap)) { + u8 nss; + u16 tx_mcs_map; + + vht_cap = (void *)vht_cap_elem->data; + tx_mcs_map = le16_to_cpu(vht_cap->supp_mcs.tx_mcs_map); + for (nss = 8; nss > 0; nss--) { + if (((tx_mcs_map >> (2 * (nss - 1))) & 3) != + IEEE80211_VHT_MCS_NOT_SUPPORTED) + break; } + /* TODO: use "Tx Highest Supported Long GI Data Rate" field? */ + chains = max(chains, nss); } - return true; -} + if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_HE) + return chains; -static bool -ieee80211_verify_sta_he_mcs_support(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, - const struct ieee80211_he_operation *he_op) -{ - const struct ieee80211_sta_he_cap *sta_he_cap = - ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); - u16 ap_min_req_set; - int i; + ies = rcu_dereference(cbss->ies); + he_cap_elem = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_CAPABILITY, + ies->data, ies->len); - if (!sta_he_cap || !he_op) - return false; + if (!he_cap_elem || he_cap_elem->datalen < sizeof(*he_cap)) + return chains; - ap_min_req_set = le16_to_cpu(he_op->he_mcs_nss_set); + /* skip one byte ext_tag_id */ + he_cap = (void *)(he_cap_elem->data + 1); + mcs_nss_size = ieee80211_he_mcs_nss_size(he_cap); - /* - * Apparently iPhone 13 (at least iOS version 15.3.1) sets this to all - * zeroes, which is nonsense, and completely inconsistent with itself - * (it doesn't have 8 streams). Accept the settings in this case anyway. - */ - if (!ap_min_req_set) - return true; + /* invalid HE IE */ + if (he_cap_elem->datalen < 1 + mcs_nss_size + sizeof(*he_cap)) + return chains; - /* Need to go over for 80MHz, 160MHz and for 80+80 */ - for (i = 0; i < 3; i++) { - const struct ieee80211_he_mcs_nss_supp *sta_mcs_nss_supp = - &sta_he_cap->he_mcs_nss_supp; - u16 sta_mcs_map_rx = - le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i]); - u16 sta_mcs_map_tx = - le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i + 1]); - u8 nss; - bool verified = true; + /* mcs_nss is right after he_cap info */ + he_mcs_nss_supp = (void *)(he_cap + 1); - /* - * For each band there is a maximum of 8 spatial streams - * possible. Each of the sta_mcs_map_* is a 16-bit struct built - * of 2 bits per NSS (1-8), with the values defined in enum - * ieee80211_he_mcs_support. Need to make sure STA TX and RX - * capabilities aren't less than the AP's minimum requirements - * for this HE BSS per SS. - * It is enough to find one such band that meets the reqs. - */ - for (nss = 8; nss > 0; nss--) { - u8 sta_rx_val = (sta_mcs_map_rx >> (2 * (nss - 1))) & 3; - u8 sta_tx_val = (sta_mcs_map_tx >> (2 * (nss - 1))) & 3; - u8 ap_val = (ap_min_req_set >> (2 * (nss - 1))) & 3; + mcs_80_map = le16_to_cpu(he_mcs_nss_supp->tx_mcs_80); - if (ap_val == IEEE80211_HE_MCS_NOT_SUPPORTED) - continue; + for (i = 7; i >= 0; i--) { + u8 mcs_80 = mcs_80_map >> (2 * i) & 3; - /* - * Make sure the HE AP doesn't require MCSs that aren't - * supported by the client as required by spec - * - * P802.11-REVme/D0.3 - * 26.17.1 Basic HE BSS operation - * - * An HE STA shall not attempt to join * (MLME-JOIN.request primitive) - * a BSS, unless it supports (i.e., is able to both transmit and - * receive using) all of the tuples in the basic - * HE-MCS and NSS set. - */ - if (sta_rx_val == IEEE80211_HE_MCS_NOT_SUPPORTED || - sta_tx_val == IEEE80211_HE_MCS_NOT_SUPPORTED || - (ap_val > sta_rx_val) || (ap_val > sta_tx_val)) { - verified = false; - break; - } + if (mcs_80 != IEEE80211_VHT_MCS_NOT_SUPPORTED) { + chains = max_t(u8, chains, i + 1); + break; } - - if (verified) - return true; } - /* If here, STA doesn't meet AP's HE min requirements */ - return false; -} - -static u8 -ieee80211_get_eht_cap_mcs_nss(const struct ieee80211_sta_he_cap *sta_he_cap, - const struct ieee80211_sta_eht_cap *sta_eht_cap, - unsigned int idx, int bw) -{ - u8 he_phy_cap0 = sta_he_cap->he_cap_elem.phy_cap_info[0]; - u8 eht_phy_cap0 = sta_eht_cap->eht_cap_elem.phy_cap_info[0]; + support_160 = he_cap->phy_cap_info[0] & + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G; - /* handle us being a 20 MHz-only EHT STA - with four values - * for MCS 0-7, 8-9, 10-11, 12-13. - */ - if (!(he_phy_cap0 & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK_ALL)) - return sta_eht_cap->eht_mcs_nss_supp.only_20mhz.rx_tx_max_nss[idx]; + if (!support_160) + return chains; - /* the others have MCS 0-9 together, rather than separately from 0-7 */ - if (idx > 0) - idx--; + mcs_160_map = le16_to_cpu(he_mcs_nss_supp->tx_mcs_160); + for (i = 7; i >= 0; i--) { + u8 mcs_160 = mcs_160_map >> (2 * i) & 3; - switch (bw) { - case 0: - return sta_eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_max_nss[idx]; - case 1: - if (!(he_phy_cap0 & - (IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G | - IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G))) - return 0xff; /* pass check */ - return sta_eht_cap->eht_mcs_nss_supp.bw._160.rx_tx_max_nss[idx]; - case 2: - if (!(eht_phy_cap0 & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ)) - return 0xff; /* pass check */ - return sta_eht_cap->eht_mcs_nss_supp.bw._320.rx_tx_max_nss[idx]; + if (mcs_160 != IEEE80211_VHT_MCS_NOT_SUPPORTED) { + chains = max_t(u8, chains, i + 1); + break; + } } - WARN_ON(1); - return 0; + return chains; } -static bool -ieee80211_verify_sta_eht_mcs_support(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, - const struct ieee80211_eht_operation *eht_op) +static void +ieee80211_determine_our_sta_mode(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + struct cfg80211_assoc_request *req, + bool wmm_used, int link_id, + struct ieee80211_conn_settings *conn) { - const struct ieee80211_sta_he_cap *sta_he_cap = - ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); - const struct ieee80211_sta_eht_cap *sta_eht_cap = - ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif); - const struct ieee80211_eht_mcs_nss_supp_20mhz_only *req; - unsigned int i; + struct ieee80211_sta_ht_cap sta_ht_cap = sband->ht_cap; + bool is_5ghz = sband->band == NL80211_BAND_5GHZ; + bool is_6ghz = sband->band == NL80211_BAND_6GHZ; + const struct ieee80211_sta_he_cap *he_cap; + const struct ieee80211_sta_eht_cap *eht_cap; + struct ieee80211_sta_vht_cap vht_cap; - if (!sta_he_cap || !sta_eht_cap || !eht_op) - return false; + if (sband->band == NL80211_BAND_S1GHZ) { + conn->mode = IEEE80211_CONN_MODE_S1G; + conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20; + mlme_dbg(sdata, "operating as S1G STA\n"); + return; + } - req = &eht_op->basic_mcs_nss; + conn->mode = IEEE80211_CONN_MODE_LEGACY; + conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20; - for (i = 0; i < ARRAY_SIZE(req->rx_tx_max_nss); i++) { - u8 req_rx_nss, req_tx_nss; - unsigned int bw; + ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); - req_rx_nss = u8_get_bits(req->rx_tx_max_nss[i], - IEEE80211_EHT_MCS_NSS_RX); - req_tx_nss = u8_get_bits(req->rx_tx_max_nss[i], - IEEE80211_EHT_MCS_NSS_TX); + if (req && req->flags & ASSOC_REQ_DISABLE_HT) { + mlme_link_id_dbg(sdata, link_id, + "HT disabled by flag, limiting to legacy\n"); + goto out; + } - for (bw = 0; bw < 3; bw++) { - u8 have, have_rx_nss, have_tx_nss; + if (!wmm_used) { + mlme_link_id_dbg(sdata, link_id, + "WMM/QoS not supported, limiting to legacy\n"); + goto out; + } - have = ieee80211_get_eht_cap_mcs_nss(sta_he_cap, - sta_eht_cap, - i, bw); - have_rx_nss = u8_get_bits(have, - IEEE80211_EHT_MCS_NSS_RX); - have_tx_nss = u8_get_bits(have, - IEEE80211_EHT_MCS_NSS_TX); + if (req) { + unsigned int i; - if (req_rx_nss > have_rx_nss || - req_tx_nss > have_tx_nss) - return false; + for (i = 0; i < req->crypto.n_ciphers_pairwise; i++) { + if (req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP40 || + req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_TKIP || + req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP104) { + netdev_info(sdata->dev, + "WEP/TKIP use, limiting to legacy\n"); + goto out; + } } } - return true; -} + if (!sta_ht_cap.ht_supported && !is_6ghz) { + mlme_link_id_dbg(sdata, link_id, + "HT not supported (and not on 6 GHz), limiting to legacy\n"); + goto out; + } -static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, - struct ieee80211_link_data *link, - struct cfg80211_bss *cbss, - bool mlo, - ieee80211_conn_flags_t *conn_flags) -{ - struct ieee80211_local *local = sdata->local; - const struct ieee80211_ht_cap *ht_cap = NULL; - const struct ieee80211_ht_operation *ht_oper = NULL; - const struct ieee80211_vht_operation *vht_oper = NULL; - const struct ieee80211_he_operation *he_oper = NULL; - const struct ieee80211_eht_operation *eht_oper = NULL; - const struct ieee80211_s1g_oper_ie *s1g_oper = NULL; - struct ieee80211_supported_band *sband; - struct cfg80211_chan_def chandef; - bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ; - bool is_5ghz = cbss->channel->band == NL80211_BAND_5GHZ; - bool supports_mlo = false; - struct ieee80211_bss *bss = (void *)cbss->priv; - struct ieee80211_elems_parse_params parse_params = { - .link_id = -1, - .from_ap = true, - }; - struct ieee802_11_elems *elems; - const struct cfg80211_bss_ies *ies; - int ret; - u32 i; - bool have_80mhz; + /* HT is fine */ + conn->mode = IEEE80211_CONN_MODE_HT; + conn->bw_limit = sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + IEEE80211_CONN_BW_LIMIT_40 : + IEEE80211_CONN_BW_LIMIT_20; + + memcpy(&vht_cap, &sband->vht_cap, sizeof(vht_cap)); + ieee80211_apply_vhtcap_overrides(sdata, &vht_cap); - lockdep_assert_wiphy(local->hw.wiphy); + if (req && req->flags & ASSOC_REQ_DISABLE_VHT) { + mlme_link_id_dbg(sdata, link_id, + "VHT disabled by flag, limiting to HT\n"); + goto out; + } - rcu_read_lock(); + if (vht_cap.vht_supported && is_5ghz) { + bool have_80mhz = false; + unsigned int i; - ies = rcu_dereference(cbss->ies); - parse_params.start = ies->data; - parse_params.len = ies->len; - elems = ieee802_11_parse_elems_full(&parse_params); - if (!elems) { - rcu_read_unlock(); - return -ENOMEM; - } + if (conn->bw_limit == IEEE80211_CONN_BW_LIMIT_20) { + mlme_link_id_dbg(sdata, link_id, + "no 40 MHz support on 5 GHz, limiting to HT\n"); + goto out; + } - sband = local->hw.wiphy->bands[cbss->channel->band]; + /* Allow VHT if at least one channel on the sband supports 80 MHz */ + for (i = 0; i < sband->n_channels; i++) { + if (sband->channels[i].flags & (IEEE80211_CHAN_DISABLED | + IEEE80211_CHAN_NO_80MHZ)) + continue; + + have_80mhz = true; + break; + } - *conn_flags &= ~(IEEE80211_CONN_DISABLE_40MHZ | - IEEE80211_CONN_DISABLE_80P80MHZ | - IEEE80211_CONN_DISABLE_160MHZ); + if (!have_80mhz) { + mlme_link_id_dbg(sdata, link_id, + "no 80 MHz channel support on 5 GHz, limiting to HT\n"); + goto out; + } + } else if (is_5ghz) { /* !vht_supported but on 5 GHz */ + mlme_link_id_dbg(sdata, link_id, + "no VHT support on 5 GHz, limiting to HT\n"); + goto out; + } - /* disable HT/VHT/HE if we don't support them */ - if (!sband->ht_cap.ht_supported && !is_6ghz) { - mlme_dbg(sdata, "HT not supported, disabling HT/VHT/HE/EHT\n"); - *conn_flags |= IEEE80211_CONN_DISABLE_HT; - *conn_flags |= IEEE80211_CONN_DISABLE_VHT; - *conn_flags |= IEEE80211_CONN_DISABLE_HE; - *conn_flags |= IEEE80211_CONN_DISABLE_EHT; + /* VHT - if we have - is fine, including 80 MHz, check 160 below again */ + if (sband->band != NL80211_BAND_2GHZ) { + conn->mode = IEEE80211_CONN_MODE_VHT; + conn->bw_limit = IEEE80211_CONN_BW_LIMIT_160; } - if (!sband->vht_cap.vht_supported && is_5ghz) { - mlme_dbg(sdata, "VHT not supported, disabling VHT/HE/EHT\n"); - *conn_flags |= IEEE80211_CONN_DISABLE_VHT; - *conn_flags |= IEEE80211_CONN_DISABLE_HE; - *conn_flags |= IEEE80211_CONN_DISABLE_EHT; + if (is_5ghz && + !(vht_cap.cap & (IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ | + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ))) { + conn->bw_limit = IEEE80211_CONN_BW_LIMIT_80; + mlme_link_id_dbg(sdata, link_id, + "no VHT 160 MHz capability on 5 GHz, limiting to 80 MHz"); } - if (!ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif)) { - mlme_dbg(sdata, "HE not supported, disabling HE and EHT\n"); - *conn_flags |= IEEE80211_CONN_DISABLE_HE; - *conn_flags |= IEEE80211_CONN_DISABLE_EHT; + if (req && req->flags & ASSOC_REQ_DISABLE_HE) { + mlme_link_id_dbg(sdata, link_id, + "HE disabled by flag, limiting to HT/VHT\n"); + goto out; } - if (!ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif)) { - mlme_dbg(sdata, "EHT not supported, disabling EHT\n"); - *conn_flags |= IEEE80211_CONN_DISABLE_EHT; + he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); + if (!he_cap) { + WARN_ON(is_6ghz); + mlme_link_id_dbg(sdata, link_id, + "no HE support, limiting to HT/VHT\n"); + goto out; } - if (!(*conn_flags & IEEE80211_CONN_DISABLE_HT) && !is_6ghz) { - ht_oper = elems->ht_operation; - ht_cap = elems->ht_cap_elem; + /* so we have HE */ + conn->mode = IEEE80211_CONN_MODE_HE; - if (!ht_cap) { - *conn_flags |= IEEE80211_CONN_DISABLE_HT; - ht_oper = NULL; + /* check bandwidth */ + switch (sband->band) { + default: + case NL80211_BAND_2GHZ: + if (he_cap->he_cap_elem.phy_cap_info[0] & + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G) + break; + conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20; + mlme_link_id_dbg(sdata, link_id, + "no 40 MHz HE cap in 2.4 GHz, limiting to 20 MHz\n"); + break; + case NL80211_BAND_5GHZ: + if (!(he_cap->he_cap_elem.phy_cap_info[0] & + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G)) { + conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20; + mlme_link_id_dbg(sdata, link_id, + "no 40/80 MHz HE cap in 5 GHz, limiting to 20 MHz\n"); + break; + } + if (!(he_cap->he_cap_elem.phy_cap_info[0] & + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G)) { + conn->bw_limit = min_t(enum ieee80211_conn_bw_limit, + conn->bw_limit, + IEEE80211_CONN_BW_LIMIT_80); + mlme_link_id_dbg(sdata, link_id, + "no 160 MHz HE cap in 5 GHz, limiting to 80 MHz\n"); } + break; + case NL80211_BAND_6GHZ: + if (he_cap->he_cap_elem.phy_cap_info[0] & + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G) + break; + conn->bw_limit = min_t(enum ieee80211_conn_bw_limit, + conn->bw_limit, + IEEE80211_CONN_BW_LIMIT_80); + mlme_link_id_dbg(sdata, link_id, + "no 160 MHz HE cap in 6 GHz, limiting to 80 MHz\n"); + break; } - if (!(*conn_flags & IEEE80211_CONN_DISABLE_VHT) && !is_6ghz) { - vht_oper = elems->vht_operation; - if (vht_oper && !ht_oper) { - vht_oper = NULL; - sdata_info(sdata, - "AP advertised VHT without HT, disabling HT/VHT/HE\n"); - *conn_flags |= IEEE80211_CONN_DISABLE_HT; - *conn_flags |= IEEE80211_CONN_DISABLE_VHT; - *conn_flags |= IEEE80211_CONN_DISABLE_HE; - *conn_flags |= IEEE80211_CONN_DISABLE_EHT; - } + if (req && req->flags & ASSOC_REQ_DISABLE_EHT) { + mlme_link_id_dbg(sdata, link_id, + "EHT disabled by flag, limiting to HE\n"); + goto out; + } - if (!elems->vht_cap_elem) { - *conn_flags |= IEEE80211_CONN_DISABLE_VHT; - vht_oper = NULL; - } + eht_cap = ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif); + if (!eht_cap) { + mlme_link_id_dbg(sdata, link_id, + "no EHT support, limiting to HE\n"); + goto out; } - if (!(*conn_flags & IEEE80211_CONN_DISABLE_HE)) { - he_oper = elems->he_operation; + /* we have EHT */ - if (link && is_6ghz) { - struct ieee80211_bss_conf *bss_conf; - u8 j = 0; + conn->mode = IEEE80211_CONN_MODE_EHT; - bss_conf = link->conf; + /* check bandwidth */ + if (is_6ghz && + eht_cap->eht_cap_elem.phy_cap_info[0] & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ) + conn->bw_limit = IEEE80211_CONN_BW_LIMIT_320; + else if (is_6ghz) + mlme_link_id_dbg(sdata, link_id, + "no EHT 320 MHz cap in 6 GHz, limiting to 160 MHz\n"); - if (elems->pwr_constr_elem) - bss_conf->pwr_reduction = *elems->pwr_constr_elem; +out: + mlme_link_id_dbg(sdata, link_id, + "determined local STA to be %s, BW limited to %d MHz\n", + ieee80211_conn_mode_str(conn->mode), + 20 * (1 << conn->bw_limit)); +} - BUILD_BUG_ON(ARRAY_SIZE(bss_conf->tx_pwr_env) != - ARRAY_SIZE(elems->tx_pwr_env)); +static void +ieee80211_determine_our_sta_mode_auth(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + struct cfg80211_auth_request *req, + bool wmm_used, + struct ieee80211_conn_settings *conn) +{ + ieee80211_determine_our_sta_mode(sdata, sband, NULL, wmm_used, + req->link_id > 0 ? req->link_id : 0, + conn); +} - for (i = 0; i < elems->tx_pwr_env_num; i++) { - if (elems->tx_pwr_env_len[i] > - sizeof(bss_conf->tx_pwr_env[j])) - continue; +static void +ieee80211_determine_our_sta_mode_assoc(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + struct cfg80211_assoc_request *req, + bool wmm_used, int link_id, + struct ieee80211_conn_settings *conn) +{ + struct ieee80211_conn_settings tmp; - bss_conf->tx_pwr_env_num++; - memcpy(&bss_conf->tx_pwr_env[j], elems->tx_pwr_env[i], - elems->tx_pwr_env_len[i]); - j++; - } - } + WARN_ON(!req); - if (!ieee80211_verify_peer_he_mcs_support(sdata, - (void *)elems->he_cap, - he_oper) || - !ieee80211_verify_sta_he_mcs_support(sdata, sband, he_oper)) - *conn_flags |= IEEE80211_CONN_DISABLE_HE | - IEEE80211_CONN_DISABLE_EHT; - } + ieee80211_determine_our_sta_mode(sdata, sband, req, wmm_used, link_id, + &tmp); - /* - * EHT requires HE to be supported as well. Specifically for 6 GHz - * channels, the operation channel information can only be deduced from - * both the 6 GHz operation information (from the HE operation IE) and - * EHT operation. - */ - if (!(*conn_flags & - (IEEE80211_CONN_DISABLE_HE | - IEEE80211_CONN_DISABLE_EHT)) && - he_oper) { - eht_oper = elems->eht_operation; + conn->mode = min_t(enum ieee80211_conn_mode, + conn->mode, tmp.mode); + conn->bw_limit = min_t(enum ieee80211_conn_bw_limit, + conn->bw_limit, tmp.bw_limit); +} - if (!ieee80211_verify_sta_eht_mcs_support(sdata, sband, eht_oper)) - *conn_flags |= IEEE80211_CONN_DISABLE_EHT; +static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, + struct ieee80211_link_data *link, + int link_id, + struct cfg80211_bss *cbss, bool mlo, + struct ieee80211_conn_settings *conn) +{ + struct ieee80211_local *local = sdata->local; + struct cfg80211_chan_def chandef; + bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ; + struct ieee802_11_elems *elems; + int ret; + u32 i; - supports_mlo = elems->ml_basic; - } + lockdep_assert_wiphy(local->hw.wiphy); - /* Allow VHT if at least one channel on the sband supports 80 MHz */ - have_80mhz = false; - for (i = 0; i < sband->n_channels; i++) { - if (sband->channels[i].flags & (IEEE80211_CHAN_DISABLED | - IEEE80211_CHAN_NO_80MHZ)) - continue; + rcu_read_lock(); + elems = ieee80211_determine_chan_mode(sdata, conn, cbss, link_id, + &chandef); - have_80mhz = true; - break; + if (IS_ERR(elems)) { + rcu_read_unlock(); + return PTR_ERR(elems); } - if (!have_80mhz) { - sdata_info(sdata, "80 MHz not supported, disabling VHT\n"); - *conn_flags |= IEEE80211_CONN_DISABLE_VHT; + if (mlo && !elems->ml_basic) { + sdata_info(sdata, "Rejecting MLO as it is not supported by AP\n"); + rcu_read_unlock(); + kfree(elems); + return -EINVAL; } - if (sband->band == NL80211_BAND_S1GHZ) { - s1g_oper = elems->s1g_oper; - if (!s1g_oper) - sdata_info(sdata, - "AP missing S1G operation element?\n"); - } + if (link && is_6ghz && conn->mode >= IEEE80211_CONN_MODE_HE) { + struct ieee80211_bss_conf *bss_conf; + u8 j = 0; - *conn_flags |= - ieee80211_determine_chantype(sdata, link, *conn_flags, - sband, - cbss->channel, - bss->vht_cap_info, - ht_oper, vht_oper, - he_oper, eht_oper, - s1g_oper, - &chandef, false); + bss_conf = link->conf; - if (link) - link->needed_rx_chains = - min(ieee80211_max_rx_chains(link, cbss), - local->rx_chains); + if (elems->pwr_constr_elem) + bss_conf->pwr_reduction = *elems->pwr_constr_elem; + + BUILD_BUG_ON(ARRAY_SIZE(bss_conf->tx_pwr_env) != + ARRAY_SIZE(elems->tx_pwr_env)); + + for (i = 0; i < elems->tx_pwr_env_num; i++) { + if (elems->tx_pwr_env_len[i] > sizeof(bss_conf->tx_pwr_env[j])) + continue; + bss_conf->tx_pwr_env_num++; + memcpy(&bss_conf->tx_pwr_env[j], elems->tx_pwr_env[i], + elems->tx_pwr_env_len[i]); + j++; + } + } rcu_read_unlock(); /* the element data was RCU protected so no longer valid anyway */ kfree(elems); elems = NULL; - if (*conn_flags & IEEE80211_CONN_DISABLE_HE && is_6ghz) { - sdata_info(sdata, "Rejecting non-HE 6/7 GHz connection"); - return -EINVAL; - } - - if (mlo && !supports_mlo) { - sdata_info(sdata, "Rejecting MLO as it is not supported by AP\n"); - return -EINVAL; - } - if (!link) return 0; + rcu_read_lock(); + link->needed_rx_chains = min(ieee80211_max_rx_chains(link, cbss), + local->rx_chains); + rcu_read_unlock(); + /* * If this fails (possibly due to channel context sharing * on incompatible channels, e.g. 80+80 and 160 sharing the @@ -5043,15 +5205,14 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, /* don't downgrade for 5 and 10 MHz channels, though. */ if (chandef.width == NL80211_CHAN_WIDTH_5 || chandef.width == NL80211_CHAN_WIDTH_10) - goto out; + return ret; while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT) { - *conn_flags |= - ieee80211_chandef_downgrade(&chandef); + ieee80211_chandef_downgrade(&chandef, conn); ret = ieee80211_link_use_channel(link, &chandef, IEEE80211_CHANCTX_SHARED); } - out: + return ret; } @@ -5177,8 +5338,10 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, link->conf->dtim_period = link->u.mgd.dtim_period ?: 1; if (link_id != assoc_data->assoc_link_id) { - err = ieee80211_prep_channel(sdata, link, cbss, true, - &link->u.mgd.conn_flags); + link->u.mgd.conn = assoc_data->link[link_id].conn; + + err = ieee80211_prep_channel(sdata, link, link_id, cbss, + true, &link->u.mgd.conn); if (err) { link_info(link, "prep_channel failed\n"); goto out_err; @@ -5296,6 +5459,9 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if (!assoc_data) return; + parse_params.mode = + assoc_data->link[assoc_data->assoc_link_id].conn.mode; + if (!ether_addr_equal(assoc_data->ap_addr, mgmt->bssid) || !ether_addr_equal(assoc_data->ap_addr, mgmt->sa)) return; @@ -6162,6 +6328,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, u8 *bssid, *variable = mgmt->u.beacon.variable; u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN]; struct ieee80211_elems_parse_params parse_params = { + .mode = link->u.mgd.conn.mode, .link_id = -1, .from_ap = true, }; @@ -6416,10 +6583,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, changed |= ieee80211_recalc_twt_req(sdata, sband, link, link_sta, elems); - if (ieee80211_config_bw(link, elems, bssid, &changed)) { - sdata_info(sdata, - "failed to follow AP %pM bandwidth change, disconnect\n", - bssid); + if (ieee80211_config_bw(link, elems, &changed)) { ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DEAUTH_LEAVING, true, deauth_buf); @@ -6442,7 +6606,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, elems->cisco_dtpc_elem); if (elems->eht_operation && - !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_EHT)) { + link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_EHT) { if (!ieee80211_config_puncturing(link, elems->eht_operation, &changed)) { ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, @@ -7494,7 +7658,6 @@ void ieee80211_mgd_setup_link(struct ieee80211_link_data *link) unsigned int link_id = link->link_id; link->u.mgd.p2p_noa_index = -1; - link->u.mgd.conn_flags = 0; link->conf->bssid = link->u.mgd.bssid; link->smps_mode = IEEE80211_SMPS_OFF; @@ -7534,6 +7697,7 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, struct cfg80211_bss *cbss, s8 link_id, const u8 *ap_mld_addr, bool assoc, + struct ieee80211_conn_settings *conn, bool override) { struct ieee80211_local *local = sdata->local; @@ -7665,13 +7829,22 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, } if (new_sta || override) { - err = ieee80211_prep_channel(sdata, link, cbss, mlo, - &link->u.mgd.conn_flags); + /* + * Only set this if we're also going to calculate the AP + * settings etc., otherwise this was set before in a + * previous call. Note override is set to %true in assoc + * if the settings were changed. + */ + link->u.mgd.conn = *conn; + err = ieee80211_prep_channel(sdata, link, link->link_id, cbss, + mlo, &link->u.mgd.conn); if (err) { if (new_sta) sta_info_free(local, new_sta); goto out_err; } + /* pass out for use in assoc */ + *conn = link->u.mgd.conn; } if (new_sta) { @@ -7786,10 +7959,13 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_mgd_auth_data *auth_data; + struct ieee80211_conn_settings conn; struct ieee80211_link_data *link; + struct ieee80211_supported_band *sband; + struct ieee80211_bss *bss; u16 auth_alg; int err; - bool cont_auth; + bool cont_auth, wmm_used; lockdep_assert_wiphy(sdata->local->hw.wiphy); @@ -7920,8 +8096,17 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, /* needed for transmitting the auth frame(s) properly */ memcpy(sdata->vif.cfg.ap_addr, auth_data->ap_addr, ETH_ALEN); + bss = (void *)req->bss->priv; + wmm_used = bss->wmm_used && (local->hw.queues >= IEEE80211_NUM_ACS); + + sband = local->hw.wiphy->bands[req->bss->channel->band]; + + ieee80211_determine_our_sta_mode_auth(sdata, sband, req, wmm_used, + &conn); + err = ieee80211_prep_connection(sdata, req->bss, req->link_id, - req->ap_mld_addr, cont_auth, false); + req->ap_mld_addr, cont_auth, + &conn, false); if (err) goto err_clear; @@ -7960,38 +8145,33 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, return err; } -static ieee80211_conn_flags_t +static void ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgd_assoc_data *assoc_data, struct cfg80211_assoc_request *req, - ieee80211_conn_flags_t conn_flags, + struct ieee80211_conn_settings *conn, unsigned int link_id) { struct ieee80211_local *local = sdata->local; const struct cfg80211_bss_ies *bss_ies; struct ieee80211_supported_band *sband; - const struct element *ht_elem, *vht_elem; struct ieee80211_link_data *link; struct cfg80211_bss *cbss; struct ieee80211_bss *bss; - bool is_5ghz, is_6ghz; cbss = assoc_data->link[link_id].bss; if (WARN_ON(!cbss)) - return 0; + return; bss = (void *)cbss->priv; sband = local->hw.wiphy->bands[cbss->channel->band]; if (WARN_ON(!sband)) - return 0; + return; link = sdata_dereference(sdata->link[link_id], sdata); if (WARN_ON(!link)) - return 0; - - is_5ghz = cbss->channel->band == NL80211_BAND_5GHZ; - is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ; + return; /* for MLO connections assume advertising all rates is OK */ if (!req->ap_mld_addr) { @@ -8008,40 +8188,18 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata, assoc_data->ie_pos += req->links[link_id].elems_len; } - rcu_read_lock(); - ht_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_HT_OPERATION); - if (ht_elem && ht_elem->datalen >= sizeof(struct ieee80211_ht_operation)) - assoc_data->link[link_id].ap_ht_param = - ((struct ieee80211_ht_operation *)(ht_elem->data))->ht_param; - else if (!is_6ghz) - conn_flags |= IEEE80211_CONN_DISABLE_HT; - vht_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_VHT_CAPABILITY); - if (vht_elem && vht_elem->datalen >= sizeof(struct ieee80211_vht_cap)) { - memcpy(&assoc_data->link[link_id].ap_vht_cap, vht_elem->data, - sizeof(struct ieee80211_vht_cap)); - } else if (is_5ghz) { - link_info(link, - "VHT capa missing/short, disabling VHT/HE/EHT\n"); - conn_flags |= IEEE80211_CONN_DISABLE_VHT | - IEEE80211_CONN_DISABLE_HE | - IEEE80211_CONN_DISABLE_EHT; - } - rcu_read_unlock(); - link->u.mgd.beacon_crc_valid = false; link->u.mgd.dtim_period = 0; link->u.mgd.have_beacon = false; - /* override HT/VHT configuration only if the AP and we support it */ - if (!(conn_flags & IEEE80211_CONN_DISABLE_HT)) { + /* override HT configuration only if the AP and we support it */ + if (conn->mode >= IEEE80211_CONN_MODE_HT) { struct ieee80211_sta_ht_cap sta_ht_cap; memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap)); ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); } - link->conf->eht_puncturing = 0; - rcu_read_lock(); bss_ies = rcu_dereference(cbss->beacon_ies); if (bss_ies) { @@ -8062,7 +8220,6 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata, } if (bss_ies) { - const struct ieee80211_eht_operation *eht_oper; const struct element *elem; elem = cfg80211_find_ext_elem(WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION, @@ -8079,32 +8236,6 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata, link->conf->ema_ap = true; else link->conf->ema_ap = false; - - elem = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_OPERATION, - bss_ies->data, bss_ies->len); - eht_oper = (const void *)(elem->data + 1); - - if (elem && - ieee80211_eht_oper_size_ok((const void *)(elem->data + 1), - elem->datalen - 1) && - (eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT) && - (eht_oper->params & IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT)) { - const struct ieee80211_eht_operation_info *info = - (void *)eht_oper->optional; - const u8 *disable_subchannel_bitmap = info->optional; - u16 bitmap; - - bitmap = get_unaligned_le16(disable_subchannel_bitmap); - if (cfg80211_valid_disable_subchannel_bitmap(&bitmap, - &link->conf->chandef) && - !(bitmap && ieee80211_hw_check(&local->hw, DISALLOW_PUNCTURING))) - ieee80211_handle_puncturing_bitmap(link, - eht_oper, - bitmap, - NULL); - else - conn_flags |= IEEE80211_CONN_DISABLE_EHT; - } } rcu_read_unlock(); @@ -8131,8 +8262,6 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata, } else { link->smps_mode = link->u.mgd.req_smps; } - - return conn_flags; } int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, @@ -8144,11 +8273,10 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgd_assoc_data *assoc_data; const struct element *ssid_elem; struct ieee80211_vif_cfg *vif_cfg = &sdata->vif.cfg; - ieee80211_conn_flags_t conn_flags = 0; struct ieee80211_link_data *link; struct cfg80211_bss *cbss; - struct ieee80211_bss *bss; - bool override; + bool override, uapsd_supported; + bool match_auth; int i, err; size_t size = sizeof(*assoc_data) + req->ie_len; @@ -8167,44 +8295,26 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, if (ieee80211_mgd_csa_in_process(sdata, cbss)) { sdata_info(sdata, "AP is in CSA process, reject assoc\n"); - kfree(assoc_data); - return -EINVAL; + err = -EINVAL; + goto err_free; } rcu_read_lock(); ssid_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_SSID); if (!ssid_elem || ssid_elem->datalen > sizeof(assoc_data->ssid)) { rcu_read_unlock(); - kfree(assoc_data); - return -EINVAL; + err = -EINVAL; + goto err_free; } memcpy(assoc_data->ssid, ssid_elem->data, ssid_elem->datalen); assoc_data->ssid_len = ssid_elem->datalen; - memcpy(vif_cfg->ssid, assoc_data->ssid, assoc_data->ssid_len); - vif_cfg->ssid_len = assoc_data->ssid_len; rcu_read_unlock(); - if (req->ap_mld_addr) { - for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) { - if (!req->links[i].bss) - continue; - link = sdata_dereference(sdata->link[i], sdata); - if (link) - ether_addr_copy(assoc_data->link[i].addr, - link->conf->addr); - else - eth_random_addr(assoc_data->link[i].addr); - } - } else { - memcpy(assoc_data->link[0].addr, sdata->vif.addr, ETH_ALEN); - } - - assoc_data->s1g = cbss->channel->band == NL80211_BAND_S1GHZ; - - memcpy(assoc_data->ap_addr, - req->ap_mld_addr ?: req->bss->bssid, - ETH_ALEN); + if (req->ap_mld_addr) + memcpy(assoc_data->ap_addr, req->ap_mld_addr, ETH_ALEN); + else + memcpy(assoc_data->ap_addr, cbss->bssid, ETH_ALEN); if (ifmgd->associated) { u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; @@ -8222,100 +8332,134 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, false); } - if (ifmgd->auth_data && !ifmgd->auth_data->done) { - err = -EBUSY; - goto err_free; - } + memcpy(&ifmgd->ht_capa, &req->ht_capa, sizeof(ifmgd->ht_capa)); + memcpy(&ifmgd->ht_capa_mask, &req->ht_capa_mask, + sizeof(ifmgd->ht_capa_mask)); - if (ifmgd->assoc_data) { - err = -EBUSY; - goto err_free; - } + memcpy(&ifmgd->vht_capa, &req->vht_capa, sizeof(ifmgd->vht_capa)); + memcpy(&ifmgd->vht_capa_mask, &req->vht_capa_mask, + sizeof(ifmgd->vht_capa_mask)); - if (ifmgd->auth_data) { - bool match; + memcpy(&ifmgd->s1g_capa, &req->s1g_capa, sizeof(ifmgd->s1g_capa)); + memcpy(&ifmgd->s1g_capa_mask, &req->s1g_capa_mask, + sizeof(ifmgd->s1g_capa_mask)); - /* keep sta info, bssid if matching */ - match = ether_addr_equal(ifmgd->auth_data->ap_addr, - assoc_data->ap_addr) && - ifmgd->auth_data->link_id == req->link_id; + /* keep some setup (AP STA, channel, ...) if matching */ + if (ifmgd->auth_data) + match_auth = ether_addr_equal(ifmgd->auth_data->ap_addr, + assoc_data->ap_addr) && + ifmgd->auth_data->link_id == req->link_id; - /* Cleanup is delayed if auth_data matches */ - if (!match) - ieee80211_destroy_auth_data(sdata, false); - } + if (req->ap_mld_addr) { + uapsd_supported = true; - /* prepare assoc data */ + for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) { + struct ieee80211_supported_band *sband; + struct cfg80211_bss *link_cbss = req->links[i].bss; + struct ieee80211_bss *bss; - bss = (void *)cbss->priv; - assoc_data->wmm = bss->wmm_used && - (local->hw.queues >= IEEE80211_NUM_ACS); + if (!link_cbss) + continue; - assoc_data->spp_amsdu = req->flags & ASSOC_REQ_SPP_AMSDU; + bss = (void *)link_cbss->priv; - /* - * IEEE802.11n does not allow TKIP/WEP as pairwise ciphers in HT mode. - * We still associate in non-HT mode (11a/b/g) if any one of these - * ciphers is configured as pairwise. - * We can set this to true for non-11n hardware, that'll be checked - * separately along with the peer capabilities. - */ - for (i = 0; i < req->crypto.n_ciphers_pairwise; i++) { - if (req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP40 || - req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_TKIP || - req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP104) { - conn_flags |= IEEE80211_CONN_DISABLE_HT; - conn_flags |= IEEE80211_CONN_DISABLE_VHT; - conn_flags |= IEEE80211_CONN_DISABLE_HE; - conn_flags |= IEEE80211_CONN_DISABLE_EHT; - netdev_info(sdata->dev, - "disabling HT/VHT/HE due to WEP/TKIP use\n"); + if (!bss->wmm_used) { + err = -EINVAL; + goto err_free; + } + + if (req->flags & (ASSOC_REQ_DISABLE_HT | + ASSOC_REQ_DISABLE_VHT | + ASSOC_REQ_DISABLE_HE | + ASSOC_REQ_DISABLE_EHT)) { + err = -EINVAL; + goto err_free; + } + + if (link_cbss->channel->band == NL80211_BAND_S1GHZ) { + err = -EINVAL; + goto err_free; + } + + link = sdata_dereference(sdata->link[i], sdata); + if (link) + ether_addr_copy(assoc_data->link[i].addr, + link->conf->addr); + else + eth_random_addr(assoc_data->link[i].addr); + sband = local->hw.wiphy->bands[link_cbss->channel->band]; + + if (match_auth && i == assoc_link_id) + assoc_data->link[i].conn = link->u.mgd.conn; + else + assoc_data->link[i].conn = + ieee80211_conn_settings_unlimited; + ieee80211_determine_our_sta_mode_assoc(sdata, sband, + req, true, i, + &assoc_data->link[i].conn); + assoc_data->link[i].bss = link_cbss; + assoc_data->link[i].disabled = req->links[i].disabled; + + if (!bss->uapsd_supported) + uapsd_supported = false; + + if (assoc_data->link[i].conn.mode < IEEE80211_CONN_MODE_EHT) { + err = -EINVAL; + req->links[i].error = err; + goto err_free; + } } - } - /* also disable HT/VHT/HE/EHT if the AP doesn't use WMM */ - if (!bss->wmm_used) { - conn_flags |= IEEE80211_CONN_DISABLE_HT; - conn_flags |= IEEE80211_CONN_DISABLE_VHT; - conn_flags |= IEEE80211_CONN_DISABLE_HE; - conn_flags |= IEEE80211_CONN_DISABLE_EHT; - netdev_info(sdata->dev, - "disabling HT/VHT/HE as WMM/QoS is not supported by the AP\n"); - } + assoc_data->wmm = true; + } else { + struct ieee80211_supported_band *sband; + struct ieee80211_bss *bss = (void *)cbss->priv; - if (req->flags & ASSOC_REQ_DISABLE_HT) { - mlme_dbg(sdata, "HT disabled by flag, disabling HT/VHT/HE\n"); - conn_flags |= IEEE80211_CONN_DISABLE_HT; - conn_flags |= IEEE80211_CONN_DISABLE_VHT; - conn_flags |= IEEE80211_CONN_DISABLE_HE; - conn_flags |= IEEE80211_CONN_DISABLE_EHT; - } + memcpy(assoc_data->link[0].addr, sdata->vif.addr, ETH_ALEN); + assoc_data->s1g = cbss->channel->band == NL80211_BAND_S1GHZ; - if (req->flags & ASSOC_REQ_DISABLE_VHT) { - mlme_dbg(sdata, "VHT disabled by flag, disabling VHT\n"); - conn_flags |= IEEE80211_CONN_DISABLE_VHT; - } + assoc_data->wmm = bss->wmm_used && + (local->hw.queues >= IEEE80211_NUM_ACS); + + if (cbss->channel->band == NL80211_BAND_6GHZ && + req->flags & (ASSOC_REQ_DISABLE_HT | + ASSOC_REQ_DISABLE_VHT | + ASSOC_REQ_DISABLE_HE)) { + err = -EINVAL; + goto err_free; + } + + sband = local->hw.wiphy->bands[cbss->channel->band]; + + assoc_data->link[0].bss = cbss; + + if (match_auth) + assoc_data->link[0].conn = sdata->deflink.u.mgd.conn; + else + assoc_data->link[0].conn = + ieee80211_conn_settings_unlimited; + ieee80211_determine_our_sta_mode_assoc(sdata, sband, req, + assoc_data->wmm, 0, + &assoc_data->link[0].conn); - if (req->flags & ASSOC_REQ_DISABLE_HE) { - mlme_dbg(sdata, "HE disabled by flag, disabling HE/EHT\n"); - conn_flags |= IEEE80211_CONN_DISABLE_HE; - conn_flags |= IEEE80211_CONN_DISABLE_EHT; + uapsd_supported = bss->uapsd_supported; } - if (req->flags & ASSOC_REQ_DISABLE_EHT) - conn_flags |= IEEE80211_CONN_DISABLE_EHT; + assoc_data->spp_amsdu = req->flags & ASSOC_REQ_SPP_AMSDU; - memcpy(&ifmgd->ht_capa, &req->ht_capa, sizeof(ifmgd->ht_capa)); - memcpy(&ifmgd->ht_capa_mask, &req->ht_capa_mask, - sizeof(ifmgd->ht_capa_mask)); + if (ifmgd->auth_data && !ifmgd->auth_data->done) { + err = -EBUSY; + goto err_free; + } - memcpy(&ifmgd->vht_capa, &req->vht_capa, sizeof(ifmgd->vht_capa)); - memcpy(&ifmgd->vht_capa_mask, &req->vht_capa_mask, - sizeof(ifmgd->vht_capa_mask)); + if (ifmgd->assoc_data) { + err = -EBUSY; + goto err_free; + } - memcpy(&ifmgd->s1g_capa, &req->s1g_capa, sizeof(ifmgd->s1g_capa)); - memcpy(&ifmgd->s1g_capa_mask, &req->s1g_capa_mask, - sizeof(ifmgd->s1g_capa_mask)); + /* Cleanup is delayed if auth_data matches */ + if (ifmgd->auth_data && !match_auth) + ieee80211_destroy_auth_data(sdata, false); if (req->ie && req->ie_len) { memcpy(assoc_data->ie, req->ie, req->ie_len); @@ -8347,19 +8491,10 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, assoc_data->assoc_link_id = assoc_link_id; if (req->ap_mld_addr) { - for (i = 0; i < ARRAY_SIZE(assoc_data->link); i++) { - assoc_data->link[i].conn_flags = conn_flags; - assoc_data->link[i].bss = req->links[i].bss; - assoc_data->link[i].disabled = req->links[i].disabled; - } - /* if there was no authentication, set up the link */ err = ieee80211_vif_set_links(sdata, BIT(assoc_link_id), 0); if (err) goto err_clear; - } else { - assoc_data->link[0].conn_flags = conn_flags; - assoc_data->link[0].bss = cbss; } link = sdata_dereference(sdata->link[assoc_link_id], sdata); @@ -8368,19 +8503,21 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, goto err_clear; } - /* keep old conn_flags from ieee80211_prep_channel() from auth */ - conn_flags |= link->u.mgd.conn_flags; - conn_flags |= ieee80211_setup_assoc_link(sdata, assoc_data, req, - conn_flags, assoc_link_id); - override = link->u.mgd.conn_flags != conn_flags; - link->u.mgd.conn_flags |= conn_flags; + override = link->u.mgd.conn.mode != + assoc_data->link[assoc_link_id].conn.mode || + link->u.mgd.conn.bw_limit != + assoc_data->link[assoc_link_id].conn.bw_limit; + link->u.mgd.conn = assoc_data->link[assoc_link_id].conn; + + ieee80211_setup_assoc_link(sdata, assoc_data, req, &link->u.mgd.conn, + assoc_link_id); if (WARN((sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_UAPSD) && ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK), "U-APSD not supported with HW_PS_NULLFUNC_STACK\n")) sdata->vif.driver_flags &= ~IEEE80211_VIF_SUPPORTS_UAPSD; - if (bss->wmm_used && bss->uapsd_supported && + if (assoc_data->wmm && uapsd_supported && (sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_UAPSD)) { assoc_data->uapsd = true; ifmgd->flags |= IEEE80211_STA_UAPSD_ENABLED; @@ -8424,27 +8561,29 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, continue; if (i == assoc_data->assoc_link_id) continue; - /* only calculate the flags, hence link == NULL */ - err = ieee80211_prep_channel(sdata, NULL, + /* only calculate the mode, hence link == NULL */ + err = ieee80211_prep_channel(sdata, NULL, i, assoc_data->link[i].bss, true, - &assoc_data->link[i].conn_flags); + &assoc_data->link[i].conn); if (err) { req->links[i].error = err; goto err_clear; } } + memcpy(vif_cfg->ssid, assoc_data->ssid, assoc_data->ssid_len); + vif_cfg->ssid_len = assoc_data->ssid_len; + /* needed for transmitting the assoc frames properly */ memcpy(sdata->vif.cfg.ap_addr, assoc_data->ap_addr, ETH_ALEN); err = ieee80211_prep_connection(sdata, cbss, req->link_id, - req->ap_mld_addr, true, override); + req->ap_mld_addr, true, + &assoc_data->link[assoc_link_id].conn, + override); if (err) goto err_clear; - assoc_data->link[assoc_data->assoc_link_id].conn_flags = - link->u.mgd.conn_flags; - if (ieee80211_hw_check(&sdata->local->hw, NEED_DTIM_BEFORE_ASSOC)) { const struct cfg80211_bss_ies *beacon_ies; diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 55959b0b24c51..d8c7b3e16eb76 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -23,7 +23,8 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, enum nl80211_band current_band, u32 vht_cap_info, - ieee80211_conn_flags_t conn_flags, u8 *bssid, + struct ieee80211_conn_settings *conn, + u8 *bssid, struct ieee80211_csa_ie *csa_ie) { enum nl80211_band new_band = current_band; @@ -42,13 +43,13 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, wide_bw_chansw_ie = elems->wide_bw_chansw_ie; bwi = elems->bandwidth_indication; - if (conn_flags & (IEEE80211_CONN_DISABLE_HT | - IEEE80211_CONN_DISABLE_40MHZ)) { + if (conn->mode < IEEE80211_CONN_MODE_HT || + conn->bw_limit < IEEE80211_CONN_BW_LIMIT_40) { sec_chan_offs = NULL; wide_bw_chansw_ie = NULL; } - if (conn_flags & IEEE80211_CONN_DISABLE_VHT) + if (conn->mode < IEEE80211_CONN_MODE_VHT) wide_bw_chansw_ie = NULL; if (elems->ext_chansw_ie) { @@ -95,7 +96,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, if (sec_chan_offs) { secondary_channel_offset = sec_chan_offs->sec_chan_offs; - } else if (!(conn_flags & IEEE80211_CONN_DISABLE_HT)) { + } else if (conn->mode >= IEEE80211_CONN_MODE_HT) { /* If the secondary channel offset IE is not present, * we can't know what's the post-CSA offset, so the * best we can do is use 20MHz. @@ -169,12 +170,10 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, &new_vht_chandef)) new_vht_chandef.chan = NULL; - if (conn_flags & IEEE80211_CONN_DISABLE_80P80MHZ && - new_vht_chandef.width == NL80211_CHAN_WIDTH_80P80) - ieee80211_chandef_downgrade(&new_vht_chandef); - if (conn_flags & IEEE80211_CONN_DISABLE_160MHZ && - new_vht_chandef.width == NL80211_CHAN_WIDTH_160) - ieee80211_chandef_downgrade(&new_vht_chandef); + if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_160 && + (new_vht_chandef.width == NL80211_CHAN_WIDTH_80P80 || + new_vht_chandef.width == NL80211_CHAN_WIDTH_160)) + ieee80211_chandef_downgrade(&new_vht_chandef, NULL); } /* if VHT data is there validate & use it */ diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 49730b424141a..396fd54d8bf7b 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -347,7 +347,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata, (uc.width > sta->tdls_chandef.width && !cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &uc, sdata->wdev.iftype))) - ieee80211_chandef_downgrade(&uc); + ieee80211_chandef_downgrade(&uc, NULL); if (!cfg80211_chandef_identical(&uc, &sta->tdls_chandef)) { tdls_dbg(sdata, "TDLS ch width upgraded %d -> %d\n", @@ -561,7 +561,7 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_link_data *link, ieee80211_he_ppe_size(he_cap->ppe_thres[0], he_cap->he_cap_elem.phy_cap_info); pos = skb_put(skb, cap_size); - pos = ieee80211_ie_build_he_cap(0, pos, he_cap, pos + cap_size); + pos = ieee80211_ie_build_he_cap(NULL, he_cap, pos, pos + cap_size); /* Build HE 6Ghz capa IE from sband */ if (sband->band == NL80211_BAND_6GHZ) { @@ -1413,8 +1413,8 @@ iee80211_tdls_recalc_ht_protection(struct ieee80211_sub_if_data *sdata, IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT; u16 opmode; - /* Nothing to do if the BSS connection uses HT */ - if (!(sdata->deflink.u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HT)) + /* Nothing to do if the BSS connection uses (at least) HT */ + if (sdata->deflink.u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT) return; tdls_ht = (sta && sta->sta.deflink.ht_cap.ht_supported) || diff --git a/net/mac80211/tests/elems.c b/net/mac80211/tests/elems.c index 997d0cd27b2da..30fc0acb7ac29 100644 --- a/net/mac80211/tests/elems.c +++ b/net/mac80211/tests/elems.c @@ -14,6 +14,7 @@ static void mle_defrag(struct kunit *test) struct ieee80211_elems_parse_params parse_params = { .link_id = 12, .from_ap = true, + .mode = IEEE80211_CONN_MODE_EHT, }; struct ieee802_11_elems *parsed; struct sk_buff *skb; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 685b55a053f3f..1d504c8e6bfc5 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -46,6 +46,11 @@ struct ieee80211_hw *wiphy_to_ieee80211_hw(struct wiphy *wiphy) } EXPORT_SYMBOL(wiphy_to_ieee80211_hw); +const struct ieee80211_conn_settings ieee80211_conn_settings_unlimited = { + .mode = IEEE80211_CONN_MODE_EHT, + .bw_limit = IEEE80211_CONN_BW_LIMIT_320, +}; + u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, enum nl80211_iftype type) { @@ -929,23 +934,31 @@ ieee80211_parse_extension_element(u32 *crc, switch (elem->data[0]) { case WLAN_EID_EXT_HE_MU_EDCA: + if (params->mode < IEEE80211_CONN_MODE_HE) + break; calc_crc = true; if (len >= sizeof(*elems->mu_edca_param_set)) elems->mu_edca_param_set = data; break; case WLAN_EID_EXT_HE_CAPABILITY: + if (params->mode < IEEE80211_CONN_MODE_HE) + break; if (ieee80211_he_capa_size_ok(data, len)) { elems->he_cap = data; elems->he_cap_len = len; } break; case WLAN_EID_EXT_HE_OPERATION: + if (params->mode < IEEE80211_CONN_MODE_HE) + break; calc_crc = true; if (len >= sizeof(*elems->he_operation) && len >= ieee80211_he_oper_size(data) - 1) elems->he_operation = data; break; case WLAN_EID_EXT_UORA: + if (params->mode < IEEE80211_CONN_MODE_HE) + break; if (len >= 1) elems->uora_element = data; break; @@ -958,15 +971,21 @@ ieee80211_parse_extension_element(u32 *crc, elems->mbssid_config_ie = data; break; case WLAN_EID_EXT_HE_SPR: + if (params->mode < IEEE80211_CONN_MODE_HE) + break; if (len >= sizeof(*elems->he_spr) && len >= ieee80211_he_spr_size(data)) elems->he_spr = data; break; case WLAN_EID_EXT_HE_6GHZ_CAPA: + if (params->mode < IEEE80211_CONN_MODE_HE) + break; if (len >= sizeof(*elems->he_6ghz_capa)) elems->he_6ghz_capa = data; break; case WLAN_EID_EXT_EHT_CAPABILITY: + if (params->mode < IEEE80211_CONN_MODE_EHT) + break; if (ieee80211_eht_capa_size_ok(elems->he_cap, data, len, params->from_ap)) { @@ -975,11 +994,15 @@ ieee80211_parse_extension_element(u32 *crc, } break; case WLAN_EID_EXT_EHT_OPERATION: + if (params->mode < IEEE80211_CONN_MODE_EHT) + break; if (ieee80211_eht_oper_size_ok(data, len)) elems->eht_operation = data; calc_crc = true; break; case WLAN_EID_EXT_EHT_MULTI_LINK: + if (params->mode < IEEE80211_CONN_MODE_EHT) + break; calc_crc = true; if (ieee80211_mle_size_ok(data, len)) { @@ -1004,11 +1027,15 @@ ieee80211_parse_extension_element(u32 *crc, } break; case WLAN_EID_EXT_BANDWIDTH_INDICATION: + if (params->mode < IEEE80211_CONN_MODE_EHT) + break; if (ieee80211_bandwidth_indication_size_ok(data, len)) elems->bandwidth_indication = data; calc_crc = true; break; case WLAN_EID_EXT_TID_TO_LINK_MAPPING: + if (params->mode < IEEE80211_CONN_MODE_EHT) + break; calc_crc = true; if (ieee80211_tid_to_link_map_size_ok(data, len) && elems->ttlm_num < ARRAY_SIZE(elems->ttlm)) { @@ -1178,24 +1205,32 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, elems->ext_supp_rates_len = elen; break; case WLAN_EID_HT_CAPABILITY: + if (params->mode < IEEE80211_CONN_MODE_HT) + break; if (elen >= sizeof(struct ieee80211_ht_cap)) elems->ht_cap_elem = (void *)pos; else elem_parse_failed = true; break; case WLAN_EID_HT_OPERATION: + if (params->mode < IEEE80211_CONN_MODE_HT) + break; if (elen >= sizeof(struct ieee80211_ht_operation)) elems->ht_operation = (void *)pos; else elem_parse_failed = true; break; case WLAN_EID_VHT_CAPABILITY: + if (params->mode < IEEE80211_CONN_MODE_VHT) + break; if (elen >= sizeof(struct ieee80211_vht_cap)) elems->vht_cap_elem = (void *)pos; else elem_parse_failed = true; break; case WLAN_EID_VHT_OPERATION: + if (params->mode < IEEE80211_CONN_MODE_VHT) + break; if (elen >= sizeof(struct ieee80211_vht_operation)) { elems->vht_operation = (void *)pos; if (calc_crc) @@ -1205,6 +1240,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, elem_parse_failed = true; break; case WLAN_EID_OPMODE_NOTIF: + if (params->mode < IEEE80211_CONN_MODE_VHT) + break; if (elen > 0) { elems->opmode_notif = pos; if (calc_crc) @@ -1264,6 +1301,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, elems->ext_chansw_ie = (void *)pos; break; case WLAN_EID_SECONDARY_CHANNEL_OFFSET: + if (params->mode < IEEE80211_CONN_MODE_HT) + break; if (elen != sizeof(struct ieee80211_sec_chan_offs_ie)) { elem_parse_failed = true; break; @@ -1279,6 +1318,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, elems->mesh_chansw_params_ie = (void *)pos; break; case WLAN_EID_WIDE_BW_CHANNEL_SWITCH: + if (params->mode < IEEE80211_CONN_MODE_VHT) + break; if (!params->action || elen < sizeof(*elems->wide_bw_chansw_ie)) { elem_parse_failed = true; @@ -1287,6 +1328,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, elems->wide_bw_chansw_ie = (void *)pos; break; case WLAN_EID_CHANNEL_SWITCH_WRAPPER: + if (params->mode < IEEE80211_CONN_MODE_VHT) + break; if (params->action) { elem_parse_failed = true; break; @@ -1305,6 +1348,9 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, elem_parse_failed = true; } + if (params->mode < IEEE80211_CONN_MODE_EHT) + break; + subelem = cfg80211_find_ext_elem(WLAN_EID_EXT_BANDWIDTH_INDICATION, pos, elen); if (subelem) { @@ -1393,24 +1439,32 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, elem, elems, params); break; case WLAN_EID_S1G_CAPABILITIES: + if (params->mode != IEEE80211_CONN_MODE_S1G) + break; if (elen >= sizeof(*elems->s1g_capab)) elems->s1g_capab = (void *)pos; else elem_parse_failed = true; break; case WLAN_EID_S1G_OPERATION: + if (params->mode != IEEE80211_CONN_MODE_S1G) + break; if (elen == sizeof(*elems->s1g_oper)) elems->s1g_oper = (void *)pos; else elem_parse_failed = true; break; case WLAN_EID_S1G_BCN_COMPAT: + if (params->mode != IEEE80211_CONN_MODE_S1G) + break; if (elen == sizeof(*elems->s1g_bcn_compat)) elems->s1g_bcn_compat = (void *)pos; else elem_parse_failed = true; break; case WLAN_EID_AID_RESPONSE: + if (params->mode != IEEE80211_CONN_MODE_S1G) + break; if (elen == sizeof(struct ieee80211_aid_response_ie)) elems->aid_resp = (void *)pos; else @@ -1562,6 +1616,7 @@ static void ieee80211_mle_parse_link(struct ieee802_11_elems *elems, { struct ieee80211_mle_per_sta_profile *prof; struct ieee80211_elems_parse_params sub = { + .mode = params->mode, .action = params->action, .from_ap = params->from_ap, .link_id = -1, @@ -1649,6 +1704,7 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) /* Override with nontransmitted profile, if found */ if (nontransmitted_profile_len) { struct ieee80211_elems_parse_params sub = { + .mode = params->mode, .start = nontransmitted_profile, .len = nontransmitted_profile_len, .action = params->action, @@ -2142,7 +2198,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, if (he_cap && cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band), IEEE80211_CHAN_NO_HE)) { - pos = ieee80211_ie_build_he_cap(0, pos, he_cap, end); + pos = ieee80211_ie_build_he_cap(NULL, he_cap, pos, end); if (!pos) goto out_err; } @@ -3201,15 +3257,18 @@ u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype) he_cap->he_cap_elem.phy_cap_info); } -u8 *ieee80211_ie_build_he_cap(ieee80211_conn_flags_t disable_flags, u8 *pos, +u8 *ieee80211_ie_build_he_cap(const struct ieee80211_conn_settings *conn, const struct ieee80211_sta_he_cap *he_cap, - u8 *end) + u8 *pos, u8 *end) { struct ieee80211_he_cap_elem elem; u8 n; u8 ie_len; u8 *orig_pos = pos; + if (!conn) + conn = &ieee80211_conn_settings_unlimited; + /* Make sure we have place for the IE */ /* * TODO: the 1 added is because this temporarily is under the EXTENSION @@ -3221,18 +3280,15 @@ u8 *ieee80211_ie_build_he_cap(ieee80211_conn_flags_t disable_flags, u8 *pos, /* modify on stack first to calculate 'n' and 'ie_len' correctly */ elem = he_cap->he_cap_elem; - if (disable_flags & IEEE80211_CONN_DISABLE_40MHZ) + if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_40) elem.phy_cap_info[0] &= ~(IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G | IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G); - if (disable_flags & IEEE80211_CONN_DISABLE_160MHZ) + if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_160) elem.phy_cap_info[0] &= - ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G; - - if (disable_flags & IEEE80211_CONN_DISABLE_80P80MHZ) - elem.phy_cap_info[0] &= - ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G; + ~(IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G | + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G); n = ieee80211_he_mcs_nss_size(&elem); ie_len = 2 + 1 + @@ -4367,21 +4423,32 @@ void ieee80211_radar_detected(struct ieee80211_hw *hw) } EXPORT_SYMBOL(ieee80211_radar_detected); -ieee80211_conn_flags_t ieee80211_chandef_downgrade(struct cfg80211_chan_def *c) +void ieee80211_chandef_downgrade(struct cfg80211_chan_def *c, + struct ieee80211_conn_settings *conn) { - ieee80211_conn_flags_t ret; + struct ieee80211_conn_settings _ignored = {}; int tmp; + /* allow passing NULL if caller doesn't care */ + if (!conn) + conn = &_ignored; + switch (c->width) { + default: + case NL80211_CHAN_WIDTH_20_NOHT: + WARN_ON_ONCE(1); + fallthrough; case NL80211_CHAN_WIDTH_20: c->width = NL80211_CHAN_WIDTH_20_NOHT; - ret = IEEE80211_CONN_DISABLE_HT | IEEE80211_CONN_DISABLE_VHT; + conn->mode = IEEE80211_CONN_MODE_LEGACY; + conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20; break; case NL80211_CHAN_WIDTH_40: c->width = NL80211_CHAN_WIDTH_20; c->center_freq1 = c->chan->center_freq; - ret = IEEE80211_CONN_DISABLE_40MHZ | - IEEE80211_CONN_DISABLE_VHT; + if (conn->mode == IEEE80211_CONN_MODE_VHT) + conn->mode = IEEE80211_CONN_MODE_HT; + conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20; break; case NL80211_CHAN_WIDTH_80: tmp = (30 + c->chan->center_freq - c->center_freq1)/20; @@ -4390,13 +4457,14 @@ ieee80211_conn_flags_t ieee80211_chandef_downgrade(struct cfg80211_chan_def *c) /* freq_P40 */ c->center_freq1 = c->center_freq1 - 20 + 40 * tmp; c->width = NL80211_CHAN_WIDTH_40; - ret = IEEE80211_CONN_DISABLE_VHT; + if (conn->mode == IEEE80211_CONN_MODE_VHT) + conn->mode = IEEE80211_CONN_MODE_HT; + conn->bw_limit = IEEE80211_CONN_BW_LIMIT_40; break; case NL80211_CHAN_WIDTH_80P80: c->center_freq2 = 0; c->width = NL80211_CHAN_WIDTH_80; - ret = IEEE80211_CONN_DISABLE_80P80MHZ | - IEEE80211_CONN_DISABLE_160MHZ; + conn->bw_limit = IEEE80211_CONN_BW_LIMIT_80; break; case NL80211_CHAN_WIDTH_160: /* n_P20 */ @@ -4405,8 +4473,7 @@ ieee80211_conn_flags_t ieee80211_chandef_downgrade(struct cfg80211_chan_def *c) tmp /= 4; c->center_freq1 = c->center_freq1 - 40 + 80 * tmp; c->width = NL80211_CHAN_WIDTH_80; - ret = IEEE80211_CONN_DISABLE_80P80MHZ | - IEEE80211_CONN_DISABLE_160MHZ; + conn->bw_limit = IEEE80211_CONN_BW_LIMIT_80; break; case NL80211_CHAN_WIDTH_320: /* n_P20 */ @@ -4415,30 +4482,28 @@ ieee80211_conn_flags_t ieee80211_chandef_downgrade(struct cfg80211_chan_def *c) tmp /= 8; c->center_freq1 = c->center_freq1 - 80 + 160 * tmp; c->width = NL80211_CHAN_WIDTH_160; - ret = IEEE80211_CONN_DISABLE_320MHZ; - break; - default: - case NL80211_CHAN_WIDTH_20_NOHT: - WARN_ON_ONCE(1); - c->width = NL80211_CHAN_WIDTH_20_NOHT; - ret = IEEE80211_CONN_DISABLE_HT | IEEE80211_CONN_DISABLE_VHT; + conn->bw_limit = IEEE80211_CONN_BW_LIMIT_160; break; case NL80211_CHAN_WIDTH_1: case NL80211_CHAN_WIDTH_2: case NL80211_CHAN_WIDTH_4: case NL80211_CHAN_WIDTH_8: case NL80211_CHAN_WIDTH_16: + WARN_ON_ONCE(1); + /* keep c->width */ + conn->mode = IEEE80211_CONN_MODE_S1G; + conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20; + break; case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: WARN_ON_ONCE(1); /* keep c->width */ - ret = IEEE80211_CONN_DISABLE_HT | IEEE80211_CONN_DISABLE_VHT; + conn->mode = IEEE80211_CONN_MODE_LEGACY; + conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20; break; } WARN_ON_ONCE(!cfg80211_chandef_valid(c)); - - return ret; } /* @@ -5090,3 +5155,42 @@ u8 *ieee80211_ie_build_eht_cap(u8 *pos, return pos; } + +const char *ieee80211_conn_mode_str(enum ieee80211_conn_mode mode) +{ + static const char * const modes[] = { + [IEEE80211_CONN_MODE_S1G] = "S1G", + [IEEE80211_CONN_MODE_LEGACY] = "legacy", + [IEEE80211_CONN_MODE_HT] = "HT", + [IEEE80211_CONN_MODE_VHT] = "VHT", + [IEEE80211_CONN_MODE_HE] = "HE", + [IEEE80211_CONN_MODE_EHT] = "EHT", + }; + + if (WARN_ON(mode >= ARRAY_SIZE(modes))) + return ""; + + return modes[mode] ?: ""; +} + +enum ieee80211_conn_bw_limit +ieee80211_min_bw_limit_from_chandef(struct cfg80211_chan_def *chandef) +{ + switch (chandef->width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + return IEEE80211_CONN_BW_LIMIT_20; + case NL80211_CHAN_WIDTH_40: + return IEEE80211_CONN_BW_LIMIT_40; + case NL80211_CHAN_WIDTH_80: + return IEEE80211_CONN_BW_LIMIT_80; + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_160: + return IEEE80211_CONN_BW_LIMIT_160; + case NL80211_CHAN_WIDTH_320: + return IEEE80211_CONN_BW_LIMIT_320; + default: + WARN(1, "unhandled chandef width %d\n", chandef->width); + return IEEE80211_CONN_BW_LIMIT_20; + } +} -- cgit 1.2.3-korg From 2d9698dd32d086e47b8bff3df4322cc017c17b55 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:34:37 +0100 Subject: wifi: mac80211: clean up HE 6 GHz and EHT chandef parsing In the code we currently check for support 80+80, 160 and 320 channel widths, but really the way this should be (and is otherwise) handled is that we compute the highest channel bandwidth given there, and then cut it down to what we support. This is also needed for wider bandwidth OFDMA support. Change the code to remove this limitation and always parse the highest possible channel width. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240129194108.d06f85082e29.I47e68ed3d97b0a2f4ee61e5d8abfcefc8a5b9c08@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 3 +- net/mac80211/mesh.c | 3 +- net/mac80211/mlme.c | 8 ++-- net/mac80211/spectmgmt.c | 4 +- net/mac80211/util.c | 104 ++++++++++----------------------------------- 5 files changed, 29 insertions(+), 93 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 112029f5a7df2..29294cf88d39c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2512,9 +2512,8 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info, const struct ieee80211_ht_operation *htop, struct cfg80211_chan_def *chandef); void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation_info *info, - bool support_160, bool support_320, struct cfg80211_chan_def *chandef); -bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, +bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_local *local, const struct ieee80211_he_operation *he_oper, const struct ieee80211_eht_operation *eht_oper, struct cfg80211_chan_def *chandef); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 6b48914b39fd4..60dc453acb5ad 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -107,7 +107,8 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, ieee80211_chandef_vht_oper(&sdata->local->hw, vht_cap_info, ie->vht_operation, ie->ht_operation, &sta_chan_def); - ieee80211_chandef_he_6ghz_oper(sdata, ie->he_operation, ie->eht_operation, + ieee80211_chandef_he_6ghz_oper(sdata->local, ie->he_operation, + ie->eht_operation, &sta_chan_def); if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chandef, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 549a06c9a2e56..7c0339d4f0598 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -116,7 +116,7 @@ ieee80211_extract_dis_subch_bmap(const struct ieee80211_eht_operation *eht_oper, /* set 160/320 supported to get the full AP definition */ ieee80211_chandef_eht_oper((const void *)eht_oper->optional, - true, true, &ap_chandef); + &ap_chandef); ap_center_freq = ap_chandef.center_freq1; ap_bw = 20 * BIT(u8_get_bits(info->control, IEEE80211_EHT_OPER_CHAN_WIDTH)); @@ -280,7 +280,7 @@ ieee80211_determine_ap_chan(struct ieee80211_sub_if_data *sdata, mode = IEEE80211_CONN_MODE_HE; } - if (!ieee80211_chandef_he_6ghz_oper(sdata, he_oper, + if (!ieee80211_chandef_he_6ghz_oper(sdata->local, he_oper, eht_oper, chandef)) { sdata_info(sdata, "bad HE/EHT 6 GHz operation\n"); return IEEE80211_CONN_MODE_LEGACY; @@ -394,9 +394,7 @@ ieee80211_determine_ap_chan(struct ieee80211_sub_if_data *sdata, struct cfg80211_chan_def eht_chandef = *chandef; ieee80211_chandef_eht_oper((const void *)eht_oper->optional, - eht_chandef.width == - NL80211_CHAN_WIDTH_160, - false, &eht_chandef); + &eht_chandef); if (!cfg80211_chandef_valid(&eht_chandef)) { sdata_info(sdata, diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index d8c7b3e16eb76..51efc9bc81680 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -139,9 +139,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, /* start with the CSA one */ new_vht_chandef = csa_ie->chandef; /* and update the width accordingly */ - /* FIXME: support 160/320 */ - ieee80211_chandef_eht_oper(&bwi->info, true, true, - &new_vht_chandef); + ieee80211_chandef_eht_oper(&bwi->info, &new_vht_chandef); } else if (wide_bw_chansw_ie) { u8 new_seg1 = wide_bw_chansw_ie->new_center_freq_seg1; struct ieee80211_vht_operation vht_oper = { diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 1d504c8e6bfc5..1a2522e699d41 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3841,7 +3841,6 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info, } void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation_info *info, - bool support_160, bool support_320, struct cfg80211_chan_def *chandef) { chandef->center_freq1 = @@ -3860,80 +3859,34 @@ void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation_info *info, chandef->width = NL80211_CHAN_WIDTH_80; break; case IEEE80211_EHT_OPER_CHAN_WIDTH_160MHZ: - if (support_160) { - chandef->width = NL80211_CHAN_WIDTH_160; - chandef->center_freq1 = - ieee80211_channel_to_frequency(info->ccfs1, - chandef->chan->band); - } else { - chandef->width = NL80211_CHAN_WIDTH_80; - } + chandef->width = NL80211_CHAN_WIDTH_160; + chandef->center_freq1 = + ieee80211_channel_to_frequency(info->ccfs1, + chandef->chan->band); break; case IEEE80211_EHT_OPER_CHAN_WIDTH_320MHZ: - if (support_320) { - chandef->width = NL80211_CHAN_WIDTH_320; - chandef->center_freq1 = - ieee80211_channel_to_frequency(info->ccfs1, - chandef->chan->band); - } else if (support_160) { - chandef->width = NL80211_CHAN_WIDTH_160; - } else { - chandef->width = NL80211_CHAN_WIDTH_80; - - if (chandef->center_freq1 > chandef->chan->center_freq) - chandef->center_freq1 -= 40; - else - chandef->center_freq1 += 40; - } + chandef->width = NL80211_CHAN_WIDTH_320; + chandef->center_freq1 = + ieee80211_channel_to_frequency(info->ccfs1, + chandef->chan->band); break; } } -bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, +bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_local *local, const struct ieee80211_he_operation *he_oper, const struct ieee80211_eht_operation *eht_oper, struct cfg80211_chan_def *chandef) { - struct ieee80211_local *local = sdata->local; - struct ieee80211_supported_band *sband; - enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif); - const struct ieee80211_sta_he_cap *he_cap; - const struct ieee80211_sta_eht_cap *eht_cap; struct cfg80211_chan_def he_chandef = *chandef; const struct ieee80211_he_6ghz_oper *he_6ghz_oper; - bool support_80_80, support_160, support_320; - u8 he_phy_cap, eht_phy_cap; u32 freq; if (chandef->chan->band != NL80211_BAND_6GHZ) return true; - sband = local->hw.wiphy->bands[NL80211_BAND_6GHZ]; - - he_cap = ieee80211_get_he_iftype_cap(sband, iftype); - if (!he_cap) { - sdata_info(sdata, "Missing iftype sband data/HE cap"); - return false; - } - - he_phy_cap = he_cap->he_cap_elem.phy_cap_info[0]; - support_160 = - he_phy_cap & - IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G; - support_80_80 = - he_phy_cap & - IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G; - - if (!he_oper) { - sdata_info(sdata, - "HE is not advertised on (on %d MHz), expect issues\n", - chandef->chan->center_freq); + if (!he_oper) return false; - } - - eht_cap = ieee80211_get_eht_iftype_cap(sband, iftype); - if (!eht_cap) - eht_oper = NULL; he_6ghz_oper = ieee80211_he_6ghz_oper(he_oper); if (!he_6ghz_oper) @@ -3946,7 +3899,10 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, */ freq = ieee80211_channel_to_frequency(he_6ghz_oper->primary, NL80211_BAND_6GHZ); - he_chandef.chan = ieee80211_get_channel(sdata->local->hw.wiphy, freq); + he_chandef.chan = ieee80211_get_channel(local->hw.wiphy, freq); + + if (!he_chandef.chan) + return false; if (!eht_oper || !(eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT)) { @@ -3965,13 +3921,10 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, he_chandef.width = NL80211_CHAN_WIDTH_80; if (!he_6ghz_oper->ccfs1) break; - if (abs(he_6ghz_oper->ccfs1 - he_6ghz_oper->ccfs0) == 8) { - if (support_160) - he_chandef.width = NL80211_CHAN_WIDTH_160; - } else { - if (support_80_80) - he_chandef.width = NL80211_CHAN_WIDTH_80P80; - } + if (abs(he_6ghz_oper->ccfs1 - he_6ghz_oper->ccfs0) == 8) + he_chandef.width = NL80211_CHAN_WIDTH_160; + else + he_chandef.width = NL80211_CHAN_WIDTH_80P80; break; } @@ -3983,30 +3936,17 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, he_chandef.center_freq1 = ieee80211_channel_to_frequency(he_6ghz_oper->ccfs0, NL80211_BAND_6GHZ); - if (support_80_80 || support_160) - he_chandef.center_freq2 = - ieee80211_channel_to_frequency(he_6ghz_oper->ccfs1, - NL80211_BAND_6GHZ); + he_chandef.center_freq2 = + ieee80211_channel_to_frequency(he_6ghz_oper->ccfs1, + NL80211_BAND_6GHZ); } } else { - eht_phy_cap = eht_cap->eht_cap_elem.phy_cap_info[0]; - support_320 = - eht_phy_cap & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ; - ieee80211_chandef_eht_oper((const void *)eht_oper->optional, - support_160, support_320, &he_chandef); } - if (!cfg80211_chandef_valid(&he_chandef)) { - sdata_info(sdata, - "HE 6GHz operation resulted in invalid chandef: %d MHz/%d/%d MHz/%d MHz\n", - he_chandef.chan ? he_chandef.chan->center_freq : 0, - he_chandef.width, - he_chandef.center_freq1, - he_chandef.center_freq2); + if (!cfg80211_chandef_valid(&he_chandef)) return false; - } *chandef = he_chandef; -- cgit 1.2.3-korg From 0a44dfc070749514b804ccac0b1fd38718f7daa1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:34:38 +0100 Subject: wifi: mac80211: simplify non-chanctx drivers There are still surprisingly many non-chanctx drivers, but in mac80211 that code is a bit awkward. Simplify this by having those drivers assign 'emulated' ops, so that the mac80211 code can be more unified between non-chanctx/chanctx drivers. This cuts the number of places caring about it by about 15, which are scattered across - now they're fewer and no longer in the channel context handling. Link: https://msgid.link/20240129194108.6d0ead50f5cf.I60d093b2fc81ca1853925a4d0ac3a2337d5baa5b@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/admtek/adm8211.c | 4 + drivers/net/wireless/ath/ar5523/ar5523.c | 4 + drivers/net/wireless/ath/ath5k/mac80211-ops.c | 4 + drivers/net/wireless/ath/ath9k/htc_drv_main.c | 4 + drivers/net/wireless/ath/ath9k/main.c | 4 + drivers/net/wireless/ath/carl9170/main.c | 4 + drivers/net/wireless/ath/wcn36xx/main.c | 4 + drivers/net/wireless/atmel/at76c50x-usb.c | 4 + drivers/net/wireless/broadcom/b43/main.c | 4 + drivers/net/wireless/broadcom/b43legacy/main.c | 4 + .../broadcom/brcm80211/brcmsmac/mac80211_if.c | 4 + drivers/net/wireless/intel/iwlegacy/3945-mac.c | 4 + drivers/net/wireless/intel/iwlegacy/4965-mac.c | 4 + drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c | 4 + drivers/net/wireless/intersil/p54/main.c | 4 + drivers/net/wireless/marvell/libertas_tf/main.c | 4 + drivers/net/wireless/marvell/mwl8k.c | 4 + drivers/net/wireless/mediatek/mt76/mt7603/main.c | 4 + drivers/net/wireless/mediatek/mt76/mt76x0/pci.c | 4 + drivers/net/wireless/mediatek/mt76/mt76x0/usb.c | 4 + .../net/wireless/mediatek/mt76/mt76x2/pci_main.c | 4 + .../net/wireless/mediatek/mt76/mt76x2/usb_main.c | 4 + drivers/net/wireless/mediatek/mt76/mt792x_core.c | 7 +- drivers/net/wireless/mediatek/mt76/mt7996/main.c | 4 + drivers/net/wireless/mediatek/mt7601u/main.c | 4 + drivers/net/wireless/purelifi/plfxlc/mac.c | 4 + drivers/net/wireless/ralink/rt2x00/rt2400pci.c | 4 + drivers/net/wireless/ralink/rt2x00/rt2500pci.c | 4 + drivers/net/wireless/ralink/rt2x00/rt2500usb.c | 4 + drivers/net/wireless/ralink/rt2x00/rt2800pci.c | 4 + drivers/net/wireless/ralink/rt2x00/rt2800soc.c | 4 + drivers/net/wireless/ralink/rt2x00/rt2800usb.c | 4 + drivers/net/wireless/ralink/rt2x00/rt61pci.c | 4 + drivers/net/wireless/ralink/rt2x00/rt73usb.c | 4 + drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c | 4 + drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c | 4 + .../net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 4 + drivers/net/wireless/realtek/rtlwifi/core.c | 4 + drivers/net/wireless/realtek/rtw88/mac80211.c | 4 + drivers/net/wireless/realtek/rtw89/core.c | 7 +- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 4 + drivers/net/wireless/st/cw1200/main.c | 4 + drivers/net/wireless/ti/wl1251/main.c | 4 + drivers/net/wireless/virtual/mac80211_hwsim.c | 4 + drivers/net/wireless/zydas/zd1211rw/zd_mac.c | 4 + drivers/staging/vt6655/device_main.c | 4 + drivers/staging/vt6656/main_usb.c | 4 + include/net/mac80211.h | 13 ++ net/mac80211/cfg.c | 42 ++--- net/mac80211/chan.c | 111 ++--------- net/mac80211/ieee80211_i.h | 9 +- net/mac80211/iface.c | 6 +- net/mac80211/main.c | 205 +++++++++++++++++---- net/mac80211/mlme.c | 3 +- net/mac80211/offchannel.c | 21 ++- net/mac80211/scan.c | 18 +- net/mac80211/tx.c | 2 - net/mac80211/util.c | 27 ++- 58 files changed, 444 insertions(+), 207 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/admtek/adm8211.c b/drivers/net/wireless/admtek/adm8211.c index 2fceea9f65507..e3fd48dd39097 100644 --- a/drivers/net/wireless/admtek/adm8211.c +++ b/drivers/net/wireless/admtek/adm8211.c @@ -1759,6 +1759,10 @@ static int adm8211_alloc_rings(struct ieee80211_hw *dev) } static const struct ieee80211_ops adm8211_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = adm8211_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = adm8211_start, diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c index a742cec44e3db..815f8f599f5d1 100644 --- a/drivers/net/wireless/ath/ar5523/ar5523.c +++ b/drivers/net/wireless/ath/ar5523/ar5523.c @@ -1358,6 +1358,10 @@ static void ar5523_configure_filter(struct ieee80211_hw *hw, } static const struct ieee80211_ops ar5523_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .start = ar5523_start, .stop = ar5523_stop, .tx = ar5523_tx, diff --git a/drivers/net/wireless/ath/ath5k/mac80211-ops.c b/drivers/net/wireless/ath/ath5k/mac80211-ops.c index c630343ca4f9b..eea4bda776080 100644 --- a/drivers/net/wireless/ath/ath5k/mac80211-ops.c +++ b/drivers/net/wireless/ath/ath5k/mac80211-ops.c @@ -779,6 +779,10 @@ static int ath5k_set_ringparam(struct ieee80211_hw *hw, u32 tx, u32 rx) const struct ieee80211_ops ath5k_hw_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = ath5k_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = ath5k_start, diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index 9a9b5212051af..b389e19381c44 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -1868,6 +1868,10 @@ static void ath9k_htc_channel_switch_beacon(struct ieee80211_hw *hw, } struct ieee80211_ops ath9k_htc_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = ath9k_htc_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = ath9k_htc_start, diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index c48ff0ffbfefb..a2943aaecb202 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -2786,6 +2786,10 @@ static int ath9k_get_txpower(struct ieee80211_hw *hw, struct ieee80211_vif *vif, } struct ieee80211_ops ath9k_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = ath9k_tx, .start = ath9k_start, .stop = ath9k_stop, diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index 524327d24964c..7e7797bf44b7d 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -1712,6 +1712,10 @@ static bool carl9170_tx_frames_pending(struct ieee80211_hw *hw) } static const struct ieee80211_ops carl9170_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .start = carl9170_op_start, .stop = carl9170_op_stop, .tx = carl9170_op_tx, diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c index 4e6b4df8562f6..bfbd3c7a70b37 100644 --- a/drivers/net/wireless/ath/wcn36xx/main.c +++ b/drivers/net/wireless/ath/wcn36xx/main.c @@ -1347,6 +1347,10 @@ static void wcn36xx_sta_statistics(struct ieee80211_hw *hw, struct ieee80211_vif } static const struct ieee80211_ops wcn36xx_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .start = wcn36xx_start, .stop = wcn36xx_stop, .add_interface = wcn36xx_add_interface, diff --git a/drivers/net/wireless/atmel/at76c50x-usb.c b/drivers/net/wireless/atmel/at76c50x-usb.c index 447b51cff8f96..0b55a272bfd66 100644 --- a/drivers/net/wireless/atmel/at76c50x-usb.c +++ b/drivers/net/wireless/atmel/at76c50x-usb.c @@ -2178,6 +2178,10 @@ static int at76_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, } static const struct ieee80211_ops at76_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = at76_mac80211_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .add_interface = at76_add_interface, diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c index effb6c23f8257..badb2f4940358 100644 --- a/drivers/net/wireless/broadcom/b43/main.c +++ b/drivers/net/wireless/broadcom/b43/main.c @@ -5172,6 +5172,10 @@ static int b43_op_get_survey(struct ieee80211_hw *hw, int idx, } static const struct ieee80211_ops b43_hw_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = b43_op_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .conf_tx = b43_op_conf_tx, diff --git a/drivers/net/wireless/broadcom/b43legacy/main.c b/drivers/net/wireless/broadcom/b43legacy/main.c index 760136638a956..18eb610f600a8 100644 --- a/drivers/net/wireless/broadcom/b43legacy/main.c +++ b/drivers/net/wireless/broadcom/b43legacy/main.c @@ -3531,6 +3531,10 @@ static int b43legacy_op_get_survey(struct ieee80211_hw *hw, int idx, } static const struct ieee80211_ops b43legacy_hw_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = b43legacy_op_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .conf_tx = b43legacy_op_conf_tx, diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c index 543e93ec49d22..92860dc0a92eb 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c @@ -959,6 +959,10 @@ static int brcms_ops_beacon_set_tim(struct ieee80211_hw *hw, } static const struct ieee80211_ops brcms_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = brcms_ops_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = brcms_ops_start, diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c index 9eaf5ec133f9e..075b705a8d7bb 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c @@ -3432,6 +3432,10 @@ static const struct attribute_group il3945_attribute_group = { }; static struct ieee80211_ops il3945_mac_ops __ro_after_init = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = il3945_mac_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = il3945_mac_start, diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c index 70e420df16438..4beb7be6d51df 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c @@ -6301,6 +6301,10 @@ il4965_tx_queue_set_status(struct il_priv *il, struct il_tx_queue *txq, } static const struct ieee80211_ops il4965_mac_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = il4965_mac_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = il4965_mac_start, diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c index 5f3d5b15f727e..52b008ce53bdf 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c @@ -1570,6 +1570,10 @@ static void iwlagn_mac_sta_notify(struct ieee80211_hw *hw, } const struct ieee80211_ops iwlagn_hw_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = iwlagn_mac_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = iwlagn_mac_start, diff --git a/drivers/net/wireless/intersil/p54/main.c b/drivers/net/wireless/intersil/p54/main.c index c6084683aedde..687841b2fa2a2 100644 --- a/drivers/net/wireless/intersil/p54/main.c +++ b/drivers/net/wireless/intersil/p54/main.c @@ -704,6 +704,10 @@ static void p54_set_coverage_class(struct ieee80211_hw *dev, } static const struct ieee80211_ops p54_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = p54_tx_80211, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = p54_start, diff --git a/drivers/net/wireless/marvell/libertas_tf/main.c b/drivers/net/wireless/marvell/libertas_tf/main.c index 199d33ed3bb96..9cca69fe04d78 100644 --- a/drivers/net/wireless/marvell/libertas_tf/main.c +++ b/drivers/net/wireless/marvell/libertas_tf/main.c @@ -473,6 +473,10 @@ static int lbtf_op_get_survey(struct ieee80211_hw *hw, int idx, } static const struct ieee80211_ops lbtf_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = lbtf_op_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = lbtf_op_start, diff --git a/drivers/net/wireless/marvell/mwl8k.c b/drivers/net/wireless/marvell/mwl8k.c index 13bcb123d1223..ce8fea76dbb24 100644 --- a/drivers/net/wireless/marvell/mwl8k.c +++ b/drivers/net/wireless/marvell/mwl8k.c @@ -5610,6 +5610,10 @@ static void mwl8k_sw_scan_complete(struct ieee80211_hw *hw, } static const struct ieee80211_ops mwl8k_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = mwl8k_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = mwl8k_start, diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index e2146d30e5536..9b49267b1eabb 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -701,6 +701,10 @@ static void mt7603_tx(struct ieee80211_hw *hw, } const struct ieee80211_ops mt7603_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = mt7603_tx, .start = mt7603_start, .stop = mt7603_stop, diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c index 293e66fa83d5d..79b7996ad1a81 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c @@ -59,6 +59,10 @@ mt76x0e_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, } static const struct ieee80211_ops mt76x0e_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = mt76x02_tx, .start = mt76x0e_start, .stop = mt76x0e_stop, diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c index dd042949cf82b..bba44f289b4e3 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c @@ -118,6 +118,10 @@ static int mt76x0u_start(struct ieee80211_hw *hw) } static const struct ieee80211_ops mt76x0u_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = mt76x02_tx, .start = mt76x0u_start, .stop = mt76x0u_stop, diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c index b38bb7a2362b2..bfc8c69f43fa9 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c @@ -132,6 +132,10 @@ static int mt76x2_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, } const struct ieee80211_ops mt76x2_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = mt76x02_tx, .start = mt76x2_start, .stop = mt76x2_stop, diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c index ac07ed1f63a3c..9fe390fdd7305 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c @@ -103,6 +103,10 @@ mt76x2u_config(struct ieee80211_hw *hw, u32 changed) } const struct ieee80211_ops mt76x2u_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = mt76x02_tx, .start = mt76x2u_start, .stop = mt76x2u_stop, diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_core.c b/drivers/net/wireless/mediatek/mt76/mt792x_core.c index c42101aa9e45e..7872fbae7252b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_core.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_core.c @@ -684,9 +684,10 @@ mt792x_get_mac80211_ops(struct device *dev, if (!(*fw_features & MT792x_FW_CAP_CNM)) { ops->remain_on_channel = NULL; ops->cancel_remain_on_channel = NULL; - ops->add_chanctx = NULL; - ops->remove_chanctx = NULL; - ops->change_chanctx = NULL; + ops->add_chanctx = ieee80211_emulate_add_chanctx; + ops->remove_chanctx = ieee80211_emulate_remove_chanctx; + ops->change_chanctx = ieee80211_emulate_change_chanctx; + ops->switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx; ops->assign_vif_chanctx = NULL; ops->unassign_vif_chanctx = NULL; ops->mgd_prepare_tx = NULL; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 51deea84b6429..234e6495871b8 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -1450,6 +1450,10 @@ mt7996_net_fill_forward_path(struct ieee80211_hw *hw, #endif const struct ieee80211_ops mt7996_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = mt7996_tx, .start = mt7996_start, .stop = mt7996_stop, diff --git a/drivers/net/wireless/mediatek/mt7601u/main.c b/drivers/net/wireless/mediatek/mt7601u/main.c index c8d332456a6bf..a7330576486b2 100644 --- a/drivers/net/wireless/mediatek/mt7601u/main.c +++ b/drivers/net/wireless/mediatek/mt7601u/main.c @@ -405,6 +405,10 @@ out: } const struct ieee80211_ops mt7601u_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = mt7601u_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = mt7601u_start, diff --git a/drivers/net/wireless/purelifi/plfxlc/mac.c b/drivers/net/wireless/purelifi/plfxlc/mac.c index 6f5857d09af0e..641f847d47ab0 100644 --- a/drivers/net/wireless/purelifi/plfxlc/mac.c +++ b/drivers/net/wireless/purelifi/plfxlc/mac.c @@ -684,6 +684,10 @@ static int plfxlc_set_rts_threshold(struct ieee80211_hw *hw, u32 value) } static const struct ieee80211_ops plfxlc_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = plfxlc_op_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = plfxlc_op_start, diff --git a/drivers/net/wireless/ralink/rt2x00/rt2400pci.c b/drivers/net/wireless/ralink/rt2x00/rt2400pci.c index 13dd672b825e7..42e21e9f303b2 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2400pci.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2400pci.c @@ -1705,6 +1705,10 @@ static int rt2400pci_tx_last_beacon(struct ieee80211_hw *hw) } static const struct ieee80211_ops rt2400pci_mac80211_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = rt2x00mac_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = rt2x00mac_start, diff --git a/drivers/net/wireless/ralink/rt2x00/rt2500pci.c b/drivers/net/wireless/ralink/rt2x00/rt2500pci.c index ecddda4c471ea..36ddc5a69fa40 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2500pci.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2500pci.c @@ -2003,6 +2003,10 @@ static int rt2500pci_tx_last_beacon(struct ieee80211_hw *hw) } static const struct ieee80211_ops rt2500pci_mac80211_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = rt2x00mac_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = rt2x00mac_start, diff --git a/drivers/net/wireless/ralink/rt2x00/rt2500usb.c b/drivers/net/wireless/ralink/rt2x00/rt2500usb.c index 13fdcff0ad66c..09923765e2db8 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2500usb.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2500usb.c @@ -1794,6 +1794,10 @@ static int rt2500usb_probe_hw(struct rt2x00_dev *rt2x00dev) } static const struct ieee80211_ops rt2500usb_mac80211_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = rt2x00mac_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = rt2x00mac_start, diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800pci.c b/drivers/net/wireless/ralink/rt2x00/rt2800pci.c index dcb56f708a5fc..14c45aba836f2 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800pci.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800pci.c @@ -287,6 +287,10 @@ static int rt2800pci_read_eeprom(struct rt2x00_dev *rt2x00dev) } static const struct ieee80211_ops rt2800pci_mac80211_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = rt2x00mac_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = rt2x00mac_start, diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800soc.c b/drivers/net/wireless/ralink/rt2x00/rt2800soc.c index 7118d4f9038dd..701ba54bf3e57 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800soc.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800soc.c @@ -132,6 +132,10 @@ static int rt2800soc_write_firmware(struct rt2x00_dev *rt2x00dev, } static const struct ieee80211_ops rt2800soc_mac80211_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = rt2x00mac_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = rt2x00mac_start, diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800usb.c b/drivers/net/wireless/ralink/rt2x00/rt2800usb.c index b2a8e75a901b4..160bef79acdb7 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800usb.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800usb.c @@ -629,6 +629,10 @@ static int rt2800usb_probe_hw(struct rt2x00_dev *rt2x00dev) } static const struct ieee80211_ops rt2800usb_mac80211_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = rt2x00mac_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = rt2x00mac_start, diff --git a/drivers/net/wireless/ralink/rt2x00/rt61pci.c b/drivers/net/wireless/ralink/rt2x00/rt61pci.c index 483723bf514bf..d1cd5694e3c73 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt61pci.c +++ b/drivers/net/wireless/ralink/rt2x00/rt61pci.c @@ -2872,6 +2872,10 @@ static u64 rt61pci_get_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif) } static const struct ieee80211_ops rt61pci_mac80211_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = rt2x00mac_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = rt2x00mac_start, diff --git a/drivers/net/wireless/ralink/rt2x00/rt73usb.c b/drivers/net/wireless/ralink/rt2x00/rt73usb.c index dfa9d52138985..b79dda952a338 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt73usb.c +++ b/drivers/net/wireless/ralink/rt2x00/rt73usb.c @@ -2291,6 +2291,10 @@ static u64 rt73usb_get_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif) } static const struct ieee80211_ops rt73usb_mac80211_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = rt2x00mac_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = rt2x00mac_start, diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c b/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c index f6c25a52b69a9..77b6cb7e1f6b4 100644 --- a/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c +++ b/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c @@ -1607,6 +1607,10 @@ static void rtl8180_configure_filter(struct ieee80211_hw *dev, } static const struct ieee80211_ops rtl8180_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = rtl8180_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = rtl8180_start, diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c index 04945f905d6d0..78d99afa373d5 100644 --- a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c +++ b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c @@ -1377,6 +1377,10 @@ static int rtl8187_conf_tx(struct ieee80211_hw *dev, static const struct ieee80211_ops rtl8187_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = rtl8187_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = rtl8187_start, diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 055f66b623ff4..b0c1db726d7ab 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -7654,6 +7654,10 @@ static int rtl8xxxu_sta_remove(struct ieee80211_hw *hw, } static const struct ieee80211_ops rtl8xxxu_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = rtl8xxxu_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .add_interface = rtl8xxxu_add_interface, diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index 69e97647e3d6b..2e60a6991ca16 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -1903,6 +1903,10 @@ void rtl_init_sw_leds(struct ieee80211_hw *hw) EXPORT_SYMBOL(rtl_init_sw_leds); const struct ieee80211_ops rtl_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .start = rtl_op_start, .stop = rtl_op_stop, .tx = rtl_op_tx, diff --git a/drivers/net/wireless/realtek/rtw88/mac80211.c b/drivers/net/wireless/realtek/rtw88/mac80211.c index d8d68f16014e3..7af5bf7fe5b6a 100644 --- a/drivers/net/wireless/realtek/rtw88/mac80211.c +++ b/drivers/net/wireless/realtek/rtw88/mac80211.c @@ -927,6 +927,10 @@ static void rtw_ops_sta_rc_update(struct ieee80211_hw *hw, } const struct ieee80211_ops rtw_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = rtw_ops_tx, .wake_tx_queue = rtw_ops_wake_tx_queue, .start = rtw_ops_start, diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c index 61a216464b6dd..650c507c8ed37 100644 --- a/drivers/net/wireless/realtek/rtw89/core.c +++ b/drivers/net/wireless/realtek/rtw89/core.c @@ -4579,9 +4579,10 @@ struct rtw89_dev *rtw89_alloc_ieee80211_hw(struct device *device, !RTW89_CHK_FW_FEATURE(BEACON_FILTER, &early_fw); if (no_chanctx) { - ops->add_chanctx = NULL; - ops->remove_chanctx = NULL; - ops->change_chanctx = NULL; + ops->add_chanctx = ieee80211_emulate_add_chanctx; + ops->remove_chanctx = ieee80211_emulate_remove_chanctx; + ops->change_chanctx = ieee80211_emulate_change_chanctx; + ops->switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx; ops->assign_vif_chanctx = NULL; ops->unassign_vif_chanctx = NULL; ops->remain_on_channel = NULL; diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index 05890536e3538..e8aeb4d76c133 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -1957,6 +1957,10 @@ static int rsi_mac80211_resume(struct ieee80211_hw *hw) #endif static const struct ieee80211_ops mac80211_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = rsi_mac80211_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = rsi_mac80211_start, diff --git a/drivers/net/wireless/st/cw1200/main.c b/drivers/net/wireless/st/cw1200/main.c index 381013e0db636..a54a7b86864f8 100644 --- a/drivers/net/wireless/st/cw1200/main.c +++ b/drivers/net/wireless/st/cw1200/main.c @@ -203,6 +203,10 @@ static const unsigned long cw1200_ttl[] = { }; static const struct ieee80211_ops cw1200_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .start = cw1200_start, .stop = cw1200_stop, .add_interface = cw1200_add_interface, diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c index cd9a41f59f320..0da2d29dd7bd8 100644 --- a/drivers/net/wireless/ti/wl1251/main.c +++ b/drivers/net/wireless/ti/wl1251/main.c @@ -1351,6 +1351,10 @@ static struct ieee80211_supported_band wl1251_band_2ghz = { }; static const struct ieee80211_ops wl1251_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .start = wl1251_op_start, .stop = wl1251_op_stop, .add_interface = wl1251_op_add_interface, diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index a06a462d38f08..907c0842fee71 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -3922,6 +3922,10 @@ static const struct ieee80211_ops mac80211_hwsim_ops = { HWSIM_NON_MLO_OPS .sw_scan_start = mac80211_hwsim_sw_scan, .sw_scan_complete = mac80211_hwsim_sw_scan_complete, + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, }; #define HWSIM_CHANCTX_OPS \ diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c index 5d534e15a844f..900c063bd7249 100644 --- a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c @@ -1343,6 +1343,10 @@ static u64 zd_op_get_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif) } static const struct ieee80211_ops zd_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = zd_op_tx, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = zd_op_start, diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index b0b262de64801..e23a5da2b67e0 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -1684,6 +1684,10 @@ static void vnt_reset_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif) } static const struct ieee80211_ops vnt_mac_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = vnt_tx_80211, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = vnt_start, diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c index 2abae90f3f520..6c70493d1b01d 100644 --- a/drivers/staging/vt6656/main_usb.c +++ b/drivers/staging/vt6656/main_usb.c @@ -956,6 +956,10 @@ static void vnt_reset_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif) } static const struct ieee80211_ops vnt_mac_ops = { + .add_chanctx = ieee80211_emulate_add_chanctx, + .remove_chanctx = ieee80211_emulate_remove_chanctx, + .change_chanctx = ieee80211_emulate_change_chanctx, + .switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx, .tx = vnt_tx_80211, .wake_tx_queue = ieee80211_handle_wake_tx_queue, .start = vnt_start, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8d6ae22c09bfb..62c4b4d10bb49 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7532,4 +7532,17 @@ int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links); void ieee80211_set_active_links_async(struct ieee80211_vif *vif, u16 active_links); +/* for older drivers - let's not document these ... */ +int ieee80211_emulate_add_chanctx(struct ieee80211_hw *hw, + struct ieee80211_chanctx_conf *ctx); +void ieee80211_emulate_remove_chanctx(struct ieee80211_hw *hw, + struct ieee80211_chanctx_conf *ctx); +void ieee80211_emulate_change_chanctx(struct ieee80211_hw *hw, + struct ieee80211_chanctx_conf *ctx, + u32 changed); +int ieee80211_emulate_switch_vif_chanctx(struct ieee80211_hw *hw, + struct ieee80211_vif_chanctx_switch *vifs, + int n_vifs, + enum ieee80211_chanctx_switch_mode mode); + #endif /* MAC80211_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 6e2acad7fd715..d3bf029709d54 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -886,33 +886,30 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; - int ret = 0; + int ret; lockdep_assert_wiphy(local->hw.wiphy); if (cfg80211_chandef_identical(&local->monitor_chandef, chandef)) return 0; - if (local->use_chanctx) { - sdata = wiphy_dereference(local->hw.wiphy, - local->monitor_sdata); - if (sdata) { - ieee80211_link_release_channel(&sdata->deflink); - ret = ieee80211_link_use_channel(&sdata->deflink, - chandef, - IEEE80211_CHANCTX_EXCLUSIVE); - } - } else { - if (local->open_count == local->monitors) { - local->_oper_chandef = *chandef; - ieee80211_hw_config(local, 0); - } - } + sdata = wiphy_dereference(local->hw.wiphy, + local->monitor_sdata); + if (!sdata) + goto done; - if (ret == 0) - local->monitor_chandef = *chandef; + if (cfg80211_chandef_identical(&sdata->vif.bss_conf.chandef, chandef)) + return 0; - return ret; + ieee80211_link_release_channel(&sdata->deflink); + ret = ieee80211_link_use_channel(&sdata->deflink, + chandef, + IEEE80211_CHANCTX_EXCLUSIVE); + if (ret) + return ret; +done: + local->monitor_chandef = *chandef; + return 0; } static int @@ -3086,7 +3083,7 @@ static int ieee80211_get_tx_power(struct wiphy *wiphy, if (local->ops->get_txpower) return drv_get_txpower(local, sdata, dbm); - if (!local->use_chanctx) + if (local->emulate_chanctx) *dbm = local->hw.conf.power_level; else *dbm = sdata->vif.bss_conf.txpower; @@ -4214,10 +4211,7 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy, } else if (local->open_count > 0 && local->open_count == local->monitors && sdata->vif.type == NL80211_IFTYPE_MONITOR) { - if (local->use_chanctx) - *chandef = local->monitor_chandef; - else - *chandef = local->_oper_chandef; + *chandef = local->monitor_chandef; ret = 0; } out: diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 70ba5dc4b2837..cf6297ffaef32 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -519,11 +519,6 @@ static void _ieee80211_change_chanctx(struct ieee80211_local *local, drv_change_chanctx(local, ctx, changed); - if (!local->use_chanctx) { - local->_oper_chandef = *chandef; - ieee80211_hw_config(local, 0); - } - /* check is BW wider */ ieee80211_chan_bw_change(local, old_ctx, false); } @@ -674,23 +669,15 @@ static int ieee80211_add_chanctx(struct ieee80211_local *local, ieee80211_add_wbrf(local, &ctx->conf.def); - if (!local->use_chanctx) - local->hw.conf.radar_enabled = ctx->conf.radar_enabled; - /* turn idle off *before* setting channel -- some drivers need that */ changed = ieee80211_idle_off(local); if (changed) ieee80211_hw_config(local, changed); - if (!local->use_chanctx) { - local->_oper_chandef = ctx->conf.def; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); - } else { - err = drv_add_chanctx(local, ctx); - if (err) { - ieee80211_recalc_idle(local); - return err; - } + err = drv_add_chanctx(local, ctx); + if (err) { + ieee80211_recalc_idle(local); + return err; } return 0; @@ -725,32 +712,7 @@ static void ieee80211_del_chanctx(struct ieee80211_local *local, { lockdep_assert_wiphy(local->hw.wiphy); - if (!local->use_chanctx) { - struct cfg80211_chan_def *chandef = &local->_oper_chandef; - /* S1G doesn't have 20MHz, so get the correct width for the - * current channel. - */ - if (chandef->chan->band == NL80211_BAND_S1GHZ) - chandef->width = - ieee80211_s1g_channel_width(chandef->chan); - else - chandef->width = NL80211_CHAN_WIDTH_20_NOHT; - chandef->center_freq1 = chandef->chan->center_freq; - chandef->freq1_offset = chandef->chan->freq_offset; - chandef->center_freq2 = 0; - - /* NOTE: Disabling radar is only valid here for - * single channel context. To be sure, check it ... - */ - WARN_ON(local->hw.conf.radar_enabled && - !list_empty(&local->chanctx_list)); - - local->hw.conf.radar_enabled = false; - - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); - } else { - drv_remove_chanctx(local, ctx); - } + drv_remove_chanctx(local, ctx); ieee80211_recalc_idle(local); @@ -849,11 +811,6 @@ static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, chanctx->conf.radar_enabled = radar_enabled; - if (!local->use_chanctx) { - local->hw.conf.radar_enabled = chanctx->conf.radar_enabled; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); - } - drv_change_chanctx(local, chanctx, IEEE80211_CHANCTX_CHANGE_RADAR); } @@ -995,16 +952,6 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, rcu_read_unlock(); - if (!local->use_chanctx) { - if (rx_chains_static > 1) - local->smps_mode = IEEE80211_SMPS_OFF; - else if (rx_chains_dynamic > 1) - local->smps_mode = IEEE80211_SMPS_DYNAMIC; - else - local->smps_mode = IEEE80211_SMPS_STATIC; - ieee80211_hw_config(local, 0); - } - if (rx_chains_static == chanctx->conf.rx_chains_static && rx_chains_dynamic == chanctx->conf.rx_chains_dynamic) return; @@ -1114,7 +1061,7 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link, lockdep_assert_wiphy(local->hw.wiphy); curr_ctx = ieee80211_link_get_chanctx(link); - if (curr_ctx && local->use_chanctx && !local->ops->switch_vif_chanctx) + if (curr_ctx && !local->ops->switch_vif_chanctx) return -EOPNOTSUPP; new_ctx = ieee80211_find_reservation_chanctx(local, chandef, mode); @@ -1412,24 +1359,6 @@ ieee80211_link_has_in_place_reservation(struct ieee80211_link_data *link) return true; } -static int ieee80211_chsw_switch_hwconf(struct ieee80211_local *local, - struct ieee80211_chanctx *new_ctx) -{ - const struct cfg80211_chan_def *chandef; - - lockdep_assert_wiphy(local->hw.wiphy); - - chandef = ieee80211_chanctx_reserved_chandef(local, new_ctx, NULL); - if (WARN_ON(!chandef)) - return -EINVAL; - - local->hw.conf.radar_enabled = new_ctx->conf.radar_enabled; - local->_oper_chandef = *chandef; - ieee80211_hw_config(local, 0); - - return 0; -} - static int ieee80211_chsw_switch_vifs(struct ieee80211_local *local, int n_vifs) { @@ -1518,7 +1447,6 @@ err: static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) { struct ieee80211_chanctx *ctx, *ctx_tmp, *old_ctx; - struct ieee80211_chanctx *new_ctx = NULL; int err, n_assigned, n_reserved, n_ready; int n_ctx = 0, n_vifs_switch = 0, n_vifs_assign = 0, n_vifs_ctxless = 0; @@ -1551,9 +1479,6 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) goto err; } - if (!local->use_chanctx) - new_ctx = ctx; - n_ctx++; n_assigned = 0; @@ -1607,9 +1532,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) if (WARN_ON(n_ctx == 0) || WARN_ON(n_vifs_switch == 0 && n_vifs_assign == 0 && - n_vifs_ctxless == 0) || - WARN_ON(n_ctx > 1 && !local->use_chanctx) || - WARN_ON(!new_ctx && !local->use_chanctx)) { + n_vifs_ctxless == 0)) { err = -EINVAL; goto err; } @@ -1619,20 +1542,14 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) * reservations and driver capabilities. */ - if (local->use_chanctx) { - if (n_vifs_switch > 0) { - err = ieee80211_chsw_switch_vifs(local, n_vifs_switch); - if (err) - goto err; - } + if (n_vifs_switch > 0) { + err = ieee80211_chsw_switch_vifs(local, n_vifs_switch); + if (err) + goto err; + } - if (n_vifs_assign > 0 || n_vifs_ctxless > 0) { - err = ieee80211_chsw_switch_ctxs(local); - if (err) - goto err; - } - } else { - err = ieee80211_chsw_switch_hwconf(local, new_ctx); + if (n_vifs_assign > 0 || n_vifs_ctxless > 0) { + err = ieee80211_chsw_switch_ctxs(local); if (err) goto err; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 29294cf88d39c..cb4684a9451e4 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1348,7 +1348,8 @@ struct ieee80211_local { bool wiphy_ciphers_allocated; - bool use_chanctx; + struct cfg80211_chan_def dflt_chandef; + bool emulate_chanctx; /* protects the aggregated multicast list and filter calls */ spinlock_t filter_lock; @@ -1474,8 +1475,6 @@ struct ieee80211_local { enum mac80211_scan_state next_scan_state; struct wiphy_delayed_work scan_work; struct ieee80211_sub_if_data __rcu *scan_sdata; - /* For backward compatibility only -- do not use */ - struct cfg80211_chan_def _oper_chandef; /* Temporary remain-on-channel for off-channel operations */ struct ieee80211_channel *tmp_channel; @@ -1549,8 +1548,6 @@ struct ieee80211_local { int user_power_level; /* in dBm, for all interfaces */ - enum ieee80211_smps_mode smps_mode; - struct work_struct restart_work; #ifdef CONFIG_MAC80211_DEBUGFS @@ -1819,6 +1816,8 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, unsigned int mpdu_len, unsigned int mpdu_offset); int ieee80211_hw_config(struct ieee80211_local *local, u32 changed); +int ieee80211_hw_conf_chan(struct ieee80211_local *local); +void ieee80211_hw_conf_init(struct ieee80211_local *local); void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx); void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, u64 changed); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 1a6e9dd39a375..d81162bf7d48b 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1288,8 +1288,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) res = drv_start(local); if (res) goto err_del_bss; - /* we're brought up, everything changes */ - hw_reconf_flags = ~0; ieee80211_led_radio(local, true); ieee80211_mod_tpt_led_trig(local, IEEE80211_TPT_LEDTRIG_FL_RADIO, 0); @@ -1436,7 +1434,9 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) if (coming_up) local->open_count++; - if (hw_reconf_flags) + if (local->open_count == 1) + ieee80211_hw_conf_init(local); + else if (hw_reconf_flags) ieee80211_hw_config(local, hw_reconf_flags); ieee80211_recalc_ps(local); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index e05bcc35bc1e0..ce0cba8d7afc0 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -93,16 +93,32 @@ static void ieee80211_reconfig_filter(struct wiphy *wiphy, ieee80211_configure_filter(local); } -static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local) +static u32 ieee80211_calc_hw_conf_chan(struct ieee80211_local *local, + struct ieee80211_chanctx_conf *ctx) { struct ieee80211_sub_if_data *sdata; struct cfg80211_chan_def chandef = {}; + struct cfg80211_chan_def *oper = NULL; + enum ieee80211_smps_mode smps_mode = IEEE80211_SMPS_STATIC; u32 changed = 0; int power; u32 offchannel_flag; + if (!local->emulate_chanctx) + return 0; + offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; + if (ctx && !WARN_ON(!ctx->def.chan)) { + oper = &ctx->def; + if (ctx->rx_chains_static > 1) + smps_mode = IEEE80211_SMPS_OFF; + else if (ctx->rx_chains_dynamic > 1) + smps_mode = IEEE80211_SMPS_DYNAMIC; + else + smps_mode = IEEE80211_SMPS_STATIC; + } + if (local->scan_chandef.chan) { chandef = local->scan_chandef; } else if (local->tmp_channel) { @@ -110,25 +126,30 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local) chandef.width = NL80211_CHAN_WIDTH_20_NOHT; chandef.center_freq1 = chandef.chan->center_freq; chandef.freq1_offset = chandef.chan->freq_offset; - } else - chandef = local->_oper_chandef; + } else if (oper) { + chandef = *oper; + } else { + chandef = local->dflt_chandef; + } - WARN(!cfg80211_chandef_valid(&chandef), - "control:%d.%03d MHz width:%d center: %d.%03d/%d MHz", - chandef.chan->center_freq, chandef.chan->freq_offset, - chandef.width, chandef.center_freq1, chandef.freq1_offset, - chandef.center_freq2); + if (WARN(!cfg80211_chandef_valid(&chandef), + "control:%d.%03d MHz width:%d center: %d.%03d/%d MHz", + chandef.chan ? chandef.chan->center_freq : -1, + chandef.chan ? chandef.chan->freq_offset : 0, + chandef.width, chandef.center_freq1, chandef.freq1_offset, + chandef.center_freq2)) + return 0; - if (!cfg80211_chandef_identical(&chandef, &local->_oper_chandef)) + if (!oper || !cfg80211_chandef_identical(&chandef, oper)) local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; else local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL; offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; - if (offchannel_flag || - !cfg80211_chandef_identical(&local->hw.conf.chandef, - &local->_oper_chandef)) { + /* force it also for scanning, since drivers might config differently */ + if (offchannel_flag || local->scanning || + !cfg80211_chandef_identical(&local->hw.conf.chandef, &chandef)) { local->hw.conf.chandef = chandef; changed |= IEEE80211_CONF_CHANGE_CHANNEL; } @@ -140,8 +161,8 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local) * that otherwise STATIC is used. */ local->hw.conf.smps_mode = IEEE80211_SMPS_STATIC; - } else if (local->hw.conf.smps_mode != local->smps_mode) { - local->hw.conf.smps_mode = local->smps_mode; + } else if (local->hw.conf.smps_mode != smps_mode) { + local->hw.conf.smps_mode = smps_mode; changed |= IEEE80211_CONF_CHANGE_SMPS; } @@ -173,12 +194,9 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) might_sleep(); - if (!local->use_chanctx) - changed |= ieee80211_hw_conf_chan(local); - else - changed &= ~(IEEE80211_CONF_CHANGE_CHANNEL | - IEEE80211_CONF_CHANGE_POWER | - IEEE80211_CONF_CHANGE_SMPS); + WARN_ON(changed & (IEEE80211_CONF_CHANGE_CHANNEL | + IEEE80211_CONF_CHANGE_POWER | + IEEE80211_CONF_CHANGE_SMPS)); if (changed && local->open_count) { ret = drv_config(local, changed); @@ -202,6 +220,107 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) return ret; } +/* for scanning, offchannel and chanctx emulation only */ +static int _ieee80211_hw_conf_chan(struct ieee80211_local *local, + struct ieee80211_chanctx_conf *ctx) +{ + u32 changed; + + if (!local->open_count) + return 0; + + changed = ieee80211_calc_hw_conf_chan(local, ctx); + if (!changed) + return 0; + + return drv_config(local, changed); +} + +int ieee80211_hw_conf_chan(struct ieee80211_local *local) +{ + struct ieee80211_chanctx *ctx; + + ctx = list_first_entry_or_null(&local->chanctx_list, + struct ieee80211_chanctx, + list); + + return _ieee80211_hw_conf_chan(local, ctx ? &ctx->conf : NULL); +} + +void ieee80211_hw_conf_init(struct ieee80211_local *local) +{ + u32 changed = ~(IEEE80211_CONF_CHANGE_CHANNEL | + IEEE80211_CONF_CHANGE_POWER | + IEEE80211_CONF_CHANGE_SMPS); + + if (WARN_ON(!local->open_count)) + return; + + if (local->emulate_chanctx) { + struct ieee80211_chanctx *ctx; + + ctx = list_first_entry_or_null(&local->chanctx_list, + struct ieee80211_chanctx, + list); + + changed |= ieee80211_calc_hw_conf_chan(local, + ctx ? &ctx->conf : NULL); + } + + WARN_ON(drv_config(local, changed)); +} + +int ieee80211_emulate_add_chanctx(struct ieee80211_hw *hw, + struct ieee80211_chanctx_conf *ctx) +{ + struct ieee80211_local *local = hw_to_local(hw); + + local->hw.conf.radar_enabled = ctx->radar_enabled; + + return _ieee80211_hw_conf_chan(local, ctx); +} +EXPORT_SYMBOL(ieee80211_emulate_add_chanctx); + +void ieee80211_emulate_remove_chanctx(struct ieee80211_hw *hw, + struct ieee80211_chanctx_conf *ctx) +{ + struct ieee80211_local *local = hw_to_local(hw); + + local->hw.conf.radar_enabled = false; + + _ieee80211_hw_conf_chan(local, NULL); +} +EXPORT_SYMBOL(ieee80211_emulate_remove_chanctx); + +void ieee80211_emulate_change_chanctx(struct ieee80211_hw *hw, + struct ieee80211_chanctx_conf *ctx, + u32 changed) +{ + struct ieee80211_local *local = hw_to_local(hw); + + local->hw.conf.radar_enabled = ctx->radar_enabled; + + _ieee80211_hw_conf_chan(local, ctx); +} +EXPORT_SYMBOL(ieee80211_emulate_change_chanctx); + +int ieee80211_emulate_switch_vif_chanctx(struct ieee80211_hw *hw, + struct ieee80211_vif_chanctx_switch *vifs, + int n_vifs, + enum ieee80211_chanctx_switch_mode mode) +{ + struct ieee80211_local *local = hw_to_local(hw); + + if (n_vifs <= 0) + return -EINVAL; + + local->hw.conf.radar_enabled = vifs[0].new_ctx->radar_enabled; + _ieee80211_hw_conf_chan(local, vifs[0].new_ctx); + + return 0; +} +EXPORT_SYMBOL(ieee80211_emulate_switch_vif_chanctx); + #define BSS_CHANGED_VIF_CFG_FLAGS (BSS_CHANGED_ASSOC |\ BSS_CHANGED_IDLE |\ BSS_CHANGED_PS |\ @@ -645,7 +764,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, struct ieee80211_local *local; int priv_size, i; struct wiphy *wiphy; - bool use_chanctx; + bool emulate_chanctx; if (WARN_ON(!ops->tx || !ops->start || !ops->stop || !ops->config || !ops->add_interface || !ops->remove_interface || @@ -660,12 +779,26 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, return NULL; /* check all or no channel context operations exist */ - i = !!ops->add_chanctx + !!ops->remove_chanctx + - !!ops->change_chanctx + !!ops->assign_vif_chanctx + - !!ops->unassign_vif_chanctx; - if (WARN_ON(i != 0 && i != 5)) - return NULL; - use_chanctx = i == 5; + if (ops->add_chanctx == ieee80211_emulate_add_chanctx && + ops->remove_chanctx == ieee80211_emulate_remove_chanctx && + ops->change_chanctx == ieee80211_emulate_change_chanctx) { + if (WARN_ON(ops->assign_vif_chanctx || + ops->unassign_vif_chanctx)) + return NULL; + emulate_chanctx = true; + } else { + if (WARN_ON(ops->add_chanctx == ieee80211_emulate_add_chanctx || + ops->remove_chanctx == ieee80211_emulate_remove_chanctx || + ops->change_chanctx == ieee80211_emulate_change_chanctx)) + return NULL; + if (WARN_ON(!ops->add_chanctx || + !ops->remove_chanctx || + !ops->change_chanctx || + !ops->assign_vif_chanctx || + !ops->unassign_vif_chanctx)) + return NULL; + emulate_chanctx = false; + } /* Ensure 32-byte alignment of our private data and hw private data. * We use the wiphy priv data for both our ieee80211_local and for @@ -699,7 +832,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, WIPHY_FLAG_REPORTS_OBSS | WIPHY_FLAG_OFFCHAN_TX; - if (!use_chanctx || ops->remain_on_channel) + if (emulate_chanctx || ops->remain_on_channel) wiphy->flags |= WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL; wiphy->features |= NL80211_FEATURE_SK_TX_STATUS | @@ -756,7 +889,10 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, local->hw.priv = (char *)local + ALIGN(sizeof(*local), NETDEV_ALIGN); local->ops = ops; - local->use_chanctx = use_chanctx; + local->emulate_chanctx = emulate_chanctx; + + if (emulate_chanctx) + ieee80211_hw_set(&local->hw, CHANCTX_STA_CSA); /* * We need a bit of data queued to build aggregates properly, so @@ -833,7 +969,6 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, ieee80211_dfs_radar_detected_work); wiphy_work_init(&local->reconfig_filter, ieee80211_reconfig_filter); - local->smps_mode = IEEE80211_SMPS_OFF; wiphy_work_init(&local->dynamic_ps_enable_work, ieee80211_dynamic_ps_enable_work); @@ -984,7 +1119,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) * as much, e.g. monitoring beacons would be hard if we * might not even know which link is active at which time. */ - if (WARN_ON(!local->use_chanctx)) + if (WARN_ON(local->emulate_chanctx)) return -EINVAL; if (WARN_ON(!local->ops->link_info_changed)) @@ -1028,7 +1163,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) return -EINVAL; #endif - if (!local->use_chanctx) { + if (local->emulate_chanctx) { for (i = 0; i < local->hw.wiphy->n_iface_combinations; i++) { const struct ieee80211_iface_combination *comb; @@ -1094,11 +1229,11 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) &sband->channels[i], NL80211_CHAN_NO_HT); /* init channel we're on */ - if (!local->use_chanctx && !local->_oper_chandef.chan) { + local->monitor_chandef = dflt_chandef; + if (local->emulate_chanctx) { + local->dflt_chandef = dflt_chandef; local->hw.conf.chandef = dflt_chandef; - local->_oper_chandef = dflt_chandef; } - local->monitor_chandef = dflt_chandef; } channels += sband->n_channels; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7c0339d4f0598..9968bc0ddf6ea 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2287,8 +2287,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, chanctx = container_of(conf, struct ieee80211_chanctx, conf); - if (local->use_chanctx && - !ieee80211_hw_check(&local->hw, CHANCTX_STA_CSA)) { + if (!ieee80211_hw_check(&local->hw, CHANCTX_STA_CSA)) { sdata_info(sdata, "driver doesn't support chan-switch with channel contexts\n"); goto drop_connection; diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 6c40802025733..221695d841fd3 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -86,7 +86,7 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) lockdep_assert_wiphy(local->hw.wiphy); - if (WARN_ON(local->use_chanctx)) + if (WARN_ON(!local->emulate_chanctx)) return; /* @@ -136,7 +136,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local) lockdep_assert_wiphy(local->hw.wiphy); - if (WARN_ON(local->use_chanctx)) + if (WARN_ON(!local->emulate_chanctx)) return; list_for_each_entry(sdata, &local->interfaces, list) { @@ -351,10 +351,13 @@ static void _ieee80211_start_next_roc(struct ieee80211_local *local) * 20 MHz channel width) don't stop all the operations but still * treat it as though the ROC operation started properly, so * other ROC operations won't interfere with this one. + * + * Note: scan can't run, tmp_channel is what we use, so this + * must be the currently active channel. */ - roc->on_channel = roc->chan == local->_oper_chandef.chan && - local->_oper_chandef.width != NL80211_CHAN_WIDTH_5 && - local->_oper_chandef.width != NL80211_CHAN_WIDTH_10; + roc->on_channel = roc->chan == local->hw.conf.chandef.chan && + local->hw.conf.chandef.width != NL80211_CHAN_WIDTH_5 && + local->hw.conf.chandef.width != NL80211_CHAN_WIDTH_10; /* start this ROC */ ieee80211_recalc_idle(local); @@ -363,7 +366,7 @@ static void _ieee80211_start_next_roc(struct ieee80211_local *local) ieee80211_offchannel_stop_vifs(local); local->tmp_channel = roc->chan; - ieee80211_hw_config(local, 0); + ieee80211_hw_conf_chan(local); } wiphy_delayed_work_queue(local->hw.wiphy, &local->roc_work, @@ -426,7 +429,7 @@ static void __ieee80211_roc_work(struct ieee80211_local *local) return; if (!roc->started) { - WARN_ON(local->use_chanctx); + WARN_ON(!local->emulate_chanctx); _ieee80211_start_next_roc(local); } else { on_channel = roc->on_channel; @@ -439,7 +442,7 @@ static void __ieee80211_roc_work(struct ieee80211_local *local) ieee80211_flush_queues(local, NULL, false); local->tmp_channel = NULL; - ieee80211_hw_config(local, 0); + ieee80211_hw_conf_chan(local); ieee80211_offchannel_return(local); } @@ -539,7 +542,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, /* this may work, but is untested */ return -EOPNOTSUPP; - if (local->use_chanctx && !local->ops->remain_on_channel) + if (!local->emulate_chanctx && !local->ops->remain_on_channel) return -EOPNOTSUPP; roc = kzalloc(sizeof(*roc), GFP_KERNEL); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 81644d15fe2fa..6dedbbba153a0 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -476,7 +476,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) } /* Set power back to normal operating levels. */ - ieee80211_hw_config(local, 0); + ieee80211_hw_conf_chan(local); if (!hw_scan && was_scanning) { ieee80211_configure_filter(local); @@ -523,7 +523,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { /* Software scan is not supported in multi-channel cases */ - if (local->use_chanctx) + if (!local->emulate_chanctx) return -EOPNOTSUPP; /* @@ -553,7 +553,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local, ieee80211_configure_filter(local); /* We need to set power level at maximum rate for scanning. */ - ieee80211_hw_config(local, 0); + ieee80211_hw_conf_chan(local); wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, 0); @@ -790,7 +790,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, if (hw_scan) { __set_bit(SCAN_HW_SCANNING, &local->scanning); } else if ((req->n_channels == 1) && - (req->channels[0] == local->_oper_chandef.chan)) { + (req->channels[0] == local->hw.conf.chandef.chan)) { /* * If we are scanning only on the operating channel * then we do not need to stop normal activities @@ -808,7 +808,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, ieee80211_configure_filter(local); /* accept probe-responses */ /* We need to ensure power level is at max for scanning. */ - ieee80211_hw_config(local, 0); + ieee80211_hw_conf_chan(local); if ((req->channels[0]->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR)) || @@ -973,13 +973,13 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local, /* If scanning on oper channel, use whatever channel-type * is currently in use. */ - if (chan == local->_oper_chandef.chan) - local->scan_chandef = local->_oper_chandef; + if (chan == local->hw.conf.chandef.chan) + local->scan_chandef = local->hw.conf.chandef; else local->scan_chandef.width = NL80211_CHAN_WIDTH_20_NOHT; set_channel: - if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL)) + if (ieee80211_hw_conf_chan(local)) skip = 1; /* advance state machine to next channel/band */ @@ -1023,7 +1023,7 @@ static void ieee80211_scan_state_suspend(struct ieee80211_local *local, { /* switch back to the operating channel */ local->scan_chandef.chan = NULL; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + ieee80211_hw_conf_chan(local); /* disable PS */ ieee80211_offchannel_return(local); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 7b33942ea51f4..f57f7963ca377 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2390,8 +2390,6 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, if (chanctx_conf) chandef = &chanctx_conf->def; - else if (!local->use_chanctx) - chandef = &local->_oper_chandef; else goto fail_rcu; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 1a2522e699d41..51c1a99f57b83 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2472,9 +2472,6 @@ static void ieee80211_assign_chanctx(struct ieee80211_local *local, lockdep_assert_wiphy(local->hw.wiphy); - if (!local->use_chanctx) - return; - conf = rcu_dereference_protected(link->conf->chanctx_conf, lockdep_is_held(&local->hw.wiphy->mtx)); if (conf) { @@ -2704,20 +2701,20 @@ int ieee80211_reconfig(struct ieee80211_local *local) } /* add channel contexts */ - if (local->use_chanctx) { - list_for_each_entry(ctx, &local->chanctx_list, list) - if (ctx->replace_state != - IEEE80211_CHANCTX_REPLACES_OTHER) - WARN_ON(drv_add_chanctx(local, ctx)); - - sdata = wiphy_dereference(local->hw.wiphy, - local->monitor_sdata); - if (sdata && ieee80211_sdata_running(sdata)) - ieee80211_assign_chanctx(local, sdata, &sdata->deflink); - } + list_for_each_entry(ctx, &local->chanctx_list, list) + if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER) + WARN_ON(drv_add_chanctx(local, ctx)); + + sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); + if (sdata && ieee80211_sdata_running(sdata)) + ieee80211_assign_chanctx(local, sdata, &sdata->deflink); /* reconfigure hardware */ - ieee80211_hw_config(local, ~0); + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_LISTEN_INTERVAL | + IEEE80211_CONF_CHANGE_MONITOR | + IEEE80211_CONF_CHANGE_PS | + IEEE80211_CONF_CHANGE_RETRY_LIMITS | + IEEE80211_CONF_CHANGE_IDLE); ieee80211_configure_filter(local); -- cgit 1.2.3-korg From 9bf7079bc2271321fac467cae981c44e495b76b9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:34:39 +0100 Subject: wifi: mac80211: chan: chandef is non-NULL for reserved The last caller of this with a NULL argument was related to the non-chanctx code, so we can now remove this odd logic. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240129194108.bad8ec1e76c8.I12287452f42c54baf75821e75491cf6d021af20a@changeid Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index cf6297ffaef32..6b82c79cf7a6e 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -90,11 +90,11 @@ ieee80211_chanctx_reserved_chandef(struct ieee80211_local *local, lockdep_assert_wiphy(local->hw.wiphy); + if (WARN_ON(!compat)) + return NULL; + list_for_each_entry(link, &ctx->reserved_links, reserved_chanctx_list) { - if (!compat) - compat = &link->reserved_chandef; - compat = cfg80211_chandef_compatible(&link->reserved_chandef, compat); if (!compat) -- cgit 1.2.3-korg From 6092077ad09ce880c61735c314060f0bd79ae4aa Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:34:40 +0100 Subject: wifi: mac80211: introduce 'channel request' For channel contexts, mac80211 currently uses the cfg80211 chandef struct (control channel, center freq(s), width) to define towards drivers and internally how these behave. In fact, there are _two_ such structs used, where the min_def can reduce bandwidth according to the stations connected. Unfortunately, with EHT this is longer be sufficient, at least not for all hardware. EHT requires that non-AP STAs that are connected to an AP with a lower bandwidth than it (the AP) advertises (e.g. 160 MHz STA connected to 320 MHz AP) still be able to receive downlink OFDMA and respond to trigger frames for uplink OFDMA that specify the position and bandwidth for the non-AP STA relative to the channel the AP is using. Therefore, they need to be aware of this, and at least for some hardware (e.g. Intel) this awareness is in the hardware. As a result, use of the "same" channel may need to be split over two channel contexts where they differ by the AP being used. As a first step, introduce a concept of a channel request ('chanreq') for each interface, to control the context it requests. This step does nothing but reorganise the code, so that later the AP's chandef can be added to the request in order to handle the EHT case described above. Link: https://msgid.link/20240129194108.2e88e48bd2e9.I4256183debe975c5ed71621611206fdbb69ba330@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 8 +- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 12 +- .../net/wireless/intel/iwlwifi/mvm/mld-mac80211.c | 10 +- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 8 +- drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c | 6 +- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 4 +- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 4 +- drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 4 +- drivers/net/wireless/realtek/rtw89/mac.c | 4 +- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 4 +- drivers/net/wireless/silabs/wfx/sta.c | 4 +- drivers/net/wireless/ti/wlcore/main.c | 6 +- drivers/staging/vt6655/device_main.c | 2 +- drivers/staging/vt6656/main_usb.c | 2 +- include/net/mac80211.h | 12 +- net/mac80211/agg-tx.c | 2 +- net/mac80211/cfg.c | 66 +++--- net/mac80211/chan.c | 233 +++++++++++---------- net/mac80211/ht.c | 2 +- net/mac80211/ibss.c | 36 ++-- net/mac80211/ieee80211_i.h | 21 +- net/mac80211/iface.c | 6 +- net/mac80211/link.c | 3 +- net/mac80211/main.c | 2 +- net/mac80211/mesh.c | 81 +++---- net/mac80211/mesh_plink.c | 4 +- net/mac80211/mlme.c | 87 ++++---- net/mac80211/ocb.c | 3 +- net/mac80211/rate.c | 12 +- net/mac80211/spectmgmt.c | 22 +- net/mac80211/tdls.c | 8 +- net/mac80211/trace.h | 6 +- net/mac80211/util.c | 18 +- net/mac80211/vht.c | 6 +- 34 files changed, 379 insertions(+), 329 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index bcf78ccba8c1e..61269c7b1934f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -455,7 +455,7 @@ void iwl_mvm_set_fw_protection_flags(struct iwl_mvm *mvm, break; case IEEE80211_HT_OP_MODE_PROTECTION_20MHZ: /* Protect when channel wider than 20MHz */ - if (link_conf->chandef.width > NL80211_CHAN_WIDTH_20) + if (link_conf->chanreq.oper.width > NL80211_CHAN_WIDTH_20) *protection_flags |= cpu_to_le32(ht_flag); break; default: @@ -494,7 +494,7 @@ void iwl_mvm_set_fw_qos_params(struct iwl_mvm *mvm, struct ieee80211_vif *vif, if (link_conf->qos) *qos_flags |= cpu_to_le32(MAC_QOS_FLG_UPDATE_EDCA); - if (link_conf->chandef.width != NL80211_CHAN_WIDTH_20_NOHT) + if (link_conf->chanreq.oper.width != NL80211_CHAN_WIDTH_20_NOHT) *qos_flags |= cpu_to_le32(MAC_QOS_FLG_TGN); } @@ -910,8 +910,8 @@ u8 iwl_mvm_mac_ctxt_get_lowest_rate(struct iwl_mvm *mvm, link_conf = rcu_dereference(vif->link_conf[link_id]); if (link_conf) { basic = link_conf->basic_rates; - if (link_conf->chandef.chan) - band = link_conf->chandef.chan->band; + if (link_conf->chanreq.oper.chan) + band = link_conf->chanreq.oper.chan->band; } rcu_read_unlock(); } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 93baec9bb3fc0..65293b45a98fa 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -1641,7 +1641,7 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw, if (vif->type == NL80211_IFTYPE_MONITOR) { mvm->monitor_on = true; mvm->monitor_p80 = - iwl_mvm_chandef_get_primary_80(&vif->bss_conf.chandef); + iwl_mvm_chandef_get_primary_80(&vif->bss_conf.chanreq.oper); } if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) @@ -3421,16 +3421,16 @@ iwl_mvm_check_he_obss_narrow_bw_ru(struct ieee80211_hw *hw, .tolerated = true, }; - if (WARN_ON_ONCE(!link_conf->chandef.chan || + if (WARN_ON_ONCE(!link_conf->chanreq.oper.chan || !mvmvif->link[link_id])) return; - if (!(link_conf->chandef.chan->flags & IEEE80211_CHAN_RADAR)) { + if (!(link_conf->chanreq.oper.chan->flags & IEEE80211_CHAN_RADAR)) { mvmvif->link[link_id]->he_ru_2mhz_block = false; return; } - cfg80211_bss_iter(hw->wiphy, &link_conf->chandef, + cfg80211_bss_iter(hw->wiphy, &link_conf->chanreq.oper, iwl_mvm_check_he_obss_narrow_bw_ru_iter, &iter_data); @@ -3490,10 +3490,10 @@ static void iwl_mvm_mei_host_associated(struct iwl_mvm *mvm, return; /* FIXME: MEI needs to be updated for MLO */ - if (!vif->bss_conf.chandef.chan) + if (!vif->bss_conf.chanreq.oper.chan) return; - conn_info.channel = vif->bss_conf.chandef.chan->hw_value; + conn_info.channel = vif->bss_conf.chanreq.oper.chan->hw_value; switch (mvm_sta->pairwise_cipher) { case WLAN_CIPHER_SUITE_TKIP: diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c index ff7d9a7d607e5..b633a2a09c32d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c @@ -656,8 +656,8 @@ void iwl_mvm_mld_select_links(struct iwl_mvm *mvm, struct ieee80211_vif *vif, continue; data[n_data].link_id = link_id; - data[n_data].band = link_conf->chandef.chan->band; - data[n_data].width = link_conf->chandef.width; + data[n_data].band = link_conf->chanreq.oper.chan->band; + data[n_data].width = link_conf->chanreq.oper.width; data[n_data].active = vif->active_links & BIT(link_id); n_data++; } @@ -1241,8 +1241,8 @@ int iwl_mvm_mld_get_primary_link(struct iwl_mvm *mvm, continue; data[n_data].link_id = link_id; - data[n_data].band = link_conf->chandef.chan->band; - data[n_data].width = link_conf->chandef.width; + data[n_data].band = link_conf->chanreq.oper.chan->band; + data[n_data].width = link_conf->chanreq.oper.width; data[n_data].active = true; n_data++; } @@ -1292,7 +1292,7 @@ static bool iwl_mvm_can_enter_esr(struct iwl_mvm *mvm, continue; /* BT Coex effects eSR mode only if one of the link is on LB */ - if (link_conf->chandef.chan->band != NL80211_BAND_2GHZ) + if (link_conf->chanreq.oper.chan->band != NL80211_BAND_2GHZ) continue; ret = iwl_mvm_bt_coex_calculate_esr_mode(mvm, vif, link_id, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 747fc91ef8d03..ad4146d3345a7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -161,9 +161,9 @@ static void iwl_mvm_rx_monitor_notif(struct iwl_mvm *mvm, if (!vif || vif->type != NL80211_IFTYPE_STATION) return; - if (!vif->bss_conf.chandef.chan || - vif->bss_conf.chandef.chan->band != NL80211_BAND_2GHZ || - vif->bss_conf.chandef.width < NL80211_CHAN_WIDTH_40) + if (!vif->bss_conf.chanreq.oper.chan || + vif->bss_conf.chanreq.oper.chan->band != NL80211_BAND_2GHZ || + vif->bss_conf.chanreq.oper.width < NL80211_CHAN_WIDTH_40) return; if (!vif->cfg.assoc) @@ -219,7 +219,7 @@ void iwl_mvm_update_link_smps(struct ieee80211_vif *vif, return; if (mvm->fw_static_smps_request && - link_conf->chandef.width == NL80211_CHAN_WIDTH_160 && + link_conf->chanreq.oper.width == NL80211_CHAN_WIDTH_160 && link_conf->he_support) mode = IEEE80211_SMPS_STATIC; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c index 6cba8a353b531..71d92635d6d75 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c @@ -525,10 +525,10 @@ u16 rs_fw_get_max_amsdu_len(struct ieee80211_sta *sta, const struct ieee80211_sta_ht_cap *ht_cap = &link_sta->ht_cap; const struct ieee80211_sta_eht_cap *eht_cap = &link_sta->eht_cap; - if (WARN_ON_ONCE(!link_conf->chandef.chan)) + if (WARN_ON_ONCE(!link_conf->chanreq.oper.chan)) return IEEE80211_MAX_MPDU_LEN_VHT_3895; - if (link_conf->chandef.chan->band == NL80211_BAND_6GHZ) { + if (link_conf->chanreq.oper.chan->band == NL80211_BAND_6GHZ) { switch (le16_get_bits(link_sta->he_6ghz_capa.capa, IEEE80211_HE_6GHZ_CAP_MAX_MPDU_LEN)) { case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454: @@ -538,7 +538,7 @@ u16 rs_fw_get_max_amsdu_len(struct ieee80211_sta *sta, default: return IEEE80211_MAX_MPDU_LEN_VHT_3895; } - } else if (link_conf->chandef.chan->band == NL80211_BAND_2GHZ && + } else if (link_conf->chanreq.oper.chan->band == NL80211_BAND_2GHZ && eht_cap->has_eht) { switch (u8_get_bits(eht_cap->eht_cap_elem.mac_cap_info[0], IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_MASK)) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 8ffbb8efda73e..7e9f3a6702123 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -71,7 +71,7 @@ u32 iwl_mvm_get_sta_ampdu_dens(struct ieee80211_link_sta *link_sta, mpdu_dens = link_sta->ht_cap.ampdu_density; } - if (link_conf->chandef.chan->band == NL80211_BAND_6GHZ) { + if (link_conf->chanreq.oper.chan->band == NL80211_BAND_6GHZ) { /* overwrite HT values on 6 GHz */ mpdu_dens = le16_get_bits(link_sta->he_6ghz_capa.capa, IEEE80211_HE_6GHZ_CAP_MIN_MPDU_START); @@ -208,7 +208,7 @@ int iwl_mvm_sta_send_to_fw(struct iwl_mvm *mvm, struct ieee80211_sta *sta, } if (sta->deflink.ht_cap.ht_supported || - mvm_sta->vif->bss_conf.chandef.chan->band == NL80211_BAND_6GHZ) + mvm_sta->vif->bss_conf.chanreq.oper.chan->band == NL80211_BAND_6GHZ) add_sta_cmd.station_flags_msk |= cpu_to_le32(STA_FLG_MAX_AGG_SIZE_MSK | STA_FLG_AGG_MPDU_DENS_MSK); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 79eb6394e5a74..58d1f283d6283 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -894,10 +894,10 @@ unsigned int iwl_mvm_max_amsdu_size(struct iwl_mvm *mvm, if (WARN_ON(!link_conf)) band = NL80211_BAND_2GHZ; else - band = link_conf->chandef.chan->band; + band = link_conf->chanreq.oper.chan->band; rcu_read_unlock(); } else { - band = mvmsta->vif->bss_conf.chandef.chan->band; + band = mvmsta->vif->bss_conf.chanreq.oper.chan->band; } lmac = iwl_mvm_get_lmac_id(mvm, band); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c index c67c4f6ca2aae..df1ad6d4e12d9 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c @@ -463,10 +463,10 @@ static bool mt7915_check_he_obss_narrow_bw_ru(struct ieee80211_hw *hw, .tolerated = true, }; - if (!(vif->bss_conf.chandef.chan->flags & IEEE80211_CHAN_RADAR)) + if (!(vif->bss_conf.chanreq.oper.chan->flags & IEEE80211_CHAN_RADAR)) return false; - cfg80211_bss_iter(hw->wiphy, &vif->bss_conf.chandef, + cfg80211_bss_iter(hw->wiphy, &vif->bss_conf.chanreq.oper, mt7915_check_he_obss_narrow_bw_ru_iter, &iter_data); diff --git a/drivers/net/wireless/realtek/rtw89/mac.c b/drivers/net/wireless/realtek/rtw89/mac.c index dbf2d6fe4ea71..ef4c492003d84 100644 --- a/drivers/net/wireless/realtek/rtw89/mac.c +++ b/drivers/net/wireless/realtek/rtw89/mac.c @@ -4615,10 +4615,10 @@ void rtw89_mac_set_he_obss_narrow_bw_ru(struct rtw89_dev *rtwdev, if (!vif->bss_conf.he_support || vif->type != NL80211_IFTYPE_STATION) return; - if (!(vif->bss_conf.chandef.chan->flags & IEEE80211_CHAN_RADAR)) + if (!(vif->bss_conf.chanreq.oper.chan->flags & IEEE80211_CHAN_RADAR)) return; - cfg80211_bss_iter(hw->wiphy, &vif->bss_conf.chandef, + cfg80211_bss_iter(hw->wiphy, &vif->bss_conf.chanreq.oper, rtw89_mac_check_he_obss_narrow_bw_ru_iter, &tolerated); diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index e8aeb4d76c133..211fa25b9a78e 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -740,7 +740,7 @@ u16 rsi_get_connected_channel(struct ieee80211_vif *vif) return 0; bss = &vif->bss_conf; - channel = bss->chandef.chan; + channel = bss->chanreq.oper.chan; if (!channel) return 0; @@ -759,7 +759,7 @@ static void rsi_switch_channel(struct rsi_hw *adapter, if (!vif) return; - channel = vif->bss_conf.chandef.chan; + channel = vif->bss_conf.chanreq.oper.chan; if (!channel) return; diff --git a/drivers/net/wireless/silabs/wfx/sta.c b/drivers/net/wireless/silabs/wfx/sta.c index bb4446b88c12b..a904602f02ce2 100644 --- a/drivers/net/wireless/silabs/wfx/sta.c +++ b/drivers/net/wireless/silabs/wfx/sta.c @@ -144,13 +144,13 @@ static int wfx_get_ps_timeout(struct wfx_vif *wvif, bool *enable_ps) struct wfx_vif *wvif_ch0 = wdev_to_wvif(wvif->wdev, 0); struct ieee80211_vif *vif_ch0 = wvif_to_vif(wvif_ch0); - chan0 = vif_ch0->bss_conf.chandef.chan; + chan0 = vif_ch0->bss_conf.chanreq.oper.chan; } if (wdev_to_wvif(wvif->wdev, 1)) { struct wfx_vif *wvif_ch1 = wdev_to_wvif(wvif->wdev, 1); struct ieee80211_vif *vif_ch1 = wvif_to_vif(wvif_ch1); - chan1 = vif_ch1->bss_conf.chandef.chan; + chan1 = vif_ch1->bss_conf.chanreq.oper.chan; } if (chan0 && chan1 && vif->type != NL80211_IFTYPE_AP) { if (chan0->hw_value == chan1->hw_value) { diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 5736acb4d2063..ef12169f8044d 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -2910,7 +2910,7 @@ static int wlcore_set_assoc(struct wl1271 *wl, struct wl12xx_vif *wlvif, int ret; wlvif->aid = vif->cfg.aid; - wlvif->channel_type = cfg80211_get_chandef_type(&bss_conf->chandef); + wlvif->channel_type = cfg80211_get_chandef_type(&bss_conf->chanreq.oper); wlvif->beacon_int = bss_conf->beacon_int; wlvif->wmm_enabled = bss_conf->qos; @@ -4242,7 +4242,7 @@ static void wl1271_bss_info_changed_ap(struct wl1271 *wl, /* Handle HT information change */ if ((changed & BSS_CHANGED_HT) && - (bss_conf->chandef.width != NL80211_CHAN_WIDTH_20_NOHT)) { + (bss_conf->chanreq.oper.width != NL80211_CHAN_WIDTH_20_NOHT)) { ret = wl1271_acx_set_ht_information(wl, wlvif, bss_conf->ht_operation_mode); if (ret < 0) { @@ -4515,7 +4515,7 @@ static void wl1271_bss_info_changed_sta(struct wl1271 *wl, /* Handle new association with HT. Do this after join. */ if (sta_exists) { bool enabled = - bss_conf->chandef.width != NL80211_CHAN_WIDTH_20_NOHT; + bss_conf->chanreq.oper.width != NL80211_CHAN_WIDTH_20_NOHT; ret = wlcore_hw_set_peer_cap(wl, &sta_ht_cap, diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index e23a5da2b67e0..283804b49e91f 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -1515,7 +1515,7 @@ static void vnt_bss_info_changed(struct ieee80211_hw *hw, if (changed & BSS_CHANGED_TXPOWER) RFbSetPower(priv, priv->wCurrentRate, - conf->chandef.chan->hw_value); + conf->chanreq.oper.chan->hw_value); if (changed & BSS_CHANGED_BEACON_ENABLED) { dev_dbg(&priv->pcid->dev, diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c index 6c70493d1b01d..7bbed462f062b 100644 --- a/drivers/staging/vt6656/main_usb.c +++ b/drivers/staging/vt6656/main_usb.c @@ -794,7 +794,7 @@ static void vnt_bss_info_changed(struct ieee80211_hw *hw, vnt_set_bss_mode(priv); if (changed & (BSS_CHANGED_TXPOWER | BSS_CHANGED_BANDWIDTH)) - vnt_rf_setpower(priv, conf->chandef.chan); + vnt_rf_setpower(priv, conf->chanreq.oper.chan); if (changed & BSS_CHANGED_BEACON_ENABLED) { dev_dbg(&priv->usb->dev, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 62c4b4d10bb49..dd8a66e9afd9a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -223,6 +223,14 @@ enum ieee80211_chanctx_change { IEEE80211_CHANCTX_CHANGE_MIN_WIDTH = BIT(4), }; +/** + * struct ieee80211_chan_req - A channel "request" + * @oper: channel definition to use for operation + */ +struct ieee80211_chan_req { + struct cfg80211_chan_def oper; +}; + /** * struct ieee80211_chanctx_conf - channel context that vifs may be tuned to * @@ -583,7 +591,7 @@ struct ieee80211_fils_discovery { * @mcast_rate: per-band multicast rate index + 1 (0: disabled) * @bssid: The BSSID for this BSS * @enable_beacon: whether beaconing should be enabled or not - * @chandef: Channel definition for this BSS -- the hardware might be + * @chanreq: Channel request for this BSS -- the hardware might be * configured a higher bandwidth than this BSS uses, for example. * @mu_group: VHT MU-MIMO group membership data * @ht_operation_mode: HT operation mode like in &struct ieee80211_ht_operation. @@ -716,7 +724,7 @@ struct ieee80211_bss_conf { u32 cqm_rssi_hyst; s32 cqm_rssi_low; s32 cqm_rssi_high; - struct cfg80211_chan_def chandef; + struct ieee80211_chan_req chanreq; struct ieee80211_mu_group_data mu_group; bool qos; bool hidden_ssid; diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index b8a278355e18e..21d55dc539f6c 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -616,7 +616,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, return -EINVAL; if (!pubsta->deflink.ht_cap.ht_supported && - sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ) + sta->sdata->vif.bss_conf.chanreq.oper.chan->band != NL80211_BAND_6GHZ) return -EINVAL; if (WARN_ON_ONCE(!local->ops->ampdu_action)) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d3bf029709d54..5aa02b0872d92 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -886,11 +886,13 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; + struct ieee80211_chan_req chanreq = { .oper = *chandef }; int ret; lockdep_assert_wiphy(local->hw.wiphy); - if (cfg80211_chandef_identical(&local->monitor_chandef, chandef)) + if (cfg80211_chandef_identical(&local->monitor_chanreq.oper, + &chanreq.oper)) return 0; sdata = wiphy_dereference(local->hw.wiphy, @@ -898,17 +900,17 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, if (!sdata) goto done; - if (cfg80211_chandef_identical(&sdata->vif.bss_conf.chandef, chandef)) + if (cfg80211_chandef_identical(&sdata->vif.bss_conf.chanreq.oper, + &chanreq.oper)) return 0; ieee80211_link_release_channel(&sdata->deflink); - ret = ieee80211_link_use_channel(&sdata->deflink, - chandef, + ret = ieee80211_link_use_channel(&sdata->deflink, &chanreq, IEEE80211_CHANCTX_EXCLUSIVE); if (ret) return ret; done: - local->monitor_chandef = *chandef; + local->monitor_chanreq = chanreq; return 0; } @@ -1257,6 +1259,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, unsigned int link_id = params->beacon.link_id; struct ieee80211_link_data *link; struct ieee80211_bss_conf *link_conf; + struct ieee80211_chan_req chanreq = { .oper = params->chandef }; lockdep_assert_wiphy(local->hw.wiphy); @@ -1369,7 +1372,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, return err; } - err = ieee80211_link_use_channel(link, ¶ms->chandef, + err = ieee80211_link_use_channel(link, &chanreq, IEEE80211_CHANCTX_SHARED); if (!err) ieee80211_link_copy_chanctx_to_vlans(link, false); @@ -1626,7 +1629,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, BSS_CHANGED_BEACON_ENABLED); if (sdata->wdev.cac_started) { - chandef = link_conf->chandef; + chandef = link_conf->chanreq.oper; wiphy_delayed_work_cancel(wiphy, &link->dfs_cac_timer_work); cfg80211_cac_event(sdata->dev, &chandef, NL80211_RADAR_CAC_ABORTED, @@ -1826,7 +1829,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, if (params->supported_rates && params->supported_rates_len) { - ieee80211_parse_bitrates(link->conf->chandef.width, + ieee80211_parse_bitrates(link->conf->chanreq.oper.width, sband, params->supported_rates, params->supported_rates_len, &link_sta->pub->supp_rates[sband->band]); @@ -2602,6 +2605,7 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev, const struct mesh_setup *setup) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_chan_req chanreq = { .oper = setup->chandef }; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; int err; @@ -2618,7 +2622,7 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev, sdata->deflink.smps_mode = IEEE80211_SMPS_OFF; sdata->deflink.needed_rx_chains = sdata->local->rx_chains; - err = ieee80211_link_use_channel(&sdata->deflink, &setup->chandef, + err = ieee80211_link_use_channel(&sdata->deflink, &chanreq, IEEE80211_CHANCTX_SHARED); if (err) return err; @@ -2661,7 +2665,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy, return -EINVAL; if (params->basic_rates) { - if (!ieee80211_parse_bitrates(link->conf->chandef.width, + if (!ieee80211_parse_bitrates(link->conf->chanreq.oper.width, wiphy->bands[sband->band], params->basic_rates, params->basic_rates_len, @@ -3176,7 +3180,7 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata, * the new value until we associate. */ if (!sdata->u.mgd.associated || - link->conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT) + link->conf->chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT) return 0; ap = sdata->vif.cfg.ap_addr; @@ -3331,9 +3335,11 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, * so at a basic rate so that all clients can receive it. */ if (rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) && - sdata->vif.bss_conf.chandef.chan) { + sdata->vif.bss_conf.chanreq.oper.chan) { u32 basic_rates = sdata->vif.bss_conf.basic_rates; - enum nl80211_band band = sdata->vif.bss_conf.chandef.chan->band; + enum nl80211_band band; + + band = sdata->vif.bss_conf.chanreq.oper.chan->band; if (!(mask->control[band].legacy & basic_rates)) return -EINVAL; @@ -3385,6 +3391,7 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy, u32 cac_time_ms) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_chan_req chanreq = { .oper = *chandef }; struct ieee80211_local *local = sdata->local; int err; @@ -3399,7 +3406,7 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy, sdata->deflink.smps_mode = IEEE80211_SMPS_OFF; sdata->deflink.needed_rx_chains = local->rx_chains; - err = ieee80211_link_use_channel(&sdata->deflink, chandef, + err = ieee80211_link_use_channel(&sdata->deflink, &chanreq, IEEE80211_CHANCTX_SHARED); if (err) goto out_unlock; @@ -3651,8 +3658,8 @@ static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data) return ieee80211_link_use_reserved_context(&sdata->deflink); } - if (!cfg80211_chandef_identical(&link_data->conf->chandef, - &link_data->csa_chandef)) + if (!cfg80211_chandef_identical(&link_data->conf->chanreq.oper, + &link_data->csa_chanreq.oper)) return -EINVAL; sdata->vif.bss_conf.csa_active = false; @@ -3679,7 +3686,7 @@ static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data) if (err) return err; - cfg80211_ch_switch_notify(sdata->dev, &link_data->csa_chandef, + cfg80211_ch_switch_notify(sdata->dev, &link_data->csa_chanreq.oper, link_data->link_id, link_data->conf->eht_puncturing); @@ -3814,7 +3821,7 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; /* changes into another band are not supported */ - if (sdata->vif.bss_conf.chandef.chan->band != + if (sdata->vif.bss_conf.chanreq.oper.chan->band != params->chandef.chan->band) return -EINVAL; @@ -3862,6 +3869,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_chan_req chanreq = { .oper = params->chandef }; struct ieee80211_local *local = sdata->local; struct ieee80211_channel_switch ch_switch; struct ieee80211_chanctx_conf *conf; @@ -3877,8 +3885,8 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, if (sdata->wdev.cac_started) return -EBUSY; - if (cfg80211_chandef_identical(¶ms->chandef, - &sdata->vif.bss_conf.chandef)) + if (cfg80211_chandef_identical(&chanreq.oper, + &sdata->vif.bss_conf.chanreq.oper)) return -EINVAL; /* don't allow another channel switch if one is already active. */ @@ -3903,14 +3911,14 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, ch_switch.timestamp = 0; ch_switch.device_timestamp = 0; ch_switch.block_tx = params->block_tx; - ch_switch.chandef = params->chandef; + ch_switch.chandef = chanreq.oper; ch_switch.count = params->count; err = drv_pre_channel_switch(sdata, &ch_switch); if (err) goto out; - err = ieee80211_link_reserve_chanctx(&sdata->deflink, ¶ms->chandef, + err = ieee80211_link_reserve_chanctx(&sdata->deflink, &chanreq, chanctx->mode, params->radar_required); if (err) @@ -3936,7 +3944,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, if (params->punct_bitmap && !sdata->vif.bss_conf.eht_support) goto out; - sdata->deflink.csa_chandef = params->chandef; + sdata->deflink.csa_chanreq = chanreq; sdata->deflink.csa_block_tx = params->block_tx; sdata->vif.bss_conf.csa_active = true; sdata->vif.bss_conf.csa_punct_bitmap = params->punct_bitmap; @@ -3946,14 +3954,15 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, IEEE80211_QUEUE_STOP_REASON_CSA); cfg80211_ch_switch_started_notify(sdata->dev, - &sdata->deflink.csa_chandef, 0, + &sdata->deflink.csa_chanreq.oper, 0, params->count, params->block_tx, sdata->vif.bss_conf.csa_punct_bitmap); if (changed) { ieee80211_link_info_change_notify(sdata, &sdata->deflink, changed); - drv_channel_switch_beacon(sdata, ¶ms->chandef); + drv_channel_switch_beacon(sdata, + &sdata->deflink.csa_chanreq.oper); } else { /* if the beacon didn't change, we can finalize immediately */ ieee80211_csa_finalize(&sdata->deflink); @@ -4206,12 +4215,12 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy, chanctx_conf = rcu_dereference(link->conf->chanctx_conf); if (chanctx_conf) { - *chandef = link->conf->chandef; + *chandef = link->conf->chanreq.oper; ret = 0; } else if (local->open_count > 0 && local->open_count == local->monitors && sdata->vif.type == NL80211_IFTYPE_MONITOR) { - *chandef = local->monitor_chandef; + *chandef = local->monitor_chanreq.oper; ret = 0; } out: @@ -4259,12 +4268,13 @@ static int ieee80211_set_ap_chanwidth(struct wiphy *wiphy, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link; + struct ieee80211_chan_req chanreq = { .oper = *chandef }; int ret; u64 changed = 0; link = sdata_dereference(sdata->link[link_id], sdata); - ret = ieee80211_link_change_bandwidth(link, chandef, &changed); + ret = ieee80211_link_change_chanreq(link, &chanreq, &changed); if (ret == 0) ieee80211_link_info_change_notify(sdata, link, changed); diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 6b82c79cf7a6e..f1cef332e4dbe 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -81,87 +81,97 @@ ieee80211_link_get_chanctx(struct ieee80211_link_data *link) return container_of(conf, struct ieee80211_chanctx, conf); } -static const struct cfg80211_chan_def * -ieee80211_chanctx_reserved_chandef(struct ieee80211_local *local, +static const struct ieee80211_chan_req * +ieee80211_chanreq_compatible(const struct ieee80211_chan_req *a, + const struct ieee80211_chan_req *b) +{ + const struct cfg80211_chan_def *compat; + + compat = cfg80211_chandef_compatible(&a->oper, &b->oper); + + if (compat == &a->oper) + return a; + + if (compat == &b->oper) + return b; + + WARN_ON(compat); + return NULL; +} + +static const struct ieee80211_chan_req * +ieee80211_chanctx_compatible(struct ieee80211_chanctx *ctx, + const struct ieee80211_chan_req *req, + struct ieee80211_chan_req *tmp) +{ + *tmp = (struct ieee80211_chan_req){ + .oper = ctx->conf.def, + }; + + return ieee80211_chanreq_compatible(tmp, req); +} + +static const struct ieee80211_chan_req * +ieee80211_chanctx_reserved_chanreq(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, - const struct cfg80211_chan_def *compat) + const struct ieee80211_chan_req *req) { struct ieee80211_link_data *link; lockdep_assert_wiphy(local->hw.wiphy); - if (WARN_ON(!compat)) + if (WARN_ON(!req)) return NULL; - list_for_each_entry(link, &ctx->reserved_links, - reserved_chanctx_list) { - compat = cfg80211_chandef_compatible(&link->reserved_chandef, - compat); - if (!compat) + list_for_each_entry(link, &ctx->reserved_links, reserved_chanctx_list) { + req = ieee80211_chanreq_compatible(&link->reserved, req); + if (!req) break; } - return compat; + return req; } -static const struct cfg80211_chan_def * +static const struct ieee80211_chan_req * ieee80211_chanctx_non_reserved_chandef(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, - const struct cfg80211_chan_def *compat) + const struct ieee80211_chan_req *compat) { struct ieee80211_link_data *link; + const struct ieee80211_chan_req *comp_def = compat; lockdep_assert_wiphy(local->hw.wiphy); - list_for_each_entry(link, &ctx->assigned_links, - assigned_chanctx_list) { + list_for_each_entry(link, &ctx->assigned_links, assigned_chanctx_list) { struct ieee80211_bss_conf *link_conf = link->conf; if (link->reserved_chanctx) continue; - if (!compat) - compat = &link_conf->chandef; - - compat = cfg80211_chandef_compatible( - &link_conf->chandef, compat); - if (!compat) + comp_def = ieee80211_chanreq_compatible(&link_conf->chanreq, + comp_def); + if (!comp_def) break; } - return compat; -} - -static const struct cfg80211_chan_def * -ieee80211_chanctx_combined_chandef(struct ieee80211_local *local, - struct ieee80211_chanctx *ctx, - const struct cfg80211_chan_def *compat) -{ - lockdep_assert_wiphy(local->hw.wiphy); - - compat = ieee80211_chanctx_reserved_chandef(local, ctx, compat); - if (!compat) - return NULL; - - compat = ieee80211_chanctx_non_reserved_chandef(local, ctx, compat); - if (!compat) - return NULL; - - return compat; + return comp_def; } static bool -ieee80211_chanctx_can_reserve_chandef(struct ieee80211_local *local, - struct ieee80211_chanctx *ctx, - const struct cfg80211_chan_def *def) +ieee80211_chanctx_can_reserve(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx, + const struct ieee80211_chan_req *req) { lockdep_assert_wiphy(local->hw.wiphy); - if (ieee80211_chanctx_combined_chandef(local, ctx, def)) - return true; + if (!ieee80211_chanctx_reserved_chanreq(local, ctx, req)) + return false; + + if (!ieee80211_chanctx_non_reserved_chandef(local, ctx, req)) + return false; if (!list_empty(&ctx->reserved_links) && - ieee80211_chanctx_reserved_chandef(local, ctx, def)) + ieee80211_chanctx_reserved_chanreq(local, ctx, req)) return true; return false; @@ -169,7 +179,7 @@ ieee80211_chanctx_can_reserve_chandef(struct ieee80211_local *local, static struct ieee80211_chanctx * ieee80211_find_reservation_chanctx(struct ieee80211_local *local, - const struct cfg80211_chan_def *chandef, + const struct ieee80211_chan_req *chanreq, enum ieee80211_chanctx_mode mode) { struct ieee80211_chanctx *ctx; @@ -186,8 +196,7 @@ ieee80211_find_reservation_chanctx(struct ieee80211_local *local, if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) continue; - if (!ieee80211_chanctx_can_reserve_chandef(local, ctx, - chandef)) + if (!ieee80211_chanctx_can_reserve(local, ctx, chanreq)) continue; return ctx; @@ -290,7 +299,7 @@ ieee80211_get_chanctx_vif_max_required_bw(struct ieee80211_sub_if_data *sdata, * point, so take the width from the chandef, but * account also for TDLS peers */ - width = max(link->conf->chandef.width, + width = max(link->conf->chanreq.oper.width, ieee80211_get_max_required_bw(sdata, link_id)); break; case NL80211_IFTYPE_P2P_DEVICE: @@ -299,7 +308,7 @@ ieee80211_get_chanctx_vif_max_required_bw(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_OCB: - width = link->conf->chandef.width; + width = link->conf->chanreq.oper.width; break; case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_UNSPECIFIED: @@ -395,7 +404,7 @@ _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, } /* calling this function is assuming that station vif is updated to - * lates changes by calling ieee80211_link_update_chandef + * lates changes by calling ieee80211_link_update_chanreq */ static void ieee80211_chan_bw_change(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, @@ -475,9 +484,10 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, static void _ieee80211_change_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, struct ieee80211_chanctx *old_ctx, - const struct cfg80211_chan_def *chandef, + const struct ieee80211_chan_req *chanreq, struct ieee80211_link_data *rsvd_for) { + const struct cfg80211_chan_def *chandef = &chanreq->oper; u32 changed; /* expected to handle only 20/40/80/160/320 channel widths */ @@ -526,16 +536,17 @@ static void _ieee80211_change_chanctx(struct ieee80211_local *local, static void ieee80211_change_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, struct ieee80211_chanctx *old_ctx, - const struct cfg80211_chan_def *chandef) + const struct ieee80211_chan_req *chanreq) { - _ieee80211_change_chanctx(local, ctx, old_ctx, chandef, NULL); + _ieee80211_change_chanctx(local, ctx, old_ctx, chanreq, NULL); } static struct ieee80211_chanctx * ieee80211_find_chanctx(struct ieee80211_local *local, - const struct cfg80211_chan_def *chandef, + const struct ieee80211_chan_req *chanreq, enum ieee80211_chanctx_mode mode) { + struct ieee80211_chan_req tmp; struct ieee80211_chanctx *ctx; lockdep_assert_wiphy(local->hw.wiphy); @@ -544,7 +555,7 @@ ieee80211_find_chanctx(struct ieee80211_local *local, return NULL; list_for_each_entry(ctx, &local->chanctx_list, list) { - const struct cfg80211_chan_def *compat; + const struct ieee80211_chan_req *compat; if (ctx->replace_state != IEEE80211_CHANCTX_REPLACE_NONE) continue; @@ -552,11 +563,11 @@ ieee80211_find_chanctx(struct ieee80211_local *local, if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) continue; - compat = cfg80211_chandef_compatible(&ctx->conf.def, chandef); + compat = ieee80211_chanctx_compatible(ctx, chanreq, &tmp); if (!compat) continue; - compat = ieee80211_chanctx_reserved_chandef(local, ctx, + compat = ieee80211_chanctx_reserved_chanreq(local, ctx, compat); if (!compat) continue; @@ -636,7 +647,7 @@ ieee80211_chanctx_radar_required(struct ieee80211_local *local, static struct ieee80211_chanctx * ieee80211_alloc_chanctx(struct ieee80211_local *local, - const struct cfg80211_chan_def *chandef, + const struct ieee80211_chan_req *chanreq, enum ieee80211_chanctx_mode mode) { struct ieee80211_chanctx *ctx; @@ -649,7 +660,7 @@ ieee80211_alloc_chanctx(struct ieee80211_local *local, INIT_LIST_HEAD(&ctx->assigned_links); INIT_LIST_HEAD(&ctx->reserved_links); - ctx->conf.def = *chandef; + ctx->conf.def = chanreq->oper; ctx->conf.rx_chains_static = 1; ctx->conf.rx_chains_dynamic = 1; ctx->mode = mode; @@ -685,7 +696,7 @@ static int ieee80211_add_chanctx(struct ieee80211_local *local, static struct ieee80211_chanctx * ieee80211_new_chanctx(struct ieee80211_local *local, - const struct cfg80211_chan_def *chandef, + const struct ieee80211_chan_req *chanreq, enum ieee80211_chanctx_mode mode) { struct ieee80211_chanctx *ctx; @@ -693,7 +704,7 @@ ieee80211_new_chanctx(struct ieee80211_local *local, lockdep_assert_wiphy(local->hw.wiphy); - ctx = ieee80211_alloc_chanctx(local, chandef, mode); + ctx = ieee80211_alloc_chanctx(local, chanreq, mode); if (!ctx) return ERR_PTR(-ENOMEM); @@ -737,6 +748,7 @@ void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, struct ieee80211_chanctx_conf *conf = &ctx->conf; struct ieee80211_sub_if_data *sdata; const struct cfg80211_chan_def *compat = NULL; + struct ieee80211_chan_req chanreq = {}; struct sta_info *sta; lockdep_assert_wiphy(local->hw.wiphy); @@ -762,9 +774,9 @@ void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, continue; if (!compat) - compat = &link_conf->chandef; + compat = &link_conf->chanreq.oper; - compat = cfg80211_chandef_compatible(&link_conf->chandef, + compat = cfg80211_chandef_compatible(&link_conf->chanreq.oper, compat); if (WARN_ON_ONCE(!compat)) break; @@ -794,7 +806,9 @@ void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, if (!compat) return; - ieee80211_change_chanctx(local, ctx, ctx, compat); + chanreq.oper = *compat; + + ieee80211_change_chanctx(local, ctx, ctx, &chanreq); } static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, @@ -1050,7 +1064,7 @@ int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link) } int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link, - const struct cfg80211_chan_def *chandef, + const struct ieee80211_chan_req *chanreq, enum ieee80211_chanctx_mode mode, bool radar_required) { @@ -1064,10 +1078,10 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link, if (curr_ctx && !local->ops->switch_vif_chanctx) return -EOPNOTSUPP; - new_ctx = ieee80211_find_reservation_chanctx(local, chandef, mode); + new_ctx = ieee80211_find_reservation_chanctx(local, chanreq, mode); if (!new_ctx) { if (ieee80211_can_create_new_chanctx(local)) { - new_ctx = ieee80211_new_chanctx(local, chandef, mode); + new_ctx = ieee80211_new_chanctx(local, chanreq, mode); if (IS_ERR(new_ctx)) return PTR_ERR(new_ctx); } else { @@ -1121,7 +1135,7 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link, !list_empty(&curr_ctx->reserved_links)) return -EBUSY; - new_ctx = ieee80211_alloc_chanctx(local, chandef, mode); + new_ctx = ieee80211_alloc_chanctx(local, chanreq, mode); if (!new_ctx) return -ENOMEM; @@ -1139,7 +1153,7 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link, list_add(&link->reserved_chanctx_list, &new_ctx->reserved_links); link->reserved_chanctx = new_ctx; - link->reserved_chandef = *chandef; + link->reserved = *chanreq; link->reserved_radar_required = radar_required; link->reserved_ready = false; @@ -1178,14 +1192,14 @@ ieee80211_link_chanctx_reservation_complete(struct ieee80211_link_data *link) } static void -ieee80211_link_update_chandef(struct ieee80211_link_data *link, - const struct cfg80211_chan_def *chandef) +ieee80211_link_update_chanreq(struct ieee80211_link_data *link, + const struct ieee80211_chan_req *chanreq) { struct ieee80211_sub_if_data *sdata = link->sdata; unsigned int link_id = link->link_id; struct ieee80211_sub_if_data *vlan; - link->conf->chandef = *chandef; + link->conf->chanreq = *chanreq; if (sdata->vif.type != NL80211_IFTYPE_AP) return; @@ -1198,7 +1212,7 @@ ieee80211_link_update_chandef(struct ieee80211_link_data *link, if (WARN_ON(!vlan_conf)) continue; - vlan_conf->chandef = *chandef; + vlan_conf->chanreq = *chanreq; } rcu_read_unlock(); } @@ -1211,7 +1225,7 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link) struct ieee80211_local *local = sdata->local; struct ieee80211_vif_chanctx_switch vif_chsw[1] = {}; struct ieee80211_chanctx *old_ctx, *new_ctx; - const struct cfg80211_chan_def *chandef; + const struct ieee80211_chan_req *chanreq; u64 changed = 0; int err; @@ -1233,17 +1247,17 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link) IEEE80211_CHANCTX_REPLACES_OTHER)) return -EINVAL; - chandef = ieee80211_chanctx_non_reserved_chandef(local, new_ctx, - &link->reserved_chandef); - if (WARN_ON(!chandef)) + chanreq = ieee80211_chanctx_non_reserved_chandef(local, new_ctx, + &link->reserved); + if (WARN_ON(!chanreq)) return -EINVAL; - if (link_conf->chandef.width != link->reserved_chandef.width) + if (link_conf->chanreq.oper.width != link->reserved.oper.width) changed = BSS_CHANGED_BANDWIDTH; - ieee80211_link_update_chandef(link, &link->reserved_chandef); + ieee80211_link_update_chanreq(link, &link->reserved); - _ieee80211_change_chanctx(local, new_ctx, old_ctx, chandef, link); + _ieee80211_change_chanctx(local, new_ctx, old_ctx, chanreq, link); vif_chsw[0].vif = &sdata->vif; vif_chsw[0].old_ctx = &old_ctx->conf; @@ -1291,7 +1305,7 @@ ieee80211_link_use_reserved_assign(struct ieee80211_link_data *link) struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx *old_ctx, *new_ctx; - const struct cfg80211_chan_def *chandef; + const struct ieee80211_chan_req *chanreq; int err; old_ctx = ieee80211_link_get_chanctx(link); @@ -1310,12 +1324,12 @@ ieee80211_link_use_reserved_assign(struct ieee80211_link_data *link) IEEE80211_CHANCTX_REPLACES_OTHER)) return -EINVAL; - chandef = ieee80211_chanctx_non_reserved_chandef(local, new_ctx, - &link->reserved_chandef); - if (WARN_ON(!chandef)) + chanreq = ieee80211_chanctx_non_reserved_chandef(local, new_ctx, + &link->reserved); + if (WARN_ON(!chanreq)) return -EINVAL; - ieee80211_change_chanctx(local, new_ctx, new_ctx, chandef); + ieee80211_change_chanctx(local, new_ctx, new_ctx, chanreq); list_del(&link->reserved_chanctx_list); link->reserved_chanctx = NULL; @@ -1589,10 +1603,10 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) link->radar_required = link->reserved_radar_required; - if (link_conf->chandef.width != link->reserved_chandef.width) + if (link_conf->chanreq.oper.width != link->reserved.oper.width) changed = BSS_CHANGED_BANDWIDTH; - ieee80211_link_update_chandef(link, &link->reserved_chandef); + ieee80211_link_update_chanreq(link, &link->reserved); if (changed) ieee80211_link_info_change_notify(sdata, link, @@ -1727,7 +1741,7 @@ static void __ieee80211_link_release_channel(struct ieee80211_link_data *link) } int ieee80211_link_use_channel(struct ieee80211_link_data *link, - const struct cfg80211_chan_def *chandef, + const struct ieee80211_chan_req *chanreq, enum ieee80211_chanctx_mode mode) { struct ieee80211_sub_if_data *sdata = link->sdata; @@ -1740,36 +1754,36 @@ int ieee80211_link_use_channel(struct ieee80211_link_data *link, if (sdata->vif.active_links && !(sdata->vif.active_links & BIT(link->link_id))) { - ieee80211_link_update_chandef(link, chandef); + ieee80211_link_update_chanreq(link, chanreq); return 0; } ret = cfg80211_chandef_dfs_required(local->hw.wiphy, - chandef, + &chanreq->oper, sdata->wdev.iftype); if (ret < 0) goto out; if (ret > 0) - radar_detect_width = BIT(chandef->width); + radar_detect_width = BIT(chanreq->oper.width); link->radar_required = ret; - ret = ieee80211_check_combinations(sdata, chandef, mode, + ret = ieee80211_check_combinations(sdata, &chanreq->oper, mode, radar_detect_width); if (ret < 0) goto out; __ieee80211_link_release_channel(link); - ctx = ieee80211_find_chanctx(local, chandef, mode); + ctx = ieee80211_find_chanctx(local, chanreq, mode); if (!ctx) - ctx = ieee80211_new_chanctx(local, chandef, mode); + ctx = ieee80211_new_chanctx(local, chanreq, mode); if (IS_ERR(ctx)) { ret = PTR_ERR(ctx); goto out; } - ieee80211_link_update_chandef(link, chandef); + ieee80211_link_update_chanreq(link, chanreq); ret = ieee80211_assign_link_chanctx(link, ctx); if (ret) { @@ -1849,28 +1863,33 @@ int ieee80211_link_use_reserved_context(struct ieee80211_link_data *link) return 0; } -int ieee80211_link_change_bandwidth(struct ieee80211_link_data *link, - const struct cfg80211_chan_def *chandef, - u64 *changed) +int ieee80211_link_change_chanreq(struct ieee80211_link_data *link, + const struct ieee80211_chan_req *chanreq, + u64 *changed) { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_bss_conf *link_conf = link->conf; struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *ctx; - const struct cfg80211_chan_def *compat; + const struct ieee80211_chan_req *compat; + struct ieee80211_chan_req tmp; lockdep_assert_wiphy(local->hw.wiphy); - if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, + if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, + &chanreq->oper, IEEE80211_CHAN_DISABLED)) return -EINVAL; - if (cfg80211_chandef_identical(chandef, &link_conf->chandef)) + /* for non-HT 20 MHz the rest doesn't matter */ + if (chanreq->oper.width == NL80211_CHAN_WIDTH_20_NOHT && + cfg80211_chandef_identical(&chanreq->oper, &link_conf->chanreq.oper)) return 0; - if (chandef->width == NL80211_CHAN_WIDTH_20_NOHT || - link_conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT) + /* but you cannot switch to/from it */ + if (chanreq->oper.width == NL80211_CHAN_WIDTH_20_NOHT || + link_conf->chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT) return -EINVAL; conf = rcu_dereference_protected(link_conf->chanctx_conf, @@ -1880,13 +1899,13 @@ int ieee80211_link_change_bandwidth(struct ieee80211_link_data *link, ctx = container_of(conf, struct ieee80211_chanctx, conf); - compat = cfg80211_chandef_compatible(&conf->def, chandef); + compat = ieee80211_chanctx_compatible(ctx, chanreq, &tmp); if (!compat) return -EINVAL; switch (ctx->replace_state) { case IEEE80211_CHANCTX_REPLACE_NONE: - if (!ieee80211_chanctx_reserved_chandef(local, ctx, compat)) + if (!ieee80211_chanctx_reserved_chanreq(local, ctx, compat)) return -EBUSY; break; case IEEE80211_CHANCTX_WILL_BE_REPLACED: @@ -1901,7 +1920,7 @@ int ieee80211_link_change_bandwidth(struct ieee80211_link_data *link, break; } - ieee80211_link_update_chandef(link, chandef); + ieee80211_link_update_chanreq(link, chanreq); ieee80211_recalc_chanctx_chantype(local, ctx); diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index bccc99a003832..c3330aea4da3c 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -257,7 +257,7 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, if (WARN_ON(!link_conf)) width = NL80211_CHAN_WIDTH_20_NOHT; else - width = link_conf->chandef.width; + width = link_conf->chanreq.oper.width; switch (width) { default: diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index c23f46218af73..27cc9ddd0432a 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -223,7 +223,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; struct cfg80211_bss *bss; u64 bss_change; - struct cfg80211_chan_def chandef; + struct ieee80211_chan_req chanreq = {}; struct ieee80211_channel *chan; struct beacon_data *presp; struct cfg80211_inform_bss bss_meta = {}; @@ -257,22 +257,22 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, kfree_rcu(presp, rcu_head); /* make a copy of the chandef, it could be modified below. */ - chandef = *req_chandef; - chan = chandef.chan; - if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef, + chanreq.oper = *req_chandef; + chan = chanreq.oper.chan; + if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chanreq.oper, NL80211_IFTYPE_ADHOC)) { - if (chandef.width == NL80211_CHAN_WIDTH_5 || - chandef.width == NL80211_CHAN_WIDTH_10 || - chandef.width == NL80211_CHAN_WIDTH_20_NOHT || - chandef.width == NL80211_CHAN_WIDTH_20) { + if (chanreq.oper.width == NL80211_CHAN_WIDTH_5 || + chanreq.oper.width == NL80211_CHAN_WIDTH_10 || + chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || + chanreq.oper.width == NL80211_CHAN_WIDTH_20) { sdata_info(sdata, "Failed to join IBSS, beacons forbidden\n"); return; } - chandef.width = NL80211_CHAN_WIDTH_20; - chandef.center_freq1 = chan->center_freq; + chanreq.oper.width = NL80211_CHAN_WIDTH_20; + chanreq.oper.center_freq1 = chan->center_freq; /* check again for downgraded chandef */ - if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef, + if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chanreq.oper, NL80211_IFTYPE_ADHOC)) { sdata_info(sdata, "Failed to join IBSS, beacons forbidden\n"); @@ -281,7 +281,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, } err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy, - &chandef, NL80211_IFTYPE_ADHOC); + &chanreq.oper, NL80211_IFTYPE_ADHOC); if (err < 0) { sdata_info(sdata, "Failed to join IBSS, invalid chandef\n"); @@ -295,7 +295,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, radar_required = err; - if (ieee80211_link_use_channel(&sdata->deflink, &chandef, + if (ieee80211_link_use_channel(&sdata->deflink, &chanreq, ifibss->fixed_channel ? IEEE80211_CHANCTX_SHARED : IEEE80211_CHANCTX_EXCLUSIVE)) { @@ -307,7 +307,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, memcpy(ifibss->bssid, bssid, ETH_ALEN); presp = ieee80211_ibss_build_presp(sdata, beacon_int, basic_rates, - capability, tsf, &chandef, + capability, tsf, &chanreq.oper, &have_higher_than_11mbit, NULL); if (!presp) return; @@ -533,12 +533,12 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata, u64 *changed) IEEE80211_PRIVACY(ifibss->privacy)); /* XXX: should not really modify cfg80211 data */ if (cbss) { - cbss->channel = sdata->deflink.csa_chandef.chan; + cbss->channel = sdata->deflink.csa_chanreq.oper.chan; cfg80211_put_bss(sdata->local->hw.wiphy, cbss); } } - ifibss->chandef = sdata->deflink.csa_chandef; + ifibss->chandef = sdata->deflink.csa_chanreq.oper; /* generate the beacon */ return ieee80211_ibss_csa_beacon(sdata, NULL, changed); @@ -799,7 +799,7 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, goto disconnect; params.count = csa_ie.count; - params.chandef = csa_ie.chandef; + params.chandef = csa_ie.chanreq.oper; switch (ifibss->chandef.width) { case NL80211_CHAN_WIDTH_20_NOHT: @@ -858,7 +858,7 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, params.radar_required = err; if (cfg80211_chandef_identical(¶ms.chandef, - &sdata->vif.bss_conf.chandef)) { + &sdata->vif.bss_conf.chanreq.oper)) { ibss_dbg(sdata, "received csa with an identical chandef, ignoring\n"); return true; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index cb4684a9451e4..70c48cad180aa 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -884,6 +884,9 @@ struct ieee80211_chanctx { enum ieee80211_chanctx_mode mode; bool driver_present; + /* temporary data for search algorithm etc. */ + struct ieee80211_chan_req req; + struct ieee80211_chanctx_conf conf; }; @@ -1035,7 +1038,7 @@ struct ieee80211_link_data { bool operating_11g_mode; - struct cfg80211_chan_def csa_chandef; + struct ieee80211_chan_req csa_chanreq; struct wiphy_work color_change_finalize_work; struct delayed_work color_collision_detect_work; @@ -1043,7 +1046,7 @@ struct ieee80211_link_data { /* context reservation -- protected with wiphy mutex */ struct ieee80211_chanctx *reserved_chanctx; - struct cfg80211_chan_def reserved_chandef; + struct ieee80211_chan_req reserved; bool reserved_radar_required; bool reserved_ready; @@ -1574,7 +1577,7 @@ struct ieee80211_local { /* virtual monitor interface */ struct ieee80211_sub_if_data __rcu *monitor_sdata; - struct cfg80211_chan_def monitor_chandef; + struct ieee80211_chan_req monitor_chanreq; /* extended capabilities provided by mac80211 */ u8 ext_capa[8]; @@ -1639,7 +1642,7 @@ ieee80211_get_link_sband(struct ieee80211_link_data *link) /* this struct holds the value parsing from channel switch IE */ struct ieee80211_csa_ie { - struct cfg80211_chan_def chandef; + struct ieee80211_chan_req chanreq; u8 mode; u8 count; u8 ttl; @@ -2523,11 +2526,11 @@ void ieee80211_chandef_downgrade(struct cfg80211_chan_def *chandef, int __must_check ieee80211_link_use_channel(struct ieee80211_link_data *link, - const struct cfg80211_chan_def *chandef, + const struct ieee80211_chan_req *req, enum ieee80211_chanctx_mode mode); int __must_check ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link, - const struct cfg80211_chan_def *chandef, + const struct ieee80211_chan_req *req, enum ieee80211_chanctx_mode mode, bool radar_required); int __must_check @@ -2535,9 +2538,9 @@ ieee80211_link_use_reserved_context(struct ieee80211_link_data *link); int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link); int __must_check -ieee80211_link_change_bandwidth(struct ieee80211_link_data *link, - const struct cfg80211_chan_def *chandef, - u64 *changed); +ieee80211_link_change_chanreq(struct ieee80211_link_data *link, + const struct ieee80211_chan_req *req, + u64 *changed); void ieee80211_link_release_channel(struct ieee80211_link_data *link); void ieee80211_link_vlan_copy_chanctx(struct ieee80211_link_data *link); void ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index d81162bf7d48b..227c8dc3fbe5a 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -557,7 +557,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do &sdata->deflink.dfs_cac_timer_work); if (sdata->wdev.cac_started) { - chandef = sdata->vif.bss_conf.chandef; + chandef = sdata->vif.bss_conf.chanreq.oper; WARN_ON(local->suspended); ieee80211_link_release_channel(&sdata->deflink); cfg80211_cac_event(sdata->dev, &chandef, @@ -1164,7 +1164,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) rcu_assign_pointer(local->monitor_sdata, sdata); mutex_unlock(&local->iflist_mtx); - ret = ieee80211_link_use_channel(&sdata->deflink, &local->monitor_chandef, + ret = ieee80211_link_use_channel(&sdata->deflink, &local->monitor_chanreq, IEEE80211_CHANCTX_EXCLUSIVE); if (ret) { mutex_lock(&local->iflist_mtx); @@ -1252,7 +1252,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) sdata->vif.cab_queue = master->vif.cab_queue; memcpy(sdata->vif.hw_queue, master->vif.hw_queue, sizeof(sdata->vif.hw_queue)); - sdata->vif.bss_conf.chandef = master->vif.bss_conf.chandef; + sdata->vif.bss_conf.chanreq = master->vif.bss_conf.chanreq; sdata->crypto_tx_tailroom_needed_cnt += master->crypto_tx_tailroom_needed_cnt; diff --git a/net/mac80211/link.c b/net/mac80211/link.c index c0d05efcf6f8c..2231eb38a2110 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -402,7 +402,8 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, link = sdata_dereference(sdata->link[link_id], sdata); - ret = ieee80211_link_use_channel(link, &link->conf->chandef, + ret = ieee80211_link_use_channel(link, + &link->conf->chanreq, IEEE80211_CHANCTX_SHARED); WARN_ON_ONCE(ret); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index ce0cba8d7afc0..879abe216a3ee 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1229,7 +1229,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) &sband->channels[i], NL80211_CHAN_NO_HT); /* init channel we're on */ - local->monitor_chandef = dflt_chandef; + local->monitor_chanreq.oper = dflt_chandef; if (local->emulate_chanctx) { local->dflt_chandef = dflt_chandef; local->hw.conf.chandef = dflt_chandef; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 60dc453acb5ad..e06d9ed2d31b4 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -97,7 +97,7 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, if (sdata->vif.bss_conf.basic_rates != basic_rates) return false; - cfg80211_chandef_create(&sta_chan_def, sdata->vif.bss_conf.chandef.chan, + cfg80211_chandef_create(&sta_chan_def, sdata->vif.bss_conf.chanreq.oper.chan, NL80211_CHAN_NO_HT); ieee80211_chandef_ht_oper(ie->ht_operation, &sta_chan_def); @@ -111,7 +111,7 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, ie->eht_operation, &sta_chan_def); - if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chandef, + if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chanreq.oper, &sta_chan_def)) return false; @@ -436,9 +436,9 @@ int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata, return 0; if (!sband->ht_cap.ht_supported || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10) + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 || + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10) return 0; if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_ht_cap)) @@ -477,16 +477,16 @@ int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata, return 0; if (!ht_cap->ht_supported || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10) + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 || + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10) return 0; if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_ht_operation)) return -ENOMEM; pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_operation)); - ieee80211_ie_build_ht_oper(pos, ht_cap, &sdata->vif.bss_conf.chandef, + ieee80211_ie_build_ht_oper(pos, ht_cap, &sdata->vif.bss_conf.chanreq.oper, sdata->vif.bss_conf.ht_operation_mode, false); @@ -508,9 +508,9 @@ int mesh_add_vht_cap_ie(struct ieee80211_sub_if_data *sdata, return 0; if (!sband->vht_cap.vht_supported || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10) + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 || + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10) return 0; if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_vht_cap)) @@ -549,9 +549,9 @@ int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata, return 0; if (!vht_cap->vht_supported || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10) + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 || + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10) return 0; if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_vht_operation)) @@ -559,7 +559,7 @@ int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata, pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_operation)); ieee80211_ie_build_vht_oper(pos, vht_cap, - &sdata->vif.bss_conf.chandef); + &sdata->vif.bss_conf.chanreq.oper); return 0; } @@ -578,9 +578,9 @@ int mesh_add_he_cap_ie(struct ieee80211_sub_if_data *sdata, he_cap = ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT); if (!he_cap || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10) + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 || + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10) return 0; if (skb_tailroom(skb) < ie_len) @@ -606,20 +606,20 @@ int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata, he_cap = ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT); if (!he_cap || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10) + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 || + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10) return 0; len = 2 + 1 + sizeof(struct ieee80211_he_operation); - if (sdata->vif.bss_conf.chandef.chan->band == NL80211_BAND_6GHZ) + if (sdata->vif.bss_conf.chanreq.oper.chan->band == NL80211_BAND_6GHZ) len += sizeof(struct ieee80211_he_6ghz_oper); if (skb_tailroom(skb) < len) return -ENOMEM; pos = skb_put(skb, len); - ieee80211_ie_build_he_oper(pos, &sdata->vif.bss_conf.chandef); + ieee80211_ie_build_he_oper(pos, &sdata->vif.bss_conf.chanreq.oper); return 0; } @@ -659,9 +659,9 @@ int mesh_add_eht_cap_ie(struct ieee80211_sub_if_data *sdata, he_cap = ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT); eht_cap = ieee80211_get_eht_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT); if (!he_cap || !eht_cap || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10) + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 || + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10) return 0; if (skb_tailroom(skb) < ie_len) @@ -686,9 +686,9 @@ int mesh_add_eht_oper_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *sk eht_cap = ieee80211_get_eht_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT); if (!eht_cap || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10) + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 || + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10) return 0; len = 2 + 1 + offsetof(struct ieee80211_eht_operation, optional) + @@ -698,7 +698,7 @@ int mesh_add_eht_oper_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *sk return -ENOMEM; pos = skb_put(skb, len); - ieee80211_ie_build_eht_oper(pos, &sdata->vif.bss_conf.chandef, eht_cap); + ieee80211_ie_build_eht_oper(pos, &sdata->vif.bss_conf.chanreq.oper, eht_cap); return 0; } @@ -746,9 +746,9 @@ ieee80211_mesh_update_bss_params(struct ieee80211_sub_if_data *sdata, return; if (!ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT) || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10) + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 || + sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10) return; sdata->vif.bss_conf.he_support = true; @@ -1277,11 +1277,12 @@ static void ieee80211_mesh_csa_mark_radar(struct ieee80211_sub_if_data *sdata) * unavailable. */ err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy, - &sdata->vif.bss_conf.chandef, + &sdata->vif.bss_conf.chanreq.oper, NL80211_IFTYPE_MESH_POINT); if (err > 0) cfg80211_radar_event(sdata->local->hw.wiphy, - &sdata->vif.bss_conf.chandef, GFP_ATOMIC); + &sdata->vif.bss_conf.chanreq.oper, + GFP_ATOMIC); } static bool @@ -1302,7 +1303,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, if (!sband) return false; - switch (sdata->vif.bss_conf.chandef.width) { + switch (sdata->vif.bss_conf.chanreq.oper.width) { case NL80211_CHAN_WIDTH_20_NOHT: conn.mode = IEEE80211_CONN_MODE_LEGACY; conn.bw_limit = IEEE80211_CONN_BW_LIMIT_20; @@ -1339,7 +1340,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, if (csa_ie.reason_code == WLAN_REASON_MESH_CHAN_REGULATORY) ieee80211_mesh_csa_mark_radar(sdata); - params.chandef = csa_ie.chandef; + params.chandef = csa_ie.chanreq.oper; params.count = csa_ie.count; if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, ¶ms.chandef, @@ -1375,7 +1376,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, params.radar_required = err; if (cfg80211_chandef_identical(¶ms.chandef, - &sdata->vif.bss_conf.chandef)) { + &sdata->vif.bss_conf.chanreq.oper)) { mcsa_dbg(sdata, "received csa with an identical chandef, ignoring\n"); return true; @@ -1555,7 +1556,7 @@ int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata, u64 *changed) *changed |= BSS_CHANGED_BEACON; mcsa_dbg(sdata, "complete switching to center freq %d MHz", - sdata->vif.bss_conf.chandef.chan->center_freq); + sdata->vif.bss_conf.chanreq.oper.chan->center_freq); return 0; } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 28bf794f67f8c..e3aad60f68ab9 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -163,7 +163,7 @@ static u64 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) u16 ht_opmode; bool non_ht_sta = false, ht20_sta = false; - switch (sdata->vif.bss_conf.chandef.width) { + switch (sdata->vif.bss_conf.chanreq.oper.width) { case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: @@ -196,7 +196,7 @@ static u64 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) if (non_ht_sta) ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED; else if (ht20_sta && - sdata->vif.bss_conf.chandef.width > NL80211_CHAN_WIDTH_20) + sdata->vif.bss_conf.chanreq.oper.width > NL80211_CHAN_WIDTH_20) ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_20MHZ; else ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_NONE; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 9968bc0ddf6ea..e9d720f25ddf7 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -139,7 +139,7 @@ ieee80211_handle_puncturing_bitmap(struct ieee80211_link_data *link, const struct ieee80211_eht_operation *eht_oper, u16 bitmap, u64 *changed) { - struct cfg80211_chan_def *chandef = &link->conf->chandef; + struct cfg80211_chan_def *chandef = &link->conf->chanreq.oper; struct ieee80211_local *local = link->sdata->local; u16 extracted; u64 _changed = 0; @@ -862,8 +862,9 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link, struct ieee802_11_elems *elems, u64 *changed) { - struct ieee80211_channel *channel = link->conf->chandef.chan; + struct ieee80211_channel *channel = link->conf->chanreq.oper.chan; struct ieee80211_sub_if_data *sdata = link->sdata; + struct ieee80211_chan_req chanreq = {}; struct cfg80211_chan_def ap_chandef; enum ieee80211_conn_mode ap_mode; u32 vht_cap_info = 0; @@ -913,7 +914,7 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link, ieee80211_min_bw_limit_from_chandef(&ap_chandef)) ieee80211_chandef_downgrade(&ap_chandef, NULL); - if (cfg80211_chandef_identical(&ap_chandef, &link->conf->chandef)) + if (cfg80211_chandef_identical(&ap_chandef, &link->conf->chanreq.oper)) return 0; link_info(link, @@ -946,8 +947,9 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link, * bandwidth changes where a this could happen, but those cases are * less common and wouldn't completely prevent using the AP. */ + chanreq.oper = ap_chandef; - ret = ieee80211_link_change_bandwidth(link, &ap_chandef, changed); + ret = ieee80211_link_change_chanreq(link, &chanreq, changed); if (ret) { sdata_info(sdata, "AP %pM changed bandwidth to incompatible one - disconnect\n", @@ -2069,8 +2071,8 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy, return; } - if (!cfg80211_chandef_identical(&link->conf->chandef, - &link->csa_chandef)) { + if (!cfg80211_chandef_identical(&link->conf->chanreq.oper, + &link->csa_chanreq.oper)) { sdata_info(sdata, "failed to finalize channel switch, disconnecting\n"); wiphy_work_queue(sdata->local->hw.wiphy, @@ -2118,7 +2120,7 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link) return; } - cfg80211_ch_switch_notify(sdata->dev, &link->reserved_chandef, + cfg80211_ch_switch_notify(sdata->dev, &link->reserved.oper, link->link_id, 0); } @@ -2211,7 +2213,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, ch_switch.timestamp = timestamp; ch_switch.device_timestamp = device_timestamp; ch_switch.block_tx = csa_ie.mode; - ch_switch.chandef = csa_ie.chandef; + ch_switch.chandef = csa_ie.chanreq.oper; ch_switch.count = csa_ie.count; ch_switch.delay = csa_ie.max_switch_time; } @@ -2231,34 +2233,36 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, return; } - if (link->conf->chandef.chan->band != - csa_ie.chandef.chan->band) { + if (link->conf->chanreq.oper.chan->band != + csa_ie.chanreq.oper.chan->band) { sdata_info(sdata, "AP %pM switches to different band (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n", link->u.mgd.bssid, - csa_ie.chandef.chan->center_freq, - csa_ie.chandef.width, csa_ie.chandef.center_freq1, - csa_ie.chandef.center_freq2); + csa_ie.chanreq.oper.chan->center_freq, + csa_ie.chanreq.oper.width, + csa_ie.chanreq.oper.center_freq1, + csa_ie.chanreq.oper.center_freq2); goto drop_connection; } - if (!cfg80211_chandef_usable(local->hw.wiphy, &csa_ie.chandef, + if (!cfg80211_chandef_usable(local->hw.wiphy, &csa_ie.chanreq.oper, IEEE80211_CHAN_DISABLED)) { sdata_info(sdata, "AP %pM switches to unsupported channel " "(%d.%03d MHz, width:%d, CF1/2: %d.%03d/%d MHz), " "disconnecting\n", link->u.mgd.bssid, - csa_ie.chandef.chan->center_freq, - csa_ie.chandef.chan->freq_offset, - csa_ie.chandef.width, csa_ie.chandef.center_freq1, - csa_ie.chandef.freq1_offset, - csa_ie.chandef.center_freq2); + csa_ie.chanreq.oper.chan->center_freq, + csa_ie.chanreq.oper.chan->freq_offset, + csa_ie.chanreq.oper.width, + csa_ie.chanreq.oper.center_freq1, + csa_ie.chanreq.oper.freq1_offset, + csa_ie.chanreq.oper.center_freq2); goto drop_connection; } - if (cfg80211_chandef_identical(&csa_ie.chandef, - &link->conf->chandef) && + if (cfg80211_chandef_identical(&csa_ie.chanreq.oper, + &link->conf->chanreq.oper) && (!csa_ie.mode || !beacon)) { if (link->u.mgd.csa_ignored_same_chan) return; @@ -2299,7 +2303,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, goto drop_connection; } - res = ieee80211_link_reserve_chanctx(link, &csa_ie.chandef, + res = ieee80211_link_reserve_chanctx(link, &csa_ie.chanreq, chanctx->mode, false); if (res) { sdata_info(sdata, @@ -2309,7 +2313,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, } link->conf->csa_active = true; - link->csa_chandef = csa_ie.chandef; + link->csa_chanreq = csa_ie.chanreq; link->csa_block_tx = csa_ie.mode; link->u.mgd.csa_ignored_same_chan = false; link->u.mgd.beacon_crc_valid = false; @@ -2318,7 +2322,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, ieee80211_stop_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); - cfg80211_ch_switch_started_notify(sdata->dev, &csa_ie.chandef, + cfg80211_ch_switch_started_notify(sdata->dev, &csa_ie.chanreq.oper, link->link_id, csa_ie.count, csa_ie.mode, 0); @@ -2741,7 +2745,7 @@ void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work) struct ieee80211_link_data *link = container_of(work, struct ieee80211_link_data, dfs_cac_timer_work.work); - struct cfg80211_chan_def chandef = link->conf->chandef; + struct cfg80211_chan_def chandef = link->conf->chanreq.oper; struct ieee80211_sub_if_data *sdata = link->sdata; lockdep_assert_wiphy(sdata->local->hw.wiphy); @@ -4508,11 +4512,11 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, goto out; } - if (WARN_ON(!link->conf->chandef.chan)) { + if (WARN_ON(!link->conf->chanreq.oper.chan)) { ret = false; goto out; } - sband = local->hw.wiphy->bands[link->conf->chandef.chan->band]; + sband = local->hw.wiphy->bands[link->conf->chanreq.oper.chan->band]; /* Set up internal HT/VHT capabilities */ if (elems->ht_cap_elem && link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT) @@ -4580,7 +4584,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, } else if (is_6ghz) { link_info(link, "HE 6 GHz operation missing (on %d MHz), expect issues\n", - bss_conf->chandef.chan->center_freq); + bss_conf->chanreq.oper.chan->center_freq); } bss_conf->he_support = link_sta->pub->he_cap.has_he; @@ -5132,8 +5136,8 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, struct ieee80211_conn_settings *conn) { struct ieee80211_local *local = sdata->local; - struct cfg80211_chan_def chandef; bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ; + struct ieee80211_chan_req chanreq = {}; struct ieee802_11_elems *elems; int ret; u32 i; @@ -5142,7 +5146,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); elems = ieee80211_determine_chan_mode(sdata, conn, cbss, link_id, - &chandef); + &chanreq.oper); if (IS_ERR(elems)) { rcu_read_unlock(); @@ -5196,17 +5200,18 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, * on incompatible channels, e.g. 80+80 and 160 sharing the * same control channel) try to use a smaller bandwidth. */ - ret = ieee80211_link_use_channel(link, &chandef, + ret = ieee80211_link_use_channel(link, &chanreq, IEEE80211_CHANCTX_SHARED); /* don't downgrade for 5 and 10 MHz channels, though. */ - if (chandef.width == NL80211_CHAN_WIDTH_5 || - chandef.width == NL80211_CHAN_WIDTH_10) + if (chanreq.oper.width == NL80211_CHAN_WIDTH_5 || + chanreq.oper.width == NL80211_CHAN_WIDTH_10) return ret; - while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT) { - ieee80211_chandef_downgrade(&chandef, conn); - ret = ieee80211_link_use_channel(link, &chandef, + while (ret && chanreq.oper.width != NL80211_CHAN_WIDTH_20_NOHT) { + ieee80211_chandef_downgrade(&chanreq.oper, conn); + + ret = ieee80211_link_use_channel(link, &chanreq, IEEE80211_CHANCTX_SHARED); } @@ -5862,7 +5867,7 @@ static bool ieee80211_config_puncturing(struct ieee80211_link_data *link, } extracted = ieee80211_extract_dis_subch_bmap(eht_oper, - &link->conf->chandef, + &link->conf->chanreq.oper, bitmap); /* accept if there are no changes */ @@ -5871,12 +5876,12 @@ static bool ieee80211_config_puncturing(struct ieee80211_link_data *link, return true; if (!cfg80211_valid_disable_subchannel_bitmap(&bitmap, - &link->conf->chandef)) { + &link->conf->chanreq.oper)) { link_info(link, "Got an invalid disable subchannel bitmap from AP %pM: bitmap = 0x%x, bw = 0x%x. disconnect\n", link->u.mgd.bssid, bitmap, - link->conf->chandef.width); + link->conf->chanreq.oper.width); return false; } @@ -6573,10 +6578,10 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, goto free; } - if (WARN_ON(!link->conf->chandef.chan)) + if (WARN_ON(!link->conf->chanreq.oper.chan)) goto free; - sband = local->hw.wiphy->bands[link->conf->chandef.chan->band]; + sband = local->hw.wiphy->bands[link->conf->chanreq.oper.chan->band]; changed |= ieee80211_recalc_twt_req(sdata, sband, link, link_sta, elems); diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c index 449af4e1cca49..2dd4a2196af41 100644 --- a/net/mac80211/ocb.c +++ b/net/mac80211/ocb.c @@ -168,6 +168,7 @@ void ieee80211_ocb_setup_sdata(struct ieee80211_sub_if_data *sdata) int ieee80211_ocb_join(struct ieee80211_sub_if_data *sdata, struct ocb_setup *setup) { + struct ieee80211_chan_req chanreq = { .oper = setup->chandef }; struct ieee80211_local *local = sdata->local; struct ieee80211_if_ocb *ifocb = &sdata->u.ocb; u64 changed = BSS_CHANGED_OCB | BSS_CHANGED_BSSID; @@ -182,7 +183,7 @@ int ieee80211_ocb_join(struct ieee80211_sub_if_data *sdata, sdata->deflink.smps_mode = IEEE80211_SMPS_OFF; sdata->deflink.needed_rx_chains = sdata->local->rx_chains; - err = ieee80211_link_use_channel(&sdata->deflink, &setup->chandef, + err = ieee80211_link_use_channel(&sdata->deflink, &chanreq, IEEE80211_CHANCTX_SHARED); if (err) return err; diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index d5ea5f5bcf3a0..34e03b9522c8a 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -4,7 +4,7 @@ * Copyright 2005-2006, Devicescape Software, Inc. * Copyright (c) 2006 Jiri Benc * Copyright 2017 Intel Deutschland GmbH - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2019, 2022-2024 Intel Corporation */ #include @@ -278,10 +278,10 @@ void ieee80211_check_rate_mask(struct ieee80211_link_data *link) u32 user_mask, basic_rates = link->conf->basic_rates; enum nl80211_band band; - if (WARN_ON(!link->conf->chandef.chan)) + if (WARN_ON(!link->conf->chanreq.oper.chan)) return; - band = link->conf->chandef.chan->band; + band = link->conf->chanreq.oper.chan->band; if (band == NL80211_BAND_S1GHZ) { /* TODO */ return; @@ -761,7 +761,7 @@ static bool rate_control_cap_mask(struct ieee80211_sub_if_data *sdata, u32 i, flags; *mask = sdata->rc_rateidx_mask[sband->band]; - flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef); + flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chanreq.oper); for (i = 0; i < sband->n_bitrates; i++) { if ((flags & sband->bitrates[i].flags) != flags) *mask &= ~BIT(i); @@ -817,7 +817,7 @@ rate_control_apply_mask_ratetbl(struct sta_info *sta, mcs_mask, vht_mask)) return; - chan_width = sta->sdata->vif.bss_conf.chandef.width; + chan_width = sta->sdata->vif.bss_conf.chanreq.oper.width; for (i = 0; i < IEEE80211_TX_RATE_TABLE_SIZE; i++) { if (rates->rate[i].idx < 0) break; @@ -854,7 +854,7 @@ static void rate_control_apply_mask(struct ieee80211_sub_if_data *sdata, * included in the configured mask and change the rate indexes * if needed. */ - chan_width = sdata->vif.bss_conf.chandef.width; + chan_width = sdata->vif.bss_conf.chanreq.oper.width; for (i = 0; i < max_rates; i++) { /* Skip invalid rates */ if (rates[i].idx < 0) diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 51efc9bc81680..2b0bf2a1a877c 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -108,26 +108,26 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, default: /* secondary_channel_offset was present but is invalid */ case IEEE80211_HT_PARAM_CHA_SEC_NONE: - cfg80211_chandef_create(&csa_ie->chandef, new_chan, + cfg80211_chandef_create(&csa_ie->chanreq.oper, new_chan, NL80211_CHAN_HT20); break; case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: - cfg80211_chandef_create(&csa_ie->chandef, new_chan, + cfg80211_chandef_create(&csa_ie->chanreq.oper, new_chan, NL80211_CHAN_HT40PLUS); break; case IEEE80211_HT_PARAM_CHA_SEC_BELOW: - cfg80211_chandef_create(&csa_ie->chandef, new_chan, + cfg80211_chandef_create(&csa_ie->chanreq.oper, new_chan, NL80211_CHAN_HT40MINUS); break; case -1: - cfg80211_chandef_create(&csa_ie->chandef, new_chan, + cfg80211_chandef_create(&csa_ie->chanreq.oper, new_chan, NL80211_CHAN_NO_HT); /* keep width for 5/10 MHz channels */ - switch (sdata->vif.bss_conf.chandef.width) { + switch (sdata->vif.bss_conf.chanreq.oper.width) { case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: - csa_ie->chandef.width = - sdata->vif.bss_conf.chandef.width; + csa_ie->chanreq.oper.width = + sdata->vif.bss_conf.chanreq.oper.width; break; default: break; @@ -137,7 +137,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, if (bwi) { /* start with the CSA one */ - new_vht_chandef = csa_ie->chandef; + new_vht_chandef = csa_ie->chanreq.oper; /* and update the width accordingly */ ieee80211_chandef_eht_oper(&bwi->info, &new_vht_chandef); } else if (wide_bw_chansw_ie) { @@ -159,7 +159,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, /* default, for the case of IEEE80211_VHT_CHANWIDTH_USE_HT, * to the previously parsed chandef */ - new_vht_chandef = csa_ie->chandef; + new_vht_chandef = csa_ie->chanreq.oper; /* ignore if parsing fails */ if (!ieee80211_chandef_vht_oper(&sdata->local->hw, @@ -177,13 +177,13 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, /* if VHT data is there validate & use it */ if (new_vht_chandef.chan) { if (!cfg80211_chandef_compatible(&new_vht_chandef, - &csa_ie->chandef)) { + &csa_ie->chanreq.oper)) { sdata_info(sdata, "BSS %pM: CSA has inconsistent channel data, disconnecting\n", bssid); return -EINVAL; } - csa_ie->chandef = new_vht_chandef; + csa_ie->chanreq.oper = new_vht_chandef; } if (elems->max_channel_switch_time) diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 396fd54d8bf7b..0f4aa42e070f9 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -159,7 +159,7 @@ static void ieee80211_tdls_add_oper_classes(struct ieee80211_link_data *link, u8 *pos; u8 op_class; - if (!ieee80211_chandef_to_operating_class(&link->conf->chandef, + if (!ieee80211_chandef_to_operating_class(&link->conf->chanreq.oper, &op_class)) return; @@ -438,7 +438,7 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_link_data *link, if (WARN_ON_ONCE(!sta)) return; - sta->tdls_chandef = link->conf->chandef; + sta->tdls_chandef = link->conf->chanreq.oper; } ieee80211_tdls_add_oper_classes(link, skb); @@ -638,7 +638,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_link_data *link, if (WARN_ON_ONCE(!sta || !ap_sta)) return; - sta->tdls_chandef = link->conf->chandef; + sta->tdls_chandef = link->conf->chanreq.oper; /* add any custom IEs that go before the QoS IE */ if (extra_ies_len) { @@ -684,7 +684,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_link_data *link, pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_operation)); ieee80211_ie_build_ht_oper(pos, &sta->sta.deflink.ht_cap, - &link->conf->chandef, prot, + &link->conf->chanreq.oper, prot, true); } diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index e2dde3e77c301..efff20d7fe0ed 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -503,9 +503,9 @@ TRACE_EVENT(drv_link_info_changed, __entry->ht_operation_mode = link_conf->ht_operation_mode; __entry->cqm_rssi_thold = link_conf->cqm_rssi_thold; __entry->cqm_rssi_hyst = link_conf->cqm_rssi_hyst; - __entry->channel_width = link_conf->chandef.width; - __entry->channel_cfreq1 = link_conf->chandef.center_freq1; - __entry->channel_cfreq1_offset = link_conf->chandef.freq1_offset; + __entry->channel_width = link_conf->chanreq.oper.width; + __entry->channel_cfreq1 = link_conf->chanreq.oper.center_freq1; + __entry->channel_cfreq1_offset = link_conf->chanreq.oper.freq1_offset; __entry->qos = link_conf->qos; __entry->hidden_ssid = link_conf->hidden_ssid; __entry->txpower = link_conf->txpower; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 51c1a99f57b83..49eef33b5e702 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2309,7 +2309,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, * in order to maximize the chance that we get a response. Some * badly-behaved APs don't respond when this parameter is included. */ - chandef.width = sdata->vif.bss_conf.chandef.width; + chandef.width = sdata->vif.bss_conf.chanreq.oper.width; if (flags & IEEE80211_PROBE_FLAG_DIRECTED) chandef.chan = NULL; else @@ -2351,7 +2351,8 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata, if (WARN_ON(!sband)) return 1; - rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef); + rate_flags = + ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chanreq.oper); num_rates = sband->n_bitrates; supp_rates = 0; @@ -4026,7 +4027,8 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, u32 basic_rates = sdata->vif.bss_conf.basic_rates; u32 rate_flags; - rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef); + rate_flags = + ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chanreq.oper); sband = local->hw.wiphy->bands[band]; rates = 0; for (i = 0; i < sband->n_bitrates; i++) { @@ -4068,8 +4070,8 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, u32 basic_rates = sdata->vif.bss_conf.basic_rates; u32 rate_flags; - rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef); - + rate_flags = + ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chanreq.oper); sband = local->hw.wiphy->bands[band]; exrates = 0; for (i = 0; i < sband->n_bitrates; i++) { @@ -4312,7 +4314,7 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local) &sdata->deflink.dfs_cac_timer_work); if (sdata->wdev.cac_started) { - chandef = sdata->vif.bss_conf.chandef; + chandef = sdata->vif.bss_conf.chanreq.oper; ieee80211_link_release_channel(&sdata->deflink); cfg80211_cac_event(sdata->dev, &chandef, @@ -4756,7 +4758,7 @@ static u8 ieee80211_chanctx_radar_detect(struct ieee80211_local *local, list_for_each_entry(link, &ctx->reserved_links, reserved_chanctx_list) if (link->reserved_radar_required) - radar_detect |= BIT(link->reserved_chandef.width); + radar_detect |= BIT(link->reserved.oper.width); /* * An in-place reservation context should not have any assigned vifs @@ -4770,7 +4772,7 @@ static u8 ieee80211_chanctx_radar_detect(struct ieee80211_local *local, continue; radar_detect |= - BIT(link->conf->chandef.width); + BIT(link->conf->chanreq.oper.width); } return radar_detect; diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index b3a5c3e96a720..2c475c439ba9b 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -369,7 +369,7 @@ ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta) link_conf = rcu_dereference(sdata->vif.link_conf[link_id]); if (eht_cap->has_eht && - link_conf->chandef.chan->band == NL80211_BAND_6GHZ) { + link_conf->chanreq.oper.chan->band == NL80211_BAND_6GHZ) { info = eht_cap->eht_cap_elem.phy_cap_info[0]; if (info & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ) { @@ -380,7 +380,7 @@ ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta) info = he_cap->he_cap_elem.phy_cap_info[0]; - if (link_conf->chandef.chan->band == NL80211_BAND_2GHZ) { + if (link_conf->chanreq.oper.chan->band == NL80211_BAND_2GHZ) { if (info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G) ret = IEEE80211_STA_RX_BW_40; else @@ -515,7 +515,7 @@ ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta) if (WARN_ON(!link_conf)) bss_width = NL80211_CHAN_WIDTH_20_NOHT; else - bss_width = link_conf->chandef.width; + bss_width = link_conf->chanreq.oper.width; rcu_read_unlock(); bw = ieee80211_sta_cap_rx_bw(link_sta); -- cgit 1.2.3-korg From d1256c1546a0e03f103e2f0381103dc747ea7f81 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:34:41 +0100 Subject: wifi: mac80211: add and use a link iteration macro In the channel context code we have quite a few instances of nested loops iterating the interfaces and then links. Add a new for_each_sdata_link() macro and use it. Also, since it's easier, convert all the loops and a few other places away from RCU as we now hold the wiphy mutex everywhere anyway. This does cause a little bit more work (such as checking interface types for each link of an interface rather than not iterating links in some cases), but that's not a huge issue and seems like an acceptable trade-off, readability is important too. Link: https://msgid.link/20240129194108.7240829bd96d.I5ccbb8dd019cbcb5326c85d76121359225d6541a@changeid Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 246 ++++++++++++++------------------------------- net/mac80211/ieee80211_i.h | 13 +++ 2 files changed, 89 insertions(+), 170 deletions(-) (limited to 'net') diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index f1cef332e4dbe..c84449bdc9284 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -211,7 +211,7 @@ static enum nl80211_chan_width ieee80211_get_sta_bw(struct sta_info *sta, enum ieee80211_sta_rx_bandwidth width; struct link_sta_info *link_sta; - link_sta = rcu_dereference(sta->link[link_id]); + link_sta = wiphy_dereference(sta->local->hw.wiphy, sta->link[link_id]); /* no effect if this STA has no presence on this link */ if (!link_sta) @@ -249,9 +249,10 @@ static enum nl80211_chan_width ieee80211_get_sta_bw(struct sta_info *sta, } static enum nl80211_chan_width -ieee80211_get_max_required_bw(struct ieee80211_sub_if_data *sdata, - unsigned int link_id) +ieee80211_get_max_required_bw(struct ieee80211_link_data *link) { + struct ieee80211_sub_if_data *sdata = link->sdata; + unsigned int link_id = link->link_id; enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT; struct sta_info *sta; @@ -267,31 +268,25 @@ ieee80211_get_max_required_bw(struct ieee80211_sub_if_data *sdata, } static enum nl80211_chan_width -ieee80211_get_chanctx_vif_max_required_bw(struct ieee80211_sub_if_data *sdata, - struct ieee80211_chanctx *ctx, - struct ieee80211_link_data *rsvd_for) +ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx, + struct ieee80211_link_data *rsvd_for) { + struct ieee80211_sub_if_data *sdata; + struct ieee80211_link_data *link; enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT; - struct ieee80211_vif *vif = &sdata->vif; - int link_id; - rcu_read_lock(); - for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) { + for_each_sdata_link(local, link) { enum nl80211_chan_width width = NL80211_CHAN_WIDTH_20_NOHT; - struct ieee80211_link_data *link = - rcu_dereference(sdata->link[link_id]); - - if (!link) - continue; if (link != rsvd_for && rcu_access_pointer(link->conf->chanctx_conf) != &ctx->conf) continue; - switch (vif->type) { + switch (link->sdata->vif.type) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: - width = ieee80211_get_max_required_bw(sdata, link_id); + width = ieee80211_get_max_required_bw(link); break; case NL80211_IFTYPE_STATION: /* @@ -300,7 +295,7 @@ ieee80211_get_chanctx_vif_max_required_bw(struct ieee80211_sub_if_data *sdata, * account also for TDLS peers */ width = max(link->conf->chanreq.oper.width, - ieee80211_get_max_required_bw(sdata, link_id)); + ieee80211_get_max_required_bw(link)); break; case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_NAN: @@ -321,40 +316,13 @@ ieee80211_get_chanctx_vif_max_required_bw(struct ieee80211_sub_if_data *sdata, max_bw = max(max_bw, width); } - rcu_read_unlock(); - - return max_bw; -} - -static enum nl80211_chan_width -ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, - struct ieee80211_chanctx *ctx, - struct ieee80211_link_data *rsvd_for) -{ - struct ieee80211_sub_if_data *sdata; - enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT; - - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) { - enum nl80211_chan_width width; - - if (!ieee80211_sdata_running(sdata)) - continue; - - width = ieee80211_get_chanctx_vif_max_required_bw(sdata, ctx, - rsvd_for); - - max_bw = max(max_bw, width); - } /* use the configured bandwidth in case of monitor interface */ - sdata = rcu_dereference(local->monitor_sdata); + sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (sdata && rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) == &ctx->conf) max_bw = max(max_bw, ctx->conf.def.width); - rcu_read_unlock(); - return max_bw; } @@ -582,26 +550,14 @@ ieee80211_find_chanctx(struct ieee80211_local *local, bool ieee80211_is_radar_required(struct ieee80211_local *local) { - struct ieee80211_sub_if_data *sdata; + struct ieee80211_link_data *link; lockdep_assert_wiphy(local->hw.wiphy); - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) { - unsigned int link_id; - - for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) { - struct ieee80211_link_data *link; - - link = rcu_dereference(sdata->link[link_id]); - - if (link && link->radar_required) { - rcu_read_unlock(); - return true; - } - } + for_each_sdata_link(local, link) { + if (link->radar_required) + return true; } - rcu_read_unlock(); return false; } @@ -611,38 +567,19 @@ ieee80211_chanctx_radar_required(struct ieee80211_local *local, struct ieee80211_chanctx *ctx) { struct ieee80211_chanctx_conf *conf = &ctx->conf; - struct ieee80211_sub_if_data *sdata; - bool required = false; + struct ieee80211_link_data *link; lockdep_assert_wiphy(local->hw.wiphy); - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) { - unsigned int link_id; - - if (!ieee80211_sdata_running(sdata)) + for_each_sdata_link(local, link) { + if (rcu_access_pointer(link->conf->chanctx_conf) != conf) continue; - for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) { - struct ieee80211_link_data *link; - - link = rcu_dereference(sdata->link[link_id]); - if (!link) - continue; - - if (rcu_access_pointer(link->conf->chanctx_conf) != conf) - continue; - if (!link->radar_required) - continue; - required = true; - break; - } - - if (required) - break; + if (!link->radar_required) + continue; + return true; } - rcu_read_unlock(); - return required; + return false; } static struct ieee80211_chanctx * @@ -746,50 +683,38 @@ void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, struct ieee80211_chanctx *ctx) { struct ieee80211_chanctx_conf *conf = &ctx->conf; - struct ieee80211_sub_if_data *sdata; const struct cfg80211_chan_def *compat = NULL; + struct ieee80211_link_data *link; struct ieee80211_chan_req chanreq = {}; struct sta_info *sta; lockdep_assert_wiphy(local->hw.wiphy); - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) { - int link_id; + for_each_sdata_link(local, link) { + struct ieee80211_bss_conf *link_conf; - if (!ieee80211_sdata_running(sdata)) + if (link->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) continue; - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - continue; - - for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) { - struct ieee80211_bss_conf *link_conf = - rcu_dereference(sdata->vif.link_conf[link_id]); + link_conf = link->conf; - if (!link_conf) - continue; - - if (rcu_access_pointer(link_conf->chanctx_conf) != conf) - continue; + if (rcu_access_pointer(link_conf->chanctx_conf) != conf) + continue; - if (!compat) - compat = &link_conf->chanreq.oper; + if (!compat) + compat = &link_conf->chanreq.oper; - compat = cfg80211_chandef_compatible(&link_conf->chanreq.oper, - compat); - if (WARN_ON_ONCE(!compat)) - break; - } + compat = cfg80211_chandef_compatible(&link_conf->chanreq.oper, + compat); + if (WARN_ON_ONCE(!compat)) + return; } - if (WARN_ON_ONCE(!compat)) { - rcu_read_unlock(); + if (WARN_ON_ONCE(!compat)) return; - } /* TDLS peers can sometimes affect the chandef width */ - list_for_each_entry_rcu(sta, &local->sta_list, list) { + list_for_each_entry(sta, &local->sta_list, list) { if (!sta->uploaded || !test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW) || !test_sta_flag(sta, WLAN_STA_AUTHORIZED) || @@ -799,9 +724,8 @@ void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, compat = cfg80211_chandef_compatible(&sta->tdls_chandef, compat); if (WARN_ON_ONCE(!compat)) - break; + return; } - rcu_read_unlock(); if (!compat) return; @@ -895,23 +819,19 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, { struct ieee80211_sub_if_data *sdata; u8 rx_chains_static, rx_chains_dynamic; + struct ieee80211_link_data *link; lockdep_assert_wiphy(local->hw.wiphy); rx_chains_static = 1; rx_chains_dynamic = 1; - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) { + for_each_sdata_link(local, link) { u8 needed_static, needed_dynamic; - unsigned int link_id; - - if (!ieee80211_sdata_running(sdata)) - continue; - switch (sdata->vif.type) { + switch (link->sdata->vif.type) { case NL80211_IFTYPE_STATION: - if (!sdata->u.mgd.associated) + if (!link->sdata->u.mgd.associated) continue; break; case NL80211_IFTYPE_AP: @@ -923,49 +843,38 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, continue; } - for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) { - struct ieee80211_link_data *link; - - link = rcu_dereference(sdata->link[link_id]); - - if (!link) - continue; - - if (rcu_access_pointer(link->conf->chanctx_conf) != &chanctx->conf) - continue; - - switch (link->smps_mode) { - default: - WARN_ONCE(1, "Invalid SMPS mode %d\n", - link->smps_mode); - fallthrough; - case IEEE80211_SMPS_OFF: - needed_static = link->needed_rx_chains; - needed_dynamic = link->needed_rx_chains; - break; - case IEEE80211_SMPS_DYNAMIC: - needed_static = 1; - needed_dynamic = link->needed_rx_chains; - break; - case IEEE80211_SMPS_STATIC: - needed_static = 1; - needed_dynamic = 1; - break; - } + if (rcu_access_pointer(link->conf->chanctx_conf) != &chanctx->conf) + continue; - rx_chains_static = max(rx_chains_static, needed_static); - rx_chains_dynamic = max(rx_chains_dynamic, needed_dynamic); + switch (link->smps_mode) { + default: + WARN_ONCE(1, "Invalid SMPS mode %d\n", + link->smps_mode); + fallthrough; + case IEEE80211_SMPS_OFF: + needed_static = link->needed_rx_chains; + needed_dynamic = link->needed_rx_chains; + break; + case IEEE80211_SMPS_DYNAMIC: + needed_static = 1; + needed_dynamic = link->needed_rx_chains; + break; + case IEEE80211_SMPS_STATIC: + needed_static = 1; + needed_dynamic = 1; + break; } + + rx_chains_static = max(rx_chains_static, needed_static); + rx_chains_dynamic = max(rx_chains_dynamic, needed_dynamic); } /* Disable SMPS for the monitor interface */ - sdata = rcu_dereference(local->monitor_sdata); + sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (sdata && rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) == &chanctx->conf) rx_chains_dynamic = rx_chains_static = local->rx_chains; - rcu_read_unlock(); - if (rx_chains_static == chanctx->conf.rx_chains_static && rx_chains_dynamic == chanctx->conf.rx_chains_dynamic) return; @@ -1004,17 +913,16 @@ __ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link, if (clear) conf = NULL; - rcu_read_lock(); list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { struct ieee80211_bss_conf *vlan_conf; - vlan_conf = rcu_dereference(vlan->vif.link_conf[link_id]); + vlan_conf = wiphy_dereference(local->hw.wiphy, + vlan->vif.link_conf[link_id]); if (WARN_ON(!vlan_conf)) continue; rcu_assign_pointer(vlan_conf->chanctx_conf, conf); } - rcu_read_unlock(); } void ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link, @@ -1204,17 +1112,16 @@ ieee80211_link_update_chanreq(struct ieee80211_link_data *link, if (sdata->vif.type != NL80211_IFTYPE_AP) return; - rcu_read_lock(); list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { struct ieee80211_bss_conf *vlan_conf; - vlan_conf = rcu_dereference(vlan->vif.link_conf[link_id]); + vlan_conf = wiphy_dereference(sdata->local->hw.wiphy, + vlan->vif.link_conf[link_id]); if (WARN_ON(!vlan_conf)) continue; vlan_conf->chanreq = *chanreq; } - rcu_read_unlock(); } static int @@ -1955,12 +1862,11 @@ void ieee80211_link_vlan_copy_chanctx(struct ieee80211_link_data *link) ap = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); - rcu_read_lock(); - ap_conf = rcu_dereference(ap->vif.link_conf[link_id]); - conf = rcu_dereference_protected(ap_conf->chanctx_conf, - lockdep_is_held(&local->hw.wiphy->mtx)); + ap_conf = wiphy_dereference(local->hw.wiphy, + ap->vif.link_conf[link_id]); + conf = wiphy_dereference(local->hw.wiphy, + ap_conf->chanctx_conf); rcu_assign_pointer(link_conf->chanctx_conf, conf); - rcu_read_unlock(); } void ieee80211_iter_chan_contexts_atomic( diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 70c48cad180aa..601b889b6237e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1181,6 +1181,19 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p) #define sdata_dereference(p, sdata) \ wiphy_dereference(sdata->local->hw.wiphy, p) +#define for_each_sdata_link(_local, _link) \ + /* outer loop just to define the variables ... */ \ + for (struct ieee80211_sub_if_data *___sdata = NULL; \ + !___sdata; \ + ___sdata = (void *)~0 /* always stop */) \ + list_for_each_entry(___sdata, &(_local)->interfaces, list) \ + if (ieee80211_sdata_running(___sdata)) \ + for (int ___link_id = 0; \ + ___link_id < ARRAY_SIZE(___sdata->link); \ + ___link_id++) \ + if ((_link = wiphy_dereference((local)->hw.wiphy, \ + ___sdata->link[___link_id]))) + static inline int ieee80211_get_mbssid_beacon_len(struct cfg80211_mbssid_elems *elems, struct cfg80211_rnr_elems *rnr_elems, -- cgit 1.2.3-korg From 761748f001800d925c2ee8b04407e7aee12c3ffb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:34:42 +0100 Subject: wifi: mac80211: support wider bandwidth OFDMA config EHT requires that stations are able to participate in wider bandwidth OFDMA, i.e. parse downlink OFDMA and uplink OFDMA triggers when they're not capable of (or not connected at) the (wider) bandwidth that the AP is using. This requires hardware configuration, since the entity responsible for parsing (possibly hardware) needs to know the AP bandwidth. To support this, change the channel request to have the AP's bandwidth for clients, and track that in the channel context in mac80211. This means that the same chandef might need to be split up into two different contexts, if the APs are different. Interfaces other than client are not participating in OFDMA the same way, so they don't request any AP setting. Note that this doesn't introduce any API to split a channel context, so that there are cases where this might lead to a disconnect, e.g. if there are two client interfaces using the same channel context, e.g. both 160 MHz connected to different 320 MHz APs, and one of the APs switches to 160 MHz. Note also there are possible cases where this can be optimised, e.g. when using the upper or lower 160 Mhz, but I haven't been able to really fully understand the spec and/or hardware limitations. If, for some reason, there are no hardware limits on this because the OFDMA (downlink/trigger) parsing is done in firmware and can take the transmitter into account, then drivers can set the new flag IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW on interfaces to not have them request any AP bandwidth in the channel context and ignore this issue entirely. The bss_conf still contains the AP configuration (if any, i.e. EHT) in the chanreq. Link: https://msgid.link/20240129194108.d3d5b35dd783.I939d04674f4ff06f39934b1591c8d36a30ce74c2@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 14 ++++ net/mac80211/chan.c | 167 ++++++++++++++++++++++++++++++++++----------- net/mac80211/ieee80211_i.h | 13 ++++ net/mac80211/mlme.c | 60 +++++++++------- net/mac80211/trace.h | 31 +++++++-- 5 files changed, 215 insertions(+), 70 deletions(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index dd8a66e9afd9a..ab6bc89d33945 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -214,6 +214,8 @@ struct ieee80211_low_level_stats { * @IEEE80211_CHANCTX_CHANGE_CHANNEL: switched to another operating channel, * this is used only with channel switching with CSA * @IEEE80211_CHANCTX_CHANGE_MIN_WIDTH: The min required channel width changed + * @IEEE80211_CHANCTX_CHANGE_AP: The AP channel definition changed, so (wider + * bandwidth) OFDMA settings need to be changed */ enum ieee80211_chanctx_change { IEEE80211_CHANCTX_CHANGE_WIDTH = BIT(0), @@ -221,14 +223,18 @@ enum ieee80211_chanctx_change { IEEE80211_CHANCTX_CHANGE_RADAR = BIT(2), IEEE80211_CHANCTX_CHANGE_CHANNEL = BIT(3), IEEE80211_CHANCTX_CHANGE_MIN_WIDTH = BIT(4), + IEEE80211_CHANCTX_CHANGE_AP = BIT(5), }; /** * struct ieee80211_chan_req - A channel "request" * @oper: channel definition to use for operation + * @ap: the channel definition of the AP, if any + * (otherwise the chan member is %NULL) */ struct ieee80211_chan_req { struct cfg80211_chan_def oper; + struct cfg80211_chan_def ap; }; /** @@ -239,6 +245,8 @@ struct ieee80211_chan_req { * * @def: the channel definition * @min_def: the minimum channel definition currently required. + * @ap: the channel definition the AP actually is operating as, + * for use with (wider bandwidth) OFDMA * @rx_chains_static: The number of RX chains that must always be * active on the channel to receive MIMO transmissions * @rx_chains_dynamic: The number of RX chains that must be enabled @@ -251,6 +259,7 @@ struct ieee80211_chan_req { struct ieee80211_chanctx_conf { struct cfg80211_chan_def def; struct cfg80211_chan_def min_def; + struct cfg80211_chan_def ap; u8 rx_chains_static, rx_chains_dynamic; @@ -1782,6 +1791,10 @@ struct ieee80211_channel_switch { * this is not pure P2P vif. * @IEEE80211_VIF_EML_ACTIVE: The driver indicates that EML operation is * enabled for the interface. + * @IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW: Ignore wider bandwidth OFDMA + * operation on this interface and request a channel context without + * the AP definition. Use this e.g. because the device is able to + * handle OFDMA (downlink and trigger for uplink) on a per-AP basis. */ enum ieee80211_vif_flags { IEEE80211_VIF_BEACON_FILTER = BIT(0), @@ -1789,6 +1802,7 @@ enum ieee80211_vif_flags { IEEE80211_VIF_SUPPORTS_UAPSD = BIT(2), IEEE80211_VIF_GET_NOA_UPDATE = BIT(3), IEEE80211_VIF_EML_ACTIVE = BIT(4), + IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW = BIT(5), }; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index c84449bdc9284..fe1489ba58c69 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -81,22 +81,35 @@ ieee80211_link_get_chanctx(struct ieee80211_link_data *link) return container_of(conf, struct ieee80211_chanctx, conf); } +bool ieee80211_chanreq_identical(const struct ieee80211_chan_req *a, + const struct ieee80211_chan_req *b) +{ + if (!cfg80211_chandef_identical(&a->oper, &b->oper)) + return false; + if (!a->ap.chan && !b->ap.chan) + return true; + return cfg80211_chandef_identical(&a->ap, &b->ap); +} + static const struct ieee80211_chan_req * ieee80211_chanreq_compatible(const struct ieee80211_chan_req *a, - const struct ieee80211_chan_req *b) + const struct ieee80211_chan_req *b, + struct ieee80211_chan_req *tmp) { const struct cfg80211_chan_def *compat; - compat = cfg80211_chandef_compatible(&a->oper, &b->oper); - - if (compat == &a->oper) - return a; + if (a->ap.chan && b->ap.chan && + !cfg80211_chandef_identical(&a->ap, &b->ap)) + return NULL; - if (compat == &b->oper) - return b; + compat = cfg80211_chandef_compatible(&a->oper, &b->oper); + if (!compat) + return NULL; - WARN_ON(compat); - return NULL; + /* Note: later code assumes this always fills & returns tmp if compat */ + tmp->oper = *compat; + tmp->ap = a->ap.chan ? a->ap : b->ap; + return tmp; } static const struct ieee80211_chan_req * @@ -104,17 +117,26 @@ ieee80211_chanctx_compatible(struct ieee80211_chanctx *ctx, const struct ieee80211_chan_req *req, struct ieee80211_chan_req *tmp) { + const struct ieee80211_chan_req *ret; + struct ieee80211_chan_req tmp2; + *tmp = (struct ieee80211_chan_req){ .oper = ctx->conf.def, + .ap = ctx->conf.ap, }; - return ieee80211_chanreq_compatible(tmp, req); + ret = ieee80211_chanreq_compatible(tmp, req, &tmp2); + if (!ret) + return NULL; + *tmp = *ret; + return tmp; } static const struct ieee80211_chan_req * ieee80211_chanctx_reserved_chanreq(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, - const struct ieee80211_chan_req *req) + const struct ieee80211_chan_req *req, + struct ieee80211_chan_req *tmp) { struct ieee80211_link_data *link; @@ -124,7 +146,7 @@ ieee80211_chanctx_reserved_chanreq(struct ieee80211_local *local, return NULL; list_for_each_entry(link, &ctx->reserved_links, reserved_chanctx_list) { - req = ieee80211_chanreq_compatible(&link->reserved, req); + req = ieee80211_chanreq_compatible(&link->reserved, req, tmp); if (!req) break; } @@ -135,7 +157,8 @@ ieee80211_chanctx_reserved_chanreq(struct ieee80211_local *local, static const struct ieee80211_chan_req * ieee80211_chanctx_non_reserved_chandef(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, - const struct ieee80211_chan_req *compat) + const struct ieee80211_chan_req *compat, + struct ieee80211_chan_req *tmp) { struct ieee80211_link_data *link; const struct ieee80211_chan_req *comp_def = compat; @@ -149,7 +172,7 @@ ieee80211_chanctx_non_reserved_chandef(struct ieee80211_local *local, continue; comp_def = ieee80211_chanreq_compatible(&link_conf->chanreq, - comp_def); + comp_def, tmp); if (!comp_def) break; } @@ -162,16 +185,18 @@ ieee80211_chanctx_can_reserve(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, const struct ieee80211_chan_req *req) { + struct ieee80211_chan_req tmp; + lockdep_assert_wiphy(local->hw.wiphy); - if (!ieee80211_chanctx_reserved_chanreq(local, ctx, req)) + if (!ieee80211_chanctx_reserved_chanreq(local, ctx, req, &tmp)) return false; - if (!ieee80211_chanctx_non_reserved_chandef(local, ctx, req)) + if (!ieee80211_chanctx_non_reserved_chandef(local, ctx, req, &tmp)) return false; if (!list_empty(&ctx->reserved_links) && - ieee80211_chanctx_reserved_chanreq(local, ctx, req)) + ieee80211_chanctx_reserved_chanreq(local, ctx, req, &tmp)) return true; return false; @@ -456,7 +481,11 @@ static void _ieee80211_change_chanctx(struct ieee80211_local *local, struct ieee80211_link_data *rsvd_for) { const struct cfg80211_chan_def *chandef = &chanreq->oper; - u32 changed; + struct ieee80211_chan_req ctx_req = { + .oper = ctx->conf.def, + .ap = ctx->conf.ap, + }; + u32 changed = 0; /* expected to handle only 20/40/80/160/320 channel widths */ switch (chandef->width) { @@ -478,26 +507,31 @@ static void _ieee80211_change_chanctx(struct ieee80211_local *local, */ ieee80211_chan_bw_change(local, old_ctx, true); - if (cfg80211_chandef_identical(&ctx->conf.def, chandef)) { + if (ieee80211_chanreq_identical(&ctx_req, chanreq)) { ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for); return; } - WARN_ON(!cfg80211_chandef_compatible(&ctx->conf.def, chandef)); + WARN_ON(ieee80211_chanctx_refcount(local, ctx) > 1 && + !cfg80211_chandef_compatible(&ctx->conf.def, &chanreq->oper)); ieee80211_remove_wbrf(local, &ctx->conf.def); + if (!cfg80211_chandef_identical(&ctx->conf.def, &chanreq->oper)) + changed |= IEEE80211_CHANCTX_CHANGE_WIDTH; + if (!cfg80211_chandef_identical(&ctx->conf.ap, &chanreq->ap)) + changed |= IEEE80211_CHANCTX_CHANGE_AP; ctx->conf.def = *chandef; + ctx->conf.ap = chanreq->ap; /* check if min chanctx also changed */ - changed = IEEE80211_CHANCTX_CHANGE_WIDTH | - _ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for); + changed |= _ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for); ieee80211_add_wbrf(local, &ctx->conf.def); drv_change_chanctx(local, ctx, changed); - /* check is BW wider */ + /* check if BW is wider */ ieee80211_chan_bw_change(local, old_ctx, false); } @@ -536,7 +570,7 @@ ieee80211_find_chanctx(struct ieee80211_local *local, continue; compat = ieee80211_chanctx_reserved_chanreq(local, ctx, - compat); + compat, &tmp); if (!compat) continue; @@ -598,6 +632,7 @@ ieee80211_alloc_chanctx(struct ieee80211_local *local, INIT_LIST_HEAD(&ctx->assigned_links); INIT_LIST_HEAD(&ctx->reserved_links); ctx->conf.def = chanreq->oper; + ctx->conf.ap = chanreq->ap; ctx->conf.rx_chains_static = 1; ctx->conf.rx_chains_dynamic = 1; ctx->mode = mode; @@ -683,9 +718,9 @@ void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, struct ieee80211_chanctx *ctx) { struct ieee80211_chanctx_conf *conf = &ctx->conf; - const struct cfg80211_chan_def *compat = NULL; + const struct ieee80211_chan_req *compat = NULL; struct ieee80211_link_data *link; - struct ieee80211_chan_req chanreq = {}; + struct ieee80211_chan_req tmp; struct sta_info *sta; lockdep_assert_wiphy(local->hw.wiphy); @@ -702,10 +737,10 @@ void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, continue; if (!compat) - compat = &link_conf->chanreq.oper; + compat = &link_conf->chanreq; - compat = cfg80211_chandef_compatible(&link_conf->chanreq.oper, - compat); + compat = ieee80211_chanreq_compatible(&link_conf->chanreq, + compat, &tmp); if (WARN_ON_ONCE(!compat)) return; } @@ -715,24 +750,23 @@ void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, /* TDLS peers can sometimes affect the chandef width */ list_for_each_entry(sta, &local->sta_list, list) { + struct ieee80211_chan_req tdls_chanreq = {}; if (!sta->uploaded || !test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW) || !test_sta_flag(sta, WLAN_STA_AUTHORIZED) || !sta->tdls_chandef.chan) continue; - compat = cfg80211_chandef_compatible(&sta->tdls_chandef, - compat); + tdls_chanreq.oper = sta->tdls_chandef; + + /* note this always fills and returns &tmp if compat */ + compat = ieee80211_chanreq_compatible(&tdls_chanreq, + compat, &tmp); if (WARN_ON_ONCE(!compat)) return; } - if (!compat) - return; - - chanreq.oper = *compat; - - ieee80211_change_chanctx(local, ctx, ctx, &chanreq); + ieee80211_change_chanctx(local, ctx, ctx, compat); } static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, @@ -1133,6 +1167,7 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link) struct ieee80211_vif_chanctx_switch vif_chsw[1] = {}; struct ieee80211_chanctx *old_ctx, *new_ctx; const struct ieee80211_chan_req *chanreq; + struct ieee80211_chan_req tmp; u64 changed = 0; int err; @@ -1155,7 +1190,8 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link) return -EINVAL; chanreq = ieee80211_chanctx_non_reserved_chandef(local, new_ctx, - &link->reserved); + &link->reserved, + &tmp); if (WARN_ON(!chanreq)) return -EINVAL; @@ -1213,6 +1249,7 @@ ieee80211_link_use_reserved_assign(struct ieee80211_link_data *link) struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx *old_ctx, *new_ctx; const struct ieee80211_chan_req *chanreq; + struct ieee80211_chan_req tmp; int err; old_ctx = ieee80211_link_get_chanctx(link); @@ -1232,7 +1269,8 @@ ieee80211_link_use_reserved_assign(struct ieee80211_link_data *link) return -EINVAL; chanreq = ieee80211_chanctx_non_reserved_chandef(local, new_ctx, - &link->reserved); + &link->reserved, + &tmp); if (WARN_ON(!chanreq)) return -EINVAL; @@ -1770,6 +1808,52 @@ int ieee80211_link_use_reserved_context(struct ieee80211_link_data *link) return 0; } +/* + * This is similar to ieee80211_chanctx_compatible(), but rechecks + * against all the links actually using it (except the one that's + * passed, since that one is changing). + * This is done in order to allow changes to the AP's bandwidth for + * wider bandwidth OFDMA purposes, which wouldn't be treated as + * compatible by ieee80211_chanctx_recheck() but is OK if the link + * requesting the update is the only one using it. + */ +static const struct ieee80211_chan_req * +ieee80211_chanctx_recheck(struct ieee80211_local *local, + struct ieee80211_link_data *skip_link, + struct ieee80211_chanctx *ctx, + const struct ieee80211_chan_req *req, + struct ieee80211_chan_req *tmp) +{ + const struct ieee80211_chan_req *ret = req; + struct ieee80211_link_data *link; + + lockdep_assert_wiphy(local->hw.wiphy); + + for_each_sdata_link(local, link) { + if (link == skip_link) + continue; + + if (rcu_access_pointer(link->conf->chanctx_conf) == &ctx->conf) { + ret = ieee80211_chanreq_compatible(ret, + &link->conf->chanreq, + tmp); + if (!ret) + return NULL; + } + + if (link->reserved_chanctx == ctx) { + ret = ieee80211_chanreq_compatible(ret, + &link->reserved, + tmp); + if (!ret) + return NULL; + } + } + + *tmp = *ret; + return tmp; +} + int ieee80211_link_change_chanreq(struct ieee80211_link_data *link, const struct ieee80211_chan_req *chanreq, u64 *changed) @@ -1806,13 +1890,14 @@ int ieee80211_link_change_chanreq(struct ieee80211_link_data *link, ctx = container_of(conf, struct ieee80211_chanctx, conf); - compat = ieee80211_chanctx_compatible(ctx, chanreq, &tmp); + compat = ieee80211_chanctx_recheck(local, link, ctx, chanreq, &tmp); if (!compat) return -EINVAL; switch (ctx->replace_state) { case IEEE80211_CHANCTX_REPLACE_NONE: - if (!ieee80211_chanctx_reserved_chanreq(local, ctx, compat)) + if (!ieee80211_chanctx_reserved_chanreq(local, ctx, compat, + &tmp)) return -EBUSY; break; case IEEE80211_CHANCTX_WILL_BE_REPLACED: diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 601b889b6237e..534cac3fc8dfa 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2536,6 +2536,19 @@ bool ieee80211_chandef_s1g_oper(const struct ieee80211_s1g_oper_ie *oper, struct cfg80211_chan_def *chandef); void ieee80211_chandef_downgrade(struct cfg80211_chan_def *chandef, struct ieee80211_conn_settings *conn); +static inline void +ieee80211_chanreq_downgrade(struct ieee80211_chan_req *chanreq, + struct ieee80211_conn_settings *conn) +{ + ieee80211_chandef_downgrade(&chanreq->oper, conn); + if (WARN_ON(!conn)) + return; + if (conn->mode < IEEE80211_CONN_MODE_EHT) + chanreq->ap.chan = NULL; +} + +bool ieee80211_chanreq_identical(const struct ieee80211_chan_req *a, + const struct ieee80211_chan_req *b); int __must_check ieee80211_link_use_channel(struct ieee80211_link_data *link, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e9d720f25ddf7..31d6a56035825 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -665,7 +665,7 @@ static struct ieee802_11_elems * ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, struct ieee80211_conn_settings *conn, struct cfg80211_bss *cbss, int link_id, - struct cfg80211_chan_def *chandef) + struct ieee80211_chan_req *chanreq) { struct ieee80211_local *local = sdata->local; const struct cfg80211_bss_ies *ies = rcu_dereference(cbss->ies); @@ -752,20 +752,27 @@ again: } conn->mode = ap_mode; - *chandef = ap_chandef; + chanreq->oper = ap_chandef; - while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, + /* wider-bandwidth OFDMA is only done in EHT */ + if (conn->mode >= IEEE80211_CONN_MODE_EHT && + !(sdata->vif.driver_flags & IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW)) + chanreq->ap = ap_chandef; + else + chanreq->ap.chan = NULL; + + while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, &chanreq->oper, IEEE80211_CHAN_DISABLED)) { - if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) { + if (WARN_ON(chanreq->oper.width == NL80211_CHAN_WIDTH_20_NOHT)) { ret = -EINVAL; goto free; } - ieee80211_chandef_downgrade(chandef, conn); + ieee80211_chanreq_downgrade(chanreq, conn); } if (conn->mode >= IEEE80211_CONN_MODE_HE && - !cfg80211_chandef_usable(sdata->wdev.wiphy, chandef, + !cfg80211_chandef_usable(sdata->wdev.wiphy, &chanreq->oper, IEEE80211_CHAN_NO_HE)) { conn->mode = IEEE80211_CONN_MODE_VHT; conn->bw_limit = min_t(enum ieee80211_conn_bw_limit, @@ -774,7 +781,7 @@ again: } if (conn->mode >= IEEE80211_CONN_MODE_EHT && - !cfg80211_chandef_usable(sdata->wdev.wiphy, chandef, + !cfg80211_chandef_usable(sdata->wdev.wiphy, &chanreq->oper, IEEE80211_CHAN_NO_EHT)) { conn->mode = IEEE80211_CONN_MODE_HE; conn->bw_limit = min_t(enum ieee80211_conn_bw_limit, @@ -782,7 +789,7 @@ again: IEEE80211_CONN_BW_LIMIT_160); } - if (chandef->width != ap_chandef.width || ap_mode != conn->mode) + if (chanreq->oper.width != ap_chandef.width || ap_mode != conn->mode) sdata_info(sdata, "regulatory prevented using AP config, downgraded\n"); @@ -847,7 +854,7 @@ again: ieee80211_conn_mode_str(conn->mode), 20 * (1 << conn->bw_limit)); - if (WARN_ON_ONCE(!cfg80211_chandef_valid(chandef))) { + if (WARN_ON_ONCE(!cfg80211_chandef_valid(&chanreq->oper))) { ret = -EINVAL; goto free; } @@ -865,7 +872,6 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link, struct ieee80211_channel *channel = link->conf->chanreq.oper.chan; struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_chan_req chanreq = {}; - struct cfg80211_chan_def ap_chandef; enum ieee80211_conn_mode ap_mode; u32 vht_cap_info = 0; u16 ht_opmode; @@ -881,7 +887,7 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link, ap_mode = ieee80211_determine_ap_chan(sdata, channel, vht_cap_info, elems, true, &link->u.mgd.conn, - &ap_chandef); + &chanreq.ap); if (ap_mode != link->u.mgd.conn.mode) { link_info(link, @@ -891,6 +897,11 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link, return -EINVAL; } + chanreq.oper = chanreq.ap; + if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_EHT || + sdata->vif.driver_flags & IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW) + chanreq.ap.chan = NULL; + /* * if HT operation mode changed store the new one - * this may be applicable even if channel is identical @@ -911,20 +922,20 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link, * won't do us any good -- we couldn't use it with the AP. */ while (link->u.mgd.conn.bw_limit < - ieee80211_min_bw_limit_from_chandef(&ap_chandef)) - ieee80211_chandef_downgrade(&ap_chandef, NULL); + ieee80211_min_bw_limit_from_chandef(&chanreq.oper)) + ieee80211_chandef_downgrade(&chanreq.oper, NULL); - if (cfg80211_chandef_identical(&ap_chandef, &link->conf->chanreq.oper)) + if (ieee80211_chanreq_identical(&chanreq, &link->conf->chanreq)) return 0; link_info(link, - "AP %pM changed bandwidth, new config is %d.%03d MHz, width %d (%d.%03d/%d MHz)\n", - link->u.mgd.bssid, ap_chandef.chan->center_freq, - ap_chandef.chan->freq_offset, ap_chandef.width, - ap_chandef.center_freq1, ap_chandef.freq1_offset, - ap_chandef.center_freq2); + "AP %pM changed bandwidth, new used config is %d.%03d MHz, width %d (%d.%03d/%d MHz)\n", + link->u.mgd.bssid, chanreq.oper.chan->center_freq, + chanreq.oper.chan->freq_offset, chanreq.oper.width, + chanreq.oper.center_freq1, chanreq.oper.freq1_offset, + chanreq.oper.center_freq2); - if (!cfg80211_chandef_valid(&ap_chandef)) { + if (!cfg80211_chandef_valid(&chanreq.oper)) { sdata_info(sdata, "AP %pM changed caps/bw in a way we can't support - disconnect\n", link->u.mgd.bssid); @@ -947,7 +958,6 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link, * bandwidth changes where a this could happen, but those cases are * less common and wouldn't completely prevent using the AP. */ - chanreq.oper = ap_chandef; ret = ieee80211_link_change_chanreq(link, &chanreq, changed); if (ret) { @@ -2071,8 +2081,8 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy, return; } - if (!cfg80211_chandef_identical(&link->conf->chanreq.oper, - &link->csa_chanreq.oper)) { + if (!ieee80211_chanreq_identical(&link->conf->chanreq, + &link->csa_chanreq)) { sdata_info(sdata, "failed to finalize channel switch, disconnecting\n"); wiphy_work_queue(sdata->local->hw.wiphy, @@ -5146,7 +5156,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); elems = ieee80211_determine_chan_mode(sdata, conn, cbss, link_id, - &chanreq.oper); + &chanreq); if (IS_ERR(elems)) { rcu_read_unlock(); @@ -5209,7 +5219,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, return ret; while (ret && chanreq.oper.width != NL80211_CHAN_WIDTH_20_NOHT) { - ieee80211_chandef_downgrade(&chanreq.oper, conn); + ieee80211_chanreq_downgrade(&chanreq, conn); ret = ieee80211_link_use_channel(link, &chanreq, IEEE80211_CHANCTX_SHARED); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index efff20d7fe0ed..478b32d2520a1 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -50,7 +50,7 @@ __entry->center_freq1 = (c) ? (c)->center_freq1 : 0; \ __entry->freq1_offset = (c) ? (c)->freq1_offset : 0; \ __entry->center_freq2 = (c) ? (c)->center_freq2 : 0; -#define CHANDEF_PR_FMT " control:%d.%03d MHz width:%d center: %d.%03d/%d MHz" +#define CHANDEF_PR_FMT " chandef(%d.%03d MHz,width:%d,center: %d.%03d/%d MHz)" #define CHANDEF_PR_ARG __entry->control_freq, __entry->freq_offset, __entry->chan_width, \ __entry->center_freq1, __entry->freq1_offset, __entry->center_freq2 @@ -69,22 +69,45 @@ __entry->min_center_freq1 = (c)->center_freq1; \ __entry->min_freq1_offset = (c)->freq1_offset; \ __entry->min_center_freq2 = (c)->center_freq2; -#define MIN_CHANDEF_PR_FMT " min_control:%d.%03d MHz min_width:%d min_center: %d.%03d/%d MHz" +#define MIN_CHANDEF_PR_FMT " mindef(%d.%03d MHz,width:%d,center: %d.%03d/%d MHz)" #define MIN_CHANDEF_PR_ARG __entry->min_control_freq, __entry->min_freq_offset, \ __entry->min_chan_width, \ __entry->min_center_freq1, __entry->min_freq1_offset, \ __entry->min_center_freq2 +#define AP_CHANDEF_ENTRY \ + __field(u32, ap_control_freq) \ + __field(u32, ap_freq_offset) \ + __field(u32, ap_chan_width) \ + __field(u32, ap_center_freq1) \ + __field(u32, ap_freq1_offset) \ + __field(u32, ap_center_freq2) + +#define AP_CHANDEF_ASSIGN(c) \ + __entry->ap_control_freq = (c)->chan ? (c)->chan->center_freq : 0;\ + __entry->ap_freq_offset = (c)->chan ? (c)->chan->freq_offset : 0;\ + __entry->ap_chan_width = (c)->chan ? (c)->width : 0; \ + __entry->ap_center_freq1 = (c)->chan ? (c)->center_freq1 : 0; \ + __entry->ap_freq1_offset = (c)->chan ? (c)->freq1_offset : 0; \ + __entry->ap_center_freq2 = (c)->chan ? (c)->center_freq2 : 0; +#define AP_CHANDEF_PR_FMT " ap(%d.%03d MHz,width:%d,center: %d.%03d/%d MHz)" +#define AP_CHANDEF_PR_ARG __entry->ap_control_freq, __entry->ap_freq_offset, \ + __entry->ap_chan_width, \ + __entry->ap_center_freq1, __entry->ap_freq1_offset, \ + __entry->ap_center_freq2 + #define CHANCTX_ENTRY CHANDEF_ENTRY \ MIN_CHANDEF_ENTRY \ + AP_CHANDEF_ENTRY \ __field(u8, rx_chains_static) \ __field(u8, rx_chains_dynamic) #define CHANCTX_ASSIGN CHANDEF_ASSIGN(&ctx->conf.def) \ MIN_CHANDEF_ASSIGN(&ctx->conf.min_def) \ + AP_CHANDEF_ASSIGN(&ctx->conf.ap) \ __entry->rx_chains_static = ctx->conf.rx_chains_static; \ __entry->rx_chains_dynamic = ctx->conf.rx_chains_dynamic -#define CHANCTX_PR_FMT CHANDEF_PR_FMT MIN_CHANDEF_PR_FMT " chains:%d/%d" -#define CHANCTX_PR_ARG CHANDEF_PR_ARG, MIN_CHANDEF_PR_ARG, \ +#define CHANCTX_PR_FMT CHANDEF_PR_FMT MIN_CHANDEF_PR_FMT AP_CHANDEF_PR_FMT " chains:%d/%d" +#define CHANCTX_PR_ARG CHANDEF_PR_ARG, MIN_CHANDEF_PR_ARG, AP_CHANDEF_PR_ARG, \ __entry->rx_chains_static, __entry->rx_chains_dynamic #define KEY_ENTRY __field(u32, cipher) \ -- cgit 1.2.3-korg From 6bc574a7cd27ba0c8d77425055a8fd672ec928f2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:34:43 +0100 Subject: wifi: mac80211: validate assoc response channel config Due to the earlier restructuring we now mostly ignore the channel configuration in the association response, apart from the HT/VHT checks we had. Don't do that, but parse it and update, also dropping the association if the AP changed its mode in the response. Link: https://msgid.link/20240129194108.b3efa5eae60c.I1b70c9fd56781b22cdfdca55d34d69f7d0733e31@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 31d6a56035825..da202103faf0e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -867,7 +867,7 @@ free: static int ieee80211_config_bw(struct ieee80211_link_data *link, struct ieee802_11_elems *elems, - u64 *changed) + bool update, u64 *changed) { struct ieee80211_channel *channel = link->conf->chanreq.oper.chan; struct ieee80211_sub_if_data *sdata = link->sdata; @@ -942,6 +942,11 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link, return -EINVAL; } + if (!update) { + link->conf->chanreq = chanreq; + return 0; + } + /* * We're tracking the current AP here, so don't do any further checks * here. This keeps us from playing ping-pong with regulatory, without @@ -4505,6 +4510,8 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, /* * We previously checked these in the beacon/probe response, so * they should be present here. This is just a safety net. + * Note that the ieee80211_config_bw() below would also check + * for this (and more), but this has better error reporting. */ if (!is_6ghz && link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT && (!elems->wmm_param || !elems->ht_cap_elem || !elems->ht_operation)) { @@ -4522,6 +4529,14 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, goto out; } + /* check/update if AP changed anything in assoc response vs. scan */ + if (ieee80211_config_bw(link, elems, + link_id == assoc_data->assoc_link_id, + changed)) { + ret = false; + goto out; + } + if (WARN_ON(!link->conf->chanreq.oper.chan)) { ret = false; goto out; @@ -6595,7 +6610,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, changed |= ieee80211_recalc_twt_req(sdata, sband, link, link_sta, elems); - if (ieee80211_config_bw(link, elems, &changed)) { + if (ieee80211_config_bw(link, elems, true, &changed)) { ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DEAUTH_LEAVING, true, deauth_buf); -- cgit 1.2.3-korg From 719036ae06d4bfdb65139e3947a8404dec298bc5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:34:44 +0100 Subject: wifi: cfg80211: move puncturing validation code Upcoming patches will move the puncturing bitmap into the chandef, so chandef validation will need to check for correct puncturing. Purely move the code first so later changes are easier to review. Link: https://msgid.link/20240129194108.1ca184427c76.I077deb8d52c4648eac145b63f88b6c5a3b920ddc@changeid Signed-off-by: Johannes Berg --- net/wireless/chan.c | 138 ++++++++++++++++++++++++++-------------------------- 1 file changed, 69 insertions(+), 69 deletions(-) (limited to 'net') diff --git a/net/wireless/chan.c b/net/wireless/chan.c index ceb9174c5c3df..71f1bd456d88c 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -56,6 +56,75 @@ void cfg80211_chandef_create(struct cfg80211_chan_def *chandef, } EXPORT_SYMBOL(cfg80211_chandef_create); +struct cfg80211_per_bw_puncturing_values { + u8 len; + const u16 *valid_values; +}; + +static const u16 puncturing_values_80mhz[] = { + 0x8, 0x4, 0x2, 0x1 +}; + +static const u16 puncturing_values_160mhz[] = { + 0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1, 0xc0, 0x30, 0xc, 0x3 +}; + +static const u16 puncturing_values_320mhz[] = { + 0xc000, 0x3000, 0xc00, 0x300, 0xc0, 0x30, 0xc, 0x3, 0xf000, 0xf00, + 0xf0, 0xf, 0xfc00, 0xf300, 0xf0c0, 0xf030, 0xf00c, 0xf003, 0xc00f, + 0x300f, 0xc0f, 0x30f, 0xcf, 0x3f +}; + +#define CFG80211_PER_BW_VALID_PUNCTURING_VALUES(_bw) \ + { \ + .len = ARRAY_SIZE(puncturing_values_ ## _bw ## mhz), \ + .valid_values = puncturing_values_ ## _bw ## mhz \ + } + +static const struct cfg80211_per_bw_puncturing_values per_bw_puncturing[] = { + CFG80211_PER_BW_VALID_PUNCTURING_VALUES(80), + CFG80211_PER_BW_VALID_PUNCTURING_VALUES(160), + CFG80211_PER_BW_VALID_PUNCTURING_VALUES(320) +}; + +bool cfg80211_valid_disable_subchannel_bitmap(u16 *bitmap, + const struct cfg80211_chan_def *chandef) +{ + u32 idx, i, start_freq; + + switch (chandef->width) { + case NL80211_CHAN_WIDTH_80: + idx = 0; + start_freq = chandef->center_freq1 - 40; + break; + case NL80211_CHAN_WIDTH_160: + idx = 1; + start_freq = chandef->center_freq1 - 80; + break; + case NL80211_CHAN_WIDTH_320: + idx = 2; + start_freq = chandef->center_freq1 - 160; + break; + default: + *bitmap = 0; + break; + } + + if (!*bitmap) + return true; + + /* check if primary channel is punctured */ + if (*bitmap & (u16)BIT((chandef->chan->center_freq - start_freq) / 20)) + return false; + + for (i = 0; i < per_bw_puncturing[idx].len; i++) + if (per_bw_puncturing[idx].valid_values[i] == *bitmap) + return true; + + return false; +} +EXPORT_SYMBOL(cfg80211_valid_disable_subchannel_bitmap); + static bool cfg80211_edmg_chandef_valid(const struct cfg80211_chan_def *chandef) { int max_contiguous = 0; @@ -1532,72 +1601,3 @@ struct cfg80211_chan_def *wdev_chandef(struct wireless_dev *wdev, } } EXPORT_SYMBOL(wdev_chandef); - -struct cfg80211_per_bw_puncturing_values { - u8 len; - const u16 *valid_values; -}; - -static const u16 puncturing_values_80mhz[] = { - 0x8, 0x4, 0x2, 0x1 -}; - -static const u16 puncturing_values_160mhz[] = { - 0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1, 0xc0, 0x30, 0xc, 0x3 -}; - -static const u16 puncturing_values_320mhz[] = { - 0xc000, 0x3000, 0xc00, 0x300, 0xc0, 0x30, 0xc, 0x3, 0xf000, 0xf00, - 0xf0, 0xf, 0xfc00, 0xf300, 0xf0c0, 0xf030, 0xf00c, 0xf003, 0xc00f, - 0x300f, 0xc0f, 0x30f, 0xcf, 0x3f -}; - -#define CFG80211_PER_BW_VALID_PUNCTURING_VALUES(_bw) \ - { \ - .len = ARRAY_SIZE(puncturing_values_ ## _bw ## mhz), \ - .valid_values = puncturing_values_ ## _bw ## mhz \ - } - -static const struct cfg80211_per_bw_puncturing_values per_bw_puncturing[] = { - CFG80211_PER_BW_VALID_PUNCTURING_VALUES(80), - CFG80211_PER_BW_VALID_PUNCTURING_VALUES(160), - CFG80211_PER_BW_VALID_PUNCTURING_VALUES(320) -}; - -bool cfg80211_valid_disable_subchannel_bitmap(u16 *bitmap, - const struct cfg80211_chan_def *chandef) -{ - u32 idx, i, start_freq; - - switch (chandef->width) { - case NL80211_CHAN_WIDTH_80: - idx = 0; - start_freq = chandef->center_freq1 - 40; - break; - case NL80211_CHAN_WIDTH_160: - idx = 1; - start_freq = chandef->center_freq1 - 80; - break; - case NL80211_CHAN_WIDTH_320: - idx = 2; - start_freq = chandef->center_freq1 - 160; - break; - default: - *bitmap = 0; - break; - } - - if (!*bitmap) - return true; - - /* check if primary channel is punctured */ - if (*bitmap & (u16)BIT((chandef->chan->center_freq - start_freq) / 20)) - return false; - - for (i = 0; i < per_bw_puncturing[idx].len; i++) - if (per_bw_puncturing[idx].valid_values[i] == *bitmap) - return true; - - return false; -} -EXPORT_SYMBOL(cfg80211_valid_disable_subchannel_bitmap); -- cgit 1.2.3-korg From c478db84c8544156b80c5e5d3a8c7840d557707a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:34:45 +0100 Subject: wifi: mac80211: refactor puncturing bitmap extraction Add a new inline helper function to ieee80211.h to extract the disabled subchannels bitmap from an EHT operation element, and use that in mac80211 where we do that. Link: https://msgid.link/20240129194108.d9f50dcec8d0.I8b08cbc2490a734fafcce0fa0fc328211ba6f10b@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 16 +++++++++++++++ net/mac80211/mlme.c | 52 ++++++++++++++++------------------------------- 2 files changed, 33 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a70388ae3a7b0..d9d2c12531579 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -3189,6 +3189,22 @@ ieee80211_eht_oper_size_ok(const u8 *data, u8 len) return len >= needed; } +/* must validate ieee80211_eht_oper_size_ok() first */ +static inline u16 +ieee80211_eht_oper_dis_subchan_bitmap(const struct ieee80211_eht_operation *eht_oper) +{ + const struct ieee80211_eht_operation_info *info = + (const void *)eht_oper->optional; + + if (!(eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT)) + return 0; + + if (!(eht_oper->params & IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT)) + return 0; + + return get_unaligned_le16(info->optional); +} + #define IEEE80211_BW_IND_DIS_SUBCH_PRESENT BIT(1) struct ieee80211_bandwidth_indication { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index da202103faf0e..74a15f18e7eea 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -813,36 +813,27 @@ again: } if (conn->mode >= IEEE80211_CONN_MODE_EHT) { - const struct ieee80211_eht_operation *eht_oper; + u16 bitmap; - eht_oper = elems->eht_operation; - - if (WARN_ON_ONCE(!eht_oper)) { + if (WARN_ON_ONCE(!elems->eht_operation)) { ret = -EINVAL; goto free; } - if (eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT && - eht_oper->params & IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT) { - const struct ieee80211_eht_operation_info *info = - (void *)eht_oper->optional; - const u8 *disable_subchannel_bitmap = info->optional; - u16 bitmap; - - bitmap = get_unaligned_le16(disable_subchannel_bitmap); - if (!cfg80211_valid_disable_subchannel_bitmap(&bitmap, - &ap_chandef) || - (bitmap && - ieee80211_hw_check(&local->hw, DISALLOW_PUNCTURING))) { - conn->mode = IEEE80211_CONN_MODE_HE; - conn->bw_limit = min_t(enum ieee80211_conn_bw_limit, - conn->bw_limit, - IEEE80211_CONN_BW_LIMIT_160); - sdata_info(sdata, - "AP has invalid/unsupported puncturing, disabling EHT\n"); - } - /* FIXME: store puncturing bitmap */ + bitmap = ieee80211_eht_oper_dis_subchan_bitmap(elems->eht_operation); + + if (!cfg80211_valid_disable_subchannel_bitmap(&bitmap, + &ap_chandef) || + (bitmap && + ieee80211_hw_check(&local->hw, DISALLOW_PUNCTURING))) { + conn->mode = IEEE80211_CONN_MODE_HE; + conn->bw_limit = min_t(enum ieee80211_conn_bw_limit, + conn->bw_limit, + IEEE80211_CONN_BW_LIMIT_160); + sdata_info(sdata, + "AP has invalid/unsupported puncturing, disabling EHT\n"); } + /* FIXME: store puncturing bitmap */ } /* the mode can only decrease, so this must terminate */ @@ -5879,18 +5870,9 @@ static bool ieee80211_config_puncturing(struct ieee80211_link_data *link, u64 *changed) { struct ieee80211_local *local = link->sdata->local; - u16 bitmap = 0, extracted; - - if ((eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT) && - (eht_oper->params & - IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT)) { - const struct ieee80211_eht_operation_info *info = - (void *)eht_oper->optional; - const u8 *disable_subchannel_bitmap = info->optional; - - bitmap = get_unaligned_le16(disable_subchannel_bitmap); - } + u16 bitmap, extracted; + bitmap = ieee80211_eht_oper_dis_subchan_bitmap(eht_oper); extracted = ieee80211_extract_dis_subch_bmap(eht_oper, &link->conf->chanreq.oper, bitmap); -- cgit 1.2.3-korg From b9d908dc3a294d25c7d6c2f54ca3987cbd98f040 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:34:46 +0100 Subject: wifi: wireless: declare different S1G chandefs incompatible It doesn't look like we can get into this code, but make it more robust and declare two S1G chandefs to be incompatible unless they're identical. Link: https://msgid.link/20240129194108.b28fb0644a8c.I9297ada5cf1baf00dbbdf8fcffd1806883489fc9@changeid Signed-off-by: Johannes Berg --- net/wireless/chan.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 71f1bd456d88c..159b8aac451e3 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -465,13 +465,18 @@ cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, return NULL; /* - * can't be compatible if one of them is 5 or 10 MHz, + * can't be compatible if one of them is 5/10 MHz or S1G * but they don't have the same width. */ - if (c1->width == NL80211_CHAN_WIDTH_5 || - c1->width == NL80211_CHAN_WIDTH_10 || - c2->width == NL80211_CHAN_WIDTH_5 || - c2->width == NL80211_CHAN_WIDTH_10) +#define NARROW_OR_S1G(width) ((width) == NL80211_CHAN_WIDTH_5 || \ + (width) == NL80211_CHAN_WIDTH_10 || \ + (width) == NL80211_CHAN_WIDTH_1 || \ + (width) == NL80211_CHAN_WIDTH_2 || \ + (width) == NL80211_CHAN_WIDTH_4 || \ + (width) == NL80211_CHAN_WIDTH_8 || \ + (width) == NL80211_CHAN_WIDTH_16) + + if (NARROW_OR_S1G(c1->width) || NARROW_OR_S1G(c2->width)) return NULL; if (c1->width == NL80211_CHAN_WIDTH_20_NOHT || -- cgit 1.2.3-korg From 8f251a0a1566e3e1da0f1d9322c8ffae808a7509 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:34:47 +0100 Subject: wifi: cfg80211: simplify cfg80211_chandef_compatible() Simplify cfg80211_chandef_compatible() a bit by switching c1 and c2 around so that c1 is always the narrower one (once they're not identical or narrow/S1G). Then we can just check the various primary channels and exit with the wider one (c2), or NULL. Also refactor the primary 40/80/160 function to not have all the calculations hard-coded, and use a wrapper around it to check primary 40/80/160 compatibility. While at it, add some kunit tests for this functionality. Also expose the new cfg80211_chandef_primary_freq() to drivers, mac80211 will use it. Link: https://msgid.link/20240129194108.be3e6eccaba3.I8399c2ff1435d7378e5837794cb5aa6dd2ee1416@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 11 +++ net/wireless/chan.c | 170 ++++++++++++++++++++++------------------- net/wireless/tests/Makefile | 2 +- net/wireless/tests/chan.c | 182 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 284 insertions(+), 81 deletions(-) create mode 100644 net/wireless/tests/chan.c (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5b42bfc1b6606..fc2ad80118e84 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1047,6 +1047,17 @@ unsigned int cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef); +/** + * cfg80211_chandef_primary_freq - calculate primary 40/80/160 MHz freq + * @chandef: chandef to calculate for + * @primary_chan_width: primary channel width to calculate center for + * + * Returns: the primary 40/80/160 MHz channel center frequency, or -1 + * for errors + */ +int cfg80211_chandef_primary_freq(const struct cfg80211_chan_def *chandef, + enum nl80211_chan_width primary_chan_width); + /** * nl80211_send_chandef - sends the channel definition. * @msg: the msg to send channel definition diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 159b8aac451e3..bfa2ea935fc28 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -390,68 +390,60 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) } EXPORT_SYMBOL(cfg80211_chandef_valid); -static void chandef_primary_freqs(const struct cfg80211_chan_def *c, - u32 *pri40, u32 *pri80, u32 *pri160) +int cfg80211_chandef_primary_freq(const struct cfg80211_chan_def *c, + enum nl80211_chan_width primary_chan_width) { - int tmp; + int pri_width = nl80211_chan_width_to_mhz(primary_chan_width); + int width = cfg80211_chandef_get_width(c); + u32 control = c->chan->center_freq; + u32 center = c->center_freq1; - switch (c->width) { - case NL80211_CHAN_WIDTH_40: - *pri40 = c->center_freq1; - *pri80 = 0; - *pri160 = 0; - break; - case NL80211_CHAN_WIDTH_80: - case NL80211_CHAN_WIDTH_80P80: - *pri160 = 0; - *pri80 = c->center_freq1; - /* n_P20 */ - tmp = (30 + c->chan->center_freq - c->center_freq1)/20; - /* n_P40 */ - tmp /= 2; - /* freq_P40 */ - *pri40 = c->center_freq1 - 20 + 40 * tmp; - break; - case NL80211_CHAN_WIDTH_160: - *pri160 = c->center_freq1; - /* n_P20 */ - tmp = (70 + c->chan->center_freq - c->center_freq1)/20; - /* n_P40 */ - tmp /= 2; - /* freq_P40 */ - *pri40 = c->center_freq1 - 60 + 40 * tmp; - /* n_P80 */ - tmp /= 2; - *pri80 = c->center_freq1 - 40 + 80 * tmp; - break; - case NL80211_CHAN_WIDTH_320: - /* n_P20 */ - tmp = (150 + c->chan->center_freq - c->center_freq1) / 20; - /* n_P40 */ - tmp /= 2; - /* freq_P40 */ - *pri40 = c->center_freq1 - 140 + 40 * tmp; - /* n_P80 */ - tmp /= 2; - *pri80 = c->center_freq1 - 120 + 80 * tmp; - /* n_P160 */ - tmp /= 2; - *pri160 = c->center_freq1 - 80 + 160 * tmp; - break; - default: - WARN_ON_ONCE(1); + if (WARN_ON_ONCE(pri_width < 0 || width < 0)) + return -1; + + /* not intended to be called this way, can't determine */ + if (WARN_ON_ONCE(pri_width > width)) + return -1; + + while (width > pri_width) { + if (control > center) + center += width / 4; + else + center -= width / 4; + width /= 2; } + + return center; } +EXPORT_SYMBOL(cfg80211_chandef_primary_freq); -const struct cfg80211_chan_def * -cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, - const struct cfg80211_chan_def *c2) +static const struct cfg80211_chan_def * +check_chandef_primary_compat(const struct cfg80211_chan_def *c1, + const struct cfg80211_chan_def *c2, + enum nl80211_chan_width primary_chan_width) { - u32 c1_pri40, c1_pri80, c2_pri40, c2_pri80, c1_pri160, c2_pri160; + /* check primary is compatible -> error if not */ + if (cfg80211_chandef_primary_freq(c1, primary_chan_width) != + cfg80211_chandef_primary_freq(c2, primary_chan_width)) + return ERR_PTR(-EINVAL); + + /* assumes c1 is smaller width, if that was just checked -> done */ + if (c1->width == primary_chan_width) + return c2; + + /* otherwise continue checking the next width */ + return NULL; +} + +static const struct cfg80211_chan_def * +_cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, + const struct cfg80211_chan_def *c2) +{ + const struct cfg80211_chan_def *ret; /* If they are identical, return */ if (cfg80211_chandef_identical(c1, c2)) - return c1; + return c2; /* otherwise, must have same control channel */ if (c1->chan != c2->chan) @@ -479,44 +471,62 @@ cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, if (NARROW_OR_S1G(c1->width) || NARROW_OR_S1G(c2->width)) return NULL; - if (c1->width == NL80211_CHAN_WIDTH_20_NOHT || - c1->width == NL80211_CHAN_WIDTH_20) + /* + * Make sure that c1 is always the narrower one, so that later + * we either return NULL or c2 and don't have to check both + * directions. + */ + if (c1->width > c2->width) + swap(c1, c2); + + /* + * No further checks needed if the "narrower" one is only 20 MHz. + * Here "narrower" includes being a 20 MHz non-HT channel vs. a + * 20 MHz HT (or later) one. + */ + if (c1->width <= NL80211_CHAN_WIDTH_20) return c2; - if (c2->width == NL80211_CHAN_WIDTH_20_NOHT || - c2->width == NL80211_CHAN_WIDTH_20) - return c1; + ret = check_chandef_primary_compat(c1, c2, NL80211_CHAN_WIDTH_40); + if (ret) + return ret; - chandef_primary_freqs(c1, &c1_pri40, &c1_pri80, &c1_pri160); - chandef_primary_freqs(c2, &c2_pri40, &c2_pri80, &c2_pri160); + ret = check_chandef_primary_compat(c1, c2, NL80211_CHAN_WIDTH_80); + if (ret) + return ret; - if (c1_pri40 != c2_pri40) + /* + * If c1 is 80+80, then c2 is 160 or higher, but that cannot + * match. If c2 was also 80+80 it was already either accepted + * or rejected above (identical or not, respectively.) + */ + if (c1->width == NL80211_CHAN_WIDTH_80P80) return NULL; - if (c1->width == NL80211_CHAN_WIDTH_40) - return c2; - - if (c2->width == NL80211_CHAN_WIDTH_40) - return c1; + ret = check_chandef_primary_compat(c1, c2, NL80211_CHAN_WIDTH_160); + if (ret) + return ret; - if (c1_pri80 != c2_pri80) - return NULL; + /* + * Getting here would mean they're both wider than 160, have the + * same primary 160, but are not identical - this cannot happen + * since they must be 320 (no wider chandefs exist, at least yet.) + */ + WARN_ON_ONCE(1); - if (c1->width == NL80211_CHAN_WIDTH_80 && - c2->width > NL80211_CHAN_WIDTH_80) - return c2; + return NULL; +} - if (c2->width == NL80211_CHAN_WIDTH_80 && - c1->width > NL80211_CHAN_WIDTH_80) - return c1; +const struct cfg80211_chan_def * +cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, + const struct cfg80211_chan_def *c2) +{ + const struct cfg80211_chan_def *ret; - WARN_ON(!c1_pri160 && !c2_pri160); - if (c1_pri160 && c2_pri160 && c1_pri160 != c2_pri160) + ret = _cfg80211_chandef_compatible(c1, c2); + if (IS_ERR(ret)) return NULL; - - if (c1->width > c2->width) - return c1; - return c2; + return ret; } EXPORT_SYMBOL(cfg80211_chandef_compatible); diff --git a/net/wireless/tests/Makefile b/net/wireless/tests/Makefile index 1f6622fcb758f..c364e63b508eb 100644 --- a/net/wireless/tests/Makefile +++ b/net/wireless/tests/Makefile @@ -1,3 +1,3 @@ -cfg80211-tests-y += module.o fragmentation.o scan.o util.o +cfg80211-tests-y += module.o fragmentation.o scan.o util.o chan.o obj-$(CONFIG_CFG80211_KUNIT_TEST) += cfg80211-tests.o diff --git a/net/wireless/tests/chan.c b/net/wireless/tests/chan.c new file mode 100644 index 0000000000000..b9e7a27e43cba --- /dev/null +++ b/net/wireless/tests/chan.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KUnit tests for channel helper functions + * + * Copyright (C) 2023 Intel Corporation + */ +#include +#include + +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); + +static struct ieee80211_channel chan_6ghz_1 = { + .band = NL80211_BAND_6GHZ, + .center_freq = 5955, +}; + +static struct ieee80211_channel chan_6ghz_5 = { + .band = NL80211_BAND_6GHZ, + .center_freq = 5975, +}; + +static struct ieee80211_channel chan_6ghz_105 = { + .band = NL80211_BAND_6GHZ, + .center_freq = 6475, +}; + +static const struct chandef_compat_case { + const char *desc; + /* leave c1 empty for tests for identical */ + struct cfg80211_chan_def c1, c2; + /* we test both ways around, so c2 should always be the compat one */ + bool compat; +} chandef_compat_cases[] = { + { + .desc = "identical non-HT", + .c2 = { + .width = NL80211_CHAN_WIDTH_20_NOHT, + .chan = &chan_6ghz_1, + .center_freq1 = 5955, + }, + .compat = true, + }, + { + .desc = "identical 20 MHz", + .c2 = { + .width = NL80211_CHAN_WIDTH_20, + .chan = &chan_6ghz_1, + .center_freq1 = 5955, + }, + .compat = true, + }, + { + .desc = "identical 40 MHz", + .c2 = { + .width = NL80211_CHAN_WIDTH_40, + .chan = &chan_6ghz_1, + .center_freq1 = 5955 + 10, + }, + .compat = true, + }, + { + .desc = "identical 80 MHz", + .c2 = { + .width = NL80211_CHAN_WIDTH_80, + .chan = &chan_6ghz_1, + .center_freq1 = 5955 + 10 + 20, + }, + .compat = true, + }, + { + .desc = "identical 160 MHz", + .c2 = { + .width = NL80211_CHAN_WIDTH_160, + .chan = &chan_6ghz_1, + .center_freq1 = 5955 + 10 + 20 + 40, + }, + .compat = true, + }, + { + .desc = "identical 320 MHz", + .c2 = { + .width = NL80211_CHAN_WIDTH_320, + .chan = &chan_6ghz_1, + .center_freq1 = 5955 + 10 + 20 + 40 + 80, + }, + .compat = true, + }, + { + .desc = "20 MHz in 320 MHz\n", + .c1 = { + .width = NL80211_CHAN_WIDTH_20, + .chan = &chan_6ghz_1, + .center_freq1 = 5955, + }, + .c2 = { + .width = NL80211_CHAN_WIDTH_320, + .chan = &chan_6ghz_1, + .center_freq1 = 5955 + 10 + 20 + 40 + 80, + }, + .compat = true, + }, + { + .desc = "different 20 MHz", + .c1 = { + .width = NL80211_CHAN_WIDTH_20, + .chan = &chan_6ghz_1, + .center_freq1 = 5955, + }, + .c2 = { + .width = NL80211_CHAN_WIDTH_20, + .chan = &chan_6ghz_5, + .center_freq1 = 5975, + }, + }, + { + .desc = "different primary 160 MHz", + .c1 = { + .width = NL80211_CHAN_WIDTH_320, + .chan = &chan_6ghz_105, + .center_freq1 = 6475 + 150, + }, + .c2 = { + .width = NL80211_CHAN_WIDTH_320, + .chan = &chan_6ghz_105, + .center_freq1 = 6475 - 10, + }, + }, + { + /* similar to previous test but one has lower BW */ + .desc = "matching primary 160 MHz", + .c1 = { + .width = NL80211_CHAN_WIDTH_160, + .chan = &chan_6ghz_105, + .center_freq1 = 6475 + 70, + }, + .c2 = { + .width = NL80211_CHAN_WIDTH_320, + .chan = &chan_6ghz_105, + .center_freq1 = 6475 - 10, + }, + .compat = true, + }, +}; + +KUNIT_ARRAY_PARAM_DESC(chandef_compat, chandef_compat_cases, desc) + +static void test_chandef_compat(struct kunit *test) +{ + const struct chandef_compat_case *params = test->param_value; + const struct cfg80211_chan_def *ret, *expect; + struct cfg80211_chan_def c1 = params->c1; + + /* tests with identical ones */ + if (!params->c1.chan) + c1 = params->c2; + + KUNIT_EXPECT_EQ(test, cfg80211_chandef_valid(&c1), true); + KUNIT_EXPECT_EQ(test, cfg80211_chandef_valid(¶ms->c2), true); + + expect = params->compat ? ¶ms->c2 : NULL; + + ret = cfg80211_chandef_compatible(&c1, ¶ms->c2); + KUNIT_EXPECT_PTR_EQ(test, ret, expect); + + if (!params->c1.chan) + expect = &c1; + + ret = cfg80211_chandef_compatible(¶ms->c2, &c1); + KUNIT_EXPECT_PTR_EQ(test, ret, expect); +} + +static struct kunit_case chandef_compat_test_cases[] = { + KUNIT_CASE_PARAM(test_chandef_compat, chandef_compat_gen_params), + {} +}; + +static struct kunit_suite chandef_compat = { + .name = "cfg80211-chandef-compat", + .test_cases = chandef_compat_test_cases, +}; + +kunit_test_suite(chandef_compat); -- cgit 1.2.3-korg From 8616f27b3fb0d47be053d7bebc9539171a514917 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:34:48 +0100 Subject: wifi: mac80211: use cfg80211_chandef_primary_freq() Instead of calculating the new primary 40/80/160 MHz center frequency here, use the new helper function from cfg80211. Link: https://msgid.link/20240129194108.eb59d6433d18.I74b745f0d1a32e779fb25d50c56407be7c35b840@changeid Signed-off-by: Johannes Berg --- net/mac80211/util.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 49eef33b5e702..63a88169d53d3 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -4365,8 +4365,9 @@ EXPORT_SYMBOL(ieee80211_radar_detected); void ieee80211_chandef_downgrade(struct cfg80211_chan_def *c, struct ieee80211_conn_settings *conn) { + /* no-HT indicates nothing to do */ + enum nl80211_chan_width new_primary_width = NL80211_CHAN_WIDTH_20_NOHT; struct ieee80211_conn_settings _ignored = {}; - int tmp; /* allow passing NULL if caller doesn't care */ if (!conn) @@ -4390,12 +4391,7 @@ void ieee80211_chandef_downgrade(struct cfg80211_chan_def *c, conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20; break; case NL80211_CHAN_WIDTH_80: - tmp = (30 + c->chan->center_freq - c->center_freq1)/20; - /* n_P40 */ - tmp /= 2; - /* freq_P40 */ - c->center_freq1 = c->center_freq1 - 20 + 40 * tmp; - c->width = NL80211_CHAN_WIDTH_40; + new_primary_width = NL80211_CHAN_WIDTH_40; if (conn->mode == IEEE80211_CONN_MODE_VHT) conn->mode = IEEE80211_CONN_MODE_HT; conn->bw_limit = IEEE80211_CONN_BW_LIMIT_40; @@ -4406,21 +4402,11 @@ void ieee80211_chandef_downgrade(struct cfg80211_chan_def *c, conn->bw_limit = IEEE80211_CONN_BW_LIMIT_80; break; case NL80211_CHAN_WIDTH_160: - /* n_P20 */ - tmp = (70 + c->chan->center_freq - c->center_freq1)/20; - /* n_P80 */ - tmp /= 4; - c->center_freq1 = c->center_freq1 - 40 + 80 * tmp; - c->width = NL80211_CHAN_WIDTH_80; + new_primary_width = NL80211_CHAN_WIDTH_80; conn->bw_limit = IEEE80211_CONN_BW_LIMIT_80; break; case NL80211_CHAN_WIDTH_320: - /* n_P20 */ - tmp = (150 + c->chan->center_freq - c->center_freq1) / 20; - /* n_P160 */ - tmp /= 8; - c->center_freq1 = c->center_freq1 - 80 + 160 * tmp; - c->width = NL80211_CHAN_WIDTH_160; + new_primary_width = NL80211_CHAN_WIDTH_160; conn->bw_limit = IEEE80211_CONN_BW_LIMIT_160; break; case NL80211_CHAN_WIDTH_1: @@ -4442,6 +4428,12 @@ void ieee80211_chandef_downgrade(struct cfg80211_chan_def *c, break; } + if (new_primary_width != NL80211_CHAN_WIDTH_20_NOHT) { + c->center_freq1 = + cfg80211_chandef_primary_freq(c, new_primary_width); + c->width = new_primary_width; + } + WARN_ON_ONCE(!cfg80211_chandef_valid(c)); } -- cgit 1.2.3-korg From b82730bf57b54803ab94abbfd8c4422a7081886d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:34:49 +0100 Subject: wifi: cfg80211/mac80211: move puncturing into chandef Aloka originally suggested that puncturing should be part of the chandef, so that it's treated correctly. At the time, I disagreed and it ended up not part of the chandef, but I've now realized that this was wrong. Even for clients, the RX, and perhaps more importantly, CCA configuration needs to take puncturing into account. Move puncturing into the chandef, and adjust all the code accordingly. Also add a few tests for puncturing in chandef compatibility checking. Link: https://lore.kernel.org/linux-wireless/20220214223051.3610-1-quic_alokad@quicinc.com/ Suggested-by: Aloka Dixit Link: https://msgid.link/20240129194108.307183a5d2e5.I4d7fe2f126b2366c1312010e2900dfb2abffa0f6@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath12k/mac.c | 10 +- drivers/net/wireless/ath/ath6kl/cfg80211.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/link.c | 16 +- .../net/wireless/intel/iwlwifi/mvm/mld-mac80211.c | 6 +- drivers/net/wireless/marvell/mwifiex/11h.c | 2 +- drivers/net/wireless/quantenna/qtnfmac/event.c | 2 +- drivers/net/wireless/realtek/rtw89/fw.c | 5 +- include/net/cfg80211.h | 44 ++--- include/net/mac80211.h | 11 +- net/mac80211/cfg.c | 21 +-- net/mac80211/chan.c | 10 +- net/mac80211/link.c | 2 +- net/mac80211/mlme.c | 180 +++------------------ net/mac80211/util.c | 26 ++- net/wireless/chan.c | 62 ++++--- net/wireless/nl80211.c | 67 +++----- net/wireless/tests/chan.c | 48 +++++- net/wireless/trace.h | 38 ++--- 18 files changed, 227 insertions(+), 325 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index a27480a69b273..18fb42c045ccb 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -2784,9 +2784,6 @@ static void ath12k_mac_bss_info_changed(struct ath12k *ar, } ath12k_mac_fils_discovery(arvif, info); - - if (changed & BSS_CHANGED_EHT_PUNCTURING) - arvif->punct_bitmap = info->eht_puncturing; } static void ath12k_mac_op_bss_info_changed(struct ieee80211_hw *hw, @@ -6373,6 +6370,8 @@ ath12k_mac_update_vif_chan(struct ath12k *ar, if (WARN_ON(!arvif->is_started)) continue; + arvif->punct_bitmap = vifs[i].new_ctx->def.punctured; + /* Firmware expect vdev_restart only if vdev is up. * If vdev is down then it expect vdev_stop->vdev_start. */ @@ -6473,7 +6472,8 @@ static void ath12k_mac_op_change_chanctx(struct ieee80211_hw *hw, goto unlock; if (changed & IEEE80211_CHANCTX_CHANGE_WIDTH || - changed & IEEE80211_CHANCTX_CHANGE_RADAR) + changed & IEEE80211_CHANCTX_CHANGE_RADAR || + changed & IEEE80211_CHANCTX_CHANGE_PUNCTURING) ath12k_mac_update_active_vif_chan(ar, ctx); /* TODO: Recalc radar detection */ @@ -6536,7 +6536,7 @@ ath12k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw, "mac chanctx assign ptr %pK vdev_id %i\n", ctx, arvif->vdev_id); - arvif->punct_bitmap = link_conf->eht_puncturing; + arvif->punct_bitmap = ctx->def.punctured; /* for some targets bss peer must be created before vdev_start */ if (ab->hw_params->vdev_start_delay && diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index e37db4af33dee..61b2e3f15f0e9 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1119,7 +1119,7 @@ void ath6kl_cfg80211_ch_switch_notify(struct ath6kl_vif *vif, int freq, NL80211_CHAN_HT20 : NL80211_CHAN_NO_HT); wiphy_lock(vif->ar->wiphy); - cfg80211_ch_switch_notify(vif->ndev, &chandef, 0, 0); + cfg80211_ch_switch_notify(vif->ndev, &chandef, 0); wiphy_unlock(vif->ar->wiphy); } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/link.c b/drivers/net/wireless/intel/iwlwifi/mvm/link.c index f3fcef9034ef2..129eefcc45d69 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/link.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/link.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2022 - 2023 Intel Corporation + * Copyright (C) 2022 - 2024 Intel Corporation */ #include "mvm.h" #include "time-event.h" @@ -195,12 +195,20 @@ int iwl_mvm_link_changed(struct iwl_mvm *mvm, struct ieee80211_vif *vif, } if (changes & LINK_CONTEXT_MODIFY_EHT_PARAMS) { + struct ieee80211_chanctx_conf *ctx; + struct cfg80211_chan_def *def = NULL; + + rcu_read_lock(); + ctx = rcu_dereference(link_conf->chanctx_conf); + if (ctx) + def = iwl_mvm_chanctx_def(mvm, ctx); + if (iwlwifi_mod_params.disable_11be || - !link_conf->eht_support) + !link_conf->eht_support || !def) changes &= ~LINK_CONTEXT_MODIFY_EHT_PARAMS; else - cmd.puncture_mask = - cpu_to_le16(link_conf->eht_puncturing); + cmd.puncture_mask = cpu_to_le16(def->punctured); + rcu_read_unlock(); } cmd.bss_color = link_conf->he_bss_color.color; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c index b633a2a09c32d..7d89f50fbb101 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2022-2023 Intel Corporation + * Copyright (C) 2022-2024 Intel Corporation */ #include "mvm.h" @@ -752,8 +752,8 @@ iwl_mvm_mld_link_info_changed_station(struct iwl_mvm *mvm, link_changes |= LINK_CONTEXT_MODIFY_HE_PARAMS; } - /* Update EHT Puncturing info */ - if (changes & BSS_CHANGED_EHT_PUNCTURING && vif->cfg.assoc) + /* if associated, maybe puncturing changed - we'll check later */ + if (vif->cfg.assoc) link_changes |= LINK_CONTEXT_MODIFY_EHT_PARAMS; if (link_changes) { diff --git a/drivers/net/wireless/marvell/mwifiex/11h.c b/drivers/net/wireless/marvell/mwifiex/11h.c index da211372a4816..b90f922f1cdc6 100644 --- a/drivers/net/wireless/marvell/mwifiex/11h.c +++ b/drivers/net/wireless/marvell/mwifiex/11h.c @@ -288,6 +288,6 @@ void mwifiex_dfs_chan_sw_work_queue(struct work_struct *work) mwifiex_dbg(priv->adapter, MSG, "indicating channel switch completion to kernel\n"); wiphy_lock(priv->wdev.wiphy); - cfg80211_ch_switch_notify(priv->netdev, &priv->dfs_chandef, 0, 0); + cfg80211_ch_switch_notify(priv->netdev, &priv->dfs_chandef, 0); wiphy_unlock(priv->wdev.wiphy); } diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c index 3b283e93a13e6..76b07db284f89 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/event.c +++ b/drivers/net/wireless/quantenna/qtnfmac/event.c @@ -478,7 +478,7 @@ qtnf_event_handle_freq_change(struct qtnf_wmac *mac, continue; wiphy_lock(priv_to_wiphy(vif->mac)); - cfg80211_ch_switch_notify(vif->netdev, &chandef, 0, 0); + cfg80211_ch_switch_notify(vif->netdev, &chandef, 0); wiphy_unlock(priv_to_wiphy(vif->mac)); } diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index 51072a2dcf104..540ea16f048e1 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -2804,8 +2804,11 @@ int rtw89_fw_h2c_assoc_cmac_tbl_g7(struct rtw89_dev *rtwdev, } if (vif->bss_conf.eht_support) { - h2c->w4 |= le32_encode_bits(~vif->bss_conf.eht_puncturing, + u16 punct = vif->bss_conf.chanreq.oper.punctured; + + h2c->w4 |= le32_encode_bits(~punct, CCTLINFO_G7_W4_ACT_SUBCH_CBW); + rcu_read_unlock(); h2c->m4 |= cpu_to_le32(CCTLINFO_G7_W4_ACT_SUBCH_CBW); } diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fc2ad80118e84..cb5e34d640cde 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -7,7 +7,7 @@ * Copyright 2006-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2021, 2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #include @@ -808,6 +808,9 @@ struct key_params { * chan will define the primary channel and all other * parameters are ignored. * @freq1_offset: offset from @center_freq1, in KHz + * @punctured: mask of the punctured 20 MHz subchannels, with + * bits turned on being disabled (punctured); numbered + * from lower to higher frequency (like in the spec) */ struct cfg80211_chan_def { struct ieee80211_channel *chan; @@ -816,6 +819,7 @@ struct cfg80211_chan_def { u32 center_freq2; struct ieee80211_edmg edmg; u16 freq1_offset; + u16 punctured; }; /* @@ -956,7 +960,8 @@ cfg80211_chandef_identical(const struct cfg80211_chan_def *chandef1, chandef1->width == chandef2->width && chandef1->center_freq1 == chandef2->center_freq1 && chandef1->freq1_offset == chandef2->freq1_offset && - chandef1->center_freq2 == chandef2->center_freq2); + chandef1->center_freq2 == chandef2->center_freq2 && + chandef1->punctured == chandef2->punctured); } /** @@ -1051,12 +1056,15 @@ cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, * cfg80211_chandef_primary_freq - calculate primary 40/80/160 MHz freq * @chandef: chandef to calculate for * @primary_chan_width: primary channel width to calculate center for + * @punctured: punctured sub-channel bitmap, will be recalculated + * according to the new bandwidth, can be %NULL * * Returns: the primary 40/80/160 MHz channel center frequency, or -1 - * for errors + * for errors, updating the punctured bitmap */ -int cfg80211_chandef_primary_freq(const struct cfg80211_chan_def *chandef, - enum nl80211_chan_width primary_chan_width); +int cfg80211_chandef_primary(const struct cfg80211_chan_def *chandef, + enum nl80211_chan_width primary_chan_width, + u16 *punctured); /** * nl80211_send_chandef - sends the channel definition. @@ -1468,9 +1476,6 @@ struct cfg80211_unsol_bcast_probe_resp { * @fils_discovery: FILS discovery transmission parameters * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters * @mbssid_config: AP settings for multiple bssid - * @punct_bitmap: Preamble puncturing bitmap. Each bit represents - * a 20 MHz channel, lowest bit corresponding to the lowest channel. - * Bit set to 1 indicates that the channel is punctured. */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -1505,7 +1510,6 @@ struct cfg80211_ap_settings { struct cfg80211_fils_discovery fils_discovery; struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp; struct cfg80211_mbssid_config mbssid_config; - u16 punct_bitmap; }; @@ -1539,9 +1543,6 @@ struct cfg80211_ap_update { * @radar_required: whether radar detection is required on the new channel * @block_tx: whether transmissions should be blocked while changing * @count: number of beacons until switch - * @punct_bitmap: Preamble puncturing bitmap. Each bit represents - * a 20 MHz channel, lowest bit corresponding to the lowest channel. - * Bit set to 1 indicates that the channel is punctured. */ struct cfg80211_csa_settings { struct cfg80211_chan_def chandef; @@ -1554,7 +1555,6 @@ struct cfg80211_csa_settings { bool radar_required; bool block_tx; u8 count; - u16 punct_bitmap; }; /** @@ -8738,14 +8738,13 @@ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, * @dev: the device which switched channels * @chandef: the new channel definition * @link_id: the link ID for MLO, must be 0 for non-MLO - * @punct_bitmap: the new puncturing bitmap * * Caller must hold wiphy mutex, therefore must only be called from sleepable * driver context! */ void cfg80211_ch_switch_notify(struct net_device *dev, struct cfg80211_chan_def *chandef, - unsigned int link_id, u16 punct_bitmap); + unsigned int link_id); /* * cfg80211_ch_switch_started_notify - notify channel switch start @@ -8754,7 +8753,6 @@ void cfg80211_ch_switch_notify(struct net_device *dev, * @link_id: the link ID for MLO, must be 0 for non-MLO * @count: the number of TBTTs until the channel switch happens * @quiet: whether or not immediate quiet was requested by the AP - * @punct_bitmap: the future puncturing bitmap * * Inform the userspace about the channel switch that has just * started, so that it can take appropriate actions (eg. starting @@ -8763,7 +8761,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev, void cfg80211_ch_switch_started_notify(struct net_device *dev, struct cfg80211_chan_def *chandef, unsigned int link_id, u8 count, - bool quiet, u16 punct_bitmap); + bool quiet); /** * ieee80211_operating_class_to_band - convert operating class to band @@ -9381,18 +9379,6 @@ static inline int cfg80211_color_change_notify(struct net_device *dev) 0, 0); } -/** - * cfg80211_valid_disable_subchannel_bitmap - validate puncturing bitmap - * @bitmap: bitmap to be validated - * @chandef: channel definition - * - * Validate the puncturing bitmap. - * - * Return: %true if the bitmap is valid. %false otherwise. - */ -bool cfg80211_valid_disable_subchannel_bitmap(u16 *bitmap, - const struct cfg80211_chan_def *chandef); - /** * cfg80211_links_removed - Notify about removed STA MLD setup links. * @dev: network device. diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ab6bc89d33945..54aa4a06c878d 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7,7 +7,7 @@ * Copyright 2007-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2023 Intel Corporation + * Copyright (C) 2018 - 2024 Intel Corporation */ #ifndef MAC80211_H @@ -216,6 +216,8 @@ struct ieee80211_low_level_stats { * @IEEE80211_CHANCTX_CHANGE_MIN_WIDTH: The min required channel width changed * @IEEE80211_CHANCTX_CHANGE_AP: The AP channel definition changed, so (wider * bandwidth) OFDMA settings need to be changed + * @IEEE80211_CHANCTX_CHANGE_PUNCTURING: The punctured channel(s) bitmap + * was changed. */ enum ieee80211_chanctx_change { IEEE80211_CHANCTX_CHANGE_WIDTH = BIT(0), @@ -224,6 +226,7 @@ enum ieee80211_chanctx_change { IEEE80211_CHANCTX_CHANGE_CHANNEL = BIT(3), IEEE80211_CHANCTX_CHANGE_MIN_WIDTH = BIT(4), IEEE80211_CHANCTX_CHANGE_AP = BIT(5), + IEEE80211_CHANCTX_CHANGE_PUNCTURING = BIT(6), }; /** @@ -357,7 +360,6 @@ struct ieee80211_vif_chanctx_switch { * @BSS_CHANGED_FILS_DISCOVERY: FILS discovery status changed. * @BSS_CHANGED_UNSOL_BCAST_PROBE_RESP: Unsolicited broadcast probe response * status changed. - * @BSS_CHANGED_EHT_PUNCTURING: The channel puncturing bitmap changed. * @BSS_CHANGED_MLD_VALID_LINKS: MLD valid links status changed. * @BSS_CHANGED_MLD_TTLM: TID to link mapping was changed */ @@ -394,7 +396,6 @@ enum ieee80211_bss_change { BSS_CHANGED_HE_BSS_COLOR = 1<<29, BSS_CHANGED_FILS_DISCOVERY = 1<<30, BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31, - BSS_CHANGED_EHT_PUNCTURING = BIT_ULL(32), BSS_CHANGED_MLD_VALID_LINKS = BIT_ULL(33), BSS_CHANGED_MLD_TTLM = BIT_ULL(34), @@ -661,9 +662,7 @@ struct ieee80211_fils_discovery { * @tx_pwr_env_num: number of @tx_pwr_env. * @pwr_reduction: power constraint of BSS. * @eht_support: does this BSS support EHT - * @eht_puncturing: bitmap to indicate which channels are punctured in this BSS * @csa_active: marks whether a channel switch is going on. - * @csa_punct_bitmap: new puncturing bitmap for channel switch * @mu_mimo_owner: indicates interface owns MU-MIMO capability * @chanctx_conf: The channel context this interface is assigned to, or %NULL * when it is not assigned. This pointer is RCU-protected due to the TX @@ -766,10 +765,8 @@ struct ieee80211_bss_conf { u8 tx_pwr_env_num; u8 pwr_reduction; bool eht_support; - u16 eht_puncturing; bool csa_active; - u16 csa_punct_bitmap; bool mu_mimo_owner; struct ieee80211_chanctx_conf __rcu *chanctx_conf; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 5aa02b0872d92..2ddaf00379523 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1343,8 +1343,6 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, return -EOPNOTSUPP; link_conf->eht_support = true; - link_conf->eht_puncturing = params->punct_bitmap; - changed |= BSS_CHANGED_EHT_PUNCTURING; link_conf->eht_su_beamformer = params->eht_cap->fixed.phy_cap_info[0] & @@ -3668,12 +3666,6 @@ static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data) if (err) return err; - if (sdata->vif.bss_conf.eht_puncturing != sdata->vif.bss_conf.csa_punct_bitmap) { - sdata->vif.bss_conf.eht_puncturing = - sdata->vif.bss_conf.csa_punct_bitmap; - changed |= BSS_CHANGED_EHT_PUNCTURING; - } - ieee80211_link_info_change_notify(sdata, link_data, changed); if (link_data->csa_block_tx) { @@ -3687,8 +3679,7 @@ static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data) return err; cfg80211_ch_switch_notify(sdata->dev, &link_data->csa_chanreq.oper, - link_data->link_id, - link_data->conf->eht_puncturing); + link_data->link_id); return 0; } @@ -3889,6 +3880,9 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, &sdata->vif.bss_conf.chanreq.oper)) return -EINVAL; + if (chanreq.oper.punctured && !sdata->vif.bss_conf.eht_support) + return -EINVAL; + /* don't allow another channel switch if one is already active. */ if (sdata->vif.bss_conf.csa_active) return -EBUSY; @@ -3941,13 +3935,9 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, goto out; } - if (params->punct_bitmap && !sdata->vif.bss_conf.eht_support) - goto out; - sdata->deflink.csa_chanreq = chanreq; sdata->deflink.csa_block_tx = params->block_tx; sdata->vif.bss_conf.csa_active = true; - sdata->vif.bss_conf.csa_punct_bitmap = params->punct_bitmap; if (sdata->deflink.csa_block_tx) ieee80211_stop_vif_queues(local, sdata, @@ -3955,8 +3945,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, cfg80211_ch_switch_started_notify(sdata->dev, &sdata->deflink.csa_chanreq.oper, 0, - params->count, params->block_tx, - sdata->vif.bss_conf.csa_punct_bitmap); + params->count, params->block_tx); if (changed) { ieee80211_link_info_change_notify(sdata, &sdata->deflink, diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index fe1489ba58c69..38acdc458c7c9 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * mac80211 - channel management - * Copyright 2020 - 2022 Intel Corporation + * Copyright 2020 - 2024 Intel Corporation */ #include @@ -517,8 +517,12 @@ static void _ieee80211_change_chanctx(struct ieee80211_local *local, ieee80211_remove_wbrf(local, &ctx->conf.def); - if (!cfg80211_chandef_identical(&ctx->conf.def, &chanreq->oper)) - changed |= IEEE80211_CHANCTX_CHANGE_WIDTH; + if (!cfg80211_chandef_identical(&ctx->conf.def, &chanreq->oper)) { + if (ctx->conf.def.width != chanreq->oper.width) + changed |= IEEE80211_CHANCTX_CHANGE_WIDTH; + if (ctx->conf.def.punctured != chanreq->oper.punctured) + changed |= IEEE80211_CHANCTX_CHANGE_PUNCTURING; + } if (!cfg80211_chandef_identical(&ctx->conf.ap, &chanreq->ap)) changed |= IEEE80211_CHANCTX_CHANGE_AP; ctx->conf.def = *chandef; diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 2231eb38a2110..4f19d6479befd 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -2,7 +2,7 @@ /* * MLO link handling * - * Copyright (C) 2022-2023 Intel Corporation + * Copyright (C) 2022-2024 Intel Corporation */ #include #include diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 74a15f18e7eea..4eaf5c10efdb4 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -93,83 +93,6 @@ MODULE_PARM_DESC(probe_wait_ms, */ #define IEEE80211_SIGNAL_AVE_MIN_COUNT 4 -/* - * Extract from the given disabled subchannel bitmap (raw format - * from the EHT Operation Element) the bits for the subchannel - * we're using right now. - */ -static u16 -ieee80211_extract_dis_subch_bmap(const struct ieee80211_eht_operation *eht_oper, - struct cfg80211_chan_def *chandef, u16 bitmap) -{ - struct ieee80211_eht_operation_info *info = (void *)eht_oper->optional; - struct cfg80211_chan_def ap_chandef = *chandef; - u32 ap_center_freq, local_center_freq; - u32 ap_bw, local_bw; - int ap_start_freq, local_start_freq; - u16 shift, mask; - - if (!(eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT) || - !(eht_oper->params & - IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT)) - return 0; - - /* set 160/320 supported to get the full AP definition */ - ieee80211_chandef_eht_oper((const void *)eht_oper->optional, - &ap_chandef); - ap_center_freq = ap_chandef.center_freq1; - ap_bw = 20 * BIT(u8_get_bits(info->control, - IEEE80211_EHT_OPER_CHAN_WIDTH)); - ap_start_freq = ap_center_freq - ap_bw / 2; - local_center_freq = chandef->center_freq1; - local_bw = 20 * BIT(ieee80211_chan_width_to_rx_bw(chandef->width)); - local_start_freq = local_center_freq - local_bw / 2; - shift = (local_start_freq - ap_start_freq) / 20; - mask = BIT(local_bw / 20) - 1; - - return (bitmap >> shift) & mask; -} - -/* - * Handle the puncturing bitmap, possibly downgrading bandwidth to get a - * valid bitmap. - */ -static void -ieee80211_handle_puncturing_bitmap(struct ieee80211_link_data *link, - const struct ieee80211_eht_operation *eht_oper, - u16 bitmap, u64 *changed) -{ - struct cfg80211_chan_def *chandef = &link->conf->chanreq.oper; - struct ieee80211_local *local = link->sdata->local; - u16 extracted; - u64 _changed = 0; - - if (!changed) - changed = &_changed; - - while (chandef->width > NL80211_CHAN_WIDTH_40) { - extracted = - ieee80211_extract_dis_subch_bmap(eht_oper, chandef, - bitmap); - - if (cfg80211_valid_disable_subchannel_bitmap(&bitmap, - chandef) && - !(bitmap && ieee80211_hw_check(&local->hw, - DISALLOW_PUNCTURING))) - break; - ieee80211_chandef_downgrade(chandef, &link->u.mgd.conn); - *changed |= BSS_CHANGED_BANDWIDTH; - } - - if (chandef->width <= NL80211_CHAN_WIDTH_40) - extracted = 0; - - if (link->conf->eht_puncturing != extracted) { - link->conf->eht_puncturing = extracted; - *changed |= BSS_CHANGED_EHT_PUNCTURING; - } -} - /* * We can have multiple work items (and connection probing) * scheduling this timer, but we need to take care to only @@ -396,6 +319,9 @@ ieee80211_determine_ap_chan(struct ieee80211_sub_if_data *sdata, ieee80211_chandef_eht_oper((const void *)eht_oper->optional, &eht_chandef); + eht_chandef.punctured = + ieee80211_eht_oper_dis_subchan_bitmap(eht_oper); + if (!cfg80211_chandef_valid(&eht_chandef)) { sdata_info(sdata, "AP EHT information is invalid, disabling EHT\n"); @@ -661,13 +587,27 @@ ieee80211_verify_sta_eht_mcs_support(struct ieee80211_sub_if_data *sdata, return true; } +static bool ieee80211_chandef_usable(struct ieee80211_sub_if_data *sdata, + const struct cfg80211_chan_def *chandef, + u32 prohibited_flags) +{ + if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, + chandef, prohibited_flags)) + return false; + + if (chandef->punctured && + ieee80211_hw_check(&sdata->local->hw, DISALLOW_PUNCTURING)) + return false; + + return true; +} + static struct ieee802_11_elems * ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, struct ieee80211_conn_settings *conn, struct cfg80211_bss *cbss, int link_id, struct ieee80211_chan_req *chanreq) { - struct ieee80211_local *local = sdata->local; const struct cfg80211_bss_ies *ies = rcu_dereference(cbss->ies); struct ieee80211_bss *bss = (void *)cbss->priv; struct ieee80211_channel *channel = cbss->channel; @@ -761,8 +701,8 @@ again: else chanreq->ap.chan = NULL; - while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, &chanreq->oper, - IEEE80211_CHAN_DISABLED)) { + while (!ieee80211_chandef_usable(sdata, &chanreq->oper, + IEEE80211_CHAN_DISABLED)) { if (WARN_ON(chanreq->oper.width == NL80211_CHAN_WIDTH_20_NOHT)) { ret = -EINVAL; goto free; @@ -812,30 +752,6 @@ again: sdata_info(sdata, "required MCSes not supported, disabling EHT\n"); } - if (conn->mode >= IEEE80211_CONN_MODE_EHT) { - u16 bitmap; - - if (WARN_ON_ONCE(!elems->eht_operation)) { - ret = -EINVAL; - goto free; - } - - bitmap = ieee80211_eht_oper_dis_subchan_bitmap(elems->eht_operation); - - if (!cfg80211_valid_disable_subchannel_bitmap(&bitmap, - &ap_chandef) || - (bitmap && - ieee80211_hw_check(&local->hw, DISALLOW_PUNCTURING))) { - conn->mode = IEEE80211_CONN_MODE_HE; - conn->bw_limit = min_t(enum ieee80211_conn_bw_limit, - conn->bw_limit, - IEEE80211_CONN_BW_LIMIT_160); - sdata_info(sdata, - "AP has invalid/unsupported puncturing, disabling EHT\n"); - } - /* FIXME: store puncturing bitmap */ - } - /* the mode can only decrease, so this must terminate */ if (ap_mode != conn->mode) goto again; @@ -2127,7 +2043,7 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link) } cfg80211_ch_switch_notify(sdata->dev, &link->reserved.oper, - link->link_id, 0); + link->link_id); } void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success, @@ -2330,7 +2246,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, cfg80211_ch_switch_started_notify(sdata->dev, &csa_ie.chanreq.oper, link->link_id, csa_ie.count, - csa_ie.mode, 0); + csa_ie.mode); if (local->ops->channel_switch) { /* use driver's channel switch callback */ @@ -3394,8 +3310,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, sdata->deflink.u.mgd.tracking_signal_avg = false; sdata->deflink.u.mgd.disable_wmm_tracking = false; - sdata->vif.bss_conf.eht_puncturing = 0; - ifmgd->flags = 0; for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) { @@ -4625,7 +4539,6 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, link_sta); bss_conf->eht_support = link_sta->pub->eht_cap.has_eht; - *changed |= BSS_CHANGED_EHT_PUNCTURING; } else { bss_conf->eht_support = false; } @@ -5865,40 +5778,6 @@ static bool ieee80211_rx_our_beacon(const u8 *tx_bssid, return ether_addr_equal(tx_bssid, bss->transmitted_bss->bssid); } -static bool ieee80211_config_puncturing(struct ieee80211_link_data *link, - const struct ieee80211_eht_operation *eht_oper, - u64 *changed) -{ - struct ieee80211_local *local = link->sdata->local; - u16 bitmap, extracted; - - bitmap = ieee80211_eht_oper_dis_subchan_bitmap(eht_oper); - extracted = ieee80211_extract_dis_subch_bmap(eht_oper, - &link->conf->chanreq.oper, - bitmap); - - /* accept if there are no changes */ - if (!(*changed & BSS_CHANGED_BANDWIDTH) && - extracted == link->conf->eht_puncturing) - return true; - - if (!cfg80211_valid_disable_subchannel_bitmap(&bitmap, - &link->conf->chanreq.oper)) { - link_info(link, - "Got an invalid disable subchannel bitmap from AP %pM: bitmap = 0x%x, bw = 0x%x. disconnect\n", - link->u.mgd.bssid, - bitmap, - link->conf->chanreq.oper.width); - return false; - } - - if (bitmap && ieee80211_hw_check(&local->hw, DISALLOW_PUNCTURING)) - return false; - - ieee80211_handle_puncturing_bitmap(link, eht_oper, bitmap, changed); - return true; -} - static void ieee80211_ml_reconf_work(struct wiphy *wiphy, struct wiphy_work *work) { @@ -6614,21 +6493,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, elems->pwr_constr_elem, elems->cisco_dtpc_elem); - if (elems->eht_operation && - link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_EHT) { - if (!ieee80211_config_puncturing(link, elems->eht_operation, - &changed)) { - ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, - WLAN_REASON_DEAUTH_LEAVING, - true, deauth_buf); - ieee80211_report_disconnect(sdata, deauth_buf, - sizeof(deauth_buf), true, - WLAN_REASON_DEAUTH_LEAVING, - false); - goto free; - } - } - ieee80211_ml_reconfiguration(sdata, elems); ieee80211_process_adv_ttlm(sdata, elems, le64_to_cpu(mgmt->u.beacon.timestamp)); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 63a88169d53d3..5108dbaa93607 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -6,7 +6,7 @@ * Copyright 2007 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * utilities for mac80211 */ @@ -2810,9 +2810,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) sdata->vif.bss_conf.protected_keep_alive) changed |= BSS_CHANGED_KEEP_ALIVE; - if (sdata->vif.bss_conf.eht_puncturing) - changed |= BSS_CHANGED_EHT_PUNCTURING; - ieee80211_bss_info_change_notify(sdata, changed); } else if (!WARN_ON(!link)) { @@ -4365,14 +4362,17 @@ EXPORT_SYMBOL(ieee80211_radar_detected); void ieee80211_chandef_downgrade(struct cfg80211_chan_def *c, struct ieee80211_conn_settings *conn) { - /* no-HT indicates nothing to do */ - enum nl80211_chan_width new_primary_width = NL80211_CHAN_WIDTH_20_NOHT; + enum nl80211_chan_width new_primary_width; struct ieee80211_conn_settings _ignored = {}; /* allow passing NULL if caller doesn't care */ if (!conn) conn = &_ignored; +again: + /* no-HT indicates nothing to do */ + new_primary_width = NL80211_CHAN_WIDTH_20_NOHT; + switch (c->width) { default: case NL80211_CHAN_WIDTH_20_NOHT: @@ -4382,6 +4382,7 @@ void ieee80211_chandef_downgrade(struct cfg80211_chan_def *c, c->width = NL80211_CHAN_WIDTH_20_NOHT; conn->mode = IEEE80211_CONN_MODE_LEGACY; conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20; + c->punctured = 0; break; case NL80211_CHAN_WIDTH_40: c->width = NL80211_CHAN_WIDTH_20; @@ -4389,6 +4390,7 @@ void ieee80211_chandef_downgrade(struct cfg80211_chan_def *c, if (conn->mode == IEEE80211_CONN_MODE_VHT) conn->mode = IEEE80211_CONN_MODE_HT; conn->bw_limit = IEEE80211_CONN_BW_LIMIT_20; + c->punctured = 0; break; case NL80211_CHAN_WIDTH_80: new_primary_width = NL80211_CHAN_WIDTH_40; @@ -4429,11 +4431,19 @@ void ieee80211_chandef_downgrade(struct cfg80211_chan_def *c, } if (new_primary_width != NL80211_CHAN_WIDTH_20_NOHT) { - c->center_freq1 = - cfg80211_chandef_primary_freq(c, new_primary_width); + c->center_freq1 = cfg80211_chandef_primary(c, new_primary_width, + &c->punctured); c->width = new_primary_width; } + /* + * With an 80 MHz channel, we might have the puncturing in the primary + * 40 Mhz channel, but that's not valid when downgraded to 40 MHz width. + * In that case, downgrade again. + */ + if (!cfg80211_chandef_valid(c) && c->punctured) + goto again; + WARN_ON_ONCE(!cfg80211_chandef_valid(c)); } diff --git a/net/wireless/chan.c b/net/wireless/chan.c index bfa2ea935fc28..e2ce89afa9ffe 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -6,7 +6,7 @@ * * Copyright 2009 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright 2018-2023 Intel Corporation + * Copyright 2018-2024 Intel Corporation */ #include @@ -27,11 +27,10 @@ void cfg80211_chandef_create(struct cfg80211_chan_def *chandef, if (WARN_ON(!chan)) return; - chandef->chan = chan; - chandef->freq1_offset = chan->freq_offset; - chandef->center_freq2 = 0; - chandef->edmg.bw_config = 0; - chandef->edmg.channels = 0; + *chandef = (struct cfg80211_chan_def) { + .chan = chan, + .freq1_offset = chan->freq_offset, + }; switch (chan_type) { case NL80211_CHAN_NO_HT: @@ -87,10 +86,9 @@ static const struct cfg80211_per_bw_puncturing_values per_bw_puncturing[] = { CFG80211_PER_BW_VALID_PUNCTURING_VALUES(320) }; -bool cfg80211_valid_disable_subchannel_bitmap(u16 *bitmap, - const struct cfg80211_chan_def *chandef) +static bool valid_puncturing_bitmap(const struct cfg80211_chan_def *chandef) { - u32 idx, i, start_freq; + u32 idx, i, start_freq, primary_center = chandef->chan->center_freq; switch (chandef->width) { case NL80211_CHAN_WIDTH_80: @@ -106,24 +104,23 @@ bool cfg80211_valid_disable_subchannel_bitmap(u16 *bitmap, start_freq = chandef->center_freq1 - 160; break; default: - *bitmap = 0; - break; + return chandef->punctured == 0; } - if (!*bitmap) + if (!chandef->punctured) return true; /* check if primary channel is punctured */ - if (*bitmap & (u16)BIT((chandef->chan->center_freq - start_freq) / 20)) + if (chandef->punctured & (u16)BIT((primary_center - start_freq) / 20)) return false; - for (i = 0; i < per_bw_puncturing[idx].len; i++) - if (per_bw_puncturing[idx].valid_values[i] == *bitmap) + for (i = 0; i < per_bw_puncturing[idx].len; i++) { + if (per_bw_puncturing[idx].valid_values[i] == chandef->punctured) return true; + } return false; } -EXPORT_SYMBOL(cfg80211_valid_disable_subchannel_bitmap); static bool cfg80211_edmg_chandef_valid(const struct cfg80211_chan_def *chandef) { @@ -386,17 +383,19 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) !cfg80211_edmg_chandef_valid(chandef)) return false; - return true; + return valid_puncturing_bitmap(chandef); } EXPORT_SYMBOL(cfg80211_chandef_valid); -int cfg80211_chandef_primary_freq(const struct cfg80211_chan_def *c, - enum nl80211_chan_width primary_chan_width) +int cfg80211_chandef_primary(const struct cfg80211_chan_def *c, + enum nl80211_chan_width primary_chan_width, + u16 *punctured) { int pri_width = nl80211_chan_width_to_mhz(primary_chan_width); int width = cfg80211_chandef_get_width(c); u32 control = c->chan->center_freq; u32 center = c->center_freq1; + u16 _punct = 0; if (WARN_ON_ONCE(pri_width < 0 || width < 0)) return -1; @@ -405,26 +404,41 @@ int cfg80211_chandef_primary_freq(const struct cfg80211_chan_def *c, if (WARN_ON_ONCE(pri_width > width)) return -1; + if (!punctured) + punctured = &_punct; + + *punctured = c->punctured; + while (width > pri_width) { - if (control > center) + unsigned int bits_to_drop = width / 20 / 2; + + if (control > center) { center += width / 4; - else + *punctured >>= bits_to_drop; + } else { center -= width / 4; + *punctured &= (1 << bits_to_drop) - 1; + } width /= 2; } return center; } -EXPORT_SYMBOL(cfg80211_chandef_primary_freq); +EXPORT_SYMBOL(cfg80211_chandef_primary); static const struct cfg80211_chan_def * check_chandef_primary_compat(const struct cfg80211_chan_def *c1, const struct cfg80211_chan_def *c2, enum nl80211_chan_width primary_chan_width) { + u16 punct_c1 = 0, punct_c2 = 0; + /* check primary is compatible -> error if not */ - if (cfg80211_chandef_primary_freq(c1, primary_chan_width) != - cfg80211_chandef_primary_freq(c2, primary_chan_width)) + if (cfg80211_chandef_primary(c1, primary_chan_width, &punct_c1) != + cfg80211_chandef_primary(c2, primary_chan_width, &punct_c2)) + return ERR_PTR(-EINVAL); + + if (punct_c1 != punct_c2) return ERR_PTR(-EINVAL); /* assumes c1 is smaller width, if that was just checked -> done */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 68c20409eca65..b533412ad1e03 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3218,21 +3218,6 @@ static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev) wdev->iftype == NL80211_IFTYPE_P2P_GO; } -static int nl80211_parse_punct_bitmap(struct cfg80211_registered_device *rdev, - struct genl_info *info, - const struct cfg80211_chan_def *chandef, - u16 *punct_bitmap) -{ - if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_PUNCT)) - return -EINVAL; - - *punct_bitmap = nla_get_u32(info->attrs[NL80211_ATTR_PUNCT_BITMAP]); - if (!cfg80211_valid_disable_subchannel_bitmap(punct_bitmap, chandef)) - return -EINVAL; - - return 0; -} - int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, struct genl_info *info, struct cfg80211_chan_def *chandef) @@ -3340,6 +3325,19 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, chandef->edmg.channels = 0; } + if (info->attrs[NL80211_ATTR_PUNCT_BITMAP]) { + chandef->punctured = + nla_get_u32(info->attrs[NL80211_ATTR_PUNCT_BITMAP]); + + if (chandef->punctured && + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_PUNCT)) { + NL_SET_ERR_MSG(extack, + "driver doesn't support puncturing"); + return -EINVAL; + } + } + if (!cfg80211_chandef_valid(chandef)) { NL_SET_ERR_MSG(extack, "invalid channel definition"); return -EINVAL; @@ -3816,6 +3814,10 @@ int nl80211_send_chandef(struct sk_buff *msg, const struct cfg80211_chan_def *ch if (chandef->center_freq2 && nla_put_u32(msg, NL80211_ATTR_CENTER_FREQ2, chandef->center_freq2)) return -ENOBUFS; + if (chandef->punctured && + nla_put_u32(msg, NL80211_ATTR_PUNCT_BITMAP, chandef->punctured)) + return -ENOBUFS; + return 0; } EXPORT_SYMBOL(nl80211_send_chandef); @@ -6061,14 +6063,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) goto out; } - if (info->attrs[NL80211_ATTR_PUNCT_BITMAP]) { - err = nl80211_parse_punct_bitmap(rdev, info, - ¶ms->chandef, - ¶ms->punct_bitmap); - if (err) - goto out; - } - if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, ¶ms->chandef, wdev->iftype)) { err = -EINVAL; @@ -10228,14 +10222,6 @@ skip_beacons: if (info->attrs[NL80211_ATTR_CH_SWITCH_BLOCK_TX]) params.block_tx = true; - if (info->attrs[NL80211_ATTR_PUNCT_BITMAP]) { - err = nl80211_parse_punct_bitmap(rdev, info, - ¶ms.chandef, - ¶ms.punct_bitmap); - if (err) - goto free; - } - err = rdev_channel_switch(rdev, dev, ¶ms); free: @@ -19356,7 +19342,7 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, struct cfg80211_chan_def *chandef, gfp_t gfp, enum nl80211_commands notif, - u8 count, bool quiet, u16 punct_bitmap) + u8 count, bool quiet) { struct wireless_dev *wdev = netdev->ieee80211_ptr; struct sk_buff *msg; @@ -19390,9 +19376,6 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, goto nla_put_failure; } - if (nla_put_u32(msg, NL80211_ATTR_PUNCT_BITMAP, punct_bitmap)) - goto nla_put_failure; - genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, @@ -19405,7 +19388,7 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, void cfg80211_ch_switch_notify(struct net_device *dev, struct cfg80211_chan_def *chandef, - unsigned int link_id, u16 punct_bitmap) + unsigned int link_id) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -19414,7 +19397,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev, lockdep_assert_wiphy(wdev->wiphy); WARN_INVALID_LINK_ID(wdev, link_id); - trace_cfg80211_ch_switch_notify(dev, chandef, link_id, punct_bitmap); + trace_cfg80211_ch_switch_notify(dev, chandef, link_id); switch (wdev->iftype) { case NL80211_IFTYPE_STATION: @@ -19443,15 +19426,14 @@ void cfg80211_ch_switch_notify(struct net_device *dev, cfg80211_sched_dfs_chan_update(rdev); nl80211_ch_switch_notify(rdev, dev, link_id, chandef, GFP_KERNEL, - NL80211_CMD_CH_SWITCH_NOTIFY, 0, false, - punct_bitmap); + NL80211_CMD_CH_SWITCH_NOTIFY, 0, false); } EXPORT_SYMBOL(cfg80211_ch_switch_notify); void cfg80211_ch_switch_started_notify(struct net_device *dev, struct cfg80211_chan_def *chandef, unsigned int link_id, u8 count, - bool quiet, u16 punct_bitmap) + bool quiet) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -19460,13 +19442,12 @@ void cfg80211_ch_switch_started_notify(struct net_device *dev, lockdep_assert_wiphy(wdev->wiphy); WARN_INVALID_LINK_ID(wdev, link_id); - trace_cfg80211_ch_switch_started_notify(dev, chandef, link_id, - punct_bitmap); + trace_cfg80211_ch_switch_started_notify(dev, chandef, link_id); nl80211_ch_switch_notify(rdev, dev, link_id, chandef, GFP_KERNEL, NL80211_CMD_CH_SWITCH_STARTED_NOTIFY, - count, quiet, punct_bitmap); + count, quiet); } EXPORT_SYMBOL(cfg80211_ch_switch_started_notify); diff --git a/net/wireless/tests/chan.c b/net/wireless/tests/chan.c index b9e7a27e43cba..d02258ac2dabd 100644 --- a/net/wireless/tests/chan.c +++ b/net/wireless/tests/chan.c @@ -2,7 +2,7 @@ /* * KUnit tests for channel helper functions * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2024 Intel Corporation */ #include #include @@ -140,6 +140,52 @@ static const struct chandef_compat_case { }, .compat = true, }, + { + .desc = "matching primary 160 MHz & punctured secondary 160 Mhz", + .c1 = { + .width = NL80211_CHAN_WIDTH_160, + .chan = &chan_6ghz_105, + .center_freq1 = 6475 + 70, + }, + .c2 = { + .width = NL80211_CHAN_WIDTH_320, + .chan = &chan_6ghz_105, + .center_freq1 = 6475 - 10, + .punctured = 0xf, + }, + .compat = true, + }, + { + .desc = "matching primary 160 MHz & punctured matching", + .c1 = { + .width = NL80211_CHAN_WIDTH_160, + .chan = &chan_6ghz_105, + .center_freq1 = 6475 + 70, + .punctured = 0xc0, + }, + .c2 = { + .width = NL80211_CHAN_WIDTH_320, + .chan = &chan_6ghz_105, + .center_freq1 = 6475 - 10, + .punctured = 0xc000, + }, + .compat = true, + }, + { + .desc = "matching primary 160 MHz & punctured not matching", + .c1 = { + .width = NL80211_CHAN_WIDTH_160, + .chan = &chan_6ghz_105, + .center_freq1 = 6475 + 70, + .punctured = 0x80, + }, + .c2 = { + .width = NL80211_CHAN_WIDTH_320, + .chan = &chan_6ghz_105, + .center_freq1 = 6475 - 10, + .punctured = 0xc000, + }, + }, }; KUNIT_ARRAY_PARAM_DESC(chandef_compat, chandef_compat_cases, desc) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 1f374c8a17a50..ae5e585b6863d 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1,4 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Portions of this file + * Copyright(c) 2016-2017 Intel Deutschland GmbH + * Copyright (C) 2018, 2020-2024 Intel Corporation + */ #undef TRACE_SYSTEM #define TRACE_SYSTEM cfg80211 @@ -135,7 +140,8 @@ __field(u32, width) \ __field(u32, center_freq1) \ __field(u32, freq1_offset) \ - __field(u32, center_freq2) + __field(u32, center_freq2) \ + __field(u16, punctured) #define CHAN_DEF_ASSIGN(chandef) \ do { \ if ((chandef) && (chandef)->chan) { \ @@ -148,6 +154,7 @@ __entry->center_freq1 = (chandef)->center_freq1;\ __entry->freq1_offset = (chandef)->freq1_offset;\ __entry->center_freq2 = (chandef)->center_freq2;\ + __entry->punctured = (chandef)->punctured; \ } else { \ __entry->band = 0; \ __entry->control_freq = 0; \ @@ -156,14 +163,15 @@ __entry->center_freq1 = 0; \ __entry->freq1_offset = 0; \ __entry->center_freq2 = 0; \ + __entry->punctured = 0; \ } \ } while (0) #define CHAN_DEF_PR_FMT \ - "band: %d, control freq: %u.%03u, width: %d, cf1: %u.%03u, cf2: %u" + "band: %d, control freq: %u.%03u, width: %d, cf1: %u.%03u, cf2: %u, punct: 0x%x" #define CHAN_DEF_PR_ARG __entry->band, __entry->control_freq, \ __entry->freq_offset, __entry->width, \ __entry->center_freq1, __entry->freq1_offset, \ - __entry->center_freq2 + __entry->center_freq2, __entry->punctured #define FILS_AAD_ASSIGN(fa) \ do { \ @@ -3267,47 +3275,39 @@ TRACE_EVENT(cfg80211_chandef_dfs_required, TRACE_EVENT(cfg80211_ch_switch_notify, TP_PROTO(struct net_device *netdev, struct cfg80211_chan_def *chandef, - unsigned int link_id, - u16 punct_bitmap), - TP_ARGS(netdev, chandef, link_id, punct_bitmap), + unsigned int link_id), + TP_ARGS(netdev, chandef, link_id), TP_STRUCT__entry( NETDEV_ENTRY CHAN_DEF_ENTRY __field(unsigned int, link_id) - __field(u16, punct_bitmap) ), TP_fast_assign( NETDEV_ASSIGN; CHAN_DEF_ASSIGN(chandef); __entry->link_id = link_id; - __entry->punct_bitmap = punct_bitmap; ), - TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d, punct_bitmap:%u", - NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id, - __entry->punct_bitmap) + TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d", + NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id) ); TRACE_EVENT(cfg80211_ch_switch_started_notify, TP_PROTO(struct net_device *netdev, struct cfg80211_chan_def *chandef, - unsigned int link_id, - u16 punct_bitmap), - TP_ARGS(netdev, chandef, link_id, punct_bitmap), + unsigned int link_id), + TP_ARGS(netdev, chandef, link_id), TP_STRUCT__entry( NETDEV_ENTRY CHAN_DEF_ENTRY __field(unsigned int, link_id) - __field(u16, punct_bitmap) ), TP_fast_assign( NETDEV_ASSIGN; CHAN_DEF_ASSIGN(chandef); __entry->link_id = link_id; - __entry->punct_bitmap = punct_bitmap; ), - TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d, punct_bitmap:%u", - NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id, - __entry->punct_bitmap) + TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d", + NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id) ); TRACE_EVENT(cfg80211_radar_event, -- cgit 1.2.3-korg From b1344b1399daec9aca62bd0b2ea94874f5b8e126 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 20:04:56 +0100 Subject: wifi: mac80211: add/use ieee80211_get_sn() This will also be useful for MLO duplicate multicast detection, but add it already here and use it in one place that trivially converts. Link: https://msgid.link/20240129200456.f0ff49c80006.I850d2785ab1640e56e262d3ad7343b87f6962552@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 7 ++++++- net/mac80211/rx.c | 5 ++--- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index d9d2c12531579..b9367d5f04c4d 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -9,7 +9,7 @@ * Copyright (c) 2006, Michael Wu * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright (c) 2016 - 2017 Intel Deutschland GmbH - * Copyright (c) 2018 - 2023 Intel Corporation + * Copyright (c) 2018 - 2024 Intel Corporation */ #ifndef LINUX_IEEE80211_H @@ -808,6 +808,11 @@ static inline bool ieee80211_is_frag(struct ieee80211_hdr *hdr) hdr->seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG); } +static inline u16 ieee80211_get_sn(struct ieee80211_hdr *hdr) +{ + return le16_get_bits(hdr->seq_ctrl, IEEE80211_SCTL_SEQ); +} + struct ieee80211s_hdr { u8 flags; u8 ttl; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 76798e8057f79..53c4764dc1ed8 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -6,7 +6,7 @@ * Copyright 2007-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #include @@ -1251,8 +1251,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - u16 sc = le16_to_cpu(hdr->seq_ctrl); - u16 mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4; + u16 mpdu_seq_num = ieee80211_get_sn(hdr); u16 head_seq_num, buf_size; int index; bool ret = true; -- cgit 1.2.3-korg From 676259100cf3a81dd2d47918b36edb237986b9df Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 20:04:57 +0100 Subject: wifi: mac80211: implement MLO multicast deduplication If the vif is an MLD then it may receive multicast from different links, and should drop those frames according to the SN. Implement that. Link: https://msgid.link/20240129200456.693b77d14b44.I491846f2bea0058c14eab6422962c10bfae9b675@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 5 +++++ net/mac80211/ieee80211_i.h | 4 +++- net/mac80211/mlme.c | 3 +++ net/mac80211/rx.c | 22 ++++++++++++++++++++-- 4 files changed, 31 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index b9367d5f04c4d..e9078143b822f 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -191,6 +191,11 @@ static inline bool ieee80211_sn_less(u16 sn1, u16 sn2) return ((sn1 - sn2) & IEEE80211_SN_MASK) > (IEEE80211_SN_MODULO >> 1); } +static inline bool ieee80211_sn_less_eq(u16 sn1, u16 sn2) +{ + return ((sn2 - sn1) & IEEE80211_SN_MASK) <= (IEEE80211_SN_MODULO >> 1); +} + static inline u16 ieee80211_sn_add(u16 sn1, u16 sn2) { return (sn1 + sn2) & IEEE80211_SN_MASK; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 534cac3fc8dfa..46b517cf47ea6 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007-2010 Johannes Berg * Copyright 2013-2015 Intel Mobile Communications GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #ifndef IEEE80211_I_H @@ -523,6 +523,8 @@ struct ieee80211_if_managed { unsigned int flags; + u16 mcast_seq_last; + bool status_acked; bool status_received; __le16 status_fc; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 4eaf5c10efdb4..35dda59828540 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3351,6 +3351,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->neg_ttlm_timeout_work); ieee80211_vif_set_links(sdata, 0, 0); + + ifmgd->mcast_seq_last = IEEE80211_SN_MODULO; } static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata) @@ -7512,6 +7514,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) spin_lock_init(&ifmgd->teardown_lock); ifmgd->teardown_skb = NULL; ifmgd->orig_teardown_skb = NULL; + ifmgd->mcast_seq_last = IEEE80211_SN_MODULO; } static void ieee80211_recalc_smps_work(struct wiphy *wiphy, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 53c4764dc1ed8..9902ea69af0a1 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1434,13 +1434,31 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx) return RX_CONTINUE; if (ieee80211_is_ctl(hdr->frame_control) || - ieee80211_is_any_nullfunc(hdr->frame_control) || - is_multicast_ether_addr(hdr->addr1)) + ieee80211_is_any_nullfunc(hdr->frame_control)) return RX_CONTINUE; if (!rx->sta) return RX_CONTINUE; + if (unlikely(is_multicast_ether_addr(hdr->addr1))) { + struct ieee80211_sub_if_data *sdata = rx->sdata; + u16 sn = ieee80211_get_sn(hdr); + + if (!ieee80211_is_data_present(hdr->frame_control)) + return RX_CONTINUE; + + if (!ieee80211_vif_is_mld(&sdata->vif) || + sdata->vif.type != NL80211_IFTYPE_STATION) + return RX_CONTINUE; + + if (sdata->u.mgd.mcast_seq_last != IEEE80211_SN_MODULO && + ieee80211_sn_less_eq(sn, sdata->u.mgd.mcast_seq_last)) + return RX_DROP_U_DUP; + + sdata->u.mgd.mcast_seq_last = sn; + return RX_CONTINUE; + } + if (unlikely(ieee80211_has_retry(hdr->frame_control) && rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) { I802_DEBUG_INC(rx->local->dot11FrameDuplicateCount); -- cgit 1.2.3-korg From 3552a22880eed3e834a33d02aec6800974bb42b4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 20:06:53 +0100 Subject: wifi: mac80211: disambiguate element parsing errors Let the element parsing function return what kind of error was encountered, as a bitmap, even if nothing currently checks for which specific error it was, we'll use it later. Link: https://msgid.link/20240129200652.1a69f2a31ec7.I55b86561d64e7ef1504c73f6f2813c33030c8136@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 11 ++++- net/mac80211/util.c | 108 ++++++++++++++++++++++++++++++--------------- 2 files changed, 81 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 46b517cf47ea6..f5fe659a1efd2 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1666,6 +1666,13 @@ struct ieee80211_csa_ie { u32 max_switch_time; }; +enum ieee80211_elems_parse_error { + IEEE80211_PARSE_ERR_INVALID_END = BIT(0), + IEEE80211_PARSE_ERR_DUP_ELEM = BIT(1), + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE = BIT(2), + IEEE80211_PARSE_ERR_UNEXPECTED_ELEM = BIT(3), +}; + /* Parsed Information Elements */ struct ieee802_11_elems { const u8 *ie_start; @@ -1776,8 +1783,8 @@ struct ieee802_11_elems { struct ieee80211_mle_per_sta_profile *prof; size_t sta_prof_len; - /* whether a parse error occurred while retrieving these elements */ - bool parse_error; + /* whether/which parse error occurred while retrieving these elements */ + u8 parse_error; /* * scratch buffer that can be used for various element parsing related diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 5108dbaa93607..c1fa762f0cbab 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1063,7 +1063,7 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, for_each_element(elem, params->start, params->len) { const struct element *subelem; - bool elem_parse_failed; + u8 elem_parse_failed; u8 id = elem->id; u8 elen = elem->datalen; const u8 *pos = elem->data; @@ -1119,7 +1119,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, * that if the content gets bigger it might be needed more than once */ if (test_bit(id, seen_elems)) { - elems->parse_error = true; + elems->parse_error |= + IEEE80211_PARSE_ERR_DUP_ELEM; continue; } break; @@ -1128,19 +1129,21 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, if (calc_crc && id < 64 && (params->filter & (1ULL << id))) crc = crc32_be(crc, pos - 2, elen + 2); - elem_parse_failed = false; + elem_parse_failed = 0; switch (id) { case WLAN_EID_LINK_ID: if (elen + 2 < sizeof(struct ieee80211_tdls_lnkie)) { - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; } elems->lnk_id = (void *)(pos - 2); break; case WLAN_EID_CHAN_SWITCH_TIMING: if (elen < sizeof(struct ieee80211_ch_switch_timing)) { - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; } elems->ch_sw_timing = (void *)pos; @@ -1161,14 +1164,16 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, if (elen >= 1) elems->ds_params = pos; else - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_TIM: if (elen >= sizeof(struct ieee80211_tim_ie)) { elems->tim = (void *)pos; elems->tim_len = elen; } else - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_VENDOR_SPECIFIC: if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 && @@ -1198,7 +1203,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, if (elen >= 1) elems->erp_info = pos; else - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_EXT_SUPP_RATES: elems->ext_supp_rates = pos; @@ -1210,7 +1216,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, if (elen >= sizeof(struct ieee80211_ht_cap)) elems->ht_cap_elem = (void *)pos; else - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_HT_OPERATION: if (params->mode < IEEE80211_CONN_MODE_HT) @@ -1218,7 +1225,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, if (elen >= sizeof(struct ieee80211_ht_operation)) elems->ht_operation = (void *)pos; else - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_VHT_CAPABILITY: if (params->mode < IEEE80211_CONN_MODE_VHT) @@ -1226,7 +1234,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, if (elen >= sizeof(struct ieee80211_vht_cap)) elems->vht_cap_elem = (void *)pos; else - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_VHT_OPERATION: if (params->mode < IEEE80211_CONN_MODE_VHT) @@ -1237,7 +1246,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, crc = crc32_be(crc, pos - 2, elen + 2); break; } - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_OPMODE_NOTIF: if (params->mode < IEEE80211_CONN_MODE_VHT) @@ -1248,7 +1258,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, crc = crc32_be(crc, pos - 2, elen + 2); break; } - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_MESH_ID: elems->mesh_id = pos; @@ -1258,7 +1269,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, if (elen >= sizeof(struct ieee80211_meshconf_ie)) elems->mesh_config = (void *)pos; else - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_PEER_MGMT: elems->peering = pos; @@ -1284,18 +1296,21 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, if (elen >= sizeof(struct ieee80211_rann_ie)) elems->rann = (void *)pos; else - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_CHANNEL_SWITCH: if (elen != sizeof(struct ieee80211_channel_sw_ie)) { - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; } elems->ch_switch_ie = (void *)pos; break; case WLAN_EID_EXT_CHANSWITCH_ANN: if (elen != sizeof(struct ieee80211_ext_chansw_ie)) { - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; } elems->ext_chansw_ie = (void *)pos; @@ -1304,7 +1319,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, if (params->mode < IEEE80211_CONN_MODE_HT) break; if (elen != sizeof(struct ieee80211_sec_chan_offs_ie)) { - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; } elems->sec_chan_offs = (void *)pos; @@ -1312,7 +1328,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, case WLAN_EID_CHAN_SWITCH_PARAM: if (elen < sizeof(*elems->mesh_chansw_params_ie)) { - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; } elems->mesh_chansw_params_ie = (void *)pos; @@ -1320,9 +1337,16 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, case WLAN_EID_WIDE_BW_CHANNEL_SWITCH: if (params->mode < IEEE80211_CONN_MODE_VHT) break; - if (!params->action || - elen < sizeof(*elems->wide_bw_chansw_ie)) { - elem_parse_failed = true; + + if (!params->action) { + elem_parse_failed = + IEEE80211_PARSE_ERR_UNEXPECTED_ELEM; + break; + } + + if (elen < sizeof(*elems->wide_bw_chansw_ie)) { + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; } elems->wide_bw_chansw_ie = (void *)pos; @@ -1331,7 +1355,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, if (params->mode < IEEE80211_CONN_MODE_VHT) break; if (params->action) { - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_UNEXPECTED_ELEM; break; } /* @@ -1345,7 +1370,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, elems->wide_bw_chansw_ie = (void *)subelem->data; else - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; } if (params->mode < IEEE80211_CONN_MODE_EHT) @@ -1361,7 +1387,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, edatalen)) elems->bandwidth_indication = edata; else - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; } break; case WLAN_EID_COUNTRY: @@ -1370,7 +1397,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, break; case WLAN_EID_PWR_CONSTRAINT: if (elen != 1) { - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; } elems->pwr_constr_elem = pos; @@ -1382,7 +1410,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, * tag (0x00). */ if (elen < 4) { - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; } @@ -1391,7 +1420,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, break; if (elen != 6) { - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; } @@ -1402,7 +1432,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, break; case WLAN_EID_ADDBA_EXT: if (elen < sizeof(struct ieee80211_addba_ext_ie)) { - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; } elems->addba_ext_ie = (void *)pos; @@ -1411,7 +1442,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, if (elen >= sizeof(struct ieee80211_timeout_interval_ie)) elems->timeout_int = (void *)pos; else - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_BSS_MAX_IDLE_PERIOD: if (elen >= sizeof(*elems->max_idle_period_ie)) @@ -1444,7 +1476,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, if (elen >= sizeof(*elems->s1g_capab)) elems->s1g_capab = (void *)pos; else - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_S1G_OPERATION: if (params->mode != IEEE80211_CONN_MODE_S1G) @@ -1452,7 +1485,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, if (elen == sizeof(*elems->s1g_oper)) elems->s1g_oper = (void *)pos; else - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_S1G_BCN_COMPAT: if (params->mode != IEEE80211_CONN_MODE_S1G) @@ -1460,7 +1494,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, if (elen == sizeof(*elems->s1g_bcn_compat)) elems->s1g_bcn_compat = (void *)pos; else - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_AID_RESPONSE: if (params->mode != IEEE80211_CONN_MODE_S1G) @@ -1468,20 +1503,21 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, if (elen == sizeof(struct ieee80211_aid_response_ie)) elems->aid_resp = (void *)pos; else - elem_parse_failed = true; + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; default: break; } if (elem_parse_failed) - elems->parse_error = true; + elems->parse_error |= elem_parse_failed; else __set_bit(id, seen_elems); } if (!for_each_element_completed(elem, params->start, params->len)) - elems->parse_error = true; + elems->parse_error |= IEEE80211_PARSE_ERR_INVALID_END; return crc; } -- cgit 1.2.3-korg From a57944d1ee8bf29cbdf7d6b8e9579d88bf912c3e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 20:06:54 +0100 Subject: wifi: mac80211: disallow basic multi-link element in per-STA profile There really shouldn't be a basic multi-link element in any per-STA profile in an association response, it's not clear what that would really mean. Refuse connecting in this case since the AP isn't following the spec. Link: https://msgid.link/20240129200652.23f1e3b337f1.Idd2e43cdbfe3ba15b3e9b8aeb54c8115587177a0@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mlme.c | 3 ++- net/mac80211/util.c | 5 +++++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f5fe659a1efd2..e11297b4dc635 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1671,6 +1671,7 @@ enum ieee80211_elems_parse_error { IEEE80211_PARSE_ERR_DUP_ELEM = BIT(1), IEEE80211_PARSE_ERR_BAD_ELEM_SIZE = BIT(2), IEEE80211_PARSE_ERR_UNEXPECTED_ELEM = BIT(3), + IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC = BIT(4), }; /* Parsed Information Elements */ diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 35dda59828540..9a0331d914d3f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4304,7 +4304,8 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, link->u.mgd.bss_param_ch_cnt = ieee80211_mle_get_bss_param_ch_cnt(elems->ml_basic); } - } else if (!elems->prof || + } else if (elems->parse_error & IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC || + !elems->prof || !(elems->prof->control & prof_bss_param_ch_present)) { ret = false; goto out; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index c1fa762f0cbab..d85a9c5cde267 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1012,6 +1012,11 @@ ieee80211_parse_extension_element(u32 *crc, switch (le16_get_bits(mle->control, IEEE80211_ML_CONTROL_TYPE)) { case IEEE80211_ML_CONTROL_TYPE_BASIC: + if (elems->ml_basic) { + elems->parse_error |= + IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC; + break; + } elems->ml_basic_elem = (void *)elem; elems->ml_basic = data; elems->ml_basic_len = len; -- cgit 1.2.3-korg From 90233160d761e4ea56c7ac30e36fdaec73b3bdc4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 20:19:27 +0100 Subject: wifi: mac80211: simplify HE/EHT element length functions We don't need to pass the iftype there, we already have it in the sdata. Simplify this code. Link: https://msgid.link/20240129202041.5890eb1d4184.Ibce7e5abcc7887630da03ac2263d8004ec541418@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 4 ++-- net/mac80211/mesh.c | 8 +++----- net/mac80211/mesh_plink.c | 8 +++----- net/mac80211/util.c | 15 ++++++++------- 4 files changed, 16 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e11297b4dc635..44400ce9a0b17 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2502,7 +2502,7 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, u32 cap); u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, const struct cfg80211_chan_def *chandef); -u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype); +u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata); u8 *ieee80211_ie_build_he_cap(const struct ieee80211_conn_settings *conn, const struct ieee80211_sta_he_cap *he_cap, u8 *pos, u8 *end); @@ -2651,7 +2651,7 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw, void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache); void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache); -u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata, u8 iftype); +u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata); u8 *ieee80211_ie_build_eht_cap(u8 *pos, const struct ieee80211_sta_he_cap *he_cap, const struct ieee80211_sta_eht_cap *eht_cap, diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index e06d9ed2d31b4..9fd209e4ca19f 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, 2009 open80211s Ltd. - * Copyright (C) 2018 - 2023 Intel Corporation + * Copyright (C) 2018 - 2024 Intel Corporation * Authors: Luis Carlos Cobo * Javier Cardona */ @@ -981,10 +981,8 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) band = chanctx_conf->def.chan->band; rcu_read_unlock(); - ie_len_he_cap = ieee80211_ie_len_he_cap(sdata, - NL80211_IFTYPE_MESH_POINT); - ie_len_eht_cap = ieee80211_ie_len_eht_cap(sdata, - NL80211_IFTYPE_MESH_POINT); + ie_len_he_cap = ieee80211_ie_len_he_cap(sdata); + ie_len_eht_cap = ieee80211_ie_len_eht_cap(sdata); head_len = hdr_len + 2 + /* NULL SSID */ /* Channel Switch Announcement */ diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index e3aad60f68ab9..7ba0f01805a4a 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, 2009 open80211s Ltd. - * Copyright (C) 2019, 2021-2023 Intel Corporation + * Copyright (C) 2019, 2021-2024 Intel Corporation * Author: Luis Carlos Cobo */ #include @@ -226,10 +226,8 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.self_prot); int err = -ENOMEM; - ie_len_he_cap = ieee80211_ie_len_he_cap(sdata, - NL80211_IFTYPE_MESH_POINT); - ie_len_eht_cap = ieee80211_ie_len_eht_cap(sdata, - NL80211_IFTYPE_MESH_POINT); + ie_len_he_cap = ieee80211_ie_len_he_cap(sdata); + ie_len_eht_cap = ieee80211_ie_len_eht_cap(sdata); skb = dev_alloc_skb(local->tx_headroom + hdr_len + 2 + /* capability info */ diff --git a/net/mac80211/util.c b/net/mac80211/util.c index d85a9c5cde267..e9a7978d47d42 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3272,7 +3272,8 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, return pos; } -u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype) +/* this may return more than ieee80211_ie_build_he_cap() will need */ +u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata) { const struct ieee80211_sta_he_cap *he_cap; struct ieee80211_supported_band *sband; @@ -3282,7 +3283,7 @@ u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype) if (!sband) return 0; - he_cap = ieee80211_get_he_iftype_cap(sband, iftype); + he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); if (!he_cap) return 0; @@ -5065,7 +5066,8 @@ u16 ieee80211_encode_usf(int listen_interval) return (u16) listen_interval; } -u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata, u8 iftype) +/* this may return more than ieee80211_ie_build_eht_cap() will need */ +u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata) { const struct ieee80211_sta_he_cap *he_cap; const struct ieee80211_sta_eht_cap *eht_cap; @@ -5077,13 +5079,12 @@ u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata, u8 iftype) if (!sband) return 0; - he_cap = ieee80211_get_he_iftype_cap(sband, iftype); - eht_cap = ieee80211_get_eht_iftype_cap(sband, iftype); + he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); + eht_cap = ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif); if (!he_cap || !eht_cap) return 0; - is_ap = iftype == NL80211_IFTYPE_AP || - iftype == NL80211_IFTYPE_P2P_GO; + is_ap = sdata->vif.type == NL80211_IFTYPE_AP; n = ieee80211_eht_mcs_nss_size(&he_cap->he_cap_elem, &eht_cap->eht_cap_elem, -- cgit 1.2.3-korg From 6239da18d2f947523a80fb1f85f8d8a13d1726c1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 20:19:28 +0100 Subject: wifi: mac80211: adjust EHT capa when lowering bandwidth If intending to associate with a lower bandwidth, remove capabilities related to 320 MHz from the EHT capabilities element. Also change the EHT MCS-NSS set accordingly: if just reducing 320->160 or similar the format doesn't change, just cut off the last bytes. If changing from higher bandwidth to 20 MHz only EHT STA, adjust the format. Note that this also requires adjusting the caller in mlme.c since the data written can now be shorter than it determined. We need to clean all that up. Since the other callers pass NULL for the conn limit, we don't need to change things there. Link: https://msgid.link/20240129202041.b5f6df108c77.I0d8ea04079c61cb3744cc88625eeaf0d4776dc2b@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 ++ net/mac80211/ieee80211_i.h | 3 +- net/mac80211/mesh.c | 3 +- net/mac80211/mlme.c | 15 ++++-- net/mac80211/tdls.c | 5 +- net/mac80211/util.c | 114 ++++++++++++++++++++++++++++++++++----------- 6 files changed, 106 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index e9078143b822f..e4322238f2735 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -3060,6 +3060,9 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) #define IEEE80211_EHT_PHY_CAP5_SUPP_EXTRA_EHT_LTF 0x40 #define IEEE80211_EHT_PHY_CAP6_MAX_NUM_SUPP_EHT_LTF_MASK 0x07 +#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_80MHZ 0x08 +#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_160MHZ 0x30 +#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_320MHZ 0x40 #define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_MASK 0x78 #define IEEE80211_EHT_PHY_CAP6_EHT_DUP_6GHZ_SUPP 0x80 diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 44400ce9a0b17..43c55ea6349c6 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2652,7 +2652,8 @@ void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache); void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache); u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata); -u8 *ieee80211_ie_build_eht_cap(u8 *pos, +u8 *ieee80211_ie_build_eht_cap(const struct ieee80211_conn_settings *conn, + u8 *pos, const struct ieee80211_sta_he_cap *he_cap, const struct ieee80211_sta_eht_cap *eht_cap, u8 *end, diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 9fd209e4ca19f..000fa9484b4e3 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -668,7 +668,8 @@ int mesh_add_eht_cap_ie(struct ieee80211_sub_if_data *sdata, return -ENOMEM; pos = skb_put(skb, ie_len); - ieee80211_ie_build_eht_cap(pos, he_cap, eht_cap, pos + ie_len, false); + ieee80211_ie_build_eht_cap(NULL, pos, he_cap, eht_cap, pos + ie_len, + false); return 0; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 9a0331d914d3f..bbc7894ccad06 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1072,9 +1072,10 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata, static void ieee80211_add_eht_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, - struct ieee80211_supported_band *sband) + struct ieee80211_supported_band *sband, + const struct ieee80211_conn_settings *conn) { - u8 *pos; + u8 *pos, *pre_eht_pos; const struct ieee80211_sta_he_cap *he_cap; const struct ieee80211_sta_eht_cap *eht_cap; u8 eht_cap_size; @@ -1097,8 +1098,11 @@ static void ieee80211_add_eht_ie(struct ieee80211_sub_if_data *sdata, ieee80211_eht_ppe_size(eht_cap->eht_ppe_thres[0], eht_cap->eht_cap_elem.phy_cap_info); pos = skb_put(skb, eht_cap_size); - ieee80211_ie_build_eht_cap(pos, he_cap, eht_cap, pos + eht_cap_size, - false); + pre_eht_pos = pos; + pos = ieee80211_ie_build_eht_cap(conn, pos, he_cap, eht_cap, + pos + eht_cap_size, false); + /* trim excess if any */ + skb_trim(skb, skb->len - (pre_eht_pos + eht_cap_size - pos)); } static void ieee80211_assoc_add_rates(struct sk_buff *skb, @@ -1453,7 +1457,8 @@ static size_t ieee80211_assoc_link_elems(struct ieee80211_sub_if_data *sdata, present_elems = NULL; if (assoc_data->link[link_id].conn.mode >= IEEE80211_CONN_MODE_EHT) - ieee80211_add_eht_ie(sdata, skb, sband); + ieee80211_add_eht_ie(sdata, skb, sband, + &assoc_data->link[link_id].conn); if (sband->band == NL80211_BAND_S1GHZ) { ieee80211_add_aid_request_ie(sdata, skb); diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 0f4aa42e070f9..57673f27daf4e 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -6,7 +6,7 @@ * Copyright 2014, Intel Corporation * Copyright 2014 Intel Mobile Communications GmbH * Copyright 2015 - 2016 Intel Deutschland GmbH - * Copyright (C) 2019, 2021-2023 Intel Corporation + * Copyright (C) 2019, 2021-2024 Intel Corporation */ #include @@ -604,7 +604,8 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_link_data *link, ieee80211_eht_ppe_size(eht_cap->eht_ppe_thres[0], eht_cap->eht_cap_elem.phy_cap_info); pos = skb_put(skb, cap_size); - ieee80211_ie_build_eht_cap(pos, he_cap, eht_cap, pos + cap_size, false); + ieee80211_ie_build_eht_cap(NULL, pos, he_cap, eht_cap, + pos + cap_size, false); } /* add any remaining IEs */ diff --git a/net/mac80211/util.c b/net/mac80211/util.c index e9a7978d47d42..5224c22b1afc4 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2250,7 +2250,8 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band), IEEE80211_CHAN_NO_HE | IEEE80211_CHAN_NO_EHT)) { - pos = ieee80211_ie_build_eht_cap(pos, he_cap, eht_cap, end, + pos = ieee80211_ie_build_eht_cap(NULL, pos, he_cap, eht_cap, + end, sdata->vif.type == NL80211_IFTYPE_AP); if (!pos) goto out_err; @@ -3294,6 +3295,24 @@ u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata) he_cap->he_cap_elem.phy_cap_info); } +static void +ieee80211_get_adjusted_he_cap(const struct ieee80211_conn_settings *conn, + const struct ieee80211_sta_he_cap *he_cap, + struct ieee80211_he_cap_elem *elem) +{ + *elem = he_cap->he_cap_elem; + + if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_40) + elem->phy_cap_info[0] &= + ~(IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G | + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G); + + if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_160) + elem->phy_cap_info[0] &= + ~(IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G | + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G); +} + u8 *ieee80211_ie_build_he_cap(const struct ieee80211_conn_settings *conn, const struct ieee80211_sta_he_cap *he_cap, u8 *pos, u8 *end) @@ -3307,25 +3326,11 @@ u8 *ieee80211_ie_build_he_cap(const struct ieee80211_conn_settings *conn, conn = &ieee80211_conn_settings_unlimited; /* Make sure we have place for the IE */ - /* - * TODO: the 1 added is because this temporarily is under the EXTENSION - * IE. Get rid of it when it moves. - */ if (!he_cap) return orig_pos; /* modify on stack first to calculate 'n' and 'ie_len' correctly */ - elem = he_cap->he_cap_elem; - - if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_40) - elem.phy_cap_info[0] &= - ~(IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G | - IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G); - - if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_160) - elem.phy_cap_info[0] &= - ~(IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G | - IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G); + ieee80211_get_adjusted_he_cap(conn, he_cap, &elem); n = ieee80211_he_mcs_nss_size(&elem); ie_len = 2 + 1 + @@ -5096,25 +5101,65 @@ u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata) return 0; } -u8 *ieee80211_ie_build_eht_cap(u8 *pos, +u8 *ieee80211_ie_build_eht_cap(const struct ieee80211_conn_settings *conn, + u8 *pos, const struct ieee80211_sta_he_cap *he_cap, const struct ieee80211_sta_eht_cap *eht_cap, - u8 *end, - bool for_ap) + u8 *end, bool for_ap) { + struct ieee80211_eht_cap_elem_fixed fixed, *out; + struct ieee80211_he_cap_elem he; u8 mcs_nss_len, ppet_len; + u8 orig_mcs_nss_len; u8 ie_len; u8 *orig_pos = pos; + if (!conn) + conn = &ieee80211_conn_settings_unlimited; + /* Make sure we have place for the IE */ if (!he_cap || !eht_cap) return orig_pos; - mcs_nss_len = ieee80211_eht_mcs_nss_size(&he_cap->he_cap_elem, - &eht_cap->eht_cap_elem, - for_ap); + orig_mcs_nss_len = ieee80211_eht_mcs_nss_size(&he_cap->he_cap_elem, + &eht_cap->eht_cap_elem, + for_ap); + + ieee80211_get_adjusted_he_cap(conn, he_cap, &he); + + fixed = eht_cap->eht_cap_elem; + + if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_80) + fixed.phy_cap_info[6] &= + ~IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_80MHZ; + + if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_160) { + fixed.phy_cap_info[1] &= + ~IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_160MHZ_MASK; + fixed.phy_cap_info[2] &= + ~IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_160MHZ_MASK; + fixed.phy_cap_info[6] &= + ~IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_160MHZ; + } + + if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_320) { + fixed.phy_cap_info[0] &= + ~IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ; + fixed.phy_cap_info[1] &= + ~IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_320MHZ_MASK; + fixed.phy_cap_info[2] &= + ~IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_320MHZ_MASK; + fixed.phy_cap_info[6] &= + ~IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_320MHZ; + } + + if (conn->bw_limit == IEEE80211_CONN_BW_LIMIT_20) + fixed.phy_cap_info[0] &= + ~IEEE80211_EHT_PHY_CAP0_242_TONE_RU_GT20MHZ; + + mcs_nss_len = ieee80211_eht_mcs_nss_size(&he, &fixed, for_ap); ppet_len = ieee80211_eht_ppe_size(eht_cap->eht_ppe_thres[0], - eht_cap->eht_cap_elem.phy_cap_info); + fixed.phy_cap_info); ie_len = 2 + 1 + sizeof(eht_cap->eht_cap_elem) + mcs_nss_len + ppet_len; if ((end - pos) < ie_len) @@ -5124,12 +5169,25 @@ u8 *ieee80211_ie_build_eht_cap(u8 *pos, *pos++ = ie_len - 2; *pos++ = WLAN_EID_EXT_EHT_CAPABILITY; - /* Fixed data */ - memcpy(pos, &eht_cap->eht_cap_elem, sizeof(eht_cap->eht_cap_elem)); - pos += sizeof(eht_cap->eht_cap_elem); + out = (void *)pos; + *out = fixed; + pos += sizeof(*out); - memcpy(pos, &eht_cap->eht_mcs_nss_supp, mcs_nss_len); - pos += mcs_nss_len; + if (mcs_nss_len == 4 && orig_mcs_nss_len != 4) { + /* + * If the (non-AP) STA became 20 MHz only, then convert from + * <=80 to 20-MHz-only format, where MCSes are indicated in + * the groups 0-7, 8-9, 10-11, 12-13 rather than just 0-9, + * 10-11, 12-13. Thus, use 0-9 for 0-7 and 8-9. + */ + *pos++ = eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_mcs9_max_nss; + *pos++ = eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_mcs9_max_nss; + *pos++ = eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_mcs11_max_nss; + *pos++ = eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_mcs13_max_nss; + } else { + memcpy(pos, &eht_cap->eht_mcs_nss_supp, mcs_nss_len); + pos += mcs_nss_len; + } if (ppet_len) { memcpy(pos, &eht_cap->eht_ppe_thres, ppet_len); -- cgit 1.2.3-korg From 06b4c8665dcff32f50153fd4cddc4b05f31569a7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 20:19:29 +0100 Subject: wifi: mac80211: limit HE RU capabilities when limiting bandwidth When limiting a station's supported bandwidth while connecting, also limit various other HE capabilities according to the bandwidth needed for them. Link: https://msgid.link/20240129202041.34be99efca25.I02a695961bc6aadd37768b17c50fcdec4427d460@changeid Signed-off-by: Johannes Berg --- net/mac80211/util.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 5224c22b1afc4..c2fe9aece00d1 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3300,17 +3300,48 @@ ieee80211_get_adjusted_he_cap(const struct ieee80211_conn_settings *conn, const struct ieee80211_sta_he_cap *he_cap, struct ieee80211_he_cap_elem *elem) { + u8 ru_limit, max_ru; + *elem = he_cap->he_cap_elem; - if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_40) + switch (conn->bw_limit) { + case IEEE80211_CONN_BW_LIMIT_20: + ru_limit = IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_242; + break; + case IEEE80211_CONN_BW_LIMIT_40: + ru_limit = IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_484; + break; + case IEEE80211_CONN_BW_LIMIT_80: + ru_limit = IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_996; + break; + default: + ru_limit = IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_2x996; + break; + } + + max_ru = elem->phy_cap_info[8] & IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_MASK; + max_ru = min(max_ru, ru_limit); + elem->phy_cap_info[8] &= ~IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_MASK; + elem->phy_cap_info[8] |= max_ru; + + if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_40) { elem->phy_cap_info[0] &= ~(IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G | IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G); + elem->phy_cap_info[9] &= + ~IEEE80211_HE_PHY_CAP9_LONGER_THAN_16_SIGB_OFDM_SYM; + } - if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_160) + if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_160) { elem->phy_cap_info[0] &= ~(IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G | IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G); + elem->phy_cap_info[5] &= + ~IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_MASK; + elem->phy_cap_info[7] &= + ~(IEEE80211_HE_PHY_CAP7_STBC_TX_ABOVE_80MHZ | + IEEE80211_HE_PHY_CAP7_STBC_RX_ABOVE_80MHZ); + } } u8 *ieee80211_ie_build_he_cap(const struct ieee80211_conn_settings *conn, -- cgit 1.2.3-korg From 552a26b3854e12d73ab0527ee0d46454a2e650fb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 20:19:30 +0100 Subject: wifi: mac80211: rename ieee80211_ie_build_he_6ghz_cap() The term 'IE' isn't really in use in the spec, and I want to rework all of this to use SKBs as the primary method for building elements. Rename this one already. Link: https://msgid.link/20240129202041.b8064a4e73b5.I8d2f4526562029107c6414c6cda378b300b1b0b0@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 8 +++++--- net/mac80211/mesh.c | 2 +- net/mac80211/mlme.c | 2 +- net/mac80211/util.c | 6 +++--- 4 files changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 43c55ea6349c6..2b5d49399500d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2506,9 +2506,6 @@ u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata); u8 *ieee80211_ie_build_he_cap(const struct ieee80211_conn_settings *conn, const struct ieee80211_sta_he_cap *he_cap, u8 *pos, u8 *end); -void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata, - enum ieee80211_smps_mode smps_mode, - struct sk_buff *skb); u8 *ieee80211_ie_build_he_oper(u8 *pos, struct cfg80211_chan_def *chandef); u8 *ieee80211_ie_build_eht_oper(u8 *pos, struct cfg80211_chan_def *chandef, const struct ieee80211_sta_eht_cap *eht_cap); @@ -2529,6 +2526,11 @@ void ieee80211_add_aid_request_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); u8 *ieee80211_ie_build_s1g_cap(u8 *pos, struct ieee80211_sta_s1g_cap *s1g_cap); +/* element building in SKBs */ +void ieee80211_put_he_6ghz_cap(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata, + enum ieee80211_smps_mode smps_mode); + /* channel management */ bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper, struct cfg80211_chan_def *chandef); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 000fa9484b4e3..10c7d7714ffed 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -640,7 +640,7 @@ int mesh_add_he_6ghz_cap_ie(struct ieee80211_sub_if_data *sdata, if (!iftd) return 0; - ieee80211_ie_build_he_6ghz_cap(sdata, sdata->deflink.smps_mode, skb); + ieee80211_put_he_6ghz_cap(skb, sdata, sdata->deflink.smps_mode); return 0; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index bbc7894ccad06..74a6c67a94da5 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1067,7 +1067,7 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata, /* trim excess if any */ skb_trim(skb, skb->len - (pre_he_pos + he_cap_size - pos)); - ieee80211_ie_build_he_6ghz_cap(sdata, smps_mode, skb); + ieee80211_put_he_6ghz_cap(skb, sdata, smps_mode); } static void ieee80211_add_eht_ie(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/util.c b/net/mac80211/util.c index c2fe9aece00d1..e8de82bafeefb 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3413,9 +3413,9 @@ end: return pos; } -void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata, - enum ieee80211_smps_mode smps_mode, - struct sk_buff *skb) +void ieee80211_put_he_6ghz_cap(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata, + enum ieee80211_smps_mode smps_mode) { struct ieee80211_supported_band *sband; const struct ieee80211_sband_iftype_data *iftd; -- cgit 1.2.3-korg From e0b5ee918723dcf47d3773bc95cbcb428436f817 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 20:19:31 +0100 Subject: wifi: mac80211: tdls: use ieee80211_put_he_6ghz_cap() We don't need to use the write function here since we already have an SKB, so use ieee80211_put_he_6ghz_cap() with the SMPS mode taken from the link we're using. Link: https://msgid.link/20240129202041.6454ac78ff8c.I7152e3c27645105478c68d40ca493feb27cac6bf@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 1 - net/mac80211/tdls.c | 11 ++--------- net/mac80211/util.c | 2 +- 3 files changed, 3 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 2b5d49399500d..f7b2381878a96 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2457,7 +2457,6 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, const u8 *da, const u8 *bssid, u16 stype, u16 reason, bool send_frame, u8 *frame_buf); -u8 *ieee80211_write_he_6ghz_cap(u8 *pos, __le16 cap, u8 *end); enum { IEEE80211_PROBE_FLAG_DIRECTED = BIT(0), diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 57673f27daf4e..e6808b7660ff5 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -552,7 +552,6 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_link_data *link, (action_code == WLAN_TDLS_SETUP_REQUEST || action_code == WLAN_TDLS_SETUP_RESPONSE || action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES)) { - __le16 he_6ghz_capa; u8 cap_size; cap_size = @@ -564,14 +563,8 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_link_data *link, pos = ieee80211_ie_build_he_cap(NULL, he_cap, pos, pos + cap_size); /* Build HE 6Ghz capa IE from sband */ - if (sband->band == NL80211_BAND_6GHZ) { - cap_size = 2 + 1 + sizeof(struct ieee80211_he_6ghz_capa); - pos = skb_put(skb, cap_size); - he_6ghz_capa = - ieee80211_get_he_6ghz_capa_vif(sband, &sdata->vif); - pos = ieee80211_write_he_6ghz_cap(pos, he_6ghz_capa, - pos + cap_size); - } + if (sband->band == NL80211_BAND_6GHZ) + ieee80211_put_he_6ghz_cap(skb, sdata, link->smps_mode); } /* add any custom IEs that go before EHT capabilities */ diff --git a/net/mac80211/util.c b/net/mac80211/util.c index e8de82bafeefb..c90f338b229c8 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2035,7 +2035,7 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, } } -u8 *ieee80211_write_he_6ghz_cap(u8 *pos, __le16 cap, u8 *end) +static u8 *ieee80211_write_he_6ghz_cap(u8 *pos, __le16 cap, u8 *end) { if ((end - pos) < 5) return pos; -- cgit 1.2.3-korg From 147ceae2053402cbbe543944abf3a8494ef76895 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 20:19:32 +0100 Subject: wifi: mac80211: simplify adding supported rates Make this a new-style "put" function, and change the parameters to pass more information directly, this makes it usable also for the MLME code. Link: https://msgid.link/20240129202041.f604a03bd728.I8c798ea45b8479ac9982e77d0378af11a09ccdaf@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 10 ++--- net/mac80211/mesh.c | 20 +++++---- net/mac80211/mesh_plink.c | 16 +++++-- net/mac80211/mlme.c | 53 +++++------------------ net/mac80211/tdls.c | 4 +- net/mac80211/util.c | 105 ++++++++++++++++----------------------------- 6 files changed, 77 insertions(+), 131 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f7b2381878a96..b69f081e1c1fb 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2511,12 +2511,6 @@ u8 *ieee80211_ie_build_eht_oper(u8 *pos, struct cfg80211_chan_def *chandef, int ieee80211_parse_bitrates(enum nl80211_chan_width width, const struct ieee80211_supported_band *sband, const u8 *srates, int srates_len, u32 *rates); -int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, bool need_basic, - enum nl80211_band band); -int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, bool need_basic, - enum nl80211_band band); u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo); void ieee80211_add_s1g_capab_ie(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_s1g_cap *caps, @@ -2526,6 +2520,10 @@ void ieee80211_add_aid_request_ie(struct ieee80211_sub_if_data *sdata, u8 *ieee80211_ie_build_s1g_cap(u8 *pos, struct ieee80211_sta_s1g_cap *s1g_cap); /* element building in SKBs */ +int ieee80211_put_srates_elem(struct sk_buff *skb, + const struct ieee80211_supported_band *sband, + u32 basic_rates, u32 rate_flags, u32 masked_rates, + u8 element_id); void ieee80211_put_he_6ghz_cap(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps_mode); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 10c7d7714ffed..7e860486c6bc5 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -968,19 +968,19 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) int head_len, tail_len; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; - struct ieee80211_chanctx_conf *chanctx_conf; struct mesh_csa_settings *csa; - enum nl80211_band band; + const struct ieee80211_supported_band *sband; u8 ie_len_he_cap, ie_len_eht_cap; u8 *pos; struct ieee80211_sub_if_data *sdata; int hdr_len = offsetofend(struct ieee80211_mgmt, u.beacon); + u32 rate_flags; sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); - rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); - band = chanctx_conf->def.chan->band; - rcu_read_unlock(); + + sband = ieee80211_get_sband(sdata); + rate_flags = + ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chanreq.oper); ie_len_he_cap = ieee80211_ie_len_he_cap(sdata); ie_len_eht_cap = ieee80211_ie_len_eht_cap(sdata); @@ -1107,7 +1107,9 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) } rcu_read_unlock(); - if (ieee80211_add_srates_ie(sdata, skb, true, band) || + if (ieee80211_put_srates_elem(skb, sband, + sdata->vif.bss_conf.basic_rates, + rate_flags, 0, WLAN_EID_SUPP_RATES) || mesh_add_ds_params_ie(sdata, skb)) goto out_free; @@ -1118,7 +1120,9 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) skb_trim(skb, 0); bcn->tail = bcn->head + bcn->head_len; - if (ieee80211_add_ext_srates_ie(sdata, skb, true, band) || + if (ieee80211_put_srates_elem(skb, sband, + sdata->vif.bss_conf.basic_rates, + rate_flags, 0, WLAN_EID_EXT_SUPP_RATES) || mesh_add_rsn_ie(sdata, skb) || mesh_add_ht_cap_ie(sdata, skb) || mesh_add_ht_oper_ie(sdata, skb) || diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 7ba0f01805a4a..8f2b492a9fe98 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -264,14 +264,13 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, if (action != WLAN_SP_MESH_PEERING_CLOSE) { struct ieee80211_supported_band *sband; - enum nl80211_band band; + u32 rate_flags, basic_rates; sband = ieee80211_get_sband(sdata); if (!sband) { err = -EINVAL; goto free; } - band = sband->band; /* capability info */ pos = skb_put_zero(skb, 2); @@ -280,8 +279,17 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, pos = skb_put(skb, 2); put_unaligned_le16(sta->sta.aid, pos); } - if (ieee80211_add_srates_ie(sdata, skb, true, band) || - ieee80211_add_ext_srates_ie(sdata, skb, true, band) || + + rate_flags = + ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chanreq.oper); + basic_rates = sdata->vif.bss_conf.basic_rates; + + if (ieee80211_put_srates_elem(skb, sband, basic_rates, + rate_flags, 0, + WLAN_EID_SUPP_RATES) || + ieee80211_put_srates_elem(skb, sband, basic_rates, + rate_flags, 0, + WLAN_EID_EXT_SUPP_RATES) || mesh_add_rsn_ie(sdata, skb) || mesh_add_meshid_ie(sdata, skb) || mesh_add_meshconf_ie(sdata, skb)) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 74a6c67a94da5..d807a904419c8 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1110,10 +1110,7 @@ static void ieee80211_assoc_add_rates(struct sk_buff *skb, struct ieee80211_supported_band *sband, struct ieee80211_mgd_assoc_data *assoc_data) { - unsigned int rates_len, supp_rates_len; - u32 rates = 0; - int i, count; - u8 *pos; + u32 rates; if (assoc_data->supp_rates_len) { /* @@ -1122,53 +1119,23 @@ static void ieee80211_assoc_add_rates(struct sk_buff *skb, * in the association request (e.g. D-Link DAP 1353 in * b-only mode)... */ - rates_len = ieee80211_parse_bitrates(width, sband, - assoc_data->supp_rates, - assoc_data->supp_rates_len, - &rates); + ieee80211_parse_bitrates(width, sband, + assoc_data->supp_rates, + assoc_data->supp_rates_len, + &rates); } else { /* * In case AP not provide any supported rates information * before association, we send information element(s) with * all rates that we support. */ - rates_len = sband->n_bitrates; - for (i = 0; i < sband->n_bitrates; i++) - rates |= BIT(i); - } - - supp_rates_len = rates_len; - if (supp_rates_len > 8) - supp_rates_len = 8; - - pos = skb_put(skb, supp_rates_len + 2); - *pos++ = WLAN_EID_SUPP_RATES; - *pos++ = supp_rates_len; - - count = 0; - for (i = 0; i < sband->n_bitrates; i++) { - if (BIT(i) & rates) { - int rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5); - *pos++ = (u8)rate; - if (++count == 8) - break; - } + rates = ~0; } - if (rates_len > count) { - pos = skb_put(skb, rates_len - count + 2); - *pos++ = WLAN_EID_EXT_SUPP_RATES; - *pos++ = rates_len - count; - - for (i++; i < sband->n_bitrates; i++) { - if (BIT(i) & rates) { - int rate; - - rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5); - *pos++ = (u8)rate; - } - } - } + ieee80211_put_srates_elem(skb, sband, 0, 0, ~rates, + WLAN_EID_SUPP_RATES); + ieee80211_put_srates_elem(skb, sband, 0, 0, ~rates, + WLAN_EID_EXT_SUPP_RATES); } static size_t ieee80211_add_before_ht_elems(struct sk_buff *skb, diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index e6808b7660ff5..edbd3fd8a7374 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -382,8 +382,8 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_link_data *link, if (WARN_ON_ONCE(!sband)) return; - ieee80211_add_srates_ie(sdata, skb, false, sband->band); - ieee80211_add_ext_srates_ie(sdata, skb, false, sband->band); + ieee80211_put_srates_elem(skb, sband, 0, 0, 0, WLAN_EID_SUPP_RATES); + ieee80211_put_srates_elem(skb, sband, 0, 0, 0, WLAN_EID_EXT_SUPP_RATES); ieee80211_tdls_add_supp_channels(sdata, skb); /* add any custom IEs that go before Extended Capabilities */ diff --git a/net/mac80211/util.c b/net/mac80211/util.c index c90f338b229c8..3888ad3b052f3 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -4091,93 +4091,62 @@ int ieee80211_parse_bitrates(enum nl80211_chan_width width, return count; } -int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, bool need_basic, - enum nl80211_band band) +int ieee80211_put_srates_elem(struct sk_buff *skb, + const struct ieee80211_supported_band *sband, + u32 basic_rates, u32 rate_flags, u32 masked_rates, + u8 element_id) { - struct ieee80211_local *local = sdata->local; - struct ieee80211_supported_band *sband; - int rate; - u8 i, rates, *pos; - u32 basic_rates = sdata->vif.bss_conf.basic_rates; - u32 rate_flags; + u8 i, rates, skip; - rate_flags = - ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chanreq.oper); - sband = local->hw.wiphy->bands[band]; rates = 0; for (i = 0; i < sband->n_bitrates; i++) { if ((rate_flags & sband->bitrates[i].flags) != rate_flags) continue; + if (masked_rates & BIT(i)) + continue; rates++; } - if (rates > 8) - rates = 8; - if (skb_tailroom(skb) < rates + 2) - return -ENOMEM; - - pos = skb_put(skb, rates + 2); - *pos++ = WLAN_EID_SUPP_RATES; - *pos++ = rates; - for (i = 0; i < rates; i++) { - u8 basic = 0; - if ((rate_flags & sband->bitrates[i].flags) != rate_flags) - continue; - - if (need_basic && basic_rates & BIT(i)) - basic = 0x80; - rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5); - *pos++ = basic | (u8) rate; + if (element_id == WLAN_EID_SUPP_RATES) { + rates = min_t(u8, rates, 8); + skip = 0; + } else { + skip = 8; + if (rates <= skip) + return 0; + rates -= skip; } - return 0; -} + if (skb_tailroom(skb) < rates + 2) + return -ENOBUFS; -int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, bool need_basic, - enum nl80211_band band) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_supported_band *sband; - int rate; - u8 i, exrates, *pos; - u32 basic_rates = sdata->vif.bss_conf.basic_rates; - u32 rate_flags; + skb_put_u8(skb, element_id); + skb_put_u8(skb, rates); + + for (i = 0; i < sband->n_bitrates && rates; i++) { + int rate; + u8 basic; - rate_flags = - ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chanreq.oper); - sband = local->hw.wiphy->bands[band]; - exrates = 0; - for (i = 0; i < sband->n_bitrates; i++) { if ((rate_flags & sband->bitrates[i].flags) != rate_flags) continue; - exrates++; - } + if (masked_rates & BIT(i)) + continue; - if (exrates > 8) - exrates -= 8; - else - exrates = 0; + if (skip > 0) { + skip--; + continue; + } - if (skb_tailroom(skb) < exrates + 2) - return -ENOMEM; + basic = basic_rates & BIT(i) ? 0x80 : 0; - if (exrates) { - pos = skb_put(skb, exrates + 2); - *pos++ = WLAN_EID_EXT_SUPP_RATES; - *pos++ = exrates; - for (i = 8; i < sband->n_bitrates; i++) { - u8 basic = 0; - if ((rate_flags & sband->bitrates[i].flags) - != rate_flags) - continue; - if (need_basic && basic_rates & BIT(i)) - basic = 0x80; - rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5); - *pos++ = basic | (u8) rate; - } + rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5); + skb_put_u8(skb, basic | (u8)rate); + rates--; } + + WARN(rates > 0, "rates confused: rates:%d, element:%d\n", + rates, element_id); + return 0; } -- cgit 1.2.3-korg From 07095d167749d49de876613b3567a246d86135a1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 20:19:33 +0100 Subject: wifi: mac80211: start building elements in SKBs The building of elements is really mess, and really the only reason we're not doing it in SKBs in the first place is that the scan code in ieee80211_build_preq_ies() doesn't. Convert ieee80211_build_preq_ies() to use an SKB internally so that we can gradually convert other things to ..._put_*() style interfaces. Link: https://msgid.link/20240129202041.c3a8e3c2cc99.I9d9920858c30ae5154719783933de0d7bc2a2cb9@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 7 +- net/mac80211/scan.c | 14 +- net/mac80211/util.c | 353 +++++++++++++++++++++++---------------------- 3 files changed, 189 insertions(+), 185 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b69f081e1c1fb..fde8c0b671254 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2517,16 +2517,15 @@ void ieee80211_add_s1g_capab_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); void ieee80211_add_aid_request_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); -u8 *ieee80211_ie_build_s1g_cap(u8 *pos, struct ieee80211_sta_s1g_cap *s1g_cap); /* element building in SKBs */ int ieee80211_put_srates_elem(struct sk_buff *skb, const struct ieee80211_supported_band *sband, u32 basic_rates, u32 rate_flags, u32 masked_rates, u8 element_id); -void ieee80211_put_he_6ghz_cap(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata, - enum ieee80211_smps_mode smps_mode); +int ieee80211_put_he_6ghz_cap(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata, + enum ieee80211_smps_mode smps_mode); /* channel management */ bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper, diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 6dedbbba153a0..bca2a259fda6b 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -400,6 +400,8 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_sub_if_data *sdata) req->ie, req->ie_len, bands_used, req->rates, &chandef, flags); + if (ielen < 0) + return false; local->hw_scan_req->req.ie_len = ielen; local->hw_scan_req->req.no_cck = req->no_cck; ether_addr_copy(local->hw_scan_req->req.mac_addr, req->mac_addr); @@ -1322,10 +1324,12 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, ieee80211_prepare_scan_chandef(&chandef); - ieee80211_build_preq_ies(sdata, ie, num_bands * iebufsz, - &sched_scan_ies, req->ie, - req->ie_len, bands_used, rate_masks, &chandef, - flags); + ret = ieee80211_build_preq_ies(sdata, ie, num_bands * iebufsz, + &sched_scan_ies, req->ie, + req->ie_len, bands_used, rate_masks, + &chandef, flags); + if (ret < 0) + goto error; ret = drv_sched_scan_start(local, sdata, req, &sched_scan_ies); if (ret == 0) { @@ -1333,8 +1337,8 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, rcu_assign_pointer(local->sched_scan_req, req); } +error: kfree(ie); - out: if (ret) { /* Clean in case of failure after HW restart or upon resume. */ diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 3888ad3b052f3..ea863d78061e6 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2035,37 +2035,36 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, } } -static u8 *ieee80211_write_he_6ghz_cap(u8 *pos, __le16 cap, u8 *end) +static int ieee80211_put_s1g_cap(struct sk_buff *skb, + struct ieee80211_sta_s1g_cap *s1g_cap) { - if ((end - pos) < 5) - return pos; + if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_s1g_cap)) + return -ENOBUFS; - *pos++ = WLAN_EID_EXTENSION; - *pos++ = 1 + sizeof(cap); - *pos++ = WLAN_EID_EXT_HE_6GHZ_CAPA; - memcpy(pos, &cap, sizeof(cap)); + skb_put_u8(skb, WLAN_EID_S1G_CAPABILITIES); + skb_put_u8(skb, sizeof(struct ieee80211_s1g_cap)); - return pos + 2; + skb_put_data(skb, &s1g_cap->cap, sizeof(s1g_cap->cap)); + skb_put_data(skb, &s1g_cap->nss_mcs, sizeof(s1g_cap->nss_mcs)); + + return 0; } -static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, - u8 *buffer, size_t buffer_len, - const u8 *ie, size_t ie_len, - enum nl80211_band band, - u32 rate_mask, - struct cfg80211_chan_def *chandef, - size_t *offset, u32 flags) +static int ieee80211_put_preq_ies_band(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata, + const u8 *ie, size_t ie_len, + size_t *offset, + enum nl80211_band band, + u32 rate_mask, + struct cfg80211_chan_def *chandef, + u32 flags) { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; const struct ieee80211_sta_he_cap *he_cap; const struct ieee80211_sta_eht_cap *eht_cap; - u8 *pos = buffer, *end = buffer + buffer_len; + int i, err; size_t noffset; - int supp_rates_len, i; - u8 rates[32]; - int num_rates; - int ext_rates_len; u32 rate_flags; bool have_80mhz = false; @@ -2078,32 +2077,13 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, rate_flags = ieee80211_chandef_rate_flags(chandef); /* For direct scan add S1G IE and consider its override bits */ - if (band == NL80211_BAND_S1GHZ) { - if (end - pos < 2 + sizeof(struct ieee80211_s1g_cap)) - goto out_err; - pos = ieee80211_ie_build_s1g_cap(pos, &sband->s1g_cap); - goto done; - } - - num_rates = 0; - for (i = 0; i < sband->n_bitrates; i++) { - if ((BIT(i) & rate_mask) == 0) - continue; /* skip rate */ - if ((rate_flags & sband->bitrates[i].flags) != rate_flags) - continue; - - rates[num_rates++] = - (u8) DIV_ROUND_UP(sband->bitrates[i].bitrate, 5); - } - - supp_rates_len = min_t(int, num_rates, 8); + if (band == NL80211_BAND_S1GHZ) + return ieee80211_put_s1g_cap(skb, &sband->s1g_cap); - if (end - pos < 2 + supp_rates_len) - goto out_err; - *pos++ = WLAN_EID_SUPP_RATES; - *pos++ = supp_rates_len; - memcpy(pos, rates, supp_rates_len); - pos += supp_rates_len; + err = ieee80211_put_srates_elem(skb, sband, 0, rate_flags, 0, + WLAN_EID_SUPP_RATES); + if (err) + return err; /* insert "request information" if in custom IEs */ if (ie && ie_len) { @@ -2116,34 +2096,28 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, before_extrates, ARRAY_SIZE(before_extrates), *offset); - if (end - pos < noffset - *offset) - goto out_err; - memcpy(pos, ie + *offset, noffset - *offset); - pos += noffset - *offset; + if (skb_tailroom(skb) < noffset - *offset) + return -ENOBUFS; + skb_put_data(skb, ie + *offset, noffset - *offset); *offset = noffset; } - ext_rates_len = num_rates - supp_rates_len; - if (ext_rates_len > 0) { - if (end - pos < 2 + ext_rates_len) - goto out_err; - *pos++ = WLAN_EID_EXT_SUPP_RATES; - *pos++ = ext_rates_len; - memcpy(pos, rates + supp_rates_len, ext_rates_len); - pos += ext_rates_len; - } + err = ieee80211_put_srates_elem(skb, sband, 0, rate_flags, 0, + WLAN_EID_EXT_SUPP_RATES); + if (err) + return err; if (chandef->chan && sband->band == NL80211_BAND_2GHZ) { - if (end - pos < 3) - goto out_err; - *pos++ = WLAN_EID_DS_PARAMS; - *pos++ = 1; - *pos++ = ieee80211_frequency_to_channel( - chandef->chan->center_freq); + if (skb_tailroom(skb) < 3) + return -ENOBUFS; + skb_put_u8(skb, WLAN_EID_DS_PARAMS); + skb_put_u8(skb, 1); + skb_put_u8(skb, + ieee80211_frequency_to_channel(chandef->chan->center_freq)); } if (flags & IEEE80211_PROBE_FLAG_MIN_CONTENT) - goto done; + return 0; /* insert custom IEs that go before HT */ if (ie && ie_len) { @@ -2158,18 +2132,21 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, noffset = ieee80211_ie_split(ie, ie_len, before_ht, ARRAY_SIZE(before_ht), *offset); - if (end - pos < noffset - *offset) - goto out_err; - memcpy(pos, ie + *offset, noffset - *offset); - pos += noffset - *offset; + if (skb_tailroom(skb) < noffset - *offset) + return -ENOBUFS; + skb_put_data(skb, ie + *offset, noffset - *offset); *offset = noffset; } if (sband->ht_cap.ht_supported) { - if (end - pos < 2 + sizeof(struct ieee80211_ht_cap)) - goto out_err; - pos = ieee80211_ie_build_ht_cap(pos, &sband->ht_cap, - sband->ht_cap.cap); + u8 *pos; + + if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_ht_cap)) + return -ENOBUFS; + + pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_cap)); + ieee80211_ie_build_ht_cap(pos, &sband->ht_cap, + sband->ht_cap.cap); } /* insert custom IEs that go before VHT */ @@ -2190,10 +2167,9 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, noffset = ieee80211_ie_split(ie, ie_len, before_vht, ARRAY_SIZE(before_vht), *offset); - if (end - pos < noffset - *offset) - goto out_err; - memcpy(pos, ie + *offset, noffset - *offset); - pos += noffset - *offset; + if (skb_tailroom(skb) < noffset - *offset) + return -ENOBUFS; + skb_put_data(skb, ie + *offset, noffset - *offset); *offset = noffset; } @@ -2208,10 +2184,14 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, } if (sband->vht_cap.vht_supported && have_80mhz) { - if (end - pos < 2 + sizeof(struct ieee80211_vht_cap)) - goto out_err; - pos = ieee80211_ie_build_vht_cap(pos, &sband->vht_cap, - sband->vht_cap.cap); + u8 *pos; + + if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_vht_cap)) + return -ENOBUFS; + + pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_cap)); + ieee80211_ie_build_vht_cap(pos, &sband->vht_cap, + sband->vht_cap.cap); } /* insert custom IEs that go before HE */ @@ -2228,10 +2208,9 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, noffset = ieee80211_ie_split(ie, ie_len, before_he, ARRAY_SIZE(before_he), *offset); - if (end - pos < noffset - *offset) - goto out_err; - memcpy(pos, ie + *offset, noffset - *offset); - pos += noffset - *offset; + if (skb_tailroom(skb) < noffset - *offset) + return -ENOBUFS; + skb_put_data(skb, ie + *offset, noffset - *offset); *offset = noffset; } @@ -2239,9 +2218,13 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, if (he_cap && cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band), IEEE80211_CHAN_NO_HE)) { + u8 *pos = skb_tail_pointer(skb); + u8 *end = pos + skb_tailroom(skb); + pos = ieee80211_ie_build_he_cap(NULL, he_cap, pos, end); if (!pos) - goto out_err; + return -ENOBUFS; + skb_put(skb, pos - skb_tail_pointer(skb)); } eht_cap = ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif); @@ -2250,86 +2233,118 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band), IEEE80211_CHAN_NO_HE | IEEE80211_CHAN_NO_EHT)) { + u8 *pos = skb_tail_pointer(skb); + u8 *end = pos + skb_tailroom(skb); + pos = ieee80211_ie_build_eht_cap(NULL, pos, he_cap, eht_cap, end, sdata->vif.type == NL80211_IFTYPE_AP); if (!pos) - goto out_err; + return -ENOBUFS; + skb_put(skb, pos - skb_tail_pointer(skb)); } - if (cfg80211_any_usable_channels(local->hw.wiphy, - BIT(NL80211_BAND_6GHZ), - IEEE80211_CHAN_NO_HE)) { - struct ieee80211_supported_band *sband6; - - sband6 = local->hw.wiphy->bands[NL80211_BAND_6GHZ]; - he_cap = ieee80211_get_he_iftype_cap_vif(sband6, &sdata->vif); - - if (he_cap) { - enum nl80211_iftype iftype = - ieee80211_vif_type_p2p(&sdata->vif); - __le16 cap = ieee80211_get_he_6ghz_capa(sband6, iftype); - - pos = ieee80211_write_he_6ghz_cap(pos, cap, end); - } - } + err = ieee80211_put_he_6ghz_cap(skb, sdata, IEEE80211_SMPS_OFF); + if (err) + return err; /* * If adding more here, adjust code in main.c * that calculates local->scan_ies_len. */ - return pos - buffer; - out_err: - WARN_ONCE(1, "not enough space for preq IEs\n"); - done: - return pos - buffer; + return 0; } -int ieee80211_build_preq_ies(struct ieee80211_sub_if_data *sdata, u8 *buffer, - size_t buffer_len, - struct ieee80211_scan_ies *ie_desc, - const u8 *ie, size_t ie_len, - u8 bands_used, u32 *rate_masks, - struct cfg80211_chan_def *chandef, - u32 flags) +static int ieee80211_put_preq_ies(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_scan_ies *ie_desc, + const u8 *ie, size_t ie_len, + u8 bands_used, u32 *rate_masks, + struct cfg80211_chan_def *chandef, + u32 flags) { - size_t pos = 0, old_pos = 0, custom_ie_offset = 0; - int i; + size_t custom_ie_offset = 0; + int i, err; memset(ie_desc, 0, sizeof(*ie_desc)); for (i = 0; i < NUM_NL80211_BANDS; i++) { if (bands_used & BIT(i)) { - pos += ieee80211_build_preq_ies_band(sdata, - buffer + pos, - buffer_len - pos, - ie, ie_len, i, - rate_masks[i], - chandef, - &custom_ie_offset, - flags); - ie_desc->ies[i] = buffer + old_pos; - ie_desc->len[i] = pos - old_pos; - old_pos = pos; + ie_desc->ies[i] = skb_tail_pointer(skb); + err = ieee80211_put_preq_ies_band(skb, sdata, + ie, ie_len, + &custom_ie_offset, + i, rate_masks[i], + chandef, flags); + if (err) + return err; + ie_desc->len[i] = skb_tail_pointer(skb) - + ie_desc->ies[i]; } } /* add any remaining custom IEs */ if (ie && ie_len) { - if (WARN_ONCE(buffer_len - pos < ie_len - custom_ie_offset, + if (WARN_ONCE(skb_tailroom(skb) < ie_len - custom_ie_offset, "not enough space for preq custom IEs\n")) - return pos; - memcpy(buffer + pos, ie + custom_ie_offset, - ie_len - custom_ie_offset); - ie_desc->common_ies = buffer + pos; - ie_desc->common_ie_len = ie_len - custom_ie_offset; - pos += ie_len - custom_ie_offset; + return -ENOBUFS; + ie_desc->common_ies = skb_tail_pointer(skb); + skb_put_data(skb, ie + custom_ie_offset, + ie_len - custom_ie_offset); + ie_desc->common_ie_len = skb_tail_pointer(skb) - + ie_desc->common_ies; } - return pos; + return 0; }; +int ieee80211_build_preq_ies(struct ieee80211_sub_if_data *sdata, u8 *buffer, + size_t buffer_len, + struct ieee80211_scan_ies *ie_desc, + const u8 *ie, size_t ie_len, + u8 bands_used, u32 *rate_masks, + struct cfg80211_chan_def *chandef, + u32 flags) +{ + struct sk_buff *skb = alloc_skb(buffer_len, GFP_KERNEL); + uintptr_t offs; + int ret, i; + u8 *start; + + if (!skb) + return -ENOMEM; + + start = skb_tail_pointer(skb); + memset(start, 0, skb_tailroom(skb)); + ret = ieee80211_put_preq_ies(skb, sdata, ie_desc, ie, ie_len, + bands_used, rate_masks, chandef, + flags); + if (ret < 0) { + goto out; + } + + if (skb->len > buffer_len) { + ret = -ENOBUFS; + goto out; + } + + memcpy(buffer, start, skb->len); + + /* adjust ie_desc for copy */ + for (i = 0; i < NUM_NL80211_BANDS; i++) { + offs = ie_desc->ies[i] - start; + ie_desc->ies[i] = buffer + offs; + } + offs = ie_desc->common_ies - start; + ie_desc->common_ies = buffer + offs; + + ret = skb->len; +out: + consume_skb(skb); + return ret; +} + struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, const u8 *src, const u8 *dst, u32 ratemask, @@ -2342,7 +2357,6 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, struct cfg80211_chan_def chandef; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; - int ies_len; u32 rate_masks[NUM_NL80211_BANDS] = {}; struct ieee80211_scan_ies dummy_ie_desc; @@ -2363,11 +2377,9 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, return NULL; rate_masks[chan->band] = ratemask; - ies_len = ieee80211_build_preq_ies(sdata, skb_tail_pointer(skb), - skb_tailroom(skb), &dummy_ie_desc, - ie, ie_len, BIT(chan->band), - rate_masks, &chandef, flags); - skb_put(skb, ies_len); + ieee80211_put_preq_ies(skb, sdata, &dummy_ie_desc, + ie, ie_len, BIT(chan->band), + rate_masks, &chandef, flags); if (dst) { mgmt = (struct ieee80211_mgmt *) skb->data; @@ -3202,21 +3214,6 @@ size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset) return pos; } -u8 *ieee80211_ie_build_s1g_cap(u8 *pos, struct ieee80211_sta_s1g_cap *s1g_cap) -{ - *pos++ = WLAN_EID_S1G_CAPABILITIES; - *pos++ = sizeof(struct ieee80211_s1g_cap); - memset(pos, 0, sizeof(struct ieee80211_s1g_cap)); - - memcpy(pos, &s1g_cap->cap, sizeof(s1g_cap->cap)); - pos += sizeof(s1g_cap->cap); - - memcpy(pos, &s1g_cap->nss_mcs, sizeof(s1g_cap->nss_mcs)); - pos += sizeof(s1g_cap->nss_mcs); - - return pos; -} - u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, u16 cap) { @@ -3413,33 +3410,32 @@ end: return pos; } -void ieee80211_put_he_6ghz_cap(struct sk_buff *skb, - struct ieee80211_sub_if_data *sdata, - enum ieee80211_smps_mode smps_mode) +int ieee80211_put_he_6ghz_cap(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata, + enum ieee80211_smps_mode smps_mode) { struct ieee80211_supported_band *sband; const struct ieee80211_sband_iftype_data *iftd; enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif); - u8 *pos; - u16 cap; + __le16 cap; if (!cfg80211_any_usable_channels(sdata->local->hw.wiphy, BIT(NL80211_BAND_6GHZ), IEEE80211_CHAN_NO_HE)) - return; + return 0; sband = sdata->local->hw.wiphy->bands[NL80211_BAND_6GHZ]; iftd = ieee80211_get_sband_iftype_data(sband, iftype); if (!iftd) - return; + return 0; /* Check for device HE 6 GHz capability before adding element */ if (!iftd->he_6ghz_capa.capa) - return; + return 0; - cap = le16_to_cpu(iftd->he_6ghz_capa.capa); - cap &= ~IEEE80211_HE_6GHZ_CAP_SM_PS; + cap = iftd->he_6ghz_capa.capa; + cap &= cpu_to_le16(~IEEE80211_HE_6GHZ_CAP_SM_PS); switch (smps_mode) { case IEEE80211_SMPS_AUTOMATIC: @@ -3447,22 +3443,27 @@ void ieee80211_put_he_6ghz_cap(struct sk_buff *skb, WARN_ON(1); fallthrough; case IEEE80211_SMPS_OFF: - cap |= u16_encode_bits(WLAN_HT_CAP_SM_PS_DISABLED, - IEEE80211_HE_6GHZ_CAP_SM_PS); + cap |= le16_encode_bits(WLAN_HT_CAP_SM_PS_DISABLED, + IEEE80211_HE_6GHZ_CAP_SM_PS); break; case IEEE80211_SMPS_STATIC: - cap |= u16_encode_bits(WLAN_HT_CAP_SM_PS_STATIC, - IEEE80211_HE_6GHZ_CAP_SM_PS); + cap |= le16_encode_bits(WLAN_HT_CAP_SM_PS_STATIC, + IEEE80211_HE_6GHZ_CAP_SM_PS); break; case IEEE80211_SMPS_DYNAMIC: - cap |= u16_encode_bits(WLAN_HT_CAP_SM_PS_DYNAMIC, - IEEE80211_HE_6GHZ_CAP_SM_PS); + cap |= le16_encode_bits(WLAN_HT_CAP_SM_PS_DYNAMIC, + IEEE80211_HE_6GHZ_CAP_SM_PS); break; } - pos = skb_put(skb, 2 + 1 + sizeof(cap)); - ieee80211_write_he_6ghz_cap(pos, cpu_to_le16(cap), - pos + 2 + 1 + sizeof(cap)); + if (skb_tailroom(skb) < 2 + 1 + sizeof(cap)) + return -ENOBUFS; + + skb_put_u8(skb, WLAN_EID_EXTENSION); + skb_put_u8(skb, 1 + sizeof(cap)); + skb_put_u8(skb, WLAN_EID_EXT_HE_6GHZ_CAPA); + skb_put_data(skb, &cap, sizeof(cap)); + return 0; } u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, -- cgit 1.2.3-korg From 9d0480a7c05b6482195acafbc5f4f3b4a1cc2b8b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 20:19:34 +0100 Subject: wifi: mac80211: move element parsing to a new file This code got really big, move it to a new file. Pure code move. Link: https://msgid.link/20240129202041.7f27f7c895e4.I0adfc28bd656a4d44c2bf47966277eecf56cbaa0@changeid Signed-off-by: Johannes Berg --- net/mac80211/Makefile | 2 +- net/mac80211/parse.c | 926 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/util.c | 891 ------------------------------------------------ 3 files changed, 927 insertions(+), 892 deletions(-) create mode 100644 net/mac80211/parse.c (limited to 'net') diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 4406b4f8f3b97..a33884967f212 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -29,7 +29,7 @@ mac80211-y := \ spectmgmt.o \ tx.o \ key.o \ - util.o \ + util.o parse.o \ wme.o \ chan.o \ trace.o mlme.o \ diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c new file mode 100644 index 0000000000000..196a882e4c190 --- /dev/null +++ b/net/mac80211/parse.c @@ -0,0 +1,926 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2002-2005, Instant802 Networks, Inc. + * Copyright 2005-2006, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc + * Copyright 2007 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright (C) 2015-2017 Intel Deutschland GmbH + * Copyright (C) 2018-2024 Intel Corporation + * + * element parsing for mac80211 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ieee80211_i.h" +#include "driver-ops.h" +#include "rate.h" +#include "mesh.h" +#include "wme.h" +#include "led.h" +#include "wep.h" + +static void +ieee80211_parse_extension_element(u32 *crc, + const struct element *elem, + struct ieee802_11_elems *elems, + struct ieee80211_elems_parse_params *params) +{ + const void *data = elem->data + 1; + bool calc_crc = false; + u8 len; + + if (!elem->datalen) + return; + + len = elem->datalen - 1; + + switch (elem->data[0]) { + case WLAN_EID_EXT_HE_MU_EDCA: + if (params->mode < IEEE80211_CONN_MODE_HE) + break; + calc_crc = true; + if (len >= sizeof(*elems->mu_edca_param_set)) + elems->mu_edca_param_set = data; + break; + case WLAN_EID_EXT_HE_CAPABILITY: + if (params->mode < IEEE80211_CONN_MODE_HE) + break; + if (ieee80211_he_capa_size_ok(data, len)) { + elems->he_cap = data; + elems->he_cap_len = len; + } + break; + case WLAN_EID_EXT_HE_OPERATION: + if (params->mode < IEEE80211_CONN_MODE_HE) + break; + calc_crc = true; + if (len >= sizeof(*elems->he_operation) && + len >= ieee80211_he_oper_size(data) - 1) + elems->he_operation = data; + break; + case WLAN_EID_EXT_UORA: + if (params->mode < IEEE80211_CONN_MODE_HE) + break; + if (len >= 1) + elems->uora_element = data; + break; + case WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME: + if (len == 3) + elems->max_channel_switch_time = data; + break; + case WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION: + if (len >= sizeof(*elems->mbssid_config_ie)) + elems->mbssid_config_ie = data; + break; + case WLAN_EID_EXT_HE_SPR: + if (params->mode < IEEE80211_CONN_MODE_HE) + break; + if (len >= sizeof(*elems->he_spr) && + len >= ieee80211_he_spr_size(data)) + elems->he_spr = data; + break; + case WLAN_EID_EXT_HE_6GHZ_CAPA: + if (params->mode < IEEE80211_CONN_MODE_HE) + break; + if (len >= sizeof(*elems->he_6ghz_capa)) + elems->he_6ghz_capa = data; + break; + case WLAN_EID_EXT_EHT_CAPABILITY: + if (params->mode < IEEE80211_CONN_MODE_EHT) + break; + if (ieee80211_eht_capa_size_ok(elems->he_cap, + data, len, + params->from_ap)) { + elems->eht_cap = data; + elems->eht_cap_len = len; + } + break; + case WLAN_EID_EXT_EHT_OPERATION: + if (params->mode < IEEE80211_CONN_MODE_EHT) + break; + if (ieee80211_eht_oper_size_ok(data, len)) + elems->eht_operation = data; + calc_crc = true; + break; + case WLAN_EID_EXT_EHT_MULTI_LINK: + if (params->mode < IEEE80211_CONN_MODE_EHT) + break; + calc_crc = true; + + if (ieee80211_mle_size_ok(data, len)) { + const struct ieee80211_multi_link_elem *mle = + (void *)data; + + switch (le16_get_bits(mle->control, + IEEE80211_ML_CONTROL_TYPE)) { + case IEEE80211_ML_CONTROL_TYPE_BASIC: + if (elems->ml_basic) { + elems->parse_error |= + IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC; + break; + } + elems->ml_basic_elem = (void *)elem; + elems->ml_basic = data; + elems->ml_basic_len = len; + break; + case IEEE80211_ML_CONTROL_TYPE_RECONF: + elems->ml_reconf_elem = (void *)elem; + elems->ml_reconf = data; + elems->ml_reconf_len = len; + break; + default: + break; + } + } + break; + case WLAN_EID_EXT_BANDWIDTH_INDICATION: + if (params->mode < IEEE80211_CONN_MODE_EHT) + break; + if (ieee80211_bandwidth_indication_size_ok(data, len)) + elems->bandwidth_indication = data; + calc_crc = true; + break; + case WLAN_EID_EXT_TID_TO_LINK_MAPPING: + if (params->mode < IEEE80211_CONN_MODE_EHT) + break; + calc_crc = true; + if (ieee80211_tid_to_link_map_size_ok(data, len) && + elems->ttlm_num < ARRAY_SIZE(elems->ttlm)) { + elems->ttlm[elems->ttlm_num] = (void *)data; + elems->ttlm_num++; + } + break; + } + + if (crc && calc_crc) + *crc = crc32_be(*crc, (void *)elem, elem->datalen + 2); +} + +static u32 +_ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, + struct ieee802_11_elems *elems, + const struct element *check_inherit) +{ + const struct element *elem; + bool calc_crc = params->filter != 0; + DECLARE_BITMAP(seen_elems, 256); + u32 crc = params->crc; + + bitmap_zero(seen_elems, 256); + + for_each_element(elem, params->start, params->len) { + const struct element *subelem; + u8 elem_parse_failed; + u8 id = elem->id; + u8 elen = elem->datalen; + const u8 *pos = elem->data; + + if (check_inherit && + !cfg80211_is_element_inherited(elem, + check_inherit)) + continue; + + switch (id) { + case WLAN_EID_SSID: + case WLAN_EID_SUPP_RATES: + case WLAN_EID_FH_PARAMS: + case WLAN_EID_DS_PARAMS: + case WLAN_EID_CF_PARAMS: + case WLAN_EID_TIM: + case WLAN_EID_IBSS_PARAMS: + case WLAN_EID_CHALLENGE: + case WLAN_EID_RSN: + case WLAN_EID_ERP_INFO: + case WLAN_EID_EXT_SUPP_RATES: + case WLAN_EID_HT_CAPABILITY: + case WLAN_EID_HT_OPERATION: + case WLAN_EID_VHT_CAPABILITY: + case WLAN_EID_VHT_OPERATION: + case WLAN_EID_MESH_ID: + case WLAN_EID_MESH_CONFIG: + case WLAN_EID_PEER_MGMT: + case WLAN_EID_PREQ: + case WLAN_EID_PREP: + case WLAN_EID_PERR: + case WLAN_EID_RANN: + case WLAN_EID_CHANNEL_SWITCH: + case WLAN_EID_EXT_CHANSWITCH_ANN: + case WLAN_EID_COUNTRY: + case WLAN_EID_PWR_CONSTRAINT: + case WLAN_EID_TIMEOUT_INTERVAL: + case WLAN_EID_SECONDARY_CHANNEL_OFFSET: + case WLAN_EID_WIDE_BW_CHANNEL_SWITCH: + case WLAN_EID_CHAN_SWITCH_PARAM: + case WLAN_EID_EXT_CAPABILITY: + case WLAN_EID_CHAN_SWITCH_TIMING: + case WLAN_EID_LINK_ID: + case WLAN_EID_BSS_MAX_IDLE_PERIOD: + case WLAN_EID_RSNX: + case WLAN_EID_S1G_BCN_COMPAT: + case WLAN_EID_S1G_CAPABILITIES: + case WLAN_EID_S1G_OPERATION: + case WLAN_EID_AID_RESPONSE: + case WLAN_EID_S1G_SHORT_BCN_INTERVAL: + /* + * not listing WLAN_EID_CHANNEL_SWITCH_WRAPPER -- it seems possible + * that if the content gets bigger it might be needed more than once + */ + if (test_bit(id, seen_elems)) { + elems->parse_error |= + IEEE80211_PARSE_ERR_DUP_ELEM; + continue; + } + break; + } + + if (calc_crc && id < 64 && (params->filter & (1ULL << id))) + crc = crc32_be(crc, pos - 2, elen + 2); + + elem_parse_failed = 0; + + switch (id) { + case WLAN_EID_LINK_ID: + if (elen + 2 < sizeof(struct ieee80211_tdls_lnkie)) { + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + } + elems->lnk_id = (void *)(pos - 2); + break; + case WLAN_EID_CHAN_SWITCH_TIMING: + if (elen < sizeof(struct ieee80211_ch_switch_timing)) { + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + } + elems->ch_sw_timing = (void *)pos; + break; + case WLAN_EID_EXT_CAPABILITY: + elems->ext_capab = pos; + elems->ext_capab_len = elen; + break; + case WLAN_EID_SSID: + elems->ssid = pos; + elems->ssid_len = elen; + break; + case WLAN_EID_SUPP_RATES: + elems->supp_rates = pos; + elems->supp_rates_len = elen; + break; + case WLAN_EID_DS_PARAMS: + if (elen >= 1) + elems->ds_params = pos; + else + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + case WLAN_EID_TIM: + if (elen >= sizeof(struct ieee80211_tim_ie)) { + elems->tim = (void *)pos; + elems->tim_len = elen; + } else + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + case WLAN_EID_VENDOR_SPECIFIC: + if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 && + pos[2] == 0xf2) { + /* Microsoft OUI (00:50:F2) */ + + if (calc_crc) + crc = crc32_be(crc, pos - 2, elen + 2); + + if (elen >= 5 && pos[3] == 2) { + /* OUI Type 2 - WMM IE */ + if (pos[4] == 0) { + elems->wmm_info = pos; + elems->wmm_info_len = elen; + } else if (pos[4] == 1) { + elems->wmm_param = pos; + elems->wmm_param_len = elen; + } + } + } + break; + case WLAN_EID_RSN: + elems->rsn = pos; + elems->rsn_len = elen; + break; + case WLAN_EID_ERP_INFO: + if (elen >= 1) + elems->erp_info = pos; + else + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + case WLAN_EID_EXT_SUPP_RATES: + elems->ext_supp_rates = pos; + elems->ext_supp_rates_len = elen; + break; + case WLAN_EID_HT_CAPABILITY: + if (params->mode < IEEE80211_CONN_MODE_HT) + break; + if (elen >= sizeof(struct ieee80211_ht_cap)) + elems->ht_cap_elem = (void *)pos; + else + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + case WLAN_EID_HT_OPERATION: + if (params->mode < IEEE80211_CONN_MODE_HT) + break; + if (elen >= sizeof(struct ieee80211_ht_operation)) + elems->ht_operation = (void *)pos; + else + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + case WLAN_EID_VHT_CAPABILITY: + if (params->mode < IEEE80211_CONN_MODE_VHT) + break; + if (elen >= sizeof(struct ieee80211_vht_cap)) + elems->vht_cap_elem = (void *)pos; + else + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + case WLAN_EID_VHT_OPERATION: + if (params->mode < IEEE80211_CONN_MODE_VHT) + break; + if (elen >= sizeof(struct ieee80211_vht_operation)) { + elems->vht_operation = (void *)pos; + if (calc_crc) + crc = crc32_be(crc, pos - 2, elen + 2); + break; + } + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + case WLAN_EID_OPMODE_NOTIF: + if (params->mode < IEEE80211_CONN_MODE_VHT) + break; + if (elen > 0) { + elems->opmode_notif = pos; + if (calc_crc) + crc = crc32_be(crc, pos - 2, elen + 2); + break; + } + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + case WLAN_EID_MESH_ID: + elems->mesh_id = pos; + elems->mesh_id_len = elen; + break; + case WLAN_EID_MESH_CONFIG: + if (elen >= sizeof(struct ieee80211_meshconf_ie)) + elems->mesh_config = (void *)pos; + else + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + case WLAN_EID_PEER_MGMT: + elems->peering = pos; + elems->peering_len = elen; + break; + case WLAN_EID_MESH_AWAKE_WINDOW: + if (elen >= 2) + elems->awake_window = (void *)pos; + break; + case WLAN_EID_PREQ: + elems->preq = pos; + elems->preq_len = elen; + break; + case WLAN_EID_PREP: + elems->prep = pos; + elems->prep_len = elen; + break; + case WLAN_EID_PERR: + elems->perr = pos; + elems->perr_len = elen; + break; + case WLAN_EID_RANN: + if (elen >= sizeof(struct ieee80211_rann_ie)) + elems->rann = (void *)pos; + else + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + case WLAN_EID_CHANNEL_SWITCH: + if (elen != sizeof(struct ieee80211_channel_sw_ie)) { + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + } + elems->ch_switch_ie = (void *)pos; + break; + case WLAN_EID_EXT_CHANSWITCH_ANN: + if (elen != sizeof(struct ieee80211_ext_chansw_ie)) { + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + } + elems->ext_chansw_ie = (void *)pos; + break; + case WLAN_EID_SECONDARY_CHANNEL_OFFSET: + if (params->mode < IEEE80211_CONN_MODE_HT) + break; + if (elen != sizeof(struct ieee80211_sec_chan_offs_ie)) { + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + } + elems->sec_chan_offs = (void *)pos; + break; + case WLAN_EID_CHAN_SWITCH_PARAM: + if (elen < + sizeof(*elems->mesh_chansw_params_ie)) { + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + } + elems->mesh_chansw_params_ie = (void *)pos; + break; + case WLAN_EID_WIDE_BW_CHANNEL_SWITCH: + if (params->mode < IEEE80211_CONN_MODE_VHT) + break; + + if (!params->action) { + elem_parse_failed = + IEEE80211_PARSE_ERR_UNEXPECTED_ELEM; + break; + } + + if (elen < sizeof(*elems->wide_bw_chansw_ie)) { + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + } + elems->wide_bw_chansw_ie = (void *)pos; + break; + case WLAN_EID_CHANNEL_SWITCH_WRAPPER: + if (params->mode < IEEE80211_CONN_MODE_VHT) + break; + if (params->action) { + elem_parse_failed = + IEEE80211_PARSE_ERR_UNEXPECTED_ELEM; + break; + } + /* + * This is a bit tricky, but as we only care about + * a few elements, parse them out manually. + */ + subelem = cfg80211_find_elem(WLAN_EID_WIDE_BW_CHANNEL_SWITCH, + pos, elen); + if (subelem) { + if (subelem->datalen >= sizeof(*elems->wide_bw_chansw_ie)) + elems->wide_bw_chansw_ie = + (void *)subelem->data; + else + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + } + + if (params->mode < IEEE80211_CONN_MODE_EHT) + break; + + subelem = cfg80211_find_ext_elem(WLAN_EID_EXT_BANDWIDTH_INDICATION, + pos, elen); + if (subelem) { + const void *edata = subelem->data + 1; + u8 edatalen = subelem->datalen - 1; + + if (ieee80211_bandwidth_indication_size_ok(edata, + edatalen)) + elems->bandwidth_indication = edata; + else + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + } + break; + case WLAN_EID_COUNTRY: + elems->country_elem = pos; + elems->country_elem_len = elen; + break; + case WLAN_EID_PWR_CONSTRAINT: + if (elen != 1) { + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + } + elems->pwr_constr_elem = pos; + break; + case WLAN_EID_CISCO_VENDOR_SPECIFIC: + /* Lots of different options exist, but we only care + * about the Dynamic Transmit Power Control element. + * First check for the Cisco OUI, then for the DTPC + * tag (0x00). + */ + if (elen < 4) { + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + } + + if (pos[0] != 0x00 || pos[1] != 0x40 || + pos[2] != 0x96 || pos[3] != 0x00) + break; + + if (elen != 6) { + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + } + + if (calc_crc) + crc = crc32_be(crc, pos - 2, elen + 2); + + elems->cisco_dtpc_elem = pos; + break; + case WLAN_EID_ADDBA_EXT: + if (elen < sizeof(struct ieee80211_addba_ext_ie)) { + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + } + elems->addba_ext_ie = (void *)pos; + break; + case WLAN_EID_TIMEOUT_INTERVAL: + if (elen >= sizeof(struct ieee80211_timeout_interval_ie)) + elems->timeout_int = (void *)pos; + else + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + case WLAN_EID_BSS_MAX_IDLE_PERIOD: + if (elen >= sizeof(*elems->max_idle_period_ie)) + elems->max_idle_period_ie = (void *)pos; + break; + case WLAN_EID_RSNX: + elems->rsnx = pos; + elems->rsnx_len = elen; + break; + case WLAN_EID_TX_POWER_ENVELOPE: + if (elen < 1 || + elen > sizeof(struct ieee80211_tx_pwr_env)) + break; + + if (elems->tx_pwr_env_num >= ARRAY_SIZE(elems->tx_pwr_env)) + break; + + elems->tx_pwr_env[elems->tx_pwr_env_num] = (void *)pos; + elems->tx_pwr_env_len[elems->tx_pwr_env_num] = elen; + elems->tx_pwr_env_num++; + break; + case WLAN_EID_EXTENSION: + ieee80211_parse_extension_element(calc_crc ? + &crc : NULL, + elem, elems, params); + break; + case WLAN_EID_S1G_CAPABILITIES: + if (params->mode != IEEE80211_CONN_MODE_S1G) + break; + if (elen >= sizeof(*elems->s1g_capab)) + elems->s1g_capab = (void *)pos; + else + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + case WLAN_EID_S1G_OPERATION: + if (params->mode != IEEE80211_CONN_MODE_S1G) + break; + if (elen == sizeof(*elems->s1g_oper)) + elems->s1g_oper = (void *)pos; + else + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + case WLAN_EID_S1G_BCN_COMPAT: + if (params->mode != IEEE80211_CONN_MODE_S1G) + break; + if (elen == sizeof(*elems->s1g_bcn_compat)) + elems->s1g_bcn_compat = (void *)pos; + else + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + case WLAN_EID_AID_RESPONSE: + if (params->mode != IEEE80211_CONN_MODE_S1G) + break; + if (elen == sizeof(struct ieee80211_aid_response_ie)) + elems->aid_resp = (void *)pos; + else + elem_parse_failed = + IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; + break; + default: + break; + } + + if (elem_parse_failed) + elems->parse_error |= elem_parse_failed; + else + __set_bit(id, seen_elems); + } + + if (!for_each_element_completed(elem, params->start, params->len)) + elems->parse_error |= IEEE80211_PARSE_ERR_INVALID_END; + + return crc; +} + +static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len, + struct ieee802_11_elems *elems, + struct cfg80211_bss *bss, + u8 *nontransmitted_profile) +{ + const struct element *elem, *sub; + size_t profile_len = 0; + bool found = false; + + if (!bss || !bss->transmitted_bss) + return profile_len; + + for_each_element_id(elem, WLAN_EID_MULTIPLE_BSSID, start, len) { + if (elem->datalen < 2) + continue; + if (elem->data[0] < 1 || elem->data[0] > 8) + continue; + + for_each_element(sub, elem->data + 1, elem->datalen - 1) { + u8 new_bssid[ETH_ALEN]; + const u8 *index; + + if (sub->id != 0 || sub->datalen < 4) { + /* not a valid BSS profile */ + continue; + } + + if (sub->data[0] != WLAN_EID_NON_TX_BSSID_CAP || + sub->data[1] != 2) { + /* The first element of the + * Nontransmitted BSSID Profile is not + * the Nontransmitted BSSID Capability + * element. + */ + continue; + } + + memset(nontransmitted_profile, 0, len); + profile_len = cfg80211_merge_profile(start, len, + elem, + sub, + nontransmitted_profile, + len); + + /* found a Nontransmitted BSSID Profile */ + index = cfg80211_find_ie(WLAN_EID_MULTI_BSSID_IDX, + nontransmitted_profile, + profile_len); + if (!index || index[1] < 1 || index[2] == 0) { + /* Invalid MBSSID Index element */ + continue; + } + + cfg80211_gen_new_bssid(bss->transmitted_bss->bssid, + elem->data[0], + index[2], + new_bssid); + if (ether_addr_equal(new_bssid, bss->bssid)) { + found = true; + elems->bssid_index_len = index[1]; + elems->bssid_index = (void *)&index[2]; + break; + } + } + } + + return found ? profile_len : 0; +} + +static void ieee80211_mle_get_sta_prof(struct ieee802_11_elems *elems, + u8 link_id) +{ + const struct ieee80211_multi_link_elem *ml = elems->ml_basic; + ssize_t ml_len = elems->ml_basic_len; + const struct element *sub; + + if (!ml || !ml_len) + return; + + if (le16_get_bits(ml->control, IEEE80211_ML_CONTROL_TYPE) != + IEEE80211_ML_CONTROL_TYPE_BASIC) + return; + + for_each_mle_subelement(sub, (u8 *)ml, ml_len) { + struct ieee80211_mle_per_sta_profile *prof = (void *)sub->data; + ssize_t sta_prof_len; + u16 control; + + if (sub->id != IEEE80211_MLE_SUBELEM_PER_STA_PROFILE) + continue; + + if (!ieee80211_mle_basic_sta_prof_size_ok(sub->data, + sub->datalen)) + return; + + control = le16_to_cpu(prof->control); + + if (link_id != u16_get_bits(control, + IEEE80211_MLE_STA_CONTROL_LINK_ID)) + continue; + + if (!(control & IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE)) + return; + + /* the sub element can be fragmented */ + sta_prof_len = + cfg80211_defragment_element(sub, + (u8 *)ml, ml_len, + elems->scratch_pos, + elems->scratch + + elems->scratch_len - + elems->scratch_pos, + IEEE80211_MLE_SUBELEM_FRAGMENT); + + if (sta_prof_len < 0) + return; + + elems->prof = (void *)elems->scratch_pos; + elems->sta_prof_len = sta_prof_len; + elems->scratch_pos += sta_prof_len; + + return; + } +} + +static void ieee80211_mle_parse_link(struct ieee802_11_elems *elems, + struct ieee80211_elems_parse_params *params) +{ + struct ieee80211_mle_per_sta_profile *prof; + struct ieee80211_elems_parse_params sub = { + .mode = params->mode, + .action = params->action, + .from_ap = params->from_ap, + .link_id = -1, + }; + ssize_t ml_len = elems->ml_basic_len; + const struct element *non_inherit = NULL; + const u8 *end; + + if (params->link_id == -1) + return; + + ml_len = cfg80211_defragment_element(elems->ml_basic_elem, + elems->ie_start, + elems->total_len, + elems->scratch_pos, + elems->scratch + + elems->scratch_len - + elems->scratch_pos, + WLAN_EID_FRAGMENT); + + if (ml_len < 0) + return; + + elems->ml_basic = (const void *)elems->scratch_pos; + elems->ml_basic_len = ml_len; + + ieee80211_mle_get_sta_prof(elems, params->link_id); + prof = elems->prof; + + if (!prof) + return; + + /* check if we have the 4 bytes for the fixed part in assoc response */ + if (elems->sta_prof_len < sizeof(*prof) + prof->sta_info_len - 1 + 4) { + elems->prof = NULL; + elems->sta_prof_len = 0; + return; + } + + /* + * Skip the capability information and the status code that are expected + * as part of the station profile in association response frames. Note + * the -1 is because the 'sta_info_len' is accounted to as part of the + * per-STA profile, but not part of the 'u8 variable[]' portion. + */ + sub.start = prof->variable + prof->sta_info_len - 1 + 4; + end = (const u8 *)prof + elems->sta_prof_len; + sub.len = end - sub.start; + + non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, + sub.start, sub.len); + _ieee802_11_parse_elems_full(&sub, elems, non_inherit); +} + +struct ieee802_11_elems * +ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) +{ + struct ieee802_11_elems *elems; + const struct element *non_inherit = NULL; + u8 *nontransmitted_profile; + int nontransmitted_profile_len = 0; + size_t scratch_len = 3 * params->len; + + elems = kzalloc(struct_size(elems, scratch, scratch_len), GFP_ATOMIC); + if (!elems) + return NULL; + elems->ie_start = params->start; + elems->total_len = params->len; + elems->scratch_len = scratch_len; + elems->scratch_pos = elems->scratch; + + nontransmitted_profile = elems->scratch_pos; + nontransmitted_profile_len = + ieee802_11_find_bssid_profile(params->start, params->len, + elems, params->bss, + nontransmitted_profile); + elems->scratch_pos += nontransmitted_profile_len; + elems->scratch_len -= nontransmitted_profile_len; + non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, + nontransmitted_profile, + nontransmitted_profile_len); + + elems->crc = _ieee802_11_parse_elems_full(params, elems, non_inherit); + + /* Override with nontransmitted profile, if found */ + if (nontransmitted_profile_len) { + struct ieee80211_elems_parse_params sub = { + .mode = params->mode, + .start = nontransmitted_profile, + .len = nontransmitted_profile_len, + .action = params->action, + .link_id = params->link_id, + }; + + _ieee802_11_parse_elems_full(&sub, elems, NULL); + } + + ieee80211_mle_parse_link(elems, params); + + if (elems->tim && !elems->parse_error) { + const struct ieee80211_tim_ie *tim_ie = elems->tim; + + elems->dtim_period = tim_ie->dtim_period; + elems->dtim_count = tim_ie->dtim_count; + } + + /* Override DTIM period and count if needed */ + if (elems->bssid_index && + elems->bssid_index_len >= + offsetofend(struct ieee80211_bssid_index, dtim_period)) + elems->dtim_period = elems->bssid_index->dtim_period; + + if (elems->bssid_index && + elems->bssid_index_len >= + offsetofend(struct ieee80211_bssid_index, dtim_count)) + elems->dtim_count = elems->bssid_index->dtim_count; + + return elems; +} +EXPORT_SYMBOL_IF_KUNIT(ieee802_11_parse_elems_full); + +int ieee80211_parse_bitrates(enum nl80211_chan_width width, + const struct ieee80211_supported_band *sband, + const u8 *srates, int srates_len, u32 *rates) +{ + u32 rate_flags = ieee80211_chanwidth_rate_flags(width); + struct ieee80211_rate *br; + int brate, rate, i, j, count = 0; + + *rates = 0; + + for (i = 0; i < srates_len; i++) { + rate = srates[i] & 0x7f; + + for (j = 0; j < sband->n_bitrates; j++) { + br = &sband->bitrates[j]; + if ((rate_flags & br->flags) != rate_flags) + continue; + + brate = DIV_ROUND_UP(br->bitrate, 5); + if (brate == rate) { + *rates |= BIT(j); + count++; + break; + } + } + } + return count; +} diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ea863d78061e6..c675ac59d2bb7 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -917,868 +917,6 @@ void ieee80211_queue_delayed_work(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_queue_delayed_work); -static void -ieee80211_parse_extension_element(u32 *crc, - const struct element *elem, - struct ieee802_11_elems *elems, - struct ieee80211_elems_parse_params *params) -{ - const void *data = elem->data + 1; - bool calc_crc = false; - u8 len; - - if (!elem->datalen) - return; - - len = elem->datalen - 1; - - switch (elem->data[0]) { - case WLAN_EID_EXT_HE_MU_EDCA: - if (params->mode < IEEE80211_CONN_MODE_HE) - break; - calc_crc = true; - if (len >= sizeof(*elems->mu_edca_param_set)) - elems->mu_edca_param_set = data; - break; - case WLAN_EID_EXT_HE_CAPABILITY: - if (params->mode < IEEE80211_CONN_MODE_HE) - break; - if (ieee80211_he_capa_size_ok(data, len)) { - elems->he_cap = data; - elems->he_cap_len = len; - } - break; - case WLAN_EID_EXT_HE_OPERATION: - if (params->mode < IEEE80211_CONN_MODE_HE) - break; - calc_crc = true; - if (len >= sizeof(*elems->he_operation) && - len >= ieee80211_he_oper_size(data) - 1) - elems->he_operation = data; - break; - case WLAN_EID_EXT_UORA: - if (params->mode < IEEE80211_CONN_MODE_HE) - break; - if (len >= 1) - elems->uora_element = data; - break; - case WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME: - if (len == 3) - elems->max_channel_switch_time = data; - break; - case WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION: - if (len >= sizeof(*elems->mbssid_config_ie)) - elems->mbssid_config_ie = data; - break; - case WLAN_EID_EXT_HE_SPR: - if (params->mode < IEEE80211_CONN_MODE_HE) - break; - if (len >= sizeof(*elems->he_spr) && - len >= ieee80211_he_spr_size(data)) - elems->he_spr = data; - break; - case WLAN_EID_EXT_HE_6GHZ_CAPA: - if (params->mode < IEEE80211_CONN_MODE_HE) - break; - if (len >= sizeof(*elems->he_6ghz_capa)) - elems->he_6ghz_capa = data; - break; - case WLAN_EID_EXT_EHT_CAPABILITY: - if (params->mode < IEEE80211_CONN_MODE_EHT) - break; - if (ieee80211_eht_capa_size_ok(elems->he_cap, - data, len, - params->from_ap)) { - elems->eht_cap = data; - elems->eht_cap_len = len; - } - break; - case WLAN_EID_EXT_EHT_OPERATION: - if (params->mode < IEEE80211_CONN_MODE_EHT) - break; - if (ieee80211_eht_oper_size_ok(data, len)) - elems->eht_operation = data; - calc_crc = true; - break; - case WLAN_EID_EXT_EHT_MULTI_LINK: - if (params->mode < IEEE80211_CONN_MODE_EHT) - break; - calc_crc = true; - - if (ieee80211_mle_size_ok(data, len)) { - const struct ieee80211_multi_link_elem *mle = - (void *)data; - - switch (le16_get_bits(mle->control, - IEEE80211_ML_CONTROL_TYPE)) { - case IEEE80211_ML_CONTROL_TYPE_BASIC: - if (elems->ml_basic) { - elems->parse_error |= - IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC; - break; - } - elems->ml_basic_elem = (void *)elem; - elems->ml_basic = data; - elems->ml_basic_len = len; - break; - case IEEE80211_ML_CONTROL_TYPE_RECONF: - elems->ml_reconf_elem = (void *)elem; - elems->ml_reconf = data; - elems->ml_reconf_len = len; - break; - default: - break; - } - } - break; - case WLAN_EID_EXT_BANDWIDTH_INDICATION: - if (params->mode < IEEE80211_CONN_MODE_EHT) - break; - if (ieee80211_bandwidth_indication_size_ok(data, len)) - elems->bandwidth_indication = data; - calc_crc = true; - break; - case WLAN_EID_EXT_TID_TO_LINK_MAPPING: - if (params->mode < IEEE80211_CONN_MODE_EHT) - break; - calc_crc = true; - if (ieee80211_tid_to_link_map_size_ok(data, len) && - elems->ttlm_num < ARRAY_SIZE(elems->ttlm)) { - elems->ttlm[elems->ttlm_num] = (void *)data; - elems->ttlm_num++; - } - break; - } - - if (crc && calc_crc) - *crc = crc32_be(*crc, (void *)elem, elem->datalen + 2); -} - -static u32 -_ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, - struct ieee802_11_elems *elems, - const struct element *check_inherit) -{ - const struct element *elem; - bool calc_crc = params->filter != 0; - DECLARE_BITMAP(seen_elems, 256); - u32 crc = params->crc; - - bitmap_zero(seen_elems, 256); - - for_each_element(elem, params->start, params->len) { - const struct element *subelem; - u8 elem_parse_failed; - u8 id = elem->id; - u8 elen = elem->datalen; - const u8 *pos = elem->data; - - if (check_inherit && - !cfg80211_is_element_inherited(elem, - check_inherit)) - continue; - - switch (id) { - case WLAN_EID_SSID: - case WLAN_EID_SUPP_RATES: - case WLAN_EID_FH_PARAMS: - case WLAN_EID_DS_PARAMS: - case WLAN_EID_CF_PARAMS: - case WLAN_EID_TIM: - case WLAN_EID_IBSS_PARAMS: - case WLAN_EID_CHALLENGE: - case WLAN_EID_RSN: - case WLAN_EID_ERP_INFO: - case WLAN_EID_EXT_SUPP_RATES: - case WLAN_EID_HT_CAPABILITY: - case WLAN_EID_HT_OPERATION: - case WLAN_EID_VHT_CAPABILITY: - case WLAN_EID_VHT_OPERATION: - case WLAN_EID_MESH_ID: - case WLAN_EID_MESH_CONFIG: - case WLAN_EID_PEER_MGMT: - case WLAN_EID_PREQ: - case WLAN_EID_PREP: - case WLAN_EID_PERR: - case WLAN_EID_RANN: - case WLAN_EID_CHANNEL_SWITCH: - case WLAN_EID_EXT_CHANSWITCH_ANN: - case WLAN_EID_COUNTRY: - case WLAN_EID_PWR_CONSTRAINT: - case WLAN_EID_TIMEOUT_INTERVAL: - case WLAN_EID_SECONDARY_CHANNEL_OFFSET: - case WLAN_EID_WIDE_BW_CHANNEL_SWITCH: - case WLAN_EID_CHAN_SWITCH_PARAM: - case WLAN_EID_EXT_CAPABILITY: - case WLAN_EID_CHAN_SWITCH_TIMING: - case WLAN_EID_LINK_ID: - case WLAN_EID_BSS_MAX_IDLE_PERIOD: - case WLAN_EID_RSNX: - case WLAN_EID_S1G_BCN_COMPAT: - case WLAN_EID_S1G_CAPABILITIES: - case WLAN_EID_S1G_OPERATION: - case WLAN_EID_AID_RESPONSE: - case WLAN_EID_S1G_SHORT_BCN_INTERVAL: - /* - * not listing WLAN_EID_CHANNEL_SWITCH_WRAPPER -- it seems possible - * that if the content gets bigger it might be needed more than once - */ - if (test_bit(id, seen_elems)) { - elems->parse_error |= - IEEE80211_PARSE_ERR_DUP_ELEM; - continue; - } - break; - } - - if (calc_crc && id < 64 && (params->filter & (1ULL << id))) - crc = crc32_be(crc, pos - 2, elen + 2); - - elem_parse_failed = 0; - - switch (id) { - case WLAN_EID_LINK_ID: - if (elen + 2 < sizeof(struct ieee80211_tdls_lnkie)) { - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - } - elems->lnk_id = (void *)(pos - 2); - break; - case WLAN_EID_CHAN_SWITCH_TIMING: - if (elen < sizeof(struct ieee80211_ch_switch_timing)) { - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - } - elems->ch_sw_timing = (void *)pos; - break; - case WLAN_EID_EXT_CAPABILITY: - elems->ext_capab = pos; - elems->ext_capab_len = elen; - break; - case WLAN_EID_SSID: - elems->ssid = pos; - elems->ssid_len = elen; - break; - case WLAN_EID_SUPP_RATES: - elems->supp_rates = pos; - elems->supp_rates_len = elen; - break; - case WLAN_EID_DS_PARAMS: - if (elen >= 1) - elems->ds_params = pos; - else - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - case WLAN_EID_TIM: - if (elen >= sizeof(struct ieee80211_tim_ie)) { - elems->tim = (void *)pos; - elems->tim_len = elen; - } else - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - case WLAN_EID_VENDOR_SPECIFIC: - if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 && - pos[2] == 0xf2) { - /* Microsoft OUI (00:50:F2) */ - - if (calc_crc) - crc = crc32_be(crc, pos - 2, elen + 2); - - if (elen >= 5 && pos[3] == 2) { - /* OUI Type 2 - WMM IE */ - if (pos[4] == 0) { - elems->wmm_info = pos; - elems->wmm_info_len = elen; - } else if (pos[4] == 1) { - elems->wmm_param = pos; - elems->wmm_param_len = elen; - } - } - } - break; - case WLAN_EID_RSN: - elems->rsn = pos; - elems->rsn_len = elen; - break; - case WLAN_EID_ERP_INFO: - if (elen >= 1) - elems->erp_info = pos; - else - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - case WLAN_EID_EXT_SUPP_RATES: - elems->ext_supp_rates = pos; - elems->ext_supp_rates_len = elen; - break; - case WLAN_EID_HT_CAPABILITY: - if (params->mode < IEEE80211_CONN_MODE_HT) - break; - if (elen >= sizeof(struct ieee80211_ht_cap)) - elems->ht_cap_elem = (void *)pos; - else - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - case WLAN_EID_HT_OPERATION: - if (params->mode < IEEE80211_CONN_MODE_HT) - break; - if (elen >= sizeof(struct ieee80211_ht_operation)) - elems->ht_operation = (void *)pos; - else - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - case WLAN_EID_VHT_CAPABILITY: - if (params->mode < IEEE80211_CONN_MODE_VHT) - break; - if (elen >= sizeof(struct ieee80211_vht_cap)) - elems->vht_cap_elem = (void *)pos; - else - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - case WLAN_EID_VHT_OPERATION: - if (params->mode < IEEE80211_CONN_MODE_VHT) - break; - if (elen >= sizeof(struct ieee80211_vht_operation)) { - elems->vht_operation = (void *)pos; - if (calc_crc) - crc = crc32_be(crc, pos - 2, elen + 2); - break; - } - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - case WLAN_EID_OPMODE_NOTIF: - if (params->mode < IEEE80211_CONN_MODE_VHT) - break; - if (elen > 0) { - elems->opmode_notif = pos; - if (calc_crc) - crc = crc32_be(crc, pos - 2, elen + 2); - break; - } - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - case WLAN_EID_MESH_ID: - elems->mesh_id = pos; - elems->mesh_id_len = elen; - break; - case WLAN_EID_MESH_CONFIG: - if (elen >= sizeof(struct ieee80211_meshconf_ie)) - elems->mesh_config = (void *)pos; - else - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - case WLAN_EID_PEER_MGMT: - elems->peering = pos; - elems->peering_len = elen; - break; - case WLAN_EID_MESH_AWAKE_WINDOW: - if (elen >= 2) - elems->awake_window = (void *)pos; - break; - case WLAN_EID_PREQ: - elems->preq = pos; - elems->preq_len = elen; - break; - case WLAN_EID_PREP: - elems->prep = pos; - elems->prep_len = elen; - break; - case WLAN_EID_PERR: - elems->perr = pos; - elems->perr_len = elen; - break; - case WLAN_EID_RANN: - if (elen >= sizeof(struct ieee80211_rann_ie)) - elems->rann = (void *)pos; - else - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - case WLAN_EID_CHANNEL_SWITCH: - if (elen != sizeof(struct ieee80211_channel_sw_ie)) { - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - } - elems->ch_switch_ie = (void *)pos; - break; - case WLAN_EID_EXT_CHANSWITCH_ANN: - if (elen != sizeof(struct ieee80211_ext_chansw_ie)) { - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - } - elems->ext_chansw_ie = (void *)pos; - break; - case WLAN_EID_SECONDARY_CHANNEL_OFFSET: - if (params->mode < IEEE80211_CONN_MODE_HT) - break; - if (elen != sizeof(struct ieee80211_sec_chan_offs_ie)) { - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - } - elems->sec_chan_offs = (void *)pos; - break; - case WLAN_EID_CHAN_SWITCH_PARAM: - if (elen < - sizeof(*elems->mesh_chansw_params_ie)) { - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - } - elems->mesh_chansw_params_ie = (void *)pos; - break; - case WLAN_EID_WIDE_BW_CHANNEL_SWITCH: - if (params->mode < IEEE80211_CONN_MODE_VHT) - break; - - if (!params->action) { - elem_parse_failed = - IEEE80211_PARSE_ERR_UNEXPECTED_ELEM; - break; - } - - if (elen < sizeof(*elems->wide_bw_chansw_ie)) { - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - } - elems->wide_bw_chansw_ie = (void *)pos; - break; - case WLAN_EID_CHANNEL_SWITCH_WRAPPER: - if (params->mode < IEEE80211_CONN_MODE_VHT) - break; - if (params->action) { - elem_parse_failed = - IEEE80211_PARSE_ERR_UNEXPECTED_ELEM; - break; - } - /* - * This is a bit tricky, but as we only care about - * a few elements, parse them out manually. - */ - subelem = cfg80211_find_elem(WLAN_EID_WIDE_BW_CHANNEL_SWITCH, - pos, elen); - if (subelem) { - if (subelem->datalen >= sizeof(*elems->wide_bw_chansw_ie)) - elems->wide_bw_chansw_ie = - (void *)subelem->data; - else - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - } - - if (params->mode < IEEE80211_CONN_MODE_EHT) - break; - - subelem = cfg80211_find_ext_elem(WLAN_EID_EXT_BANDWIDTH_INDICATION, - pos, elen); - if (subelem) { - const void *edata = subelem->data + 1; - u8 edatalen = subelem->datalen - 1; - - if (ieee80211_bandwidth_indication_size_ok(edata, - edatalen)) - elems->bandwidth_indication = edata; - else - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - } - break; - case WLAN_EID_COUNTRY: - elems->country_elem = pos; - elems->country_elem_len = elen; - break; - case WLAN_EID_PWR_CONSTRAINT: - if (elen != 1) { - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - } - elems->pwr_constr_elem = pos; - break; - case WLAN_EID_CISCO_VENDOR_SPECIFIC: - /* Lots of different options exist, but we only care - * about the Dynamic Transmit Power Control element. - * First check for the Cisco OUI, then for the DTPC - * tag (0x00). - */ - if (elen < 4) { - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - } - - if (pos[0] != 0x00 || pos[1] != 0x40 || - pos[2] != 0x96 || pos[3] != 0x00) - break; - - if (elen != 6) { - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - } - - if (calc_crc) - crc = crc32_be(crc, pos - 2, elen + 2); - - elems->cisco_dtpc_elem = pos; - break; - case WLAN_EID_ADDBA_EXT: - if (elen < sizeof(struct ieee80211_addba_ext_ie)) { - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - } - elems->addba_ext_ie = (void *)pos; - break; - case WLAN_EID_TIMEOUT_INTERVAL: - if (elen >= sizeof(struct ieee80211_timeout_interval_ie)) - elems->timeout_int = (void *)pos; - else - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - case WLAN_EID_BSS_MAX_IDLE_PERIOD: - if (elen >= sizeof(*elems->max_idle_period_ie)) - elems->max_idle_period_ie = (void *)pos; - break; - case WLAN_EID_RSNX: - elems->rsnx = pos; - elems->rsnx_len = elen; - break; - case WLAN_EID_TX_POWER_ENVELOPE: - if (elen < 1 || - elen > sizeof(struct ieee80211_tx_pwr_env)) - break; - - if (elems->tx_pwr_env_num >= ARRAY_SIZE(elems->tx_pwr_env)) - break; - - elems->tx_pwr_env[elems->tx_pwr_env_num] = (void *)pos; - elems->tx_pwr_env_len[elems->tx_pwr_env_num] = elen; - elems->tx_pwr_env_num++; - break; - case WLAN_EID_EXTENSION: - ieee80211_parse_extension_element(calc_crc ? - &crc : NULL, - elem, elems, params); - break; - case WLAN_EID_S1G_CAPABILITIES: - if (params->mode != IEEE80211_CONN_MODE_S1G) - break; - if (elen >= sizeof(*elems->s1g_capab)) - elems->s1g_capab = (void *)pos; - else - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - case WLAN_EID_S1G_OPERATION: - if (params->mode != IEEE80211_CONN_MODE_S1G) - break; - if (elen == sizeof(*elems->s1g_oper)) - elems->s1g_oper = (void *)pos; - else - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - case WLAN_EID_S1G_BCN_COMPAT: - if (params->mode != IEEE80211_CONN_MODE_S1G) - break; - if (elen == sizeof(*elems->s1g_bcn_compat)) - elems->s1g_bcn_compat = (void *)pos; - else - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - case WLAN_EID_AID_RESPONSE: - if (params->mode != IEEE80211_CONN_MODE_S1G) - break; - if (elen == sizeof(struct ieee80211_aid_response_ie)) - elems->aid_resp = (void *)pos; - else - elem_parse_failed = - IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; - break; - default: - break; - } - - if (elem_parse_failed) - elems->parse_error |= elem_parse_failed; - else - __set_bit(id, seen_elems); - } - - if (!for_each_element_completed(elem, params->start, params->len)) - elems->parse_error |= IEEE80211_PARSE_ERR_INVALID_END; - - return crc; -} - -static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len, - struct ieee802_11_elems *elems, - struct cfg80211_bss *bss, - u8 *nontransmitted_profile) -{ - const struct element *elem, *sub; - size_t profile_len = 0; - bool found = false; - - if (!bss || !bss->transmitted_bss) - return profile_len; - - for_each_element_id(elem, WLAN_EID_MULTIPLE_BSSID, start, len) { - if (elem->datalen < 2) - continue; - if (elem->data[0] < 1 || elem->data[0] > 8) - continue; - - for_each_element(sub, elem->data + 1, elem->datalen - 1) { - u8 new_bssid[ETH_ALEN]; - const u8 *index; - - if (sub->id != 0 || sub->datalen < 4) { - /* not a valid BSS profile */ - continue; - } - - if (sub->data[0] != WLAN_EID_NON_TX_BSSID_CAP || - sub->data[1] != 2) { - /* The first element of the - * Nontransmitted BSSID Profile is not - * the Nontransmitted BSSID Capability - * element. - */ - continue; - } - - memset(nontransmitted_profile, 0, len); - profile_len = cfg80211_merge_profile(start, len, - elem, - sub, - nontransmitted_profile, - len); - - /* found a Nontransmitted BSSID Profile */ - index = cfg80211_find_ie(WLAN_EID_MULTI_BSSID_IDX, - nontransmitted_profile, - profile_len); - if (!index || index[1] < 1 || index[2] == 0) { - /* Invalid MBSSID Index element */ - continue; - } - - cfg80211_gen_new_bssid(bss->transmitted_bss->bssid, - elem->data[0], - index[2], - new_bssid); - if (ether_addr_equal(new_bssid, bss->bssid)) { - found = true; - elems->bssid_index_len = index[1]; - elems->bssid_index = (void *)&index[2]; - break; - } - } - } - - return found ? profile_len : 0; -} - -static void ieee80211_mle_get_sta_prof(struct ieee802_11_elems *elems, - u8 link_id) -{ - const struct ieee80211_multi_link_elem *ml = elems->ml_basic; - ssize_t ml_len = elems->ml_basic_len; - const struct element *sub; - - if (!ml || !ml_len) - return; - - if (le16_get_bits(ml->control, IEEE80211_ML_CONTROL_TYPE) != - IEEE80211_ML_CONTROL_TYPE_BASIC) - return; - - for_each_mle_subelement(sub, (u8 *)ml, ml_len) { - struct ieee80211_mle_per_sta_profile *prof = (void *)sub->data; - ssize_t sta_prof_len; - u16 control; - - if (sub->id != IEEE80211_MLE_SUBELEM_PER_STA_PROFILE) - continue; - - if (!ieee80211_mle_basic_sta_prof_size_ok(sub->data, - sub->datalen)) - return; - - control = le16_to_cpu(prof->control); - - if (link_id != u16_get_bits(control, - IEEE80211_MLE_STA_CONTROL_LINK_ID)) - continue; - - if (!(control & IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE)) - return; - - /* the sub element can be fragmented */ - sta_prof_len = - cfg80211_defragment_element(sub, - (u8 *)ml, ml_len, - elems->scratch_pos, - elems->scratch + - elems->scratch_len - - elems->scratch_pos, - IEEE80211_MLE_SUBELEM_FRAGMENT); - - if (sta_prof_len < 0) - return; - - elems->prof = (void *)elems->scratch_pos; - elems->sta_prof_len = sta_prof_len; - elems->scratch_pos += sta_prof_len; - - return; - } -} - -static void ieee80211_mle_parse_link(struct ieee802_11_elems *elems, - struct ieee80211_elems_parse_params *params) -{ - struct ieee80211_mle_per_sta_profile *prof; - struct ieee80211_elems_parse_params sub = { - .mode = params->mode, - .action = params->action, - .from_ap = params->from_ap, - .link_id = -1, - }; - ssize_t ml_len = elems->ml_basic_len; - const struct element *non_inherit = NULL; - const u8 *end; - - if (params->link_id == -1) - return; - - ml_len = cfg80211_defragment_element(elems->ml_basic_elem, - elems->ie_start, - elems->total_len, - elems->scratch_pos, - elems->scratch + - elems->scratch_len - - elems->scratch_pos, - WLAN_EID_FRAGMENT); - - if (ml_len < 0) - return; - - elems->ml_basic = (const void *)elems->scratch_pos; - elems->ml_basic_len = ml_len; - - ieee80211_mle_get_sta_prof(elems, params->link_id); - prof = elems->prof; - - if (!prof) - return; - - /* check if we have the 4 bytes for the fixed part in assoc response */ - if (elems->sta_prof_len < sizeof(*prof) + prof->sta_info_len - 1 + 4) { - elems->prof = NULL; - elems->sta_prof_len = 0; - return; - } - - /* - * Skip the capability information and the status code that are expected - * as part of the station profile in association response frames. Note - * the -1 is because the 'sta_info_len' is accounted to as part of the - * per-STA profile, but not part of the 'u8 variable[]' portion. - */ - sub.start = prof->variable + prof->sta_info_len - 1 + 4; - end = (const u8 *)prof + elems->sta_prof_len; - sub.len = end - sub.start; - - non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, - sub.start, sub.len); - _ieee802_11_parse_elems_full(&sub, elems, non_inherit); -} - -struct ieee802_11_elems * -ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) -{ - struct ieee802_11_elems *elems; - const struct element *non_inherit = NULL; - u8 *nontransmitted_profile; - int nontransmitted_profile_len = 0; - size_t scratch_len = 3 * params->len; - - elems = kzalloc(struct_size(elems, scratch, scratch_len), GFP_ATOMIC); - if (!elems) - return NULL; - elems->ie_start = params->start; - elems->total_len = params->len; - elems->scratch_len = scratch_len; - elems->scratch_pos = elems->scratch; - - nontransmitted_profile = elems->scratch_pos; - nontransmitted_profile_len = - ieee802_11_find_bssid_profile(params->start, params->len, - elems, params->bss, - nontransmitted_profile); - elems->scratch_pos += nontransmitted_profile_len; - elems->scratch_len -= nontransmitted_profile_len; - non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, - nontransmitted_profile, - nontransmitted_profile_len); - - elems->crc = _ieee802_11_parse_elems_full(params, elems, non_inherit); - - /* Override with nontransmitted profile, if found */ - if (nontransmitted_profile_len) { - struct ieee80211_elems_parse_params sub = { - .mode = params->mode, - .start = nontransmitted_profile, - .len = nontransmitted_profile_len, - .action = params->action, - .link_id = params->link_id, - }; - - _ieee802_11_parse_elems_full(&sub, elems, NULL); - } - - ieee80211_mle_parse_link(elems, params); - - if (elems->tim && !elems->parse_error) { - const struct ieee80211_tim_ie *tim_ie = elems->tim; - - elems->dtim_period = tim_ie->dtim_period; - elems->dtim_count = tim_ie->dtim_count; - } - - /* Override DTIM period and count if needed */ - if (elems->bssid_index && - elems->bssid_index_len >= - offsetofend(struct ieee80211_bssid_index, dtim_period)) - elems->dtim_period = elems->bssid_index->dtim_period; - - if (elems->bssid_index && - elems->bssid_index_len >= - offsetofend(struct ieee80211_bssid_index, dtim_count)) - elems->dtim_count = elems->bssid_index->dtim_count; - - return elems; -} -EXPORT_SYMBOL_IF_KUNIT(ieee802_11_parse_elems_full); - void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata, struct ieee80211_tx_queue_params *qparam, int ac) @@ -4063,35 +3201,6 @@ bool ieee80211_chandef_s1g_oper(const struct ieee80211_s1g_oper_ie *oper, return true; } -int ieee80211_parse_bitrates(enum nl80211_chan_width width, - const struct ieee80211_supported_band *sband, - const u8 *srates, int srates_len, u32 *rates) -{ - u32 rate_flags = ieee80211_chanwidth_rate_flags(width); - struct ieee80211_rate *br; - int brate, rate, i, j, count = 0; - - *rates = 0; - - for (i = 0; i < srates_len; i++) { - rate = srates[i] & 0x7f; - - for (j = 0; j < sband->n_bitrates; j++) { - br = &sband->bitrates[j]; - if ((rate_flags & br->flags) != rate_flags) - continue; - - brate = DIV_ROUND_UP(br->bitrate, 5); - if (brate == rate) { - *rates |= BIT(j); - count++; - break; - } - } - } - return count; -} - int ieee80211_put_srates_elem(struct sk_buff *skb, const struct ieee80211_supported_band *sband, u32 basic_rates, u32 rate_flags, u32 masked_rates, -- cgit 1.2.3-korg From 28aa895bb0b324ed4a0cb441e5fbb882befb5e1c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 20:19:35 +0100 Subject: wifi: mac80211: convert ieee80211_ie_build_he_cap() to SKB use Convert ieee80211_ie_build_he_cap() to the SKB-put function style, renaming it to ieee80211_put_he_cap(). Link: https://msgid.link/20240129202041.e6ef888980d9.Ied9e014314b5d27611e693e3d4cb63bdc8d7de17@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 7 +++--- net/mac80211/mesh.c | 15 ++----------- net/mac80211/mlme.c | 37 +++---------------------------- net/mac80211/tdls.c | 17 ++++----------- net/mac80211/util.c | 54 +++++++++++++++++++++------------------------- 5 files changed, 37 insertions(+), 93 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index fde8c0b671254..de0fdbd366c5f 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2502,9 +2502,6 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, const struct cfg80211_chan_def *chandef); u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata); -u8 *ieee80211_ie_build_he_cap(const struct ieee80211_conn_settings *conn, - const struct ieee80211_sta_he_cap *he_cap, - u8 *pos, u8 *end); u8 *ieee80211_ie_build_he_oper(u8 *pos, struct cfg80211_chan_def *chandef); u8 *ieee80211_ie_build_eht_oper(u8 *pos, struct cfg80211_chan_def *chandef, const struct ieee80211_sta_eht_cap *eht_cap); @@ -2523,6 +2520,10 @@ int ieee80211_put_srates_elem(struct sk_buff *skb, const struct ieee80211_supported_band *sband, u32 basic_rates, u32 rate_flags, u32 masked_rates, u8 element_id); +int ieee80211_put_he_cap(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata, + const struct ieee80211_supported_band *sband, + const struct ieee80211_conn_settings *conn); int ieee80211_put_he_6ghz_cap(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps_mode); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 7e860486c6bc5..cb217657c42e8 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -567,29 +567,18 @@ int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata, int mesh_add_he_cap_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u8 ie_len) { - const struct ieee80211_sta_he_cap *he_cap; struct ieee80211_supported_band *sband; - u8 *pos; sband = ieee80211_get_sband(sdata); if (!sband) return -EINVAL; - he_cap = ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT); - - if (!he_cap || - sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || + if (sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10) return 0; - if (skb_tailroom(skb) < ie_len) - return -ENOMEM; - - pos = skb_put(skb, ie_len); - ieee80211_ie_build_he_cap(NULL, he_cap, pos, pos + ie_len); - - return 0; + return ieee80211_put_he_cap(skb, sdata, sband, NULL); } int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d807a904419c8..de3d0e08ca2a0 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1038,38 +1038,6 @@ static bool ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata, return mu_mimo_owner; } -/* This function determines HE capability flags for the association - * and builds the IE. - */ -static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, - struct ieee80211_supported_band *sband, - enum ieee80211_smps_mode smps_mode, - const struct ieee80211_conn_settings *conn) -{ - u8 *pos, *pre_he_pos; - const struct ieee80211_sta_he_cap *he_cap; - u8 he_cap_size; - - he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); - if (WARN_ON(!he_cap)) - return; - - /* get a max size estimate */ - he_cap_size = - 2 + 1 + sizeof(he_cap->he_cap_elem) + - ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem) + - ieee80211_he_ppe_size(he_cap->ppe_thres[0], - he_cap->he_cap_elem.phy_cap_info); - pos = skb_put(skb, he_cap_size); - pre_he_pos = pos; - pos = ieee80211_ie_build_he_cap(conn, he_cap, pos, pos + he_cap_size); - /* trim excess if any */ - skb_trim(skb, skb->len - (pre_he_pos + he_cap_size - pos)); - - ieee80211_put_he_6ghz_cap(skb, sdata, smps_mode); -} - static void ieee80211_add_eht_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, struct ieee80211_supported_band *sband, @@ -1403,9 +1371,10 @@ static size_t ieee80211_assoc_link_elems(struct ieee80211_sub_if_data *sdata, offset); if (assoc_data->link[link_id].conn.mode >= IEEE80211_CONN_MODE_HE) { - ieee80211_add_he_ie(sdata, skb, sband, smps_mode, - &assoc_data->link[link_id].conn); + ieee80211_put_he_cap(skb, sdata, sband, + &assoc_data->link[link_id].conn); ADD_PRESENT_EXT_ELEM(WLAN_EID_EXT_HE_CAPABILITY); + ieee80211_put_he_6ghz_cap(skb, sdata, smps_mode); } /* diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index edbd3fd8a7374..3f9c2b2771c6a 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -548,19 +548,10 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_link_data *link, } /* build the HE-cap from sband */ - if (he_cap && - (action_code == WLAN_TDLS_SETUP_REQUEST || - action_code == WLAN_TDLS_SETUP_RESPONSE || - action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES)) { - u8 cap_size; - - cap_size = - 2 + 1 + sizeof(he_cap->he_cap_elem) + - ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem) + - ieee80211_he_ppe_size(he_cap->ppe_thres[0], - he_cap->he_cap_elem.phy_cap_info); - pos = skb_put(skb, cap_size); - pos = ieee80211_ie_build_he_cap(NULL, he_cap, pos, pos + cap_size); + if (action_code == WLAN_TDLS_SETUP_REQUEST || + action_code == WLAN_TDLS_SETUP_RESPONSE || + action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES) { + ieee80211_put_he_cap(skb, sdata, sband, NULL); /* Build HE 6Ghz capa IE from sband */ if (sband->band == NL80211_BAND_6GHZ) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index c675ac59d2bb7..4dcb62e9d4c65 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1352,19 +1352,14 @@ static int ieee80211_put_preq_ies_band(struct sk_buff *skb, *offset = noffset; } - he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); - if (he_cap && - cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band), + if (cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band), IEEE80211_CHAN_NO_HE)) { - u8 *pos = skb_tail_pointer(skb); - u8 *end = pos + skb_tailroom(skb); - - pos = ieee80211_ie_build_he_cap(NULL, he_cap, pos, end); - if (!pos) - return -ENOBUFS; - skb_put(skb, pos - skb_tail_pointer(skb)); + err = ieee80211_put_he_cap(skb, sdata, sband, NULL); + if (err) + return err; } + he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); eht_cap = ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif); if (eht_cap && @@ -2408,7 +2403,7 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, return pos; } -/* this may return more than ieee80211_ie_build_he_cap() will need */ +/* this may return more than ieee80211_put_he_6ghz_cap() will need */ u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata) { const struct ieee80211_sta_he_cap *he_cap; @@ -2479,21 +2474,23 @@ ieee80211_get_adjusted_he_cap(const struct ieee80211_conn_settings *conn, } } -u8 *ieee80211_ie_build_he_cap(const struct ieee80211_conn_settings *conn, - const struct ieee80211_sta_he_cap *he_cap, - u8 *pos, u8 *end) +int ieee80211_put_he_cap(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata, + const struct ieee80211_supported_band *sband, + const struct ieee80211_conn_settings *conn) { + const struct ieee80211_sta_he_cap *he_cap; struct ieee80211_he_cap_elem elem; + u8 *len; u8 n; u8 ie_len; - u8 *orig_pos = pos; if (!conn) conn = &ieee80211_conn_settings_unlimited; - /* Make sure we have place for the IE */ + he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); if (!he_cap) - return orig_pos; + return 0; /* modify on stack first to calculate 'n' and 'ie_len' correctly */ ieee80211_get_adjusted_he_cap(conn, he_cap, &elem); @@ -2504,19 +2501,17 @@ u8 *ieee80211_ie_build_he_cap(const struct ieee80211_conn_settings *conn, ieee80211_he_ppe_size(he_cap->ppe_thres[0], he_cap->he_cap_elem.phy_cap_info); - if ((end - pos) < ie_len) - return orig_pos; + if (skb_tailroom(skb) < ie_len) + return -ENOBUFS; - *pos++ = WLAN_EID_EXTENSION; - pos++; /* We'll set the size later below */ - *pos++ = WLAN_EID_EXT_HE_CAPABILITY; + skb_put_u8(skb, WLAN_EID_EXTENSION); + len = skb_put(skb, 1); /* We'll set the size later below */ + skb_put_u8(skb, WLAN_EID_EXT_HE_CAPABILITY); /* Fixed data */ - memcpy(pos, &elem, sizeof(elem)); - pos += sizeof(elem); + skb_put_data(skb, &elem, sizeof(elem)); - memcpy(pos, &he_cap->he_mcs_nss_supp, n); - pos += n; + skb_put_data(skb, &he_cap->he_mcs_nss_supp, n); /* Check if PPE Threshold should be present */ if ((he_cap->he_cap_elem.phy_cap_info[6] & @@ -2540,12 +2535,11 @@ u8 *ieee80211_ie_build_he_cap(const struct ieee80211_conn_settings *conn, n = DIV_ROUND_UP(n, 8); /* Copy PPE Thresholds */ - memcpy(pos, &he_cap->ppe_thres, n); - pos += n; + skb_put_data(skb, &he_cap->ppe_thres, n); end: - orig_pos[1] = (pos - orig_pos) - 2; - return pos; + *len = skb_tail_pointer(skb) - len - 1; + return 0; } int ieee80211_put_he_6ghz_cap(struct sk_buff *skb, -- cgit 1.2.3-korg From ea8af8be4232a3f476d8659319cab794be44e73b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 20:19:36 +0100 Subject: wifi: mac80211: convert ieee80211_ie_build_eht_cap() to SKB use Convert ieee80211_ie_build_eht_cap() to the SKB-put function style, renaming it to ieee80211_put_eht_cap(). Link: https://msgid.link/20240129202041.ece9769e3c94.Ibd17bea6311f0c7ba56f6c1803fa3208abaaebb9@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 10 +++---- net/mac80211/mesh.c | 17 ++--------- net/mac80211/mlme.c | 39 ++---------------------- net/mac80211/tdls.c | 20 +++---------- net/mac80211/util.c | 75 +++++++++++++++++++--------------------------- 5 files changed, 42 insertions(+), 119 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index de0fdbd366c5f..f3edb1a148a7f 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2527,6 +2527,10 @@ int ieee80211_put_he_cap(struct sk_buff *skb, int ieee80211_put_he_6ghz_cap(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps_mode); +int ieee80211_put_eht_cap(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata, + const struct ieee80211_supported_band *sband, + const struct ieee80211_conn_settings *conn); /* channel management */ bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper, @@ -2651,12 +2655,6 @@ void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache); void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache); u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata); -u8 *ieee80211_ie_build_eht_cap(const struct ieee80211_conn_settings *conn, - u8 *pos, - const struct ieee80211_sta_he_cap *he_cap, - const struct ieee80211_sta_eht_cap *eht_cap, - u8 *end, - bool for_ap); void ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index cb217657c42e8..49f79512c1444 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -636,31 +636,18 @@ int mesh_add_he_6ghz_cap_ie(struct ieee80211_sub_if_data *sdata, int mesh_add_eht_cap_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u8 ie_len) { - const struct ieee80211_sta_he_cap *he_cap; - const struct ieee80211_sta_eht_cap *eht_cap; struct ieee80211_supported_band *sband; - u8 *pos; sband = ieee80211_get_sband(sdata); if (!sband) return -EINVAL; - he_cap = ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT); - eht_cap = ieee80211_get_eht_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT); - if (!he_cap || !eht_cap || - sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || + if (sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_20_NOHT || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_5 || sdata->vif.bss_conf.chanreq.oper.width == NL80211_CHAN_WIDTH_10) return 0; - if (skb_tailroom(skb) < ie_len) - return -ENOMEM; - - pos = skb_put(skb, ie_len); - ieee80211_ie_build_eht_cap(NULL, pos, he_cap, eht_cap, pos + ie_len, - false); - - return 0; + return ieee80211_put_eht_cap(skb, sdata, sband, NULL); } int mesh_add_eht_oper_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index de3d0e08ca2a0..dd97f402a6241 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1038,41 +1038,6 @@ static bool ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata, return mu_mimo_owner; } -static void ieee80211_add_eht_ie(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, - struct ieee80211_supported_band *sband, - const struct ieee80211_conn_settings *conn) -{ - u8 *pos, *pre_eht_pos; - const struct ieee80211_sta_he_cap *he_cap; - const struct ieee80211_sta_eht_cap *eht_cap; - u8 eht_cap_size; - - he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); - eht_cap = ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif); - - /* - * EHT capabilities element is only added if the HE capabilities element - * was added so assume that 'he_cap' is valid and don't check it. - */ - if (WARN_ON(!he_cap || !eht_cap)) - return; - - eht_cap_size = - 2 + 1 + sizeof(eht_cap->eht_cap_elem) + - ieee80211_eht_mcs_nss_size(&he_cap->he_cap_elem, - &eht_cap->eht_cap_elem, - false) + - ieee80211_eht_ppe_size(eht_cap->eht_ppe_thres[0], - eht_cap->eht_cap_elem.phy_cap_info); - pos = skb_put(skb, eht_cap_size); - pre_eht_pos = pos; - pos = ieee80211_ie_build_eht_cap(conn, pos, he_cap, eht_cap, - pos + eht_cap_size, false); - /* trim excess if any */ - skb_trim(skb, skb->len - (pre_eht_pos + eht_cap_size - pos)); -} - static void ieee80211_assoc_add_rates(struct sk_buff *skb, enum nl80211_chan_width width, struct ieee80211_supported_band *sband, @@ -1393,8 +1358,8 @@ static size_t ieee80211_assoc_link_elems(struct ieee80211_sub_if_data *sdata, present_elems = NULL; if (assoc_data->link[link_id].conn.mode >= IEEE80211_CONN_MODE_EHT) - ieee80211_add_eht_ie(sdata, skb, sband, - &assoc_data->link[link_id].conn); + ieee80211_put_eht_cap(skb, sdata, sband, + &assoc_data->link[link_id].conn); if (sband->band == NL80211_BAND_S1GHZ) { ieee80211_add_aid_request_ie(sdata, skb); diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 3f9c2b2771c6a..42d9c06cbb845 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -575,22 +575,10 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_link_data *link, } /* build the EHT-cap from sband */ - if (he_cap && eht_cap && - (action_code == WLAN_TDLS_SETUP_REQUEST || - action_code == WLAN_TDLS_SETUP_RESPONSE || - action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES)) { - u8 cap_size; - - cap_size = - 2 + 1 + sizeof(eht_cap->eht_cap_elem) + - ieee80211_eht_mcs_nss_size(&he_cap->he_cap_elem, - &eht_cap->eht_cap_elem, false) + - ieee80211_eht_ppe_size(eht_cap->eht_ppe_thres[0], - eht_cap->eht_cap_elem.phy_cap_info); - pos = skb_put(skb, cap_size); - ieee80211_ie_build_eht_cap(NULL, pos, he_cap, eht_cap, - pos + cap_size, false); - } + if (action_code == WLAN_TDLS_SETUP_REQUEST || + action_code == WLAN_TDLS_SETUP_RESPONSE || + action_code == WLAN_PUB_ACTION_TDLS_DISCOVER_RES) + ieee80211_put_eht_cap(skb, sdata, sband, NULL); /* add any remaining IEs */ if (extra_ies_len) { diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 4dcb62e9d4c65..627bd5a8bda52 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1199,8 +1199,6 @@ static int ieee80211_put_preq_ies_band(struct sk_buff *skb, { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; - const struct ieee80211_sta_he_cap *he_cap; - const struct ieee80211_sta_eht_cap *eht_cap; int i, err; size_t noffset; u32 rate_flags; @@ -1359,22 +1357,12 @@ static int ieee80211_put_preq_ies_band(struct sk_buff *skb, return err; } - he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); - eht_cap = ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif); - - if (eht_cap && - cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band), + if (cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band), IEEE80211_CHAN_NO_HE | IEEE80211_CHAN_NO_EHT)) { - u8 *pos = skb_tail_pointer(skb); - u8 *end = pos + skb_tailroom(skb); - - pos = ieee80211_ie_build_eht_cap(NULL, pos, he_cap, eht_cap, - end, - sdata->vif.type == NL80211_IFTYPE_AP); - if (!pos) - return -ENOBUFS; - skb_put(skb, pos - skb_tail_pointer(skb)); + err = ieee80211_put_eht_cap(skb, sdata, sband, NULL); + if (err) + return err; } err = ieee80211_put_he_6ghz_cap(skb, sdata, IEEE80211_SMPS_OFF); @@ -4175,7 +4163,7 @@ u16 ieee80211_encode_usf(int listen_interval) return (u16) listen_interval; } -/* this may return more than ieee80211_ie_build_eht_cap() will need */ +/* this may return more than ieee80211_put_eht_cap() will need */ u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata) { const struct ieee80211_sta_he_cap *he_cap; @@ -4205,25 +4193,28 @@ u8 ieee80211_ie_len_eht_cap(struct ieee80211_sub_if_data *sdata) return 0; } -u8 *ieee80211_ie_build_eht_cap(const struct ieee80211_conn_settings *conn, - u8 *pos, - const struct ieee80211_sta_he_cap *he_cap, - const struct ieee80211_sta_eht_cap *eht_cap, - u8 *end, bool for_ap) +int ieee80211_put_eht_cap(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata, + const struct ieee80211_supported_band *sband, + const struct ieee80211_conn_settings *conn) { - struct ieee80211_eht_cap_elem_fixed fixed, *out; + const struct ieee80211_sta_he_cap *he_cap = + ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); + const struct ieee80211_sta_eht_cap *eht_cap = + ieee80211_get_eht_iftype_cap_vif(sband, &sdata->vif); + bool for_ap = sdata->vif.type == NL80211_IFTYPE_AP; + struct ieee80211_eht_cap_elem_fixed fixed; struct ieee80211_he_cap_elem he; u8 mcs_nss_len, ppet_len; u8 orig_mcs_nss_len; u8 ie_len; - u8 *orig_pos = pos; if (!conn) conn = &ieee80211_conn_settings_unlimited; /* Make sure we have place for the IE */ if (!he_cap || !eht_cap) - return orig_pos; + return 0; orig_mcs_nss_len = ieee80211_eht_mcs_nss_size(&he_cap->he_cap_elem, &eht_cap->eht_cap_elem, @@ -4266,16 +4257,13 @@ u8 *ieee80211_ie_build_eht_cap(const struct ieee80211_conn_settings *conn, fixed.phy_cap_info); ie_len = 2 + 1 + sizeof(eht_cap->eht_cap_elem) + mcs_nss_len + ppet_len; - if ((end - pos) < ie_len) - return orig_pos; - - *pos++ = WLAN_EID_EXTENSION; - *pos++ = ie_len - 2; - *pos++ = WLAN_EID_EXT_EHT_CAPABILITY; + if (skb_tailroom(skb) < ie_len) + return -ENOBUFS; - out = (void *)pos; - *out = fixed; - pos += sizeof(*out); + skb_put_u8(skb, WLAN_EID_EXTENSION); + skb_put_u8(skb, ie_len - 2); + skb_put_u8(skb, WLAN_EID_EXT_EHT_CAPABILITY); + skb_put_data(skb, &fixed, sizeof(fixed)); if (mcs_nss_len == 4 && orig_mcs_nss_len != 4) { /* @@ -4284,21 +4272,18 @@ u8 *ieee80211_ie_build_eht_cap(const struct ieee80211_conn_settings *conn, * the groups 0-7, 8-9, 10-11, 12-13 rather than just 0-9, * 10-11, 12-13. Thus, use 0-9 for 0-7 and 8-9. */ - *pos++ = eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_mcs9_max_nss; - *pos++ = eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_mcs9_max_nss; - *pos++ = eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_mcs11_max_nss; - *pos++ = eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_mcs13_max_nss; + skb_put_u8(skb, eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_mcs9_max_nss); + skb_put_u8(skb, eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_mcs9_max_nss); + skb_put_u8(skb, eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_mcs11_max_nss); + skb_put_u8(skb, eht_cap->eht_mcs_nss_supp.bw._80.rx_tx_mcs13_max_nss); } else { - memcpy(pos, &eht_cap->eht_mcs_nss_supp, mcs_nss_len); - pos += mcs_nss_len; + skb_put_data(skb, &eht_cap->eht_mcs_nss_supp, mcs_nss_len); } - if (ppet_len) { - memcpy(pos, &eht_cap->eht_ppe_thres, ppet_len); - pos += ppet_len; - } + if (ppet_len) + skb_put_data(skb, &eht_cap->eht_ppe_thres, ppet_len); - return pos; + return 0; } const char *ieee80211_conn_mode_str(enum ieee80211_conn_mode mode) -- cgit 1.2.3-korg From 55167a3eed53ce8b40d5bf2a3e0be4e15d2eba57 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 20:35:45 +0100 Subject: wifi: mac80211: allow CSA to same channel This could be used e.g. for temporarily sending quiet (mode=1 in CSA/ECSA), or updating bandwidth. This is also useful for testing, since it's something that an AP may do and the client needs to be prepared. Simply allow it. Link: https://msgid.link/20240129203544.ef7258d5790d.Idafe22e41621757458d4960659b9621853f7104d@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 2ddaf00379523..17b4454918812 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3876,10 +3876,6 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, if (sdata->wdev.cac_started) return -EBUSY; - if (cfg80211_chandef_identical(&chanreq.oper, - &sdata->vif.bss_conf.chanreq.oper)) - return -EINVAL; - if (chanreq.oper.punctured && !sdata->vif.bss_conf.eht_support) return -EINVAL; -- cgit 1.2.3-korg From 91cdcbbcde1092da5225cfb05c88961658a34353 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Jan 2024 16:49:11 +0100 Subject: wifi: mac80211: clarify vif handling in TX dequeue The vif pointer at least looks like it can actually be NULL in some cases such as the monitor-mode vif, causing static checkers to complain with the immediate derefence. In these cases the sta pointer will also be NULL, but clarify it in the code anyway. Link: https://msgid.link/20240131164910.60066625a239.Idfb6a5a9876f9f631eae760055e1c4018259a971@changeid Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index f57f7963ca377..098b32947c2b1 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3953,7 +3953,8 @@ begin: ieee80211_free_txskb(&local->hw, skb); goto begin; } else { - vif = NULL; + info->control.vif = NULL; + return skb; } break; case NL80211_IFTYPE_AP_VLAN: -- cgit 1.2.3-korg From 03145a1d5d38eb1fe79d3bc01d37bf4f28076796 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Feb 2024 13:45:55 +0100 Subject: wifi: mac80211: add missing kernel-doc for fast_tx_check This was added earlier, add kernel-doc for it. Link: https://msgid.link/20240206134555.6354b0ac8610.Ib90d3651834c556b73697388f59bd396a1f6f9b0@changeid Signed-off-by: Johannes Berg --- net/mac80211/mesh.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index ad8469293d712..d913ce7ba72ef 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2008, 2009 open80211s Ltd. - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2024 Intel Corporation * Authors: Luis Carlos Cobo * Javier Cardona */ @@ -94,6 +94,7 @@ enum mesh_deferred_task_flags { * @is_root: the destination station of this path is a root node * @is_gate: the destination station of this path is a mesh gate * @path_change_count: the number of path changes to destination + * @fast_tx_check: timestamp of last fast-xmit enable attempt * * * The dst address is unique in the mesh path table. Since the mesh_path is -- cgit 1.2.3-korg From 37c37096ad80bfa38ab427f33f58124e043fa2fe Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 7 Feb 2024 11:59:20 +0100 Subject: wifi: mac80211: don't use sband->band early Some drivers may (notably mt76 does) not set up sband->band before registering, and cfg80211 will fill it in later. But since the HT/HE capability check mac80211 required it to be initialized already, otherwise failing things. This really isn't necessary though since the code is iterating the list of bands, and has the 'band' variable available. Fix it to not require the sband->band to be initialized already. Fixes: f04d2c247e04 ("wifi: mac80211: disallow drivers with HT wider than HE") Reported-by: Bert Karwatzki Debugged-by: Bert Karwatzki Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218466 Link: https://msgid.link/20240207115920.43cbedffb5c3.I4968e12275a3f95926e3f3ccae81e50f23fe4d4d@changeid Signed-off-by: Johannes Berg --- net/mac80211/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 879abe216a3ee..4eaea0a9975b4 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1259,7 +1259,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) supp_he = supp_he || iftd->he_cap.has_he; supp_eht = supp_eht || iftd->eht_cap.has_eht; - if (sband->band == NL80211_BAND_2GHZ) + if (band == NL80211_BAND_2GHZ) he_40_mhz_cap = IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G; else -- cgit 1.2.3-korg From 68de13028b94572fc570b7eb1e0e2de1d751fe7e Mon Sep 17 00:00:00 2001 From: Michael-CY Lee Date: Fri, 22 Dec 2023 09:09:13 +0800 Subject: wifi: cfg80211: Add utility for converting op_class into chandef This utility is used in STA CSA handling. The op_class in the ECSA Element can be converted into chandef. Co-developed-by: Money Wang Signed-off-by: Michael-CY Lee Link: https://msgid.link/20231222010914.6521-2-michael-cy.lee@mediatek.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 13 +++++++++ net/wireless/util.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index cb5e34d640cde..e27ed2307cdbc 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -8774,6 +8774,19 @@ void cfg80211_ch_switch_started_notify(struct net_device *dev, bool ieee80211_operating_class_to_band(u8 operating_class, enum nl80211_band *band); +/** + * ieee80211_operating_class_to_chandef - convert operating class to chandef + * + * @operating_class: the operating class to convert + * @chan: the ieee80211_channel to convert + * @chandef: a pointer to the resulting chandef + * + * Returns %true if the conversion was successful, %false otherwise. + */ +bool ieee80211_operating_class_to_chandef(u8 operating_class, + struct ieee80211_channel *chan, + struct cfg80211_chan_def *chandef); + /** * ieee80211_chandef_to_operating_class - convert chandef to operation class * diff --git a/net/wireless/util.c b/net/wireless/util.c index d1ce3bee27973..379f742fd7415 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -2073,6 +2073,82 @@ bool ieee80211_operating_class_to_band(u8 operating_class, } EXPORT_SYMBOL(ieee80211_operating_class_to_band); +bool ieee80211_operating_class_to_chandef(u8 operating_class, + struct ieee80211_channel *chan, + struct cfg80211_chan_def *chandef) +{ + u32 control_freq, offset = 0; + enum nl80211_band band; + + if (!ieee80211_operating_class_to_band(operating_class, &band) || + !chan || band != chan->band) + return false; + + control_freq = chan->center_freq; + chandef->chan = chan; + + if (control_freq >= 5955) + offset = control_freq - 5955; + else if (control_freq >= 5745) + offset = control_freq - 5745; + else if (control_freq >= 5180) + offset = control_freq - 5180; + offset /= 20; + + switch (operating_class) { + case 81: /* 2 GHz band; 20 MHz; channels 1..13 */ + case 82: /* 2 GHz band; 20 MHz; channel 14 */ + case 115: /* 5 GHz band; 20 MHz; channels 36,40,44,48 */ + case 118: /* 5 GHz band; 20 MHz; channels 52,56,60,64 */ + case 121: /* 5 GHz band; 20 MHz; channels 100..144 */ + case 124: /* 5 GHz band; 20 MHz; channels 149,153,157,161 */ + case 125: /* 5 GHz band; 20 MHz; channels 149..177 */ + case 131: /* 6 GHz band; 20 MHz; channels 1..233*/ + case 136: /* 6 GHz band; 20 MHz; channel 2 */ + chandef->center_freq1 = control_freq; + chandef->width = NL80211_CHAN_WIDTH_20; + return true; + case 83: /* 2 GHz band; 40 MHz; channels 1..9 */ + case 116: /* 5 GHz band; 40 MHz; channels 36,44 */ + case 119: /* 5 GHz band; 40 MHz; channels 52,60 */ + case 122: /* 5 GHz band; 40 MHz; channels 100,108,116,124,132,140 */ + case 126: /* 5 GHz band; 40 MHz; channels 149,157,165,173 */ + chandef->center_freq1 = control_freq + 10; + chandef->width = NL80211_CHAN_WIDTH_40; + return true; + case 84: /* 2 GHz band; 40 MHz; channels 5..13 */ + case 117: /* 5 GHz band; 40 MHz; channels 40,48 */ + case 120: /* 5 GHz band; 40 MHz; channels 56,64 */ + case 123: /* 5 GHz band; 40 MHz; channels 104,112,120,128,136,144 */ + case 127: /* 5 GHz band; 40 MHz; channels 153,161,169,177 */ + chandef->center_freq1 = control_freq - 10; + chandef->width = NL80211_CHAN_WIDTH_40; + return true; + case 132: /* 6 GHz band; 40 MHz; channels 1,5,..,229*/ + chandef->center_freq1 = control_freq + 10 - (offset & 1) * 20; + chandef->width = NL80211_CHAN_WIDTH_40; + return true; + case 128: /* 5 GHz band; 80 MHz; channels 36..64,100..144,149..177 */ + case 133: /* 6 GHz band; 80 MHz; channels 1,5,..,229 */ + chandef->center_freq1 = control_freq + 30 - (offset & 3) * 20; + chandef->width = NL80211_CHAN_WIDTH_80; + return true; + case 129: /* 5 GHz band; 160 MHz; channels 36..64,100..144,149..177 */ + case 134: /* 6 GHz band; 160 MHz; channels 1,5,..,229 */ + chandef->center_freq1 = control_freq + 70 - (offset & 7) * 20; + chandef->width = NL80211_CHAN_WIDTH_160; + return true; + case 130: /* 5 GHz band; 80+80 MHz; channels 36..64,100..144,149..177 */ + case 135: /* 6 GHz band; 80+80 MHz; channels 1,5,..,229 */ + /* The center_freq2 of 80+80 MHz is unknown */ + case 137: /* 6 GHz band; 320 MHz; channels 1,5,..,229 */ + /* 320-1 or 320-2 channelization is unknown */ + default: + return false; + } +} +EXPORT_SYMBOL(ieee80211_operating_class_to_chandef); + bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef, u8 *op_class) { -- cgit 1.2.3-korg From 21c3f8f95554feff9bed15703e89adbe582e0383 Mon Sep 17 00:00:00 2001 From: Michael-CY Lee Date: Fri, 22 Dec 2023 09:09:14 +0800 Subject: wifi: mac80211: refactor STA CSA parsing flows The new Wi-Fi Standard (IEEE Std 802.11-2020 9.4.2.160) specifies that the Wide Bandwidth Channel Switch (WBCS) Element subfields have the same definitions as VHT operation information if the operating band is not S1G. The problem comes when the BSS is in 6 GHz band, the STA parses the WBCS Element by ieee80211_chandef_vht_oper(), which checks the capabilities for HT/VHT mode, not HE/EHT mode. This patch refactors STA CSA parsing flow so that the corresponding capabilities can be checked. Also, it adds the way to use op_class in ECSA Element to build a new chandef. In summary, the new steps for STA to handle CSA event are: 1. build the new chandef from one of the CSA-related (Sub)Elements in following order, - Bandwidth Indication (Sub)Element - Wide Bandwidth Channel Switch (Sub)Element - Operating class in Extended Channel Switch Announcement Element - Channel Switch Announcement Element 2. convert the new chandef into operation information according to the operating band in order to check if the new chandef fits STA's capabilities. 3. downgrade the bandwidth until current bandwidth is not disabled. Co-developed-by: Money Wang Signed-off-by: Michael-CY Lee Link: https://msgid.link/20231222010914.6521-3-michael-cy.lee@mediatek.com [rebase on top of the changes with struct ieee80211_conn_settings, prefer leXY_encode_bits()] Signed-off-by: Johannes Berg --- net/mac80211/spectmgmt.c | 302 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 250 insertions(+), 52 deletions(-) (limited to 'net') diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 2b0bf2a1a877c..327c74e296e21 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -19,6 +19,205 @@ #include "sta_info.h" #include "wme.h" +static bool +wbcs_elem_to_chandef(const struct ieee80211_wide_bw_chansw_ie *wbcs_elem, + struct cfg80211_chan_def *chandef) +{ + u8 ccfs0 = wbcs_elem->new_center_freq_seg0; + u8 ccfs1 = wbcs_elem->new_center_freq_seg1; + u32 cf0 = ieee80211_channel_to_frequency(ccfs0, chandef->chan->band); + u32 cf1 = ieee80211_channel_to_frequency(ccfs1, chandef->chan->band); + + switch (wbcs_elem->new_channel_width) { + case IEEE80211_VHT_CHANWIDTH_160MHZ: + /* deprecated encoding */ + chandef->width = NL80211_CHAN_WIDTH_160; + chandef->center_freq1 = cf0; + break; + case IEEE80211_VHT_CHANWIDTH_80P80MHZ: + /* deprecated encoding */ + chandef->width = NL80211_CHAN_WIDTH_80P80; + chandef->center_freq1 = cf0; + chandef->center_freq2 = cf1; + break; + case IEEE80211_VHT_CHANWIDTH_80MHZ: + chandef->width = NL80211_CHAN_WIDTH_80; + chandef->center_freq1 = cf0; + + if (ccfs1) { + u8 diff = abs(ccfs0 - ccfs1); + + if (diff == 8) { + chandef->width = NL80211_CHAN_WIDTH_160; + chandef->center_freq1 = cf1; + } else if (diff > 8) { + chandef->width = NL80211_CHAN_WIDTH_80P80; + chandef->center_freq2 = cf1; + } + } + break; + case IEEE80211_VHT_CHANWIDTH_USE_HT: + default: + /* If the WBCS Element is present, new channel bandwidth is + * at least 40 MHz. + */ + chandef->width = NL80211_CHAN_WIDTH_40; + chandef->center_freq1 = cf0; + break; + } + + return cfg80211_chandef_valid(chandef); +} + +static void +validate_chandef_by_ht_vht_oper(struct ieee80211_sub_if_data *sdata, + struct ieee80211_conn_settings *conn, + u32 vht_cap_info, + struct cfg80211_chan_def *chandef) +{ + u32 control_freq, center_freq1, center_freq2; + enum nl80211_chan_width chan_width; + struct ieee80211_ht_operation ht_oper; + struct ieee80211_vht_operation vht_oper; + + if (conn->mode < IEEE80211_CONN_MODE_HT || + conn->bw_limit < IEEE80211_CONN_BW_LIMIT_40) { + chandef->chan = NULL; + return; + } + + control_freq = chandef->chan->center_freq; + center_freq1 = chandef->center_freq1; + center_freq2 = chandef->center_freq2; + chan_width = chandef->width; + + ht_oper.primary_chan = ieee80211_frequency_to_channel(control_freq); + if (control_freq != center_freq1) + ht_oper.ht_param = control_freq > center_freq1 ? + IEEE80211_HT_PARAM_CHA_SEC_BELOW : + IEEE80211_HT_PARAM_CHA_SEC_ABOVE; + else + ht_oper.ht_param = IEEE80211_HT_PARAM_CHA_SEC_NONE; + + ieee80211_chandef_ht_oper(&ht_oper, chandef); + + if (conn->mode < IEEE80211_CONN_MODE_VHT) + return; + + vht_oper.center_freq_seg0_idx = + ieee80211_frequency_to_channel(center_freq1); + vht_oper.center_freq_seg1_idx = center_freq2 ? + ieee80211_frequency_to_channel(center_freq2) : 0; + + switch (chan_width) { + case NL80211_CHAN_WIDTH_320: + WARN_ON(1); + break; + case NL80211_CHAN_WIDTH_160: + vht_oper.chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ; + vht_oper.center_freq_seg1_idx = vht_oper.center_freq_seg0_idx; + vht_oper.center_freq_seg0_idx += + control_freq < center_freq1 ? -8 : 8; + break; + case NL80211_CHAN_WIDTH_80P80: + vht_oper.chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ; + break; + case NL80211_CHAN_WIDTH_80: + vht_oper.chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ; + break; + default: + vht_oper.chan_width = IEEE80211_VHT_CHANWIDTH_USE_HT; + break; + } + + ht_oper.operation_mode = + le16_encode_bits(vht_oper.center_freq_seg1_idx, + IEEE80211_HT_OP_MODE_CCFS2_MASK); + + if (!ieee80211_chandef_vht_oper(&sdata->local->hw, vht_cap_info, + &vht_oper, &ht_oper, chandef)) + chandef->chan = NULL; +} + +static void +validate_chandef_by_6ghz_he_eht_oper(struct ieee80211_sub_if_data *sdata, + struct ieee80211_conn_settings *conn, + struct cfg80211_chan_def *chandef) +{ + struct ieee80211_local *local = sdata->local; + u32 control_freq, center_freq1, center_freq2; + enum nl80211_chan_width chan_width; + struct { + struct ieee80211_he_operation _oper; + struct ieee80211_he_6ghz_oper _6ghz_oper; + } __packed he; + struct { + struct ieee80211_eht_operation _oper; + struct ieee80211_eht_operation_info _oper_info; + } __packed eht; + + if (conn->mode < IEEE80211_CONN_MODE_HE) { + chandef->chan = NULL; + return; + } + + control_freq = chandef->chan->center_freq; + center_freq1 = chandef->center_freq1; + center_freq2 = chandef->center_freq2; + chan_width = chandef->width; + + he._oper.he_oper_params = + le32_encode_bits(1, IEEE80211_HE_OPERATION_6GHZ_OP_INFO); + he._6ghz_oper.primary = + ieee80211_frequency_to_channel(control_freq); + he._6ghz_oper.ccfs0 = ieee80211_frequency_to_channel(center_freq1); + he._6ghz_oper.ccfs1 = center_freq2 ? + ieee80211_frequency_to_channel(center_freq2) : 0; + + switch (chan_width) { + case NL80211_CHAN_WIDTH_320: + he._6ghz_oper.ccfs1 = he._6ghz_oper.ccfs0; + he._6ghz_oper.ccfs0 += control_freq < center_freq1 ? -16 : 16; + he._6ghz_oper.control = IEEE80211_EHT_OPER_CHAN_WIDTH_320MHZ; + break; + case NL80211_CHAN_WIDTH_160: + he._6ghz_oper.ccfs1 = he._6ghz_oper.ccfs0; + he._6ghz_oper.ccfs0 += control_freq < center_freq1 ? -8 : 8; + fallthrough; + case NL80211_CHAN_WIDTH_80P80: + he._6ghz_oper.control = + IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ; + break; + case NL80211_CHAN_WIDTH_80: + he._6ghz_oper.control = + IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_80MHZ; + break; + case NL80211_CHAN_WIDTH_40: + he._6ghz_oper.control = + IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_40MHZ; + break; + default: + he._6ghz_oper.control = + IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_20MHZ; + break; + } + + if (conn->mode < IEEE80211_CONN_MODE_EHT) { + if (!ieee80211_chandef_he_6ghz_oper(local, &he._oper, + NULL, chandef)) + chandef->chan = NULL; + } else { + eht._oper.params = IEEE80211_EHT_OPER_INFO_PRESENT; + eht._oper_info.control = he._6ghz_oper.control; + eht._oper_info.ccfs0 = he._6ghz_oper.ccfs0; + eht._oper_info.ccfs1 = he._6ghz_oper.ccfs1; + + if (!ieee80211_chandef_he_6ghz_oper(local, &he._oper, + &eht._oper, chandef)) + chandef->chan = NULL; + } +} + int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, enum nl80211_band current_band, @@ -29,12 +228,13 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, { enum nl80211_band new_band = current_band; int new_freq; - u8 new_chan_no; + u8 new_chan_no = 0, new_op_class = 0; struct ieee80211_channel *new_chan; - struct cfg80211_chan_def new_vht_chandef = {}; + struct cfg80211_chan_def new_chandef = {}; const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie; const struct ieee80211_bandwidth_indication *bwi; + const struct ieee80211_ext_chansw_ie *ext_chansw_elem; int secondary_channel_offset = -1; memset(csa_ie, 0, sizeof(*csa_ie)); @@ -42,6 +242,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, sec_chan_offs = elems->sec_chan_offs; wide_bw_chansw_ie = elems->wide_bw_chansw_ie; bwi = elems->bandwidth_indication; + ext_chansw_elem = elems->ext_chansw_ie; if (conn->mode < IEEE80211_CONN_MODE_HT || conn->bw_limit < IEEE80211_CONN_BW_LIMIT_40) { @@ -52,26 +253,30 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, if (conn->mode < IEEE80211_CONN_MODE_VHT) wide_bw_chansw_ie = NULL; - if (elems->ext_chansw_ie) { - if (!ieee80211_operating_class_to_band( - elems->ext_chansw_ie->new_operating_class, - &new_band)) { - sdata_info(sdata, - "cannot understand ECSA IE operating class, %d, ignoring\n", - elems->ext_chansw_ie->new_operating_class); + if (ext_chansw_elem) { + new_op_class = ext_chansw_elem->new_operating_class; + + if (!ieee80211_operating_class_to_band(new_op_class, &new_band)) { + new_op_class = 0; + sdata_info(sdata, "cannot understand ECSA IE operating class, %d, ignoring\n", + ext_chansw_elem->new_operating_class); + } else { + new_chan_no = ext_chansw_elem->new_ch_num; + csa_ie->count = ext_chansw_elem->count; + csa_ie->mode = ext_chansw_elem->mode; } - new_chan_no = elems->ext_chansw_ie->new_ch_num; - csa_ie->count = elems->ext_chansw_ie->count; - csa_ie->mode = elems->ext_chansw_ie->mode; - } else if (elems->ch_switch_ie) { + } + + if (!new_op_class && elems->ch_switch_ie) { new_chan_no = elems->ch_switch_ie->new_ch_num; csa_ie->count = elems->ch_switch_ie->count; csa_ie->mode = elems->ch_switch_ie->mode; - } else { - /* nothing here we understand */ - return 1; } + /* nothing here we understand */ + if (!new_chan_no) + return 1; + /* Mesh Channel Switch Parameters Element */ if (elems->mesh_chansw_params_ie) { csa_ie->ttl = elems->mesh_chansw_params_ie->mesh_ttl; @@ -135,55 +340,48 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, break; } + /* parse one of the Elements to build a new chandef */ + memset(&new_chandef, 0, sizeof(new_chandef)); + new_chandef.chan = new_chan; if (bwi) { /* start with the CSA one */ - new_vht_chandef = csa_ie->chanreq.oper; + new_chandef = csa_ie->chanreq.oper; /* and update the width accordingly */ - ieee80211_chandef_eht_oper(&bwi->info, &new_vht_chandef); - } else if (wide_bw_chansw_ie) { - u8 new_seg1 = wide_bw_chansw_ie->new_center_freq_seg1; - struct ieee80211_vht_operation vht_oper = { - .chan_width = - wide_bw_chansw_ie->new_channel_width, - .center_freq_seg0_idx = - wide_bw_chansw_ie->new_center_freq_seg0, - .center_freq_seg1_idx = new_seg1, - /* .basic_mcs_set doesn't matter */ - }; - struct ieee80211_ht_operation ht_oper = { - .operation_mode = - cpu_to_le16(new_seg1 << - IEEE80211_HT_OP_MODE_CCFS2_SHIFT), - }; - - /* default, for the case of IEEE80211_VHT_CHANWIDTH_USE_HT, - * to the previously parsed chandef - */ - new_vht_chandef = csa_ie->chanreq.oper; + ieee80211_chandef_eht_oper(&bwi->info, &new_chandef); + } else if (!wide_bw_chansw_ie || !wbcs_elem_to_chandef(wide_bw_chansw_ie, + &new_chandef)) { + if (!ieee80211_operating_class_to_chandef(new_op_class, new_chan, + &new_chandef)) + new_chandef = csa_ie->chanreq.oper; + } + + /* check if the new chandef fits the capabilities */ + if (new_band == NL80211_BAND_6GHZ) + validate_chandef_by_6ghz_he_eht_oper(sdata, conn, &new_chandef); + else + validate_chandef_by_ht_vht_oper(sdata, conn, vht_cap_info, + &new_chandef); - /* ignore if parsing fails */ - if (!ieee80211_chandef_vht_oper(&sdata->local->hw, - vht_cap_info, - &vht_oper, &ht_oper, - &new_vht_chandef)) - new_vht_chandef.chan = NULL; + /* if data is there validate the bandwidth & use it */ + if (new_chandef.chan) { + if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_320 && + new_chandef.width == NL80211_CHAN_WIDTH_320) + ieee80211_chandef_downgrade(&new_chandef, NULL); if (conn->bw_limit < IEEE80211_CONN_BW_LIMIT_160 && - (new_vht_chandef.width == NL80211_CHAN_WIDTH_80P80 || - new_vht_chandef.width == NL80211_CHAN_WIDTH_160)) - ieee80211_chandef_downgrade(&new_vht_chandef, NULL); - } + (new_chandef.width == NL80211_CHAN_WIDTH_80P80 || + new_chandef.width == NL80211_CHAN_WIDTH_160)) + ieee80211_chandef_downgrade(&new_chandef, NULL); - /* if VHT data is there validate & use it */ - if (new_vht_chandef.chan) { - if (!cfg80211_chandef_compatible(&new_vht_chandef, + if (!cfg80211_chandef_compatible(&new_chandef, &csa_ie->chanreq.oper)) { sdata_info(sdata, "BSS %pM: CSA has inconsistent channel data, disconnecting\n", bssid); return -EINVAL; } - csa_ie->chanreq.oper = new_vht_chandef; + + csa_ie->chanreq.oper = new_chandef; } if (elems->max_channel_switch_time) -- cgit 1.2.3-korg From 4ace04c0bdbde3b028ec0a5a3be2471cdb1efb67 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Tue, 30 Jan 2024 19:39:14 +0530 Subject: wifi: cfg80211: send link id in channel_switch ops Currently, during channel switch, no link id information is passed down. In order to support channel switch during Multi Link Operation, it is required to pass link id as well. Add changes to pass link id in the channel_switch cfg80211_ops. Signed-off-by: Aditya Kumar Singh Link: https://msgid.link/20240130140918.1172387-2-quic_adisi@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +++ net/wireless/nl80211.c | 1 + net/wireless/trace.h | 7 +++++-- 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e27ed2307cdbc..d4c83ea3213d4 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1543,6 +1543,8 @@ struct cfg80211_ap_update { * @radar_required: whether radar detection is required on the new channel * @block_tx: whether transmissions should be blocked while changing * @count: number of beacons until switch + * @link_id: defines the link on which channel switch is expected during + * MLO. 0 in case of non-MLO. */ struct cfg80211_csa_settings { struct cfg80211_chan_def chandef; @@ -1555,6 +1557,7 @@ struct cfg80211_csa_settings { bool radar_required; bool block_tx; u8 count; + u8 link_id; }; /** diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b533412ad1e03..e1106ae35e21d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -10222,6 +10222,7 @@ skip_beacons: if (info->attrs[NL80211_ATTR_CH_SWITCH_BLOCK_TX]) params.block_tx = true; + params.link_id = link_id; err = rdev_channel_switch(rdev, dev, ¶ms); free: diff --git a/net/wireless/trace.h b/net/wireless/trace.h index ae5e585b6863d..194ea24717177 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2332,6 +2332,7 @@ TRACE_EVENT(rdev_channel_switch, __field(u8, count) __dynamic_array(u16, bcn_ofs, params->n_counter_offsets_beacon) __dynamic_array(u16, pres_ofs, params->n_counter_offsets_presp) + __field(u8, link_id) ), TP_fast_assign( WIPHY_ASSIGN; @@ -2349,11 +2350,13 @@ TRACE_EVENT(rdev_channel_switch, memcpy(__get_dynamic_array(pres_ofs), params->counter_offsets_presp, params->n_counter_offsets_presp * sizeof(u16)); + __entry->link_id = params->link_id; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT - ", block_tx: %d, count: %u, radar_required: %d", + ", block_tx: %d, count: %u, radar_required: %d, link_id: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG, - __entry->block_tx, __entry->count, __entry->radar_required) + __entry->block_tx, __entry->count, __entry->radar_required, + __entry->link_id) ); TRACE_EVENT(rdev_set_qos_map, -- cgit 1.2.3-korg From 480e7048aa0bbf0a79a976cdfa0195fd157da902 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Tue, 30 Jan 2024 19:39:15 +0530 Subject: wifi: mac80211: update beacon counters per link basis Currently, function to update beacon counter uses deflink to fetch the beacon and then update the counter. However, with MLO, there is a need to update the counter for the beacon in a particular link. Add support to use link_id in order to fetch the beacon from a particular link data during beacon update counter. Signed-off-by: Aditya Kumar Singh Link: https://msgid.link/20240130140918.1172387-3-quic_adisi@quicinc.com Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/mac.c | 2 +- drivers/net/wireless/ath/ath11k/mac.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 2 +- include/net/mac80211.h | 4 +++- net/mac80211/tx.c | 14 +++++++++++--- 5 files changed, 17 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index fc503db2fd8ed..b802204061044 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -2035,7 +2035,7 @@ static void ath10k_mac_vif_ap_csa_count_down(struct ath10k_vif *arvif) return; if (!ieee80211_beacon_cntdwn_is_complete(vif)) { - ieee80211_beacon_update_cntdwn(vif); + ieee80211_beacon_update_cntdwn(vif, 0); ret = ath10k_mac_setup_bcn_tmpl(arvif); if (ret) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index bbf4d1f4d310c..f7cab50bdfd12 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -1586,7 +1586,7 @@ void ath11k_mac_bcn_tx_event(struct ath11k_vif *arvif) arvif->bcca_zero_sent = false; if (vif->bss_conf.color_change_active) - ieee80211_beacon_update_cntdwn(vif); + ieee80211_beacon_update_cntdwn(vif, 0); ath11k_mac_setup_bcn_tmpl(arvif); } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index 61269c7b1934f..3dea06b28472f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -1467,7 +1467,7 @@ static void iwl_mvm_csa_count_down(struct iwl_mvm *mvm, mvmvif->csa_countdown = true; if (!ieee80211_beacon_cntdwn_is_complete(csa_vif)) { - int c = ieee80211_beacon_update_cntdwn(csa_vif); + int c = ieee80211_beacon_update_cntdwn(csa_vif, 0); iwl_mvm_mac_ctxt_beacon_changed(mvm, csa_vif, &csa_vif->bss_conf); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 54aa4a06c878d..8acee7ce3aa9c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5526,6 +5526,7 @@ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, /** * ieee80211_beacon_update_cntdwn - request mac80211 to decrement the beacon countdown * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @link_id: valid link_id during MLO or 0 for non-MLO * * The beacon counter should be updated after each beacon transmission. * This function is called implicitly when @@ -5535,7 +5536,8 @@ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, * * Return: new countdown value */ -u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif); +u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif, + unsigned int link_id); /** * ieee80211_beacon_set_cntdwn - request mac80211 to set beacon countdown diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 098b32947c2b1..876de3ba98ba4 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5027,16 +5027,24 @@ static u8 __ieee80211_beacon_update_cntdwn(struct beacon_data *beacon) return beacon->cntdwn_current_counter; } -u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif) +u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_link_data *link; struct beacon_data *beacon = NULL; u8 count = 0; + if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS)) + return 0; + rcu_read_lock(); + link = rcu_dereference(sdata->link[link_id]); + if (!link) + goto unlock; + if (sdata->vif.type == NL80211_IFTYPE_AP) - beacon = rcu_dereference(sdata->deflink.u.ap.beacon); + beacon = rcu_dereference(link->u.ap.beacon); else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) beacon = rcu_dereference(sdata->u.ibss.presp); else if (ieee80211_vif_is_mesh(&sdata->vif)) @@ -5277,7 +5285,7 @@ ieee80211_beacon_get_ap(struct ieee80211_hw *hw, if (beacon->cntdwn_counter_offsets[0]) { if (!is_template) - ieee80211_beacon_update_cntdwn(vif); + ieee80211_beacon_update_cntdwn(vif, link->link_id); ieee80211_set_beacon_cntdwn(sdata, beacon, link); } -- cgit 1.2.3-korg From a3a637a6c07189fab06a06c1f435331da652383b Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Tue, 30 Jan 2024 19:39:16 +0530 Subject: wifi: mac80211: handle set csa/after_csa beacon on per link basis In order to support CSA with MLO, there is a need to handle the functions ieee80211_set_csa_beacon() and ieee80211_set_after_csa_beacon() on a per link basis. Implement this by making the function argument accept the the link data instead of the sdata. Currently, deflink would only be passed. Proper link data will be passed in a subsequent patch. Signed-off-by: Aditya Kumar Singh Link: https://msgid.link/20240130140918.1172387-4-quic_adisi@quicinc.com Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 17b4454918812..0c0f418c36522 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3590,20 +3590,21 @@ void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif, bool block_t } EXPORT_SYMBOL(ieee80211_channel_switch_disconnect); -static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata, +static int ieee80211_set_after_csa_beacon(struct ieee80211_link_data *link_data, u64 *changed) { + struct ieee80211_sub_if_data *sdata = link_data->sdata; int err; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: - if (!sdata->deflink.u.ap.next_beacon) + if (!link_data->u.ap.next_beacon) return -EINVAL; - err = ieee80211_assign_beacon(sdata, &sdata->deflink, - sdata->deflink.u.ap.next_beacon, + err = ieee80211_assign_beacon(sdata, link_data, + link_data->u.ap.next_beacon, NULL, NULL, changed); - ieee80211_free_next_beacon(&sdata->deflink); + ieee80211_free_next_beacon(link_data); if (err < 0) return err; @@ -3662,7 +3663,7 @@ static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data) sdata->vif.bss_conf.csa_active = false; - err = ieee80211_set_after_csa_beacon(sdata, &changed); + err = ieee80211_set_after_csa_beacon(&sdata->deflink, &changed); if (err) return err; @@ -3714,18 +3715,19 @@ void ieee80211_csa_finalize_work(struct wiphy *wiphy, struct wiphy_work *work) ieee80211_csa_finalize(link); } -static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, +static int ieee80211_set_csa_beacon(struct ieee80211_link_data *link_data, struct cfg80211_csa_settings *params, u64 *changed) { + struct ieee80211_sub_if_data *sdata = link_data->sdata; struct ieee80211_csa_settings csa = {}; int err; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: - sdata->deflink.u.ap.next_beacon = + link_data->u.ap.next_beacon = cfg80211_beacon_dup(¶ms->beacon_after); - if (!sdata->deflink.u.ap.next_beacon) + if (!link_data->u.ap.next_beacon) return -ENOMEM; /* @@ -3751,7 +3753,7 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, IEEE80211_MAX_CNTDWN_COUNTERS_NUM) || (params->n_counter_offsets_presp > IEEE80211_MAX_CNTDWN_COUNTERS_NUM)) { - ieee80211_free_next_beacon(&sdata->deflink); + ieee80211_free_next_beacon(link_data); return -EINVAL; } @@ -3761,11 +3763,11 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, csa.n_counter_offsets_presp = params->n_counter_offsets_presp; csa.count = params->count; - err = ieee80211_assign_beacon(sdata, &sdata->deflink, + err = ieee80211_assign_beacon(sdata, link_data, ¶ms->beacon_csa, &csa, NULL, changed); if (err < 0) { - ieee80211_free_next_beacon(&sdata->deflink); + ieee80211_free_next_beacon(link_data); return err; } @@ -3925,7 +3927,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, if (sdata->vif.bss_conf.color_change_active) ieee80211_color_change_abort(sdata); - err = ieee80211_set_csa_beacon(sdata, params, &changed); + err = ieee80211_set_csa_beacon(&sdata->deflink, params, &changed); if (err) { ieee80211_link_unreserve_chanctx(&sdata->deflink); goto out; -- cgit 1.2.3-korg From 1a96bb4e8a7953a7cf8e277c4eea29907bb5a140 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Tue, 30 Jan 2024 19:39:17 +0530 Subject: wifi: mac80211: start and finalize channel switch on link basis Add changes to start a channel switch as well as finalize it on link basis in order to support CSA with MLO as well. Signed-off-by: Aditya Kumar Singh Link: https://msgid.link/20240130140918.1172387-5-quic_adisi@quicinc.com Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 59 +++++++++++++++++++++++++++++++---------------------- net/mac80211/link.c | 2 ++ 2 files changed, 37 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 0c0f418c36522..d30a64cf95cd4 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3633,6 +3633,7 @@ static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data) { struct ieee80211_sub_if_data *sdata = link_data->sdata; struct ieee80211_local *local = sdata->local; + struct ieee80211_bss_conf *link_conf = link_data->conf; u64 changed = 0; int err; @@ -3654,16 +3655,16 @@ static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data) if (link_data->reserved_ready) return 0; - return ieee80211_link_use_reserved_context(&sdata->deflink); + return ieee80211_link_use_reserved_context(link_data); } - if (!cfg80211_chandef_identical(&link_data->conf->chanreq.oper, + if (!cfg80211_chandef_identical(&link_conf->chanreq.oper, &link_data->csa_chanreq.oper)) return -EINVAL; - sdata->vif.bss_conf.csa_active = false; + link_conf->csa_active = false; - err = ieee80211_set_after_csa_beacon(&sdata->deflink, &changed); + err = ieee80211_set_after_csa_beacon(link_data, &changed); if (err) return err; @@ -3690,7 +3691,8 @@ static void ieee80211_csa_finalize(struct ieee80211_link_data *link_data) struct ieee80211_sub_if_data *sdata = link_data->sdata; if (__ieee80211_csa_finalize(link_data)) { - sdata_info(sdata, "failed to finalize CSA, disconnecting\n"); + sdata_info(sdata, "failed to finalize CSA on link %d, disconnecting\n", + link_data->link_id); cfg80211_stop_iface(sdata->local->hw.wiphy, &sdata->wdev, GFP_KERNEL); } @@ -3867,7 +3869,10 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_channel_switch ch_switch; struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *chanctx; + struct ieee80211_bss_conf *link_conf; + struct ieee80211_link_data *link_data; u64 changed = 0; + u8 link_id = params->link_id; int err; lockdep_assert_wiphy(local->hw.wiphy); @@ -3878,15 +3883,23 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, if (sdata->wdev.cac_started) return -EBUSY; - if (chanreq.oper.punctured && !sdata->vif.bss_conf.eht_support) + if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS)) + return -EINVAL; + + link_data = wiphy_dereference(wiphy, sdata->link[link_id]); + if (!link_data) + return -ENOLINK; + + link_conf = link_data->conf; + + if (chanreq.oper.punctured && !link_conf->eht_support) return -EINVAL; /* don't allow another channel switch if one is already active. */ - if (sdata->vif.bss_conf.csa_active) + if (link_conf->csa_active) return -EBUSY; - conf = rcu_dereference_protected(sdata->vif.bss_conf.chanctx_conf, - lockdep_is_held(&local->hw.wiphy->mtx)); + conf = wiphy_dereference(wiphy, link_conf->chanctx_conf); if (!conf) { err = -EBUSY; goto out; @@ -3910,7 +3923,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, if (err) goto out; - err = ieee80211_link_reserve_chanctx(&sdata->deflink, &chanreq, + err = ieee80211_link_reserve_chanctx(link_data, &chanreq, chanctx->mode, params->radar_required); if (err) @@ -3919,40 +3932,38 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, /* if reservation is invalid then this will fail */ err = ieee80211_check_combinations(sdata, NULL, chanctx->mode, 0); if (err) { - ieee80211_link_unreserve_chanctx(&sdata->deflink); + ieee80211_link_unreserve_chanctx(link_data); goto out; } /* if there is a color change in progress, abort it */ - if (sdata->vif.bss_conf.color_change_active) + if (link_conf->color_change_active) ieee80211_color_change_abort(sdata); - err = ieee80211_set_csa_beacon(&sdata->deflink, params, &changed); + err = ieee80211_set_csa_beacon(link_data, params, &changed); if (err) { - ieee80211_link_unreserve_chanctx(&sdata->deflink); + ieee80211_link_unreserve_chanctx(link_data); goto out; } - sdata->deflink.csa_chanreq = chanreq; - sdata->deflink.csa_block_tx = params->block_tx; - sdata->vif.bss_conf.csa_active = true; + link_data->csa_chanreq = chanreq; + link_data->csa_block_tx = params->block_tx; + link_conf->csa_active = true; - if (sdata->deflink.csa_block_tx) + if (link_data->csa_block_tx) ieee80211_stop_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); cfg80211_ch_switch_started_notify(sdata->dev, - &sdata->deflink.csa_chanreq.oper, 0, + &link_data->csa_chanreq.oper, 0, params->count, params->block_tx); if (changed) { - ieee80211_link_info_change_notify(sdata, &sdata->deflink, - changed); - drv_channel_switch_beacon(sdata, - &sdata->deflink.csa_chanreq.oper); + ieee80211_link_info_change_notify(sdata, link_data, changed); + drv_channel_switch_beacon(sdata, &link_data->csa_chanreq.oper); } else { /* if the beacon didn't change, we can finalize immediately */ - ieee80211_csa_finalize(&sdata->deflink); + ieee80211_csa_finalize(link_data); } out: diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 4f19d6479befd..87a413374ecee 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -73,6 +73,8 @@ void ieee80211_link_stop(struct ieee80211_link_data *link) ieee80211_mgd_stop_link(link); cancel_delayed_work_sync(&link->color_collision_detect_work); + wiphy_work_cancel(link->sdata->local->hw.wiphy, + &link->csa_finalize_work); ieee80211_link_release_channel(link); } -- cgit 1.2.3-korg From 04ada8599c35ecb2cf16c94eb118d227630d06ee Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Tue, 30 Jan 2024 19:39:18 +0530 Subject: wifi: mac80211: add support to call csa_finish on a link Currently ieee80211_csa_finish() function finalizes CSA by scheduling a finalizing worker using the deflink. With MLO, there is a need to do it on a given link basis. Pass link ID of the link on which CSA needs to be finalized. Signed-off-by: Aditya Kumar Singh Link: https://msgid.link/20240130140918.1172387-6-quic_adisi@quicinc.com Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/mac.c | 2 +- drivers/net/wireless/ath/ath10k/wmi.c | 2 +- drivers/net/wireless/ath/ath11k/wmi.c | 2 +- drivers/net/wireless/ath/ath12k/wmi.c | 2 +- drivers/net/wireless/ath/ath9k/beacon.c | 2 +- drivers/net/wireless/ath/ath9k/htc_drv_beacon.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 4 ++-- drivers/net/wireless/intel/iwlwifi/mvm/time-event.c | 2 +- drivers/net/wireless/mediatek/mt76/mac80211.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7996/mcu.c | 2 +- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 2 +- drivers/net/wireless/ti/wlcore/event.c | 2 +- drivers/net/wireless/virtual/mac80211_hwsim.c | 2 +- include/net/mac80211.h | 3 ++- net/mac80211/cfg.c | 15 +++++++++++++-- 17 files changed, 31 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index b802204061044..41053219ee957 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -2047,7 +2047,7 @@ static void ath10k_mac_vif_ap_csa_count_down(struct ath10k_vif *arvif) ath10k_warn(ar, "failed to update prb tmpl during csa: %d\n", ret); } else { - ieee80211_csa_finish(vif); + ieee80211_csa_finish(vif, 0); } } diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 9e2f0a50aaea5..ddf15717d5046 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -3885,7 +3885,7 @@ void ath10k_wmi_event_host_swba(struct ath10k *ar, struct sk_buff *skb) */ if (arvif->vif->bss_conf.csa_active && ieee80211_beacon_cntdwn_is_complete(arvif->vif)) { - ieee80211_csa_finish(arvif->vif); + ieee80211_csa_finish(arvif->vif, 0); continue; } diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index 688ee20528a05..3c9f3b0bcfaa9 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -8226,7 +8226,7 @@ ath11k_wmi_process_csa_switch_count_event(struct ath11k_base *ab, } if (arvif->is_up && arvif->vif->bss_conf.csa_active) - ieee80211_csa_finish(arvif->vif); + ieee80211_csa_finish(arvif->vif, 0); } rcu_read_unlock(); } diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 2fa724e5851a9..75c3a0d9b9ecc 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -6448,7 +6448,7 @@ ath12k_wmi_process_csa_switch_count_event(struct ath12k_base *ab, } if (arvif->is_up && arvif->vif->bss_conf.csa_active) - ieee80211_csa_finish(arvif->vif); + ieee80211_csa_finish(arvif->vif, 0); } rcu_read_unlock(); } diff --git a/drivers/net/wireless/ath/ath9k/beacon.c b/drivers/net/wireless/ath/ath9k/beacon.c index ee72faac2f1d0..4e48407138b25 100644 --- a/drivers/net/wireless/ath/ath9k/beacon.c +++ b/drivers/net/wireless/ath/ath9k/beacon.c @@ -368,7 +368,7 @@ bool ath9k_csa_is_finished(struct ath_softc *sc, struct ieee80211_vif *vif) if (!ieee80211_beacon_cntdwn_is_complete(vif)) return false; - ieee80211_csa_finish(vif); + ieee80211_csa_finish(vif, 0); return true; } diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c b/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c index 533471e694007..8179d35dc3101 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c @@ -517,7 +517,7 @@ bool ath9k_htc_csa_is_finished(struct ath9k_htc_priv *priv) if (!ieee80211_beacon_cntdwn_is_complete(vif)) return false; - ieee80211_csa_finish(vif); + ieee80211_csa_finish(vif, 0); priv->csa_vif = NULL; return true; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index 3dea06b28472f..7cb6bc97b26c4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -1486,7 +1486,7 @@ static void iwl_mvm_csa_count_down(struct iwl_mvm *mvm, } } else if (!iwl_mvm_te_scheduled(&mvmvif->time_event_data)) { /* we don't have CSA NoA scheduled yet, switch now */ - ieee80211_csa_finish(csa_vif); + ieee80211_csa_finish(csa_vif, 0); RCU_INIT_POINTER(mvm->csa_vif, NULL); } } @@ -1836,7 +1836,7 @@ void iwl_mvm_channel_switch_start_notif(struct iwl_mvm *mvm, msecs_to_jiffies(IWL_MVM_CS_UNBLOCK_TX_TIMEOUT * csa_vif->bss_conf.beacon_int)); - ieee80211_csa_finish(csa_vif); + ieee80211_csa_finish(csa_vif, 0); rcu_read_unlock(); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index 60ec5ca6927cd..b7461bc325277 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -168,7 +168,7 @@ static void iwl_mvm_csa_noa_start(struct iwl_mvm *mvm) goto out_unlock; } - ieee80211_csa_finish(csa_vif); + ieee80211_csa_finish(csa_vif, 0); rcu_read_unlock(); diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 8a3a90d1bfac5..8bf82755ca4c6 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -1614,7 +1614,7 @@ static void __mt76_csa_finish(void *priv, u8 *mac, struct ieee80211_vif *vif) { if (vif->bss_conf.csa_active && ieee80211_beacon_cntdwn_is_complete(vif)) - ieee80211_csa_finish(vif); + ieee80211_csa_finish(vif, 0); } void mt76_csa_finish(struct mt76_dev *dev) diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c index ae34d019e5883..c807bd8d928d6 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c @@ -353,7 +353,7 @@ static void mt7615_mcu_csa_finish(void *priv, u8 *mac, struct ieee80211_vif *vif) { if (vif->bss_conf.csa_active) - ieee80211_csa_finish(vif); + ieee80211_csa_finish(vif, 0); } static void diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c index df1ad6d4e12d9..d90f98c500399 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c @@ -228,7 +228,7 @@ mt7915_mcu_csa_finish(void *priv, u8 *mac, struct ieee80211_vif *vif) if (!vif->bss_conf.csa_active || vif->type == NL80211_IFTYPE_STATION) return; - ieee80211_csa_finish(vif); + ieee80211_csa_finish(vif, 0); } static void diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index 699be57309c2e..3ec813077dc24 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -341,7 +341,7 @@ mt7996_mcu_csa_finish(void *priv, u8 *mac, struct ieee80211_vif *vif) if (!vif->bss_conf.csa_active || vif->type == NL80211_IFTYPE_STATION) return; - ieee80211_csa_finish(vif); + ieee80211_csa_finish(vif, 0); } static void diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index b0c1db726d7ab..66bf92c164c30 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -5740,7 +5740,7 @@ static void rtl8xxxu_update_beacon_work_callback(struct work_struct *work) if (vif->bss_conf.csa_active) { if (ieee80211_beacon_cntdwn_is_complete(vif)) { - ieee80211_csa_finish(vif); + ieee80211_csa_finish(vif, 0); return; } schedule_delayed_work(&priv->update_beacon_work, diff --git a/drivers/net/wireless/ti/wlcore/event.c b/drivers/net/wireless/ti/wlcore/event.c index 1e082d039b822..2499dc9083051 100644 --- a/drivers/net/wireless/ti/wlcore/event.c +++ b/drivers/net/wireless/ti/wlcore/event.c @@ -233,7 +233,7 @@ void wlcore_event_channel_switch(struct wl1271 *wl, cancel_delayed_work(&wlvif->channel_switch_work); } else { set_bit(WLVIF_FLAG_BEACON_DISABLED, &wlvif->flags); - ieee80211_csa_finish(vif); + ieee80211_csa_finish(vif, 0); } } } diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index 907c0842fee71..c337afd8bee22 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -2303,7 +2303,7 @@ static void mac80211_hwsim_beacon_tx(void *arg, u8 *mac, } if (link_conf->csa_active && ieee80211_beacon_cntdwn_is_complete(vif)) - ieee80211_csa_finish(vif); + ieee80211_csa_finish(vif, link_id); } static enum hrtimer_restart diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8acee7ce3aa9c..45d905b17a65c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5555,12 +5555,13 @@ void ieee80211_beacon_set_cntdwn(struct ieee80211_vif *vif, u8 counter); /** * ieee80211_csa_finish - notify mac80211 about channel switch * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @link_id: valid link_id during MLO or 0 for non-MLO * * After a channel switch announcement was scheduled and the counter in this * announcement hits 1, this function must be called by the driver to * notify mac80211 that the channel can be changed. */ -void ieee80211_csa_finish(struct ieee80211_vif *vif); +void ieee80211_csa_finish(struct ieee80211_vif *vif, unsigned int link_id); /** * ieee80211_beacon_cntdwn_is_complete - find out if countdown reached 1 diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d30a64cf95cd4..156a4215dcda7 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3547,13 +3547,24 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon) return new_beacon; } -void ieee80211_csa_finish(struct ieee80211_vif *vif) +void ieee80211_csa_finish(struct ieee80211_vif *vif, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; + struct ieee80211_link_data *link_data; + + if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS)) + return; rcu_read_lock(); + link_data = rcu_dereference(sdata->link[link_id]); + if (WARN_ON(!link_data)) { + rcu_read_unlock(); + return; + } + + /* TODO: MBSSID with MLO changes */ if (vif->mbssid_tx_vif == vif) { /* Trigger ieee80211_csa_finish() on the non-transmitting * interfaces when channel switch is received on @@ -3572,7 +3583,7 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif) &iter->deflink.csa_finalize_work); } } - wiphy_work_queue(local->hw.wiphy, &sdata->deflink.csa_finalize_work); + wiphy_work_queue(local->hw.wiphy, &link_data->csa_finalize_work); rcu_read_unlock(); } -- cgit 1.2.3-korg From 83acbb9d0716c7f787d248ca982f4a4a895aef65 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 6 Feb 2024 13:29:26 +0200 Subject: net: dsa: remove "inline" from dsa_user_netpoll_send_skb() The convention is to not use "inline" functions in C files, and let the compiler decide whether to inline or not. Signed-off-by: Vladimir Oltean Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240206112927.4134375-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/user.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dsa/user.c b/net/dsa/user.c index ba2e53b5e16a0..5d666dfb317d7 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -875,8 +875,8 @@ static int dsa_user_port_obj_del(struct net_device *dev, const void *ctx, return err; } -static inline netdev_tx_t dsa_user_netpoll_send_skb(struct net_device *dev, - struct sk_buff *skb) +static netdev_tx_t dsa_user_netpoll_send_skb(struct net_device *dev, + struct sk_buff *skb) { #ifdef CONFIG_NET_POLL_CONTROLLER struct dsa_user_priv *p = netdev_priv(dev); -- cgit 1.2.3-korg From 36f75f74dc07c6c38c87bdb39be814cfa951b6dd Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 6 Feb 2024 13:29:27 +0200 Subject: net: dsa: tag_sja1105: remove "inline" keyword The convention is to not use the "inline" keyword for functions in C files, but to let the compiler choose. Signed-off-by: Vladimir Oltean Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240206112927.4134375-2-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/tag_sja1105.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 2717e9d7b6125..1aba1d05c27a1 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -75,7 +75,7 @@ sja1105_tagger_private(struct dsa_switch *ds) } /* Similar to is_link_local_ether_addr(hdr->h_dest) but also covers PTP */ -static inline bool sja1105_is_link_local(const struct sk_buff *skb) +static bool sja1105_is_link_local(const struct sk_buff *skb) { const struct ethhdr *hdr = eth_hdr(skb); u64 dmac = ether_addr_to_u64(hdr->h_dest); @@ -121,7 +121,7 @@ static void sja1105_meta_unpack(const struct sk_buff *skb, packing(buf + 7, &meta->switch_id, 7, 0, 1, UNPACK, 0); } -static inline bool sja1105_is_meta_frame(const struct sk_buff *skb) +static bool sja1105_is_meta_frame(const struct sk_buff *skb) { const struct ethhdr *hdr = eth_hdr(skb); u64 smac = ether_addr_to_u64(hdr->h_source); -- cgit 1.2.3-korg From 0e0939c0adf90a3233392e2a9650290b1ad8068c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 7 Feb 2024 16:53:18 +0000 Subject: net-procfs: use xarray iterator to implement /proc/net/dev In commit 759ab1edb56c ("net: store netdevs in an xarray") Jakub added net->dev_by_index to map ifindex to netdevices. We can get rid of the old hash table (net->dev_index_head), one patch at a time, if performance is acceptable. This patch removes unpleasant code to something more readable. As a bonus, /proc/net/dev gets netdevices sorted by their ifindex. Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20240207165318.3814525-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/net-procfs.c | 48 +++++++----------------------------------------- 1 file changed, 7 insertions(+), 41 deletions(-) (limited to 'net') diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 09f7ed1a04e8a..2e4e96d30ee1a 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -6,49 +6,18 @@ #include "dev.h" -#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1) - -#define get_bucket(x) ((x) >> BUCKET_SPACE) -#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1)) -#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) - -static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos) +static void *dev_seq_from_index(struct seq_file *seq, loff_t *pos) { - struct net *net = seq_file_net(seq); + unsigned long ifindex = *pos; struct net_device *dev; - struct hlist_head *h; - unsigned int count = 0, offset = get_offset(*pos); - h = &net->dev_index_head[get_bucket(*pos)]; - hlist_for_each_entry_rcu(dev, h, index_hlist) { - if (++count == offset) - return dev; + for_each_netdev_dump(seq_file_net(seq), dev, ifindex) { + *pos = dev->ifindex; + return dev; } - - return NULL; -} - -static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos) -{ - struct net_device *dev; - unsigned int bucket; - - do { - dev = dev_from_same_bucket(seq, pos); - if (dev) - return dev; - - bucket = get_bucket(*pos) + 1; - *pos = set_bucket_offset(bucket, 1); - } while (bucket < NETDEV_HASHENTRIES); - return NULL; } -/* - * This is invoked by the /proc filesystem handler to display a device - * in detail. - */ static void *dev_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { @@ -56,16 +25,13 @@ static void *dev_seq_start(struct seq_file *seq, loff_t *pos) if (!*pos) return SEQ_START_TOKEN; - if (get_bucket(*pos) >= NETDEV_HASHENTRIES) - return NULL; - - return dev_from_bucket(seq, pos); + return dev_seq_from_index(seq, pos); } static void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; - return dev_from_bucket(seq, pos); + return dev_seq_from_index(seq, pos); } static void dev_seq_stop(struct seq_file *seq, void *v) -- cgit 1.2.3-korg From e7689879d14ee730085c2f5974003c2d0ebbebc6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 7 Feb 2024 15:35:14 +0000 Subject: ethtool: do not use rtnl in ethnl_default_dumpit() for_each_netdev_dump() can be used with RCU protection, no need for rtnl if we are going to use dev_hold()/dev_put(). Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20240207153514.3640952-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ethtool/netlink.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index fe3553f60bf39..bd04f28d5cf4b 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -477,11 +477,7 @@ out: return ret; } -/* Default ->dumpit() handler for GET requests. Device iteration copied from - * rtnl_dump_ifinfo(); we have to be more careful about device hashtable - * persistence as we cannot guarantee to hold RTNL lock through the whole - * function as rtnetnlink does. - */ +/* Default ->dumpit() handler for GET requests. */ static int ethnl_default_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { @@ -490,14 +486,14 @@ static int ethnl_default_dumpit(struct sk_buff *skb, struct net_device *dev; int ret = 0; - rtnl_lock(); + rcu_read_lock(); for_each_netdev_dump(net, dev, ctx->pos_ifindex) { dev_hold(dev); - rtnl_unlock(); + rcu_read_unlock(); ret = ethnl_default_dump_one(skb, dev, ctx, genl_info_dump(cb)); - rtnl_lock(); + rcu_read_lock(); dev_put(dev); if (ret < 0 && ret != -EOPNOTSUPP) { @@ -507,7 +503,7 @@ static int ethnl_default_dumpit(struct sk_buff *skb, } ret = 0; } - rtnl_unlock(); + rcu_read_unlock(); return ret; } -- cgit 1.2.3-korg From 1476de6d2b578673e20fb4cf654ff61cf2782873 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Tue, 30 Jan 2024 16:14:11 +0800 Subject: xfrm: Simplify the allocation of slab caches in xfrm_policy_init commit 0a31bd5f2bbb ("KMEM_CACHE(): simplify slab cache creation") introduces a new macro. Use the new KMEM_CACHE() macro instead of direct kmem_cache_create to simplify the creation of SLAB caches. Signed-off-by: Kunwu Chan Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b4850a8f14adf..53b7ce4a4db00 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -4163,10 +4163,7 @@ static int __net_init xfrm_policy_init(struct net *net) int dir, err; if (net_eq(net, &init_net)) { - xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", - sizeof(struct xfrm_dst), - 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL); + xfrm_dst_cache = KMEM_CACHE(xfrm_dst, SLAB_HWCACHE_ALIGN | SLAB_PANIC); err = rhashtable_init(&xfrm_policy_inexact_table, &xfrm_pol_inexact_params); BUG_ON(err); -- cgit 1.2.3-korg From 129e406e1811538c1afc9c8e97d61bb18eed3363 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Thu, 8 Feb 2024 14:06:49 -0800 Subject: net/ipv6: set expires in rt6_add_dflt_router(). Pass the duration of a lifetime (in seconds) to the function rt6_add_dflt_router() so that it can properly set the expiration time. The function ndisc_router_discovery() is the only one that calls rt6_add_dflt_router(), and it will later set the expiration time for the route created by rt6_add_dflt_router(). However, there is a gap of time between calling rt6_add_dflt_router() and setting the expiration time in ndisc_router_discovery(). During this period, there is a possibility that a new route may be removed from the routing table. By setting the correct expiration time in rt6_add_dflt_router(), we can prevent this from happening. The reason for setting RTF_EXPIRES in rt6_add_dflt_router() is to start the Garbage Collection (GC) timer, as it only activates when a route with RTF_EXPIRES is added to a table. Suggested-by: David Ahern Reviewed-by: Hangbin Liu Reviewed-by: David Ahern Signed-off-by: Kui-Feng Lee Signed-off-by: David S. Miller --- include/net/ip6_route.h | 3 ++- net/ipv6/ndisc.c | 3 ++- net/ipv6/route.c | 4 +++- 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 28b0657902615..52a51c69aa9de 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -170,7 +170,8 @@ struct fib6_info *rt6_get_dflt_router(struct net *net, struct fib6_info *rt6_add_dflt_router(struct net *net, const struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref, - u32 defrtr_usr_metric); + u32 defrtr_usr_metric, + int lifetime); void rt6_purge_dflt_routers(struct net *net); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index a19999b30bc07..a68462668158b 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1382,7 +1382,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) neigh_release(neigh); rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr, - skb->dev, pref, defrtr_usr_metric); + skb->dev, pref, defrtr_usr_metric, + lifetime); if (!rt) { ND_PRINTK(0, err, "RA: %s failed to add default route\n", diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 63b4c60565820..98abba8f15cdc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4355,7 +4355,8 @@ struct fib6_info *rt6_add_dflt_router(struct net *net, const struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref, - u32 defrtr_usr_metric) + u32 defrtr_usr_metric, + int lifetime) { struct fib6_config cfg = { .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT, @@ -4368,6 +4369,7 @@ struct fib6_info *rt6_add_dflt_router(struct net *net, .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, .fc_nlinfo.nl_net = net, + .fc_expires = jiffies_to_clock_t(lifetime * HZ), }; cfg.fc_gateway = *gwaddr; -- cgit 1.2.3-korg From 60df43d3a72c5fc50ff854cca17c9935c4398794 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Thu, 8 Feb 2024 14:06:50 -0800 Subject: net/ipv6: Remove unnecessary clean. The route here is newly created. It is unnecessary to call fib6_clean_expires() on it. Suggested-by: Eric Dumazet Reviewed-by: Hangbin Liu Reviewed-by: David Ahern Signed-off-by: Kui-Feng Lee Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 98abba8f15cdc..dd6ff5b209186 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3765,8 +3765,6 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, if (cfg->fc_flags & RTF_EXPIRES) fib6_set_expires(rt, jiffies + clock_t_to_jiffies(cfg->fc_expires)); - else - fib6_clean_expires(rt); if (cfg->fc_protocol == RTPROT_UNSPEC) cfg->fc_protocol = RTPROT_BOOT; -- cgit 1.2.3-korg From 5eb902b8e7193cdcb33242af0a56502e6b5206e9 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Thu, 8 Feb 2024 14:06:51 -0800 Subject: net/ipv6: Remove expired routes with a separated list of routes. FIB6 GC walks trees of fib6_tables to remove expired routes. Walking a tree can be expensive if the number of routes in a table is big, even if most of them are permanent. Checking routes in a separated list of routes having expiration will avoid this potential issue. Reviewed-by: David Ahern Signed-off-by: Kui-Feng Lee Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 46 ++++++++++++++++++++++++++++++++++++++- net/ipv6/addrconf.c | 41 +++++++++++++++++++++++++++++------ net/ipv6/ip6_fib.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++----- net/ipv6/ndisc.c | 10 ++++++++- net/ipv6/route.c | 13 +++++++++-- 5 files changed, 154 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 360b12e618507..323c94f1845b9 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -173,6 +173,9 @@ struct fib6_info { refcount_t fib6_ref; unsigned long expires; + + struct hlist_node gc_link; + struct dst_metrics *fib6_metrics; #define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1] @@ -241,12 +244,18 @@ static inline bool fib6_requires_src(const struct fib6_info *rt) return rt->fib6_src.plen > 0; } +/* The callers should hold f6i->fib6_table->tb6_lock if a route has ever + * been added to a table before. + */ static inline void fib6_clean_expires(struct fib6_info *f6i) { f6i->fib6_flags &= ~RTF_EXPIRES; f6i->expires = 0; } +/* The callers should hold f6i->fib6_table->tb6_lock if a route has ever + * been added to a table before. + */ static inline void fib6_set_expires(struct fib6_info *f6i, unsigned long expires) { @@ -327,8 +336,10 @@ static inline bool fib6_info_hold_safe(struct fib6_info *f6i) static inline void fib6_info_release(struct fib6_info *f6i) { - if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) + if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) { + DEBUG_NET_WARN_ON_ONCE(!hlist_unhashed(&f6i->gc_link)); call_rcu(&f6i->rcu, fib6_info_destroy_rcu); + } } enum fib6_walk_state { @@ -382,6 +393,7 @@ struct fib6_table { struct inet_peer_base tb6_peers; unsigned int flags; unsigned int fib_seq; + struct hlist_head tb6_gc_hlist; /* GC candidates */ #define RT6_TABLE_HAS_DFLT_ROUTER BIT(0) }; @@ -498,6 +510,38 @@ void fib6_gc_cleanup(void); int fib6_init(void); +/* Add the route to the gc list if it is not already there + * + * The callers should hold f6i->fib6_table->tb6_lock. + */ +static inline void fib6_add_gc_list(struct fib6_info *f6i) +{ + /* If fib6_node is null, the f6i is not in (or removed from) the + * table. + * + * There is a gap between finding the f6i from the table and + * calling this function without the protection of the tb6_lock. + * This check makes sure the f6i is not added to the gc list when + * it is not on the table. + */ + if (!rcu_dereference_protected(f6i->fib6_node, + lockdep_is_held(&f6i->fib6_table->tb6_lock))) + return; + + if (hlist_unhashed(&f6i->gc_link)) + hlist_add_head(&f6i->gc_link, &f6i->fib6_table->tb6_gc_hlist); +} + +/* Remove the route from the gc list if it is on the list. + * + * The callers should hold f6i->fib6_table->tb6_lock. + */ +static inline void fib6_remove_gc_list(struct fib6_info *f6i) +{ + if (!hlist_unhashed(&f6i->gc_link)) + hlist_del_init(&f6i->gc_link); +} + struct ipv6_route_iter { struct seq_net_private p; struct fib6_walker w; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d63f5d063f073..0ea44563454f0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1255,6 +1255,7 @@ static void cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt, bool del_peer) { + struct fib6_table *table; struct fib6_info *f6i; f6i = addrconf_get_prefix_route(del_peer ? &ifp->peer_addr : &ifp->addr, @@ -1264,8 +1265,15 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, if (del_rt) ip6_del_rt(dev_net(ifp->idev->dev), f6i, false); else { - if (!(f6i->fib6_flags & RTF_EXPIRES)) + if (!(f6i->fib6_flags & RTF_EXPIRES)) { + table = f6i->fib6_table; + spin_lock_bh(&table->tb6_lock); + fib6_set_expires(f6i, expires); + fib6_add_gc_list(f6i); + + spin_unlock_bh(&table->tb6_lock); + } fib6_info_release(f6i); } } @@ -2706,6 +2714,7 @@ EXPORT_SYMBOL_GPL(addrconf_prefix_rcv_add_addr); void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) { struct prefix_info *pinfo; + struct fib6_table *table; __u32 valid_lft; __u32 prefered_lft; int addr_type, err; @@ -2782,11 +2791,20 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) if (valid_lft == 0) { ip6_del_rt(net, rt, false); rt = NULL; - } else if (addrconf_finite_timeout(rt_expires)) { - /* not infinity */ - fib6_set_expires(rt, jiffies + rt_expires); } else { - fib6_clean_expires(rt); + table = rt->fib6_table; + spin_lock_bh(&table->tb6_lock); + + if (addrconf_finite_timeout(rt_expires)) { + /* not infinity */ + fib6_set_expires(rt, jiffies + rt_expires); + fib6_add_gc_list(rt); + } else { + fib6_clean_expires(rt); + fib6_remove_gc_list(rt); + } + + spin_unlock_bh(&table->tb6_lock); } } else if (valid_lft) { clock_t expires = 0; @@ -4741,6 +4759,7 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, u32 flags, bool modify_peer) { + struct fib6_table *table; struct fib6_info *f6i; u32 prio; @@ -4761,10 +4780,18 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp, ifp->rt_priority, ifp->idev->dev, expires, flags, GFP_KERNEL); } else { - if (!expires) + table = f6i->fib6_table; + spin_lock_bh(&table->tb6_lock); + + if (!expires) { fib6_clean_expires(f6i); - else + fib6_remove_gc_list(f6i); + } else { fib6_set_expires(f6i, expires); + fib6_add_gc_list(f6i); + } + + spin_unlock_bh(&table->tb6_lock); fib6_info_release(f6i); } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 38a0348b1d178..805bbf26b3efd 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -160,6 +160,8 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh) INIT_LIST_HEAD(&f6i->fib6_siblings); refcount_set(&f6i->fib6_ref, 1); + INIT_HLIST_NODE(&f6i->gc_link); + return f6i; } @@ -246,6 +248,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) net->ipv6.fib6_null_entry); table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&table->tb6_peers); + INIT_HLIST_HEAD(&table->tb6_gc_hlist); } return table; @@ -1055,6 +1058,9 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, lockdep_is_held(&table->tb6_lock)); } } + + fib6_clean_expires(rt); + fib6_remove_gc_list(rt); } /* @@ -1115,10 +1121,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, rt->fib6_nsiblings = 0; if (!(iter->fib6_flags & RTF_EXPIRES)) return -EEXIST; - if (!(rt->fib6_flags & RTF_EXPIRES)) + if (!(rt->fib6_flags & RTF_EXPIRES)) { fib6_clean_expires(iter); - else + fib6_remove_gc_list(iter); + } else { fib6_set_expires(iter, rt->expires); + fib6_add_gc_list(iter); + } if (rt->fib6_pmtu) fib6_metric_set(iter, RTAX_MTU, @@ -1477,6 +1486,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, if (rt->nh) list_add(&rt->nh_list, &rt->nh->f6i_list); __fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net)); + + if (rt->fib6_flags & RTF_EXPIRES) + fib6_add_gc_list(rt); + fib6_start_gc(info->nl_net, rt); } @@ -2280,9 +2293,8 @@ static void fib6_flush_trees(struct net *net) * Garbage collection */ -static int fib6_age(struct fib6_info *rt, void *arg) +static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) { - struct fib6_gc_args *gc_args = arg; unsigned long now = jiffies; /* @@ -2307,6 +2319,42 @@ static int fib6_age(struct fib6_info *rt, void *arg) return 0; } +static void fib6_gc_table(struct net *net, + struct fib6_table *tb6, + struct fib6_gc_args *gc_args) +{ + struct fib6_info *rt; + struct hlist_node *n; + struct nl_info info = { + .nl_net = net, + .skip_notify = false, + }; + + hlist_for_each_entry_safe(rt, n, &tb6->tb6_gc_hlist, gc_link) + if (fib6_age(rt, gc_args) == -1) + fib6_del(rt, &info); +} + +static void fib6_gc_all(struct net *net, struct fib6_gc_args *gc_args) +{ + struct fib6_table *table; + struct hlist_head *head; + unsigned int h; + + rcu_read_lock(); + for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { + head = &net->ipv6.fib_table_hash[h]; + hlist_for_each_entry_rcu(table, head, tb6_hlist) { + spin_lock_bh(&table->tb6_lock); + + fib6_gc_table(net, table, gc_args); + + spin_unlock_bh(&table->tb6_lock); + } + } + rcu_read_unlock(); +} + void fib6_run_gc(unsigned long expires, struct net *net, bool force) { struct fib6_gc_args gc_args; @@ -2322,7 +2370,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force) net->ipv6.sysctl.ip6_rt_gc_interval; gc_args.more = 0; - fib6_clean_all(net, fib6_age, &gc_args); + fib6_gc_all(net, &gc_args); now = jiffies; net->ipv6.ip6_rt_last_gc = now; @@ -2382,6 +2430,7 @@ static int __net_init fib6_net_init(struct net *net) net->ipv6.fib6_main_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers); + INIT_HLIST_HEAD(&net->ipv6.fib6_main_tbl->tb6_gc_hlist); #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl), @@ -2394,6 +2443,7 @@ static int __net_init fib6_net_init(struct net *net) net->ipv6.fib6_local_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers); + INIT_HLIST_HEAD(&net->ipv6.fib6_local_tbl->tb6_gc_hlist); #endif fib6_tables_init(net); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index a68462668158b..73cb31afe9354 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1237,6 +1237,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) struct ndisc_options ndopts; struct fib6_info *rt = NULL; struct inet6_dev *in6_dev; + struct fib6_table *table; u32 defrtr_usr_metric; unsigned int pref = 0; __u32 old_if_flags; @@ -1410,8 +1411,15 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) inet6_rt_notify(RTM_NEWROUTE, rt, &nlinfo, NLM_F_REPLACE); } - if (rt) + if (rt) { + table = rt->fib6_table; + spin_lock_bh(&table->tb6_lock); + fib6_set_expires(rt, jiffies + (HZ * lifetime)); + fib6_add_gc_list(rt); + + spin_unlock_bh(&table->tb6_lock); + } if (in6_dev->cnf.accept_ra_min_hop_limit < 256 && ra_msg->icmph.icmp6_hop_limit) { if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index dd6ff5b209186..707d65bc9c0e5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -931,6 +931,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, struct net *net = dev_net(dev); struct route_info *rinfo = (struct route_info *) opt; struct in6_addr prefix_buf, *prefix; + struct fib6_table *table; unsigned int pref; unsigned long lifetime; struct fib6_info *rt; @@ -989,10 +990,18 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); if (rt) { - if (!addrconf_finite_timeout(lifetime)) + table = rt->fib6_table; + spin_lock_bh(&table->tb6_lock); + + if (!addrconf_finite_timeout(lifetime)) { fib6_clean_expires(rt); - else + fib6_remove_gc_list(rt); + } else { fib6_set_expires(rt, jiffies + HZ * lifetime); + fib6_add_gc_list(rt); + } + + spin_unlock_bh(&table->tb6_lock); fib6_info_release(rt); } -- cgit 1.2.3-korg From 768e06a8bcab045de9eac87dd071a769119cd0fd Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Thu, 8 Feb 2024 14:06:52 -0800 Subject: net/ipv6: set expires in modify_prefix_route() if RTF_EXPIRES is set. Make the decision to set or clean the expires of a route based on the RTF_EXPIRES flag, rather than the value of the "expires" argument. This patch doesn't make difference logically, but make inet6_addr_modify() and modify_prefix_route() consistent. The function inet6_addr_modify() is the only caller of modify_prefix_route(), and it passes the RTF_EXPIRES flag and an expiration value. The RTF_EXPIRES flag is turned on or off based on the value of valid_lft. The RTF_EXPIRES flag is turned on if valid_lft is a finite value (not infinite, not 0xffffffff). Even if valid_lft is 0, the RTF_EXPIRES flag remains on. The expiration value being passed is equal to the valid_lft value if the flag is on. However, if the valid_lft value is infinite, the expiration value becomes 0 and the RTF_EXPIRES flag is turned off. Despite this, modify_prefix_route() decides to set the expiration value if the received expiration value is not zero. This mixing of infinite and zero cases creates an inconsistency. Reviewed-by: Hangbin Liu Reviewed-by: David Ahern Signed-off-by: Kui-Feng Lee Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0ea44563454f0..ca1b719323c0b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4783,7 +4783,7 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp, table = f6i->fib6_table; spin_lock_bh(&table->tb6_lock); - if (!expires) { + if (!(flags & RTF_EXPIRES)) { fib6_clean_expires(f6i); fib6_remove_gc_list(f6i); } else { -- cgit 1.2.3-korg From 17ef8efc00b34918b966388b2af0993811895a8c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 9 Feb 2024 15:30:56 +0000 Subject: ipv6: mcast: remove one synchronize_net() barrier in ipv6_mc_down() As discussed in the past (commit 2d3916f31891 ("ipv6: fix skb drops in igmp6_event_query() and igmp6_event_report()")) I think the synchronize_net() call in ipv6_mc_down() is not needed. Under load, synchronize_net() can last between 200 usec and 5 ms. KASAN seems to agree as well. Fixes: f185de28d9ae ("mld: add new workqueues for process mld events") Signed-off-by: Eric Dumazet Cc: Taehee Yoo Cc: Cong Wang Cc: David Ahern Signed-off-by: David S. Miller --- net/ipv6/mcast.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index bc6e0a0bad3c1..76ee1615ff2a0 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2719,7 +2719,6 @@ void ipv6_mc_down(struct inet6_dev *idev) /* Should stop work after group drop. or we will * start work again in mld_ifc_event() */ - synchronize_net(); mld_query_stop_work(idev); mld_report_stop_work(idev); -- cgit 1.2.3-korg From 4cd582ffa5a9a5d58e5bac9c5e55ca8eeabffddc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 9 Feb 2024 15:30:57 +0000 Subject: net: use synchronize_net() in dev_change_name() dev_change_name() holds RTNL, we better use synchronize_net() instead of plain synchronize_rcu(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 31f2c97d19903..7cf15d2bf78de 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1239,7 +1239,7 @@ rollback: netdev_name_node_del(dev->name_node); write_unlock(&dev_base_lock); - synchronize_rcu(); + synchronize_net(); write_lock(&dev_base_lock); netdev_name_node_add(net, dev->name_node); -- cgit 1.2.3-korg From 48ebf6ebbc91a6e6b2c0ac1217573e81b37ef2ab Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 9 Feb 2024 15:30:58 +0000 Subject: bridge: vlan: use synchronize_net() when holding RTNL br_vlan_flush() and nbp_vlan_flush() should use synchronize_net() instead of syncronize_rcu() to release RTNL sooner. Signed-off-by: Eric Dumazet Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_vlan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 15f44d026e75a..9c2fffb827ab1 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -841,7 +841,7 @@ void br_vlan_flush(struct net_bridge *br) vg = br_vlan_group(br); __vlan_flush(br, NULL, vg); RCU_INIT_POINTER(br->vlgrp, NULL); - synchronize_rcu(); + synchronize_net(); __vlan_group_free(vg); } @@ -1372,7 +1372,7 @@ void nbp_vlan_flush(struct net_bridge_port *port) vg = nbp_vlan_group(port); __vlan_flush(port->br, port, vg); RCU_INIT_POINTER(port->vlgrp, NULL); - synchronize_rcu(); + synchronize_net(); __vlan_group_free(vg); } -- cgit 1.2.3-korg From 2cd0c51e3baf7aa49e802c06cb1b2ffa9c82fbe1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 9 Feb 2024 15:30:59 +0000 Subject: ipv4/fib: use synchronize_net() when holding RTNL tnode_free() should use synchronize_net() instead of syncronize_rcu() to release RTNL sooner. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 3ff35f8117655..0fc7ab5832d1a 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -501,7 +501,7 @@ static void tnode_free(struct key_vector *tn) if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) { tnode_free_size = 0; - synchronize_rcu(); + synchronize_net(); } } -- cgit 1.2.3-korg From 78c3253f27e579f7f3a1f5c0cb8266693a7b4f41 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 9 Feb 2024 15:31:00 +0000 Subject: net: use synchronize_rcu_expedited in cleanup_net() cleanup_net() is calling synchronize_rcu() right before acquiring RTNL. synchronize_rcu() is much slower than synchronize_rcu_expedited(), and cleanup_net() is currently single threaded. In many workloads we want cleanup_net() to be fast, in order to free memory and various sysfs and procfs entries as fast as possible. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/net_namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 233ec0cdd0111..f0540c5575157 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -622,7 +622,7 @@ static void cleanup_net(struct work_struct *work) * the rcu_barrier() below isn't sufficient alone. * Also the pre_exit() and exit() methods need this barrier. */ - synchronize_rcu(); + synchronize_rcu_expedited(); rtnl_lock(); list_for_each_entry_reverse(ops, &pernet_list, list) { -- cgit 1.2.3-korg From 1ebb85f9c03de0b66c334de219f224159e24e549 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 9 Feb 2024 15:31:01 +0000 Subject: netfilter: conntrack: expedite rcu in nf_conntrack_cleanup_net_list nf_conntrack_cleanup_net_list() is calling synchronize_net() while RTNL is not held. This effectively calls synchronize_rcu(). synchronize_rcu() is much slower than synchronize_rcu_expedited(), and cleanup_net() is currently single threaded. In many workloads we want cleanup_net() to be faster, in order to free memory and various sysfs and procfs entries as fast as possible. Signed-off-by: Eric Dumazet Cc: Pablo Neira Ayuso Cc: Jozsef Kadlecsik Cc: Florian Westphal Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 2e5f3864d353a..90e6bd2c30002 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2530,7 +2530,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list) * netfilter framework. Roll on, two-stage module * delete... */ - synchronize_net(); + synchronize_rcu_expedited(); i_see_dead_people: busy = 0; list_for_each_entry(net, net_exit_list, exit_list) { -- cgit 1.2.3-korg From fec846fa7eddf7bb651bf88bd78c7db1410ae3b1 Mon Sep 17 00:00:00 2001 From: Nicolas Maier Date: Sat, 20 Jan 2024 09:10:18 +0100 Subject: can: bcm: add recvmsg flags for own, local and remote traffic CAN RAW sockets allow userspace to tell if a received CAN frame comes from the same socket, another socket on the same host, or another host. See commit 1e55659ce6dd ("can-raw: add msg_flags to distinguish local traffic"). However, this feature is missing in CAN BCM sockets. Add the same feature to CAN BCM sockets. When reading a received frame (opcode RX_CHANGED) using recvmsg, two flags in msg->msg_flags may be set following the previous convention (from CAN RAW), to distinguish between 'own', 'local' and 'remote' CAN traffic. Update the documentation to reflect this change. Signed-off-by: Nicolas Maier Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20240120081018.2319-1-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- Documentation/networking/can.rst | 34 ++++++++++---------- net/can/bcm.c | 69 +++++++++++++++++++++++++++++++++------- 2 files changed, 75 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst index d7e1ada905b2d..62519d38c58ba 100644 --- a/Documentation/networking/can.rst +++ b/Documentation/networking/can.rst @@ -444,6 +444,24 @@ definitions are specified for CAN specific MTUs in include/linux/can.h: #define CANFD_MTU (sizeof(struct canfd_frame)) == 72 => CAN FD frame +Returned Message Flags +---------------------- + +When using the system call recvmsg(2) on a RAW or a BCM socket, the +msg->msg_flags field may contain the following flags: + +MSG_DONTROUTE: + set when the received frame was created on the local host. + +MSG_CONFIRM: + set when the frame was sent via the socket it is received on. + This flag can be interpreted as a 'transmission confirmation' when the + CAN driver supports the echo of frames on driver level, see + :ref:`socketcan-local-loopback1` and :ref:`socketcan-local-loopback2`. + (Note: In order to receive such messages on a RAW socket, + CAN_RAW_RECV_OWN_MSGS must be set.) + + .. _socketcan-raw-sockets: RAW Protocol Sockets with can_filters (SOCK_RAW) @@ -693,22 +711,6 @@ where the CAN_INV_FILTER flag is set in order to notch single CAN IDs or CAN ID ranges from the incoming traffic. -RAW Socket Returned Message Flags -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -When using recvmsg() call, the msg->msg_flags may contain following flags: - -MSG_DONTROUTE: - set when the received frame was created on the local host. - -MSG_CONFIRM: - set when the frame was sent via the socket it is received on. - This flag can be interpreted as a 'transmission confirmation' when the - CAN driver supports the echo of frames on driver level, see - :ref:`socketcan-local-loopback1` and :ref:`socketcan-local-loopback2`. - In order to receive such messages, CAN_RAW_RECV_OWN_MSGS must be set. - - Broadcast Manager Protocol Sockets (SOCK_DGRAM) ----------------------------------------------- diff --git a/net/can/bcm.c b/net/can/bcm.c index 9168114fc87f7..27d5fcf0eac9d 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -72,9 +72,11 @@ #define BCM_TIMER_SEC_MAX (400 * 24 * 60 * 60) /* use of last_frames[index].flags */ +#define RX_LOCAL 0x10 /* frame was created on the local host */ +#define RX_OWN 0x20 /* frame was sent via the socket it was received on */ #define RX_RECV 0x40 /* received data for this element */ #define RX_THR 0x80 /* element not been sent due to throttle feature */ -#define BCM_CAN_FLAGS_MASK 0x3F /* to clean private flags after usage */ +#define BCM_CAN_FLAGS_MASK 0x0F /* to clean private flags after usage */ /* get best masking value for can_rx_register() for a given single can_id */ #define REGMASK(id) ((id & CAN_EFF_FLAG) ? \ @@ -138,6 +140,16 @@ static LIST_HEAD(bcm_notifier_list); static DEFINE_SPINLOCK(bcm_notifier_lock); static struct bcm_sock *bcm_busy_notifier; +/* Return pointer to store the extra msg flags for bcm_recvmsg(). + * We use the space of one unsigned int beyond the 'struct sockaddr_can' + * in skb->cb. + */ +static inline unsigned int *bcm_flags(struct sk_buff *skb) +{ + /* return pointer after struct sockaddr_can */ + return (unsigned int *)(&((struct sockaddr_can *)skb->cb)[1]); +} + static inline struct bcm_sock *bcm_sk(const struct sock *sk) { return (struct bcm_sock *)sk; @@ -325,6 +337,7 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, struct sock *sk = op->sk; unsigned int datalen = head->nframes * op->cfsiz; int err; + unsigned int *pflags; skb = alloc_skb(sizeof(*head) + datalen, gfp_any()); if (!skb) @@ -332,6 +345,14 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, skb_put_data(skb, head, sizeof(*head)); + /* ensure space for sockaddr_can and msg flags */ + sock_skb_cb_check_size(sizeof(struct sockaddr_can) + + sizeof(unsigned int)); + + /* initialize msg flags */ + pflags = bcm_flags(skb); + *pflags = 0; + if (head->nframes) { /* CAN frames starting here */ firstframe = (struct canfd_frame *)skb_tail_pointer(skb); @@ -344,8 +365,14 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, * relevant for updates that are generated by the * BCM, where nframes is 1 */ - if (head->nframes == 1) + if (head->nframes == 1) { + if (firstframe->flags & RX_LOCAL) + *pflags |= MSG_DONTROUTE; + if (firstframe->flags & RX_OWN) + *pflags |= MSG_CONFIRM; + firstframe->flags &= BCM_CAN_FLAGS_MASK; + } } if (has_timestamp) { @@ -360,7 +387,6 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, * containing the interface index. */ - sock_skb_cb_check_size(sizeof(struct sockaddr_can)); addr = (struct sockaddr_can *)skb->cb; memset(addr, 0, sizeof(*addr)); addr->can_family = AF_CAN; @@ -444,7 +470,7 @@ static void bcm_rx_changed(struct bcm_op *op, struct canfd_frame *data) op->frames_filtered = op->frames_abs = 0; /* this element is not throttled anymore */ - data->flags &= (BCM_CAN_FLAGS_MASK|RX_RECV); + data->flags &= ~RX_THR; memset(&head, 0, sizeof(head)); head.opcode = RX_CHANGED; @@ -465,13 +491,17 @@ static void bcm_rx_changed(struct bcm_op *op, struct canfd_frame *data) */ static void bcm_rx_update_and_send(struct bcm_op *op, struct canfd_frame *lastdata, - const struct canfd_frame *rxdata) + const struct canfd_frame *rxdata, + unsigned char traffic_flags) { memcpy(lastdata, rxdata, op->cfsiz); /* mark as used and throttled by default */ lastdata->flags |= (RX_RECV|RX_THR); + /* add own/local/remote traffic flags */ + lastdata->flags |= traffic_flags; + /* throttling mode inactive ? */ if (!op->kt_ival2) { /* send RX_CHANGED to the user immediately */ @@ -508,7 +538,8 @@ rx_changed_settime: * received data stored in op->last_frames[] */ static void bcm_rx_cmp_to_index(struct bcm_op *op, unsigned int index, - const struct canfd_frame *rxdata) + const struct canfd_frame *rxdata, + unsigned char traffic_flags) { struct canfd_frame *cf = op->frames + op->cfsiz * index; struct canfd_frame *lcf = op->last_frames + op->cfsiz * index; @@ -521,7 +552,7 @@ static void bcm_rx_cmp_to_index(struct bcm_op *op, unsigned int index, if (!(lcf->flags & RX_RECV)) { /* received data for the first time => send update to user */ - bcm_rx_update_and_send(op, lcf, rxdata); + bcm_rx_update_and_send(op, lcf, rxdata, traffic_flags); return; } @@ -529,7 +560,7 @@ static void bcm_rx_cmp_to_index(struct bcm_op *op, unsigned int index, for (i = 0; i < rxdata->len; i += 8) { if ((get_u64(cf, i) & get_u64(rxdata, i)) != (get_u64(cf, i) & get_u64(lcf, i))) { - bcm_rx_update_and_send(op, lcf, rxdata); + bcm_rx_update_and_send(op, lcf, rxdata, traffic_flags); return; } } @@ -537,7 +568,7 @@ static void bcm_rx_cmp_to_index(struct bcm_op *op, unsigned int index, if (op->flags & RX_CHECK_DLC) { /* do a real check in CAN frame length */ if (rxdata->len != lcf->len) { - bcm_rx_update_and_send(op, lcf, rxdata); + bcm_rx_update_and_send(op, lcf, rxdata, traffic_flags); return; } } @@ -644,6 +675,7 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data) struct bcm_op *op = (struct bcm_op *)data; const struct canfd_frame *rxframe = (struct canfd_frame *)skb->data; unsigned int i; + unsigned char traffic_flags; if (op->can_id != rxframe->can_id) return; @@ -673,15 +705,24 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data) return; } + /* compute flags to distinguish between own/local/remote CAN traffic */ + traffic_flags = 0; + if (skb->sk) { + traffic_flags |= RX_LOCAL; + if (skb->sk == op->sk) + traffic_flags |= RX_OWN; + } + if (op->flags & RX_FILTER_ID) { /* the easiest case */ - bcm_rx_update_and_send(op, op->last_frames, rxframe); + bcm_rx_update_and_send(op, op->last_frames, rxframe, + traffic_flags); goto rx_starttimer; } if (op->nframes == 1) { /* simple compare with index 0 */ - bcm_rx_cmp_to_index(op, 0, rxframe); + bcm_rx_cmp_to_index(op, 0, rxframe, traffic_flags); goto rx_starttimer; } @@ -698,7 +739,8 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data) if ((get_u64(op->frames, 0) & get_u64(rxframe, 0)) == (get_u64(op->frames, 0) & get_u64(op->frames + op->cfsiz * i, 0))) { - bcm_rx_cmp_to_index(op, i, rxframe); + bcm_rx_cmp_to_index(op, i, rxframe, + traffic_flags); break; } } @@ -1675,6 +1717,9 @@ static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, memcpy(msg->msg_name, skb->cb, msg->msg_namelen); } + /* assign the flags that have been recorded in bcm_send_to_user() */ + msg->msg_flags |= *(bcm_flags(skb)); + skb_free_datagram(sk, skb); return size; -- cgit 1.2.3-korg From e1aa35e16399d600215470411dfb56e1d6f8e017 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 8 Dec 2023 17:57:29 +0100 Subject: can: isotp: support dynamic flow control parameters The ISO15765-2 standard supports to take the PDUs communication parameters blocksize (BS) and Separation Time minimum (STmin) either from the first received flow control (FC) "static" or from every received FC "dynamic". Add a new CAN_ISOTP_DYN_FC_PARMS flag to support dynamic FC parameters. Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20231208165729.3011-1-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/isotp.h | 1 + net/can/isotp.c | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h index 439c982f7e811..6cde62371b6fb 100644 --- a/include/uapi/linux/can/isotp.h +++ b/include/uapi/linux/can/isotp.h @@ -137,6 +137,7 @@ struct can_isotp_ll_options { #define CAN_ISOTP_WAIT_TX_DONE 0x0400 /* wait for tx completion */ #define CAN_ISOTP_SF_BROADCAST 0x0800 /* 1-to-N functional addressing */ #define CAN_ISOTP_CF_BROADCAST 0x1000 /* 1-to-N transmission w/o FC */ +#define CAN_ISOTP_DYN_FC_PARMS 0x2000 /* dynamic FC parameters BS/STmin */ /* protocol machine default values */ diff --git a/net/can/isotp.c b/net/can/isotp.c index d1c6f206f4295..25bac0fafc830 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -381,8 +381,9 @@ static int isotp_rcv_fc(struct isotp_sock *so, struct canfd_frame *cf, int ae) return 1; } - /* get communication parameters only from the first FC frame */ - if (so->tx.state == ISOTP_WAIT_FIRST_FC) { + /* get static/dynamic communication params from first/every FC frame */ + if (so->tx.state == ISOTP_WAIT_FIRST_FC || + so->opt.flags & CAN_ISOTP_DYN_FC_PARMS) { so->txfc.bs = cf->data[ae + 1]; so->txfc.stmin = cf->data[ae + 2]; -- cgit 1.2.3-korg From 6f656131f6988709e3bf828c3ad992032b717c2e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 8 Feb 2024 12:01:43 +0100 Subject: wifi: mac80211: remove gfp parameter from ieee80211_obss_color_collision_notify Get rid of gfp parameter from ieee80211_obss_color_collision_notify since it is no longer used. Signed-off-by: Lorenzo Bianconi Reviewed-by: Jeff Johnson Acked-by: Jeff Johnson Link: https://msgid.link/f91e1c78896408ac556586ba8c99e4e389aeba02.1707389901.git.lorenzo@kernel.org Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath11k/wmi.c | 3 +-- include/net/mac80211.h | 3 +-- net/mac80211/cfg.c | 2 +- net/mac80211/rx.c | 3 +-- 4 files changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index 3c9f3b0bcfaa9..1943e636faef2 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -4020,8 +4020,7 @@ ath11k_wmi_obss_color_collision_event(struct ath11k_base *ab, struct sk_buff *sk switch (ev->evt_type) { case WMI_BSS_COLOR_COLLISION_DETECTION: - ieee80211_obss_color_collision_notify(arvif->vif, ev->obss_color_bitmap, - GFP_KERNEL); + ieee80211_obss_color_collision_notify(arvif->vif, ev->obss_color_bitmap); ath11k_dbg(ab, ATH11K_DBG_WMI, "OBSS color collision detected vdev:%d, event:%d, bitmap:%08llx\n", ev->vdev_id, ev->evt_type, ev->obss_color_bitmap); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 45d905b17a65c..fc223761e3afc 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7490,11 +7490,10 @@ ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw, * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @color_bitmap: a 64 bit bitmap representing the colors that the local BSS is * aware of. - * @gfp: allocation flags */ void ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif, - u64 color_bitmap, gfp_t gfp); + u64 color_bitmap); /** * ieee80211_is_tx_data - check if frame is a data frame diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 156a4215dcda7..6f7c96c358f46 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -4765,7 +4765,7 @@ EXPORT_SYMBOL_GPL(ieee80211_color_change_finish); void ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif, - u64 color_bitmap, gfp_t gfp) + u64 color_bitmap) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_link_data *link = &sdata->deflink; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 9902ea69af0a1..c1f8501384056 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3386,8 +3386,7 @@ ieee80211_rx_check_bss_color_collision(struct ieee80211_rx_data *rx) IEEE80211_HE_OPERATION_BSS_COLOR_MASK); if (color == bss_conf->he_bss_color.color) ieee80211_obss_color_collision_notify(&rx->sdata->vif, - BIT_ULL(color), - GFP_ATOMIC); + BIT_ULL(color)); } } -- cgit 1.2.3-korg From f6ca96aa51a4ae1b3a416fbe85acdf1197c405a6 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Mon, 5 Feb 2024 21:59:50 +0530 Subject: wifi: cfg80211: add support for link id attribute in NL80211_CMD_DEL_STATION Currently whenever NL80211_CMD_DEL_STATION command is called without any MAC address, all stations present on that interface are flushed. However with MLO there is a need to flush such stations only which are using at least a particular link from the AP MLD interface. For example - 2 GHz and 5 GHz are part of an AP MLD. To this interface, following stations are connected - 1. One non-EHT STA on 2 GHz link. 2. One non-EHT STA on 5 GHz link. 3. One Multi-Link STA having 2 GHz and 5 GHz as active links. Now if currently, NL80211_CMD_DEL_STATION is issued by the 2 GHz link without any MAC address, it would flush all station entries. However, flushing of station entry #2 at least is not desireable since it is connected to 5 GHz link alone. Hence, add an option to pass link ID as well in the command so that if link ID is passed, stations using that passed link ID alone would be flushed and others will not. So after this, station entries #1 and #3 alone would be flushed and #2 will remain as it is. Signed-off-by: Aditya Kumar Singh Link: https://msgid.link/20240205162952.1697646-2-quic_adisi@quicinc.com [clarify documentation] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++++ include/uapi/linux/nl80211.h | 3 ++- net/wireless/nl80211.c | 19 ++++++++++++++++++- net/wireless/trace.h | 7 +++++-- 4 files changed, 29 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f52f989a54ad9..62894b024e880 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1780,11 +1780,15 @@ struct station_parameters { * @subtype: Management frame subtype to use for indicating removal * (10 = Disassociation, 12 = Deauthentication) * @reason_code: Reason code for the Disassociation/Deauthentication frame + * @link_id: Link ID indicating a link that stations to be flushed must be + * using; valid only for MLO, but can also be -1 for MLO to really + * remove all stations. */ struct station_del_parameters { const u8 *mac; u8 subtype; u16 reason_code; + int link_id; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 853ac538a686f..805bfe7129716 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -438,7 +438,8 @@ * %NL80211_ATTR_REASON_CODE can optionally be used to specify which type * of disconnection indication should be sent to the station * (Deauthentication or Disassociation frame and reason code for that - * frame). + * frame). %NL80211_ATTR_MLO_LINK_ID can be used optionally to remove + * stations connected and using at least that link as one of its links. * * @NL80211_CMD_GET_MPATH: Get mesh path attributes for mesh path to * destination %NL80211_ATTR_MAC on the interface identified by diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e1106ae35e21d..5ca5457538109 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7624,14 +7624,16 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; struct station_del_parameters params; + int link_id = nl80211_link_id_or_invalid(info->attrs); memset(¶ms, 0, sizeof(params)); if (info->attrs[NL80211_ATTR_MAC]) params.mac = nla_data(info->attrs[NL80211_ATTR_MAC]); - switch (dev->ieee80211_ptr->iftype) { + switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MESH_POINT: @@ -7672,6 +7674,17 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) params.reason_code = WLAN_REASON_PREV_AUTH_NOT_VALID; } + /* Link ID not expected in case of non-ML operation */ + if (!wdev->valid_links && link_id != -1) + return -EINVAL; + + /* If given, a valid link ID should be passed during MLO */ + if (wdev->valid_links && link_id >= 0 && + !(wdev->valid_links & BIT(link_id))) + return -EINVAL; + + params.link_id = link_id; + return rdev_del_station(rdev, dev, ¶ms); } @@ -16773,6 +16786,10 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_station, .flags = GENL_UNS_ADMIN_PERM, + /* cannot use NL80211_FLAG_MLO_VALID_LINK_ID, depends on + * whether MAC address is passed or not. If MAC address is + * passed, then even during MLO, link ID is not required. + */ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 194ea24717177..361331c29116f 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -867,6 +867,7 @@ DECLARE_EVENT_CLASS(station_del, MAC_ENTRY(sta_mac) __field(u8, subtype) __field(u16, reason_code) + __field(int, link_id) ), TP_fast_assign( WIPHY_ASSIGN; @@ -874,11 +875,13 @@ DECLARE_EVENT_CLASS(station_del, MAC_ASSIGN(sta_mac, params->mac); __entry->subtype = params->subtype; __entry->reason_code = params->reason_code; + __entry->link_id = params->link_id; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM" - ", subtype: %u, reason_code: %u", + ", subtype: %u, reason_code: %u, link_id: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac, - __entry->subtype, __entry->reason_code) + __entry->subtype, __entry->reason_code, + __entry->link_id) ); DEFINE_EVENT(station_del, rdev_del_station, -- cgit 1.2.3-korg From ec67d6e0d491d2a2df270ddcb7aa44db0984e11c Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Mon, 5 Feb 2024 21:59:51 +0530 Subject: wifi: mac80211: flush only stations using requests links Whenever sta_flush() function is invoked, all STAs present in that interface are flushed. In case of MLO, it is desirable to only flush such STAs that are at least using a given link id as one of their links. Add support for this by making change in the __sta_info_flush API argument to accept a link ID. And then, only if the STA is using the given link as one of its links, it would be flushed. Signed-off-by: Aditya Kumar Singh Link: https://msgid.link/20240205162952.1697646-3-quic_adisi@quicinc.com [reword commit message, in particular this isn't about "active" links] Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 4 ++-- net/mac80211/ibss.c | 4 ++-- net/mac80211/iface.c | 2 +- net/mac80211/mesh.c | 2 +- net/mac80211/mlme.c | 2 +- net/mac80211/ocb.c | 2 +- net/mac80211/sta_info.c | 21 ++++++++++++++------- net/mac80211/sta_info.h | 14 +++++++++++--- 8 files changed, 33 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 6f7c96c358f46..41fca30c896e6 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1616,7 +1616,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, link_conf->ema_ap = false; link_conf->bssid_indicator = 0; - __sta_info_flush(sdata, true); + __sta_info_flush(sdata, true, -1); ieee80211_free_keys(sdata, true); link_conf->enable_beacon = false; @@ -2096,7 +2096,7 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, if (params->mac) return sta_info_destroy_addr_bss(sdata, params->mac); - sta_info_flush(sdata); + sta_info_flush(sdata, params->link_id); return 0; } diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 27cc9ddd0432a..7ace5cdc6c265 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -237,7 +237,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, drv_reset_tsf(local, sdata); if (!ether_addr_equal(ifibss->bssid, bssid)) - sta_info_flush(sdata); + sta_info_flush(sdata, -1); /* if merging, indicate to driver that we leave the old IBSS */ if (sdata->vif.cfg.ibss_joined) { @@ -682,7 +682,7 @@ static void ieee80211_ibss_disconnect(struct ieee80211_sub_if_data *sdata) ifibss->state = IEEE80211_IBSS_MLME_SEARCH; - sta_info_flush(sdata); + sta_info_flush(sdata, -1); spin_lock_bh(&ifibss->incomplete_lock); while (!list_empty(&ifibss->incomplete_stations)) { diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 227c8dc3fbe5a..b75b83a5142be 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -511,7 +511,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do * would have removed them, but in other modes there shouldn't * be any stations. */ - flushed = sta_info_flush(sdata); + flushed = sta_info_flush(sdata, -1); WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_AP_VLAN && flushed > 0); /* don't count this interface for allmulti while it is down */ diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 49f79512c1444..32475da98d739 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1214,7 +1214,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) netif_carrier_off(sdata->dev); /* flush STAs and mpaths on this iface */ - sta_info_flush(sdata); + sta_info_flush(sdata, -1); ieee80211_free_keys(sdata, true); mesh_path_flush_by_iface(sdata); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index dd97f402a6241..e1554666d706c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3151,7 +3151,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, sdata->vif.cfg.ssid_len = 0; /* remove AP and TDLS peers */ - sta_info_flush(sdata); + sta_info_flush(sdata, -1); /* finally reset all BSS / config parameters */ if (!ieee80211_vif_is_mld(&sdata->vif)) diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c index 2dd4a2196af41..9ef14e475c906 100644 --- a/net/mac80211/ocb.c +++ b/net/mac80211/ocb.c @@ -208,7 +208,7 @@ int ieee80211_ocb_leave(struct ieee80211_sub_if_data *sdata) lockdep_assert_wiphy(sdata->local->hw.wiphy); ifocb->joined = false; - sta_info_flush(sdata); + sta_info_flush(sdata, -1); spin_lock_bh(&ifocb->incomplete_lock); while (!list_empty(&ifocb->incomplete_stations)) { diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 4391d8dd634bb..da5fdd6f5c852 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1566,7 +1566,8 @@ void sta_info_stop(struct ieee80211_local *local) } -int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans) +int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans, + int link_id) { struct ieee80211_local *local = sdata->local; struct sta_info *sta, *tmp; @@ -1580,12 +1581,18 @@ int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans) WARN_ON(vlans && !sdata->bss); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { - if (sdata == sta->sdata || - (vlans && sdata->bss == sta->sdata->bss)) { - if (!WARN_ON(__sta_info_destroy_part1(sta))) - list_add(&sta->free_list, &free_list); - ret++; - } + if (sdata != sta->sdata && + (!vlans || sdata->bss != sta->sdata->bss)) + continue; + + if (link_id >= 0 && sta->sta.valid_links && + !(sta->sta.valid_links & BIT(link_id))) + continue; + + if (!WARN_ON(__sta_info_destroy_part1(sta))) + list_add(&sta->free_list, &free_list); + + ret++; } if (!list_empty(&free_list)) { diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 5ef1554f991f6..f03731a5bbee2 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -886,8 +886,12 @@ void sta_info_stop(struct ieee80211_local *local); * * @sdata: sdata to remove all stations from * @vlans: if the given interface is an AP interface, also flush VLANs + * @link_id: if given (>=0), all those STA entries using @link_id only + * will be removed. If -1 is passed, all STA entries will be + * removed. */ -int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans); +int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans, + int link_id); /** * sta_info_flush - flush matching STA entries from the STA table @@ -895,10 +899,14 @@ int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans); * Returns the number of removed STA entries. * * @sdata: sdata to remove all stations from + * @link_id: if given (>=0), all those STA entries using @link_id only + * will be removed. If -1 is passed, all STA entries will be + * removed. */ -static inline int sta_info_flush(struct ieee80211_sub_if_data *sdata) +static inline int sta_info_flush(struct ieee80211_sub_if_data *sdata, + int link_id) { - return __sta_info_flush(sdata, false); + return __sta_info_flush(sdata, false, link_id); } void sta_set_rate_info_tx(struct sta_info *sta, -- cgit 1.2.3-korg From 16405bd7fd2ea8553bc87a5aa9720e3014e91df6 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Mon, 5 Feb 2024 21:59:52 +0530 Subject: wifi: mac80211: remove only own link stations during stop_ap Currently, whenever AP link is brought down via ieee80211_stop_ap() function, all stations connected to the sdata are flushed. However, in case of MLO there is a requirement to flush only stations connected to that link and not all. For instance - Consider 2 GHz and 5 GHz are AP MLD. Now due to some reason 5 GHz link of this AP is going down (link removal or any other case). All stations connected, even legacy stations connected to 2 GHz link AP would also be flushed. Flushing of other link stations is not desirable. Fix this issue by passing self link ID to sta_flush() function. This would then only remove the stations which are still using the passed link ID as their link sta. Other stations will not be affected. Signed-off-by: Aditya Kumar Singh Link: https://msgid.link/20240205162952.1697646-4-quic_adisi@quicinc.com Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 41fca30c896e6..0744113f3535d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1616,7 +1616,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, link_conf->ema_ap = false; link_conf->bssid_indicator = 0; - __sta_info_flush(sdata, true, -1); + __sta_info_flush(sdata, true, link_id); ieee80211_free_keys(sdata, true); link_conf->enable_beacon = false; -- cgit 1.2.3-korg From 7b5e25b8baebc02db728bfbdc3080be863144c7b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Feb 2024 16:54:07 +0200 Subject: wifi: cfg80211: rename UHB to 6 GHz UHB stands for "Ultra High Band", but this term doesn't really exist in the spec. Rename all occurrences to "6 GHz", but keep a few defines for userspace API compatibility. Link: https://msgid.link/20240206164849.c9cfb9400839.I153db3b951934a1d84409c17fbe1f1d1782543fa@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 10 +++++----- include/uapi/linux/nl80211.h | 31 ++++++++++++++++++++----------- net/wireless/nl80211.c | 8 ++++---- net/wireless/reg.c | 10 +++++----- net/wireless/scan.c | 8 ++++---- 5 files changed, 38 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 62894b024e880..7bb8484e859e6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -118,9 +118,9 @@ struct wiphy; * restrictions. * @IEEE80211_CHAN_NO_EHT: EHT operation is not permitted on this channel. * @IEEE80211_CHAN_DFS_CONCURRENT: See %NL80211_RRF_DFS_CONCURRENT - * @IEEE80211_CHAN_NO_UHB_VLP_CLIENT: Client connection with VLP AP + * @IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT: Client connection with VLP AP * not permitted using this channel - * @IEEE80211_CHAN_NO_UHB_AFC_CLIENT: Client connection with AFC AP + * @IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT: Client connection with AFC AP * not permitted using this channel */ enum ieee80211_channel_flags { @@ -146,8 +146,8 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_NO_320MHZ = 1<<19, IEEE80211_CHAN_NO_EHT = 1<<20, IEEE80211_CHAN_DFS_CONCURRENT = 1<<21, - IEEE80211_CHAN_NO_UHB_VLP_CLIENT= 1<<22, - IEEE80211_CHAN_NO_UHB_AFC_CLIENT= 1<<23, + IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT = 1<<22, + IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT = 1<<23, }; #define IEEE80211_CHAN_NO_HT40 \ @@ -4935,7 +4935,7 @@ struct cfg80211_ops { * enum wiphy_flags - wiphy capability flags * * @WIPHY_FLAG_SPLIT_SCAN_6GHZ: if set to true, the scan request will be split - * into two, first for legacy bands and second for UHB. + * into two, first for legacy bands and second for 6 GHz. * @WIPHY_FLAG_NETNS_OK: if not set, do not allow changing the netns of this * wiphy at all * @WIPHY_FLAG_PS_ON_BY_DEFAULT: if set to true, powersave will be enabled diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 805bfe7129716..13fa10804909a 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -11,7 +11,7 @@ * Copyright 2008 Jouni Malinen * Copyright 2008 Colin McCabe * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -4269,9 +4269,9 @@ enum nl80211_wmm_rule { * allowed for peer-to-peer or adhoc communication under the control * of a DFS master which operates on the same channel (FCC-594280 D01 * Section B.3). Should be used together with %NL80211_RRF_DFS only. - * @NL80211_FREQUENCY_ATTR_NO_UHB_VLP_CLIENT: Client connection to VLP AP + * @NL80211_FREQUENCY_ATTR_NO_6GHZ_VLP_CLIENT: Client connection to VLP AP * not allowed using this channel - * @NL80211_FREQUENCY_ATTR_NO_UHB_AFC_CLIENT: Client connection to AFC AP + * @NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT: Client connection to AFC AP * not allowed using this channel * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined @@ -4313,8 +4313,8 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_NO_EHT, NL80211_FREQUENCY_ATTR_PSD, NL80211_FREQUENCY_ATTR_DFS_CONCURRENT, - NL80211_FREQUENCY_ATTR_NO_UHB_VLP_CLIENT, - NL80211_FREQUENCY_ATTR_NO_UHB_AFC_CLIENT, + NL80211_FREQUENCY_ATTR_NO_6GHZ_VLP_CLIENT, + NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, @@ -4327,6 +4327,10 @@ enum nl80211_frequency_attr { #define NL80211_FREQUENCY_ATTR_NO_IR NL80211_FREQUENCY_ATTR_NO_IR #define NL80211_FREQUENCY_ATTR_GO_CONCURRENT \ NL80211_FREQUENCY_ATTR_IR_CONCURRENT +#define NL80211_FREQUENCY_ATTR_NO_UHB_VLP_CLIENT \ + NL80211_FREQUENCY_ATTR_NO_6GHZ_VLP_CLIENT +#define NL80211_FREQUENCY_ATTR_NO_UHB_AFC_CLIENT \ + NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT /** * enum nl80211_bitrate_attr - bitrate attributes @@ -4517,8 +4521,8 @@ enum nl80211_sched_scan_match_attr { peer-to-peer or adhoc communication under the control of a DFS master which operates on the same channel (FCC-594280 D01 Section B.3). Should be used together with %NL80211_RRF_DFS only. - * @NL80211_RRF_NO_UHB_VLP_CLIENT: Client connection to VLP AP not allowed - * @NL80211_RRF_NO_UHB_AFC_CLIENT: Client connection to AFC AP not allowed + * @NL80211_RRF_NO_6GHZ_VLP_CLIENT: Client connection to VLP AP not allowed + * @NL80211_RRF_NO_6GHZ_AFC_CLIENT: Client connection to AFC AP not allowed */ enum nl80211_reg_rule_flags { NL80211_RRF_NO_OFDM = 1<<0, @@ -4541,8 +4545,8 @@ enum nl80211_reg_rule_flags { NL80211_RRF_NO_EHT = 1<<19, NL80211_RRF_PSD = 1<<20, NL80211_RRF_DFS_CONCURRENT = 1<<21, - NL80211_RRF_NO_UHB_VLP_CLIENT = 1<<22, - NL80211_RRF_NO_UHB_AFC_CLIENT = 1<<23, + NL80211_RRF_NO_6GHZ_VLP_CLIENT = 1<<22, + NL80211_RRF_NO_6GHZ_AFC_CLIENT = 1<<23, }; #define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR @@ -4551,6 +4555,8 @@ enum nl80211_reg_rule_flags { #define NL80211_RRF_NO_HT40 (NL80211_RRF_NO_HT40MINUS |\ NL80211_RRF_NO_HT40PLUS) #define NL80211_RRF_GO_CONCURRENT NL80211_RRF_IR_CONCURRENT +#define NL80211_RRF_NO_UHB_VLP_CLIENT NL80211_RRF_NO_6GHZ_VLP_CLIENT +#define NL80211_RRF_NO_UHB_AFC_CLIENT NL80211_RRF_NO_6GHZ_AFC_CLIENT /* For backport compatibility with older userspace */ #define NL80211_RRF_NO_IR_ALL (NL80211_RRF_NO_IR | __NL80211_RRF_NO_IBSS) @@ -5098,14 +5104,17 @@ enum nl80211_bss_use_for { * BSS isn't possible * @NL80211_BSS_CANNOT_USE_NSTR_NONPRIMARY: NSTR nonprimary links aren't * supported by the device, and this BSS entry represents one. - * @NL80211_BSS_CANNOT_USE_UHB_PWR_MISMATCH: STA is not supporting + * @NL80211_BSS_CANNOT_USE_6GHZ_PWR_MISMATCH: STA is not supporting * the AP power type (SP, VLP, AP) that the AP uses. */ enum nl80211_bss_cannot_use_reasons { NL80211_BSS_CANNOT_USE_NSTR_NONPRIMARY = 1 << 0, - NL80211_BSS_CANNOT_USE_UHB_PWR_MISMATCH = 1 << 1, + NL80211_BSS_CANNOT_USE_6GHZ_PWR_MISMATCH = 1 << 1, }; +#define NL80211_BSS_CANNOT_USE_UHB_PWR_MISMATCH \ + NL80211_BSS_CANNOT_USE_6GHZ_PWR_MISMATCH + /** * enum nl80211_bss - netlink attributes for a BSS * diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5ca5457538109..3b3b511f9f69e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1198,11 +1198,11 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy, if ((chan->flags & IEEE80211_CHAN_DFS_CONCURRENT) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_DFS_CONCURRENT)) goto nla_put_failure; - if ((chan->flags & IEEE80211_CHAN_NO_UHB_VLP_CLIENT) && - nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_UHB_VLP_CLIENT)) + if ((chan->flags & IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_6GHZ_VLP_CLIENT)) goto nla_put_failure; - if ((chan->flags & IEEE80211_CHAN_NO_UHB_AFC_CLIENT) && - nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_UHB_AFC_CLIENT)) + if ((chan->flags & IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT)) goto nla_put_failure; } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2741b626919af..50cadbad485fb 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -5,7 +5,7 @@ * Copyright 2008-2011 Luis R. Rodriguez * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2023 Intel Corporation + * Copyright (C) 2018 - 2024 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -1595,10 +1595,10 @@ static u32 map_regdom_flags(u32 rd_flags) channel_flags |= IEEE80211_CHAN_NO_EHT; if (rd_flags & NL80211_RRF_DFS_CONCURRENT) channel_flags |= IEEE80211_CHAN_DFS_CONCURRENT; - if (rd_flags & NL80211_RRF_NO_UHB_VLP_CLIENT) - channel_flags |= IEEE80211_CHAN_NO_UHB_VLP_CLIENT; - if (rd_flags & NL80211_RRF_NO_UHB_AFC_CLIENT) - channel_flags |= IEEE80211_CHAN_NO_UHB_AFC_CLIENT; + if (rd_flags & NL80211_RRF_NO_6GHZ_VLP_CLIENT) + channel_flags |= IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT; + if (rd_flags & NL80211_RRF_NO_6GHZ_AFC_CLIENT) + channel_flags |= IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT; if (rd_flags & NL80211_RRF_PSD) channel_flags |= IEEE80211_CHAN_PSD; return channel_flags; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 04039e9dbd051..88e8b25c073a7 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -5,7 +5,7 @@ * Copyright 2008 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2016 Intel Deutschland GmbH - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #include #include @@ -3041,9 +3041,9 @@ static bool cfg80211_uhb_power_type_valid(const u8 *ie, case IEEE80211_6GHZ_CTRL_REG_LPI_AP: return true; case IEEE80211_6GHZ_CTRL_REG_SP_AP: - return !(flags & IEEE80211_CHAN_NO_UHB_AFC_CLIENT); + return !(flags & IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT); case IEEE80211_6GHZ_CTRL_REG_VLP_AP: - return !(flags & IEEE80211_CHAN_NO_UHB_VLP_CLIENT); + return !(flags & IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT); } } return false; @@ -3112,7 +3112,7 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, data->restrict_use = 1; data->use_for = 0; data->cannot_use_reasons = - NL80211_BSS_CANNOT_USE_UHB_PWR_MISMATCH; + NL80211_BSS_CANNOT_USE_6GHZ_PWR_MISMATCH; } if (ext) { -- cgit 1.2.3-korg From a110a3b79177ddd7e7295671df97fb5386406835 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Feb 2024 16:54:08 +0200 Subject: wifi: cfg80211: optionally support monitor on disabled channels If the hardware supports a disabled channel, it may in some cases be possible to use monitor mode (without any transmit) on it when it's otherwise disabled. Add a new channel flag IEEE80211_CHAN_CAN_MONITOR that makes it possible for a driver to indicate such a thing. Make it per channel so drivers could have a choice with it, perhaps it's only possible on some channels, perhaps some channels are not supported at all, but still there and marked disabled. In _nl80211_parse_chandef() simplify the code and check only for an unknown channel, _cfg80211_chandef_usable() will later check for IEEE80211_CHAN_DISABLED anyway. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240206164849.87fad3a21a09.I9116b2fdc2e2c9fd59a9273a64db7fcb41fc0328@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++++ include/uapi/linux/nl80211.h | 4 ++++ net/wireless/chan.c | 26 +++++++++++++++++++------- net/wireless/core.h | 5 ++++- net/wireless/nl80211.c | 27 ++++++++++++++++++--------- 5 files changed, 49 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7bb8484e859e6..0a3151587556e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -122,6 +122,9 @@ struct wiphy; * not permitted using this channel * @IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT: Client connection with AFC AP * not permitted using this channel + * @IEEE80211_CHAN_CAN_MONITOR: This channel can be used for monitor + * mode even in the presence of other (regulatory) restrictions, + * even if it is otherwise disabled. */ enum ieee80211_channel_flags { IEEE80211_CHAN_DISABLED = 1<<0, @@ -148,6 +151,7 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_DFS_CONCURRENT = 1<<21, IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT = 1<<22, IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT = 1<<23, + IEEE80211_CHAN_CAN_MONITOR = 1<<24, }; #define IEEE80211_CHAN_NO_HT40 \ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 13fa10804909a..546cc176c2a99 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4273,6 +4273,9 @@ enum nl80211_wmm_rule { * not allowed using this channel * @NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT: Client connection to AFC AP * not allowed using this channel + * @NL80211_FREQUENCY_ATTR_CAN_MONITOR: This channel can be used in monitor + * mode despite other (regulatory) restrictions, even if the channel is + * otherwise completely disabled. * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -4315,6 +4318,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_DFS_CONCURRENT, NL80211_FREQUENCY_ATTR_NO_6GHZ_VLP_CLIENT, NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT, + NL80211_FREQUENCY_ATTR_CAN_MONITOR, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, diff --git a/net/wireless/chan.c b/net/wireless/chan.c index e2ce89afa9ffe..3414b2c3abcc5 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -1145,7 +1145,7 @@ EXPORT_SYMBOL(cfg80211_chandef_dfs_cac_time); static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, u32 center_freq, u32 bandwidth, - u32 prohibited_flags) + u32 prohibited_flags, bool monitor) { struct ieee80211_channel *c; u32 freq, start_freq, end_freq; @@ -1155,7 +1155,11 @@ static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, for (freq = start_freq; freq <= end_freq; freq += MHZ_TO_KHZ(20)) { c = ieee80211_get_channel_khz(wiphy, freq); - if (!c || c->flags & prohibited_flags) + if (!c) + return false; + if (monitor && c->flags & IEEE80211_CHAN_CAN_MONITOR) + continue; + if (c->flags & prohibited_flags) return false; } @@ -1215,9 +1219,9 @@ static bool cfg80211_edmg_usable(struct wiphy *wiphy, u8 edmg_channels, return true; } -bool cfg80211_chandef_usable(struct wiphy *wiphy, - const struct cfg80211_chan_def *chandef, - u32 prohibited_flags) +bool _cfg80211_chandef_usable(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + u32 prohibited_flags, bool monitor) { struct ieee80211_sta_ht_cap *ht_cap; struct ieee80211_sta_vht_cap *vht_cap; @@ -1379,14 +1383,22 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, if (!cfg80211_secondary_chans_ok(wiphy, ieee80211_chandef_to_khz(chandef), - width, prohibited_flags)) + width, prohibited_flags, monitor)) return false; if (!chandef->center_freq2) return true; return cfg80211_secondary_chans_ok(wiphy, MHZ_TO_KHZ(chandef->center_freq2), - width, prohibited_flags); + width, prohibited_flags, monitor); +} + +bool cfg80211_chandef_usable(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + u32 prohibited_flags) +{ + return _cfg80211_chandef_usable(wiphy, chandef, prohibited_flags, + false); } EXPORT_SYMBOL(cfg80211_chandef_usable); diff --git a/net/wireless/core.h b/net/wireless/core.h index debf63e6c61fc..118f2f6198289 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -3,7 +3,7 @@ * Wireless configuration interface internals. * * Copyright 2006-2010 Johannes Berg - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #ifndef __NET_WIRELESS_CORE_H #define __NET_WIRELESS_CORE_H @@ -492,6 +492,9 @@ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef, bool cfg80211_wdev_on_sub_chan(struct wireless_dev *wdev, struct ieee80211_channel *chan, bool primary_only); +bool _cfg80211_chandef_usable(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + u32 prohibited_flags, bool monitor); static inline unsigned int elapsed_jiffies_msecs(unsigned long start) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 3b3b511f9f69e..612ca99fbf39c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3218,9 +3218,9 @@ static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev) wdev->iftype == NL80211_IFTYPE_P2P_GO; } -int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, - struct genl_info *info, - struct cfg80211_chan_def *chandef) +static int _nl80211_parse_chandef(struct cfg80211_registered_device *rdev, + struct genl_info *info, bool monitor, + struct cfg80211_chan_def *chandef) { struct netlink_ext_ack *extack = info->extack; struct nlattr **attrs = info->attrs; @@ -3245,10 +3245,9 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, chandef->freq1_offset = control_freq % 1000; chandef->center_freq2 = 0; - /* Primary channel not allowed */ - if (!chandef->chan || chandef->chan->flags & IEEE80211_CHAN_DISABLED) { + if (!chandef->chan) { NL_SET_ERR_MSG_ATTR(extack, attrs[NL80211_ATTR_WIPHY_FREQ], - "Channel is disabled"); + "Unknown channel"); return -EINVAL; } @@ -3343,8 +3342,9 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, return -EINVAL; } - if (!cfg80211_chandef_usable(&rdev->wiphy, chandef, - IEEE80211_CHAN_DISABLED)) { + if (!_cfg80211_chandef_usable(&rdev->wiphy, chandef, + IEEE80211_CHAN_DISABLED, + monitor)) { NL_SET_ERR_MSG(extack, "(extension) channel is disabled"); return -EINVAL; } @@ -3359,6 +3359,13 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, return 0; } +int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, + struct genl_info *info, + struct cfg80211_chan_def *chandef) +{ + return _nl80211_parse_chandef(rdev, info, false, chandef); +} + static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, struct net_device *dev, struct genl_info *info, @@ -3383,7 +3390,9 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, link_id = 0; } - result = nl80211_parse_chandef(rdev, info, &chandef); + result = _nl80211_parse_chandef(rdev, info, + iftype == NL80211_IFTYPE_MONITOR, + &chandef); if (result) return result; -- cgit 1.2.3-korg From 49c17da387bb069d802b9be538f0f07d08e1c2b8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Feb 2024 16:54:09 +0200 Subject: wifi: mac80211: drop injection on disabled-chan monitor If the driver uses the new IEEE80211_CHAN_CAN_MONITOR, we may monitor on channels that are, e.g. via regulatory, otherwise considered disabled. However, we really shouldn't transmit on them, so prevent that. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240206164849.9c03dcf67dbe.Ib86a851c274c440908c663f6dd774b79bfc3965d@changeid Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 876de3ba98ba4..373275d18f5a4 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * Transmit and frame generation functions. */ @@ -2393,6 +2393,14 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, else goto fail_rcu; + /* + * If driver/HW supports IEEE80211_CHAN_CAN_MONITOR we still + * shouldn't transmit on disabled channels. + */ + if (!cfg80211_chandef_usable(local->hw.wiphy, chandef, + IEEE80211_CHAN_DISABLED)) + goto fail_rcu; + /* * Frame injection is not allowed if beaconing is not allowed * or if we need radar detection. Beaconing is usually not allowed when -- cgit 1.2.3-korg From a64be8296e31f432d4a9df4db684cc8a250eb81c Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Tue, 6 Feb 2024 16:54:11 +0200 Subject: wifi: cfg80211: report unprotected deauth/disassoc in wowlan Add to cfg80211_wowlan_wakeup another wakeup reason - unprot_deauth_disassoc. To be set to true if the woke up was due to an unprotected deauth or disassoc frame in MFP. In that case report WOWLAN_TRIG_UNPROTECTED_DEAUTH_DISASSOC. Signed-off-by: Shaul Triebitz Signed-off-by: Miri Korenblit Link: https://msgid.link/20240206164849.a3d739850d03.I8f52a21c4f36d1af1f8068bed79e2f9cbf8289ef@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 5 ++++- net/wireless/nl80211.c | 5 +++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0a3151587556e..93e9abb7fc3d5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3612,12 +3612,15 @@ struct cfg80211_wowlan_nd_info { * @tcp_connlost: TCP connection lost or failed to establish * @tcp_nomoretokens: TCP data ran out of tokens * @net_detect: if not %NULL, woke up because of net detect + * @unprot_deauth_disassoc: woke up due to unprotected deauth or + * disassoc frame (in MFP). */ struct cfg80211_wowlan_wakeup { bool disconnect, magic_pkt, gtk_rekey_failure, eap_identity_req, four_way_handshake, rfkill_release, packet_80211, - tcp_match, tcp_connlost, tcp_nomoretokens; + tcp_match, tcp_connlost, tcp_nomoretokens, + unprot_deauth_disassoc; s32 pattern_idx; u32 packet_present_len, packet_len; const void *packet; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 612ca99fbf39c..5f18cbf7cc3d7 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -19851,6 +19851,11 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS)) goto free_msg; + if (wakeup->unprot_deauth_disassoc && + nla_put_flag(msg, + NL80211_WOWLAN_TRIG_UNPROTECTED_DEAUTH_DISASSOC)) + goto free_msg; + if (wakeup->packet) { u32 pkt_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211; u32 len_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN; -- cgit 1.2.3-korg From a3522a2edb3faf8cb98d38c2a99f5967beef24e2 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 9 Feb 2024 17:43:37 +0100 Subject: ipv4: Set the routing scope properly in ip_route_output_ports(). Set scope automatically in ip_route_output_ports() (using the socket SOCK_LOCALROUTE flag). This way, callers don't have to overload the tos with the RTO_ONLINK flag, like RT_CONN_FLAGS() does. For callers that don't pass a struct sock, this doesn't change anything as the scope is still set to RT_SCOPE_UNIVERSE when sk is NULL. Callers that passed a struct sock and used RT_CONN_FLAGS(sk) or RT_CONN_FLAGS_TOS(sk, tos) for the tos are modified to use ip_sock_tos(sk) and RT_TOS(tos) respectively, as overloading tos with the RTO_ONLINK flag now becomes unnecessary. In drivers/net/amt.c, all ip_route_output_ports() calls use a 0 tos parameter, ignoring the SOCK_LOCALROUTE flag of the socket. But the sk parameter is a kernel socket, which doesn't have any configuration path for setting SOCK_LOCALROUTE anyway. Therefore, ip_route_output_ports() will continue to initialise scope with RT_SCOPE_UNIVERSE and amt.c doesn't need to be modified. Also, remove RT_CONN_FLAGS() and RT_CONN_FLAGS_TOS() from route.h as these macros are now unused. The objective is to eventually remove RTO_ONLINK entirely to allow converting ->flowi4_tos to dscp_t. This will ensure proper isolation between the DSCP and ECN bits, thus minimising the risk of introducing bugs where TOS values interfere with ECN. Signed-off-by: Guillaume Nault Reviewed-by: David Ahern Link: https://lore.kernel.org/r/dacfd2ab40685e20959ab7b53c427595ba229e7d.1707496938.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/net/route.h | 7 ++----- net/ipv4/af_inet.c | 2 +- net/ipv4/datagram.c | 2 +- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/ip_output.c | 2 +- net/l2tp/l2tp_ip.c | 2 +- 6 files changed, 7 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/net/route.h b/include/net/route.h index 980ab474eabdc..d4a0147942f1a 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -37,9 +37,6 @@ #define RTO_ONLINK 0x01 -#define RT_CONN_FLAGS(sk) (RT_TOS(READ_ONCE(inet_sk(sk)->tos)) | sock_flag(sk, SOCK_LOCALROUTE)) -#define RT_CONN_FLAGS_TOS(sk,tos) (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE)) - static inline __u8 ip_sock_rt_scope(const struct sock *sk) { if (sock_flag(sk, SOCK_LOCALROUTE)) @@ -163,8 +160,8 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi __u8 proto, __u8 tos, int oif) { flowi4_init_output(fl4, oif, sk ? READ_ONCE(sk->sk_mark) : 0, tos, - RT_SCOPE_UNIVERSE, proto, - sk ? inet_sk_flowi_flags(sk) : 0, + sk ? ip_sock_rt_scope(sk) : RT_SCOPE_UNIVERSE, + proto, sk ? inet_sk_flowi_flags(sk) : 0, daddr, saddr, dport, sport, sock_net_uid(net, sk)); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index a5a820ee20266..ad278009e4698 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1326,7 +1326,7 @@ int inet_sk_rebuild_header(struct sock *sk) fl4 = &inet->cork.fl.u.ip4; rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, inet->inet_saddr, inet->inet_dport, inet->inet_sport, - sk->sk_protocol, RT_CONN_FLAGS(sk), + sk->sk_protocol, ip_sock_rt_tos(sk), sk->sk_bound_dev_if); if (!IS_ERR(rt)) { err = 0; diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 2cc50cbfc2a31..cc6d0bd7b0a96 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -119,7 +119,7 @@ void ip4_datagram_release_cb(struct sock *sk) rt = ip_route_output_ports(sock_net(sk), &fl4, sk, daddr, inet->inet_saddr, inet->inet_dport, inet->inet_sport, sk->sk_protocol, - RT_CONN_FLAGS(sk), sk->sk_bound_dev_if); + ip_sock_rt_tos(sk), sk->sk_bound_dev_if); dst = !IS_ERR(rt) ? &rt->dst : NULL; sk_dst_set(sk, dst); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 459af1f897395..747ed7344cbe4 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1467,7 +1467,7 @@ static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *f rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, inet->inet_saddr, inet->inet_dport, inet->inet_sport, sk->sk_protocol, - RT_CONN_FLAGS(sk), sk->sk_bound_dev_if); + ip_sock_rt_tos(sk), sk->sk_bound_dev_if); if (IS_ERR(rt)) rt = NULL; if (rt) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 41537d18eecfd..5b5a0adb927ff 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -493,7 +493,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, inet->inet_dport, inet->inet_sport, sk->sk_protocol, - RT_CONN_FLAGS_TOS(sk, tos), + RT_TOS(tos), sk->sk_bound_dev_if); if (IS_ERR(rt)) goto no_route; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 9a2a9ed3ba478..970af3983d116 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -478,7 +478,7 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, inet->inet_saddr, inet->inet_dport, inet->inet_sport, - sk->sk_protocol, RT_CONN_FLAGS(sk), + sk->sk_protocol, ip_sock_rt_tos(sk), sk->sk_bound_dev_if); if (IS_ERR(rt)) goto no_route; -- cgit 1.2.3-korg From 86fe596b588fc9ec23bf93a5c8f86fc16225dd3a Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Fri, 9 Feb 2024 10:06:56 -0800 Subject: net: sched: Remove NET_ACT_IPT from Kconfig After this commit ba24ea129126 ("net/sched: Retire ipt action") NET_ACT_IPT is not needed anymore as the action is retired and the code is removed. Clean the Kconfig part as well. Signed-off-by: Harshit Mogalapalli Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20240209180656.867546-1-harshit.m.mogalapalli@oracle.com Signed-off-by: Paolo Abeni --- net/sched/Kconfig | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'net') diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 470c70deffe2d..8180d0c12fcea 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -737,16 +737,6 @@ config NET_ACT_SAMPLE To compile this code as a module, choose M here: the module will be called act_sample. -config NET_ACT_IPT - tristate "IPtables targets" - depends on NET_CLS_ACT && NETFILTER && NETFILTER_XTABLES - help - Say Y here to be able to invoke iptables targets after successful - classification. - - To compile this code as a module, choose M here: the - module will be called act_ipt. - config NET_ACT_NAT tristate "Stateless NAT" depends on NET_CLS_ACT -- cgit 1.2.3-korg From c83c22ec1493c0b7cc77327bedbd387e295872b6 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 12 Feb 2024 22:35:50 +0100 Subject: can: canxl: add virtual CAN network identifier support CAN XL data frames contain an 8-bit virtual CAN network identifier (VCID). A VCID value of zero represents an 'untagged' CAN XL frame. To receive and send these optional VCIDs via CAN_RAW sockets a new socket option CAN_RAW_XL_VCID_OPTS is introduced to define/access VCID content: - tx: set the outgoing VCID value by the kernel (one fixed 8-bit value) - tx: pass through VCID values from the user space (e.g. for traffic replay) - rx: apply VCID receive filter (value/mask) to be passed to the user space With the 'tx pass through' option CAN_RAW_XL_VCID_TX_PASS all valid VCID values can be sent, e.g. to replay full qualified CAN XL traffic. The VCID value provided for the CAN_RAW_XL_VCID_TX_SET option will override the VCID value in the struct canxl_frame.prio defined for CAN_RAW_XL_VCID_TX_PASS when both flags are set. With a rx_vcid_mask of zero all possible VCID values (0x00 - 0xFF) are passed to the user space when the CAN_RAW_XL_VCID_RX_FILTER flag is set. Without this flag only untagged CAN XL frames (VCID = 0x00) are delivered to the user space (default). The 8-bit VCID is stored inside the CAN XL prio element (only in CAN XL frames!) to not interfere with other CAN content or the CAN filters provided by the CAN_RAW sockets and kernel infrastruture. Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20240212213550.18516-1-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can.h | 9 ++++- include/uapi/linux/can/raw.h | 16 ++++++++ net/can/af_can.c | 2 + net/can/raw.c | 93 ++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 110 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index 939db23882084..e78cbd85ce7c7 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -193,9 +193,14 @@ struct canfd_frame { #define CANXL_XLF 0x80 /* mandatory CAN XL frame flag (must always be set!) */ #define CANXL_SEC 0x01 /* Simple Extended Content (security/segmentation) */ +/* the 8-bit VCID is optionally placed in the canxl_frame.prio element */ +#define CANXL_VCID_OFFSET 16 /* bit offset of VCID in prio element */ +#define CANXL_VCID_VAL_MASK 0xFFUL /* VCID is an 8-bit value */ +#define CANXL_VCID_MASK (CANXL_VCID_VAL_MASK << CANXL_VCID_OFFSET) + /** * struct canxl_frame - CAN with e'X'tended frame 'L'ength frame structure - * @prio: 11 bit arbitration priority with zero'ed CAN_*_FLAG flags + * @prio: 11 bit arbitration priority with zero'ed CAN_*_FLAG flags / VCID * @flags: additional flags for CAN XL * @sdt: SDU (service data unit) type * @len: frame payload length in byte (CANXL_MIN_DLEN .. CANXL_MAX_DLEN) @@ -205,7 +210,7 @@ struct canfd_frame { * @prio shares the same position as @can_id from struct can[fd]_frame. */ struct canxl_frame { - canid_t prio; /* 11 bit priority for arbitration (canid_t) */ + canid_t prio; /* 11 bit priority for arbitration / 8 bit VCID */ __u8 flags; /* additional flags for CAN XL */ __u8 sdt; /* SDU (service data unit) type */ __u16 len; /* frame payload length in byte */ diff --git a/include/uapi/linux/can/raw.h b/include/uapi/linux/can/raw.h index 31622c9b7988d..e024d896e2787 100644 --- a/include/uapi/linux/can/raw.h +++ b/include/uapi/linux/can/raw.h @@ -65,6 +65,22 @@ enum { CAN_RAW_FD_FRAMES, /* allow CAN FD frames (default:off) */ CAN_RAW_JOIN_FILTERS, /* all filters must match to trigger */ CAN_RAW_XL_FRAMES, /* allow CAN XL frames (default:off) */ + CAN_RAW_XL_VCID_OPTS, /* CAN XL VCID configuration options */ }; +/* configuration for CAN XL virtual CAN identifier (VCID) handling */ +struct can_raw_vcid_options { + + __u8 flags; /* flags for vcid (filter) behaviour */ + __u8 tx_vcid; /* VCID value set into canxl_frame.prio */ + __u8 rx_vcid; /* VCID value for VCID filter */ + __u8 rx_vcid_mask; /* VCID mask for VCID filter */ + +}; + +/* can_raw_vcid_options.flags for CAN XL virtual CAN identifier handling */ +#define CAN_RAW_XL_VCID_TX_SET 0x01 +#define CAN_RAW_XL_VCID_TX_PASS 0x02 +#define CAN_RAW_XL_VCID_RX_FILTER 0x04 + #endif /* !_UAPI_CAN_RAW_H */ diff --git a/net/can/af_can.c b/net/can/af_can.c index 7343fd487dbea..707576eeeb582 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -865,6 +865,8 @@ static __init int can_init(void) /* check for correct padding to be able to use the structs similarly */ BUILD_BUG_ON(offsetof(struct can_frame, len) != offsetof(struct canfd_frame, len) || + offsetof(struct can_frame, len) != + offsetof(struct canxl_frame, flags) || offsetof(struct can_frame, data) != offsetof(struct canfd_frame, data)); diff --git a/net/can/raw.c b/net/can/raw.c index e6b822624ba23..cb8e6f788af84 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -91,6 +91,10 @@ struct raw_sock { int recv_own_msgs; int fd_frames; int xl_frames; + struct can_raw_vcid_options raw_vcid_opts; + canid_t tx_vcid_shifted; + canid_t rx_vcid_shifted; + canid_t rx_vcid_mask_shifted; int join_filters; int count; /* number of active filters */ struct can_filter dfilter; /* default/single filter */ @@ -134,10 +138,29 @@ static void raw_rcv(struct sk_buff *oskb, void *data) return; /* make sure to not pass oversized frames to the socket */ - if ((!ro->fd_frames && can_is_canfd_skb(oskb)) || - (!ro->xl_frames && can_is_canxl_skb(oskb))) + if (!ro->fd_frames && can_is_canfd_skb(oskb)) return; + if (can_is_canxl_skb(oskb)) { + struct canxl_frame *cxl = (struct canxl_frame *)oskb->data; + + /* make sure to not pass oversized frames to the socket */ + if (!ro->xl_frames) + return; + + /* filter CAN XL VCID content */ + if (ro->raw_vcid_opts.flags & CAN_RAW_XL_VCID_RX_FILTER) { + /* apply VCID filter if user enabled the filter */ + if ((cxl->prio & ro->rx_vcid_mask_shifted) != + (ro->rx_vcid_shifted & ro->rx_vcid_mask_shifted)) + return; + } else { + /* no filter => do not forward VCID tagged frames */ + if (cxl->prio & CANXL_VCID_MASK) + return; + } + } + /* eliminate multiple filter matches for the same skb */ if (this_cpu_ptr(ro->uniq)->skb == oskb && this_cpu_ptr(ro->uniq)->skbcnt == can_skb_prv(oskb)->skbcnt) { @@ -698,6 +721,19 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, ro->fd_frames = ro->xl_frames; break; + case CAN_RAW_XL_VCID_OPTS: + if (optlen != sizeof(ro->raw_vcid_opts)) + return -EINVAL; + + if (copy_from_sockptr(&ro->raw_vcid_opts, optval, optlen)) + return -EFAULT; + + /* prepare 32 bit values for handling in hot path */ + ro->tx_vcid_shifted = ro->raw_vcid_opts.tx_vcid << CANXL_VCID_OFFSET; + ro->rx_vcid_shifted = ro->raw_vcid_opts.rx_vcid << CANXL_VCID_OFFSET; + ro->rx_vcid_mask_shifted = ro->raw_vcid_opts.rx_vcid_mask << CANXL_VCID_OFFSET; + break; + case CAN_RAW_JOIN_FILTERS: if (optlen != sizeof(ro->join_filters)) return -EINVAL; @@ -786,6 +822,21 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, val = &ro->xl_frames; break; + case CAN_RAW_XL_VCID_OPTS: + /* user space buffer to small for VCID opts? */ + if (len < sizeof(ro->raw_vcid_opts)) { + /* return -ERANGE and needed space in optlen */ + err = -ERANGE; + if (put_user(sizeof(ro->raw_vcid_opts), optlen)) + err = -EFAULT; + } else { + if (len > sizeof(ro->raw_vcid_opts)) + len = sizeof(ro->raw_vcid_opts); + if (copy_to_user(optval, &ro->raw_vcid_opts, len)) + err = -EFAULT; + } + break; + case CAN_RAW_JOIN_FILTERS: if (len > sizeof(int)) len = sizeof(int); @@ -803,23 +854,41 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, return 0; } -static bool raw_bad_txframe(struct raw_sock *ro, struct sk_buff *skb, int mtu) +static void raw_put_canxl_vcid(struct raw_sock *ro, struct sk_buff *skb) +{ + struct canxl_frame *cxl = (struct canxl_frame *)skb->data; + + /* sanitize non CAN XL bits */ + cxl->prio &= (CANXL_PRIO_MASK | CANXL_VCID_MASK); + + /* clear VCID in CAN XL frame if pass through is disabled */ + if (!(ro->raw_vcid_opts.flags & CAN_RAW_XL_VCID_TX_PASS)) + cxl->prio &= CANXL_PRIO_MASK; + + /* set VCID in CAN XL frame if enabled */ + if (ro->raw_vcid_opts.flags & CAN_RAW_XL_VCID_TX_SET) { + cxl->prio &= CANXL_PRIO_MASK; + cxl->prio |= ro->tx_vcid_shifted; + } +} + +static unsigned int raw_check_txframe(struct raw_sock *ro, struct sk_buff *skb, int mtu) { /* Classical CAN -> no checks for flags and device capabilities */ if (can_is_can_skb(skb)) - return false; + return CAN_MTU; /* CAN FD -> needs to be enabled and a CAN FD or CAN XL device */ if (ro->fd_frames && can_is_canfd_skb(skb) && (mtu == CANFD_MTU || can_is_canxl_dev_mtu(mtu))) - return false; + return CANFD_MTU; /* CAN XL -> needs to be enabled and a CAN XL device */ if (ro->xl_frames && can_is_canxl_skb(skb) && can_is_canxl_dev_mtu(mtu)) - return false; + return CANXL_MTU; - return true; + return 0; } static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) @@ -829,6 +898,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) struct sockcm_cookie sockc; struct sk_buff *skb; struct net_device *dev; + unsigned int txmtu; int ifindex; int err = -EINVAL; @@ -869,9 +939,16 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) goto free_skb; err = -EINVAL; - if (raw_bad_txframe(ro, skb, dev->mtu)) + + /* check for valid CAN (CC/FD/XL) frame content */ + txmtu = raw_check_txframe(ro, skb, dev->mtu); + if (!txmtu) goto free_skb; + /* only CANXL: clear/forward/set VCID value */ + if (txmtu == CANXL_MTU) + raw_put_canxl_vcid(ro, skb); + sockcm_init(&sockc, sk); if (msg->msg_controllen) { err = sock_cmsg_send(sk, msg, &sockc); -- cgit 1.2.3-korg From 32c7eec21c11e149a9195cf0d48cc530d1e5a637 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 11 Feb 2024 09:24:55 -0800 Subject: net: sched: codel replace GPLv2/BSD boilerplate The prologue to codel is using BSD-3 clause and GPL-2 boiler plate language. Replace it by using SPDX. The automated treewide scan in commit d2912cb15bdd ("treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 500") did not pickup dual licensed code. Signed-off-by: Stephen Hemminger Acked-by: Dave Taht Link: https://lore.kernel.org/r/20240211172532.6568-1-stephen@networkplumber.org Signed-off-by: Paolo Abeni --- net/sched/sch_codel.c | 32 +------------------------------- 1 file changed, 1 insertion(+), 31 deletions(-) (limited to 'net') diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 61904d3a593b7..ecb3f164bb25b 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* * Codel - The Controlled-Delay Active Queue Management algorithm * @@ -7,37 +8,6 @@ * Implemented on linux by : * Copyright (C) 2012 Michael D. Taht * Copyright (C) 2012,2015 Eric Dumazet - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The names of the authors may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * Alternatively, provided that this notice is retained in full, this - * software may be distributed under the terms of the GNU General - * Public License ("GPL") version 2, in which case the provisions of the - * GPL apply INSTEAD OF those given above. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH - * DAMAGE. - * */ #include -- cgit 1.2.3-korg From f383ced24d6ae6c1989394d052d3109b9d645f11 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 11 Feb 2024 21:44:03 +0000 Subject: vlan: use xarray iterator to implement /proc/net/vlan/config Adopt net->dev_by_index as I did in commit 0e0939c0adf9 ("net-procfs: use xarray iterator to implement /proc/net/dev") Not only this removes quadratic behavior, it also makes sure an existing vlan device is always visible in the dump, regardless of concurrent net->dev_base_head changes. Signed-off-by: Eric Dumazet Reviewed-by: Jakub Kicinski Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/20240211214404.1882191-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/8021q/vlanproc.c | 46 ++++++++++++++++------------------------------ 1 file changed, 16 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 7825c129742a0..87b959da00cd3 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -163,48 +163,34 @@ void vlan_proc_rem_dev(struct net_device *vlandev) * The following few functions build the content of /proc/net/vlan/config */ -/* start read of /proc/net/vlan/config */ -static void *vlan_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(rcu) +static void *vlan_seq_from_index(struct seq_file *seq, loff_t *pos) { + unsigned long ifindex = *pos; struct net_device *dev; - struct net *net = seq_file_net(seq); - loff_t i = 1; - - rcu_read_lock(); - if (*pos == 0) - return SEQ_START_TOKEN; - for_each_netdev_rcu(net, dev) { + for_each_netdev_dump(seq_file_net(seq), dev, ifindex) { if (!is_vlan_dev(dev)) continue; - - if (i++ == *pos) - return dev; + *pos = dev->ifindex; + return dev; } + return NULL; +} + +static void *vlan_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(rcu) +{ + rcu_read_lock(); + if (*pos == 0) + return SEQ_START_TOKEN; - return NULL; + return vlan_seq_from_index(seq, pos); } static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct net_device *dev; - struct net *net = seq_file_net(seq); - ++*pos; - - dev = v; - if (v == SEQ_START_TOKEN) - dev = net_device_entry(&net->dev_base_head); - - for_each_netdev_continue_rcu(net, dev) { - if (!is_vlan_dev(dev)) - continue; - - return dev; - } - - return NULL; + return vlan_seq_from_index(seq, pos); } static void vlan_seq_stop(struct seq_file *seq, void *v) -- cgit 1.2.3-korg From 3e41af90767dcf8e5ca91cfbbbcb772584940df9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 11 Feb 2024 21:44:04 +0000 Subject: rtnetlink: use xarray iterator to implement rtnl_dump_ifinfo() Adopt net->dev_by_index as I did in commit 0e0939c0adf9 ("net-procfs: use xarray iterator to implement /proc/net/dev") This makes sure an existing device is always visible in the dump, regardless of concurrent insertions/deletions. v2: added suggestions from Jakub Kicinski and Ido Schimmel, thanks for the help ! Link: https://lore.kernel.org/all/20240209142441.6c56435b@kernel.org/ Link: https://lore.kernel.org/all/ZckR-XOsULLI9EHc@shredder/ Signed-off-by: Eric Dumazet Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/20240211214404.1882191-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 58 ++++++++++++++++++---------------------------------- 1 file changed, 20 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 31f433950c8dc..6f1c5537e8421 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2188,25 +2188,22 @@ static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh, static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { + const struct rtnl_link_ops *kind_ops = NULL; struct netlink_ext_ack *extack = cb->extack; const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); - struct net *tgt_net = net; - int h, s_h; - int idx = 0, s_idx; - struct net_device *dev; - struct hlist_head *head; + unsigned int flags = NLM_F_MULTI; struct nlattr *tb[IFLA_MAX+1]; + struct { + unsigned long ifindex; + } *ctx = (void *)cb->ctx; + struct net *tgt_net = net; u32 ext_filter_mask = 0; - const struct rtnl_link_ops *kind_ops = NULL; - unsigned int flags = NLM_F_MULTI; + struct net_device *dev; int master_idx = 0; int netnsid = -1; int err, i; - s_h = cb->args[0]; - s_idx = cb->args[1]; - err = rtnl_valid_dump_ifinfo_req(nlh, cb->strict_check, tb, extack); if (err < 0) { if (cb->strict_check) @@ -2250,36 +2247,21 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) flags |= NLM_F_DUMP_FILTERED; walk_entries: - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - idx = 0; - head = &tgt_net->dev_index_head[h]; - hlist_for_each_entry(dev, head, index_hlist) { - if (link_dump_filtered(dev, master_idx, kind_ops)) - goto cont; - if (idx < s_idx) - goto cont; - err = rtnl_fill_ifinfo(skb, dev, net, - RTM_NEWLINK, - NETLINK_CB(cb->skb).portid, - nlh->nlmsg_seq, 0, flags, - ext_filter_mask, 0, NULL, 0, - netnsid, GFP_KERNEL); - - if (err < 0) { - if (likely(skb->len)) - goto out; - - goto out_err; - } -cont: - idx++; + err = 0; + for_each_netdev_dump(tgt_net, dev, ctx->ifindex) { + if (link_dump_filtered(dev, master_idx, kind_ops)) + continue; + err = rtnl_fill_ifinfo(skb, dev, net, RTM_NEWLINK, + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, 0, flags, + ext_filter_mask, 0, NULL, 0, + netnsid, GFP_KERNEL); + if (err < 0) { + if (likely(skb->len)) + err = skb->len; + break; } } -out: - err = skb->len; -out_err: - cb->args[1] = idx; - cb->args[0] = h; cb->seq = tgt_net->dev_base_seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); if (netnsid >= 0) -- cgit 1.2.3-korg From 9a3c93af549185863761fc4815ace9ab17dfc350 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 12 Feb 2024 14:06:58 +0000 Subject: vlan: use netdev_lockdep_set_classes() vlan uses vlan_dev_set_lockdep_class() which lacks qdisc_tx_busylock initialization. Use generic netdev_lockdep_set_classes() to not worry anymore. Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20240212140700.2795436-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/8021q/vlan_dev.c | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 407b2335f091e..790b54a7cbe38 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -504,28 +504,6 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev) dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); } -/* - * vlan network devices have devices nesting below it, and are a special - * "super class" of normal network devices; split their locks off into a - * separate class since they always nest. - */ -static struct lock_class_key vlan_netdev_xmit_lock_key; -static struct lock_class_key vlan_netdev_addr_lock_key; - -static void vlan_dev_set_lockdep_one(struct net_device *dev, - struct netdev_queue *txq, - void *unused) -{ - lockdep_set_class(&txq->_xmit_lock, &vlan_netdev_xmit_lock_key); -} - -static void vlan_dev_set_lockdep_class(struct net_device *dev) -{ - lockdep_set_class(&dev->addr_list_lock, - &vlan_netdev_addr_lock_key); - netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, NULL); -} - static __be16 vlan_parse_protocol(const struct sk_buff *skb) { struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); @@ -627,7 +605,7 @@ static int vlan_dev_init(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &vlan_type); - vlan_dev_set_lockdep_class(dev); + netdev_lockdep_set_classes(dev); vlan->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats); if (!vlan->vlan_pcpu_stats) -- cgit 1.2.3-korg From c74e1039912e63b91e8697796e321d157b44692e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 12 Feb 2024 14:06:59 +0000 Subject: net: bridge: use netdev_lockdep_set_classes() br_set_lockdep_class() is missing many details. Use generic netdev_lockdep_set_classes() to not worry anymore. Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20240212140700.2795436-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/bridge/br_device.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 65cee0ad3c1b6..717e9750614cd 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -108,13 +108,6 @@ out: return NETDEV_TX_OK; } -static struct lock_class_key bridge_netdev_addr_lock_key; - -static void br_set_lockdep_class(struct net_device *dev) -{ - lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key); -} - static int br_dev_init(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); @@ -153,7 +146,7 @@ static int br_dev_init(struct net_device *dev) br_fdb_hash_fini(br); } - br_set_lockdep_class(dev); + netdev_lockdep_set_classes(dev); return err; } -- cgit 1.2.3-korg From 0bef512012b1cd8820f0c9ec80e5f8ceb43fdd59 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 12 Feb 2024 14:07:00 +0000 Subject: net: add netdev_lockdep_set_classes() to virtual drivers Based on a syzbot report, it appears many virtual drivers do not yet use netdev_lockdep_set_classes(), triggerring lockdep false positives. WARNING: possible recursive locking detected 6.8.0-rc4-next-20240212-syzkaller #0 Not tainted syz-executor.0/19016 is trying to acquire lock: ffff8880162cb298 (_xmit_ETHER#2){+.-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffff8880162cb298 (_xmit_ETHER#2){+.-.}-{2:2}, at: __netif_tx_lock include/linux/netdevice.h:4452 [inline] ffff8880162cb298 (_xmit_ETHER#2){+.-.}-{2:2}, at: sch_direct_xmit+0x1c4/0x5f0 net/sched/sch_generic.c:340 but task is already holding lock: ffff8880223db4d8 (_xmit_ETHER#2){+.-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffff8880223db4d8 (_xmit_ETHER#2){+.-.}-{2:2}, at: __netif_tx_lock include/linux/netdevice.h:4452 [inline] ffff8880223db4d8 (_xmit_ETHER#2){+.-.}-{2:2}, at: sch_direct_xmit+0x1c4/0x5f0 net/sched/sch_generic.c:340 other info that might help us debug this: Possible unsafe locking scenario: CPU0 lock(_xmit_ETHER#2); lock(_xmit_ETHER#2); *** DEADLOCK *** May be due to missing lock nesting notation 9 locks held by syz-executor.0/19016: #0: ffffffff8f385208 (rtnl_mutex){+.+.}-{3:3}, at: rtnl_lock net/core/rtnetlink.c:79 [inline] #0: ffffffff8f385208 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x82c/0x1040 net/core/rtnetlink.c:6603 #1: ffffc90000a08c00 ((&in_dev->mr_ifc_timer)){+.-.}-{0:0}, at: call_timer_fn+0xc0/0x600 kernel/time/timer.c:1697 #2: ffffffff8e131520 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:298 [inline] #2: ffffffff8e131520 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:750 [inline] #2: ffffffff8e131520 (rcu_read_lock){....}-{1:2}, at: ip_finish_output2+0x45f/0x1360 net/ipv4/ip_output.c:228 #3: ffffffff8e131580 (rcu_read_lock_bh){....}-{1:2}, at: local_bh_disable include/linux/bottom_half.h:20 [inline] #3: ffffffff8e131580 (rcu_read_lock_bh){....}-{1:2}, at: rcu_read_lock_bh include/linux/rcupdate.h:802 [inline] #3: ffffffff8e131580 (rcu_read_lock_bh){....}-{1:2}, at: __dev_queue_xmit+0x2c4/0x3b10 net/core/dev.c:4284 #4: ffff8880416e3258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: spin_trylock include/linux/spinlock.h:361 [inline] #4: ffff8880416e3258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: qdisc_run_begin include/net/sch_generic.h:195 [inline] #4: ffff8880416e3258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: __dev_xmit_skb net/core/dev.c:3771 [inline] #4: ffff8880416e3258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: __dev_queue_xmit+0x1262/0x3b10 net/core/dev.c:4325 #5: ffff8880223db4d8 (_xmit_ETHER#2){+.-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] #5: ffff8880223db4d8 (_xmit_ETHER#2){+.-.}-{2:2}, at: __netif_tx_lock include/linux/netdevice.h:4452 [inline] #5: ffff8880223db4d8 (_xmit_ETHER#2){+.-.}-{2:2}, at: sch_direct_xmit+0x1c4/0x5f0 net/sched/sch_generic.c:340 #6: ffffffff8e131520 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:298 [inline] #6: ffffffff8e131520 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:750 [inline] #6: ffffffff8e131520 (rcu_read_lock){....}-{1:2}, at: ip_finish_output2+0x45f/0x1360 net/ipv4/ip_output.c:228 #7: ffffffff8e131580 (rcu_read_lock_bh){....}-{1:2}, at: local_bh_disable include/linux/bottom_half.h:20 [inline] #7: ffffffff8e131580 (rcu_read_lock_bh){....}-{1:2}, at: rcu_read_lock_bh include/linux/rcupdate.h:802 [inline] #7: ffffffff8e131580 (rcu_read_lock_bh){....}-{1:2}, at: __dev_queue_xmit+0x2c4/0x3b10 net/core/dev.c:4284 #8: ffff888014d9d258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: spin_trylock include/linux/spinlock.h:361 [inline] #8: ffff888014d9d258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: qdisc_run_begin include/net/sch_generic.h:195 [inline] #8: ffff888014d9d258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: __dev_xmit_skb net/core/dev.c:3771 [inline] #8: ffff888014d9d258 (dev->qdisc_tx_busylock ?: &qdisc_tx_busylock){+...}-{2:2}, at: __dev_queue_xmit+0x1262/0x3b10 net/core/dev.c:4325 stack backtrace: CPU: 1 PID: 19016 Comm: syz-executor.0 Not tainted 6.8.0-rc4-next-20240212-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114 check_deadlock kernel/locking/lockdep.c:3062 [inline] validate_chain+0x15c1/0x58e0 kernel/locking/lockdep.c:3856 __lock_acquire+0x1346/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1e4/0x530 kernel/locking/lockdep.c:5754 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] __netif_tx_lock include/linux/netdevice.h:4452 [inline] sch_direct_xmit+0x1c4/0x5f0 net/sched/sch_generic.c:340 __dev_xmit_skb net/core/dev.c:3784 [inline] __dev_queue_xmit+0x1912/0x3b10 net/core/dev.c:4325 neigh_output include/net/neighbour.h:542 [inline] ip_finish_output2+0xe66/0x1360 net/ipv4/ip_output.c:235 iptunnel_xmit+0x540/0x9b0 net/ipv4/ip_tunnel_core.c:82 ip_tunnel_xmit+0x20ee/0x2960 net/ipv4/ip_tunnel.c:831 erspan_xmit+0x9de/0x1460 net/ipv4/ip_gre.c:720 __netdev_start_xmit include/linux/netdevice.h:4989 [inline] netdev_start_xmit include/linux/netdevice.h:5003 [inline] xmit_one net/core/dev.c:3555 [inline] dev_hard_start_xmit+0x242/0x770 net/core/dev.c:3571 sch_direct_xmit+0x2b6/0x5f0 net/sched/sch_generic.c:342 __dev_xmit_skb net/core/dev.c:3784 [inline] __dev_queue_xmit+0x1912/0x3b10 net/core/dev.c:4325 neigh_output include/net/neighbour.h:542 [inline] ip_finish_output2+0xe66/0x1360 net/ipv4/ip_output.c:235 igmpv3_send_cr net/ipv4/igmp.c:723 [inline] igmp_ifc_timer_expire+0xb71/0xd90 net/ipv4/igmp.c:813 call_timer_fn+0x17e/0x600 kernel/time/timer.c:1700 expire_timers kernel/time/timer.c:1751 [inline] __run_timers+0x621/0x830 kernel/time/timer.c:2038 run_timer_softirq+0x67/0xf0 kernel/time/timer.c:2051 __do_softirq+0x2bc/0x943 kernel/softirq.c:554 invoke_softirq kernel/softirq.c:428 [inline] __irq_exit_rcu+0xf2/0x1c0 kernel/softirq.c:633 irq_exit_rcu+0x9/0x30 kernel/softirq.c:645 instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1076 [inline] sysvec_apic_timer_interrupt+0xa6/0xc0 arch/x86/kernel/apic/apic.c:1076 asm_sysvec_apic_timer_interrupt+0x1a/0x20 arch/x86/include/asm/idtentry.h:702 RIP: 0010:resched_offsets_ok kernel/sched/core.c:10127 [inline] RIP: 0010:__might_resched+0x16f/0x780 kernel/sched/core.c:10142 Code: 00 4c 89 e8 48 c1 e8 03 48 ba 00 00 00 00 00 fc ff df 48 89 44 24 38 0f b6 04 10 84 c0 0f 85 87 04 00 00 41 8b 45 00 c1 e0 08 <01> d8 44 39 e0 0f 85 d6 00 00 00 44 89 64 24 1c 48 8d bc 24 a0 00 RSP: 0018:ffffc9000ee069e0 EFLAGS: 00000246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff8880296a9e00 RDX: dffffc0000000000 RSI: ffff8880296a9e00 RDI: ffffffff8bfe8fa0 RBP: ffffc9000ee06b00 R08: ffffffff82326877 R09: 1ffff11002b5ad1b R10: dffffc0000000000 R11: ffffed1002b5ad1c R12: 0000000000000000 R13: ffff8880296aa23c R14: 000000000000062a R15: 1ffff92001dc0d44 down_write+0x19/0x50 kernel/locking/rwsem.c:1578 kernfs_activate fs/kernfs/dir.c:1403 [inline] kernfs_add_one+0x4af/0x8b0 fs/kernfs/dir.c:819 __kernfs_create_file+0x22e/0x2e0 fs/kernfs/file.c:1056 sysfs_add_file_mode_ns+0x24a/0x310 fs/sysfs/file.c:307 create_files fs/sysfs/group.c:64 [inline] internal_create_group+0x4f4/0xf20 fs/sysfs/group.c:152 internal_create_groups fs/sysfs/group.c:192 [inline] sysfs_create_groups+0x56/0x120 fs/sysfs/group.c:218 create_dir lib/kobject.c:78 [inline] kobject_add_internal+0x472/0x8d0 lib/kobject.c:240 kobject_add_varg lib/kobject.c:374 [inline] kobject_init_and_add+0x124/0x190 lib/kobject.c:457 netdev_queue_add_kobject net/core/net-sysfs.c:1706 [inline] netdev_queue_update_kobjects+0x1f3/0x480 net/core/net-sysfs.c:1758 register_queue_kobjects net/core/net-sysfs.c:1819 [inline] netdev_register_kobject+0x265/0x310 net/core/net-sysfs.c:2059 register_netdevice+0x1191/0x19c0 net/core/dev.c:10298 bond_newlink+0x3b/0x90 drivers/net/bonding/bond_netlink.c:576 rtnl_newlink_create net/core/rtnetlink.c:3506 [inline] __rtnl_newlink net/core/rtnetlink.c:3726 [inline] rtnl_newlink+0x158f/0x20a0 net/core/rtnetlink.c:3739 rtnetlink_rcv_msg+0x885/0x1040 net/core/rtnetlink.c:6606 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2543 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa3c/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 __sys_sendto+0x3a4/0x4f0 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0xde/0x100 net/socket.c:2199 do_syscall_64+0xfb/0x240 entry_SYSCALL_64_after_hwframe+0x6d/0x75 RIP: 0033:0x7fc3fa87fa9c Reported-by: syzbot Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20240212140700.2795436-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/dummy.c | 1 + drivers/net/geneve.c | 1 + drivers/net/loopback.c | 1 + drivers/net/veth.c | 1 + drivers/net/vxlan/vxlan_core.c | 1 + net/ipv4/ip_tunnel.c | 1 + net/ipv6/ip6_gre.c | 2 ++ net/ipv6/ip6_tunnel.c | 1 + net/ipv6/ip6_vti.c | 1 + net/ipv6/sit.c | 1 + 10 files changed, 11 insertions(+) (limited to 'net') diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index 768454aa36d6c..946bba0701a4f 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -71,6 +71,7 @@ static int dummy_dev_init(struct net_device *dev) if (!dev->lstats) return -ENOMEM; + netdev_lockdep_set_classes(dev); return 0; } diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 23e97c2e4f6fc..256602a723568 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -335,6 +335,7 @@ static int geneve_init(struct net_device *dev) gro_cells_destroy(&geneve->gro_cells); return err; } + netdev_lockdep_set_classes(dev); return 0; } diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index f6d53e63ef4ec..f6eab66c26608 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -144,6 +144,7 @@ static int loopback_dev_init(struct net_device *dev) dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); if (!dev->lstats) return -ENOMEM; + netdev_lockdep_set_classes(dev); return 0; } diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 578e36ea1589c..de1f138377829 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1499,6 +1499,7 @@ static void veth_free_queues(struct net_device *dev) static int veth_dev_init(struct net_device *dev) { + netdev_lockdep_set_classes(dev); return veth_alloc_queues(dev); } diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 11707647afb98..705a6fd6ab6c3 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2855,6 +2855,7 @@ static int vxlan_init(struct net_device *dev) if (err) goto err_gro_cells_destroy; + netdev_lockdep_set_classes(dev); return 0; err_gro_cells_destroy: diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 00da0b80320fb..58de780ab0e2b 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -1269,6 +1269,7 @@ int ip_tunnel_init(struct net_device *dev) if (tunnel->collect_md) netif_keep_dst(dev); + netdev_lockdep_set_classes(dev); return 0; } EXPORT_SYMBOL_GPL(ip_tunnel_init); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 428f03e9da45a..5e97e0aa8e07d 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1511,6 +1511,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) ip6gre_tnl_init_features(dev); netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL); + netdev_lockdep_set_classes(dev); return 0; cleanup_dst_cache_init: @@ -1901,6 +1902,7 @@ static int ip6erspan_tap_init(struct net_device *dev) ip6erspan_tnl_link_config(tunnel, 1); netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL); + netdev_lockdep_set_classes(dev); return 0; cleanup_dst_cache_init: diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index bfb0a6c601c11..44406c28445dc 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1898,6 +1898,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev) dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len; netdev_hold(dev, &t->dev_tracker, GFP_KERNEL); + netdev_lockdep_set_classes(dev); return 0; destroy_dst: diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index cfe1b1ad4d85d..7f4f976aa24a9 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -935,6 +935,7 @@ static inline int vti6_dev_init_gen(struct net_device *dev) if (!dev->tstats) return -ENOMEM; netdev_hold(dev, &t->dev_tracker, GFP_KERNEL); + netdev_lockdep_set_classes(dev); return 0; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 61b2b71fa8bed..b2da1f1b5fec4 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1460,6 +1460,7 @@ static int ipip6_tunnel_init(struct net_device *dev) return err; } netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL); + netdev_lockdep_set_classes(dev); return 0; } -- cgit 1.2.3-korg From 2b0cfa6e49566c8fa6759734cf821aa6e8271a9e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 12 Feb 2024 10:50:54 +0100 Subject: net: add generic percpu page_pool allocator Introduce generic percpu page_pools allocator. Moreover add page_pool_create_percpu() and cpuid filed in page_pool struct in order to recycle the page in the page_pool "hot" cache if napi_pp_put_page() is running on the same cpu. This is a preliminary patch to add xdp multi-buff support for xdp running in generic mode. Acked-by: Jesper Dangaard Brouer Reviewed-by: Toke Hoiland-Jorgensen Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/80bc4285228b6f4220cd03de1999d86e46e3fcbd.1707729884.git.lorenzo@kernel.org Signed-off-by: Jakub Kicinski --- include/net/page_pool/types.h | 3 +++ net/core/dev.c | 45 +++++++++++++++++++++++++++++++++++++++++++ net/core/page_pool.c | 23 ++++++++++++++++++---- net/core/skbuff.c | 5 +++-- 4 files changed, 70 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 76481c4653752..3828396ae60c2 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -128,6 +128,7 @@ struct page_pool_stats { struct page_pool { struct page_pool_params_fast p; + int cpuid; bool has_init_callback; long frag_users; @@ -203,6 +204,8 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, unsigned int size, gfp_t gfp); struct page_pool *page_pool_create(const struct page_pool_params *params); +struct page_pool *page_pool_create_percpu(const struct page_pool_params *params, + int cpuid); struct xdp_mem_info; diff --git a/net/core/dev.c b/net/core/dev.c index 7cf15d2bf78de..e19bdf1421e0a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -153,6 +153,8 @@ #include #include #include +#include +#include #include "dev.h" #include "net-sysfs.h" @@ -450,6 +452,12 @@ static RAW_NOTIFIER_HEAD(netdev_chain); DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); EXPORT_PER_CPU_SYMBOL(softnet_data); +/* Page_pool has a lockless array/stack to alloc/recycle pages. + * PP consumers must pay attention to run APIs in the appropriate context + * (e.g. NAPI context). + */ +static DEFINE_PER_CPU_ALIGNED(struct page_pool *, system_page_pool); + #ifdef CONFIG_LOCKDEP /* * register_netdevice() inits txq->_xmit_lock and sets lockdep class @@ -11724,6 +11732,27 @@ static void __init net_dev_struct_check(void) * */ +/* We allocate 256 pages for each CPU if PAGE_SHIFT is 12 */ +#define SYSTEM_PERCPU_PAGE_POOL_SIZE ((1 << 20) / PAGE_SIZE) + +static int net_page_pool_create(int cpuid) +{ +#if IS_ENABLED(CONFIG_PAGE_POOL) + struct page_pool_params page_pool_params = { + .pool_size = SYSTEM_PERCPU_PAGE_POOL_SIZE, + .nid = NUMA_NO_NODE, + }; + struct page_pool *pp_ptr; + + pp_ptr = page_pool_create_percpu(&page_pool_params, cpuid); + if (IS_ERR(pp_ptr)) + return -ENOMEM; + + per_cpu(system_page_pool, cpuid) = pp_ptr; +#endif + return 0; +} + /* * This is called single threaded during boot, so no need * to take the rtnl semaphore. @@ -11776,6 +11805,9 @@ static int __init net_dev_init(void) init_gro_hash(&sd->backlog); sd->backlog.poll = process_backlog; sd->backlog.weight = weight_p; + + if (net_page_pool_create(i)) + goto out; } dev_boot_phase = 0; @@ -11803,6 +11835,19 @@ static int __init net_dev_init(void) WARN_ON(rc < 0); rc = 0; out: + if (rc < 0) { + for_each_possible_cpu(i) { + struct page_pool *pp_ptr; + + pp_ptr = per_cpu(system_page_pool, i); + if (!pp_ptr) + continue; + + page_pool_destroy(pp_ptr); + per_cpu(system_page_pool, i) = NULL; + } + } + return rc; } diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 4933762e5a6ba..89c835fcf0944 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -171,13 +171,16 @@ static void page_pool_producer_unlock(struct page_pool *pool, } static int page_pool_init(struct page_pool *pool, - const struct page_pool_params *params) + const struct page_pool_params *params, + int cpuid) { unsigned int ring_qsize = 1024; /* Default */ memcpy(&pool->p, ¶ms->fast, sizeof(pool->p)); memcpy(&pool->slow, ¶ms->slow, sizeof(pool->slow)); + pool->cpuid = cpuid; + /* Validate only known flags were used */ if (pool->p.flags & ~(PP_FLAG_ALL)) return -EINVAL; @@ -253,10 +256,12 @@ static void page_pool_uninit(struct page_pool *pool) } /** - * page_pool_create() - create a page pool. + * page_pool_create_percpu() - create a page pool for a given cpu. * @params: parameters, see struct page_pool_params + * @cpuid: cpu identifier */ -struct page_pool *page_pool_create(const struct page_pool_params *params) +struct page_pool * +page_pool_create_percpu(const struct page_pool_params *params, int cpuid) { struct page_pool *pool; int err; @@ -265,7 +270,7 @@ struct page_pool *page_pool_create(const struct page_pool_params *params) if (!pool) return ERR_PTR(-ENOMEM); - err = page_pool_init(pool, params); + err = page_pool_init(pool, params, cpuid); if (err < 0) goto err_free; @@ -282,6 +287,16 @@ err_free: kfree(pool); return ERR_PTR(err); } +EXPORT_SYMBOL(page_pool_create_percpu); + +/** + * page_pool_create() - create a page pool + * @params: parameters, see struct page_pool_params + */ +struct page_pool *page_pool_create(const struct page_pool_params *params) +{ + return page_pool_create_percpu(params, -1); +} EXPORT_SYMBOL(page_pool_create); static void page_pool_return_page(struct page_pool *pool, struct page *page); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index edbbef563d4d9..9e5eb47b4025c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -923,9 +923,10 @@ bool napi_pp_put_page(struct page *page, bool napi_safe) */ if (napi_safe || in_softirq()) { const struct napi_struct *napi = READ_ONCE(pp->p.napi); + unsigned int cpuid = smp_processor_id(); - allow_direct = napi && - READ_ONCE(napi->list_owner) == smp_processor_id(); + allow_direct = napi && READ_ONCE(napi->list_owner) == cpuid; + allow_direct |= (pp->cpuid == cpuid); } /* Driver set this to memory recycling info. Reset it on recycle. -- cgit 1.2.3-korg From 4d2bb0bfe8741a8778e0053f31a4e0f0cba80e8b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 12 Feb 2024 10:50:55 +0100 Subject: xdp: rely on skb pointer reference in do_xdp_generic and netif_receive_generic_xdp Rely on skb pointer reference instead of the skb pointer in do_xdp_generic and netif_receive_generic_xdp routine signatures. This is a preliminary patch to add multi-buff support for xdp running in generic mode where we will need to reallocate the skb to avoid linearization and we will need to make it visible to do_xdp_generic() caller. Acked-by: Jesper Dangaard Brouer Reviewed-by: Toke Hoiland-Jorgensen Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/c09415b1f48c8620ef4d76deed35050a7bddf7c2.1707729884.git.lorenzo@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 4 ++-- include/linux/netdevice.h | 2 +- net/core/dev.c | 16 +++++++++------- 3 files changed, 12 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b472f2c972d84..bc80fc1d576e5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1926,7 +1926,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, rcu_read_lock(); xdp_prog = rcu_dereference(tun->xdp_prog); if (xdp_prog) { - ret = do_xdp_generic(xdp_prog, skb); + ret = do_xdp_generic(xdp_prog, &skb); if (ret != XDP_PASS) { rcu_read_unlock(); local_bh_enable(); @@ -2516,7 +2516,7 @@ build: skb_record_rx_queue(skb, tfile->queue_index); if (skb_xdp) { - ret = do_xdp_generic(xdp_prog, skb); + ret = do_xdp_generic(xdp_prog, &skb); if (ret != XDP_PASS) { ret = 0; goto out; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 07cefa32eafa9..a3f9c95da51ee 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3958,7 +3958,7 @@ static inline void dev_consume_skb_any(struct sk_buff *skb) u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, struct bpf_prog *xdp_prog); void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog); -int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb); +int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb); int netif_rx(struct sk_buff *skb); int __netif_rx(struct sk_buff *skb); diff --git a/net/core/dev.c b/net/core/dev.c index e19bdf1421e0a..ffeb0e0279fed 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4936,10 +4936,11 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, return act; } -static u32 netif_receive_generic_xdp(struct sk_buff *skb, +static u32 netif_receive_generic_xdp(struct sk_buff **pskb, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { + struct sk_buff *skb = *pskb; u32 act = XDP_DROP; /* Reinjected packets coming from act_mirred or similar should @@ -5020,24 +5021,24 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key); -int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) +int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb) { if (xdp_prog) { struct xdp_buff xdp; u32 act; int err; - act = netif_receive_generic_xdp(skb, &xdp, xdp_prog); + act = netif_receive_generic_xdp(pskb, &xdp, xdp_prog); if (act != XDP_PASS) { switch (act) { case XDP_REDIRECT: - err = xdp_do_generic_redirect(skb->dev, skb, + err = xdp_do_generic_redirect((*pskb)->dev, *pskb, &xdp, xdp_prog); if (err) goto out_redir; break; case XDP_TX: - generic_xdp_tx(skb, xdp_prog); + generic_xdp_tx(*pskb, xdp_prog); break; } return XDP_DROP; @@ -5045,7 +5046,7 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) } return XDP_PASS; out_redir: - kfree_skb_reason(skb, SKB_DROP_REASON_XDP); + kfree_skb_reason(*pskb, SKB_DROP_REASON_XDP); return XDP_DROP; } EXPORT_SYMBOL_GPL(do_xdp_generic); @@ -5368,7 +5369,8 @@ another_round: int ret2; migrate_disable(); - ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb); + ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), + &skb); migrate_enable(); if (ret2 != XDP_PASS) { -- cgit 1.2.3-korg From e6d5dbdd20aa6a86974af51deb9414cd2e7794cb Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 12 Feb 2024 10:50:56 +0100 Subject: xdp: add multi-buff support for xdp running in generic mode Similar to native xdp, do not always linearize the skb in netif_receive_generic_xdp routine but create a non-linear xdp_buff to be processed by the eBPF program. This allow to add multi-buffer support for xdp running in generic mode. Acked-by: Jesper Dangaard Brouer Reviewed-by: Toke Hoiland-Jorgensen Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/1044d6412b1c3e95b40d34993fd5f37cd2f319fd.1707729884.git.lorenzo@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 2 ++ net/core/dev.c | 70 +++++++++++++++++++++++++++----------- net/core/skbuff.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 144 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2dde34c29203b..def3d8689c3dd 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3446,6 +3446,8 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f) __skb_frag_ref(&skb_shinfo(skb)->frags[f]); } +int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, + struct bpf_prog *prog); bool napi_pp_put_page(struct page *page, bool napi_safe); static inline void diff --git a/net/core/dev.c b/net/core/dev.c index ffeb0e0279fed..2d02ca8a3da5f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4874,6 +4874,12 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, xdp_init_buff(xdp, frame_sz, &rxqueue->xdp_rxq); xdp_prepare_buff(xdp, hard_start, skb_headroom(skb) - mac_len, skb_headlen(skb) + mac_len, true); + if (skb_is_nonlinear(skb)) { + skb_shinfo(skb)->xdp_frags_size = skb->data_len; + xdp_buff_set_frags_flag(xdp); + } else { + xdp_buff_clear_frags_flag(xdp); + } orig_data_end = xdp->data_end; orig_data = xdp->data; @@ -4903,6 +4909,14 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, skb->len += off; /* positive on grow, negative on shrink */ } + /* XDP frag metadata (e.g. nr_frags) are updated in eBPF helpers + * (e.g. bpf_xdp_adjust_tail), we need to update data_len here. + */ + if (xdp_buff_has_frags(xdp)) + skb->data_len = skb_shinfo(skb)->xdp_frags_size; + else + skb->data_len = 0; + /* check if XDP changed eth hdr such SKB needs update */ eth = (struct ethhdr *)xdp->data; if ((orig_eth_type != eth->h_proto) || @@ -4936,12 +4950,35 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, return act; } +static int +netif_skb_check_for_xdp(struct sk_buff **pskb, struct bpf_prog *prog) +{ + struct sk_buff *skb = *pskb; + int err, hroom, troom; + + if (!skb_cow_data_for_xdp(this_cpu_read(system_page_pool), pskb, prog)) + return 0; + + /* In case we have to go down the path and also linearize, + * then lets do the pskb_expand_head() work just once here. + */ + hroom = XDP_PACKET_HEADROOM - skb_headroom(skb); + troom = skb->tail + skb->data_len - skb->end; + err = pskb_expand_head(skb, + hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0, + troom > 0 ? troom + 128 : 0, GFP_ATOMIC); + if (err) + return err; + + return skb_linearize(skb); +} + static u32 netif_receive_generic_xdp(struct sk_buff **pskb, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { struct sk_buff *skb = *pskb; - u32 act = XDP_DROP; + u32 mac_len, act = XDP_DROP; /* Reinjected packets coming from act_mirred or similar should * not get XDP generic processing. @@ -4949,41 +4986,36 @@ static u32 netif_receive_generic_xdp(struct sk_buff **pskb, if (skb_is_redirected(skb)) return XDP_PASS; - /* XDP packets must be linear and must have sufficient headroom - * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also - * native XDP provides, thus we need to do it here as well. + /* XDP packets must have sufficient headroom of XDP_PACKET_HEADROOM + * bytes. This is the guarantee that also native XDP provides, + * thus we need to do it here as well. */ + mac_len = skb->data - skb_mac_header(skb); + __skb_push(skb, mac_len); + if (skb_cloned(skb) || skb_is_nonlinear(skb) || skb_headroom(skb) < XDP_PACKET_HEADROOM) { - int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb); - int troom = skb->tail + skb->data_len - skb->end; - - /* In case we have to go down the path and also linearize, - * then lets do the pskb_expand_head() work just once here. - */ - if (pskb_expand_head(skb, - hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0, - troom > 0 ? troom + 128 : 0, GFP_ATOMIC)) - goto do_drop; - if (skb_linearize(skb)) + if (netif_skb_check_for_xdp(pskb, xdp_prog)) goto do_drop; } - act = bpf_prog_run_generic_xdp(skb, xdp, xdp_prog); + __skb_pull(*pskb, mac_len); + + act = bpf_prog_run_generic_xdp(*pskb, xdp, xdp_prog); switch (act) { case XDP_REDIRECT: case XDP_TX: case XDP_PASS: break; default: - bpf_warn_invalid_xdp_action(skb->dev, xdp_prog, act); + bpf_warn_invalid_xdp_action((*pskb)->dev, xdp_prog, act); fallthrough; case XDP_ABORTED: - trace_xdp_exception(skb->dev, xdp_prog, act); + trace_xdp_exception((*pskb)->dev, xdp_prog, act); fallthrough; case XDP_DROP: do_drop: - kfree_skb(skb); + kfree_skb(*pskb); break; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9e5eb47b4025c..bdb94749f05df 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -895,6 +895,97 @@ static bool is_pp_page(struct page *page) return (page->pp_magic & ~0x3UL) == PP_SIGNATURE; } +static int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, + unsigned int headroom) +{ +#if IS_ENABLED(CONFIG_PAGE_POOL) + u32 size, truesize, len, max_head_size, off; + struct sk_buff *skb = *pskb, *nskb; + int err, i, head_off; + void *data; + + /* XDP does not support fraglist so we need to linearize + * the skb. + */ + if (skb_has_frag_list(skb)) + return -EOPNOTSUPP; + + max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE - headroom); + if (skb->len > max_head_size + MAX_SKB_FRAGS * PAGE_SIZE) + return -ENOMEM; + + size = min_t(u32, skb->len, max_head_size); + truesize = SKB_HEAD_ALIGN(size) + headroom; + data = page_pool_dev_alloc_va(pool, &truesize); + if (!data) + return -ENOMEM; + + nskb = napi_build_skb(data, truesize); + if (!nskb) { + page_pool_free_va(pool, data, true); + return -ENOMEM; + } + + skb_reserve(nskb, headroom); + skb_copy_header(nskb, skb); + skb_mark_for_recycle(nskb); + + err = skb_copy_bits(skb, 0, nskb->data, size); + if (err) { + consume_skb(nskb); + return err; + } + skb_put(nskb, size); + + head_off = skb_headroom(nskb) - skb_headroom(skb); + skb_headers_offset_update(nskb, head_off); + + off = size; + len = skb->len - off; + for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) { + struct page *page; + u32 page_off; + + size = min_t(u32, len, PAGE_SIZE); + truesize = size; + + page = page_pool_dev_alloc(pool, &page_off, &truesize); + if (!data) { + consume_skb(nskb); + return -ENOMEM; + } + + skb_add_rx_frag(nskb, i, page, page_off, size, truesize); + err = skb_copy_bits(skb, off, page_address(page) + page_off, + size); + if (err) { + consume_skb(nskb); + return err; + } + + len -= size; + off += size; + } + + consume_skb(skb); + *pskb = nskb; + + return 0; +#else + return -EOPNOTSUPP; +#endif +} + +int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, + struct bpf_prog *prog) +{ + if (!prog->aux->xdp_has_frags) + return -EINVAL; + + return skb_pp_cow_data(pool, pskb, XDP_PACKET_HEADROOM); +} +EXPORT_SYMBOL(skb_cow_data_for_xdp); + #if IS_ENABLED(CONFIG_PAGE_POOL) bool napi_pp_put_page(struct page *page, bool napi_safe) { -- cgit 1.2.3-korg From 27accb3cc08a0ec4e348356774042d5fa5f30cce Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 12 Feb 2024 10:50:57 +0100 Subject: veth: rely on skb_pp_cow_data utility routine Rely on skb_pp_cow_data utility routine and remove duplicated code. Acked-by: Jesper Dangaard Brouer Reviewed-by: Toke Hoiland-Jorgensen Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/029cc14cce41cb242ee7efdcf32acc81f1ce4e9f.1707729884.git.lorenzo@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/veth.c | 74 ++------------------------------------------------ include/linux/skbuff.h | 2 ++ net/core/skbuff.c | 5 ++-- 3 files changed, 7 insertions(+), 74 deletions(-) (limited to 'net') diff --git a/drivers/net/veth.c b/drivers/net/veth.c index de1f138377829..500b9dfccd08e 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -729,80 +729,10 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq, if (skb_shared(skb) || skb_head_is_locked(skb) || skb_shinfo(skb)->nr_frags || skb_headroom(skb) < XDP_PACKET_HEADROOM) { - u32 size, len, max_head_size, off, truesize, page_offset; - struct sk_buff *nskb; - struct page *page; - int i, head_off; - void *va; - - /* We need a private copy of the skb and data buffers since - * the ebpf program can modify it. We segment the original skb - * into order-0 pages without linearize it. - * - * Make sure we have enough space for linear and paged area - */ - max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE - - VETH_XDP_HEADROOM); - if (skb->len > PAGE_SIZE * MAX_SKB_FRAGS + max_head_size) - goto drop; - - size = min_t(u32, skb->len, max_head_size); - truesize = SKB_HEAD_ALIGN(size) + VETH_XDP_HEADROOM; - - /* Allocate skb head */ - va = page_pool_dev_alloc_va(rq->page_pool, &truesize); - if (!va) - goto drop; - - nskb = napi_build_skb(va, truesize); - if (!nskb) { - page_pool_free_va(rq->page_pool, va, true); + if (skb_pp_cow_data(rq->page_pool, pskb, XDP_PACKET_HEADROOM)) goto drop; - } - - skb_reserve(nskb, VETH_XDP_HEADROOM); - skb_copy_header(nskb, skb); - skb_mark_for_recycle(nskb); - - if (skb_copy_bits(skb, 0, nskb->data, size)) { - consume_skb(nskb); - goto drop; - } - skb_put(nskb, size); - head_off = skb_headroom(nskb) - skb_headroom(skb); - skb_headers_offset_update(nskb, head_off); - - /* Allocate paged area of new skb */ - off = size; - len = skb->len - off; - - for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) { - size = min_t(u32, len, PAGE_SIZE); - truesize = size; - - page = page_pool_dev_alloc(rq->page_pool, &page_offset, - &truesize); - if (!page) { - consume_skb(nskb); - goto drop; - } - - skb_add_rx_frag(nskb, i, page, page_offset, size, - truesize); - if (skb_copy_bits(skb, off, - page_address(page) + page_offset, - size)) { - consume_skb(nskb); - goto drop; - } - - len -= size; - off += size; - } - - consume_skb(skb); - skb = nskb; + skb = *pskb; } /* SKB "head" area always have tailroom for skb_shared_info */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index def3d8689c3dd..696e7680656f5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3446,6 +3446,8 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f) __skb_frag_ref(&skb_shinfo(skb)->frags[f]); } +int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, + unsigned int headroom); int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, struct bpf_prog *prog); bool napi_pp_put_page(struct page *page, bool napi_safe); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index bdb94749f05df..0d9a489e6ae18 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -895,8 +895,8 @@ static bool is_pp_page(struct page *page) return (page->pp_magic & ~0x3UL) == PP_SIGNATURE; } -static int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, - unsigned int headroom) +int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, + unsigned int headroom) { #if IS_ENABLED(CONFIG_PAGE_POOL) u32 size, truesize, len, max_head_size, off; @@ -975,6 +975,7 @@ static int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, return -EOPNOTSUPP; #endif } +EXPORT_SYMBOL(skb_pp_cow_data); int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, struct bpf_prog *prog) -- cgit 1.2.3-korg From 1c07dbb0cccfe85060b6eb089db3d6bfeb6aaf31 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Feb 2024 06:32:33 +0000 Subject: net: annotate data-races around dev->name_assign_type name_assign_type_show() runs locklessly, we should annotate accesses to dev->name_assign_type. Alternative would be to grab devnet_rename_sem semaphore from name_assign_type_show(), but this would not bring more accuracy. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 6 +++--- net/core/net-sysfs.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 2d02ca8a3da5f..720bd68382129 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1228,13 +1228,13 @@ int dev_change_name(struct net_device *dev, const char *newname) dev->flags & IFF_UP ? " (while UP)" : ""); old_assign_type = dev->name_assign_type; - dev->name_assign_type = NET_NAME_RENAMED; + WRITE_ONCE(dev->name_assign_type, NET_NAME_RENAMED); rollback: ret = device_rename(&dev->dev, dev->name); if (ret) { memcpy(dev->name, oldname, IFNAMSIZ); - dev->name_assign_type = old_assign_type; + WRITE_ONCE(dev->name_assign_type, old_assign_type); up_write(&devnet_rename_sem); return ret; } @@ -1263,7 +1263,7 @@ rollback: down_write(&devnet_rename_sem); memcpy(dev->name, oldname, IFNAMSIZ); memcpy(oldname, newname, IFNAMSIZ); - dev->name_assign_type = old_assign_type; + WRITE_ONCE(dev->name_assign_type, old_assign_type); old_assign_type = NET_NAME_RENAMED; goto rollback; } else { diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index a09d507c5b03d..f4c2b82674951 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -125,7 +125,7 @@ static DEVICE_ATTR_RO(iflink); static ssize_t format_name_assign_type(const struct net_device *dev, char *buf) { - return sysfs_emit(buf, fmt_dec, dev->name_assign_type); + return sysfs_emit(buf, fmt_dec, READ_ONCE(dev->name_assign_type)); } static ssize_t name_assign_type_show(struct device *dev, @@ -135,7 +135,7 @@ static ssize_t name_assign_type_show(struct device *dev, struct net_device *ndev = to_net_dev(dev); ssize_t ret = -EINVAL; - if (ndev->name_assign_type != NET_NAME_UNKNOWN) + if (READ_ONCE(ndev->name_assign_type) != NET_NAME_UNKNOWN) ret = netdev_show(dev, attr, buf, format_name_assign_type); return ret; -- cgit 1.2.3-korg From f694eee9e1c00d6ca06c5e59c04e3b6ff7d64aa9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Feb 2024 06:32:34 +0000 Subject: ip_tunnel: annotate data-races around t->parms.link t->parms.link is read locklessly, annotate these reads and opposite writes accordingly. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 58de780ab0e2b..9f44c49a61dee 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -102,10 +102,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, if (!ip_tunnel_key_match(&t->parms, flags, key)) continue; - if (t->parms.link == link) + if (READ_ONCE(t->parms.link) == link) return t; - else - cand = t; + cand = t; } hlist_for_each_entry_rcu(t, head, hash_node) { @@ -117,9 +116,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, if (!ip_tunnel_key_match(&t->parms, flags, key)) continue; - if (t->parms.link == link) + if (READ_ONCE(t->parms.link) == link) return t; - else if (!cand) + if (!cand) cand = t; } @@ -137,9 +136,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, if (!ip_tunnel_key_match(&t->parms, flags, key)) continue; - if (t->parms.link == link) + if (READ_ONCE(t->parms.link) == link) return t; - else if (!cand) + if (!cand) cand = t; } @@ -150,9 +149,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, !(t->dev->flags & IFF_UP)) continue; - if (t->parms.link == link) + if (READ_ONCE(t->parms.link) == link) return t; - else if (!cand) + if (!cand) cand = t; } @@ -221,7 +220,7 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, hlist_for_each_entry_rcu(t, head, hash_node) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && - link == t->parms.link && + link == READ_ONCE(t->parms.link) && type == t->dev->type && ip_tunnel_key_match(&t->parms, flags, key)) break; @@ -747,7 +746,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr, tunnel->parms.o_key, RT_TOS(tos), - dev_net(dev), tunnel->parms.link, + dev_net(dev), READ_ONCE(tunnel->parms.link), tunnel->fwmark, skb_get_hash(skb), 0); if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0) @@ -867,7 +866,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, if (t->parms.link != p->link || t->fwmark != fwmark) { int mtu; - t->parms.link = p->link; + WRITE_ONCE(t->parms.link, p->link); t->fwmark = fwmark; mtu = ip_tunnel_bind_dev(dev); if (set_mtu) @@ -1057,9 +1056,9 @@ EXPORT_SYMBOL(ip_tunnel_get_link_net); int ip_tunnel_get_iflink(const struct net_device *dev) { - struct ip_tunnel *tunnel = netdev_priv(dev); + const struct ip_tunnel *tunnel = netdev_priv(dev); - return tunnel->parms.link; + return READ_ONCE(tunnel->parms.link); } EXPORT_SYMBOL(ip_tunnel_get_iflink); -- cgit 1.2.3-korg From a6473fe9b623f6667af72d972b87cd9a5ff87e21 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Feb 2024 06:32:35 +0000 Subject: dev: annotate accesses to dev->link Following patch will read dev->link locklessly, annotate the write from do_setlink(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 6f1c5537e8421..fd47a4422a51d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2956,7 +2956,7 @@ static int do_setlink(const struct sk_buff *skb, write_lock(&dev_base_lock); if (dev->link_mode ^ value) status |= DO_SETLINK_NOTIFY; - dev->link_mode = value; + WRITE_ONCE(dev->link_mode, value); write_unlock(&dev_base_lock); } -- cgit 1.2.3-korg From 4d42b37def70327b2bb19f823d42289aed2cd7c7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Feb 2024 06:32:36 +0000 Subject: net: convert dev->reg_state to u8 Prepares things so that dev->reg_state reads can be lockless, by adding WRITE_ONCE() on write side. READ_ONCE()/WRITE_ONCE() do not support bitfields. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 23 ++++++++++++++--------- net/core/dev.c | 8 ++++---- 2 files changed, 18 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a3f9c95da51ee..6311246551072 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1815,6 +1815,15 @@ enum netdev_stat_type { NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */ }; +enum netdev_reg_state { + NETREG_UNINITIALIZED = 0, + NETREG_REGISTERED, /* completed register_netdevice */ + NETREG_UNREGISTERING, /* called unregister_netdevice */ + NETREG_UNREGISTERED, /* completed unregister todo */ + NETREG_RELEASED, /* called free_netdev */ + NETREG_DUMMY, /* dummy device for NAPI poll */ +}; + /** * struct net_device - The DEVICE structure. * @@ -2372,13 +2381,7 @@ struct net_device { struct list_head link_watch_list; - enum { NETREG_UNINITIALIZED=0, - NETREG_REGISTERED, /* completed register_netdevice */ - NETREG_UNREGISTERING, /* called unregister_netdevice */ - NETREG_UNREGISTERED, /* completed unregister todo */ - NETREG_RELEASED, /* called free_netdev */ - NETREG_DUMMY, /* dummy device for NAPI poll */ - } reg_state:8; + u8 reg_state; bool dismantle; @@ -5254,7 +5257,9 @@ static inline const char *netdev_name(const struct net_device *dev) static inline const char *netdev_reg_state(const struct net_device *dev) { - switch (dev->reg_state) { + u8 reg_state = READ_ONCE(dev->reg_state); + + switch (reg_state) { case NETREG_UNINITIALIZED: return " (uninitialized)"; case NETREG_REGISTERED: return ""; case NETREG_UNREGISTERING: return " (unregistering)"; @@ -5263,7 +5268,7 @@ static inline const char *netdev_reg_state(const struct net_device *dev) case NETREG_DUMMY: return " (dummy)"; } - WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, dev->reg_state); + WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, reg_state); return " (unknown)"; } diff --git a/net/core/dev.c b/net/core/dev.c index 720bd68382129..9c95cae9d6ab9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10339,7 +10339,7 @@ int register_netdevice(struct net_device *dev) ret = netdev_register_kobject(dev); write_lock(&dev_base_lock); - dev->reg_state = ret ? NETREG_UNREGISTERED : NETREG_REGISTERED; + WRITE_ONCE(dev->reg_state, ret ? NETREG_UNREGISTERED : NETREG_REGISTERED); write_unlock(&dev_base_lock); if (ret) goto err_uninit_notify; @@ -10630,7 +10630,7 @@ void netdev_run_todo(void) } write_lock(&dev_base_lock); - dev->reg_state = NETREG_UNREGISTERED; + WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERED); write_unlock(&dev_base_lock); linkwatch_sync_dev(dev); } @@ -11050,7 +11050,7 @@ void free_netdev(struct net_device *dev) } BUG_ON(dev->reg_state != NETREG_UNREGISTERED); - dev->reg_state = NETREG_RELEASED; + WRITE_ONCE(dev->reg_state, NETREG_RELEASED); /* will free via device release */ put_device(&dev->dev); @@ -11140,7 +11140,7 @@ void unregister_netdevice_many_notify(struct list_head *head, /* And unlink it from device chain. */ write_lock(&dev_base_lock); unlist_netdevice(dev, false); - dev->reg_state = NETREG_UNREGISTERING; + WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERING); write_unlock(&dev_base_lock); } flush_all_backlogs(); -- cgit 1.2.3-korg From 12692e3df2dacf2993c56aa23b6d3de921a5bdff Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Feb 2024 06:32:37 +0000 Subject: net-sysfs: convert netdev_show() to RCU Make clear dev_isalive() can be called with RCU protection. Then convert netdev_show() to RCU, to remove dev_base_lock dependency. Also add RCU to broadcast_show(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f4c2b82674951..678e4be690821 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -34,10 +34,10 @@ static const char fmt_dec[] = "%d\n"; static const char fmt_ulong[] = "%lu\n"; static const char fmt_u64[] = "%llu\n"; -/* Caller holds RTNL or dev_base_lock */ +/* Caller holds RTNL, RCU or dev_base_lock */ static inline int dev_isalive(const struct net_device *dev) { - return dev->reg_state <= NETREG_REGISTERED; + return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED; } /* use same locking rules as GIF* ioctl's */ @@ -48,10 +48,10 @@ static ssize_t netdev_show(const struct device *dev, struct net_device *ndev = to_net_dev(dev); ssize_t ret = -EINVAL; - read_lock(&dev_base_lock); + rcu_read_lock(); if (dev_isalive(ndev)) ret = (*format)(ndev, buf); - read_unlock(&dev_base_lock); + rcu_read_unlock(); return ret; } @@ -60,7 +60,7 @@ static ssize_t netdev_show(const struct device *dev, #define NETDEVICE_SHOW(field, format_string) \ static ssize_t format_##field(const struct net_device *dev, char *buf) \ { \ - return sysfs_emit(buf, format_string, dev->field); \ + return sysfs_emit(buf, format_string, READ_ONCE(dev->field)); \ } \ static ssize_t field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ @@ -161,10 +161,13 @@ static ssize_t broadcast_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *ndev = to_net_dev(dev); + int ret = -EINVAL; + rcu_read_lock(); if (dev_isalive(ndev)) - return sysfs_format_mac(buf, ndev->broadcast, ndev->addr_len); - return -EINVAL; + ret = sysfs_format_mac(buf, ndev->broadcast, ndev->addr_len); + rcu_read_unlock(); + return ret; } static DEVICE_ATTR_RO(broadcast); -- cgit 1.2.3-korg From c7d52737e7ebd31cc5fef46380d94b58becf9479 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Feb 2024 06:32:38 +0000 Subject: net-sysfs: use dev_addr_sem to remove races in address_show() Using dev_base_lock is not preventing from reading garbage. Use dev_addr_sem instead. v4: place dev_addr_sem extern in net/core/dev.h (Jakub Kicinski) Link: https://lore.kernel.org/netdev/20240212175845.10f6680a@kernel.org/ Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- net/core/dev.h | 3 +++ net/core/net-sysfs.c | 10 +++++++--- 3 files changed, 11 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 9c95cae9d6ab9..26f93446b743e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8993,7 +8993,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, } EXPORT_SYMBOL(dev_set_mac_address); -static DECLARE_RWSEM(dev_addr_sem); +DECLARE_RWSEM(dev_addr_sem); int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, struct netlink_ext_ack *extack) diff --git a/net/core/dev.h b/net/core/dev.h index a43dfe3de50ee..45892267848d7 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -3,6 +3,7 @@ #define _NET_CORE_DEV_H #include +#include struct net; struct net_device; @@ -46,6 +47,8 @@ extern int weight_p; extern int dev_weight_rx_bias; extern int dev_weight_tx_bias; +extern struct rw_semaphore dev_addr_sem; + /* rtnl helpers */ extern struct list_head net_todo_list; void netdev_run_todo(void); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 678e4be690821..23ef2df549c30 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -142,17 +142,21 @@ static ssize_t name_assign_type_show(struct device *dev, } static DEVICE_ATTR_RO(name_assign_type); -/* use same locking rules as GIFHWADDR ioctl's */ +/* use same locking rules as GIFHWADDR ioctl's (dev_get_mac_address()) */ static ssize_t address_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *ndev = to_net_dev(dev); ssize_t ret = -EINVAL; - read_lock(&dev_base_lock); + down_read(&dev_addr_sem); + + rcu_read_lock(); if (dev_isalive(ndev)) ret = sysfs_format_mac(buf, ndev->dev_addr, ndev->addr_len); - read_unlock(&dev_base_lock); + rcu_read_unlock(); + + up_read(&dev_addr_sem); return ret; } static DEVICE_ATTR_RO(address); -- cgit 1.2.3-korg From 004d138364fd10dd5ff8ceb54cfdc2d792a7b338 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Feb 2024 06:32:39 +0000 Subject: net-sysfs: convert dev->operstate reads to lockless ones operstate_show() can omit dev_base_lock acquisition only to read dev->operstate. Annotate accesses to dev->operstate. Writers still acquire dev_base_lock for mutual exclusion. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 3 ++- net/core/link_watch.c | 4 ++-- net/core/net-sysfs.c | 4 +--- net/core/rtnetlink.c | 4 ++-- net/hsr/hsr_device.c | 10 +++++----- net/ipv6/addrconf.c | 2 +- 6 files changed, 13 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 5ad4abfcb7ba3..2cf4fc7562639 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -455,7 +455,8 @@ static int br_fill_ifinfo(struct sk_buff *skb, u32 filter_mask, const struct net_device *dev, bool getlink) { - u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; + u8 operstate = netif_running(dev) ? READ_ONCE(dev->operstate) : + IF_OPER_DOWN; struct nlattr *af = NULL; struct net_bridge *br; struct ifinfomsg *hdr; diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 429571c258da7..1b93e054c9a3c 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -67,7 +67,7 @@ static void rfc2863_policy(struct net_device *dev) { unsigned char operstate = default_operstate(dev); - if (operstate == dev->operstate) + if (operstate == READ_ONCE(dev->operstate)) return; write_lock(&dev_base_lock); @@ -87,7 +87,7 @@ static void rfc2863_policy(struct net_device *dev) break; } - dev->operstate = operstate; + WRITE_ONCE(dev->operstate, operstate); write_unlock(&dev_base_lock); } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 23ef2df549c30..c5d164b8c6bfb 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -325,11 +325,9 @@ static ssize_t operstate_show(struct device *dev, const struct net_device *netdev = to_net_dev(dev); unsigned char operstate; - read_lock(&dev_base_lock); - operstate = netdev->operstate; + operstate = READ_ONCE(netdev->operstate); if (!netif_running(netdev)) operstate = IF_OPER_DOWN; - read_unlock(&dev_base_lock); if (operstate >= ARRAY_SIZE(operstates)) return -EINVAL; /* should not happen */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fd47a4422a51d..43d92de8601cb 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -866,9 +866,9 @@ static void set_operstate(struct net_device *dev, unsigned char transition) break; } - if (dev->operstate != operstate) { + if (READ_ONCE(dev->operstate) != operstate) { write_lock(&dev_base_lock); - dev->operstate = operstate; + WRITE_ONCE(dev->operstate, operstate); write_unlock(&dev_base_lock); netdev_state_change(dev); } diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 9d71b66183daf..be0e43f46556e 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -31,8 +31,8 @@ static bool is_slave_up(struct net_device *dev) static void __hsr_set_operstate(struct net_device *dev, int transition) { write_lock(&dev_base_lock); - if (dev->operstate != transition) { - dev->operstate = transition; + if (READ_ONCE(dev->operstate) != transition) { + WRITE_ONCE(dev->operstate, transition); write_unlock(&dev_base_lock); netdev_state_change(dev); } else { @@ -78,14 +78,14 @@ static void hsr_check_announce(struct net_device *hsr_dev, hsr = netdev_priv(hsr_dev); - if (hsr_dev->operstate == IF_OPER_UP && old_operstate != IF_OPER_UP) { + if (READ_ONCE(hsr_dev->operstate) == IF_OPER_UP && old_operstate != IF_OPER_UP) { /* Went up */ hsr->announce_count = 0; mod_timer(&hsr->announce_timer, jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL)); } - if (hsr_dev->operstate != IF_OPER_UP && old_operstate == IF_OPER_UP) + if (READ_ONCE(hsr_dev->operstate) != IF_OPER_UP && old_operstate == IF_OPER_UP) /* Went down */ del_timer(&hsr->announce_timer); } @@ -100,7 +100,7 @@ void hsr_check_carrier_and_operstate(struct hsr_priv *hsr) /* netif_stacked_transfer_operstate() cannot be used here since * it doesn't set IF_OPER_LOWERLAYERDOWN (?) */ - old_operstate = master->dev->operstate; + old_operstate = READ_ONCE(master->dev->operstate); has_carrier = hsr_check_carrier(master); hsr_set_operstate(master, has_carrier); hsr_check_announce(master->dev, old_operstate); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ca1b719323c0b..2c1ed642e3f45 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6025,7 +6025,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, (dev->ifindex != dev_get_iflink(dev) && nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) || nla_put_u8(skb, IFLA_OPERSTATE, - netif_running(dev) ? dev->operstate : IF_OPER_DOWN)) + netif_running(dev) ? READ_ONCE(dev->operstate) : IF_OPER_DOWN)) goto nla_put_failure; protoinfo = nla_nest_start_noflag(skb, IFLA_PROTINFO); if (!protoinfo) -- cgit 1.2.3-korg From e154bb7a6ebbe5414accb5d94dc5ba80c204ea64 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Feb 2024 06:32:40 +0000 Subject: net-sysfs: convert netstat_show() to RCU dev_get_stats() can be called from RCU, there is no need to acquire dev_base_lock. Change dev_isalive() comment to reflect we no longer use dev_base_lock from net/core/net-sysfs.c Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index c5d164b8c6bfb..946caefdd9599 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -34,7 +34,7 @@ static const char fmt_dec[] = "%d\n"; static const char fmt_ulong[] = "%lu\n"; static const char fmt_u64[] = "%llu\n"; -/* Caller holds RTNL, RCU or dev_base_lock */ +/* Caller holds RTNL or RCU */ static inline int dev_isalive(const struct net_device *dev) { return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED; @@ -685,14 +685,14 @@ static ssize_t netstat_show(const struct device *d, WARN_ON(offset > sizeof(struct rtnl_link_stats64) || offset % sizeof(u64) != 0); - read_lock(&dev_base_lock); + rcu_read_lock(); if (dev_isalive(dev)) { struct rtnl_link_stats64 temp; const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); ret = sysfs_emit(buf, fmt_u64, *(u64 *)(((u8 *)stats) + offset)); } - read_unlock(&dev_base_lock); + rcu_read_unlock(); return ret; } -- cgit 1.2.3-korg From 6a2968ee1ee2cc6fce30f6f5724442b34b1483b3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Feb 2024 06:32:42 +0000 Subject: net: add netdev_set_operstate() helper dev_base_lock is going away, add netdev_set_operstate() helper so that hsr does not have to know core internals. Remove dev_base_lock acquisition from rfc2863_policy() v3: use an "unsigned int" for dev->operstate, so that try_cmpxchg() can work on all arches. ( https://lore.kernel.org/oe-kbuild-all/202402081918.OLyGaea3-lkp@intel.com/ ) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- include/linux/rtnetlink.h | 2 ++ net/core/link_watch.c | 9 ++------- net/core/rtnetlink.c | 22 +++++++++++++++------- net/hsr/hsr_device.c | 22 ++++++---------------- 5 files changed, 26 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6311246551072..697370706a82d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2258,7 +2258,7 @@ struct net_device { const struct tlsdev_ops *tlsdev_ops; #endif - unsigned char operstate; + unsigned int operstate; unsigned char link_mode; unsigned char if_port; diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 21780608cf47c..cdfc897f1e3c6 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -172,4 +172,6 @@ rtnl_notify_needed(const struct net *net, u16 nlflags, u32 group) return (nlflags & NLM_F_ECHO) || rtnl_has_listeners(net, group); } +void netdev_set_operstate(struct net_device *dev, int newstate); + #endif /* __LINUX_RTNETLINK_H */ diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 1b93e054c9a3c..8ec35194bfcb8 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -33,7 +33,7 @@ static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event); static LIST_HEAD(lweventlist); static DEFINE_SPINLOCK(lweventlist_lock); -static unsigned char default_operstate(const struct net_device *dev) +static unsigned int default_operstate(const struct net_device *dev) { if (netif_testing(dev)) return IF_OPER_TESTING; @@ -62,16 +62,13 @@ static unsigned char default_operstate(const struct net_device *dev) return IF_OPER_UP; } - static void rfc2863_policy(struct net_device *dev) { - unsigned char operstate = default_operstate(dev); + unsigned int operstate = default_operstate(dev); if (operstate == READ_ONCE(dev->operstate)) return; - write_lock(&dev_base_lock); - switch(dev->link_mode) { case IF_LINK_MODE_TESTING: if (operstate == IF_OPER_UP) @@ -88,8 +85,6 @@ static void rfc2863_policy(struct net_device *dev) } WRITE_ONCE(dev->operstate, operstate); - - write_unlock(&dev_base_lock); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 43d92de8601cb..e484ba44f23b2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -842,9 +842,22 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, } EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); +void netdev_set_operstate(struct net_device *dev, int newstate) +{ + unsigned int old = READ_ONCE(dev->operstate); + + do { + if (old == newstate) + return; + } while (!try_cmpxchg(&dev->operstate, &old, newstate)); + + netdev_state_change(dev); +} +EXPORT_SYMBOL(netdev_set_operstate); + static void set_operstate(struct net_device *dev, unsigned char transition) { - unsigned char operstate = dev->operstate; + unsigned char operstate = READ_ONCE(dev->operstate); switch (transition) { case IF_OPER_UP: @@ -866,12 +879,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition) break; } - if (READ_ONCE(dev->operstate) != operstate) { - write_lock(&dev_base_lock); - WRITE_ONCE(dev->operstate, operstate); - write_unlock(&dev_base_lock); - netdev_state_change(dev); - } + netdev_set_operstate(dev, operstate); } static unsigned int rtnl_dev_get_flags(const struct net_device *dev) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index be0e43f46556e..5ef6d437db727 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -28,29 +28,19 @@ static bool is_slave_up(struct net_device *dev) return dev && is_admin_up(dev) && netif_oper_up(dev); } -static void __hsr_set_operstate(struct net_device *dev, int transition) -{ - write_lock(&dev_base_lock); - if (READ_ONCE(dev->operstate) != transition) { - WRITE_ONCE(dev->operstate, transition); - write_unlock(&dev_base_lock); - netdev_state_change(dev); - } else { - write_unlock(&dev_base_lock); - } -} - static void hsr_set_operstate(struct hsr_port *master, bool has_carrier) { - if (!is_admin_up(master->dev)) { - __hsr_set_operstate(master->dev, IF_OPER_DOWN); + struct net_device *dev = master->dev; + + if (!is_admin_up(dev)) { + netdev_set_operstate(dev, IF_OPER_DOWN); return; } if (has_carrier) - __hsr_set_operstate(master->dev, IF_OPER_UP); + netdev_set_operstate(dev, IF_OPER_UP); else - __hsr_set_operstate(master->dev, IF_OPER_LOWERLAYERDOWN); + netdev_set_operstate(dev, IF_OPER_LOWERLAYERDOWN); } static bool hsr_check_carrier(struct hsr_port *master) -- cgit 1.2.3-korg From 2dd4d828d648e101aaf19326afcdfee8667cb185 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Feb 2024 06:32:43 +0000 Subject: net: remove dev_base_lock from do_setlink() We hold RTNL here, and dev->link_mode readers already are using READ_ONCE(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e484ba44f23b2..39e66bf3e2384 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2961,11 +2961,9 @@ static int do_setlink(const struct sk_buff *skb, if (tb[IFLA_LINKMODE]) { unsigned char value = nla_get_u8(tb[IFLA_LINKMODE]); - write_lock(&dev_base_lock); if (dev->link_mode ^ value) status |= DO_SETLINK_NOTIFY; WRITE_ONCE(dev->link_mode, value); - write_unlock(&dev_base_lock); } if (tb[IFLA_VFINFO_LIST]) { -- cgit 1.2.3-korg From e51b962438741f5482c82fb225c1d59136f0fd87 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Feb 2024 06:32:44 +0000 Subject: net: remove dev_base_lock from register_netdevice() and friends. RTNL already protects writes to dev->reg_state, we no longer need to hold dev_base_lock to protect the readers. unlist_netdevice() second argument can be removed. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 26f93446b743e..02cf9fd68da63 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -414,7 +414,7 @@ static void list_netdevice(struct net_device *dev) /* Device list removal * caller must respect a RCU grace period before freeing/reusing dev */ -static void unlist_netdevice(struct net_device *dev, bool lock) +static void unlist_netdevice(struct net_device *dev) { struct netdev_name_node *name_node; struct net *net = dev_net(dev); @@ -427,13 +427,11 @@ static void unlist_netdevice(struct net_device *dev, bool lock) netdev_name_node_del(name_node); /* Unlink dev from the device chain */ - if (lock) - write_lock(&dev_base_lock); + write_lock(&dev_base_lock); list_del_rcu(&dev->dev_list); netdev_name_node_del(dev->name_node); hlist_del_rcu(&dev->index_hlist); - if (lock) - write_unlock(&dev_base_lock); + write_unlock(&dev_base_lock); dev_base_seq_inc(dev_net(dev)); } @@ -10338,9 +10336,9 @@ int register_netdevice(struct net_device *dev) goto err_ifindex_release; ret = netdev_register_kobject(dev); - write_lock(&dev_base_lock); + WRITE_ONCE(dev->reg_state, ret ? NETREG_UNREGISTERED : NETREG_REGISTERED); - write_unlock(&dev_base_lock); + if (ret) goto err_uninit_notify; @@ -10629,9 +10627,7 @@ void netdev_run_todo(void) continue; } - write_lock(&dev_base_lock); WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERED); - write_unlock(&dev_base_lock); linkwatch_sync_dev(dev); } @@ -11138,10 +11134,8 @@ void unregister_netdevice_many_notify(struct list_head *head, list_for_each_entry(dev, head, unreg_list) { /* And unlink it from device chain. */ - write_lock(&dev_base_lock); - unlist_netdevice(dev, false); + unlist_netdevice(dev); WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERING); - write_unlock(&dev_base_lock); } flush_all_backlogs(); @@ -11323,7 +11317,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, dev_close(dev); /* And unlink it from device chain */ - unlist_netdevice(dev, true); + unlist_netdevice(dev); synchronize_net(); -- cgit 1.2.3-korg From 1b3ef46cb7f2618cc0b507393220a69810f6da12 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Feb 2024 06:32:45 +0000 Subject: net: remove dev_base_lock dev_base_lock is not needed anymore, all remaining users also hold RTNL. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 -- net/core/dev.c | 39 ++++----------------------------------- 2 files changed, 4 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 697370706a82d..c541550b0e6e6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3077,8 +3077,6 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev); int call_netdevice_notifiers_info(unsigned long val, struct netdev_notifier_info *info); -extern rwlock_t dev_base_lock; /* Device list lock */ - #define for_each_netdev(net, d) \ list_for_each_entry(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_reverse(net, d) \ diff --git a/net/core/dev.c b/net/core/dev.c index 02cf9fd68da63..d8dd293a7a279 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -168,28 +168,6 @@ static int call_netdevice_notifiers_extack(unsigned long val, struct net_device *dev, struct netlink_ext_ack *extack); -/* - * The @dev_base_head list is protected by @dev_base_lock and the rtnl - * semaphore. - * - * Pure readers hold dev_base_lock for reading, or rcu_read_lock() - * - * Writers must hold the rtnl semaphore while they loop through the - * dev_base_head list, and hold dev_base_lock for writing when they do the - * actual updates. This allows pure readers to access the list even - * while a writer is preparing to update it. - * - * To put it another way, dev_base_lock is held for writing only to - * protect against pure readers; the rtnl semaphore provides the - * protection against other writers. - * - * See, for example usages, register_netdevice() and - * unregister_netdevice(), which must be called with the rtnl - * semaphore held. - */ -DEFINE_RWLOCK(dev_base_lock); -EXPORT_SYMBOL(dev_base_lock); - static DEFINE_MUTEX(ifalias_mutex); /* protects napi_hash addition/deletion and napi_gen_id */ @@ -395,12 +373,10 @@ static void list_netdevice(struct net_device *dev) ASSERT_RTNL(); - write_lock(&dev_base_lock); list_add_tail_rcu(&dev->dev_list, &net->dev_base_head); netdev_name_node_add(net, dev->name_node); hlist_add_head_rcu(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); - write_unlock(&dev_base_lock); netdev_for_each_altname(dev, name_node) netdev_name_node_add(net, name_node); @@ -427,11 +403,9 @@ static void unlist_netdevice(struct net_device *dev) netdev_name_node_del(name_node); /* Unlink dev from the device chain */ - write_lock(&dev_base_lock); list_del_rcu(&dev->dev_list); netdev_name_node_del(dev->name_node); hlist_del_rcu(&dev->index_hlist); - write_unlock(&dev_base_lock); dev_base_seq_inc(dev_net(dev)); } @@ -752,9 +726,9 @@ EXPORT_SYMBOL_GPL(dev_fill_forward_path); * @net: the applicable net namespace * @name: name to find * - * Find an interface by name. Must be called under RTNL semaphore - * or @dev_base_lock. If the name is found a pointer to the device - * is returned. If the name is not found then %NULL is returned. The + * Find an interface by name. Must be called under RTNL semaphore. + * If the name is found a pointer to the device is returned. + * If the name is not found then %NULL is returned. The * reference counters are not incremented so the caller must be * careful with locks. */ @@ -835,8 +809,7 @@ EXPORT_SYMBOL(netdev_get_by_name); * Search for an interface by index. Returns %NULL if the device * is not found or a pointer to the device. The device has not * had its reference counter increased so the caller must be careful - * about locking. The caller must hold either the RTNL semaphore - * or @dev_base_lock. + * about locking. The caller must hold the RTNL semaphore. */ struct net_device *__dev_get_by_index(struct net *net, int ifindex) @@ -1241,15 +1214,11 @@ rollback: netdev_adjacent_rename_links(dev, oldname); - write_lock(&dev_base_lock); netdev_name_node_del(dev->name_node); - write_unlock(&dev_base_lock); synchronize_net(); - write_lock(&dev_base_lock); netdev_name_node_add(net, dev->name_node); - write_unlock(&dev_base_lock); ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); ret = notifier_to_errno(ret); -- cgit 1.2.3-korg From 414532d8aa8915d9aebd01c6b5aa54bdfd98da71 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Feb 2024 20:08:43 +0100 Subject: wifi: cfg80211: use IEEE80211_MAX_MESH_ID_LEN appropriately Even if that's the same as IEEE80211_MAX_SSID_LEN, we really should just use IEEE80211_MAX_MESH_ID_LEN for mesh, rather than having the BUILD_BUG_ON()s. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 +- net/wireless/nl80211.c | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 93e9abb7fc3d5..57c2298af35b2 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6223,7 +6223,7 @@ struct wireless_dev { int beacon_interval; struct cfg80211_chan_def preset_chandef; struct cfg80211_chan_def chandef; - u8 id[IEEE80211_MAX_SSID_LEN]; + u8 id[IEEE80211_MAX_MESH_ID_LEN]; u8 id_len, id_up_len; } mesh; struct { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5f18cbf7cc3d7..dd9a092b6baba 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4205,8 +4205,6 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) if (netif_running(dev)) return -EBUSY; - BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != - IEEE80211_MAX_MESH_ID_LEN); wdev->u.mesh.id_up_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); memcpy(wdev->u.mesh.id, @@ -4312,8 +4310,6 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_MESH_POINT: if (!info->attrs[NL80211_ATTR_MESH_ID]) break; - BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != - IEEE80211_MAX_MESH_ID_LEN); wdev->u.mesh.id_up_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); memcpy(wdev->u.mesh.id, -- cgit 1.2.3-korg From d25f32722f5069faf688b2646ec4071ac2cb3c53 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Tue, 13 Feb 2024 21:12:05 +0800 Subject: tcp: no need to use acceptable for conn_request Since tcp_conn_request() always returns zero, there is no need to keep the dead code. Remove it then. Link: https://lore.kernel.org/netdev/CANn89iJwx9b2dUGUKFSV3PF=kN5o+kxz3A_fHZZsOS4AnXhBNw@mail.gmail.com/ Suggested-by: Eric Dumazet Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240213131205.4309-1-kerneljasonxing@gmail.com Signed-off-by: Paolo Abeni --- net/ipv4/tcp_input.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2d20edf652e6c..b1c4462a0798c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6623,7 +6623,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) const struct tcphdr *th = tcp_hdr(skb); struct request_sock *req; int queued = 0; - bool acceptable; SKB_DR(reason); switch (sk->sk_state) { @@ -6649,12 +6648,10 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) */ rcu_read_lock(); local_bh_disable(); - acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0; + icsk->icsk_af_ops->conn_request(sk, skb); local_bh_enable(); rcu_read_unlock(); - if (!acceptable) - return 1; consume_skb(skb); return 0; } -- cgit 1.2.3-korg From 984328c7657dbf2fea8f67e1171cb93a22d7fc06 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Tue, 13 Feb 2024 22:40:58 +0900 Subject: tipc: Cleanup tipc_nl_bearer_add() error paths Consolidate the error paths of tipc_nl_bearer_add() under the common label if the function holds rtnl_lock. Signed-off-by: Shigeru Yoshida Reviewed-by: Tung Nguyen Link: https://lore.kernel.org/r/20240213134058.386123-1-syoshida@redhat.com Signed-off-by: Paolo Abeni --- net/tipc/bearer.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 878415c435276..5a526ebafeb4b 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -1079,30 +1079,27 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); b = tipc_bearer_find(net, name); if (!b) { - rtnl_unlock(); NL_SET_ERR_MSG(info->extack, "Bearer not found"); - return -EINVAL; + err = -EINVAL; + goto out; } #ifdef CONFIG_TIPC_MEDIA_UDP if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) { if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) { - rtnl_unlock(); NL_SET_ERR_MSG(info->extack, "UDP option is unsupported"); - return -EINVAL; + err = -EINVAL; + goto out; } err = tipc_udp_nl_bearer_add(b, attrs[TIPC_NLA_BEARER_UDP_OPTS]); - if (err) { - rtnl_unlock(); - return err; - } } #endif +out: rtnl_unlock(); - return 0; + return err; } int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) -- cgit 1.2.3-korg From 2aa8f155b09519814e449dc19adacf01fd1367ee Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Tue, 13 Feb 2024 23:26:30 -0700 Subject: net: ipv6/addrconf: ensure that regen_advance is at least 2 seconds RFC 8981 defines REGEN_ADVANCE as follows: REGEN_ADVANCE = 2 + (TEMP_IDGEN_RETRIES * DupAddrDetectTransmits * RetransTimer / 1000) Thus, allowing it to be less than 2 seconds is technically a protocol violation. Link: https://datatracker.ietf.org/doc/html/rfc8981#name-defined-protocol-parameters Signed-off-by: Alex Henrie Reviewed-by: David Ahern Signed-off-by: Paolo Abeni --- Documentation/networking/ip-sysctl.rst | 4 ++-- net/ipv6/addrconf.c | 15 +++++++++------ 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 7afff42612e94..4583059313453 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -2503,7 +2503,7 @@ use_tempaddr - INTEGER temp_valid_lft - INTEGER valid lifetime (in seconds) for temporary addresses. If less than the - minimum required lifetime (typically 5 seconds), temporary addresses + minimum required lifetime (typically 5-7 seconds), temporary addresses will not be created. Default: 172800 (2 days) @@ -2511,7 +2511,7 @@ temp_valid_lft - INTEGER temp_prefered_lft - INTEGER Preferred lifetime (in seconds) for temporary addresses. If temp_prefered_lft is less than the minimum required lifetime (typically - 5 seconds), temporary addresses will not be created. If + 5-7 seconds), temporary addresses will not be created. If temp_prefered_lft is greater than temp_valid_lft, the preferred lifetime is temp_valid_lft. diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2c1ed642e3f45..65e886d7d80cc 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1339,6 +1339,13 @@ out: in6_ifa_put(ifp); } +static unsigned long ipv6_get_regen_advance(struct inet6_dev *idev) +{ + return 2 + idev->cnf.regen_max_retry * + idev->cnf.dad_transmits * + max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ; +} + static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block) { struct inet6_dev *idev = ifp->idev; @@ -1380,9 +1387,7 @@ retry: age = (now - ifp->tstamp) / HZ; - regen_advance = idev->cnf.regen_max_retry * - idev->cnf.dad_transmits * - max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ; + regen_advance = ipv6_get_regen_advance(idev); /* recalculate max_desync_factor each time and update * idev->desync_factor if it's larger @@ -4595,9 +4600,7 @@ restart: !ifp->regen_count && ifp->ifpub) { /* This is a non-regenerated temporary addr. */ - unsigned long regen_advance = ifp->idev->cnf.regen_max_retry * - ifp->idev->cnf.dad_transmits * - max(NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME), HZ/100) / HZ; + unsigned long regen_advance = ipv6_get_regen_advance(ifp->idev); if (age + regen_advance >= ifp->prefered_lft) { struct inet6_ifaddr *ifpub = ifp->ifpub; -- cgit 1.2.3-korg From a5fcea2d2f790aa90b6e996d411ae2cf8db55186 Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Tue, 13 Feb 2024 23:26:31 -0700 Subject: net: ipv6/addrconf: introduce a regen_min_advance sysctl In RFC 8981, REGEN_ADVANCE cannot be less than 2 seconds, and the RFC does not permit the creation of temporary addresses with lifetimes shorter than that: > When processing a Router Advertisement with a > Prefix Information option carrying a prefix for the purposes of > address autoconfiguration (i.e., the A bit is set), the host MUST > perform the following steps: > 5. A temporary address is created only if this calculated preferred > lifetime is greater than REGEN_ADVANCE time units. However, some users want to change their IPv6 address as frequently as possible regardless of the RFC's arbitrary minimum lifetime. For the benefit of those users, add a regen_min_advance sysctl parameter that can be set to below or above 2 seconds. Link: https://datatracker.ietf.org/doc/html/rfc8981 Signed-off-by: Alex Henrie Reviewed-by: David Ahern Signed-off-by: Paolo Abeni --- Documentation/networking/ip-sysctl.rst | 10 ++++++++++ include/linux/ipv6.h | 1 + include/net/addrconf.h | 5 +++-- net/ipv6/addrconf.c | 11 ++++++++++- 4 files changed, 24 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 4583059313453..407d917d1a367 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -2535,6 +2535,16 @@ max_desync_factor - INTEGER Default: 600 +regen_min_advance - INTEGER + How far in advance (in seconds), at minimum, to create a new temporary + address before the current one is deprecated. This value is added to + the amount of time that may be required for duplicate address detection + to determine when to create a new address. Linux permits setting this + value to less than the default of 2 seconds, but a value less than 2 + does not conform to RFC 8981. + + Default: 2 + regen_max_retry - INTEGER Number of attempts before give up attempting to generate valid temporary addresses. diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 5e605e384aac8..ef3aa060a289e 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -27,6 +27,7 @@ struct ipv6_devconf { __s32 use_tempaddr; __s32 temp_valid_lft; __s32 temp_prefered_lft; + __s32 regen_min_advance; __s32 regen_max_retry; __s32 max_desync_factor; __s32 max_addresses; diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 61ebe723ee4d5..30d6f1e84e465 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -8,8 +8,9 @@ #define MIN_VALID_LIFETIME (2*3600) /* 2 hours */ -#define TEMP_VALID_LIFETIME (7*86400) -#define TEMP_PREFERRED_LIFETIME (86400) +#define TEMP_VALID_LIFETIME (7*86400) /* 1 week */ +#define TEMP_PREFERRED_LIFETIME (86400) /* 24 hours */ +#define REGEN_MIN_ADVANCE (2) /* 2 seconds */ #define REGEN_MAX_RETRY (3) #define MAX_DESYNC_FACTOR (600) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 65e886d7d80cc..283823fba96a8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -195,6 +195,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, .temp_prefered_lft = TEMP_PREFERRED_LIFETIME, + .regen_min_advance = REGEN_MIN_ADVANCE, .regen_max_retry = REGEN_MAX_RETRY, .max_desync_factor = MAX_DESYNC_FACTOR, .max_addresses = IPV6_MAX_ADDRESSES, @@ -257,6 +258,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, .temp_prefered_lft = TEMP_PREFERRED_LIFETIME, + .regen_min_advance = REGEN_MIN_ADVANCE, .regen_max_retry = REGEN_MAX_RETRY, .max_desync_factor = MAX_DESYNC_FACTOR, .max_addresses = IPV6_MAX_ADDRESSES, @@ -1341,7 +1343,7 @@ out: static unsigned long ipv6_get_regen_advance(struct inet6_dev *idev) { - return 2 + idev->cnf.regen_max_retry * + return idev->cnf.regen_min_advance + idev->cnf.regen_max_retry * idev->cnf.dad_transmits * max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ; } @@ -6819,6 +6821,13 @@ static const struct ctl_table addrconf_sysctl[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "regen_min_advance", + .data = &ipv6_devconf.regen_min_advance, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "regen_max_retry", .data = &ipv6_devconf.regen_max_retry, -- cgit 1.2.3-korg From f4bcbf360ac8dc424dc4d2b384b528e69b6f34d9 Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Tue, 13 Feb 2024 23:26:32 -0700 Subject: net: ipv6/addrconf: clamp preferred_lft to the minimum required If the preferred lifetime was less than the minimum required lifetime, ipv6_create_tempaddr would error out without creating any new address. On my machine and network, this error happened immediately with the preferred lifetime set to 5 seconds or less, after a few minutes with the preferred lifetime set to 6 seconds, and not at all with the preferred lifetime set to 7 seconds. During my investigation, I found a Stack Exchange post from another person who seems to have had the same problem: They stopped getting new addresses if they lowered the preferred lifetime below 3 seconds, and they didn't really know why. The preferred lifetime is a preference, not a hard requirement. The kernel does not strictly forbid new connections on a deprecated address, nor does it guarantee that the address will be disposed of the instant its total valid lifetime expires. So rather than disable IPv6 privacy extensions altogether if the minimum required lifetime swells above the preferred lifetime, it is more in keeping with the user's intent to increase the temporary address's lifetime to the minimum necessary for the current network conditions. With these fixes, setting the preferred lifetime to 5 or 6 seconds "just works" because the extra fraction of a second is practically unnoticeable. It's even possible to reduce the time before deprecation to 1 or 2 seconds by setting /proc/sys/net/ipv6/conf/*/regen_min_advance and /proc/sys/net/ipv6/conf/*/dad_transmits to 0. I realize that that is a pretty niche use case, but I know at least one person who would gladly sacrifice performance and convenience to be sure that they are getting the maximum possible level of privacy. Link: https://serverfault.com/a/1031168/310447 Signed-off-by: Alex Henrie Reviewed-by: David Ahern Signed-off-by: Paolo Abeni --- Documentation/networking/ip-sysctl.rst | 2 +- net/ipv6/addrconf.c | 43 +++++++++++++++++++++++++++------- 2 files changed, 35 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 407d917d1a367..bd50df6a5a42e 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -2511,7 +2511,7 @@ temp_valid_lft - INTEGER temp_prefered_lft - INTEGER Preferred lifetime (in seconds) for temporary addresses. If temp_prefered_lft is less than the minimum required lifetime (typically - 5-7 seconds), temporary addresses will not be created. If + 5-7 seconds), the preferred lifetime is the minimum required. If temp_prefered_lft is greater than temp_valid_lft, the preferred lifetime is temp_valid_lft. diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 283823fba96a8..d3f4b7b9cf1fe 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1354,6 +1354,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block) unsigned long tmp_tstamp, age; unsigned long regen_advance; unsigned long now = jiffies; + u32 if_public_preferred_lft; s32 cnf_temp_preferred_lft; struct inet6_ifaddr *ift; struct ifa6_config cfg; @@ -1409,11 +1410,13 @@ retry: } } + if_public_preferred_lft = ifp->prefered_lft; + memset(&cfg, 0, sizeof(cfg)); cfg.valid_lft = min_t(__u32, ifp->valid_lft, idev->cnf.temp_valid_lft + age); cfg.preferred_lft = cnf_temp_preferred_lft + age - idev->desync_factor; - cfg.preferred_lft = min_t(__u32, ifp->prefered_lft, cfg.preferred_lft); + cfg.preferred_lft = min_t(__u32, if_public_preferred_lft, cfg.preferred_lft); cfg.preferred_lft = min_t(__u32, cfg.valid_lft, cfg.preferred_lft); cfg.plen = ifp->prefix_len; @@ -1422,19 +1425,41 @@ retry: write_unlock_bh(&idev->lock); - /* A temporary address is created only if this calculated Preferred - * Lifetime is greater than REGEN_ADVANCE time units. In particular, - * an implementation must not create a temporary address with a zero - * Preferred Lifetime. + /* From RFC 4941: + * + * A temporary address is created only if this calculated Preferred + * Lifetime is greater than REGEN_ADVANCE time units. In + * particular, an implementation must not create a temporary address + * with a zero Preferred Lifetime. + * + * ... + * + * When creating a temporary address, the lifetime values MUST be + * derived from the corresponding prefix as follows: + * + * ... + * + * * Its Preferred Lifetime is the lower of the Preferred Lifetime + * of the public address or TEMP_PREFERRED_LIFETIME - + * DESYNC_FACTOR. + * + * To comply with the RFC's requirements, clamp the preferred lifetime + * to a minimum of regen_advance, unless that would exceed valid_lft or + * ifp->prefered_lft. + * * Use age calculation as in addrconf_verify to avoid unnecessary * temporary addresses being generated. */ age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; if (cfg.preferred_lft <= regen_advance + age) { - in6_ifa_put(ifp); - in6_dev_put(idev); - ret = -1; - goto out; + cfg.preferred_lft = regen_advance + age + 1; + if (cfg.preferred_lft > cfg.valid_lft || + cfg.preferred_lft > if_public_preferred_lft) { + in6_ifa_put(ifp); + in6_dev_put(idev); + ret = -1; + goto out; + } } cfg.ifa_flags = IFA_F_TEMPORARY; -- cgit 1.2.3-korg From 3de21a8990d3c2cc507e9cc4ed00f36358d5b93e Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 12 Feb 2024 17:16:13 +0100 Subject: genetlink: Add per family bind/unbind callbacks Add genetlink family bind()/unbind() callbacks when adding/removing multicast group to/from netlink client socket via setsockopt() or bind() syscall. They can be used to track if consumers of netlink multicast messages emerge or disappear. Thus, a client implementing callbacks, can now send events only when there are active consumers, preventing unnecessary work when none exist. Suggested-by: Jakub Kicinski Signed-off-by: Stanislaw Gruszka Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240212161615.161935-2-stanislaw.gruszka@linux.intel.com Signed-off-by: Jakub Kicinski --- include/net/genetlink.h | 4 ++++ net/netlink/genetlink.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) (limited to 'net') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index e61469129402b..ecadba836ae56 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -41,6 +41,8 @@ struct genl_info; * do additional, common, filtering and return an error * @post_doit: called after an operation's doit callback, it may * undo operations done by pre_doit, for example release locks + * @bind: called when family multicast group is added to a netlink socket + * @unbind: called when family multicast group is removed from a netlink socket * @module: pointer to the owning module (set to THIS_MODULE) * @mcgrps: multicast groups used by this family * @n_mcgrps: number of multicast groups @@ -84,6 +86,8 @@ struct genl_family { void (*post_doit)(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); + int (*bind)(int mcgrp); + void (*unbind)(int mcgrp); const struct genl_ops * ops; const struct genl_small_ops *small_ops; const struct genl_split_ops *split_ops; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 8c7af02f84540..50ec599a5cff5 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1836,6 +1836,9 @@ static int genl_bind(struct net *net, int group) !ns_capable(net->user_ns, CAP_SYS_ADMIN)) ret = -EPERM; + if (family->bind) + family->bind(i); + break; } @@ -1843,12 +1846,39 @@ static int genl_bind(struct net *net, int group) return ret; } +static void genl_unbind(struct net *net, int group) +{ + const struct genl_family *family; + unsigned int id; + + down_read(&cb_lock); + + idr_for_each_entry(&genl_fam_idr, family, id) { + int i; + + if (family->n_mcgrps == 0) + continue; + + i = group - family->mcgrp_offset; + if (i < 0 || i >= family->n_mcgrps) + continue; + + if (family->unbind) + family->unbind(i); + + break; + } + + up_read(&cb_lock); +} + static int __net_init genl_pernet_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = genl_rcv, .flags = NL_CFG_F_NONROOT_RECV, .bind = genl_bind, + .unbind = genl_unbind, .release = genl_release, }; -- cgit 1.2.3-korg From ae94dc25fd73cb41df233a20ca638ba136cc5f3a Mon Sep 17 00:00:00 2001 From: Arınç ÜNAL Date: Tue, 13 Feb 2024 10:29:05 +0300 Subject: net: dsa: remove OF-based MDIO bus registration from DSA core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code block under the "!ds->user_mii_bus && ds->ops->phy_read" check under dsa_switch_setup() populates ds->user_mii_bus. The use of ds->user_mii_bus is inappropriate when the MDIO bus of the switch is described on the device tree [1]. For this reason, use this code block only for switches [with MDIO bus] probed on platform_data, and OF which the switch MDIO bus isn't described on the device tree. Therefore, remove OF-based MDIO bus registration as it's useless for these cases. These subdrivers which control switches [with MDIO bus] probed on OF, will lose the ability to register the MDIO bus OF-based: drivers/net/dsa/b53/b53_common.c drivers/net/dsa/lan9303-core.c drivers/net/dsa/vitesse-vsc73xx-core.c These subdrivers let the DSA core driver register the bus: - ds->ops->phy_read() and ds->ops->phy_write() are present. - ds->user_mii_bus is not populated. The commit fe7324b93222 ("net: dsa: OF-ware slave_mii_bus") which brought OF-based MDIO bus registration on the DSA core driver is reasonably recent and, in this time frame, there have been no device trees in the Linux repository that started describing the MDIO bus, or dt-bindings defining the MDIO bus for the switches these subdrivers control. So I don't expect any devices to be affected. The logic we encourage is that all subdrivers should register the switch MDIO bus on their own [2]. And, for subdrivers which control switches [with MDIO bus] probed on OF, this logic must be followed to support all cases properly: No switch MDIO bus defined: Populate ds->user_mii_bus, register the MDIO bus, set the interrupts for PHYs if "interrupt-controller" is defined at the switch node. This case should only be covered for the switches which their dt-bindings documentation didn't document the MDIO bus from the start. This is to keep supporting the device trees that do not describe the MDIO bus on the device tree but the MDIO bus is being used nonetheless. Switch MDIO bus defined: Don't populate ds->user_mii_bus, register the MDIO bus, set the interrupts for PHYs if ["interrupt-controller" is defined at the switch node and "interrupts" is defined at the PHY nodes under the switch MDIO bus node]. Switch MDIO bus defined but explicitly disabled: If the device tree says status = "disabled" for the MDIO bus, we shouldn't need an MDIO bus at all. Instead, just exit as early as possible and do not call any MDIO API. After all subdrivers that control switches with MDIO buses are made to register the MDIO buses on their own, we will be able to get rid of dsa_switch_ops :: phy_read() and :: phy_write(), and the code block for registering the MDIO bus on the DSA core driver. Link: https://lore.kernel.org/netdev/20231213120656.x46fyad6ls7sqyzv@skbuf/ [1] Link: https://lore.kernel.org/netdev/20240103184459.dcbh57wdnlox6w7d@skbuf/ [2] Suggested-by: Luiz Angelo Daros de Luca Acked-by: Luiz Angelo Daros de Luca Signed-off-by: Arınç ÜNAL Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20240213-for-netnext-dsa-mdio-bus-v2-1-0ff6f4823a9e@arinc9.com Signed-off-by: Jakub Kicinski --- net/dsa/dsa.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index ac7be864e80dc..09d2f5d4b3dd4 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -626,7 +625,6 @@ static void dsa_switch_teardown_tag_protocol(struct dsa_switch *ds) static int dsa_switch_setup(struct dsa_switch *ds) { - struct device_node *dn; int err; if (ds->setup) @@ -666,10 +664,7 @@ static int dsa_switch_setup(struct dsa_switch *ds) dsa_user_mii_bus_init(ds); - dn = of_get_child_by_name(ds->dev->of_node, "mdio"); - - err = of_mdiobus_register(ds->user_mii_bus, dn); - of_node_put(dn); + err = mdiobus_register(ds->user_mii_bus); if (err < 0) goto free_user_mii_bus; } -- cgit 1.2.3-korg From 2210c5485e4331d0abd7a0296f10178d1dc25cd2 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Wed, 14 Feb 2024 09:47:07 +0100 Subject: net/iucv: fix virtual vs physical address confusion Fix virtual vs physical address confusion. This does not fix a bug since virtual and physical address spaces are currently the same. Acked-by: Alexandra Winter Signed-off-by: Alexander Gordeev Signed-off-by: David S. Miller --- net/iucv/iucv.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 6334f64f04d5f..9e62783e6acb5 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -286,6 +286,7 @@ static union iucv_param *iucv_param_irq[NR_CPUS]; */ static inline int __iucv_call_b2f0(int command, union iucv_param *parm) { + unsigned long reg1 = virt_to_phys(parm); int cc; asm volatile( @@ -296,7 +297,7 @@ static inline int __iucv_call_b2f0(int command, union iucv_param *parm) " srl %[cc],28\n" : [cc] "=&d" (cc), "+m" (*parm) : [reg0] "d" ((unsigned long)command), - [reg1] "d" ((unsigned long)parm) + [reg1] "d" (reg1) : "cc", "0", "1"); return cc; } @@ -1123,7 +1124,7 @@ int __iucv_message_receive(struct iucv_path *path, struct iucv_message *msg, parm = iucv_param[smp_processor_id()]; memset(parm, 0, sizeof(union iucv_param)); - parm->db.ipbfadr1 = (u32)(addr_t) buffer; + parm->db.ipbfadr1 = (u32)virt_to_phys(buffer); parm->db.ipbfln1f = (u32) size; parm->db.ipmsgid = msg->id; parm->db.ippathid = path->pathid; @@ -1241,7 +1242,7 @@ int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg, parm->dpl.iptrgcls = msg->class; memcpy(parm->dpl.iprmmsg, reply, min_t(size_t, size, 8)); } else { - parm->db.ipbfadr1 = (u32)(addr_t) reply; + parm->db.ipbfadr1 = (u32)virt_to_phys(reply); parm->db.ipbfln1f = (u32) size; parm->db.ippathid = path->pathid; parm->db.ipflags1 = flags; @@ -1293,7 +1294,7 @@ int __iucv_message_send(struct iucv_path *path, struct iucv_message *msg, parm->dpl.ipmsgtag = msg->tag; memcpy(parm->dpl.iprmmsg, buffer, 8); } else { - parm->db.ipbfadr1 = (u32)(addr_t) buffer; + parm->db.ipbfadr1 = (u32)virt_to_phys(buffer); parm->db.ipbfln1f = (u32) size; parm->db.ippathid = path->pathid; parm->db.ipflags1 = flags | IUCV_IPNORPY; @@ -1378,7 +1379,7 @@ int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg, parm->dpl.iptrgcls = msg->class; parm->dpl.ipsrccls = srccls; parm->dpl.ipmsgtag = msg->tag; - parm->dpl.ipbfadr2 = (u32)(addr_t) answer; + parm->dpl.ipbfadr2 = (u32)virt_to_phys(answer); parm->dpl.ipbfln2f = (u32) asize; memcpy(parm->dpl.iprmmsg, buffer, 8); } else { @@ -1387,9 +1388,9 @@ int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg, parm->db.iptrgcls = msg->class; parm->db.ipsrccls = srccls; parm->db.ipmsgtag = msg->tag; - parm->db.ipbfadr1 = (u32)(addr_t) buffer; + parm->db.ipbfadr1 = (u32)virt_to_phys(buffer); parm->db.ipbfln1f = (u32) size; - parm->db.ipbfadr2 = (u32)(addr_t) answer; + parm->db.ipbfadr2 = (u32)virt_to_phys(answer); parm->db.ipbfln2f = (u32) asize; } rc = iucv_call_b2f0(IUCV_SEND, parm); -- cgit 1.2.3-korg From 21bd52ea38a873b9dce2da86e9d63a95cdbe1275 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 15 Feb 2024 14:12:21 +0100 Subject: tcp: Spelling s/curcuit/circuit/ Fix a misspelling of "circuit". Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b1c4462a0798c..74c03f0a6c0c5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1164,7 +1164,7 @@ static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered, * L|R 1 - orig is lost, retransmit is in flight. * S|R 1 - orig reached receiver, retrans is still in flight. * (L|S|R is logically valid, it could occur when L|R is sacked, - * but it is equivalent to plain S and code short-curcuits it to S. + * but it is equivalent to plain S and code short-circuits it to S. * L|S is logically invalid, it would mean -1 packet in flight 8)) * * These 6 states form finite state machine, controlled by the following events: -- cgit 1.2.3-korg From ea7f3cfaa58873bbe271577efa800647e30f18bd Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 15 Feb 2024 09:05:07 -0800 Subject: net: bql: allow the config to be disabled It is impossible to disable BQL individually today, since there is no prompt for the Kconfig entry, so, the BQL is always enabled if SYSFS is enabled. Create a prompt entry for BQL, so, it could be enabled or disabled at build time independently of SYSFS. Signed-off-by: Breno Leitao Signed-off-by: David S. Miller --- net/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index 4adc47d0c9c2a..3e57ccf0da279 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -331,6 +331,7 @@ config NET_RX_BUSY_POLL config BQL bool + prompt "Enable Byte Queue Limits" depends on SYSFS select DQL default y -- cgit 1.2.3-korg From 1e63e5a813fa6203d7430af51d6bffb728525015 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 16 Feb 2024 15:27:44 -0800 Subject: net: sched: Annotate struct tc_pedit with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct tc_pedit. Additionally, since the element count member must be set before accessing the annotated flexible array member, move its initialization earlier. Link: https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci [1] Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/tc_act/tc_pedit.h | 2 +- net/sched/act_pedit.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/tc_act/tc_pedit.h b/include/uapi/linux/tc_act/tc_pedit.h index f3e61b04fa01b..f5cab7fc96ab1 100644 --- a/include/uapi/linux/tc_act/tc_pedit.h +++ b/include/uapi/linux/tc_act/tc_pedit.h @@ -62,7 +62,7 @@ struct tc_pedit_sel { tc_gen; unsigned char nkeys; unsigned char flags; - struct tc_pedit_key keys[0]; + struct tc_pedit_key keys[] __counted_by(nkeys); }; #define tc_pedit tc_pedit_sel diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index df5a02d5f919c..fc0a35a7b62ac 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -515,11 +515,11 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, spin_unlock_bh(&p->tcf_lock); return -ENOBUFS; } + opt->nkeys = parms->tcfp_nkeys; memcpy(opt->keys, parms->tcfp_keys, flex_array_size(opt, keys, parms->tcfp_nkeys)); opt->index = p->tcf_index; - opt->nkeys = parms->tcfp_nkeys; opt->flags = parms->tcfp_flags; opt->action = p->tcf_action; opt->refcnt = refcount_read(&p->tcf_refcnt) - ref; -- cgit 1.2.3-korg From 56ef27e3abe6d6453b1f4f6127041f3a65d7cbc9 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 15 Feb 2024 12:39:05 +0100 Subject: page_pool: disable direct recycling based on pool->cpuid on destroy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that direct recycling is performed basing on pool->cpuid when set, memory leaks are possible: 1. A pool is destroyed. 2. Alloc cache is emptied (it's done only once). 3. pool->cpuid is still set. 4. napi_pp_put_page() does direct recycling basing on pool->cpuid. 5. Now alloc cache is not empty, but it won't ever be freed. In order to avoid that, rewrite pool->cpuid to -1 when unlinking NAPI to make sure no direct recycling will be possible after emptying the cache. This involves a bit of overhead as pool->cpuid now must be accessed via READ_ONCE() to avoid partial reads. Rename page_pool_unlink_napi() -> page_pool_disable_direct_recycling() to reflect what it actually does and unexport it. Signed-off-by: Alexander Lobakin Reviewed-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20240215113905.96817-1-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/types.h | 5 ----- net/core/page_pool.c | 10 +++++++--- net/core/skbuff.c | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 3828396ae60c2..3590fbe6e3f15 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -210,17 +210,12 @@ struct page_pool *page_pool_create_percpu(const struct page_pool_params *params, struct xdp_mem_info; #ifdef CONFIG_PAGE_POOL -void page_pool_unlink_napi(struct page_pool *pool); void page_pool_destroy(struct page_pool *pool); void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), struct xdp_mem_info *mem); void page_pool_put_page_bulk(struct page_pool *pool, void **data, int count); #else -static inline void page_pool_unlink_napi(struct page_pool *pool) -{ -} - static inline void page_pool_destroy(struct page_pool *pool) { } diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 89c835fcf0944..e8b9399d8e324 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -949,8 +949,13 @@ void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), pool->xdp_mem_id = mem->id; } -void page_pool_unlink_napi(struct page_pool *pool) +static void page_pool_disable_direct_recycling(struct page_pool *pool) { + /* Disable direct recycling based on pool->cpuid. + * Paired with READ_ONCE() in napi_pp_put_page(). + */ + WRITE_ONCE(pool->cpuid, -1); + if (!pool->p.napi) return; @@ -962,7 +967,6 @@ void page_pool_unlink_napi(struct page_pool *pool) WRITE_ONCE(pool->p.napi, NULL); } -EXPORT_SYMBOL(page_pool_unlink_napi); void page_pool_destroy(struct page_pool *pool) { @@ -972,7 +976,7 @@ void page_pool_destroy(struct page_pool *pool) if (!page_pool_put(pool)) return; - page_pool_unlink_napi(pool); + page_pool_disable_direct_recycling(pool); page_pool_free_frag(pool); if (!page_pool_release(pool)) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0d9a489e6ae18..b41856585c24e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1018,7 +1018,7 @@ bool napi_pp_put_page(struct page *page, bool napi_safe) unsigned int cpuid = smp_processor_id(); allow_direct = napi && READ_ONCE(napi->list_owner) == cpuid; - allow_direct |= (pp->cpuid == cpuid); + allow_direct |= READ_ONCE(pp->cpuid) == cpuid; } /* Driver set this to memory recycling info. Reset it on recycle. -- cgit 1.2.3-korg From f853fa5c54e7a0364a52125074dedeaf2c7ddace Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 16 Feb 2024 10:25:43 +0100 Subject: net: page_pool: fix recycle stats for system page_pool allocator Use global percpu page_pool_recycle_stats counter for system page_pool allocator instead of allocating a separate percpu variable for each (also percpu) page pool instance. Reviewed-by: Toke Hoiland-Jorgensen Signed-off-by: Lorenzo Bianconi Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/r/87f572425e98faea3da45f76c3c68815c01a20ee.1708075412.git.lorenzo@kernel.org Signed-off-by: Jakub Kicinski --- include/net/page_pool/types.h | 5 +++-- net/core/dev.c | 1 + net/core/page_pool.c | 22 +++++++++++++++++----- 3 files changed, 21 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 3590fbe6e3f15..5e43a08d3231c 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -18,8 +18,9 @@ * Please note DMA-sync-for-CPU is still * device driver responsibility */ -#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\ - PP_FLAG_DMA_SYNC_DEV) +#define PP_FLAG_SYSTEM_POOL BIT(2) /* Global system page_pool */ +#define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | \ + PP_FLAG_SYSTEM_POOL) /* * Fast allocation side cache array/stack diff --git a/net/core/dev.c b/net/core/dev.c index cc9c2eda65aca..c588808be77f5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11738,6 +11738,7 @@ static int net_page_pool_create(int cpuid) #if IS_ENABLED(CONFIG_PAGE_POOL) struct page_pool_params page_pool_params = { .pool_size = SYSTEM_PERCPU_PAGE_POOL_SIZE, + .flags = PP_FLAG_SYSTEM_POOL, .nid = NUMA_NO_NODE, }; struct page_pool *pp_ptr; diff --git a/net/core/page_pool.c b/net/core/page_pool.c index e8b9399d8e324..d706fe5548dfe 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -31,6 +31,8 @@ #define BIAS_MAX (LONG_MAX >> 1) #ifdef CONFIG_PAGE_POOL_STATS +static DEFINE_PER_CPU(struct page_pool_recycle_stats, pp_system_recycle_stats); + /* alloc_stat_inc is intended to be used in softirq context */ #define alloc_stat_inc(pool, __stat) (pool->alloc_stats.__stat++) /* recycle_stat_inc is safe to use when preemption is possible. */ @@ -220,14 +222,23 @@ static int page_pool_init(struct page_pool *pool, pool->has_init_callback = !!pool->slow.init_callback; #ifdef CONFIG_PAGE_POOL_STATS - pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats); - if (!pool->recycle_stats) - return -ENOMEM; + if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL)) { + pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats); + if (!pool->recycle_stats) + return -ENOMEM; + } else { + /* For system page pool instance we use a singular stats object + * instead of allocating a separate percpu variable for each + * (also percpu) page pool instance. + */ + pool->recycle_stats = &pp_system_recycle_stats; + } #endif if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) { #ifdef CONFIG_PAGE_POOL_STATS - free_percpu(pool->recycle_stats); + if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL)) + free_percpu(pool->recycle_stats); #endif return -ENOMEM; } @@ -251,7 +262,8 @@ static void page_pool_uninit(struct page_pool *pool) put_device(pool->p.dev); #ifdef CONFIG_PAGE_POOL_STATS - free_percpu(pool->recycle_stats); + if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL)) + free_percpu(pool->recycle_stats); #endif } -- cgit 1.2.3-korg From 74293ea1c4db62cb969e741fbfd479a34d935024 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 16 Feb 2024 01:41:52 -0800 Subject: net: sysfs: Do not create sysfs for non BQL device Creation of sysfs entries is expensive, mainly for workloads that constantly creates netdev and netns often. Do not create BQL sysfs entries for devices that don't need, basically those that do not have a real queue, i.e, devices that has NETIF_F_LLTX and IFF_NO_QUEUE, such as `lo` interface. This will remove the /sys/class/net/eth0/queues/tx-X/byte_queue_limits/ directory for these devices. In the example below, eth0 has the `byte_queue_limits` directory but not `lo`. # ls /sys/class/net/lo/queues/tx-0/ traffic_class tx_maxrate tx_timeout xps_cpus xps_rxqs # ls /sys/class/net/eth0/queues/tx-0/byte_queue_limits/ hold_time inflight limit limit_max limit_min This also removes the #ifdefs, since we can also use netdev_uses_bql() to check if the config is enabled. (as suggested by Jakub). Suggested-by: Eric Dumazet Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20240216094154.3263843-1-leitao@debian.org Signed-off-by: Jakub Kicinski --- net/core/net-sysfs.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 946caefdd9599..af238026ac3c6 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1459,6 +1459,9 @@ static const struct attribute_group dql_group = { .name = "byte_queue_limits", .attrs = dql_attrs, }; +#else +/* Fake declaration, all the code using it should be dead */ +extern const struct attribute_group dql_group; #endif /* CONFIG_BQL */ #ifdef CONFIG_XPS @@ -1696,6 +1699,15 @@ static const struct kobj_type netdev_queue_ktype = { .get_ownership = netdev_queue_get_ownership, }; +static bool netdev_uses_bql(const struct net_device *dev) +{ + if (dev->features & NETIF_F_LLTX || + dev->priv_flags & IFF_NO_QUEUE) + return false; + + return IS_ENABLED(CONFIG_BQL); +} + static int netdev_queue_add_kobject(struct net_device *dev, int index) { struct netdev_queue *queue = dev->_tx + index; @@ -1713,11 +1725,11 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) if (error) goto err; -#ifdef CONFIG_BQL - error = sysfs_create_group(kobj, &dql_group); - if (error) - goto err; -#endif + if (netdev_uses_bql(dev)) { + error = sysfs_create_group(kobj, &dql_group); + if (error) + goto err; + } kobject_uevent(kobj, KOBJ_ADD); return 0; @@ -1738,9 +1750,9 @@ static int tx_queue_change_owner(struct net_device *ndev, int index, if (error) return error; -#ifdef CONFIG_BQL - error = sysfs_group_change_owner(kobj, &dql_group, kuid, kgid); -#endif + if (netdev_uses_bql(ndev)) + error = sysfs_group_change_owner(kobj, &dql_group, kuid, kgid); + return error; } #endif /* CONFIG_SYSFS */ @@ -1772,9 +1784,10 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) if (!refcount_read(&dev_net(dev)->ns.count)) queue->kobj.uevent_suppress = 1; -#ifdef CONFIG_BQL - sysfs_remove_group(&queue->kobj, &dql_group); -#endif + + if (netdev_uses_bql(dev)) + sysfs_remove_group(&queue->kobj, &dql_group); + kobject_put(&queue->kobj); } -- cgit 1.2.3-korg From c8fba5d6df5e476aa791db4f1f014dad2bb5e904 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 19 Feb 2024 21:00:21 +0100 Subject: can: raw: fix getsockopt() for new CAN_RAW_XL_VCID_OPTS The code for the CAN_RAW_XL_VCID_OPTS getsockopt() was incompletely adopted from the CAN_RAW_FILTER getsockopt(). Add the missing put_user() and return statements. Flagged by Smatch. Fixes: c83c22ec1493 ("can: canxl: add virtual CAN network identifier support") Reported-by: Simon Horman Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20240219200021.12113-1-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- net/can/raw.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/can/raw.c b/net/can/raw.c index cb8e6f788af84..897ffc17d8506 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -835,7 +835,9 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, if (copy_to_user(optval, &ro->raw_vcid_opts, len)) err = -EFAULT; } - break; + if (!err) + err = put_user(len, optlen); + return err; case CAN_RAW_JOIN_FILTERS: if (len > sizeof(int)) -- cgit 1.2.3-korg From 21d2e6737c9789aa9b23c8a4131cbca8260139fd Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Wed, 14 Feb 2024 14:34:03 -0800 Subject: net: add netmem to skb_frag_t Use struct netmem* instead of page in skb_frag_t. Currently struct netmem* is always a struct page underneath, but the abstraction allows efforts to add support for skb frags not backed by pages. There is unfortunately 1 instance where the skb_frag_t is assumed to be a exactly a bio_vec in kcm. For this case, WARN_ON_ONCE and return error before doing a cast. Add skb[_frag]_fill_netmem_*() and skb_add_rx_frag_netmem() helpers so that the API can be used to create netmem skbs. Signed-off-by: Mina Almasry Acked-by: Paolo Abeni Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 100 +++++++++++++++++++++++++++++++++++-------------- net/core/skbuff.c | 34 +++++++++++++---- net/kcm/kcmsock.c | 7 ++-- 3 files changed, 102 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 696e7680656f5..e3a2ed5d09ad6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -37,6 +37,7 @@ #endif #include #include +#include /** * DOC: skb checksums @@ -359,7 +360,11 @@ extern int sysctl_max_skb_frags; */ #define GSO_BY_FRAGS 0xFFFF -typedef struct bio_vec skb_frag_t; +typedef struct skb_frag { + netmem_ref netmem; + unsigned int len; + unsigned int offset; +} skb_frag_t; /** * skb_frag_size() - Returns the size of a skb fragment @@ -367,7 +372,7 @@ typedef struct bio_vec skb_frag_t; */ static inline unsigned int skb_frag_size(const skb_frag_t *frag) { - return frag->bv_len; + return frag->len; } /** @@ -377,7 +382,7 @@ static inline unsigned int skb_frag_size(const skb_frag_t *frag) */ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) { - frag->bv_len = size; + frag->len = size; } /** @@ -387,7 +392,7 @@ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) */ static inline void skb_frag_size_add(skb_frag_t *frag, int delta) { - frag->bv_len += delta; + frag->len += delta; } /** @@ -397,7 +402,7 @@ static inline void skb_frag_size_add(skb_frag_t *frag, int delta) */ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) { - frag->bv_len -= delta; + frag->len -= delta; } /** @@ -417,7 +422,7 @@ static inline bool skb_frag_must_loop(struct page *p) * skb_frag_foreach_page - loop over pages in a fragment * * @f: skb frag to operate on - * @f_off: offset from start of f->bv_page + * @f_off: offset from start of f->netmem * @f_len: length from f_off to loop over * @p: (temp var) current page * @p_off: (temp var) offset from start of current page, @@ -2429,22 +2434,37 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb) return skb_headlen(skb) + __skb_pagelen(skb); } +static inline void skb_frag_fill_netmem_desc(skb_frag_t *frag, + netmem_ref netmem, int off, + int size) +{ + frag->netmem = netmem; + frag->offset = off; + skb_frag_size_set(frag, size); +} + static inline void skb_frag_fill_page_desc(skb_frag_t *frag, struct page *page, int off, int size) { - frag->bv_page = page; - frag->bv_offset = off; - skb_frag_size_set(frag, size); + skb_frag_fill_netmem_desc(frag, page_to_netmem(page), off, size); +} + +static inline void __skb_fill_netmem_desc_noacc(struct skb_shared_info *shinfo, + int i, netmem_ref netmem, + int off, int size) +{ + skb_frag_t *frag = &shinfo->frags[i]; + + skb_frag_fill_netmem_desc(frag, netmem, off, size); } static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo, int i, struct page *page, int off, int size) { - skb_frag_t *frag = &shinfo->frags[i]; - - skb_frag_fill_page_desc(frag, page, off, size); + __skb_fill_netmem_desc_noacc(shinfo, i, page_to_netmem(page), off, + size); } /** @@ -2460,10 +2480,10 @@ static inline void skb_len_add(struct sk_buff *skb, int delta) } /** - * __skb_fill_page_desc - initialise a paged fragment in an skb + * __skb_fill_netmem_desc - initialise a fragment in an skb * @skb: buffer containing fragment to be initialised - * @i: paged fragment index to initialise - * @page: the page to use for this fragment + * @i: fragment index to initialise + * @netmem: the netmem to use for this fragment * @off: the offset to the data with @page * @size: the length of the data * @@ -2472,10 +2492,12 @@ static inline void skb_len_add(struct sk_buff *skb, int delta) * * Does not take any additional reference on the fragment. */ -static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, - struct page *page, int off, int size) +static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i, + netmem_ref netmem, int off, int size) { - __skb_fill_page_desc_noacc(skb_shinfo(skb), i, page, off, size); + struct page *page = netmem_to_page(netmem); + + __skb_fill_netmem_desc_noacc(skb_shinfo(skb), i, netmem, off, size); /* Propagate page pfmemalloc to the skb if we can. The problem is * that not all callers have unique ownership of the page but rely @@ -2483,7 +2505,20 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, */ page = compound_head(page); if (page_is_pfmemalloc(page)) - skb->pfmemalloc = true; + skb->pfmemalloc = true; +} + +static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, + struct page *page, int off, int size) +{ + __skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size); +} + +static inline void skb_fill_netmem_desc(struct sk_buff *skb, int i, + netmem_ref netmem, int off, int size) +{ + __skb_fill_netmem_desc(skb, i, netmem, off, size); + skb_shinfo(skb)->nr_frags = i + 1; } /** @@ -2503,8 +2538,7 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, static inline void skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size) { - __skb_fill_page_desc(skb, i, page, off, size); - skb_shinfo(skb)->nr_frags = i + 1; + skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size); } /** @@ -2528,8 +2562,16 @@ static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i, shinfo->nr_frags = i + 1; } -void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, - int size, unsigned int truesize); +void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem, + int off, int size, unsigned int truesize); + +static inline void skb_add_rx_frag(struct sk_buff *skb, int i, + struct page *page, int off, int size, + unsigned int truesize) +{ + skb_add_rx_frag_netmem(skb, i, page_to_netmem(page), off, size, + truesize); +} void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, unsigned int truesize); @@ -3378,7 +3420,7 @@ static inline void skb_propagate_pfmemalloc(const struct page *page, */ static inline unsigned int skb_frag_off(const skb_frag_t *frag) { - return frag->bv_offset; + return frag->offset; } /** @@ -3388,7 +3430,7 @@ static inline unsigned int skb_frag_off(const skb_frag_t *frag) */ static inline void skb_frag_off_add(skb_frag_t *frag, int delta) { - frag->bv_offset += delta; + frag->offset += delta; } /** @@ -3398,7 +3440,7 @@ static inline void skb_frag_off_add(skb_frag_t *frag, int delta) */ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset) { - frag->bv_offset = offset; + frag->offset = offset; } /** @@ -3409,7 +3451,7 @@ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset) static inline void skb_frag_off_copy(skb_frag_t *fragto, const skb_frag_t *fragfrom) { - fragto->bv_offset = fragfrom->bv_offset; + fragto->offset = fragfrom->offset; } /** @@ -3420,7 +3462,7 @@ static inline void skb_frag_off_copy(skb_frag_t *fragto, */ static inline struct page *skb_frag_page(const skb_frag_t *frag) { - return frag->bv_page; + return netmem_to_page(frag->netmem); } /** @@ -3528,7 +3570,7 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag) static inline void skb_frag_page_copy(skb_frag_t *fragto, const skb_frag_t *fragfrom) { - fragto->bv_page = fragfrom->bv_page; + fragto->netmem = fragfrom->netmem; } bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b41856585c24e..1434c422f76e6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -115,6 +115,24 @@ static struct kmem_cache *skb_small_head_cache __ro_after_init; int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; EXPORT_SYMBOL(sysctl_max_skb_frags); +/* kcm_write_msgs() relies on casting paged frags to bio_vec to use + * iov_iter_bvec(). These static asserts ensure the cast is valid is long as the + * netmem is a page. + */ +static_assert(offsetof(struct bio_vec, bv_page) == + offsetof(skb_frag_t, netmem)); +static_assert(sizeof_field(struct bio_vec, bv_page) == + sizeof_field(skb_frag_t, netmem)); + +static_assert(offsetof(struct bio_vec, bv_len) == offsetof(skb_frag_t, len)); +static_assert(sizeof_field(struct bio_vec, bv_len) == + sizeof_field(skb_frag_t, len)); + +static_assert(offsetof(struct bio_vec, bv_offset) == + offsetof(skb_frag_t, offset)); +static_assert(sizeof_field(struct bio_vec, bv_offset) == + sizeof_field(skb_frag_t, offset)); + #undef FN #define FN(reason) [SKB_DROP_REASON_##reason] = #reason, static const char * const drop_reasons[] = { @@ -845,17 +863,17 @@ skb_fail: } EXPORT_SYMBOL(__napi_alloc_skb); -void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, - int size, unsigned int truesize) +void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem, + int off, int size, unsigned int truesize) { DEBUG_NET_WARN_ON_ONCE(size > truesize); - skb_fill_page_desc(skb, i, page, off, size); + skb_fill_netmem_desc(skb, i, netmem, off, size); skb->len += size; skb->data_len += size; skb->truesize += truesize; } -EXPORT_SYMBOL(skb_add_rx_frag); +EXPORT_SYMBOL(skb_add_rx_frag_netmem); void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, unsigned int truesize) @@ -1999,10 +2017,11 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) /* skb frags point to kernel buffers */ for (i = 0; i < new_frags - 1; i++) { - __skb_fill_page_desc(skb, i, head, 0, psize); + __skb_fill_netmem_desc(skb, i, page_to_netmem(head), 0, psize); head = (struct page *)page_private(head); } - __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off); + __skb_fill_netmem_desc(skb, new_frags - 1, page_to_netmem(head), 0, + d_off); skb_shinfo(skb)->nr_frags = new_frags; release: @@ -3740,7 +3759,8 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) if (plen) { page = virt_to_head_page(from->head); offset = from->data - (unsigned char *)page_address(page); - __skb_fill_page_desc(to, 0, page, offset, plen); + __skb_fill_netmem_desc(to, 0, page_to_netmem(page), + offset, plen); get_page(page); j = 1; len -= plen; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 1184d40167b86..73c200c5c8e40 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -627,7 +627,8 @@ retry: skb = txm->frag_skb; } - if (WARN_ON(!skb_shinfo(skb)->nr_frags)) { + if (WARN_ON(!skb_shinfo(skb)->nr_frags) || + WARN_ON_ONCE(!skb_frag_page(&skb_shinfo(skb)->frags[0]))) { ret = -EINVAL; goto out; } @@ -637,8 +638,8 @@ retry: msize += skb_frag_size(&skb_shinfo(skb)->frags[i]); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, - skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags, - msize); + (const struct bio_vec *)skb_shinfo(skb)->frags, + skb_shinfo(skb)->nr_frags, msize); iov_iter_advance(&msg.msg_iter, txm->frag_offset); do { -- cgit 1.2.3-korg From 00bf80c437dcbbd808d61cc2866c8f065ff436bd Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 20 Feb 2024 09:16:16 +0100 Subject: can: raw: raw_getsockopt(): reduce scope of err Reduce the scope of the variable "err" to the individual cases. This is to avoid the mistake of setting "err" in the mistaken belief that it will be evaluated later. Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/all/20240220-raw-setsockopt-v1-1-7d34cb1377fc@pengutronix.de Signed-off-by: Marc Kleine-Budde --- net/can/raw.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/can/raw.c b/net/can/raw.c index 897ffc17d8506..00533f64d69d5 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -756,7 +756,6 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, struct raw_sock *ro = raw_sk(sk); int len; void *val; - int err = 0; if (level != SOL_CAN_RAW) return -EINVAL; @@ -766,7 +765,9 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, return -EINVAL; switch (optname) { - case CAN_RAW_FILTER: + case CAN_RAW_FILTER: { + int err = 0; + lock_sock(sk); if (ro->count > 0) { int fsize = ro->count * sizeof(struct can_filter); @@ -791,7 +792,7 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, if (!err) err = put_user(len, optlen); return err; - + } case CAN_RAW_ERR_FILTER: if (len > sizeof(can_err_mask_t)) len = sizeof(can_err_mask_t); @@ -822,7 +823,9 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, val = &ro->xl_frames; break; - case CAN_RAW_XL_VCID_OPTS: + case CAN_RAW_XL_VCID_OPTS: { + int err = 0; + /* user space buffer to small for VCID opts? */ if (len < sizeof(ro->raw_vcid_opts)) { /* return -ERANGE and needed space in optlen */ @@ -838,7 +841,7 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, if (!err) err = put_user(len, optlen); return err; - + } case CAN_RAW_JOIN_FILTERS: if (len > sizeof(int)) len = sizeof(int); -- cgit 1.2.3-korg From 465c1abcb64426f0ff39e80e508e2432672c2dae Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 16 Feb 2024 12:54:43 +0000 Subject: net: tcp: Remove redundant initialization of variable len The variable len being initialized with a value that is never read, an if statement is initializing it in both paths of the if statement. The initialization is redundant and can be removed. Cleans up clang scan build warning: net/ipv4/tcp_ao.c:512:11: warning: Value stored to 'len' during its initialization is never read [deadcode.DeadStores] Signed-off-by: Colin Ian King Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://lore.kernel.org/r/20240216125443.2107244-1-colin.i.king@gmail.com Signed-off-by: Paolo Abeni --- net/ipv4/tcp_ao.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 87db432c6bb4a..3afeeb68e8a7e 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -509,9 +509,9 @@ static int tcp_ao_hash_header(struct tcp_sigpool *hp, bool exclude_options, u8 *hash, int hash_offset, int hash_len) { - int err, len = th->doff << 2; struct scatterlist sg; u8 *hdr = hp->scratch; + int err, len; /* We are not allowed to change tcphdr, make a local copy */ if (exclude_options) { -- cgit 1.2.3-korg From 5d4cc87414c5d11345c4b11d61377d351b5c28a2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 16 Feb 2024 16:20:06 +0000 Subject: net: reorganize "struct sock" fields Last major reorg happened in commit 9115e8cd2a0c ("net: reorganize struct sock for better data locality") Since then, many changes have been done. Before SO_PEEK_OFF support is added to TCP, we need to move sk_peek_off to a better location. It is time to make another pass, and add six groups, without explicit alignment. - sock_write_rx (following sk_refcnt) read-write fields in rx path. - sock_read_rx read-mostly fields in rx path. - sock_read_rxtx read-mostly fields in both rx and tx paths. - sock_write_rxtx read-write fields in both rx and tx paths. - sock_write_tx read-write fields in tx paths. - sock_read_tx read-mostly fields in tx paths. Results on TCP_RR benchmarks seem to show a gain (4 to 5 %). It is possible UDP needs a change, because sk_peek_off shares a cache line with sk_receive_queue. If this the case, we can exchange roles of sk->sk_receive and up->reader_queue queues. After this change, we have the following layout: struct sock { struct sock_common __sk_common; /* 0 0x88 */ /* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */ __u8 __cacheline_group_begin__sock_write_rx[0]; /* 0x88 0 */ atomic_t sk_drops; /* 0x88 0x4 */ __s32 sk_peek_off; /* 0x8c 0x4 */ struct sk_buff_head sk_error_queue; /* 0x90 0x18 */ struct sk_buff_head sk_receive_queue; /* 0xa8 0x18 */ /* --- cacheline 3 boundary (192 bytes) --- */ struct { atomic_t rmem_alloc; /* 0xc0 0x4 */ int len; /* 0xc4 0x4 */ struct sk_buff * head; /* 0xc8 0x8 */ struct sk_buff * tail; /* 0xd0 0x8 */ } sk_backlog; /* 0xc0 0x18 */ struct { atomic_t rmem_alloc; /* 0 0x4 */ int len; /* 0x4 0x4 */ struct sk_buff * head; /* 0x8 0x8 */ struct sk_buff * tail; /* 0x10 0x8 */ /* size: 24, cachelines: 1, members: 4 */ /* last cacheline: 24 bytes */ }; __u8 __cacheline_group_end__sock_write_rx[0]; /* 0xd8 0 */ __u8 __cacheline_group_begin__sock_read_rx[0]; /* 0xd8 0 */ rcu * sk_rx_dst; /* 0xd8 0x8 */ int sk_rx_dst_ifindex; /* 0xe0 0x4 */ u32 sk_rx_dst_cookie; /* 0xe4 0x4 */ unsigned int sk_ll_usec; /* 0xe8 0x4 */ unsigned int sk_napi_id; /* 0xec 0x4 */ u16 sk_busy_poll_budget; /* 0xf0 0x2 */ u8 sk_prefer_busy_poll; /* 0xf2 0x1 */ u8 sk_userlocks; /* 0xf3 0x1 */ int sk_rcvbuf; /* 0xf4 0x4 */ rcu * sk_filter; /* 0xf8 0x8 */ /* --- cacheline 4 boundary (256 bytes) --- */ union { rcu * sk_wq; /* 0x100 0x8 */ struct socket_wq * sk_wq_raw; /* 0x100 0x8 */ }; /* 0x100 0x8 */ union { rcu * sk_wq; /* 0 0x8 */ struct socket_wq * sk_wq_raw; /* 0 0x8 */ }; void (*sk_data_ready)(struct sock *); /* 0x108 0x8 */ long sk_rcvtimeo; /* 0x110 0x8 */ int sk_rcvlowat; /* 0x118 0x4 */ __u8 __cacheline_group_end__sock_read_rx[0]; /* 0x11c 0 */ __u8 __cacheline_group_begin__sock_read_rxtx[0]; /* 0x11c 0 */ int sk_err; /* 0x11c 0x4 */ struct socket * sk_socket; /* 0x120 0x8 */ struct mem_cgroup * sk_memcg; /* 0x128 0x8 */ rcu * sk_policy[2]; /* 0x130 0x10 */ /* --- cacheline 5 boundary (320 bytes) --- */ __u8 __cacheline_group_end__sock_read_rxtx[0]; /* 0x140 0 */ __u8 __cacheline_group_begin__sock_write_rxtx[0]; /* 0x140 0 */ socket_lock_t sk_lock; /* 0x140 0x20 */ u32 sk_reserved_mem; /* 0x160 0x4 */ int sk_forward_alloc; /* 0x164 0x4 */ u32 sk_tsflags; /* 0x168 0x4 */ __u8 __cacheline_group_end__sock_write_rxtx[0]; /* 0x16c 0 */ __u8 __cacheline_group_begin__sock_write_tx[0]; /* 0x16c 0 */ int sk_write_pending; /* 0x16c 0x4 */ atomic_t sk_omem_alloc; /* 0x170 0x4 */ int sk_sndbuf; /* 0x174 0x4 */ int sk_wmem_queued; /* 0x178 0x4 */ refcount_t sk_wmem_alloc; /* 0x17c 0x4 */ /* --- cacheline 6 boundary (384 bytes) --- */ unsigned long sk_tsq_flags; /* 0x180 0x8 */ union { struct sk_buff * sk_send_head; /* 0x188 0x8 */ struct rb_root tcp_rtx_queue; /* 0x188 0x8 */ }; /* 0x188 0x8 */ union { struct sk_buff * sk_send_head; /* 0 0x8 */ struct rb_root tcp_rtx_queue; /* 0 0x8 */ }; struct sk_buff_head sk_write_queue; /* 0x190 0x18 */ u32 sk_dst_pending_confirm; /* 0x1a8 0x4 */ u32 sk_pacing_status; /* 0x1ac 0x4 */ struct page_frag sk_frag; /* 0x1b0 0x10 */ /* --- cacheline 7 boundary (448 bytes) --- */ struct timer_list sk_timer; /* 0x1c0 0x28 */ /* XXX last struct has 4 bytes of padding */ unsigned long sk_pacing_rate; /* 0x1e8 0x8 */ atomic_t sk_zckey; /* 0x1f0 0x4 */ atomic_t sk_tskey; /* 0x1f4 0x4 */ __u8 __cacheline_group_end__sock_write_tx[0]; /* 0x1f8 0 */ __u8 __cacheline_group_begin__sock_read_tx[0]; /* 0x1f8 0 */ unsigned long sk_max_pacing_rate; /* 0x1f8 0x8 */ /* --- cacheline 8 boundary (512 bytes) --- */ long sk_sndtimeo; /* 0x200 0x8 */ u32 sk_priority; /* 0x208 0x4 */ u32 sk_mark; /* 0x20c 0x4 */ rcu * sk_dst_cache; /* 0x210 0x8 */ netdev_features_t sk_route_caps; /* 0x218 0x8 */ u16 sk_gso_type; /* 0x220 0x2 */ u16 sk_gso_max_segs; /* 0x222 0x2 */ unsigned int sk_gso_max_size; /* 0x224 0x4 */ gfp_t sk_allocation; /* 0x228 0x4 */ u32 sk_txhash; /* 0x22c 0x4 */ u8 sk_pacing_shift; /* 0x230 0x1 */ bool sk_use_task_frag; /* 0x231 0x1 */ __u8 __cacheline_group_end__sock_read_tx[0]; /* 0x232 0 */ u8 sk_gso_disabled:1; /* 0x232: 0 0x1 */ u8 sk_kern_sock:1; /* 0x232:0x1 0x1 */ u8 sk_no_check_tx:1; /* 0x232:0x2 0x1 */ u8 sk_no_check_rx:1; /* 0x232:0x3 0x1 */ /* XXX 4 bits hole, try to pack */ u8 sk_shutdown; /* 0x233 0x1 */ u16 sk_type; /* 0x234 0x2 */ u16 sk_protocol; /* 0x236 0x2 */ unsigned long sk_lingertime; /* 0x238 0x8 */ /* --- cacheline 9 boundary (576 bytes) --- */ struct proto * sk_prot_creator; /* 0x240 0x8 */ rwlock_t sk_callback_lock; /* 0x248 0x8 */ int sk_err_soft; /* 0x250 0x4 */ u32 sk_ack_backlog; /* 0x254 0x4 */ u32 sk_max_ack_backlog; /* 0x258 0x4 */ kuid_t sk_uid; /* 0x25c 0x4 */ spinlock_t sk_peer_lock; /* 0x260 0x4 */ int sk_bind_phc; /* 0x264 0x4 */ struct pid * sk_peer_pid; /* 0x268 0x8 */ const struct cred * sk_peer_cred; /* 0x270 0x8 */ ktime_t sk_stamp; /* 0x278 0x8 */ /* --- cacheline 10 boundary (640 bytes) --- */ int sk_disconnects; /* 0x280 0x4 */ u8 sk_txrehash; /* 0x284 0x1 */ u8 sk_clockid; /* 0x285 0x1 */ u8 sk_txtime_deadline_mode:1; /* 0x286: 0 0x1 */ u8 sk_txtime_report_errors:1; /* 0x286:0x1 0x1 */ u8 sk_txtime_unused:6; /* 0x286:0x2 0x1 */ /* XXX 1 byte hole, try to pack */ void * sk_user_data; /* 0x288 0x8 */ void * sk_security; /* 0x290 0x8 */ struct sock_cgroup_data sk_cgrp_data; /* 0x298 0x8 */ void (*sk_state_change)(struct sock *); /* 0x2a0 0x8 */ void (*sk_write_space)(struct sock *); /* 0x2a8 0x8 */ void (*sk_error_report)(struct sock *); /* 0x2b0 0x8 */ int (*sk_backlog_rcv)(struct sock *, struct sk_buff *); /* 0x2b8 0x8 */ /* --- cacheline 11 boundary (704 bytes) --- */ void (*sk_destruct)(struct sock *); /* 0x2c0 0x8 */ rcu * sk_reuseport_cb; /* 0x2c8 0x8 */ rcu * sk_bpf_storage; /* 0x2d0 0x8 */ struct callback_head sk_rcu __attribute__((__aligned__(8))); /* 0x2d8 0x10 */ netns_tracker ns_tracker; /* 0x2e8 0x8 */ /* size: 752, cachelines: 12, members: 105 */ /* sum members: 749, holes: 1, sum holes: 1 */ /* sum bitfield members: 12 bits, bit holes: 1, sum bit holes: 4 bits */ /* paddings: 1, sum paddings: 4 */ /* forced alignments: 1 */ /* last cacheline: 48 bytes */ }; Signed-off-by: Eric Dumazet Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/20240216162006.2342759-1-edumazet@google.com Signed-off-by: Paolo Abeni --- include/net/sock.h | 108 ++++++++++++++++++++++++++++++----------------------- net/core/sock.c | 62 ++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+), 47 deletions(-) (limited to 'net') diff --git a/include/net/sock.h b/include/net/sock.h index a9d99a9c583fc..796a902cf4c19 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -378,14 +378,10 @@ struct sock { #define sk_flags __sk_common.skc_flags #define sk_rxhash __sk_common.skc_rxhash - /* early demux fields */ - struct dst_entry __rcu *sk_rx_dst; - int sk_rx_dst_ifindex; - u32 sk_rx_dst_cookie; + __cacheline_group_begin(sock_write_rx); - socket_lock_t sk_lock; atomic_t sk_drops; - int sk_rcvlowat; + __s32 sk_peek_off; struct sk_buff_head sk_error_queue; struct sk_buff_head sk_receive_queue; /* @@ -402,18 +398,24 @@ struct sock { struct sk_buff *head; struct sk_buff *tail; } sk_backlog; - #define sk_rmem_alloc sk_backlog.rmem_alloc - int sk_forward_alloc; - u32 sk_reserved_mem; + __cacheline_group_end(sock_write_rx); + + __cacheline_group_begin(sock_read_rx); + /* early demux fields */ + struct dst_entry __rcu *sk_rx_dst; + int sk_rx_dst_ifindex; + u32 sk_rx_dst_cookie; + #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sk_ll_usec; - /* ===== mostly read cache line ===== */ unsigned int sk_napi_id; + u16 sk_busy_poll_budget; + u8 sk_prefer_busy_poll; #endif + u8 sk_userlocks; int sk_rcvbuf; - int sk_disconnects; struct sk_filter __rcu *sk_filter; union { @@ -422,15 +424,33 @@ struct sock { struct socket_wq *sk_wq_raw; /* public: */ }; + + void (*sk_data_ready)(struct sock *sk); + long sk_rcvtimeo; + int sk_rcvlowat; + __cacheline_group_end(sock_read_rx); + + __cacheline_group_begin(sock_read_rxtx); + int sk_err; + struct socket *sk_socket; + struct mem_cgroup *sk_memcg; #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; #endif + __cacheline_group_end(sock_read_rxtx); - struct dst_entry __rcu *sk_dst_cache; + __cacheline_group_begin(sock_write_rxtx); + socket_lock_t sk_lock; + u32 sk_reserved_mem; + int sk_forward_alloc; + u32 sk_tsflags; + __cacheline_group_end(sock_write_rxtx); + + __cacheline_group_begin(sock_write_tx); + int sk_write_pending; atomic_t sk_omem_alloc; int sk_sndbuf; - /* ===== cache line for TX ===== */ int sk_wmem_queued; refcount_t sk_wmem_alloc; unsigned long sk_tsq_flags; @@ -439,22 +459,36 @@ struct sock { struct rb_root tcp_rtx_queue; }; struct sk_buff_head sk_write_queue; - __s32 sk_peek_off; - int sk_write_pending; - __u32 sk_dst_pending_confirm; + u32 sk_dst_pending_confirm; u32 sk_pacing_status; /* see enum sk_pacing */ - long sk_sndtimeo; + struct page_frag sk_frag; struct timer_list sk_timer; - __u32 sk_priority; - __u32 sk_mark; + unsigned long sk_pacing_rate; /* bytes per second */ + atomic_t sk_zckey; + atomic_t sk_tskey; + __cacheline_group_end(sock_write_tx); + + __cacheline_group_begin(sock_read_tx); unsigned long sk_max_pacing_rate; - struct page_frag sk_frag; + long sk_sndtimeo; + u32 sk_priority; + u32 sk_mark; + struct dst_entry __rcu *sk_dst_cache; netdev_features_t sk_route_caps; - int sk_gso_type; +#ifdef CONFIG_SOCK_VALIDATE_XMIT + struct sk_buff* (*sk_validate_xmit_skb)(struct sock *sk, + struct net_device *dev, + struct sk_buff *skb); +#endif + u16 sk_gso_type; + u16 sk_gso_max_segs; unsigned int sk_gso_max_size; gfp_t sk_allocation; - __u32 sk_txhash; + u32 sk_txhash; + u8 sk_pacing_shift; + bool sk_use_task_frag; + __cacheline_group_end(sock_read_tx); /* * Because of non atomicity rules, all @@ -463,64 +497,44 @@ struct sock { u8 sk_gso_disabled : 1, sk_kern_sock : 1, sk_no_check_tx : 1, - sk_no_check_rx : 1, - sk_userlocks : 4; - u8 sk_pacing_shift; + sk_no_check_rx : 1; + u8 sk_shutdown; u16 sk_type; u16 sk_protocol; - u16 sk_gso_max_segs; unsigned long sk_lingertime; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; - int sk_err, - sk_err_soft; + int sk_err_soft; u32 sk_ack_backlog; u32 sk_max_ack_backlog; kuid_t sk_uid; - u8 sk_txrehash; -#ifdef CONFIG_NET_RX_BUSY_POLL - u8 sk_prefer_busy_poll; - u16 sk_busy_poll_budget; -#endif spinlock_t sk_peer_lock; int sk_bind_phc; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; - long sk_rcvtimeo; ktime_t sk_stamp; #if BITS_PER_LONG==32 seqlock_t sk_stamp_seq; #endif - atomic_t sk_tskey; - atomic_t sk_zckey; - u32 sk_tsflags; - u8 sk_shutdown; + int sk_disconnects; + u8 sk_txrehash; u8 sk_clockid; u8 sk_txtime_deadline_mode : 1, sk_txtime_report_errors : 1, sk_txtime_unused : 6; - bool sk_use_task_frag; - struct socket *sk_socket; void *sk_user_data; #ifdef CONFIG_SECURITY void *sk_security; #endif struct sock_cgroup_data sk_cgrp_data; - struct mem_cgroup *sk_memcg; void (*sk_state_change)(struct sock *sk); - void (*sk_data_ready)(struct sock *sk); void (*sk_write_space)(struct sock *sk); void (*sk_error_report)(struct sock *sk); int (*sk_backlog_rcv)(struct sock *sk, struct sk_buff *skb); -#ifdef CONFIG_SOCK_VALIDATE_XMIT - struct sk_buff* (*sk_validate_xmit_skb)(struct sock *sk, - struct net_device *dev, - struct sk_buff *skb); -#endif void (*sk_destruct)(struct sock *sk); struct sock_reuseport __rcu *sk_reuseport_cb; #ifdef CONFIG_BPF_SYSCALL diff --git a/net/core/sock.c b/net/core/sock.c index 88bf810394a55..3fef3407383e5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -4234,3 +4234,65 @@ int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) return sock_ioctl_out(sk, cmd, arg); } EXPORT_SYMBOL(sk_ioctl); + +static int __init sock_struct_check(void) +{ + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_drops); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_peek_off); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_error_queue); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_receive_queue); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_backlog); + + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst_ifindex); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst_cookie); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvbuf); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_filter); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_wq); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_data_ready); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvtimeo); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvlowat); + + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_err); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_socket); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_memcg); + + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_lock); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_reserved_mem); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_forward_alloc); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_tsflags); + + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_omem_alloc); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_omem_alloc); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_sndbuf); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_queued); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_alloc); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_tsq_flags); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_send_head); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_write_queue); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_write_pending); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_dst_pending_confirm); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_pacing_status); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_frag); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_timer); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_pacing_rate); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_zckey); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_tskey); + + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_max_pacing_rate); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_sndtimeo); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_priority); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_mark); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_dst_cache); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_route_caps); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_type); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_size); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_allocation); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_txhash); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_segs); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_pacing_shift); + CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_use_task_frag); + return 0; +} + +core_initcall(sock_struct_check); -- cgit 1.2.3-korg From c6a28acb1a27eb42970b959ff7af3a8a077f8cce Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 17 Feb 2024 12:12:14 +0100 Subject: net: fix pointer check in skb_pp_cow_data routine Properly check page pointer returned by page_pool_dev_alloc routine in skb_pp_cow_data() for non-linear part of the original skb. Reported-by: Julian Wiedmann Closes: https://lore.kernel.org/netdev/cover.1707729884.git.lorenzo@kernel.org/T/#m7d189b0015a7281ed9221903902490c03ed19a7a Fixes: e6d5dbdd20aa ("xdp: add multi-buff support for xdp running in generic mode") Signed-off-by: Lorenzo Bianconi Reviewed-by: Simon Horman Reviewed-by: Ilias Apalodimas Link: https://lore.kernel.org/r/25512af3e09befa9dcb2cf3632bdc45b807cf330.1708167716.git.lorenzo@kernel.org Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1434c422f76e6..b9de3ee65ae64 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -968,7 +968,7 @@ int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, truesize = size; page = page_pool_dev_alloc(pool, &page_off, &truesize); - if (!data) { + if (!page) { consume_skb(nskb); return -ENOMEM; } -- cgit 1.2.3-korg From 7e0acba3b49e3544b7ed170ca061001f0fdae839 Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marliere" Date: Sat, 17 Feb 2024 17:13:24 -0300 Subject: net: dsa: constify the struct device_type usage Since commit aed65af1cc2f ("drivers: make device_type const"), the driver core can properly handle constant struct device_type. Move the dsa_type variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. Cc: Greg Kroah-Hartman Signed-off-by: Ricardo B. Marliere Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dsa/user.c b/net/dsa/user.c index 5d666dfb317d7..4d53c76a9840a 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -2429,7 +2429,7 @@ static const struct net_device_ops dsa_user_netdev_ops = { .ndo_fill_forward_path = dsa_user_fill_forward_path, }; -static struct device_type dsa_type = { +static const struct device_type dsa_type = { .name = "dsa", }; -- cgit 1.2.3-korg From bbc7e4cc21a43d7991a5c9a163d2990daddc8411 Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marliere" Date: Sat, 17 Feb 2024 17:13:25 -0300 Subject: net: bridge: constify the struct device_type usage Since commit aed65af1cc2f ("drivers: make device_type const"), the driver core can properly handle constant struct device_type. Move the br_type variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. Cc: Greg Kroah-Hartman Signed-off-by: Ricardo B. Marliere Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/bridge/br_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 717e9750614cd..874cec75a818e 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -474,7 +474,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_fill_forward_path = br_fill_forward_path, }; -static struct device_type br_type = { +static const struct device_type br_type = { .name = "bridge", }; -- cgit 1.2.3-korg From 0072b2c1ffd0a15960ef2a8e2ccdde91dfa55566 Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marliere" Date: Sat, 17 Feb 2024 17:13:29 -0300 Subject: net: hsr: constify the struct device_type usage Since commit aed65af1cc2f ("drivers: make device_type const"), the driver core can properly handle constant struct device_type. Move the hsr_type variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. Cc: Greg Kroah-Hartman Signed-off-by: Ricardo B. Marliere Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/hsr/hsr_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 5ef6d437db727..c98b5b71ad7c3 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -467,7 +467,7 @@ static const struct net_device_ops hsr_device_ops = { .ndo_set_rx_mode = hsr_set_rx_mode, }; -static struct device_type hsr_type = { +static const struct device_type hsr_type = { .name = "hsr", }; -- cgit 1.2.3-korg From 43820fd1ddb52a4c2b64a3659a9e7c927fc839f0 Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marliere" Date: Sat, 17 Feb 2024 17:13:30 -0300 Subject: net: l2tp: constify the struct device_type usage Since commit aed65af1cc2f ("drivers: make device_type const"), the driver core can properly handle constant struct device_type. Move the l2tpeth_type variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. Cc: Greg Kroah-Hartman Signed-off-by: Ricardo B. Marliere Reviewed-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 25ca89f804145..39e487ccc4688 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -100,7 +100,7 @@ static const struct net_device_ops l2tp_eth_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, }; -static struct device_type l2tpeth_type = { +static const struct device_type l2tpeth_type = { .name = "l2tpeth", }; -- cgit 1.2.3-korg From 7ae9d3423f1db1a3e61b23038005f892858a6936 Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marliere" Date: Sat, 17 Feb 2024 17:13:31 -0300 Subject: net: vlan: constify the struct device_type usage Since commit aed65af1cc2f ("drivers: make device_type const"), the driver core can properly handle constant struct device_type. Move the vlan_type variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. Cc: Greg Kroah-Hartman Signed-off-by: Ricardo B. Marliere Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 790b54a7cbe38..df55525182517 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -537,7 +537,7 @@ static const struct header_ops vlan_passthru_header_ops = { .parse_protocol = vlan_parse_protocol, }; -static struct device_type vlan_type = { +static const struct device_type vlan_type = { .name = "vlan", }; -- cgit 1.2.3-korg From aa23cfe6ab500dbbfa630602b403f433522daf9f Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Wed, 24 Jan 2024 16:15:38 +0800 Subject: netfilter: expect: Simplify the allocation of slab caches in nf_conntrack_expect_init Use the new KMEM_CACHE() macro instead of direct kmem_cache_create to simplify the creation of SLAB caches. Signed-off-by: Kunwu Chan Signed-off-by: Florian Westphal --- net/netfilter/nf_conntrack_expect.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 81ca348915c98..21fa550966f07 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -722,9 +722,7 @@ int nf_conntrack_expect_init(void) nf_ct_expect_hsize = 1; } nf_ct_expect_max = nf_ct_expect_hsize * 4; - nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect", - sizeof(struct nf_conntrack_expect), - 0, 0, NULL); + nf_ct_expect_cachep = KMEM_CACHE(nf_conntrack_expect, 0); if (!nf_ct_expect_cachep) return -ENOMEM; -- cgit 1.2.3-korg From 79578be4d35c842a802487e2f31c2aed80cc005f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 29 Jan 2024 20:24:24 +0100 Subject: netfilter: nf_log: consolidate check for NULL logger in lookup function Consolidate pointer fetch to logger and check for NULL in __find_logger(). Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nf_log.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index e16f158388bbe..e0bfeb75766ff 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -31,10 +31,10 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger) int i; for (i = 0; i < NF_LOG_TYPE_MAX; i++) { - if (loggers[pf][i] == NULL) + log = nft_log_dereference(loggers[pf][i]); + if (!log) continue; - log = nft_log_dereference(loggers[pf][i]); if (!strncasecmp(str_logger, log->name, strlen(log->name))) return log; } -- cgit 1.2.3-korg From c47ec2b120b4a9d573e65baa33ff3f542f7ba273 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 29 Jan 2024 20:24:25 +0100 Subject: netfilter: nf_log: validate nf_logger_find_get() Sanitize nf_logger_find_get() input parameters, no caller in the tree passes invalid values. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nf_log.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index e0bfeb75766ff..370f8231385ca 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -156,6 +156,11 @@ int nf_logger_find_get(int pf, enum nf_log_type type) struct nf_logger *logger; int ret = -ENOENT; + if (pf >= ARRAY_SIZE(loggers)) + return -EINVAL; + if (type >= NF_LOG_TYPE_MAX) + return -EINVAL; + if (pf == NFPROTO_INET) { ret = nf_logger_find_get(NFPROTO_IPV4, type); if (ret < 0) -- cgit 1.2.3-korg From 29a280025580d72bc0daf6c040518a069870421f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 29 Jan 2024 20:24:57 +0100 Subject: netfilter: nft_osf: simplify init path Remove useless branch to check for errors in nft_parse_register_store(). Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nft_osf.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index 7f61506e5b44b..7fec57ff736f6 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -63,7 +63,6 @@ static int nft_osf_init(const struct nft_ctx *ctx, { struct nft_osf *priv = nft_expr_priv(expr); u32 flags; - int err; u8 ttl; if (!tb[NFTA_OSF_DREG]) @@ -83,13 +82,9 @@ static int nft_osf_init(const struct nft_ctx *ctx, priv->flags = flags; } - err = nft_parse_register_store(ctx, tb[NFTA_OSF_DREG], &priv->dreg, - NULL, NFT_DATA_VALUE, - NFT_OSF_MAXGENRELEN); - if (err < 0) - return err; - - return 0; + return nft_parse_register_store(ctx, tb[NFTA_OSF_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, + NFT_OSF_MAXGENRELEN); } static int nft_osf_dump(struct sk_buff *skb, -- cgit 1.2.3-korg From 749d4ef0868c5d8a98e07073791b2198178c93b4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 6 Feb 2024 14:55:53 +0100 Subject: netfilter: xtables: fix up kconfig dependencies Randy Dunlap reports arptables build failure: arp_tables.c:(.text+0x20): undefined reference to `xt_find_table' ... because recent change removed a 'select' on the xtables core. Add a "depends" clause on arptables to resolve this. Kernel test robot reports another build breakage: iptable_nat.c:(.text+0x8): undefined reference to `ipt_unregister_table_exit' ... because of a typo, the nat table selected ip6tables. Reported-by: kernel test robot Reported-by: Randy Dunlap Closes: https://lore.kernel.org/netfilter-devel/d0dfbaef-046a-4c42-9daa-53636664bf6d@infradead.org/ Fixes: a9525c7f6219 ("netfilter: xtables: allow xtables-nft only builds") Fixes: 4654467dc7e1 ("netfilter: arptables: allow xtables-nft only builds") Acked-by: Randy Dunlap Tested-by: Randy Dunlap # build-tested Signed-off-by: Florian Westphal --- net/ipv4/netfilter/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 7835230872818..8f6e950163a79 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -217,7 +217,7 @@ config IP_NF_NAT default m if NETFILTER_ADVANCED=n select NF_NAT select NETFILTER_XT_NAT - select IP6_NF_IPTABLES_LEGACY + select IP_NF_IPTABLES_LEGACY help This enables the `nat' table in iptables. This allows masquerading, port forwarding and other forms of full Network Address Port @@ -329,6 +329,7 @@ config NFT_COMPAT_ARP config IP_NF_ARPFILTER tristate "arptables-legacy packet filtering support" select IP_NF_ARPTABLES + depends on NETFILTER_XTABLES help ARP packet filtering defines a table `filter', which has a series of rules for simple ARP packet filtering at local input and -- cgit 1.2.3-korg From f04df573faf90bb828a2241b650598c02c074323 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 13 Feb 2024 16:23:37 +0100 Subject: netfilter: nft_set_pipapo: constify lookup fn args where possible Those get called from packet path, content must not be modified. No functional changes intended. Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo.c | 18 ++++++----- net/netfilter/nft_set_pipapo.h | 6 ++-- net/netfilter/nft_set_pipapo_avx2.c | 59 ++++++++++++++++++++++--------------- 3 files changed, 48 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index aa1d9e93a9a04..dedb17a4e07c7 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -360,7 +360,7 @@ * Return: -1 on no match, bit position on 'match_only', 0 otherwise. */ int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, - union nft_pipapo_map_bucket *mt, bool match_only) + const union nft_pipapo_map_bucket *mt, bool match_only) { unsigned long bitset; int k, ret = -1; @@ -412,9 +412,9 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, struct nft_pipapo_scratch *scratch; unsigned long *res_map, *fill_map; u8 genmask = nft_genmask_cur(net); + const struct nft_pipapo_match *m; + const struct nft_pipapo_field *f; const u8 *rp = (const u8 *)key; - struct nft_pipapo_match *m; - struct nft_pipapo_field *f; bool map_index; int i; @@ -519,11 +519,13 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net, { struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT); struct nft_pipapo *priv = nft_set_priv(set); - struct nft_pipapo_match *m = priv->clone; unsigned long *res_map, *fill_map = NULL; - struct nft_pipapo_field *f; + const struct nft_pipapo_match *m; + const struct nft_pipapo_field *f; int i; + m = priv->clone; + res_map = kmalloc_array(m->bsize_max, sizeof(*res_map), GFP_ATOMIC); if (!res_map) { ret = ERR_PTR(-ENOMEM); @@ -1597,7 +1599,7 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) { union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS]; - struct nft_pipapo_field *f; + const struct nft_pipapo_field *f; int i, start, rules_fx; start = first_rule; @@ -2039,8 +2041,8 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, { struct nft_pipapo *priv = nft_set_priv(set); struct net *net = read_pnet(&set->net); - struct nft_pipapo_match *m; - struct nft_pipapo_field *f; + const struct nft_pipapo_match *m; + const struct nft_pipapo_field *f; int i, r; rcu_read_lock(); diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index 3842c7341a9f4..42464e7c24ac0 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -187,7 +187,7 @@ struct nft_pipapo_elem { }; int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, - union nft_pipapo_map_bucket *mt, bool match_only); + const union nft_pipapo_map_bucket *mt, bool match_only); /** * pipapo_and_field_buckets_4bit() - Intersect 4-bit buckets @@ -195,7 +195,7 @@ int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, * @dst: Area to store result * @data: Input data selecting table buckets */ -static inline void pipapo_and_field_buckets_4bit(struct nft_pipapo_field *f, +static inline void pipapo_and_field_buckets_4bit(const struct nft_pipapo_field *f, unsigned long *dst, const u8 *data) { @@ -223,7 +223,7 @@ static inline void pipapo_and_field_buckets_4bit(struct nft_pipapo_field *f, * @dst: Area to store result * @data: Input data selecting table buckets */ -static inline void pipapo_and_field_buckets_8bit(struct nft_pipapo_field *f, +static inline void pipapo_and_field_buckets_8bit(const struct nft_pipapo_field *f, unsigned long *dst, const u8 *data) { diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index a3a8ddca99189..d08407d589eac 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -212,8 +212,9 @@ static int nft_pipapo_avx2_refill(int offset, unsigned long *map, * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_4b_2(unsigned long *map, unsigned long *fill, - struct nft_pipapo_field *f, int offset, - const u8 *pkt, bool first, bool last) + const struct nft_pipapo_field *f, + int offset, const u8 *pkt, + bool first, bool last) { int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; u8 pg[2] = { pkt[0] >> 4, pkt[0] & 0xf }; @@ -274,8 +275,9 @@ nothing: * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_4b_4(unsigned long *map, unsigned long *fill, - struct nft_pipapo_field *f, int offset, - const u8 *pkt, bool first, bool last) + const struct nft_pipapo_field *f, + int offset, const u8 *pkt, + bool first, bool last) { int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; u8 pg[4] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf }; @@ -350,8 +352,9 @@ nothing: * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_4b_8(unsigned long *map, unsigned long *fill, - struct nft_pipapo_field *f, int offset, - const u8 *pkt, bool first, bool last) + const struct nft_pipapo_field *f, + int offset, const u8 *pkt, + bool first, bool last) { u8 pg[8] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf, pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf, @@ -445,8 +448,9 @@ nothing: * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_4b_12(unsigned long *map, unsigned long *fill, - struct nft_pipapo_field *f, int offset, - const u8 *pkt, bool first, bool last) + const struct nft_pipapo_field *f, + int offset, const u8 *pkt, + bool first, bool last) { u8 pg[12] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf, pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf, @@ -534,8 +538,9 @@ nothing: * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_4b_32(unsigned long *map, unsigned long *fill, - struct nft_pipapo_field *f, int offset, - const u8 *pkt, bool first, bool last) + const struct nft_pipapo_field *f, + int offset, const u8 *pkt, + bool first, bool last) { u8 pg[32] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf, pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf, @@ -669,8 +674,9 @@ nothing: * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_8b_1(unsigned long *map, unsigned long *fill, - struct nft_pipapo_field *f, int offset, - const u8 *pkt, bool first, bool last) + const struct nft_pipapo_field *f, + int offset, const u8 *pkt, + bool first, bool last) { int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; unsigned long *lt = f->lt, bsize = f->bsize; @@ -726,8 +732,9 @@ nothing: * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_8b_2(unsigned long *map, unsigned long *fill, - struct nft_pipapo_field *f, int offset, - const u8 *pkt, bool first, bool last) + const struct nft_pipapo_field *f, + int offset, const u8 *pkt, + bool first, bool last) { int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; unsigned long *lt = f->lt, bsize = f->bsize; @@ -790,8 +797,9 @@ nothing: * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_8b_4(unsigned long *map, unsigned long *fill, - struct nft_pipapo_field *f, int offset, - const u8 *pkt, bool first, bool last) + const struct nft_pipapo_field *f, + int offset, const u8 *pkt, + bool first, bool last) { int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; unsigned long *lt = f->lt, bsize = f->bsize; @@ -865,8 +873,9 @@ nothing: * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill, - struct nft_pipapo_field *f, int offset, - const u8 *pkt, bool first, bool last) + const struct nft_pipapo_field *f, + int offset, const u8 *pkt, + bool first, bool last) { int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; unsigned long *lt = f->lt, bsize = f->bsize; @@ -950,8 +959,9 @@ nothing: * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill, - struct nft_pipapo_field *f, int offset, - const u8 *pkt, bool first, bool last) + const struct nft_pipapo_field *f, + int offset, const u8 *pkt, + bool first, bool last) { int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b; unsigned long *lt = f->lt, bsize = f->bsize; @@ -1042,8 +1052,9 @@ nothing: * word index to be checked next (i.e. first filled word). */ static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill, - struct nft_pipapo_field *f, int offset, - const u8 *pkt, bool first, bool last) + const struct nft_pipapo_field *f, + int offset, const u8 *pkt, + bool first, bool last) { unsigned long bsize = f->bsize; int i, ret = -1, b; @@ -1119,9 +1130,9 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_scratch *scratch; u8 genmask = nft_genmask_cur(net); + const struct nft_pipapo_match *m; + const struct nft_pipapo_field *f; const u8 *rp = (const u8 *)key; - struct nft_pipapo_match *m; - struct nft_pipapo_field *f; unsigned long *res, *fill; bool map_index; int i, ret = 0; -- cgit 1.2.3-korg From 07ace0bbe03b3d8e85869af1dec5e4087b1d57b8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 13 Feb 2024 16:23:38 +0100 Subject: netfilter: nft_set_pipapo: do not rely on ZERO_SIZE_PTR pipapo relies on kmalloc(0) returning ZERO_SIZE_PTR (i.e., not NULL but pointer is invalid). Rework this to not call slab allocator when we'd request a 0-byte allocation. Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index dedb17a4e07c7..625c06b8bc39a 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -525,6 +525,8 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net, int i; m = priv->clone; + if (m->bsize_max == 0) + return ret; res_map = kmalloc_array(m->bsize_max, sizeof(*res_map), GFP_ATOMIC); if (!res_map) { @@ -1367,11 +1369,17 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) src->bsize * sizeof(*dst->lt) * src->groups * NFT_PIPAPO_BUCKETS(src->bb)); - dst->mt = kvmalloc(src->rules * sizeof(*src->mt), GFP_KERNEL); - if (!dst->mt) - goto out_mt; + if (src->rules > 0) { + dst->mt = kvmalloc_array(src->rules, sizeof(*src->mt), + GFP_KERNEL); + if (!dst->mt) + goto out_mt; + + memcpy(dst->mt, src->mt, src->rules * sizeof(*src->mt)); + } else { + dst->mt = NULL; + } - memcpy(dst->mt, src->mt, src->rules * sizeof(*src->mt)); src++; dst++; } -- cgit 1.2.3-korg From aac14d516c2b575af20b426fa04129a28d45c287 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 13 Feb 2024 16:23:39 +0100 Subject: netfilter: nft_set_pipapo: shrink data structures The set uses a mix of 'int', 'unsigned int', and size_t. The rule count limit is NFT_PIPAPO_RULE0_MAX, which cannot exceed INT_MAX (a few helpers use 'int' as return type). Add a compile-time assertion for this. Replace size_t usage in structs with unsigned int or u8 where the stored values are smaller. Replace signed-int arguments for lengths with 'unsigned int' where possible. Last, remove lt_aligned member: its set but never read. struct nft_pipapo_match 40 bytes -> 32 bytes struct nft_pipapo_field 56 bytes -> 32 bytes Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo.c | 62 ++++++++++++++++++++++++++++-------------- net/netfilter/nft_set_pipapo.h | 29 +++++++------------- 2 files changed, 51 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 625c06b8bc39a..b63278b2017ad 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -359,11 +359,13 @@ * * Return: -1 on no match, bit position on 'match_only', 0 otherwise. */ -int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, +int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules, + unsigned long *dst, const union nft_pipapo_map_bucket *mt, bool match_only) { unsigned long bitset; - int k, ret = -1; + unsigned int k; + int ret = -1; for (k = 0; k < len; k++) { bitset = map[k]; @@ -631,13 +633,17 @@ nft_pipapo_get(const struct net *net, const struct nft_set *set, * * Return: 0 on success, -ENOMEM on allocation failure. */ -static int pipapo_resize(struct nft_pipapo_field *f, int old_rules, int rules) +static int pipapo_resize(struct nft_pipapo_field *f, + unsigned int old_rules, unsigned int rules) { long *new_lt = NULL, *new_p, *old_lt = f->lt, *old_p; union nft_pipapo_map_bucket *new_mt, *old_mt = f->mt; - size_t new_bucket_size, copy; + unsigned int new_bucket_size, copy; int group, bucket; + if (rules >= NFT_PIPAPO_RULE0_MAX) + return -ENOSPC; + new_bucket_size = DIV_ROUND_UP(rules, BITS_PER_LONG); #ifdef NFT_PIPAPO_ALIGN new_bucket_size = roundup(new_bucket_size, @@ -690,7 +696,7 @@ mt: if (new_lt) { f->bsize = new_bucket_size; - NFT_PIPAPO_LT_ASSIGN(f, new_lt); + f->lt = new_lt; kvfree(old_lt); } @@ -847,8 +853,8 @@ static void pipapo_lt_8b_to_4b(int old_groups, int bsize, */ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f) { + unsigned int groups, bb; unsigned long *new_lt; - int groups, bb; size_t lt_size; lt_size = f->groups * NFT_PIPAPO_BUCKETS(f->bb) * f->bsize * @@ -898,7 +904,7 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f) f->groups = groups; f->bb = bb; kvfree(f->lt); - NFT_PIPAPO_LT_ASSIGN(f, new_lt); + f->lt = new_lt; } /** @@ -915,7 +921,7 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f) static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k, int mask_bits) { - int rule = f->rules, group, ret, bit_offset = 0; + unsigned int rule = f->rules, group, ret, bit_offset = 0; ret = pipapo_resize(f, f->rules, f->rules + 1); if (ret) @@ -1255,8 +1261,14 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, /* Validate */ start_p = start; end_p = end; + + /* some helpers return -1, or 0 >= for valid rule pos, + * so we cannot support more than INT_MAX rules at this time. + */ + BUILD_BUG_ON(NFT_PIPAPO_RULE0_MAX > INT_MAX); + nft_pipapo_for_each_field(f, i, m) { - if (f->rules >= (unsigned long)NFT_PIPAPO_RULE0_MAX) + if (f->rules >= NFT_PIPAPO_RULE0_MAX) return -ENOSPC; if (memcmp(start_p, end_p, @@ -1362,7 +1374,7 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) if (!new_lt) goto out_lt; - NFT_PIPAPO_LT_ASSIGN(dst, new_lt); + dst->lt = new_lt; memcpy(NFT_PIPAPO_LT_ALIGN(new_lt), NFT_PIPAPO_LT_ALIGN(src->lt), @@ -1433,10 +1445,10 @@ out_scratch: * * Return: Number of rules that originated from the same entry as @first. */ -static int pipapo_rules_same_key(struct nft_pipapo_field *f, int first) +static unsigned int pipapo_rules_same_key(struct nft_pipapo_field *f, unsigned int first) { struct nft_pipapo_elem *e = NULL; /* Keep gcc happy */ - int r; + unsigned int r; for (r = first; r < f->rules; r++) { if (r != first && e != f->mt[r].e) @@ -1489,8 +1501,9 @@ static int pipapo_rules_same_key(struct nft_pipapo_field *f, int first) * 0 1 2 * element pointers: 0x42 0x42 0x44 */ -static void pipapo_unmap(union nft_pipapo_map_bucket *mt, int rules, - int start, int n, int to_offset, bool is_last) +static void pipapo_unmap(union nft_pipapo_map_bucket *mt, unsigned int rules, + unsigned int start, unsigned int n, + unsigned int to_offset, bool is_last) { int i; @@ -1596,8 +1609,8 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) { struct nft_pipapo *priv = nft_set_priv(set); struct net *net = read_pnet(&set->net); + unsigned int rules_f0, first_rule = 0; u64 tstamp = nft_net_tstamp(net); - int rules_f0, first_rule = 0; struct nft_pipapo_elem *e; struct nft_trans_gc *gc; @@ -1608,7 +1621,7 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) { union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS]; const struct nft_pipapo_field *f; - int i, start, rules_fx; + unsigned int i, start, rules_fx; start = first_rule; rules_fx = rules_f0; @@ -1986,7 +1999,7 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m = priv->clone; - int rules_f0, first_rule = 0; + unsigned int rules_f0, first_rule = 0; struct nft_pipapo_elem *e; const u8 *data; @@ -2051,7 +2064,7 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, struct net *net = read_pnet(&set->net); const struct nft_pipapo_match *m; const struct nft_pipapo_field *f; - int i, r; + unsigned int i, r; rcu_read_lock(); if (iter->genmask == nft_genmask_cur(net)) @@ -2155,6 +2168,9 @@ static int nft_pipapo_init(const struct nft_set *set, field_count = desc->field_count ? : 1; + BUILD_BUG_ON(NFT_PIPAPO_MAX_FIELDS > 255); + BUILD_BUG_ON(NFT_PIPAPO_MAX_FIELDS != NFT_REG32_COUNT); + if (field_count > NFT_PIPAPO_MAX_FIELDS) return -EINVAL; @@ -2176,7 +2192,11 @@ static int nft_pipapo_init(const struct nft_set *set, rcu_head_init(&m->rcu); nft_pipapo_for_each_field(f, i, m) { - int len = desc->field_len[i] ? : set->klen; + unsigned int len = desc->field_len[i] ? : set->klen; + + /* f->groups is u8 */ + BUILD_BUG_ON((NFT_PIPAPO_MAX_BYTES * + BITS_PER_BYTE / NFT_PIPAPO_GROUP_BITS_LARGE_SET) >= 256); f->bb = NFT_PIPAPO_GROUP_BITS_INIT; f->groups = len * NFT_PIPAPO_GROUPS_PER_BYTE(f); @@ -2185,7 +2205,7 @@ static int nft_pipapo_init(const struct nft_set *set, f->bsize = 0; f->rules = 0; - NFT_PIPAPO_LT_ASSIGN(f, NULL); + f->lt = NULL; f->mt = NULL; } @@ -2221,7 +2241,7 @@ static void nft_set_pipapo_match_destroy(const struct nft_ctx *ctx, struct nft_pipapo_match *m) { struct nft_pipapo_field *f; - int i, r; + unsigned int i, r; for (i = 0, f = m->f; i < m->field_count - 1; i++, f++) ; diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index 42464e7c24ac0..ca64f7505e714 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -70,15 +70,9 @@ #define NFT_PIPAPO_ALIGN_HEADROOM \ (NFT_PIPAPO_ALIGN - ARCH_KMALLOC_MINALIGN) #define NFT_PIPAPO_LT_ALIGN(lt) (PTR_ALIGN((lt), NFT_PIPAPO_ALIGN)) -#define NFT_PIPAPO_LT_ASSIGN(field, x) \ - do { \ - (field)->lt_aligned = NFT_PIPAPO_LT_ALIGN(x); \ - (field)->lt = (x); \ - } while (0) #else #define NFT_PIPAPO_ALIGN_HEADROOM 0 #define NFT_PIPAPO_LT_ALIGN(lt) (lt) -#define NFT_PIPAPO_LT_ASSIGN(field, x) ((field)->lt = (x)) #endif /* NFT_PIPAPO_ALIGN */ #define nft_pipapo_for_each_field(field, index, match) \ @@ -110,22 +104,18 @@ union nft_pipapo_map_bucket { /** * struct nft_pipapo_field - Lookup, mapping tables and related data for a field - * @groups: Amount of bit groups * @rules: Number of inserted rules * @bsize: Size of each bucket in lookup table, in longs + * @groups: Amount of bit groups * @bb: Number of bits grouped together in lookup table buckets * @lt: Lookup table: 'groups' rows of buckets - * @lt_aligned: Version of @lt aligned to NFT_PIPAPO_ALIGN bytes * @mt: Mapping table: one bucket per rule */ struct nft_pipapo_field { - int groups; - unsigned long rules; - size_t bsize; - int bb; -#ifdef NFT_PIPAPO_ALIGN - unsigned long *lt_aligned; -#endif + unsigned int rules; + unsigned int bsize; + u8 groups; + u8 bb; unsigned long *lt; union nft_pipapo_map_bucket *mt; }; @@ -145,15 +135,15 @@ struct nft_pipapo_scratch { /** * struct nft_pipapo_match - Data used for lookup and matching * @field_count: Amount of fields in set - * @scratch: Preallocated per-CPU maps for partial matching results * @bsize_max: Maximum lookup table bucket size of all fields, in longs + * @scratch: Preallocated per-CPU maps for partial matching results * @rcu: Matching data is swapped on commits * @f: Fields, with lookup and mapping tables */ struct nft_pipapo_match { - int field_count; + u8 field_count; + unsigned int bsize_max; struct nft_pipapo_scratch * __percpu *scratch; - size_t bsize_max; struct rcu_head rcu; struct nft_pipapo_field f[] __counted_by(field_count); }; @@ -186,7 +176,8 @@ struct nft_pipapo_elem { struct nft_set_ext ext; }; -int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, +int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules, + unsigned long *dst, const union nft_pipapo_map_bucket *mt, bool match_only); /** -- cgit 1.2.3-korg From 9f439bd6ef4f60c8a37bd9138fa9ed9b5e7ae0d7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 13 Feb 2024 16:23:40 +0100 Subject: netfilter: nft_set_pipapo: speed up bulk element insertions Insertions into the set are slow when we try to add many elements. For 800k elements I get: time nft -f pipapo_800k real 19m34.849s user 0m2.390s sys 19m12.828s perf stats: --95.39%--nft_pipapo_insert |--76.60%--pipapo_insert | --76.37%--pipapo_resize | |--72.87%--memcpy_orig | |--1.88%--__free_pages_ok | | --0.89%--free_tail_page_prepare | --1.38%--kvmalloc_node .. --18.56%--pipapo_get.isra.0 |--13.91%--__bitmap_and |--3.01%--pipapo_refill |--0.81%--__kmalloc | --0.74%--__kmalloc_large_node | --0.66%--__alloc_pages .. --0.52%--memset_orig So lots of time is spent in copying exising elements to make space for the next one. Instead of allocating to the exact size of the new rule count, allocate extra slack to reduce alloc/copy/free overhead. After: time nft -f pipapo_800k real 1m54.110s user 0m2.515s sys 1m51.377s --80.46%--nft_pipapo_insert |--73.45%--pipapo_get.isra.0 |--57.63%--__bitmap_and | |--8.52%--pipapo_refill |--3.45%--__kmalloc | --3.05%--__kmalloc_large_node | --2.58%--__alloc_pages --2.59%--memset_orig |--6.51%--pipapo_insert --5.96%--pipapo_resize |--3.63%--memcpy_orig --2.13%--kvmalloc_node The new @rules_alloc fills a hole, so struct size doesn't go up. Also make it so rule removal doesn't shrink unless the free/extra space exceeds two pages. This should be safe as well: When a rule gets removed, the attempt to lower the allocated size is already allowed to fail. Exception: do exact allocations as long as set is very small (less than one page needed). v2: address comments from Stefano: kdoc comment formatting changes remove redundant assignment switch back to PAGE_SIZE Link: https://lore.kernel.org/netfilter-devel/20240213141753.17ef27a6@elisabeth/ Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo.c | 83 ++++++++++++++++++++++++++++++++++-------- net/netfilter/nft_set_pipapo.h | 2 + 2 files changed, 69 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index b63278b2017ad..6118e4bba2efb 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -621,6 +621,65 @@ nft_pipapo_get(const struct net *net, const struct nft_set *set, return &e->priv; } +/** + * pipapo_realloc_mt() - Reallocate mapping table if needed upon resize + * @f: Field containing mapping table + * @old_rules: Amount of existing mapped rules + * @rules: Amount of new rules to map + * + * Return: 0 on success, negative error code on failure. + */ +static int pipapo_realloc_mt(struct nft_pipapo_field *f, + unsigned int old_rules, unsigned int rules) +{ + union nft_pipapo_map_bucket *new_mt = NULL, *old_mt = f->mt; + const unsigned int extra = PAGE_SIZE / sizeof(*new_mt); + unsigned int rules_alloc = rules; + + might_sleep(); + + if (unlikely(rules == 0)) + goto out_free; + + /* growing and enough space left, no action needed */ + if (rules > old_rules && f->rules_alloc > rules) + return 0; + + /* downsize and extra slack has not grown too large */ + if (rules < old_rules) { + unsigned int remove = f->rules_alloc - rules; + + if (remove < (2u * extra)) + return 0; + } + + /* If set needs more than one page of memory for rules then + * allocate another extra page to avoid frequent reallocation. + */ + if (rules > extra && + check_add_overflow(rules, extra, &rules_alloc)) + return -EOVERFLOW; + + new_mt = kvmalloc_array(rules_alloc, sizeof(*new_mt), GFP_KERNEL); + if (!new_mt) + return -ENOMEM; + + if (old_mt) + memcpy(new_mt, old_mt, min(old_rules, rules) * sizeof(*new_mt)); + + if (rules > old_rules) { + memset(new_mt + old_rules, 0, + (rules - old_rules) * sizeof(*new_mt)); + } +out_free: + f->rules_alloc = rules_alloc; + f->mt = new_mt; + + kvfree(old_mt); + + return 0; +} + /** * pipapo_resize() - Resize lookup or mapping table, or both * @f: Field containing lookup and mapping tables @@ -637,9 +696,8 @@ static int pipapo_resize(struct nft_pipapo_field *f, unsigned int old_rules, unsigned int rules) { long *new_lt = NULL, *new_p, *old_lt = f->lt, *old_p; - union nft_pipapo_map_bucket *new_mt, *old_mt = f->mt; unsigned int new_bucket_size, copy; - int group, bucket; + int group, bucket, err; if (rules >= NFT_PIPAPO_RULE0_MAX) return -ENOSPC; @@ -682,16 +740,10 @@ static int pipapo_resize(struct nft_pipapo_field *f, } mt: - new_mt = kvmalloc(rules * sizeof(*new_mt), GFP_KERNEL); - if (!new_mt) { + err = pipapo_realloc_mt(f, old_rules, rules); + if (err) { kvfree(new_lt); - return -ENOMEM; - } - - memcpy(new_mt, f->mt, min(old_rules, rules) * sizeof(*new_mt)); - if (rules > old_rules) { - memset(new_mt + old_rules, 0, - (rules - old_rules) * sizeof(*new_mt)); + return err; } if (new_lt) { @@ -700,9 +752,6 @@ mt: kvfree(old_lt); } - f->mt = new_mt; - kvfree(old_mt); - return 0; } @@ -1382,14 +1431,15 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) src->groups * NFT_PIPAPO_BUCKETS(src->bb)); if (src->rules > 0) { - dst->mt = kvmalloc_array(src->rules, sizeof(*src->mt), - GFP_KERNEL); + dst->mt = kvmalloc_array(src->rules_alloc, + sizeof(*src->mt), GFP_KERNEL); if (!dst->mt) goto out_mt; memcpy(dst->mt, src->mt, src->rules * sizeof(*src->mt)); } else { dst->mt = NULL; + dst->rules_alloc = 0; } src++; @@ -2205,6 +2255,7 @@ static int nft_pipapo_init(const struct nft_set *set, f->bsize = 0; f->rules = 0; + f->rules_alloc = 0; f->lt = NULL; f->mt = NULL; } diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index ca64f7505e714..24cd1ff73f981 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -106,6 +106,7 @@ union nft_pipapo_map_bucket { * struct nft_pipapo_field - Lookup, mapping tables and related data for a field * @rules: Number of inserted rules * @bsize: Size of each bucket in lookup table, in longs + * @rules_alloc: Number of allocated rules, always >= rules * @groups: Amount of bit groups * @bb: Number of bits grouped together in lookup table buckets * @lt: Lookup table: 'groups' rows of buckets @@ -114,6 +115,7 @@ union nft_pipapo_map_bucket { struct nft_pipapo_field { unsigned int rules; unsigned int bsize; + unsigned int rules_alloc; u8 groups; u8 bb; unsigned long *lt; -- cgit 1.2.3-korg From 5b651783d80b97167ecd27dc6a4408c694873902 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 15 Feb 2024 09:29:30 +0100 Subject: netfilter: nft_set_pipapo: use GFP_KERNEL for insertions An earlier attempt changed this to GFP_KERNEL, but the get helper is also called for get requests from userspace, which uses rcu. Let the caller pass in the kmalloc flags to allow insertions to schedule if needed. Suggested-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 6118e4bba2efb..c0ceea068936a 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -507,6 +507,7 @@ out: * @data: Key data to be matched against existing elements * @genmask: If set, check that element is active in given genmask * @tstamp: timestamp to check for expired elements + * @gfp: the type of memory to allocate (see kmalloc). * * This is essentially the same as the lookup function, except that it matches * key data against the uncommitted copy and doesn't use preallocated maps for @@ -517,7 +518,7 @@ out: static struct nft_pipapo_elem *pipapo_get(const struct net *net, const struct nft_set *set, const u8 *data, u8 genmask, - u64 tstamp) + u64 tstamp, gfp_t gfp) { struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT); struct nft_pipapo *priv = nft_set_priv(set); @@ -530,13 +531,13 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net, if (m->bsize_max == 0) return ret; - res_map = kmalloc_array(m->bsize_max, sizeof(*res_map), GFP_ATOMIC); + res_map = kmalloc_array(m->bsize_max, sizeof(*res_map), gfp); if (!res_map) { ret = ERR_PTR(-ENOMEM); goto out; } - fill_map = kcalloc(m->bsize_max, sizeof(*res_map), GFP_ATOMIC); + fill_map = kcalloc(m->bsize_max, sizeof(*res_map), gfp); if (!fill_map) { ret = ERR_PTR(-ENOMEM); goto out; @@ -614,7 +615,8 @@ nft_pipapo_get(const struct net *net, const struct nft_set *set, struct nft_pipapo_elem *e; e = pipapo_get(net, set, (const u8 *)elem->key.val.data, - nft_genmask_cur(net), get_jiffies_64()); + nft_genmask_cur(net), get_jiffies_64(), + GFP_ATOMIC); if (IS_ERR(e)) return ERR_CAST(e); @@ -1275,7 +1277,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, else end = start; - dup = pipapo_get(net, set, start, genmask, tstamp); + dup = pipapo_get(net, set, start, genmask, tstamp, GFP_KERNEL); if (!IS_ERR(dup)) { /* Check if we already have the same exact entry */ const struct nft_data *dup_key, *dup_end; @@ -1297,7 +1299,8 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, if (PTR_ERR(dup) == -ENOENT) { /* Look for partially overlapping entries */ - dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp); + dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp, + GFP_KERNEL); } if (PTR_ERR(dup) != -ENOENT) { @@ -1865,7 +1868,8 @@ static void *pipapo_deactivate(const struct net *net, const struct nft_set *set, { struct nft_pipapo_elem *e; - e = pipapo_get(net, set, data, nft_genmask_next(net), nft_net_tstamp(net)); + e = pipapo_get(net, set, data, nft_genmask_next(net), + nft_net_tstamp(net), GFP_KERNEL); if (IS_ERR(e)) return NULL; -- cgit 1.2.3-korg From 3f801968889459ecae1eab524b039676e6eaa319 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 14 Feb 2024 14:41:02 +0100 Subject: netfilter: move nf_reinject into nfnetlink_queue modules No need to keep this in the core, move it to the nfnetlink_queue module. nf_reroute is moved too, there were no other callers. Signed-off-by: Florian Westphal --- include/linux/netfilter.h | 1 - include/net/netfilter/nf_queue.h | 1 - net/netfilter/nf_queue.c | 106 ----------------------------- net/netfilter/nfnetlink_queue.c | 142 +++++++++++++++++++++++++++++++++++++++ net/netfilter/utils.c | 37 ---------- 5 files changed, 142 insertions(+), 145 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 80900d9109920..ffb5e0297eb5c 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -370,7 +370,6 @@ __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook, u_int8_t protocol, unsigned short family); int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict, unsigned short family); -int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry); #include diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index c81021ab07aa1..4aeffddb75861 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -35,7 +35,6 @@ struct nf_queue_handler { void nf_register_queue_handler(const struct nf_queue_handler *qh); void nf_unregister_queue_handler(void); -void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); bool nf_queue_entry_get_refs(struct nf_queue_entry *entry); void nf_queue_entry_free(struct nf_queue_entry *entry); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index e2f334f70281f..7f12e56e6e526 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -248,109 +248,3 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, return 0; } EXPORT_SYMBOL_GPL(nf_queue); - -static unsigned int nf_iterate(struct sk_buff *skb, - struct nf_hook_state *state, - const struct nf_hook_entries *hooks, - unsigned int *index) -{ - const struct nf_hook_entry *hook; - unsigned int verdict, i = *index; - - while (i < hooks->num_hook_entries) { - hook = &hooks->hooks[i]; -repeat: - verdict = nf_hook_entry_hookfn(hook, skb, state); - if (verdict != NF_ACCEPT) { - *index = i; - if (verdict != NF_REPEAT) - return verdict; - goto repeat; - } - i++; - } - - *index = i; - return NF_ACCEPT; -} - -static struct nf_hook_entries *nf_hook_entries_head(const struct net *net, u8 pf, u8 hooknum) -{ - switch (pf) { -#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE - case NFPROTO_BRIDGE: - return rcu_dereference(net->nf.hooks_bridge[hooknum]); -#endif - case NFPROTO_IPV4: - return rcu_dereference(net->nf.hooks_ipv4[hooknum]); - case NFPROTO_IPV6: - return rcu_dereference(net->nf.hooks_ipv6[hooknum]); - default: - WARN_ON_ONCE(1); - return NULL; - } - - return NULL; -} - -/* Caller must hold rcu read-side lock */ -void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) -{ - const struct nf_hook_entry *hook_entry; - const struct nf_hook_entries *hooks; - struct sk_buff *skb = entry->skb; - const struct net *net; - unsigned int i; - int err; - u8 pf; - - net = entry->state.net; - pf = entry->state.pf; - - hooks = nf_hook_entries_head(net, pf, entry->state.hook); - - i = entry->hook_index; - if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) { - kfree_skb(skb); - nf_queue_entry_free(entry); - return; - } - - hook_entry = &hooks->hooks[i]; - - /* Continue traversal iff userspace said ok... */ - if (verdict == NF_REPEAT) - verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state); - - if (verdict == NF_ACCEPT) { - if (nf_reroute(skb, entry) < 0) - verdict = NF_DROP; - } - - if (verdict == NF_ACCEPT) { -next_hook: - ++i; - verdict = nf_iterate(skb, &entry->state, hooks, &i); - } - - switch (verdict & NF_VERDICT_MASK) { - case NF_ACCEPT: - case NF_STOP: - local_bh_disable(); - entry->state.okfn(entry->state.net, entry->state.sk, skb); - local_bh_enable(); - break; - case NF_QUEUE: - err = nf_queue(skb, &entry->state, i, verdict); - if (err == 1) - goto next_hook; - break; - case NF_STOLEN: - break; - default: - kfree_skb(skb); - } - - nf_queue_entry_free(entry); -} -EXPORT_SYMBOL(nf_reinject); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 5cf38fc0a366a..00f4bd21c59b4 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -225,6 +225,148 @@ find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id) return entry; } +static unsigned int nf_iterate(struct sk_buff *skb, + struct nf_hook_state *state, + const struct nf_hook_entries *hooks, + unsigned int *index) +{ + const struct nf_hook_entry *hook; + unsigned int verdict, i = *index; + + while (i < hooks->num_hook_entries) { + hook = &hooks->hooks[i]; +repeat: + verdict = nf_hook_entry_hookfn(hook, skb, state); + if (verdict != NF_ACCEPT) { + *index = i; + if (verdict != NF_REPEAT) + return verdict; + goto repeat; + } + i++; + } + + *index = i; + return NF_ACCEPT; +} + +static struct nf_hook_entries *nf_hook_entries_head(const struct net *net, u8 pf, u8 hooknum) +{ + switch (pf) { +#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE + case NFPROTO_BRIDGE: + return rcu_dereference(net->nf.hooks_bridge[hooknum]); +#endif + case NFPROTO_IPV4: + return rcu_dereference(net->nf.hooks_ipv4[hooknum]); + case NFPROTO_IPV6: + return rcu_dereference(net->nf.hooks_ipv6[hooknum]); + default: + WARN_ON_ONCE(1); + return NULL; + } + + return NULL; +} + +static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry) +{ +#ifdef CONFIG_INET + const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); + + if (entry->state.hook == NF_INET_LOCAL_OUT) { + const struct iphdr *iph = ip_hdr(skb); + + if (!(iph->tos == rt_info->tos && + skb->mark == rt_info->mark && + iph->daddr == rt_info->daddr && + iph->saddr == rt_info->saddr)) + return ip_route_me_harder(entry->state.net, entry->state.sk, + skb, RTN_UNSPEC); + } +#endif + return 0; +} + +static int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry) +{ + const struct nf_ipv6_ops *v6ops; + int ret = 0; + + switch (entry->state.pf) { + case AF_INET: + ret = nf_ip_reroute(skb, entry); + break; + case AF_INET6: + v6ops = rcu_dereference(nf_ipv6_ops); + if (v6ops) + ret = v6ops->reroute(skb, entry); + break; + } + return ret; +} + +/* caller must hold rcu read-side lock */ +static void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) +{ + const struct nf_hook_entry *hook_entry; + const struct nf_hook_entries *hooks; + struct sk_buff *skb = entry->skb; + const struct net *net; + unsigned int i; + int err; + u8 pf; + + net = entry->state.net; + pf = entry->state.pf; + + hooks = nf_hook_entries_head(net, pf, entry->state.hook); + + i = entry->hook_index; + if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) { + kfree_skb_reason(skb, SKB_DROP_REASON_NETFILTER_DROP); + nf_queue_entry_free(entry); + return; + } + + hook_entry = &hooks->hooks[i]; + + /* Continue traversal iff userspace said ok... */ + if (verdict == NF_REPEAT) + verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state); + + if (verdict == NF_ACCEPT) { + if (nf_reroute(skb, entry) < 0) + verdict = NF_DROP; + } + + if (verdict == NF_ACCEPT) { +next_hook: + ++i; + verdict = nf_iterate(skb, &entry->state, hooks, &i); + } + + switch (verdict & NF_VERDICT_MASK) { + case NF_ACCEPT: + case NF_STOP: + local_bh_disable(); + entry->state.okfn(entry->state.net, entry->state.sk, skb); + local_bh_enable(); + break; + case NF_QUEUE: + err = nf_queue(skb, &entry->state, i, verdict); + if (err == 1) + goto next_hook; + break; + case NF_STOLEN: + break; + default: + kfree_skb(skb); + } + + nf_queue_entry_free(entry); +} + static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict) { const struct nf_ct_hook *ct_hook; diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c index acef4155f0daf..008419db815af 100644 --- a/net/netfilter/utils.c +++ b/net/netfilter/utils.c @@ -179,43 +179,6 @@ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, } EXPORT_SYMBOL_GPL(nf_route); -static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry) -{ -#ifdef CONFIG_INET - const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); - - if (entry->state.hook == NF_INET_LOCAL_OUT) { - const struct iphdr *iph = ip_hdr(skb); - - if (!(iph->tos == rt_info->tos && - skb->mark == rt_info->mark && - iph->daddr == rt_info->daddr && - iph->saddr == rt_info->saddr)) - return ip_route_me_harder(entry->state.net, entry->state.sk, - skb, RTN_UNSPEC); - } -#endif - return 0; -} - -int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry) -{ - const struct nf_ipv6_ops *v6ops; - int ret = 0; - - switch (entry->state.pf) { - case AF_INET: - ret = nf_ip_reroute(skb, entry); - break; - case AF_INET6: - v6ops = rcu_dereference(nf_ipv6_ops); - if (v6ops) - ret = v6ops->reroute(skb, entry); - break; - } - return ret; -} - /* Only get and check the lengths, not do any hop-by-hop stuff. */ int nf_ip6_check_hbh_len(struct sk_buff *skb, u32 *plen) { -- cgit 1.2.3-korg From 26f4dac11775a1ca24e2605cb30e828d4dbdea93 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 16 Feb 2024 15:31:32 -0800 Subject: netfilter: x_tables: Use unsafe_memcpy() for 0-sized destination The struct xt_entry_target fake flexible array has not be converted to a true flexible array, which is mainly blocked by it being both UAPI and used in the middle of other structures. In order to properly check for 0-sized destinations in memcpy(), an exception must be made for the one place where it is still a destination. Since memcpy() was already skipping checks for 0-sized destinations, using unsafe_memcpy() is no change in behavior. Signed-off-by: Kees Cook Reviewed-by: Simon Horman Signed-off-by: Florian Westphal --- net/netfilter/x_tables.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 21624d68314f9..da5d929c7c85b 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1142,7 +1142,8 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, if (target->compat_from_user) target->compat_from_user(t->data, ct->data); else - memcpy(t->data, ct->data, tsize - sizeof(*ct)); + unsafe_memcpy(t->data, ct->data, tsize - sizeof(*ct), + /* UAPI 0-sized destination */); tsize += off; t->u.user.target_size = tsize; -- cgit 1.2.3-korg From 5073d64e99dfc7dbd2dcf022ddcc5510cba95349 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Tue, 20 Feb 2024 15:36:42 +0800 Subject: net: kcm: Simplify the allocation of slab caches Use the new KMEM_CACHE() macro instead of direct kmem_cache_create to simplify the creation of SLAB caches. And change cache name from 'kcm_mux_cache' to 'kcm_mux', 'kcm_psock_cache' to 'kcm_psock'. Signed-off-by: Kunwu Chan Reviewed-by: Michal Swiatkowski Signed-off-by: David S. Miller --- net/kcm/kcmsock.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 73c200c5c8e40..dc46f17a3187e 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1878,15 +1878,11 @@ static int __init kcm_init(void) { int err = -ENOMEM; - kcm_muxp = kmem_cache_create("kcm_mux_cache", - sizeof(struct kcm_mux), 0, - SLAB_HWCACHE_ALIGN, NULL); + kcm_muxp = KMEM_CACHE(kcm_mux, SLAB_HWCACHE_ALIGN); if (!kcm_muxp) goto fail; - kcm_psockp = kmem_cache_create("kcm_psock_cache", - sizeof(struct kcm_psock), 0, - SLAB_HWCACHE_ALIGN, NULL); + kcm_psockp = KMEM_CACHE(kcm_psock, SLAB_HWCACHE_ALIGN); if (!kcm_psockp) goto fail; -- cgit 1.2.3-korg From 11a548f252c4a754026d4a46e1d91ffe6c88d051 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Tue, 20 Feb 2024 15:36:43 +0800 Subject: ip6mr: Simplify the allocation of slab caches in ip6_mr_init Use the new KMEM_CACHE() macro instead of direct kmem_cache_create to simplify the creation of SLAB caches. And change cache name from 'ip6_mrt_cache' to 'mfc6_cache'. Signed-off-by: Kunwu Chan Reviewed-by: Michal Swiatkowski Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 9782c180fee64..1f19743f25406 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1373,10 +1373,7 @@ int __init ip6_mr_init(void) { int err; - mrt_cachep = kmem_cache_create("ip6_mrt_cache", - sizeof(struct mfc6_cache), - 0, SLAB_HWCACHE_ALIGN, - NULL); + mrt_cachep = KMEM_CACHE(mfc6_cache, SLAB_HWCACHE_ALIGN); if (!mrt_cachep) return -ENOMEM; -- cgit 1.2.3-korg From eec70af2b41cf5311f5bea5b22f24958f642c72a Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Tue, 20 Feb 2024 15:36:44 +0800 Subject: ipmr: Simplify the allocation of slab caches Use the new KMEM_CACHE() macro instead of direct kmem_cache_create to simplify the creation of SLAB caches. And change cache name from 'ip_mrt_cache' to 'mfc_cache'. Signed-off-by: Kunwu Chan Reviewed-by: Michal Swiatkowski Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 3622298365105..5561bce3a37e8 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -3139,10 +3139,7 @@ int __init ip_mr_init(void) { int err; - mrt_cachep = kmem_cache_create("ip_mrt_cache", - sizeof(struct mfc_cache), - 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, - NULL); + mrt_cachep = KMEM_CACHE(mfc_cache, SLAB_HWCACHE_ALIGN | SLAB_PANIC); err = register_pernet_subsys(&ipmr_net_ops); if (err) -- cgit 1.2.3-korg From 7eb2bc2481a1162ceacdbacd2089736558a08f8d Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Tue, 20 Feb 2024 15:36:45 +0800 Subject: ipv4: Simplify the allocation of slab caches in ip_rt_init Use the new KMEM_CACHE() macro instead of direct kmem_cache_create to simplify the creation of SLAB caches. And change cache name from 'ip_dst_cache' to 'rtable'. Signed-off-by: Kunwu Chan Reviewed-by: Michal Swiatkowski Signed-off-by: David S. Miller --- net/ipv4/route.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 16615d107cf06..b512288d6fcc6 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3693,9 +3693,8 @@ int __init ip_rt_init(void) panic("IP: failed to allocate ip_rt_acct\n"); #endif - ipv4_dst_ops.kmem_cachep = - kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + ipv4_dst_ops.kmem_cachep = KMEM_CACHE(rtable, + SLAB_HWCACHE_ALIGN | SLAB_PANIC); ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep; -- cgit 1.2.3-korg From 072f88ca5ca4ef8655721341195028844165b2f1 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Tue, 20 Feb 2024 15:36:46 +0800 Subject: ipv6: Simplify the allocation of slab caches Use the new KMEM_CACHE() macro instead of direct kmem_cache_create to simplify the creation of SLAB caches. Signed-off-by: Kunwu Chan Reviewed-by: Michal Swiatkowski Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 805bbf26b3efd..6540d877d3693 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -2493,10 +2493,8 @@ int __init fib6_init(void) { int ret = -ENOMEM; - fib6_node_kmem = kmem_cache_create("fib6_nodes", - sizeof(struct fib6_node), 0, - SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, - NULL); + fib6_node_kmem = KMEM_CACHE(fib6_node, + SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT); if (!fib6_node_kmem) goto out; -- cgit 1.2.3-korg From 6030b3a469f8936d7f0f928e788f12a4fe14a4ca Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Fri, 16 Feb 2024 20:16:20 +0530 Subject: wifi: mac80211: check beacon countdown is complete on per link basis Currently, function to check if beacon countdown is complete uses deflink to fetch the beacon and check the counter. However, with MLO, there is a need to check the counter for the beacon in a particular link. Add support to use link_id in order to fetch the beacon from a particular link data. Signed-off-by: Aditya Kumar Singh Link: https://msgid.link/20240216144621.514385-2-quic_adisi@quicinc.com Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/mac.c | 2 +- drivers/net/wireless/ath/ath10k/wmi.c | 2 +- drivers/net/wireless/ath/ath11k/mac.c | 2 +- drivers/net/wireless/ath/ath9k/beacon.c | 2 +- drivers/net/wireless/ath/ath9k/htc_drv_beacon.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/time-event.c | 2 +- drivers/net/wireless/mediatek/mt76/mac80211.c | 4 ++-- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 2 +- drivers/net/wireless/virtual/mac80211_hwsim.c | 2 +- include/net/mac80211.h | 4 +++- net/mac80211/tx.c | 14 ++++++++++++-- 12 files changed, 26 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 41053219ee957..e322b528baafc 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -2034,7 +2034,7 @@ static void ath10k_mac_vif_ap_csa_count_down(struct ath10k_vif *arvif) if (!arvif->is_up) return; - if (!ieee80211_beacon_cntdwn_is_complete(vif)) { + if (!ieee80211_beacon_cntdwn_is_complete(vif, 0)) { ieee80211_beacon_update_cntdwn(vif, 0); ret = ath10k_mac_setup_bcn_tmpl(arvif); diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index ddf15717d5046..2e9661f4bea82 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -3884,7 +3884,7 @@ void ath10k_wmi_event_host_swba(struct ath10k *ar, struct sk_buff *skb) * actual channel switch is done */ if (arvif->vif->bss_conf.csa_active && - ieee80211_beacon_cntdwn_is_complete(arvif->vif)) { + ieee80211_beacon_cntdwn_is_complete(arvif->vif, 0)) { ieee80211_csa_finish(arvif->vif, 0); continue; } diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index f7cab50bdfd12..a1e2729582e8e 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -1577,7 +1577,7 @@ void ath11k_mac_bcn_tx_event(struct ath11k_vif *arvif) return; if (vif->bss_conf.color_change_active && - ieee80211_beacon_cntdwn_is_complete(vif)) { + ieee80211_beacon_cntdwn_is_complete(vif, 0)) { arvif->bcca_zero_sent = true; ieee80211_color_change_finish(vif); return; diff --git a/drivers/net/wireless/ath/ath9k/beacon.c b/drivers/net/wireless/ath/ath9k/beacon.c index 4e48407138b25..b399a7926ef5d 100644 --- a/drivers/net/wireless/ath/ath9k/beacon.c +++ b/drivers/net/wireless/ath/ath9k/beacon.c @@ -365,7 +365,7 @@ bool ath9k_csa_is_finished(struct ath_softc *sc, struct ieee80211_vif *vif) if (!vif || !vif->bss_conf.csa_active) return false; - if (!ieee80211_beacon_cntdwn_is_complete(vif)) + if (!ieee80211_beacon_cntdwn_is_complete(vif, 0)) return false; ieee80211_csa_finish(vif, 0); diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c b/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c index 8179d35dc3101..547634f82183d 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c @@ -514,7 +514,7 @@ bool ath9k_htc_csa_is_finished(struct ath9k_htc_priv *priv) if (!vif || !vif->bss_conf.csa_active) return false; - if (!ieee80211_beacon_cntdwn_is_complete(vif)) + if (!ieee80211_beacon_cntdwn_is_complete(vif, 0)) return false; ieee80211_csa_finish(vif, 0); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index cc592e288188d..123fe9bba982a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -1466,7 +1466,7 @@ static void iwl_mvm_csa_count_down(struct iwl_mvm *mvm, mvmvif->csa_countdown = true; - if (!ieee80211_beacon_cntdwn_is_complete(csa_vif)) { + if (!ieee80211_beacon_cntdwn_is_complete(csa_vif, 0)) { int c = ieee80211_beacon_update_cntdwn(csa_vif, 0); iwl_mvm_mac_ctxt_beacon_changed(mvm, csa_vif, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index 89b1c7a876608..a59d264a11c52 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -156,7 +156,7 @@ static void iwl_mvm_csa_noa_start(struct iwl_mvm *mvm) * So we just do nothing here and the switch * will be performed on the last TBTT. */ - if (!ieee80211_beacon_cntdwn_is_complete(csa_vif)) { + if (!ieee80211_beacon_cntdwn_is_complete(csa_vif, 0)) { IWL_WARN(mvm, "CSA NOA started too early\n"); goto out_unlock; } diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 8bf82755ca4c6..758e380fdf1dd 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -1613,7 +1613,7 @@ EXPORT_SYMBOL_GPL(mt76_get_sar_power); static void __mt76_csa_finish(void *priv, u8 *mac, struct ieee80211_vif *vif) { - if (vif->bss_conf.csa_active && ieee80211_beacon_cntdwn_is_complete(vif)) + if (vif->bss_conf.csa_active && ieee80211_beacon_cntdwn_is_complete(vif, 0)) ieee80211_csa_finish(vif, 0); } @@ -1638,7 +1638,7 @@ __mt76_csa_check(void *priv, u8 *mac, struct ieee80211_vif *vif) if (!vif->bss_conf.csa_active) return; - dev->csa_complete |= ieee80211_beacon_cntdwn_is_complete(vif); + dev->csa_complete |= ieee80211_beacon_cntdwn_is_complete(vif, 0); } void mt76_csa_check(struct mt76_dev *dev) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 66bf92c164c30..db5041d23938b 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -5739,7 +5739,7 @@ static void rtl8xxxu_update_beacon_work_callback(struct work_struct *work) } if (vif->bss_conf.csa_active) { - if (ieee80211_beacon_cntdwn_is_complete(vif)) { + if (ieee80211_beacon_cntdwn_is_complete(vif, 0)) { ieee80211_csa_finish(vif, 0); return; } diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index 0554946ed30e6..2ea11a86d920a 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -2305,7 +2305,7 @@ static void mac80211_hwsim_beacon_tx(void *arg, u8 *mac, rcu_dereference(link_conf->chanctx_conf)->def.chan); } - if (link_conf->csa_active && ieee80211_beacon_cntdwn_is_complete(vif)) + if (link_conf->csa_active && ieee80211_beacon_cntdwn_is_complete(vif, link_id)) ieee80211_csa_finish(vif, link_id); } diff --git a/include/net/mac80211.h b/include/net/mac80211.h index fc223761e3afc..25c892ea9eb34 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5566,10 +5566,12 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif, unsigned int link_id); /** * ieee80211_beacon_cntdwn_is_complete - find out if countdown reached 1 * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @link_id: valid link_id during MLO or 0 for non-MLO * * This function returns whether the countdown reached zero. */ -bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif); +bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif, + unsigned int link_id); /** * ieee80211_color_change_finish - notify mac80211 about color change diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index f4be4af568be7..6bf223e6cd1a5 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5095,9 +5095,11 @@ unlock: } EXPORT_SYMBOL(ieee80211_beacon_set_cntdwn); -bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif) +bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif, + unsigned int link_id) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_link_data *link; struct beacon_data *beacon = NULL; u8 *beacon_data; size_t beacon_data_len; @@ -5106,9 +5108,17 @@ bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif) if (!ieee80211_sdata_running(sdata)) return false; + if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS)) + return 0; + rcu_read_lock(); + + link = rcu_dereference(sdata->link[link_id]); + if (!link) + goto out; + if (vif->type == NL80211_IFTYPE_AP) { - beacon = rcu_dereference(sdata->deflink.u.ap.beacon); + beacon = rcu_dereference(link->u.ap.beacon); if (WARN_ON(!beacon || !beacon->tail)) goto out; beacon_data = beacon->tail; -- cgit 1.2.3-korg From d73fbaf24c5a1e0698a7a5e17d66a5100efef72a Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Tue, 6 Feb 2024 16:54:06 +0200 Subject: wifi: mac80211: make associated BSS pointer visible to the driver Some drivers need the data in it, so move it to the link conf, which is exposed to the driver. Signed-off-by: Miri Korenblit Link: https://msgid.link/20240206164849.6fe9782b87b4.Ifbffef638f07ca7f5c2b27f40d2cf2942d21de0b@changeid [remove bss pointer from internal struct, update docs] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 5 +++++ net/mac80211/ieee80211_i.h | 2 -- net/mac80211/mlme.c | 18 +++++++++--------- 3 files changed, 14 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 25c892ea9eb34..56c6ecb2c10a2 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -557,6 +557,10 @@ struct ieee80211_fils_discovery { * to that BSS) that can change during the lifetime of the BSS. * * @vif: reference to owning VIF + * @bss: the cfg80211 bss descriptor. Valid only for a station, and only + * when associated. Note: This contains information which is not + * necessarily authenticated. For example, information coming from probe + * responses. * @addr: (link) address used locally * @link_id: link ID, or 0 for non-MLO * @htc_trig_based_pkt_ext: default PE in 4us units, if BSS supports HE @@ -700,6 +704,7 @@ struct ieee80211_fils_discovery { */ struct ieee80211_bss_conf { struct ieee80211_vif *vif; + struct cfg80211_bss *bss; const u8 *bssid; unsigned int link_id; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f3edb1a148a7f..4bec625a84d12 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1006,8 +1006,6 @@ struct ieee80211_link_data_managed { int mu_edca_last_param_set; u8 bss_param_ch_cnt; - - struct cfg80211_bss *bss; }; struct ieee80211_link_data_ap { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e1554666d706c..87ffc19770b8e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2015,7 +2015,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct cfg80211_bss *cbss = link->u.mgd.bss; + struct cfg80211_bss *cbss = link->conf->bss; struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *chanctx; enum nl80211_band current_band; @@ -2928,7 +2928,7 @@ static u64 ieee80211_link_set_associated(struct ieee80211_link_data *link, ieee80211_check_rate_mask(link); - link->u.mgd.bss = cbss; + link->conf->bss = cbss; memcpy(link->u.mgd.bssid, cbss->bssid, ETH_ALEN); if (sdata->vif.p2p || @@ -3076,7 +3076,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ifmgd->associated = false; /* other links will be destroyed */ - sdata->deflink.u.mgd.bss = NULL; + sdata->deflink.conf->bss = NULL; sdata->deflink.smps_mode = IEEE80211_SMPS_OFF; netif_carrier_off(sdata->dev); @@ -3406,7 +3406,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) ieee80211_mlme_send_probe_req(sdata, sdata->vif.addr, dst, sdata->vif.cfg.ssid, sdata->vif.cfg.ssid_len, - sdata->deflink.u.mgd.bss->channel); + sdata->deflink.conf->bss->channel); } ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms); @@ -3489,7 +3489,7 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, return NULL; if (ifmgd->associated) - cbss = sdata->deflink.u.mgd.bss; + cbss = sdata->deflink.conf->bss; else if (ifmgd->auth_data) cbss = ifmgd->auth_data->bss; else if (ifmgd->assoc_data && ifmgd->assoc_data->link[0].bss) @@ -3568,8 +3568,8 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) link = sdata_dereference(sdata->link[link_id], sdata); if (!link) continue; - cfg80211_unlink_bss(local->hw.wiphy, link->u.mgd.bss); - link->u.mgd.bss = NULL; + cfg80211_unlink_bss(local->hw.wiphy, link->conf->bss); + link->conf->bss = NULL; } } @@ -6208,7 +6208,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, } if (!ifmgd->associated || - !ieee80211_rx_our_beacon(bssid, link->u.mgd.bss)) + !ieee80211_rx_our_beacon(bssid, link->conf->bss)) return; bssid = link->u.mgd.bssid; @@ -6235,7 +6235,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, */ if (!ieee80211_is_s1g_beacon(hdr->frame_control)) ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4); - parse_params.bss = link->u.mgd.bss; + parse_params.bss = link->conf->bss; parse_params.filter = care_about_ies; parse_params.crc = ncrc; elems = ieee802_11_parse_elems_full(&parse_params); -- cgit 1.2.3-korg From ba4b1fa3128b2fbf14e167230315cbd9074b629b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 15 Feb 2024 23:21:51 +0000 Subject: wifi: mac80211: clean up assignments to pointer cache. The assignment to pointer cache in function mesh_fast_tx_gc can be made at the declaration time rather than a later assignment. There are also 3 functions where pointer cache is being initialized at declaration time and later re-assigned again with the same value, these are redundant and can be removed. Cleans up code and three clang scan build warnings: warning: Value stored to 'cache' during its initialization is never read [deadcode.DeadStores] Signed-off-by: Colin Ian King Reviewed-by: Simon Horman Link: https://msgid.link/20240215232151.2075483-1-colin.i.king@gmail.com Signed-off-by: Johannes Berg --- net/mac80211/mesh_pathtbl.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 735edde1bd819..91b55d6a68b97 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -600,11 +600,10 @@ unlock_sta: void mesh_fast_tx_gc(struct ieee80211_sub_if_data *sdata) { unsigned long timeout = msecs_to_jiffies(MESH_FAST_TX_CACHE_TIMEOUT); - struct mesh_tx_cache *cache; + struct mesh_tx_cache *cache = &sdata->u.mesh.tx_cache; struct ieee80211_mesh_fast_tx *entry; struct hlist_node *n; - cache = &sdata->u.mesh.tx_cache; if (atomic_read(&cache->rht.nelems) < MESH_FAST_TX_CACHE_THRESHOLD_SIZE) return; @@ -622,7 +621,6 @@ void mesh_fast_tx_flush_mpath(struct mesh_path *mpath) struct ieee80211_mesh_fast_tx *entry; struct hlist_node *n; - cache = &sdata->u.mesh.tx_cache; spin_lock_bh(&cache->walk_lock); hlist_for_each_entry_safe(entry, n, &cache->walk_head, walk_list) if (entry->mpath == mpath) @@ -637,7 +635,6 @@ void mesh_fast_tx_flush_sta(struct ieee80211_sub_if_data *sdata, struct ieee80211_mesh_fast_tx *entry; struct hlist_node *n; - cache = &sdata->u.mesh.tx_cache; spin_lock_bh(&cache->walk_lock); hlist_for_each_entry_safe(entry, n, &cache->walk_head, walk_list) if (rcu_access_pointer(entry->mpath->next_hop) == sta) @@ -651,7 +648,6 @@ void mesh_fast_tx_flush_addr(struct ieee80211_sub_if_data *sdata, struct mesh_tx_cache *cache = &sdata->u.mesh.tx_cache; struct ieee80211_mesh_fast_tx *entry; - cache = &sdata->u.mesh.tx_cache; spin_lock_bh(&cache->walk_lock); entry = rhashtable_lookup_fast(&cache->rht, addr, fast_tx_rht_params); if (entry) -- cgit 1.2.3-korg From f79ab5d2bced9bd7c0ce86d2aa5b70d053001bb4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 15 Feb 2024 17:41:36 +0200 Subject: wifi: cfg80211: Add KHZ_PER_GHZ to units.h and reuse The KHZ_PER_GHZ might be used by others (with the name aligned with similar constants). Define it in units.h and convert wireless to use it. Signed-off-by: Andy Shevchenko Reviewed-by: Simon Horman Link: https://msgid.link/20240215154136.630029-1-andriy.shevchenko@linux.intel.com Signed-off-by: Johannes Berg --- include/linux/units.h | 5 ++++- net/wireless/reg.c | 7 +++---- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/linux/units.h b/include/linux/units.h index 45110daaf8d32..00e15de33eca2 100644 --- a/include/linux/units.h +++ b/include/linux/units.h @@ -24,10 +24,13 @@ #define NANOHZ_PER_HZ 1000000000UL #define MICROHZ_PER_HZ 1000000UL #define MILLIHZ_PER_HZ 1000UL + #define HZ_PER_KHZ 1000UL -#define KHZ_PER_MHZ 1000UL #define HZ_PER_MHZ 1000000UL +#define KHZ_PER_MHZ 1000UL +#define KHZ_PER_GHZ 1000000UL + #define MILLIWATT_PER_WATT 1000UL #define MICROWATT_PER_MILLIWATT 1000UL #define MICROWATT_PER_WATT 1000000UL diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 50cadbad485fb..753f8e9aa4b19 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -57,6 +57,8 @@ #include #include #include +#include + #include #include "core.h" #include "reg.h" @@ -1289,20 +1291,17 @@ static bool is_valid_rd(const struct ieee80211_regdomain *rd) static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, u32 freq_khz) { -#define ONE_GHZ_IN_KHZ 1000000 /* * From 802.11ad: directional multi-gigabit (DMG): * Pertaining to operation in a frequency band containing a channel * with the Channel starting frequency above 45 GHz. */ - u32 limit = freq_khz > 45 * ONE_GHZ_IN_KHZ ? - 20 * ONE_GHZ_IN_KHZ : 2 * ONE_GHZ_IN_KHZ; + u32 limit = freq_khz > 45 * KHZ_PER_GHZ ? 20 * KHZ_PER_GHZ : 2 * KHZ_PER_GHZ; if (abs(freq_khz - freq_range->start_freq_khz) <= limit) return true; if (abs(freq_khz - freq_range->end_freq_khz) <= limit) return true; return false; -#undef ONE_GHZ_IN_KHZ } /* -- cgit 1.2.3-korg From f8599d634094b1257054a8d0815785d658cbdb74 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Fri, 16 Feb 2024 13:54:27 +0200 Subject: wifi: cfg80211: set correct param change count in ML element The ML element generation code to create a BSS entry from a per-STA profile was not overwriting the BSS parameter change count. This meant that the incorrect parameter change count would be reported within the multi-link element. Fix this by returning the BSS parameter change count from the function and placing it into the ML element. The returned tbtt info was never used, so just drop that to simplify the code. Fixes: 5f478adf1f99 ("wifi: cfg80211: generate an ML element for per-STA profiles") Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240216135047.f2a507634692.I06b122c7a319a38b4e970f5e0bd3d3ef9cac4cbe@changeid Signed-off-by: Johannes Berg --- net/wireless/scan.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 88e8b25c073a7..0907a72231eeb 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2567,9 +2567,9 @@ error: } static u8 -cfg80211_tbtt_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id, - const struct ieee80211_neighbor_ap_info **ap_info, - const u8 **tbtt_info) +cfg80211_rnr_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id, + const struct ieee80211_neighbor_ap_info **ap_info, + u8 *param_ch_count) { const struct ieee80211_neighbor_ap_info *info; const struct element *rnr; @@ -2626,7 +2626,9 @@ cfg80211_tbtt_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id, if (mld_id == mld_params->mld_id && link_id == lid) { *ap_info = info; - *tbtt_info = pos; + *param_ch_count = + le16_get_bits(mld_params->params, + IEEE80211_RNR_MLD_PARAMS_BSS_CHANGE_COUNT); return use_for; } @@ -2836,8 +2838,8 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy, enum nl80211_band band; u32 freq; const u8 *profile; - const u8 *tbtt_info; ssize_t profile_len; + u8 param_ch_count; u8 link_id, use_for; if (!ieee80211_mle_basic_sta_prof_size_ok((u8 *)mle->sta_prof[i], @@ -2880,10 +2882,11 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy, profile_len -= 2; /* Find in RNR to look up channel information */ - use_for = cfg80211_tbtt_info_for_mld_ap(tx_data->ie, - tx_data->ielen, - mld_id, link_id, - &ap_info, &tbtt_info); + use_for = cfg80211_rnr_info_for_mld_ap(tx_data->ie, + tx_data->ielen, + mld_id, link_id, + &ap_info, + ¶m_ch_count); if (!use_for) continue; @@ -2926,7 +2929,8 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy, continue; /* Copy the Basic Multi-Link element including the common - * information, and then fix up the link ID. + * information, and then fix up the link ID and BSS param + * change count. * Note that the ML element length has been verified and we * also checked that it contains the link ID. */ @@ -2937,6 +2941,8 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy, sizeof(*ml_elem) + ml_common_len); new_ie[data.ielen + sizeof(*ml_elem) + 1 + ETH_ALEN] = link_id; + new_ie[data.ielen + sizeof(*ml_elem) + 1 + ETH_ALEN + 1] = + param_ch_count; data.ielen += sizeof(*ml_elem) + ml_common_len; -- cgit 1.2.3-korg From 317bad4c3b61eaf14a7f5c65521a3aa8b0b6f1bc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Feb 2024 13:54:28 +0200 Subject: wifi: cfg80211: remove cfg80211_inform_single_bss_frame_data() This function pretty much does what cfg80211_inform_single_bss_data() already does, except on a frame data. But we can call the other one, after populating the inform_data more completely, so we don't need to do everything twice. This also uncovered a few bugs: * the 6 GHz power type checks were only done in this function, move (and rename from 'uhb') those; * the chains/chain_signal information wasn't used in the latter, add that Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240216135047.f3f864f94c78.I2192adb32ab10713e71f395a9d203386264f6ed5@changeid Signed-off-by: Johannes Berg --- net/wireless/scan.c | 245 +++++++++++++++++----------------------------------- 1 file changed, 80 insertions(+), 165 deletions(-) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 0907a72231eeb..7541381686461 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2087,6 +2087,35 @@ struct cfg80211_inform_single_bss_data { u64 cannot_use_reasons; }; +static bool cfg80211_6ghz_power_type_valid(const u8 *ie, size_t ielen, + const u32 flags) +{ + const struct element *tmp; + struct ieee80211_he_operation *he_oper; + + tmp = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ie, ielen); + if (tmp && tmp->datalen >= sizeof(*he_oper) + 1) { + const struct ieee80211_he_6ghz_oper *he_6ghz_oper; + + he_oper = (void *)&tmp->data[1]; + he_6ghz_oper = ieee80211_he_6ghz_oper(he_oper); + + if (!he_6ghz_oper) + return false; + + switch (u8_get_bits(he_6ghz_oper->control, + IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) { + case IEEE80211_6GHZ_CTRL_REG_LPI_AP: + return true; + case IEEE80211_6GHZ_CTRL_REG_SP_AP: + return !(flags & IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT); + case IEEE80211_6GHZ_CTRL_REG_VLP_AP: + return !(flags & IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT); + } + } + return false; +} + /* Returned bss is reference counted and must be cleaned up appropriately. */ static struct cfg80211_bss * cfg80211_inform_single_bss_data(struct wiphy *wiphy, @@ -2119,6 +2148,14 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, if (!channel) return NULL; + if (channel->band == NL80211_BAND_6GHZ && + !cfg80211_6ghz_power_type_valid(data->ie, data->ielen, + channel->flags)) { + data->use_for = 0; + data->cannot_use_reasons = + NL80211_BSS_CANNOT_USE_6GHZ_PWR_MISMATCH; + } + memcpy(tmp.pub.bssid, data->bssid, ETH_ALEN); tmp.pub.channel = channel; if (data->bss_source != BSS_SOURCE_STA_PROFILE) @@ -2130,6 +2167,9 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, tmp.ts_boottime = drv_data->boottime_ns; tmp.parent_tsf = drv_data->parent_tsf; ether_addr_copy(tmp.parent_bssid, drv_data->parent_bssid); + tmp.pub.chains = drv_data->chains; + memcpy(tmp.pub.chain_signal, drv_data->chain_signal, + IEEE80211_MAX_CHAINS); tmp.pub.use_for = data->use_for; tmp.pub.cannot_use_reasons = data->cannot_use_reasons; @@ -3025,59 +3065,23 @@ cfg80211_inform_bss_data(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_inform_bss_data); -static bool cfg80211_uhb_power_type_valid(const u8 *ie, - size_t ielen, - const u32 flags) -{ - const struct element *tmp; - struct ieee80211_he_operation *he_oper; - - tmp = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ie, ielen); - if (tmp && tmp->datalen >= sizeof(*he_oper) + 1) { - const struct ieee80211_he_6ghz_oper *he_6ghz_oper; - - he_oper = (void *)&tmp->data[1]; - he_6ghz_oper = ieee80211_he_6ghz_oper(he_oper); - - if (!he_6ghz_oper) - return false; - - switch (u8_get_bits(he_6ghz_oper->control, - IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) { - case IEEE80211_6GHZ_CTRL_REG_LPI_AP: - return true; - case IEEE80211_6GHZ_CTRL_REG_SP_AP: - return !(flags & IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT); - case IEEE80211_6GHZ_CTRL_REG_VLP_AP: - return !(flags & IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT); - } - } - return false; -} - -/* cfg80211_inform_bss_width_frame helper */ -static struct cfg80211_bss * -cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, - struct cfg80211_inform_bss *data, - struct ieee80211_mgmt *mgmt, size_t len, - gfp_t gfp) +struct cfg80211_bss * +cfg80211_inform_bss_frame_data(struct wiphy *wiphy, + struct cfg80211_inform_bss *data, + struct ieee80211_mgmt *mgmt, size_t len, + gfp_t gfp) { - struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - struct cfg80211_internal_bss tmp = {}, *res; - struct cfg80211_bss_ies *ies; - struct ieee80211_channel *channel; - bool signal_valid; + struct cfg80211_inform_single_bss_data inform_data = { + .drv_data = data, + .use_for = data->restrict_use ? + data->use_for : + NL80211_BSS_USE_FOR_ALL, + .cannot_use_reasons = data->cannot_use_reasons, + }; + size_t min_hdr_len = offsetof(struct ieee80211_mgmt, + u.probe_resp.variable); struct ieee80211_ext *ext = NULL; - u8 *bssid, *variable; - u16 capability, beacon_int; - size_t ielen, min_hdr_len = offsetof(struct ieee80211_mgmt, - u.probe_resp.variable); - int bss_type; - - BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) != - offsetof(struct ieee80211_mgmt, u.beacon.variable)); - - trace_cfg80211_inform_bss_frame(wiphy, data, mgmt, len); + struct cfg80211_bss *res; if (WARN_ON(!mgmt)) return NULL; @@ -3085,9 +3089,10 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, if (WARN_ON(!wiphy)) return NULL; - if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC && - (data->signal < 0 || data->signal > 100))) - return NULL; + BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) != + offsetof(struct ieee80211_mgmt, u.beacon.variable)); + + trace_cfg80211_inform_bss_frame(wiphy, data, mgmt, len); if (ieee80211_is_s1g_beacon(mgmt->frame_control)) { ext = (void *) mgmt; @@ -3100,140 +3105,50 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, if (WARN_ON(len < min_hdr_len)) return NULL; - ielen = len - min_hdr_len; - variable = mgmt->u.probe_resp.variable; - if (ext) { - if (ieee80211_is_s1g_short_beacon(mgmt->frame_control)) - variable = ext->u.s1g_short_beacon.variable; - else - variable = ext->u.s1g_beacon.variable; - } - - channel = cfg80211_get_bss_channel(wiphy, variable, ielen, data->chan); - if (!channel) - return NULL; - - if (channel->band == NL80211_BAND_6GHZ && - !cfg80211_uhb_power_type_valid(variable, ielen, channel->flags)) { - data->restrict_use = 1; - data->use_for = 0; - data->cannot_use_reasons = - NL80211_BSS_CANNOT_USE_6GHZ_PWR_MISMATCH; - } - + inform_data.ielen = len - min_hdr_len; + inform_data.ie = mgmt->u.probe_resp.variable; if (ext) { const struct ieee80211_s1g_bcn_compat_ie *compat; const struct element *elem; + if (ieee80211_is_s1g_short_beacon(mgmt->frame_control)) + inform_data.ie = ext->u.s1g_short_beacon.variable; + else + inform_data.ie = ext->u.s1g_beacon.variable; + elem = cfg80211_find_elem(WLAN_EID_S1G_BCN_COMPAT, - variable, ielen); + inform_data.ie, inform_data.ielen); if (!elem) return NULL; if (elem->datalen < sizeof(*compat)) return NULL; compat = (void *)elem->data; - bssid = ext->u.s1g_beacon.sa; - capability = le16_to_cpu(compat->compat_info); - beacon_int = le16_to_cpu(compat->beacon_int); + memcpy(inform_data.bssid, ext->u.s1g_beacon.sa, ETH_ALEN); + inform_data.capability = le16_to_cpu(compat->compat_info); + inform_data.beacon_interval = le16_to_cpu(compat->beacon_int); } else { - bssid = mgmt->bssid; - beacon_int = le16_to_cpu(mgmt->u.probe_resp.beacon_int); - capability = le16_to_cpu(mgmt->u.probe_resp.capab_info); + memcpy(inform_data.bssid, mgmt->bssid, ETH_ALEN); + inform_data.beacon_interval = + le16_to_cpu(mgmt->u.probe_resp.beacon_int); + inform_data.capability = + le16_to_cpu(mgmt->u.probe_resp.capab_info); } - if (channel->band == NL80211_BAND_60GHZ) { - bss_type = capability & WLAN_CAPABILITY_DMG_TYPE_MASK; - if (bss_type == WLAN_CAPABILITY_DMG_TYPE_AP || - bss_type == WLAN_CAPABILITY_DMG_TYPE_PBSS) - regulatory_hint_found_beacon(wiphy, channel, gfp); - } else { - if (capability & WLAN_CAPABILITY_ESS) - regulatory_hint_found_beacon(wiphy, channel, gfp); - } - - ies = kzalloc(sizeof(*ies) + ielen, gfp); - if (!ies) - return NULL; - ies->len = ielen; - ies->tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp); - ies->from_beacon = ieee80211_is_beacon(mgmt->frame_control) || - ieee80211_is_s1g_beacon(mgmt->frame_control); - memcpy(ies->data, variable, ielen); + inform_data.tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp); if (ieee80211_is_probe_resp(mgmt->frame_control)) - rcu_assign_pointer(tmp.pub.proberesp_ies, ies); + inform_data.ftype = CFG80211_BSS_FTYPE_PRESP; else - rcu_assign_pointer(tmp.pub.beacon_ies, ies); - rcu_assign_pointer(tmp.pub.ies, ies); + inform_data.ftype = CFG80211_BSS_FTYPE_BEACON; - memcpy(tmp.pub.bssid, bssid, ETH_ALEN); - tmp.pub.beacon_interval = beacon_int; - tmp.pub.capability = capability; - tmp.pub.channel = channel; - tmp.pub.signal = data->signal; - tmp.ts_boottime = data->boottime_ns; - tmp.parent_tsf = data->parent_tsf; - tmp.pub.chains = data->chains; - memcpy(tmp.pub.chain_signal, data->chain_signal, IEEE80211_MAX_CHAINS); - ether_addr_copy(tmp.parent_bssid, data->parent_bssid); - tmp.pub.use_for = data->restrict_use ? - data->use_for : - NL80211_BSS_USE_FOR_ALL; - tmp.pub.cannot_use_reasons = data->cannot_use_reasons; - - signal_valid = data->chan == channel; - spin_lock_bh(&rdev->bss_lock); - res = __cfg80211_bss_update(rdev, &tmp, signal_valid, jiffies); - if (!res) - goto drop; - - rdev_inform_bss(rdev, &res->pub, ies, data->drv_data); - - spin_unlock_bh(&rdev->bss_lock); - - trace_cfg80211_return_bss(&res->pub); - /* __cfg80211_bss_update gives us a referenced result */ - return &res->pub; - -drop: - spin_unlock_bh(&rdev->bss_lock); - return NULL; -} - -struct cfg80211_bss * -cfg80211_inform_bss_frame_data(struct wiphy *wiphy, - struct cfg80211_inform_bss *data, - struct ieee80211_mgmt *mgmt, size_t len, - gfp_t gfp) -{ - struct cfg80211_inform_single_bss_data inform_data = { - .drv_data = data, - .ie = mgmt->u.probe_resp.variable, - .ielen = len - offsetof(struct ieee80211_mgmt, - u.probe_resp.variable), - .use_for = data->restrict_use ? - data->use_for : - NL80211_BSS_USE_FOR_ALL, - .cannot_use_reasons = data->cannot_use_reasons, - }; - struct cfg80211_bss *res; - - res = cfg80211_inform_single_bss_frame_data(wiphy, data, mgmt, - len, gfp); + res = cfg80211_inform_single_bss_data(wiphy, &inform_data, gfp); if (!res) return NULL; /* don't do any further MBSSID/ML handling for S1G */ - if (ieee80211_is_s1g_beacon(mgmt->frame_control)) + if (ext) return res; - inform_data.ftype = ieee80211_is_beacon(mgmt->frame_control) ? - CFG80211_BSS_FTYPE_BEACON : CFG80211_BSS_FTYPE_PRESP; - memcpy(inform_data.bssid, mgmt->bssid, ETH_ALEN); - inform_data.tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp); - inform_data.beacon_interval = - le16_to_cpu(mgmt->u.probe_resp.beacon_int); - /* process each non-transmitting bss */ cfg80211_parse_mbssid_data(wiphy, &inform_data, res, gfp); -- cgit 1.2.3-korg From 7e899c1d6f0da2a98ebf6629274ef912d4c83359 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Feb 2024 13:54:29 +0200 Subject: wifi: cfg80211: clean up cfg80211_inform_bss_frame_data() Make cfg80211_inform_bss_frame_data() call the existing cfg80211_inform_bss_data() after parsing the frame in the appropriate way, so we have less code duplication. This required introducing a new CFG80211_BSS_FTYPE_S1G_BEACON, but that can be used by other drivers as well. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240216135047.874aed1eff5f.Ib7d88d126eec50c64763251a78cb432bb5df14df@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ net/wireless/scan.c | 71 +++++++++++++++++++++++--------------------------- 2 files changed, 34 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 57c2298af35b2..f9eada2a26ec0 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -7175,11 +7175,13 @@ size_t cfg80211_merge_profile(const u8 *ie, size_t ielen, * from a beacon or probe response * @CFG80211_BSS_FTYPE_BEACON: data comes from a beacon * @CFG80211_BSS_FTYPE_PRESP: data comes from a probe response + * @CFG80211_BSS_FTYPE_S1G_BEACON: data comes from an S1G beacon */ enum cfg80211_bss_frame_type { CFG80211_BSS_FTYPE_UNKNOWN, CFG80211_BSS_FTYPE_BEACON, CFG80211_BSS_FTYPE_PRESP, + CFG80211_BSS_FTYPE_S1G_BEACON, }; /** diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 7541381686461..c2d85fa4b75d5 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2213,6 +2213,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, switch (data->ftype) { case CFG80211_BSS_FTYPE_BEACON: + case CFG80211_BSS_FTYPE_S1G_BEACON: ies->from_beacon = true; fallthrough; case CFG80211_BSS_FTYPE_UNKNOWN: @@ -3057,6 +3058,10 @@ cfg80211_inform_bss_data(struct wiphy *wiphy, if (!res) return NULL; + /* don't do any further MBSSID/ML handling for S1G */ + if (ftype == CFG80211_BSS_FTYPE_S1G_BEACON) + return res; + cfg80211_parse_mbssid_data(wiphy, &inform_data, res, gfp); cfg80211_parse_ml_sta_data(wiphy, &inform_data, res, gfp); @@ -3071,17 +3076,16 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, struct ieee80211_mgmt *mgmt, size_t len, gfp_t gfp) { - struct cfg80211_inform_single_bss_data inform_data = { - .drv_data = data, - .use_for = data->restrict_use ? - data->use_for : - NL80211_BSS_USE_FOR_ALL, - .cannot_use_reasons = data->cannot_use_reasons, - }; size_t min_hdr_len = offsetof(struct ieee80211_mgmt, u.probe_resp.variable); struct ieee80211_ext *ext = NULL; - struct cfg80211_bss *res; + enum cfg80211_bss_frame_type ftype; + u16 beacon_interval; + const u8 *bssid; + u16 capability; + const u8 *ie; + size_t ielen; + u64 tsf; if (WARN_ON(!mgmt)) return NULL; @@ -3105,56 +3109,45 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, if (WARN_ON(len < min_hdr_len)) return NULL; - inform_data.ielen = len - min_hdr_len; - inform_data.ie = mgmt->u.probe_resp.variable; + ielen = len - min_hdr_len; + ie = mgmt->u.probe_resp.variable; if (ext) { const struct ieee80211_s1g_bcn_compat_ie *compat; const struct element *elem; if (ieee80211_is_s1g_short_beacon(mgmt->frame_control)) - inform_data.ie = ext->u.s1g_short_beacon.variable; + ie = ext->u.s1g_short_beacon.variable; else - inform_data.ie = ext->u.s1g_beacon.variable; + ie = ext->u.s1g_beacon.variable; - elem = cfg80211_find_elem(WLAN_EID_S1G_BCN_COMPAT, - inform_data.ie, inform_data.ielen); + elem = cfg80211_find_elem(WLAN_EID_S1G_BCN_COMPAT, ie, ielen); if (!elem) return NULL; if (elem->datalen < sizeof(*compat)) return NULL; compat = (void *)elem->data; - memcpy(inform_data.bssid, ext->u.s1g_beacon.sa, ETH_ALEN); - inform_data.capability = le16_to_cpu(compat->compat_info); - inform_data.beacon_interval = le16_to_cpu(compat->beacon_int); + bssid = ext->u.s1g_beacon.sa; + capability = le16_to_cpu(compat->compat_info); + beacon_interval = le16_to_cpu(compat->beacon_int); } else { - memcpy(inform_data.bssid, mgmt->bssid, ETH_ALEN); - inform_data.beacon_interval = - le16_to_cpu(mgmt->u.probe_resp.beacon_int); - inform_data.capability = - le16_to_cpu(mgmt->u.probe_resp.capab_info); + bssid = mgmt->bssid; + beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int); + capability = le16_to_cpu(mgmt->u.probe_resp.capab_info); } - inform_data.tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp); + tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp); if (ieee80211_is_probe_resp(mgmt->frame_control)) - inform_data.ftype = CFG80211_BSS_FTYPE_PRESP; + ftype = CFG80211_BSS_FTYPE_PRESP; + else if (ext) + ftype = CFG80211_BSS_FTYPE_S1G_BEACON; else - inform_data.ftype = CFG80211_BSS_FTYPE_BEACON; - - res = cfg80211_inform_single_bss_data(wiphy, &inform_data, gfp); - if (!res) - return NULL; - - /* don't do any further MBSSID/ML handling for S1G */ - if (ext) - return res; - - /* process each non-transmitting bss */ - cfg80211_parse_mbssid_data(wiphy, &inform_data, res, gfp); + ftype = CFG80211_BSS_FTYPE_BEACON; - cfg80211_parse_ml_sta_data(wiphy, &inform_data, res, gfp); - - return res; + return cfg80211_inform_bss_data(wiphy, data, ftype, + bssid, tsf, capability, + beacon_interval, ie, ielen, + gfp); } EXPORT_SYMBOL(cfg80211_inform_bss_frame_data); -- cgit 1.2.3-korg From 6b756efcd9f01a7f972c0aa0da1c4f84658ba156 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Feb 2024 13:54:30 +0200 Subject: wifi: cfg80211: refactor RNR parsing We'll need more parsing of the reduced neighbor report element, and we already have two places doing pretty much the same. Combine by refactoring the parsing into a separate function with a callback for each item found. Signed-off-by: Johannes Berg Reviewed-by: Benjamin Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240216135047.cfff14b692fc.Ibe25be88a769eab29ebb17b9d19af666df6a2227@changeid Signed-off-by: Johannes Berg --- net/wireless/scan.c | 306 +++++++++++++++++++++++++++++----------------------- 1 file changed, 169 insertions(+), 137 deletions(-) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index c2d85fa4b75d5..e46dfc71c4970 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -611,104 +611,144 @@ static int cfg80211_parse_ap_info(struct cfg80211_colocated_ap *entry, return 0; } -VISIBLE_IF_CFG80211_KUNIT int -cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies, - struct list_head *list) +enum cfg80211_rnr_iter_ret { + RNR_ITER_CONTINUE, + RNR_ITER_BREAK, + RNR_ITER_ERROR, +}; + +static bool +cfg80211_iter_rnr(const u8 *elems, size_t elems_len, + enum cfg80211_rnr_iter_ret + (*iter)(void *data, u8 type, + const struct ieee80211_neighbor_ap_info *info, + const u8 *tbtt_info, u8 tbtt_info_len), + void *iter_data) { - struct ieee80211_neighbor_ap_info *ap_info; - const struct element *elem, *ssid_elem; + const struct element *rnr; const u8 *pos, *end; - u32 s_ssid_tmp; - int n_coloc = 0, ret; - LIST_HEAD(ap_list); - ret = cfg80211_calc_short_ssid(ies, &ssid_elem, &s_ssid_tmp); - if (ret) - return 0; + for_each_element_id(rnr, WLAN_EID_REDUCED_NEIGHBOR_REPORT, + elems, elems_len) { + const struct ieee80211_neighbor_ap_info *info; - for_each_element_id(elem, WLAN_EID_REDUCED_NEIGHBOR_REPORT, - ies->data, ies->len) { - pos = elem->data; - end = elem->data + elem->datalen; + pos = rnr->data; + end = rnr->data + rnr->datalen; /* RNR IE may contain more than one NEIGHBOR_AP_INFO */ - while (pos + sizeof(*ap_info) <= end) { - enum nl80211_band band; - int freq; + while (sizeof(*info) <= end - pos) { u8 length, i, count; + u8 type; - ap_info = (void *)pos; - count = u8_get_bits(ap_info->tbtt_info_hdr, - IEEE80211_AP_INFO_TBTT_HDR_COUNT) + 1; - length = ap_info->tbtt_info_len; + info = (void *)pos; + count = u8_get_bits(info->tbtt_info_hdr, + IEEE80211_AP_INFO_TBTT_HDR_COUNT) + + 1; + length = info->tbtt_info_len; - pos += sizeof(*ap_info); + pos += sizeof(*info); - if (!ieee80211_operating_class_to_band(ap_info->op_class, - &band)) - break; + if (count * length > end - pos) + return false; - freq = ieee80211_channel_to_frequency(ap_info->channel, - band); + type = u8_get_bits(info->tbtt_info_hdr, + IEEE80211_AP_INFO_TBTT_HDR_TYPE); - if (end - pos < count * length) - break; + for (i = 0; i < count; i++) { + switch (iter(iter_data, type, info, + pos, length)) { + case RNR_ITER_CONTINUE: + break; + case RNR_ITER_BREAK: + return true; + case RNR_ITER_ERROR: + return false; + } - if (u8_get_bits(ap_info->tbtt_info_hdr, - IEEE80211_AP_INFO_TBTT_HDR_TYPE) != - IEEE80211_TBTT_INFO_TYPE_TBTT) { - pos += count * length; - continue; + pos += length; } + } - /* TBTT info must include bss param + BSSID + - * (short SSID or same_ssid bit to be set). - * ignore other options, and move to the - * next AP info - */ - if (band != NL80211_BAND_6GHZ || - !(length == offsetofend(struct ieee80211_tbtt_info_7_8_9, - bss_params) || - length == sizeof(struct ieee80211_tbtt_info_7_8_9) || - length >= offsetofend(struct ieee80211_tbtt_info_ge_11, - bss_params))) { - pos += count * length; - continue; - } + if (pos != end) + return false; + } - for (i = 0; i < count; i++) { - struct cfg80211_colocated_ap *entry; + return true; +} + +struct colocated_ap_data { + const struct element *ssid_elem; + struct list_head ap_list; + u32 s_ssid_tmp; + int n_coloc; +}; - entry = kzalloc(sizeof(*entry) + IEEE80211_MAX_SSID_LEN, - GFP_ATOMIC); +static enum cfg80211_rnr_iter_ret +cfg80211_parse_colocated_ap_iter(void *_data, u8 type, + const struct ieee80211_neighbor_ap_info *info, + const u8 *tbtt_info, u8 tbtt_info_len) +{ + struct colocated_ap_data *data = _data; + struct cfg80211_colocated_ap *entry; + enum nl80211_band band; - if (!entry) - goto error; + if (type != IEEE80211_TBTT_INFO_TYPE_TBTT) + return RNR_ITER_CONTINUE; - entry->center_freq = freq; + if (!ieee80211_operating_class_to_band(info->op_class, &band)) + return RNR_ITER_CONTINUE; - if (!cfg80211_parse_ap_info(entry, pos, length, - ssid_elem, - s_ssid_tmp)) { - n_coloc++; - list_add_tail(&entry->list, &ap_list); - } else { - kfree(entry); - } + /* TBTT info must include bss param + BSSID + (short SSID or + * same_ssid bit to be set). Ignore other options, and move to + * the next AP info + */ + if (band != NL80211_BAND_6GHZ || + !(tbtt_info_len == offsetofend(struct ieee80211_tbtt_info_7_8_9, + bss_params) || + tbtt_info_len == sizeof(struct ieee80211_tbtt_info_7_8_9) || + tbtt_info_len >= offsetofend(struct ieee80211_tbtt_info_ge_11, + bss_params))) + return RNR_ITER_CONTINUE; + + entry = kzalloc(sizeof(*entry) + IEEE80211_MAX_SSID_LEN, GFP_ATOMIC); + if (!entry) + return RNR_ITER_ERROR; + + entry->center_freq = + ieee80211_channel_to_frequency(info->channel, band); + + if (!cfg80211_parse_ap_info(entry, tbtt_info, tbtt_info_len, + data->ssid_elem, data->s_ssid_tmp)) { + data->n_coloc++; + list_add_tail(&entry->list, &data->ap_list); + } else { + kfree(entry); + } - pos += length; - } - } + return RNR_ITER_CONTINUE; +} -error: - if (pos != end) { - cfg80211_free_coloc_ap_list(&ap_list); - return 0; - } +VISIBLE_IF_CFG80211_KUNIT int +cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies, + struct list_head *list) +{ + struct colocated_ap_data data = {}; + int ret; + + INIT_LIST_HEAD(&data.ap_list); + + ret = cfg80211_calc_short_ssid(ies, &data.ssid_elem, &data.s_ssid_tmp); + if (ret) + return 0; + + if (!cfg80211_iter_rnr(ies->data, ies->len, + cfg80211_parse_colocated_ap_iter, &data)) { + cfg80211_free_coloc_ap_list(&data.ap_list); + return 0; } - list_splice_tail(&ap_list, list); - return n_coloc; + list_splice_tail(&data.ap_list, list); + return data.n_coloc; } EXPORT_SYMBOL_IF_CFG80211_KUNIT(cfg80211_parse_colocated_ap); @@ -2607,79 +2647,71 @@ error: return NULL; } -static u8 -cfg80211_rnr_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id, - const struct ieee80211_neighbor_ap_info **ap_info, - u8 *param_ch_count) -{ - const struct ieee80211_neighbor_ap_info *info; - const struct element *rnr; - const u8 *pos, *end; - - for_each_element_id(rnr, WLAN_EID_REDUCED_NEIGHBOR_REPORT, ie, ielen) { - pos = rnr->data; - end = rnr->data + rnr->datalen; - - /* RNR IE may contain more than one NEIGHBOR_AP_INFO */ - while (sizeof(*info) <= end - pos) { - const struct ieee80211_rnr_mld_params *mld_params; - u16 params; - u8 length, i, count, mld_params_offset; - u8 type, lid; - u32 use_for; - - info = (void *)pos; - count = u8_get_bits(info->tbtt_info_hdr, - IEEE80211_AP_INFO_TBTT_HDR_COUNT) + 1; - length = info->tbtt_info_len; +struct tbtt_info_iter_data { + const struct ieee80211_neighbor_ap_info *ap_info; + u8 param_ch_count; + u32 use_for; + u8 mld_id, link_id; +}; - pos += sizeof(*info); +static enum cfg80211_rnr_iter_ret +cfg802121_mld_ap_rnr_iter(void *_data, u8 type, + const struct ieee80211_neighbor_ap_info *info, + const u8 *tbtt_info, u8 tbtt_info_len) +{ + const struct ieee80211_rnr_mld_params *mld_params; + struct tbtt_info_iter_data *data = _data; + u8 link_id; + + if (type == IEEE80211_TBTT_INFO_TYPE_TBTT && + tbtt_info_len >= offsetofend(struct ieee80211_tbtt_info_ge_11, + mld_params)) + mld_params = (void *)(tbtt_info + + offsetof(struct ieee80211_tbtt_info_ge_11, + mld_params)); + else if (type == IEEE80211_TBTT_INFO_TYPE_MLD && + tbtt_info_len >= sizeof(struct ieee80211_rnr_mld_params)) + mld_params = (void *)tbtt_info; + else + return RNR_ITER_CONTINUE; - if (count * length > end - pos) - return 0; + link_id = le16_get_bits(mld_params->params, + IEEE80211_RNR_MLD_PARAMS_LINK_ID); - type = u8_get_bits(info->tbtt_info_hdr, - IEEE80211_AP_INFO_TBTT_HDR_TYPE); + if (data->mld_id != mld_params->mld_id) + return RNR_ITER_CONTINUE; - if (type == IEEE80211_TBTT_INFO_TYPE_TBTT && - length >= - offsetofend(struct ieee80211_tbtt_info_ge_11, - mld_params)) { - mld_params_offset = - offsetof(struct ieee80211_tbtt_info_ge_11, mld_params); - use_for = NL80211_BSS_USE_FOR_ALL; - } else if (type == IEEE80211_TBTT_INFO_TYPE_MLD && - length >= sizeof(struct ieee80211_rnr_mld_params)) { - mld_params_offset = 0; - use_for = NL80211_BSS_USE_FOR_MLD_LINK; - } else { - pos += count * length; - continue; - } + if (data->link_id != link_id) + return RNR_ITER_CONTINUE; - for (i = 0; i < count; i++) { - mld_params = (void *)pos + mld_params_offset; - params = le16_to_cpu(mld_params->params); + data->ap_info = info; + data->param_ch_count = + le16_get_bits(mld_params->params, + IEEE80211_RNR_MLD_PARAMS_BSS_CHANGE_COUNT); - lid = u16_get_bits(params, - IEEE80211_RNR_MLD_PARAMS_LINK_ID); + if (type == IEEE80211_TBTT_INFO_TYPE_TBTT) + data->use_for = NL80211_BSS_USE_FOR_ALL; + else + data->use_for = NL80211_BSS_USE_FOR_MLD_LINK; + return RNR_ITER_BREAK; +} - if (mld_id == mld_params->mld_id && - link_id == lid) { - *ap_info = info; - *param_ch_count = - le16_get_bits(mld_params->params, - IEEE80211_RNR_MLD_PARAMS_BSS_CHANGE_COUNT); +static u8 +cfg80211_rnr_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id, + const struct ieee80211_neighbor_ap_info **ap_info, + u8 *param_ch_count) +{ + struct tbtt_info_iter_data data = { + .mld_id = mld_id, + .link_id = link_id, + }; - return use_for; - } + cfg80211_iter_rnr(ie, ielen, cfg802121_mld_ap_rnr_iter, &data); - pos += length; - } - } - } + *ap_info = data.ap_info; + *param_ch_count = data.param_ch_count; - return 0; + return data.use_for; } static struct element * -- cgit 1.2.3-korg From 6bd14aee0bd25525ab229acd9bfe536dd8642364 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Feb 2024 13:54:31 +0200 Subject: wifi: mac80211: align ieee80211_mle_get_bss_param_ch_cnt() Align the prototype of ieee80211_mle_get_bss_param_ch_cnt() to also take a u8 * like the other functions, and make it return -1 when the field isn't found, so that mac80211 can check that instead of explicitly open-coding the check. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240216135047.583309181bc3.Ia61cb0b4fc034d5ac8fcfaf6f6fb2e115fadafe7@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 11 ++++++----- net/mac80211/mlme.c | 9 +++++---- 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index e4322238f2735..303c754598970 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4990,17 +4990,18 @@ static inline int ieee80211_mle_get_link_id(const u8 *data) /** * ieee80211_mle_get_bss_param_ch_cnt - returns the BSS parameter change count - * @mle: the basic multi link element + * @data: pointer to the basic multi link element * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). * * If the BSS parameter change count value can't be found (the presence bit - * for it is clear), 0 will be returned. + * for it is clear), -1 will be returned. */ -static inline u8 -ieee80211_mle_get_bss_param_ch_cnt(const struct ieee80211_multi_link_elem *mle) +static inline int +ieee80211_mle_get_bss_param_ch_cnt(const u8 *data) { + const struct ieee80211_multi_link_elem *mle = (const void *)data; u16 control = le16_to_cpu(mle->control); const u8 *common = mle->variable; @@ -5008,7 +5009,7 @@ ieee80211_mle_get_bss_param_ch_cnt(const struct ieee80211_multi_link_elem *mle) common += sizeof(struct ieee80211_mle_basic_common_info); if (!(control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT)) - return 0; + return -1; if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) common += 1; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 87ffc19770b8e..5f2e9f5e17790 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4202,13 +4202,14 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, */ assoc_data->link[link_id].status = WLAN_STATUS_SUCCESS; if (elems->ml_basic) { - if (!(elems->ml_basic->control & - cpu_to_le16(IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT))) { + int bss_param_ch_cnt = + ieee80211_mle_get_bss_param_ch_cnt((const void *)elems->ml_basic); + + if (bss_param_ch_cnt < 0) { ret = false; goto out; } - link->u.mgd.bss_param_ch_cnt = - ieee80211_mle_get_bss_param_ch_cnt(elems->ml_basic); + link->u.mgd.bss_param_ch_cnt = bss_param_ch_cnt; } } else if (elems->parse_error & IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC || !elems->prof || -- cgit 1.2.3-korg From 894dd84e49ec114a2dde7b312ae4cada40d15bdb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Feb 2024 13:54:32 +0200 Subject: wifi: cfg80211: use ML element parsing helpers Use the existing ML element parsing helpers and add a new one for this (ieee80211_mle_get_mld_id). Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240216135047.4da47b1f035b.I437a5570ac456449facb0b147851ef24a1e473c2@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 38 ++++++++++++++++++++++++++++++++++++++ net/wireless/scan.c | 41 ++++++++--------------------------------- 2 files changed, 46 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 303c754598970..3385a2cc5b099 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -5115,6 +5115,44 @@ static inline u16 ieee80211_mle_get_mld_capa_op(const u8 *data) return get_unaligned_le16(common); } +/** + * ieee80211_mle_get_mld_id - returns the MLD ID + * @data: pointer to the multi link element + * + * The element is assumed to be of the correct type (BASIC) and big enough, + * this must be checked using ieee80211_mle_type_ok(). + * + * If the MLD ID is not present, 0 will be returned. + */ +static inline u8 ieee80211_mle_get_mld_id(const u8 *data) +{ + const struct ieee80211_multi_link_elem *mle = (const void *)data; + u16 control = le16_to_cpu(mle->control); + const u8 *common = mle->variable; + + /* + * common points now at the beginning of + * ieee80211_mle_basic_common_info + */ + common += sizeof(struct ieee80211_mle_basic_common_info); + + if (!(control & IEEE80211_MLC_BASIC_PRES_MLD_ID)) + return 0; + + if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP) + common += 2; + + return *common; +} + /** * ieee80211_mle_size_ok - validate multi-link element size * @data: pointer to the element data diff --git a/net/wireless/scan.c b/net/wireless/scan.c index e46dfc71c4970..7cf36b8d3ae74 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2833,17 +2833,16 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy, struct cfg80211_bss *bss; u8 mld_id, reporter_link_id, bss_change_count; u16 seen_links = 0; - const u8 *pos; u8 i; - if (!ieee80211_mle_size_ok(elem->data + 1, elem->datalen - 1)) + if (!ieee80211_mle_type_ok(elem->data + 1, + IEEE80211_ML_CONTROL_TYPE_BASIC, + elem->datalen - 1)) return; - ml_elem = (void *)elem->data + 1; + ml_elem = (void *)(elem->data + 1); control = le16_to_cpu(ml_elem->control); - if (u16_get_bits(control, IEEE80211_ML_CONTROL_TYPE) != - IEEE80211_ML_CONTROL_TYPE_BASIC) - return; + ml_common_len = ml_elem->variable[0]; /* Must be present when transmitted by an AP (in a probe response) */ if (!(control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) || @@ -2851,24 +2850,8 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy, !(control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP)) return; - ml_common_len = ml_elem->variable[0]; - - /* length + MLD MAC address */ - pos = ml_elem->variable + 1 + 6; - - reporter_link_id = pos[0]; - pos += 1; - - bss_change_count = pos[0]; - pos += 1; - - if (u16_get_bits(control, IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY)) - pos += 2; - if (u16_get_bits(control, IEEE80211_MLC_BASIC_PRES_EML_CAPA)) - pos += 2; - - /* MLD capabilities and operations */ - pos += 2; + reporter_link_id = ieee80211_mle_get_link_id(elem->data + 1); + bss_change_count = ieee80211_mle_get_bss_param_ch_cnt(elem->data + 1); /* * The MLD ID of the reporting AP is always zero. It is set if the AP @@ -2876,15 +2859,7 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy, * relating to a nontransmitted BSS (matching the Multi-BSSID Index, * Draft P802.11be_D3.2, 35.3.4.2) */ - if (u16_get_bits(control, IEEE80211_MLC_BASIC_PRES_MLD_ID)) { - mld_id = *pos; - pos += 1; - } else { - mld_id = 0; - } - - /* Extended MLD capabilities and operations */ - pos += 2; + mld_id = ieee80211_mle_get_mld_id(elem->data + 1); /* Fully defrag the ML element for sta information/profile iteration */ mle = cfg80211_defrag_mle(elem, tx_data->ie, tx_data->ielen, gfp); -- cgit 1.2.3-korg From 4f4d8be6dc37a28b8655d03918680d8ea8c82f75 Mon Sep 17 00:00:00 2001 From: Alexis Lothoré Date: Thu, 15 Feb 2024 15:13:52 +0100 Subject: wifi: nl80211: force WLAN_AKM_SUITE_SAE in big endian in NL80211_CMD_EXTERNAL_AUTH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User-space supplicant (observed at least on wpa_supplicant) historically parses the NL80211_ATTR_AKM_SUITES from the NL80211_CMD_EXTERNAL_AUTH message as big endian _only_ when its value is WLAN_AKM_SUITE_SAE, while processing anything else in host endian. This behavior makes any driver relying on SAE external auth to switch AKM suite to big endian if it is WLAN_AKM_SUITE_SAE. A fix bringing compatibility with both endianness has been brought into wpa_supplicant, however we must keep compatibility with older versions, while trying to reduce the occurences of this manual conversion in wireless drivers. Add the be32 conversion specifically on WLAN_AKM_SUITE_SAE in nl80211 layer to keep compatibility with older wpa_supplicant versions. Suggested-by: Johannes Berg Signed-off-by: Alexis Lothoré Link: https://msgid.link/20240215-nl80211_fix_akm_suites_endianness-v1-1-57e902632f9d@bootlin.com Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index dd9a092b6baba..ab6d98cdac13d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -20132,9 +20132,26 @@ int cfg80211_external_auth_request(struct net_device *dev, if (!hdr) goto nla_put_failure; + /* Some historical mistakes in drivers <-> userspace interface (notably + * between drivers and wpa_supplicant) led to a big-endian conversion + * being needed on NL80211_ATTR_AKM_SUITES _only_ when its value is + * WLAN_AKM_SUITE_SAE. This is now fixed on userspace side, but for the + * benefit of older wpa_supplicant versions, send this particular value + * in big-endian. Note that newer wpa_supplicant will also detect this + * particular value in big endian still, so it all continues to work. + */ + if (params->key_mgmt_suite == WLAN_AKM_SUITE_SAE) { + if (nla_put_be32(msg, NL80211_ATTR_AKM_SUITES, + cpu_to_be32(WLAN_AKM_SUITE_SAE))) + goto nla_put_failure; + } else { + if (nla_put_u32(msg, NL80211_ATTR_AKM_SUITES, + params->key_mgmt_suite)) + goto nla_put_failure; + } + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || - nla_put_u32(msg, NL80211_ATTR_AKM_SUITES, params->key_mgmt_suite) || nla_put_u32(msg, NL80211_ATTR_EXTERNAL_AUTH_ACTION, params->action) || nla_put(msg, NL80211_ATTR_BSSID, ETH_ALEN, params->bssid) || -- cgit 1.2.3-korg From 81830c8f809c01f3780dc33f7d951be8e146ced1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 21 Feb 2024 12:48:18 +0100 Subject: wifi: nl80211: refactor parsing CSA offsets The CSA offset parsing happens the same way for all of beacon template offsets, probe response template offsets and TX offsets (for using during probe response TX from userspace directly). Refactor the parsing here. There's an additional check this introduces, which is that the number of counters in TX offsets doesn't exceed the driver capability, but as only two counters are used at most for anything, this is hopefully OK. Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 137 ++++++++++++++++++++----------------------------- 1 file changed, 56 insertions(+), 81 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ab6d98cdac13d..bb65b684e26ad 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -10048,6 +10048,42 @@ static int nl80211_notify_radar_detection(struct sk_buff *skb, return 0; } +static int nl80211_parse_counter_offsets(struct cfg80211_registered_device *rdev, + const u8 *data, size_t datalen, + int first_count, struct nlattr *attr, + const u16 **offsets, unsigned int *n_offsets) +{ + int i; + + *n_offsets = 0; + + if (!attr) + return 0; + + if (!nla_len(attr) || (nla_len(attr) % sizeof(u16))) + return -EINVAL; + + *n_offsets = nla_len(attr) / sizeof(u16); + if (rdev->wiphy.max_num_csa_counters && + (*n_offsets > rdev->wiphy.max_num_csa_counters)) + return -EINVAL; + + *offsets = nla_data(attr); + + /* sanity checks - counters should fit and be the same */ + for (i = 0; i < *n_offsets; i++) { + u16 offset = (*offsets)[i]; + + if (offset >= datalen) + return -EINVAL; + + if (first_count != -1 && data[offset] != first_count) + return -EINVAL; + } + + return 0; +} + static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -10059,7 +10095,6 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) int err; bool need_new_beacon = false; bool need_handle_dfs_flag = true; - int len, i; u32 cs_count; if (!rdev->ops->channel_switch || @@ -10144,72 +10179,23 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) goto free; } - len = nla_len(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON]); - if (!len || (len % sizeof(u16))) { - err = -EINVAL; + err = nl80211_parse_counter_offsets(rdev, params.beacon_csa.tail, + params.beacon_csa.tail_len, + params.count, + csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON], + ¶ms.counter_offsets_beacon, + ¶ms.n_counter_offsets_beacon); + if (err) goto free; - } - params.n_counter_offsets_beacon = len / sizeof(u16); - if (rdev->wiphy.max_num_csa_counters && - (params.n_counter_offsets_beacon > - rdev->wiphy.max_num_csa_counters)) { - err = -EINVAL; + err = nl80211_parse_counter_offsets(rdev, params.beacon_csa.probe_resp, + params.beacon_csa.probe_resp_len, + params.count, + csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP], + ¶ms.counter_offsets_presp, + ¶ms.n_counter_offsets_presp); + if (err) goto free; - } - - params.counter_offsets_beacon = - nla_data(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON]); - - /* sanity checks - counters should fit and be the same */ - for (i = 0; i < params.n_counter_offsets_beacon; i++) { - u16 offset = params.counter_offsets_beacon[i]; - - if (offset >= params.beacon_csa.tail_len) { - err = -EINVAL; - goto free; - } - - if (params.beacon_csa.tail[offset] != params.count) { - err = -EINVAL; - goto free; - } - } - - if (csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]) { - len = nla_len(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]); - if (!len || (len % sizeof(u16))) { - err = -EINVAL; - goto free; - } - - params.n_counter_offsets_presp = len / sizeof(u16); - if (rdev->wiphy.max_num_csa_counters && - (params.n_counter_offsets_presp > - rdev->wiphy.max_num_csa_counters)) { - err = -EINVAL; - goto free; - } - - params.counter_offsets_presp = - nla_data(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]); - - /* sanity checks - counters should fit and be the same */ - for (i = 0; i < params.n_counter_offsets_presp; i++) { - u16 offset = params.counter_offsets_presp[i]; - - if (offset >= params.beacon_csa.probe_resp_len) { - err = -EINVAL; - goto free; - } - - if (params.beacon_csa.probe_resp[offset] != - params.count) { - err = -EINVAL; - goto free; - } - } - } skip_beacons: err = nl80211_parse_chandef(rdev, info, ¶ms.chandef); @@ -12638,23 +12624,12 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]); - if (info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]) { - int len = nla_len(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]); - int i; - - if (len % sizeof(u16)) - return -EINVAL; - - params.n_csa_offsets = len / sizeof(u16); - params.csa_offsets = - nla_data(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]); - - /* check that all the offsets fit the frame */ - for (i = 0; i < params.n_csa_offsets; i++) { - if (params.csa_offsets[i] >= params.len) - return -EINVAL; - } - } + err = nl80211_parse_counter_offsets(rdev, NULL, params.len, -1, + info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX], + ¶ms.csa_offsets, + ¶ms.n_csa_offsets); + if (err) + return err; if (!params.dont_wait_for_ack) { msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); -- cgit 1.2.3-korg From f796feabb9f5b1e5c48780a7a0023ab4b82336dd Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 20 Feb 2024 12:00:01 +0100 Subject: udp: add local "peek offset enabled" flag We want to re-organize the struct sock layout. The sk_peek_off field location is problematic, as most protocols want it in the RX read area, while UDP wants it on a cacheline different from sk_receive_queue. Create a local (inside udp_sock) copy of the 'peek offset is enabled' flag and place it inside the same cacheline of reader_queue. Check such flag before reading sk_peek_off. This will save potential false sharing and cache misses in the fast-path. Tested under UDP flood with small packets. The struct sock layout update causes a 4% performance drop, and this patch restores completely the original tput. Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/67ab679c15fbf49fa05b3ffe05d91c47ab84f147.1708426665.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/udp.h | 10 ++++++++++ net/ipv4/af_inet.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/af_inet6.c | 2 +- 4 files changed, 13 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/linux/udp.h b/include/linux/udp.h index d04188714dca1..3748e82b627b7 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -92,6 +92,9 @@ struct udp_sock { /* This fields follows rcvbuf value, and is touched by udp_recvmsg */ int forward_threshold; + + /* Cache friendly copy of sk->sk_peek_off >= 0 */ + bool peeking_with_offset; }; #define udp_test_bit(nr, sk) \ @@ -109,6 +112,13 @@ struct udp_sock { #define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk) +static inline int udp_set_peek_off(struct sock *sk, int val) +{ + sk_set_peek_off(sk, val); + WRITE_ONCE(udp_sk(sk)->peeking_with_offset, val >= 0); + return 0; +} + static inline void udp_set_no_check6_tx(struct sock *sk, bool val) { udp_assign_bit(NO_CHECK6_TX, sk, val); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ad278009e4698..5daebdcbca326 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1103,7 +1103,7 @@ const struct proto_ops inet_dgram_ops = { .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, .splice_eof = inet_splice_eof, - .set_peek_off = sk_set_peek_off, + .set_peek_off = udp_set_peek_off, #ifdef CONFIG_COMPAT .compat_ioctl = inet_compat_ioctl, #endif diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f631b0a21af4c..38cce7cc51f65 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1589,7 +1589,7 @@ int udp_init_sock(struct sock *sk) void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) { - if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) { + if (unlikely(READ_ONCE(udp_sk(sk)->peeking_with_offset))) { bool slow = lock_sock_fast(sk); sk_peek_offset_bwd(sk, len); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 959bfd9f6344f..b90d46533cdcc 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -736,7 +736,7 @@ const struct proto_ops inet6_dgram_ops = { .recvmsg = inet6_recvmsg, /* retpoline's sake */ .read_skb = udp_read_skb, .mmap = sock_no_mmap, - .set_peek_off = sk_set_peek_off, + .set_peek_off = udp_set_peek_off, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif -- cgit 1.2.3-korg From ee076b73e576b0a052d5686d873346b285ae50ea Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 19 Feb 2024 17:51:46 +0800 Subject: net: mctp: avoid confusion over local/peer dest/source addresses We have a double-swap of local and peer addresses in mctp_alloc_local_tag; the arguments in both call sites are swapped, but there is also a swap in the implementation of alloc_local_tag. This is opaque because we're using source/dest address references, which don't match the local/peer semantics. Avoid this confusion by naming the arguments as 'local' and 'peer', and remove the double swap. The calling order now matches mctp_key_alloc. Signed-off-by: Jeremy Kerr Signed-off-by: Paolo Abeni --- include/net/mctp.h | 4 ++-- net/mctp/af_mctp.c | 2 +- net/mctp/route.c | 16 ++++++++-------- 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/net/mctp.h b/include/net/mctp.h index da86e106c91d5..f937a325ea6f2 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -87,7 +87,7 @@ struct mctp_sock { }; /* Key for matching incoming packets to sockets or reassembly contexts. - * Packets are matched on (src,dest,tag). + * Packets are matched on (peer EID, local EID, tag). * * Lifetime / locking requirements: * @@ -254,7 +254,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, void mctp_key_unref(struct mctp_sk_key *key); struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, - mctp_eid_t daddr, mctp_eid_t saddr, + mctp_eid_t local, mctp_eid_t peer, bool manual, u8 *tagp); /* routing <--> device interface */ diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index f6be58b68c6f3..d8197e9e233bd 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -367,7 +367,7 @@ static int mctp_ioctl_alloctag(struct mctp_sock *msk, unsigned long arg) if (ctl.flags) return -EINVAL; - key = mctp_alloc_local_tag(msk, ctl.peer_addr, MCTP_ADDR_ANY, + key = mctp_alloc_local_tag(msk, MCTP_ADDR_ANY, ctl.peer_addr, true, &tag); if (IS_ERR(key)) return PTR_ERR(key); diff --git a/net/mctp/route.c b/net/mctp/route.c index 7a47a58aa54b4..04d29e910af9a 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -596,11 +596,11 @@ static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key, refcount_inc(&key->refs); } -/* Allocate a locally-owned tag value for (saddr, daddr), and reserve +/* Allocate a locally-owned tag value for (local, peer), and reserve * it for the socket msk */ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, - mctp_eid_t daddr, mctp_eid_t saddr, + mctp_eid_t local, mctp_eid_t peer, bool manual, u8 *tagp) { struct net *net = sock_net(&msk->sk); @@ -610,11 +610,11 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, u8 tagbits; /* for NULL destination EIDs, we may get a response from any peer */ - if (daddr == MCTP_ADDR_NULL) - daddr = MCTP_ADDR_ANY; + if (peer == MCTP_ADDR_NULL) + peer = MCTP_ADDR_ANY; /* be optimistic, alloc now */ - key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL); + key = mctp_key_alloc(msk, local, peer, 0, GFP_KERNEL); if (!key) return ERR_PTR(-ENOMEM); @@ -635,8 +635,8 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, if (tmp->tag & MCTP_HDR_FLAG_TO) continue; - if (!(mctp_address_matches(tmp->peer_addr, daddr) && - mctp_address_matches(tmp->local_addr, saddr))) + if (!(mctp_address_matches(tmp->peer_addr, peer) && + mctp_address_matches(tmp->local_addr, local))) continue; spin_lock(&tmp->lock); @@ -924,7 +924,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, key = mctp_lookup_prealloc_tag(msk, daddr, req_tag, &tag); else - key = mctp_alloc_local_tag(msk, daddr, saddr, + key = mctp_alloc_local_tag(msk, saddr, daddr, false, &tag); if (IS_ERR(key)) { -- cgit 1.2.3-korg From aee6479a458e2c5027e558cfe26c60c37f8efc80 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 19 Feb 2024 17:51:47 +0800 Subject: net: mctp: Add some detail on the key allocation implementation We could do with a little more comment on where MCTP_ADDR_ANY will match in the key allocations. Signed-off-by: Jeremy Kerr Signed-off-by: Paolo Abeni --- net/mctp/route.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'net') diff --git a/net/mctp/route.c b/net/mctp/route.c index 04d29e910af9a..b4ad47bb0da5b 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -73,6 +73,40 @@ static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb) return NULL; } +/* A note on the key allocations. + * + * struct net->mctp.keys contains our set of currently-allocated keys for + * MCTP tag management. The lookup tuple for these is the peer EID, + * local EID and MCTP tag. + * + * In some cases, the peer EID may be MCTP_EID_ANY: for example, when a + * broadcast message is sent, we may receive responses from any peer EID. + * Because the broadcast dest address is equivalent to ANY, we create + * a key with (local = local-eid, peer = ANY). This allows a match on the + * incoming broadcast responses from any peer. + * + * We perform lookups when packets are received, and when tags are allocated + * in two scenarios: + * + * - when a packet is sent, with a locally-owned tag: we need to find an + * unused tag value for the (local, peer) EID pair. + * + * - when a tag is manually allocated: we need to find an unused tag value + * for the peer EID, but don't have a specific local EID at that stage. + * + * in the latter case, on successful allocation, we end up with a tag with + * (local = ANY, peer = peer-eid). + * + * So, the key set allows both a local EID of ANY, as well as a peer EID of + * ANY in the lookup tuple. Both may be ANY if we prealloc for a broadcast. + * The matching (in mctp_key_match()) during lookup allows the match value to + * be ANY in either the dest or source addresses. + * + * When allocating (+ inserting) a tag, we need to check for conflicts amongst + * the existing tag set. This requires macthing either exactly on the local + * and peer addresses, or either being ANY. + */ + static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local, mctp_eid_t peer, u8 tag) { @@ -368,6 +402,9 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) * key lookup to find the socket, but don't use this * key for reassembly - we'll create a more specific * one for future packets if required (ie, !EOM). + * + * this lookup requires key->peer to be MCTP_ADDR_ANY, + * it doesn't match just any key->peer. */ any_key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY, &f); if (any_key) { -- cgit 1.2.3-korg From fc944ecc4f1a287882d90441360a0ba61d45fd8b Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 19 Feb 2024 17:51:48 +0800 Subject: net: mctp: make key lookups match the ANY address on either local or peer We may have an ANY address in either the local or peer address of a sk_key, and may want to match on an incoming daddr or saddr being ANY. Do this by altering the conflicting-tag lookup to also accept ANY as the local/peer address. We don't want mctp_address_matches to match on the requested EID being ANY, as that is a specific lookup case on packet input. Reported-by: Eric Chuang Reported-by: Anthony Signed-off-by: Jeremy Kerr Signed-off-by: Paolo Abeni --- net/mctp/route.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mctp/route.c b/net/mctp/route.c index b4ad47bb0da5b..0c2ed75a9e282 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -113,7 +113,7 @@ static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local, if (!mctp_address_matches(key->local_addr, local)) return false; - if (key->peer_addr != peer) + if (!mctp_address_matches(key->peer_addr, peer)) return false; if (key->tag != tag) @@ -672,8 +672,16 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, if (tmp->tag & MCTP_HDR_FLAG_TO) continue; - if (!(mctp_address_matches(tmp->peer_addr, peer) && - mctp_address_matches(tmp->local_addr, local))) + /* Since we're avoiding conflicting entries, match peer and + * local addresses, including with a wildcard on ANY. See + * 'A note on key allocations' for background. + */ + if (peer != MCTP_ADDR_ANY && + !mctp_address_matches(tmp->peer_addr, peer)) + continue; + + if (local != MCTP_ADDR_ANY && + !mctp_address_matches(tmp->local_addr, local)) continue; spin_lock(&tmp->lock); -- cgit 1.2.3-korg From a1f4cf5791e7914f3e42f5462669353104fef8a9 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 19 Feb 2024 17:51:49 +0800 Subject: net: mctp: tests: create test skbs with the correct net and device In our test skb creation functions, we're not setting up the net and device data. This doesn't matter at the moment, but we will want to add support for distinct net IDs in future. Set the ->net identifier on the test MCTP device, and ensure that test skbs are set up with the correct device-related data on creation. Create a helper for setting skb->dev and mctp_skb_cb->net. We have a few cases where we're calling __mctp_cb() to initialise the cb (which we need for the above) separately, so integrate this into the skb creation helpers. Signed-off-by: Jeremy Kerr Signed-off-by: Paolo Abeni --- net/mctp/test/route-test.c | 23 +++++++++++++++-------- net/mctp/test/utils.c | 2 ++ 2 files changed, 17 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 92ea4158f7fc4..714e5ae47629f 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -79,6 +79,16 @@ static void mctp_test_route_destroy(struct kunit *test, kfree_rcu(&rt->rt, rcu); } +static void mctp_test_skb_set_dev(struct sk_buff *skb, + struct mctp_test_dev *dev) +{ + struct mctp_skb_cb *cb; + + cb = mctp_cb(skb); + cb->net = READ_ONCE(dev->mdev->net); + skb->dev = dev->ndev; +} + static struct sk_buff *mctp_test_create_skb(const struct mctp_hdr *hdr, unsigned int data_len) { @@ -91,6 +101,7 @@ static struct sk_buff *mctp_test_create_skb(const struct mctp_hdr *hdr, if (!skb) return NULL; + __mctp_cb(skb); memcpy(skb_put(skb, hdr_len), hdr, hdr_len); buf = skb_put(skb, data_len); @@ -111,6 +122,7 @@ static struct sk_buff *__mctp_test_create_skb_data(const struct mctp_hdr *hdr, if (!skb) return NULL; + __mctp_cb(skb); memcpy(skb_put(skb, hdr_len), hdr, hdr_len); memcpy(skb_put(skb, data_len), data, data_len); @@ -249,8 +261,6 @@ static void mctp_test_rx_input(struct kunit *test) skb = mctp_test_create_skb(¶ms->hdr, 1); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb); - __mctp_cb(skb); - mctp_pkttype_receive(skb, dev->ndev, &mctp_packet_type, NULL); KUNIT_EXPECT_EQ(test, !!rt->pkts.qlen, params->input); @@ -344,8 +354,7 @@ static void mctp_test_route_input_sk(struct kunit *test) skb = mctp_test_create_skb_data(¶ms->hdr, ¶ms->type); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb); - skb->dev = dev->ndev; - __mctp_cb(skb); + mctp_test_skb_set_dev(skb, dev); rc = mctp_route_input(&rt->rt, skb); @@ -417,8 +426,7 @@ static void mctp_test_route_input_sk_reasm(struct kunit *test) skb = mctp_test_create_skb_data(¶ms->hdrs[i], &c); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb); - skb->dev = dev->ndev; - __mctp_cb(skb); + mctp_test_skb_set_dev(skb, dev); rc = mctp_route_input(&rt->rt, skb); } @@ -576,8 +584,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test) skb = mctp_test_create_skb_data(¶ms->hdr, &c); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb); - skb->dev = dev->ndev; - __mctp_cb(skb); + mctp_test_skb_set_dev(skb, dev); rc = mctp_route_input(&rt->rt, skb); diff --git a/net/mctp/test/utils.c b/net/mctp/test/utils.c index e03ba66bbe181..565763eb02114 100644 --- a/net/mctp/test/utils.c +++ b/net/mctp/test/utils.c @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -54,6 +55,7 @@ struct mctp_test_dev *mctp_test_create_dev(void) rcu_read_lock(); dev->mdev = __mctp_dev_get(ndev); + dev->mdev->net = mctp_default_net(dev_net(ndev)); rcu_read_unlock(); return dev; -- cgit 1.2.3-korg From 43e6795574f5d75284a3cb21f5b76a5ffb98e8e6 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 19 Feb 2024 17:51:50 +0800 Subject: net: mctp: separate key correlation across nets Currently, we lookup sk_keys from the entire struct net_namespace, which may contain multiple MCTP net IDs. In those cases we want to distinguish between endpoints with the same EID but different net ID. Add the net ID data to the struct mctp_sk_key, populate on add and filter on this during route lookup. For the ioctl interface, we use a default net of MCTP_INITIAL_DEFAULT_NET (ie., what will be in use for single-net configurations), but we'll extend the ioctl interface to provide net-specific tag allocation in an upcoming change. Signed-off-by: Jeremy Kerr Signed-off-by: Paolo Abeni --- include/net/mctp.h | 2 ++ net/mctp/af_mctp.c | 4 ++-- net/mctp/route.c | 43 +++++++++++++++++++++++++++++++------------ net/mctp/test/route-test.c | 7 +++++-- 4 files changed, 40 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/include/net/mctp.h b/include/net/mctp.h index f937a325ea6f2..0dfae6f51a322 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -133,6 +133,7 @@ struct mctp_sock { * - through an expiry timeout, on a per-socket timer */ struct mctp_sk_key { + unsigned int net; mctp_eid_t peer_addr; mctp_eid_t local_addr; /* MCTP_ADDR_ANY for local owned tags */ __u8 tag; /* incoming tag match; invert TO for local */ @@ -254,6 +255,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, void mctp_key_unref(struct mctp_sk_key *key); struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, + unsigned int netid, mctp_eid_t local, mctp_eid_t peer, bool manual, u8 *tagp); diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index d8197e9e233bd..05315a422ffb3 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -367,8 +367,8 @@ static int mctp_ioctl_alloctag(struct mctp_sock *msk, unsigned long arg) if (ctl.flags) return -EINVAL; - key = mctp_alloc_local_tag(msk, MCTP_ADDR_ANY, ctl.peer_addr, - true, &tag); + key = mctp_alloc_local_tag(msk, MCTP_INITIAL_DEFAULT_NET, + MCTP_ADDR_ANY, ctl.peer_addr, true, &tag); if (IS_ERR(key)) return PTR_ERR(key); diff --git a/net/mctp/route.c b/net/mctp/route.c index 0c2ed75a9e282..28648a7ec8666 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -107,9 +107,12 @@ static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb) * and peer addresses, or either being ANY. */ -static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local, - mctp_eid_t peer, u8 tag) +static bool mctp_key_match(struct mctp_sk_key *key, unsigned int net, + mctp_eid_t local, mctp_eid_t peer, u8 tag) { + if (key->net != net) + return false; + if (!mctp_address_matches(key->local_addr, local)) return false; @@ -126,7 +129,7 @@ static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local, * key exists. */ static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb, - mctp_eid_t peer, + unsigned int netid, mctp_eid_t peer, unsigned long *irqflags) __acquires(&key->lock) { @@ -142,7 +145,7 @@ static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb, spin_lock_irqsave(&net->mctp.keys_lock, flags); hlist_for_each_entry(key, &net->mctp.keys, hlist) { - if (!mctp_key_match(key, mh->dest, peer, tag)) + if (!mctp_key_match(key, netid, mh->dest, peer, tag)) continue; spin_lock(&key->lock); @@ -165,6 +168,7 @@ static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb, } static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk, + unsigned int net, mctp_eid_t local, mctp_eid_t peer, u8 tag, gfp_t gfp) { @@ -174,6 +178,7 @@ static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk, if (!key) return NULL; + key->net = net; key->peer_addr = peer; key->local_addr = local; key->tag = tag; @@ -219,8 +224,8 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk) } hlist_for_each_entry(tmp, &net->mctp.keys, hlist) { - if (mctp_key_match(tmp, key->local_addr, key->peer_addr, - key->tag)) { + if (mctp_key_match(tmp, key->net, key->local_addr, + key->peer_addr, key->tag)) { spin_lock(&tmp->lock); if (tmp->valid) rc = -EEXIST; @@ -361,6 +366,7 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) struct net *net = dev_net(skb->dev); struct mctp_sock *msk; struct mctp_hdr *mh; + unsigned int netid; unsigned long f; u8 tag, flags; int rc; @@ -379,6 +385,7 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) /* grab header, advance data ptr */ mh = mctp_hdr(skb); + netid = mctp_cb(skb)->net; skb_pull(skb, sizeof(struct mctp_hdr)); if (mh->ver != 1) @@ -392,7 +399,7 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) /* lookup socket / reasm context, exactly matching (src,dest,tag). * we hold a ref on the key, and key->lock held. */ - key = mctp_lookup_key(net, skb, mh->src, &f); + key = mctp_lookup_key(net, skb, netid, mh->src, &f); if (flags & MCTP_HDR_FLAG_SOM) { if (key) { @@ -406,7 +413,8 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) * this lookup requires key->peer to be MCTP_ADDR_ANY, * it doesn't match just any key->peer. */ - any_key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY, &f); + any_key = mctp_lookup_key(net, skb, netid, + MCTP_ADDR_ANY, &f); if (any_key) { msk = container_of(any_key->sk, struct mctp_sock, sk); @@ -443,7 +451,7 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) * packets for this message */ if (!key) { - key = mctp_key_alloc(msk, mh->dest, mh->src, + key = mctp_key_alloc(msk, netid, mh->dest, mh->src, tag, GFP_ATOMIC); if (!key) { rc = -ENOMEM; @@ -637,6 +645,7 @@ static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key, * it for the socket msk */ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, + unsigned int netid, mctp_eid_t local, mctp_eid_t peer, bool manual, u8 *tagp) { @@ -651,7 +660,7 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, peer = MCTP_ADDR_ANY; /* be optimistic, alloc now */ - key = mctp_key_alloc(msk, local, peer, 0, GFP_KERNEL); + key = mctp_key_alloc(msk, netid, local, peer, 0, GFP_KERNEL); if (!key) return ERR_PTR(-ENOMEM); @@ -668,6 +677,10 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, * lock held, they don't change over the lifetime of the key. */ + /* tags are net-specific */ + if (tmp->net != netid) + continue; + /* if we don't own the tag, it can't conflict */ if (tmp->tag & MCTP_HDR_FLAG_TO) continue; @@ -716,6 +729,7 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, } static struct mctp_sk_key *mctp_lookup_prealloc_tag(struct mctp_sock *msk, + unsigned int netid, mctp_eid_t daddr, u8 req_tag, u8 *tagp) { @@ -730,6 +744,9 @@ static struct mctp_sk_key *mctp_lookup_prealloc_tag(struct mctp_sock *msk, spin_lock_irqsave(&mns->keys_lock, flags); hlist_for_each_entry(tmp, &mns->keys, hlist) { + if (tmp->net != netid) + continue; + if (tmp->tag != req_tag) continue; @@ -910,6 +927,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, struct mctp_sk_key *key; struct mctp_hdr *hdr; unsigned long flags; + unsigned int netid; unsigned int mtu; mctp_eid_t saddr; bool ext_rt; @@ -960,16 +978,17 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, rc = 0; } spin_unlock_irqrestore(&rt->dev->addrs_lock, flags); + netid = READ_ONCE(rt->dev->net); if (rc) goto out_release; if (req_tag & MCTP_TAG_OWNER) { if (req_tag & MCTP_TAG_PREALLOC) - key = mctp_lookup_prealloc_tag(msk, daddr, + key = mctp_lookup_prealloc_tag(msk, netid, daddr, req_tag, &tag); else - key = mctp_alloc_local_tag(msk, saddr, daddr, + key = mctp_alloc_local_tag(msk, netid, saddr, daddr, false, &tag); if (IS_ERR(key)) { diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 714e5ae47629f..b3dbd3600d916 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -552,6 +552,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test) struct mctp_sock *msk; struct socket *sock; unsigned long flags; + unsigned int net; int rc; u8 c; @@ -559,6 +560,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test) dev = mctp_test_create_dev(); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + net = READ_ONCE(dev->mdev->net); rt = mctp_test_create_route(&init_net, dev->mdev, 8, 68); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt); @@ -570,8 +572,9 @@ static void mctp_test_route_input_sk_keys(struct kunit *test) mns = &sock_net(sock->sk)->mctp; /* set the incoming tag according to test params */ - key = mctp_key_alloc(msk, params->key_local_addr, params->key_peer_addr, - params->key_tag, GFP_KERNEL); + key = mctp_key_alloc(msk, net, params->key_local_addr, + params->key_peer_addr, params->key_tag, + GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, key); -- cgit 1.2.3-korg From c16d2380e8fdd9fc1fd0e9c60e068f264b2d0ced Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 19 Feb 2024 17:51:51 +0800 Subject: net: mctp: provide a more specific tag allocation ioctl Now that we have net-specific tags, extend the tag allocation ioctls (SIOCMCTPALLOCTAG / SIOCMCTPDROPTAG) to allow a network parameter to be passed to the tag allocation. We also add a local_addr member to the ioc struct, to allow for a future finer-grained tag allocation using local EIDs too. We don't add any specific support for that now though, so require MCTP_ADDR_ANY or MCTP_ADDR_NULL for those at present. The old ioctls will still work, but allocate for the default MCTP net. These are now marked as deprecated in the header. Signed-off-by: Jeremy Kerr Signed-off-by: Paolo Abeni --- include/uapi/linux/mctp.h | 32 +++++++++++++ net/mctp/af_mctp.c | 117 ++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 129 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index 154ab56651f1a..e1db65df9359f 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -50,7 +50,14 @@ struct sockaddr_mctp_ext { #define SIOCMCTPALLOCTAG (SIOCPROTOPRIVATE + 0) #define SIOCMCTPDROPTAG (SIOCPROTOPRIVATE + 1) +#define SIOCMCTPALLOCTAG2 (SIOCPROTOPRIVATE + 2) +#define SIOCMCTPDROPTAG2 (SIOCPROTOPRIVATE + 3) +/* Deprecated: use mctp_ioc_tag_ctl2 / TAG2 ioctls instead, which defines the + * MCTP network ID as part of the allocated tag. Using this assumes the default + * net ID for allocated tags, which may not give correct behaviour on system + * with multiple networks configured. + */ struct mctp_ioc_tag_ctl { mctp_eid_t peer_addr; @@ -65,4 +72,29 @@ struct mctp_ioc_tag_ctl { __u16 flags; }; +struct mctp_ioc_tag_ctl2 { + /* Peer details: network ID, peer EID, local EID. All set by the + * caller. + * + * Local EID must be MCTP_ADDR_NULL or MCTP_ADDR_ANY in current + * kernels. + */ + unsigned int net; + mctp_eid_t peer_addr; + mctp_eid_t local_addr; + + /* Set by caller, but no flags defined currently. Must be 0 */ + __u16 flags; + + /* For SIOCMCTPALLOCTAG2: must be passed as zero, kernel will + * populate with the allocated tag value. Returned tag value will + * always have TO and PREALLOC set. + * + * For SIOCMCTPDROPTAG2: userspace provides tag value to drop, from + * a prior SIOCMCTPALLOCTAG2 call (and so must have TO and PREALLOC set). + */ + __u8 tag; + +}; + #endif /* __UAPI_MCTP_H */ diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index 05315a422ffb3..de52a9191da0e 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -350,30 +350,102 @@ static int mctp_getsockopt(struct socket *sock, int level, int optname, return -EINVAL; } -static int mctp_ioctl_alloctag(struct mctp_sock *msk, unsigned long arg) +/* helpers for reading/writing the tag ioc, handling compatibility across the + * two versions, and some basic API error checking + */ +static int mctp_ioctl_tag_copy_from_user(unsigned long arg, + struct mctp_ioc_tag_ctl2 *ctl, + bool tagv2) +{ + struct mctp_ioc_tag_ctl ctl_compat; + unsigned long size; + void *ptr; + int rc; + + if (tagv2) { + size = sizeof(*ctl); + ptr = ctl; + } else { + size = sizeof(ctl_compat); + ptr = &ctl_compat; + } + + rc = copy_from_user(ptr, (void __user *)arg, size); + if (rc) + return -EFAULT; + + if (!tagv2) { + /* compat, using defaults for new fields */ + ctl->net = MCTP_INITIAL_DEFAULT_NET; + ctl->peer_addr = ctl_compat.peer_addr; + ctl->local_addr = MCTP_ADDR_ANY; + ctl->flags = ctl_compat.flags; + ctl->tag = ctl_compat.tag; + } + + if (ctl->flags) + return -EINVAL; + + if (ctl->local_addr != MCTP_ADDR_ANY && + ctl->local_addr != MCTP_ADDR_NULL) + return -EINVAL; + + return 0; +} + +static int mctp_ioctl_tag_copy_to_user(unsigned long arg, + struct mctp_ioc_tag_ctl2 *ctl, + bool tagv2) +{ + struct mctp_ioc_tag_ctl ctl_compat; + unsigned long size; + void *ptr; + int rc; + + if (tagv2) { + ptr = ctl; + size = sizeof(*ctl); + } else { + ctl_compat.peer_addr = ctl->peer_addr; + ctl_compat.tag = ctl->tag; + ctl_compat.flags = ctl->flags; + + ptr = &ctl_compat; + size = sizeof(ctl_compat); + } + + rc = copy_to_user((void __user *)arg, ptr, size); + if (rc) + return -EFAULT; + + return 0; +} + +static int mctp_ioctl_alloctag(struct mctp_sock *msk, bool tagv2, + unsigned long arg) { struct net *net = sock_net(&msk->sk); struct mctp_sk_key *key = NULL; - struct mctp_ioc_tag_ctl ctl; + struct mctp_ioc_tag_ctl2 ctl; unsigned long flags; u8 tag; + int rc; - if (copy_from_user(&ctl, (void __user *)arg, sizeof(ctl))) - return -EFAULT; + rc = mctp_ioctl_tag_copy_from_user(arg, &ctl, tagv2); + if (rc) + return rc; if (ctl.tag) return -EINVAL; - if (ctl.flags) - return -EINVAL; - - key = mctp_alloc_local_tag(msk, MCTP_INITIAL_DEFAULT_NET, - MCTP_ADDR_ANY, ctl.peer_addr, true, &tag); + key = mctp_alloc_local_tag(msk, ctl.net, MCTP_ADDR_ANY, + ctl.peer_addr, true, &tag); if (IS_ERR(key)) return PTR_ERR(key); ctl.tag = tag | MCTP_TAG_OWNER | MCTP_TAG_PREALLOC; - if (copy_to_user((void __user *)arg, &ctl, sizeof(ctl))) { + rc = mctp_ioctl_tag_copy_to_user(arg, &ctl, tagv2); + if (rc) { unsigned long fl2; /* Unwind our key allocation: the keys list lock needs to be * taken before the individual key locks, and we need a valid @@ -385,28 +457,27 @@ static int mctp_ioctl_alloctag(struct mctp_sock *msk, unsigned long arg) __mctp_key_remove(key, net, fl2, MCTP_TRACE_KEY_DROPPED); mctp_key_unref(key); spin_unlock_irqrestore(&net->mctp.keys_lock, flags); - return -EFAULT; + return rc; } mctp_key_unref(key); return 0; } -static int mctp_ioctl_droptag(struct mctp_sock *msk, unsigned long arg) +static int mctp_ioctl_droptag(struct mctp_sock *msk, bool tagv2, + unsigned long arg) { struct net *net = sock_net(&msk->sk); - struct mctp_ioc_tag_ctl ctl; + struct mctp_ioc_tag_ctl2 ctl; unsigned long flags, fl2; struct mctp_sk_key *key; struct hlist_node *tmp; int rc; u8 tag; - if (copy_from_user(&ctl, (void __user *)arg, sizeof(ctl))) - return -EFAULT; - - if (ctl.flags) - return -EINVAL; + rc = mctp_ioctl_tag_copy_from_user(arg, &ctl, tagv2); + if (rc) + return rc; /* Must be a local tag, TO set, preallocated */ if ((ctl.tag & ~MCTP_TAG_MASK) != (MCTP_TAG_OWNER | MCTP_TAG_PREALLOC)) @@ -422,6 +493,7 @@ static int mctp_ioctl_droptag(struct mctp_sock *msk, unsigned long arg) */ spin_lock_irqsave(&key->lock, fl2); if (key->manual_alloc && + ctl.net == key->net && ctl.peer_addr == key->peer_addr && tag == key->tag) { __mctp_key_remove(key, net, fl2, @@ -439,12 +511,17 @@ static int mctp_ioctl_droptag(struct mctp_sock *msk, unsigned long arg) static int mctp_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct mctp_sock *msk = container_of(sock->sk, struct mctp_sock, sk); + bool tagv2 = false; switch (cmd) { + case SIOCMCTPALLOCTAG2: case SIOCMCTPALLOCTAG: - return mctp_ioctl_alloctag(msk, arg); + tagv2 = cmd == SIOCMCTPALLOCTAG2; + return mctp_ioctl_alloctag(msk, tagv2, arg); case SIOCMCTPDROPTAG: - return mctp_ioctl_droptag(msk, arg); + case SIOCMCTPDROPTAG2: + tagv2 = cmd == SIOCMCTPDROPTAG2; + return mctp_ioctl_droptag(msk, tagv2, arg); } return -EINVAL; -- cgit 1.2.3-korg From 61b50531dc66b0b299da22580074d8960f99bb0c Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 19 Feb 2024 17:51:52 +0800 Subject: net: mctp: tests: Add netid argument to __mctp_route_test_init We'll want to create net-specific test setups in an upcoming change, so allow the caller to provide a non-default netid. Signed-off-by: Jeremy Kerr Signed-off-by: Paolo Abeni --- net/mctp/test/route-test.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index b3dbd3600d916..0880c3c04ace6 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -293,7 +293,8 @@ KUNIT_ARRAY_PARAM(mctp_rx_input, mctp_rx_input_tests, static void __mctp_route_test_init(struct kunit *test, struct mctp_test_dev **devp, struct mctp_test_route **rtp, - struct socket **sockp) + struct socket **sockp, + unsigned int netid) { struct sockaddr_mctp addr = {0}; struct mctp_test_route *rt; @@ -303,6 +304,8 @@ static void __mctp_route_test_init(struct kunit *test, dev = mctp_test_create_dev(); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + if (netid != MCTP_NET_ANY) + WRITE_ONCE(dev->mdev->net, netid); rt = mctp_test_create_route(&init_net, dev->mdev, 8, 68); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt); @@ -311,7 +314,7 @@ static void __mctp_route_test_init(struct kunit *test, KUNIT_ASSERT_EQ(test, rc, 0); addr.smctp_family = AF_MCTP; - addr.smctp_network = MCTP_NET_ANY; + addr.smctp_network = netid; addr.smctp_addr.s_addr = 8; addr.smctp_type = 0; rc = kernel_bind(sock, (struct sockaddr *)&addr, sizeof(addr)); @@ -349,7 +352,7 @@ static void mctp_test_route_input_sk(struct kunit *test) params = test->param_value; - __mctp_route_test_init(test, &dev, &rt, &sock); + __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY); skb = mctp_test_create_skb_data(¶ms->hdr, ¶ms->type); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb); @@ -419,7 +422,7 @@ static void mctp_test_route_input_sk_reasm(struct kunit *test) params = test->param_value; - __mctp_route_test_init(test, &dev, &rt, &sock); + __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY); for (i = 0; i < params->n_hdrs; i++) { c = i; -- cgit 1.2.3-korg From 9acdc089c08884c1f1b4567efcd7290781ee8e3a Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 19 Feb 2024 17:51:53 +0800 Subject: net: mctp: tests: Add MCTP net isolation tests Add a couple of tests that excersise the new net-specific sk_key and bind lookups Signed-off-by: Jeremy Kerr Signed-off-by: Paolo Abeni --- net/mctp/test/route-test.c | 161 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 161 insertions(+) (limited to 'net') diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 0880c3c04ace6..bad084525f175 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -678,6 +678,165 @@ static void mctp_route_input_sk_keys_to_desc( KUNIT_ARRAY_PARAM(mctp_route_input_sk_keys, mctp_route_input_sk_keys_tests, mctp_route_input_sk_keys_to_desc); +struct test_net { + unsigned int netid; + struct mctp_test_dev *dev; + struct mctp_test_route *rt; + struct socket *sock; + struct sk_buff *skb; + struct mctp_sk_key *key; + struct { + u8 type; + unsigned int data; + } msg; +}; + +static void +mctp_test_route_input_multiple_nets_bind_init(struct kunit *test, + struct test_net *t) +{ + struct mctp_hdr hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(1) | FL_TO); + + t->msg.data = t->netid; + + __mctp_route_test_init(test, &t->dev, &t->rt, &t->sock, t->netid); + + t->skb = mctp_test_create_skb_data(&hdr, &t->msg); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, t->skb); + mctp_test_skb_set_dev(t->skb, t->dev); +} + +static void +mctp_test_route_input_multiple_nets_bind_fini(struct kunit *test, + struct test_net *t) +{ + __mctp_route_test_fini(test, t->dev, t->rt, t->sock); +} + +/* Test that skbs from different nets (otherwise identical) get routed to their + * corresponding socket via the sockets' bind() + */ +static void mctp_test_route_input_multiple_nets_bind(struct kunit *test) +{ + struct sk_buff *rx_skb1, *rx_skb2; + struct test_net t1, t2; + int rc; + + t1.netid = 1; + t2.netid = 2; + + t1.msg.type = 0; + t2.msg.type = 0; + + mctp_test_route_input_multiple_nets_bind_init(test, &t1); + mctp_test_route_input_multiple_nets_bind_init(test, &t2); + + rc = mctp_route_input(&t1.rt->rt, t1.skb); + KUNIT_ASSERT_EQ(test, rc, 0); + rc = mctp_route_input(&t2.rt->rt, t2.skb); + KUNIT_ASSERT_EQ(test, rc, 0); + + rx_skb1 = skb_recv_datagram(t1.sock->sk, MSG_DONTWAIT, &rc); + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, rx_skb1); + KUNIT_EXPECT_EQ(test, rx_skb1->len, sizeof(t1.msg)); + KUNIT_EXPECT_EQ(test, + *(unsigned int *)skb_pull(rx_skb1, sizeof(t1.msg.data)), + t1.netid); + kfree_skb(rx_skb1); + + rx_skb2 = skb_recv_datagram(t2.sock->sk, MSG_DONTWAIT, &rc); + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, rx_skb2); + KUNIT_EXPECT_EQ(test, rx_skb2->len, sizeof(t2.msg)); + KUNIT_EXPECT_EQ(test, + *(unsigned int *)skb_pull(rx_skb2, sizeof(t2.msg.data)), + t2.netid); + kfree_skb(rx_skb2); + + mctp_test_route_input_multiple_nets_bind_fini(test, &t1); + mctp_test_route_input_multiple_nets_bind_fini(test, &t2); +} + +static void +mctp_test_route_input_multiple_nets_key_init(struct kunit *test, + struct test_net *t) +{ + struct mctp_hdr hdr = RX_HDR(1, 9, 8, FL_S | FL_E | FL_T(1)); + struct mctp_sock *msk; + struct netns_mctp *mns; + unsigned long flags; + + t->msg.data = t->netid; + + __mctp_route_test_init(test, &t->dev, &t->rt, &t->sock, t->netid); + + msk = container_of(t->sock->sk, struct mctp_sock, sk); + + t->key = mctp_key_alloc(msk, t->netid, hdr.dest, hdr.src, 1, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, t->key); + + mns = &sock_net(t->sock->sk)->mctp; + spin_lock_irqsave(&mns->keys_lock, flags); + mctp_reserve_tag(&init_net, t->key, msk); + spin_unlock_irqrestore(&mns->keys_lock, flags); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, t->key); + t->skb = mctp_test_create_skb_data(&hdr, &t->msg); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, t->skb); + mctp_test_skb_set_dev(t->skb, t->dev); +} + +static void +mctp_test_route_input_multiple_nets_key_fini(struct kunit *test, + struct test_net *t) +{ + mctp_key_unref(t->key); + __mctp_route_test_fini(test, t->dev, t->rt, t->sock); +} + +/* test that skbs from different nets (otherwise identical) get routed to their + * corresponding socket via the sk_key + */ +static void mctp_test_route_input_multiple_nets_key(struct kunit *test) +{ + struct sk_buff *rx_skb1, *rx_skb2; + struct test_net t1, t2; + int rc; + + t1.netid = 1; + t2.netid = 2; + + /* use type 1 which is not bound */ + t1.msg.type = 1; + t2.msg.type = 1; + + mctp_test_route_input_multiple_nets_key_init(test, &t1); + mctp_test_route_input_multiple_nets_key_init(test, &t2); + + rc = mctp_route_input(&t1.rt->rt, t1.skb); + KUNIT_ASSERT_EQ(test, rc, 0); + rc = mctp_route_input(&t2.rt->rt, t2.skb); + KUNIT_ASSERT_EQ(test, rc, 0); + + rx_skb1 = skb_recv_datagram(t1.sock->sk, MSG_DONTWAIT, &rc); + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, rx_skb1); + KUNIT_EXPECT_EQ(test, rx_skb1->len, sizeof(t1.msg)); + KUNIT_EXPECT_EQ(test, + *(unsigned int *)skb_pull(rx_skb1, sizeof(t1.msg.data)), + t1.netid); + kfree_skb(rx_skb1); + + rx_skb2 = skb_recv_datagram(t2.sock->sk, MSG_DONTWAIT, &rc); + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, rx_skb2); + KUNIT_EXPECT_EQ(test, rx_skb2->len, sizeof(t2.msg)); + KUNIT_EXPECT_EQ(test, + *(unsigned int *)skb_pull(rx_skb2, sizeof(t2.msg.data)), + t2.netid); + kfree_skb(rx_skb2); + + mctp_test_route_input_multiple_nets_key_fini(test, &t1); + mctp_test_route_input_multiple_nets_key_fini(test, &t2); +} + static struct kunit_case mctp_test_cases[] = { KUNIT_CASE_PARAM(mctp_test_fragment, mctp_frag_gen_params), KUNIT_CASE_PARAM(mctp_test_rx_input, mctp_rx_input_gen_params), @@ -686,6 +845,8 @@ static struct kunit_case mctp_test_cases[] = { mctp_route_input_sk_reasm_gen_params), KUNIT_CASE_PARAM(mctp_test_route_input_sk_keys, mctp_route_input_sk_keys_gen_params), + KUNIT_CASE(mctp_test_route_input_multiple_nets_bind), + KUNIT_CASE(mctp_test_route_input_multiple_nets_key), {} }; -- cgit 1.2.3-korg From 1394c1dec1c619a46867ed32791a29695372bff8 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 19 Feb 2024 17:51:54 +0800 Subject: net: mctp: copy skb ext data when fragmenting If we're fragmenting on local output, the original packet may contain ext data for the MCTP flows. We'll want this in the resulting fragment skbs too. So, do a skb_ext_copy() in the fragmentation path, and implement the MCTP-specific parts of an ext copy operation. Fixes: 67737c457281 ("mctp: Pass flow data & flow release events to drivers") Reported-by: Jian Zhang Signed-off-by: Jeremy Kerr Signed-off-by: Paolo Abeni --- net/core/skbuff.c | 8 ++++++++ net/mctp/route.c | 3 +++ 2 files changed, 11 insertions(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b9de3ee65ae64..1f918e602bc4f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -6849,6 +6849,14 @@ static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old, for (i = 0; i < sp->len; i++) xfrm_state_hold(sp->xvec[i]); } +#endif +#ifdef CONFIG_MCTP_FLOWS + if (old_active & (1 << SKB_EXT_MCTP)) { + struct mctp_flow *flow = skb_ext_get_ptr(old, SKB_EXT_MCTP); + + if (flow->key) + refcount_inc(&flow->key->refs); + } #endif __skb_ext_put(old); return new; diff --git a/net/mctp/route.c b/net/mctp/route.c index 28648a7ec8666..34e5131135093 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -905,6 +905,9 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, /* copy message payload */ skb_copy_bits(skb, pos, skb_transport_header(skb2), size); + /* we need to copy the extensions, for MCTP flow data */ + skb_ext_copy(skb2, skb); + /* do route */ rc = rt->output(rt, skb2); if (rc) -- cgit 1.2.3-korg From 109a5331143da79eb2b63dee9c65188784edfbce Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 19 Feb 2024 17:51:55 +0800 Subject: net: mctp: tests: Test that outgoing skbs have flow data populated When CONFIG_MCTP_FLOWS is enabled, outgoing skbs should have their SKB_EXT_MCTP extension set for drivers to consume. Add two tests for local-to-output routing that check for the flow extensions: one for the simple single-packet case, and one for fragmentation. We now make MCTP_TEST select MCTP_FLOWS, so we always get coverage of these flow tests. The tests are skippable if MCTP_FLOWS is (otherwise) disabled, but that would need manual config tweaking. Signed-off-by: Jeremy Kerr Signed-off-by: Paolo Abeni --- net/mctp/Kconfig | 1 + net/mctp/test/route-test.c | 136 +++++++++++++++++++++++++++ tools/testing/kunit/configs/all_tests.config | 1 + 3 files changed, 138 insertions(+) (limited to 'net') diff --git a/net/mctp/Kconfig b/net/mctp/Kconfig index 3a5c0e70da776..d8d3413a37f7e 100644 --- a/net/mctp/Kconfig +++ b/net/mctp/Kconfig @@ -14,6 +14,7 @@ menuconfig MCTP config MCTP_TEST bool "MCTP core tests" if !KUNIT_ALL_TESTS + select MCTP_FLOWS depends on MCTP=y && KUNIT=y default KUNIT_ALL_TESTS diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index bad084525f175..eb7e9ac956122 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -837,6 +837,140 @@ static void mctp_test_route_input_multiple_nets_key(struct kunit *test) mctp_test_route_input_multiple_nets_key_fini(test, &t2); } +#if IS_ENABLED(CONFIG_MCTP_FLOWS) + +static void mctp_test_flow_init(struct kunit *test, + struct mctp_test_dev **devp, + struct mctp_test_route **rtp, + struct socket **sock, + struct sk_buff **skbp, + unsigned int len) +{ + struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct sk_buff *skb; + + /* we have a slightly odd routing setup here; the test route + * is for EID 8, which is our local EID. We don't do a routing + * lookup, so that's fine - all we require is a path through + * mctp_local_output, which will call rt->output on whatever + * route we provide + */ + __mctp_route_test_init(test, &dev, &rt, sock, MCTP_NET_ANY); + + /* Assign a single EID. ->addrs is freed on mctp netdev release */ + dev->mdev->addrs = kmalloc(sizeof(u8), GFP_KERNEL); + dev->mdev->num_addrs = 1; + dev->mdev->addrs[0] = 8; + + skb = alloc_skb(len + sizeof(struct mctp_hdr) + 1, GFP_KERNEL); + KUNIT_ASSERT_TRUE(test, skb); + __mctp_cb(skb); + skb_reserve(skb, sizeof(struct mctp_hdr) + 1); + memset(skb_put(skb, len), 0, len); + + /* take a ref for the route, we'll decrement in local output */ + refcount_inc(&rt->rt.refs); + + *devp = dev; + *rtp = rt; + *skbp = skb; +} + +static void mctp_test_flow_fini(struct kunit *test, + struct mctp_test_dev *dev, + struct mctp_test_route *rt, + struct socket *sock) +{ + __mctp_route_test_fini(test, dev, rt, sock); +} + +/* test that an outgoing skb has the correct MCTP extension data set */ +static void mctp_test_packet_flow(struct kunit *test) +{ + struct sk_buff *skb, *skb2; + struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct mctp_flow *flow; + struct socket *sock; + u8 dst = 8; + int n, rc; + + mctp_test_flow_init(test, &dev, &rt, &sock, &skb, 30); + + rc = mctp_local_output(sock->sk, &rt->rt, skb, dst, MCTP_TAG_OWNER); + KUNIT_ASSERT_EQ(test, rc, 0); + + n = rt->pkts.qlen; + KUNIT_ASSERT_EQ(test, n, 1); + + skb2 = skb_dequeue(&rt->pkts); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb2); + + flow = skb_ext_find(skb2, SKB_EXT_MCTP); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, flow); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, flow->key); + KUNIT_ASSERT_PTR_EQ(test, flow->key->sk, sock->sk); + + kfree_skb(skb2); + mctp_test_flow_fini(test, dev, rt, sock); +} + +/* test that outgoing skbs, after fragmentation, all have the correct MCTP + * extension data set. + */ +static void mctp_test_fragment_flow(struct kunit *test) +{ + struct mctp_flow *flows[2]; + struct sk_buff *tx_skbs[2]; + struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct sk_buff *skb; + struct socket *sock; + u8 dst = 8; + int n, rc; + + mctp_test_flow_init(test, &dev, &rt, &sock, &skb, 100); + + rc = mctp_local_output(sock->sk, &rt->rt, skb, dst, MCTP_TAG_OWNER); + KUNIT_ASSERT_EQ(test, rc, 0); + + n = rt->pkts.qlen; + KUNIT_ASSERT_EQ(test, n, 2); + + /* both resulting packets should have the same flow data */ + tx_skbs[0] = skb_dequeue(&rt->pkts); + tx_skbs[1] = skb_dequeue(&rt->pkts); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tx_skbs[0]); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tx_skbs[1]); + + flows[0] = skb_ext_find(tx_skbs[0], SKB_EXT_MCTP); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, flows[0]); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, flows[0]->key); + KUNIT_ASSERT_PTR_EQ(test, flows[0]->key->sk, sock->sk); + + flows[1] = skb_ext_find(tx_skbs[1], SKB_EXT_MCTP); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, flows[1]); + KUNIT_ASSERT_PTR_EQ(test, flows[1]->key, flows[0]->key); + + kfree_skb(tx_skbs[0]); + kfree_skb(tx_skbs[1]); + mctp_test_flow_fini(test, dev, rt, sock); +} + +#else +static void mctp_test_packet_flow(struct kunit *test) +{ + kunit_skip(test, "Requires CONFIG_MCTP_FLOWS=y"); +} + +static void mctp_test_fragment_flow(struct kunit *test) +{ + kunit_skip(test, "Requires CONFIG_MCTP_FLOWS=y"); +} +#endif + static struct kunit_case mctp_test_cases[] = { KUNIT_CASE_PARAM(mctp_test_fragment, mctp_frag_gen_params), KUNIT_CASE_PARAM(mctp_test_rx_input, mctp_rx_input_gen_params), @@ -847,6 +981,8 @@ static struct kunit_case mctp_test_cases[] = { mctp_route_input_sk_keys_gen_params), KUNIT_CASE(mctp_test_route_input_multiple_nets_bind), KUNIT_CASE(mctp_test_route_input_multiple_nets_key), + KUNIT_CASE(mctp_test_packet_flow), + KUNIT_CASE(mctp_test_fragment_flow), {} }; diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index 12da4c493867f..a6cf69a665e84 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -23,6 +23,7 @@ CONFIG_USB4=y CONFIG_NET=y CONFIG_MCTP=y +CONFIG_MCTP_FLOWS=y CONFIG_INET=y CONFIG_MPTCP=y -- cgit 1.2.3-korg From d192eaf57f006072e8fffbbc5312da87ead86fc3 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 19 Feb 2024 17:51:56 +0800 Subject: net: mctp: tests: Add a test for proper tag creation on local output Ensure we have the correct key parameters on sending a message. Signed-off-by: Jeremy Kerr Signed-off-by: Paolo Abeni --- net/mctp/test/route-test.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) (limited to 'net') diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index eb7e9ac956122..77e5dd4222580 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -971,6 +971,80 @@ static void mctp_test_fragment_flow(struct kunit *test) } #endif +/* Test that outgoing skbs cause a suitable tag to be created */ +static void mctp_test_route_output_key_create(struct kunit *test) +{ + const unsigned int netid = 50; + const u8 dst = 26, src = 15; + struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct mctp_sk_key *key; + struct netns_mctp *mns; + unsigned long flags; + struct socket *sock; + struct sk_buff *skb; + bool empty, single; + const int len = 2; + int rc; + + dev = mctp_test_create_dev(); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + WRITE_ONCE(dev->mdev->net, netid); + + rt = mctp_test_create_route(&init_net, dev->mdev, dst, 68); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt); + + rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock); + KUNIT_ASSERT_EQ(test, rc, 0); + + dev->mdev->addrs = kmalloc(sizeof(u8), GFP_KERNEL); + dev->mdev->num_addrs = 1; + dev->mdev->addrs[0] = src; + + skb = alloc_skb(sizeof(struct mctp_hdr) + 1 + len, GFP_KERNEL); + KUNIT_ASSERT_TRUE(test, skb); + __mctp_cb(skb); + skb_reserve(skb, sizeof(struct mctp_hdr) + 1 + len); + memset(skb_put(skb, len), 0, len); + + refcount_inc(&rt->rt.refs); + + mns = &sock_net(sock->sk)->mctp; + + /* We assume we're starting from an empty keys list, which requires + * preceding tests to clean up correctly! + */ + spin_lock_irqsave(&mns->keys_lock, flags); + empty = hlist_empty(&mns->keys); + spin_unlock_irqrestore(&mns->keys_lock, flags); + KUNIT_ASSERT_TRUE(test, empty); + + rc = mctp_local_output(sock->sk, &rt->rt, skb, dst, MCTP_TAG_OWNER); + KUNIT_ASSERT_EQ(test, rc, 0); + + key = NULL; + single = false; + spin_lock_irqsave(&mns->keys_lock, flags); + if (!hlist_empty(&mns->keys)) { + key = hlist_entry(mns->keys.first, struct mctp_sk_key, hlist); + single = hlist_is_singular_node(&key->hlist, &mns->keys); + } + spin_unlock_irqrestore(&mns->keys_lock, flags); + + KUNIT_ASSERT_NOT_NULL(test, key); + KUNIT_ASSERT_TRUE(test, single); + + KUNIT_EXPECT_EQ(test, key->net, netid); + KUNIT_EXPECT_EQ(test, key->local_addr, src); + KUNIT_EXPECT_EQ(test, key->peer_addr, dst); + /* key has incoming tag, so inverse of what we sent */ + KUNIT_EXPECT_FALSE(test, key->tag & MCTP_TAG_OWNER); + + sock_release(sock); + mctp_test_route_destroy(test, rt); + mctp_test_destroy_dev(dev); +} + static struct kunit_case mctp_test_cases[] = { KUNIT_CASE_PARAM(mctp_test_fragment, mctp_frag_gen_params), KUNIT_CASE_PARAM(mctp_test_rx_input, mctp_rx_input_gen_params), @@ -983,6 +1057,7 @@ static struct kunit_case mctp_test_cases[] = { KUNIT_CASE(mctp_test_route_input_multiple_nets_key), KUNIT_CASE(mctp_test_packet_flow), KUNIT_CASE(mctp_test_fragment_flow), + KUNIT_CASE(mctp_test_route_output_key_create), {} }; -- cgit 1.2.3-korg From 9eda38dc9150c260ae74edd2e1fe2e2b6fc62cee Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 15 Feb 2024 09:05:00 +0100 Subject: net/af_iucv: fix virtual vs physical address confusion Fix virtual vs physical address confusion. This does not fix a bug since virtual and physical address spaces are currently the same. Signed-off-by: Alexander Gordeev Reviewed-by: Alexandra Winter Link: https://lore.kernel.org/r/20240215080500.2616848-1-agordeev@linux.ibm.com Signed-off-by: Jakub Kicinski --- net/iucv/af_iucv.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 498a0c35b7bb2..4aa1c72e6c49e 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1060,13 +1060,12 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, int i; /* skip iucv_array lying in the headroom */ - iba[0].address = (u32)(addr_t)skb->data; + iba[0].address = (u32)virt_to_phys(skb->data); iba[0].length = (u32)skb_headlen(skb); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - iba[i + 1].address = - (u32)(addr_t)skb_frag_address(frag); + iba[i + 1].address = (u32)virt_to_phys(skb_frag_address(frag)); iba[i + 1].length = (u32)skb_frag_size(frag); } err = pr_iucv->message_send(iucv->path, &txmsg, @@ -1162,13 +1161,12 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, struct iucv_array *iba = (struct iucv_array *)skb->head; int i; - iba[0].address = (u32)(addr_t)skb->data; + iba[0].address = (u32)virt_to_phys(skb->data); iba[0].length = (u32)skb_headlen(skb); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - iba[i + 1].address = - (u32)(addr_t)skb_frag_address(frag); + iba[i + 1].address = (u32)virt_to_phys(skb_frag_address(frag)); iba[i + 1].length = (u32)skb_frag_size(frag); } rc = pr_iucv->message_receive(path, msg, -- cgit 1.2.3-korg From 3e7a0dccf0703edd3a2d9e3361f672241c31773b Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 21 Feb 2024 08:17:32 -0800 Subject: ipv6/sit: Do not allocate stats in the driver With commit 34d21de99cea9 ("net: Move {l,t,d}stats allocation to core and convert veth & vrf"), stats allocation could be done on net core instead of this driver. With this new approach, the driver doesn't have to bother with error handling (allocation failure checking, making sure free happens in the right spot, etc). This is core responsibility now. Remove the allocation in the ipv6/sit driver and leverage the network core allocation. Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240221161732.3026127-1-leitao@debian.org Signed-off-by: Jakub Kicinski --- net/ipv6/sit.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index ed3a44aa1e9d8..5ad01480854d2 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1408,7 +1408,6 @@ static void ipip6_dev_free(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); dst_cache_destroy(&tunnel->dst_cache); - free_percpu(dev->tstats); } #define SIT_FEATURES (NETIF_F_SG | \ @@ -1437,6 +1436,8 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->features |= NETIF_F_LLTX; dev->features |= SIT_FEATURES; dev->hw_features |= SIT_FEATURES; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; + } static int ipip6_tunnel_init(struct net_device *dev) @@ -1449,16 +1450,11 @@ static int ipip6_tunnel_init(struct net_device *dev) strcpy(tunnel->parms.name, dev->name); ipip6_tunnel_bind_dev(dev); - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); - if (err) { - free_percpu(dev->tstats); - dev->tstats = NULL; + if (err) return err; - } + netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL); netdev_lockdep_set_classes(dev); return 0; -- cgit 1.2.3-korg From d2efeb52c344c3e987d143dc5232f53f9b96c86a Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 20 Feb 2024 20:42:44 +0100 Subject: net: ethtool: avoid rebuilds on UTS_RELEASE change Currently, when you switch between branches or something like that and rebuild, net/ethtool/ioctl.c has to be built again because it depends on UTS_RELEASE. By instead referencing a string variable stored in another object file, this can be avoided. Signed-off-by: Jann Horn Reviewed-by: John Garry Link: https://lore.kernel.org/r/20240220194244.2056384-1-jannh@google.com Signed-off-by: Jakub Kicinski --- net/ethtool/ioctl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 1763e8b697e14..b419969c0dcbb 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -26,12 +26,12 @@ #include #include #include +#include #include #include #include #include #include -#include #include "common.h" /* State held across locks and calls for commands which have devlink fallback */ @@ -713,7 +713,8 @@ ethtool_get_drvinfo(struct net_device *dev, struct ethtool_devlink_compat *rsp) struct device *parent = dev->dev.parent; rsp->info.cmd = ETHTOOL_GDRVINFO; - strscpy(rsp->info.version, UTS_RELEASE, sizeof(rsp->info.version)); + strscpy(rsp->info.version, init_uts_ns.name.release, + sizeof(rsp->info.version)); if (ops->get_drvinfo) { ops->get_drvinfo(dev, &rsp->info); if (!rsp->info.bus_info[0] && parent) -- cgit 1.2.3-korg From 025f8ad20f2e3264d11683aa9cbbf0083eefbdcd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 22 Feb 2024 15:03:10 +0100 Subject: net: mpls: error out if inner headers are not set mpls_gso_segment() assumes skb_inner_network_header() returns a valid result: mpls_hlen = skb_inner_network_header(skb) - skb_network_header(skb); if (unlikely(!mpls_hlen || mpls_hlen % MPLS_HLEN)) goto out; if (unlikely(!pskb_may_pull(skb, mpls_hlen))) With syzbot reproducer, skb_inner_network_header() yields 0, skb_network_header() returns 108, so this will "pskb_may_pull(skb, -108)))" which triggers a newly added DEBUG_NET_WARN_ON_ONCE() check: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 5068 at include/linux/skbuff.h:2723 pskb_may_pull_reason include/linux/skbuff.h:2723 [inline] WARNING: CPU: 0 PID: 5068 at include/linux/skbuff.h:2723 pskb_may_pull include/linux/skbuff.h:2739 [inline] WARNING: CPU: 0 PID: 5068 at include/linux/skbuff.h:2723 mpls_gso_segment+0x773/0xaa0 net/mpls/mpls_gso.c:34 [..] skb_mac_gso_segment+0x383/0x740 net/core/gso.c:53 nsh_gso_segment+0x40a/0xad0 net/nsh/nsh.c:108 skb_mac_gso_segment+0x383/0x740 net/core/gso.c:53 __skb_gso_segment+0x324/0x4c0 net/core/gso.c:124 skb_gso_segment include/net/gso.h:83 [inline] [..] sch_direct_xmit+0x11a/0x5f0 net/sched/sch_generic.c:327 [..] packet_sendmsg+0x46a9/0x6130 net/packet/af_packet.c:3113 [..] First iteration of this patch made mpls_hlen signed and changed test to error out to "mpls_hlen <= 0 || ..". Eric Dumazet said: > I was thinking about adding a debug check in skb_inner_network_header() > if inner_network_header is zero (that would mean it is not 'set' yet), > but this would trigger even after your patch. So add new skb_inner_network_header_was_set() helper and use that. The syzbot reproducer injects data via packet socket. The skb that gets allocated and passed down the stack has ->protocol set to NSH (0x894f) and gso_type set to SKB_GSO_UDP | SKB_GSO_DODGY. This gets passed to skb_mac_gso_segment(), which sees NSH as ptype to find a callback for. nsh_gso_segment() retrieves next type: proto = tun_p_to_eth_p(nsh_hdr(skb)->np); ... which is MPLS (TUN_P_MPLS_UC). It updates skb->protocol and then calls mpls_gso_segment(). Inner offsets are all 0, so mpls_gso_segment() ends up with a negative header size. In case more callers rely on silent handling of such large may_pull values we could also 'legalize' this behaviour, either replacing the debug check with (len > INT_MAX) test or removing it and instead adding a comment before existing if (unlikely(len > skb->len)) return SKB_DROP_REASON_PKT_TOO_SMALL; test in pskb_may_pull_reason(), saying that this check also implicitly takes care of callers that miscompute header sizes. Cc: Simon Horman Fixes: 219eee9c0d16 ("net: skbuff: add overflow debug check to pull/push helpers") Reported-by: syzbot+99d15fcdb0132a1e1a82@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/00000000000043b1310611e388aa@google.com/raw Signed-off-by: Florian Westphal Link: https://lore.kernel.org/r/20240222140321.14080-1-fw@strlen.de Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 5 +++++ net/mpls/mpls_gso.c | 3 +++ 2 files changed, 8 insertions(+) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 28c7cb7ce2515..1470b74fb6d29 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2894,6 +2894,11 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb, skb->inner_network_header += offset; } +static inline bool skb_inner_network_header_was_set(const struct sk_buff *skb) +{ + return skb->inner_network_header > 0; +} + static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb) { return skb->head + skb->inner_mac_header; diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index 533d082f0701e..45d1e6a157fc7 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -27,6 +27,9 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, __be16 mpls_protocol; unsigned int mpls_hlen; + if (!skb_inner_network_header_was_set(skb)) + goto out; + skb_reset_network_header(skb); mpls_hlen = skb_inner_network_header(skb) - skb_network_header(skb); if (unlikely(!mpls_hlen || mpls_hlen % MPLS_HLEN)) -- cgit 1.2.3-korg From e353ea9ce471331c13edffd5977eadd602d1bb80 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2024 10:50:08 +0000 Subject: rtnetlink: prepare nla_put_iflink() to run under RCU We want to be able to run rtnl_fill_ifinfo() under RCU protection instead of RTNL in the future. This patch prepares dev_get_iflink() and nla_put_iflink() to run either with RTNL or RCU held. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 4 ++-- drivers/net/can/vxcan.c | 2 +- drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 2 +- drivers/net/ipvlan/ipvlan_main.c | 2 +- drivers/net/macsec.c | 2 +- drivers/net/macvlan.c | 2 +- drivers/net/netkit.c | 2 +- drivers/net/veth.c | 2 +- drivers/net/wireless/virtual/virt_wifi.c | 2 +- net/8021q/vlan_dev.c | 4 ++-- net/core/dev.c | 2 +- net/core/rtnetlink.c | 6 +++--- net/dsa/user.c | 2 +- net/ieee802154/6lowpan/core.c | 2 +- net/ipv6/ip6_tunnel.c | 2 +- net/xfrm/xfrm_interface_core.c | 2 +- 16 files changed, 20 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 7a5be705d7183..6f2a688fccbfb 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1272,10 +1272,10 @@ static int ipoib_get_iflink(const struct net_device *dev) /* parent interface */ if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) - return dev->ifindex; + return READ_ONCE(dev->ifindex); /* child/vlan interface */ - return priv->parent->ifindex; + return READ_ONCE(priv->parent->ifindex); } static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr) diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c index 98c669ad51414..f7fabba707ea6 100644 --- a/drivers/net/can/vxcan.c +++ b/drivers/net/can/vxcan.c @@ -119,7 +119,7 @@ static int vxcan_get_iflink(const struct net_device *dev) rcu_read_lock(); peer = rcu_dereference(priv->peer); - iflink = peer ? peer->ifindex : 0; + iflink = peer ? READ_ONCE(peer->ifindex) : 0; rcu_read_unlock(); return iflink; diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index 046b5f7d8e7ca..9d2a9562c96ff 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -98,7 +98,7 @@ static int rmnet_vnd_get_iflink(const struct net_device *dev) { struct rmnet_priv *priv = netdev_priv(dev); - return priv->real_dev->ifindex; + return READ_ONCE(priv->real_dev->ifindex); } static int rmnet_vnd_init(struct net_device *dev) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index df7c43a109e1a..5920f7e633523 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -349,7 +349,7 @@ static int ipvlan_get_iflink(const struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); - return ipvlan->phy_dev->ifindex; + return READ_ONCE(ipvlan->phy_dev->ifindex); } static const struct net_device_ops ipvlan_netdev_ops = { diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 7f5426285c61b..4b5513c9c2bef 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3753,7 +3753,7 @@ static void macsec_get_stats64(struct net_device *dev, static int macsec_get_iflink(const struct net_device *dev) { - return macsec_priv(dev)->real_dev->ifindex; + return READ_ONCE(macsec_priv(dev)->real_dev->ifindex); } static const struct net_device_ops macsec_netdev_ops = { diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index a3cc665757e87..0cec2783a3e71 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1158,7 +1158,7 @@ static int macvlan_dev_get_iflink(const struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); - return vlan->lowerdev->ifindex; + return READ_ONCE(vlan->lowerdev->ifindex); } static const struct ethtool_ops macvlan_ethtool_ops = { diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c index 39171380ccf29..a4d2e76a8d587 100644 --- a/drivers/net/netkit.c +++ b/drivers/net/netkit.c @@ -145,7 +145,7 @@ static int netkit_get_iflink(const struct net_device *dev) rcu_read_lock(); peer = rcu_dereference(nk->peer); if (peer) - iflink = peer->ifindex; + iflink = READ_ONCE(peer->ifindex); rcu_read_unlock(); return iflink; } diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 500b9dfccd08e..dd5aa8ab65a86 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1461,7 +1461,7 @@ static int veth_get_iflink(const struct net_device *dev) rcu_read_lock(); peer = rcu_dereference(priv->peer); - iflink = peer ? peer->ifindex : 0; + iflink = peer ? READ_ONCE(peer->ifindex) : 0; rcu_read_unlock(); return iflink; diff --git a/drivers/net/wireless/virtual/virt_wifi.c b/drivers/net/wireless/virtual/virt_wifi.c index ba14d83353a4b..6a84ec58d618b 100644 --- a/drivers/net/wireless/virtual/virt_wifi.c +++ b/drivers/net/wireless/virtual/virt_wifi.c @@ -453,7 +453,7 @@ static int virt_wifi_net_device_get_iflink(const struct net_device *dev) { struct virt_wifi_netdev_priv *priv = netdev_priv(dev); - return priv->lowerdev->ifindex; + return READ_ONCE(priv->lowerdev->ifindex); } static const struct net_device_ops virt_wifi_ops = { diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index df55525182517..39876eff51d21 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -762,9 +762,9 @@ static void vlan_dev_netpoll_cleanup(struct net_device *dev) static int vlan_dev_get_iflink(const struct net_device *dev) { - struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; + const struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; - return real_dev->ifindex; + return READ_ONCE(real_dev->ifindex); } static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx, diff --git a/net/core/dev.c b/net/core/dev.c index c588808be77f5..0628d8ff1ed93 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -641,7 +641,7 @@ int dev_get_iflink(const struct net_device *dev) if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink) return dev->netdev_ops->ndo_get_iflink(dev); - return dev->ifindex; + return READ_ONCE(dev->ifindex); } EXPORT_SYMBOL(dev_get_iflink); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c54dbe05c4c5d..060543fe7919c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1611,10 +1611,10 @@ static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev) static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev, bool force) { - int ifindex = dev_get_iflink(dev); + int iflink = dev_get_iflink(dev); - if (force || dev->ifindex != ifindex) - return nla_put_u32(skb, IFLA_LINK, ifindex); + if (force || READ_ONCE(dev->ifindex) != iflink) + return nla_put_u32(skb, IFLA_LINK, iflink); return 0; } diff --git a/net/dsa/user.c b/net/dsa/user.c index 4d53c76a9840a..9c42a6edcdc8a 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -352,7 +352,7 @@ void dsa_user_mii_bus_init(struct dsa_switch *ds) /* user device handling ****************************************************/ static int dsa_user_get_iflink(const struct net_device *dev) { - return dsa_user_to_conduit(dev)->ifindex; + return READ_ONCE(dsa_user_to_conduit(dev)->ifindex); } static int dsa_user_open(struct net_device *dev) diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index e643f52663f9b..77b4e92027c5d 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -93,7 +93,7 @@ static int lowpan_neigh_construct(struct net_device *dev, struct neighbour *n) static int lowpan_get_iflink(const struct net_device *dev) { - return lowpan_802154_dev(dev)->wdev->ifindex; + return READ_ONCE(lowpan_802154_dev(dev)->wdev->ifindex); } static const struct net_device_ops lowpan_netdev_ops = { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 44406c28445dc..5fd07581efafe 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1756,7 +1756,7 @@ int ip6_tnl_get_iflink(const struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - return t->parms.link; + return READ_ONCE(t->parms.link); } EXPORT_SYMBOL(ip6_tnl_get_iflink); diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index dafefef3cf51a..717855b9acf1c 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -727,7 +727,7 @@ static int xfrmi_get_iflink(const struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); - return xi->p.link; + return READ_ONCE(xi->p.link); } static const struct net_device_ops xfrmi_netdev_ops = { -- cgit 1.2.3-korg From 4ad268136421dc9813280c55940eca00796420e5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2024 10:50:09 +0000 Subject: ipv6: prepare inet6_fill_ifla6_attrs() for RCU We want to no longer hold RTNL while calling inet6_fill_ifla6_attrs() in the future. Add needed READ_ONCE()/WRITE_ONCE() annotations. Signed-off-by: Eric Dumazet Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 163 ++++++++++++++++++++++++++++------------------------ net/ipv6/ndisc.c | 2 +- 2 files changed, 90 insertions(+), 75 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c669ea266ab71..a56dad307fe31 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3492,7 +3492,8 @@ static void addrconf_dev_config(struct net_device *dev) /* this device type has no EUI support */ if (dev->type == ARPHRD_NONE && idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) - idev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_RANDOM; + WRITE_ONCE(idev->cnf.addr_gen_mode, + IN6_ADDR_GEN_MODE_RANDOM); addrconf_addr_gen(idev, false); } @@ -3764,7 +3765,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, rt6_mtu_change(dev, dev->mtu); idev->cnf.mtu6 = dev->mtu; } - idev->tstamp = jiffies; + WRITE_ONCE(idev->tstamp, jiffies); inet6_ifinfo_notify(RTM_NEWLINK, idev); /* @@ -4006,7 +4007,7 @@ restart: ipv6_mc_down(idev); } - idev->tstamp = jiffies; + WRITE_ONCE(idev->tstamp, jiffies); idev->ra_mtu = 0; /* Last: Shot the device (if unregistered) */ @@ -5634,87 +5635,97 @@ errout: rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err); } -static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, - __s32 *array, int bytes) +static void ipv6_store_devconf(const struct ipv6_devconf *cnf, + __s32 *array, int bytes) { BUG_ON(bytes < (DEVCONF_MAX * 4)); memset(array, 0, bytes); - array[DEVCONF_FORWARDING] = cnf->forwarding; - array[DEVCONF_HOPLIMIT] = cnf->hop_limit; - array[DEVCONF_MTU6] = cnf->mtu6; - array[DEVCONF_ACCEPT_RA] = cnf->accept_ra; - array[DEVCONF_ACCEPT_REDIRECTS] = cnf->accept_redirects; - array[DEVCONF_AUTOCONF] = cnf->autoconf; - array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits; - array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits; + array[DEVCONF_FORWARDING] = READ_ONCE(cnf->forwarding); + array[DEVCONF_HOPLIMIT] = READ_ONCE(cnf->hop_limit); + array[DEVCONF_MTU6] = READ_ONCE(cnf->mtu6); + array[DEVCONF_ACCEPT_RA] = READ_ONCE(cnf->accept_ra); + array[DEVCONF_ACCEPT_REDIRECTS] = READ_ONCE(cnf->accept_redirects); + array[DEVCONF_AUTOCONF] = READ_ONCE(cnf->autoconf); + array[DEVCONF_DAD_TRANSMITS] = READ_ONCE(cnf->dad_transmits); + array[DEVCONF_RTR_SOLICITS] = READ_ONCE(cnf->rtr_solicits); array[DEVCONF_RTR_SOLICIT_INTERVAL] = - jiffies_to_msecs(cnf->rtr_solicit_interval); + jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_interval)); array[DEVCONF_RTR_SOLICIT_MAX_INTERVAL] = - jiffies_to_msecs(cnf->rtr_solicit_max_interval); + jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_max_interval)); array[DEVCONF_RTR_SOLICIT_DELAY] = - jiffies_to_msecs(cnf->rtr_solicit_delay); - array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version; + jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_delay)); + array[DEVCONF_FORCE_MLD_VERSION] = READ_ONCE(cnf->force_mld_version); array[DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL] = - jiffies_to_msecs(cnf->mldv1_unsolicited_report_interval); + jiffies_to_msecs(READ_ONCE(cnf->mldv1_unsolicited_report_interval)); array[DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] = - jiffies_to_msecs(cnf->mldv2_unsolicited_report_interval); - array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr; - array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft; - array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft; - array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry; - array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor; - array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses; - array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr; - array[DEVCONF_RA_DEFRTR_METRIC] = cnf->ra_defrtr_metric; - array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] = cnf->accept_ra_min_hop_limit; - array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo; + jiffies_to_msecs(READ_ONCE(cnf->mldv2_unsolicited_report_interval)); + array[DEVCONF_USE_TEMPADDR] = READ_ONCE(cnf->use_tempaddr); + array[DEVCONF_TEMP_VALID_LFT] = READ_ONCE(cnf->temp_valid_lft); + array[DEVCONF_TEMP_PREFERED_LFT] = READ_ONCE(cnf->temp_prefered_lft); + array[DEVCONF_REGEN_MAX_RETRY] = READ_ONCE(cnf->regen_max_retry); + array[DEVCONF_MAX_DESYNC_FACTOR] = READ_ONCE(cnf->max_desync_factor); + array[DEVCONF_MAX_ADDRESSES] = READ_ONCE(cnf->max_addresses); + array[DEVCONF_ACCEPT_RA_DEFRTR] = READ_ONCE(cnf->accept_ra_defrtr); + array[DEVCONF_RA_DEFRTR_METRIC] = READ_ONCE(cnf->ra_defrtr_metric); + array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] = + READ_ONCE(cnf->accept_ra_min_hop_limit); + array[DEVCONF_ACCEPT_RA_PINFO] = READ_ONCE(cnf->accept_ra_pinfo); #ifdef CONFIG_IPV6_ROUTER_PREF - array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref; + array[DEVCONF_ACCEPT_RA_RTR_PREF] = READ_ONCE(cnf->accept_ra_rtr_pref); array[DEVCONF_RTR_PROBE_INTERVAL] = - jiffies_to_msecs(cnf->rtr_probe_interval); + jiffies_to_msecs(READ_ONCE(cnf->rtr_probe_interval)); #ifdef CONFIG_IPV6_ROUTE_INFO - array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] = cnf->accept_ra_rt_info_min_plen; - array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; + array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] = + READ_ONCE(cnf->accept_ra_rt_info_min_plen); + array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = + READ_ONCE(cnf->accept_ra_rt_info_max_plen); #endif #endif - array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp; - array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route; + array[DEVCONF_PROXY_NDP] = READ_ONCE(cnf->proxy_ndp); + array[DEVCONF_ACCEPT_SOURCE_ROUTE] = + READ_ONCE(cnf->accept_source_route); #ifdef CONFIG_IPV6_OPTIMISTIC_DAD - array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad; - array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic; + array[DEVCONF_OPTIMISTIC_DAD] = READ_ONCE(cnf->optimistic_dad); + array[DEVCONF_USE_OPTIMISTIC] = READ_ONCE(cnf->use_optimistic); #endif #ifdef CONFIG_IPV6_MROUTE array[DEVCONF_MC_FORWARDING] = atomic_read(&cnf->mc_forwarding); #endif - array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6; - array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad; - array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao; - array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify; - array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc; - array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local; - array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu; - array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = cnf->ignore_routes_with_linkdown; + array[DEVCONF_DISABLE_IPV6] = READ_ONCE(cnf->disable_ipv6); + array[DEVCONF_ACCEPT_DAD] = READ_ONCE(cnf->accept_dad); + array[DEVCONF_FORCE_TLLAO] = READ_ONCE(cnf->force_tllao); + array[DEVCONF_NDISC_NOTIFY] = READ_ONCE(cnf->ndisc_notify); + array[DEVCONF_SUPPRESS_FRAG_NDISC] = + READ_ONCE(cnf->suppress_frag_ndisc); + array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = + READ_ONCE(cnf->accept_ra_from_local); + array[DEVCONF_ACCEPT_RA_MTU] = READ_ONCE(cnf->accept_ra_mtu); + array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = + READ_ONCE(cnf->ignore_routes_with_linkdown); /* we omit DEVCONF_STABLE_SECRET for now */ - array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only; - array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast; - array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na; - array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down; - array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled; + array[DEVCONF_USE_OIF_ADDRS_ONLY] = READ_ONCE(cnf->use_oif_addrs_only); + array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = + READ_ONCE(cnf->drop_unicast_in_l2_multicast); + array[DEVCONF_DROP_UNSOLICITED_NA] = READ_ONCE(cnf->drop_unsolicited_na); + array[DEVCONF_KEEP_ADDR_ON_DOWN] = READ_ONCE(cnf->keep_addr_on_down); + array[DEVCONF_SEG6_ENABLED] = READ_ONCE(cnf->seg6_enabled); #ifdef CONFIG_IPV6_SEG6_HMAC - array[DEVCONF_SEG6_REQUIRE_HMAC] = cnf->seg6_require_hmac; + array[DEVCONF_SEG6_REQUIRE_HMAC] = READ_ONCE(cnf->seg6_require_hmac); #endif - array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad; - array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode; - array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy; - array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass; - array[DEVCONF_RPL_SEG_ENABLED] = cnf->rpl_seg_enabled; - array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled; - array[DEVCONF_IOAM6_ID] = cnf->ioam6_id; - array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide; - array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier; - array[DEVCONF_ACCEPT_UNTRACKED_NA] = cnf->accept_untracked_na; - array[DEVCONF_ACCEPT_RA_MIN_LFT] = cnf->accept_ra_min_lft; + array[DEVCONF_ENHANCED_DAD] = READ_ONCE(cnf->enhanced_dad); + array[DEVCONF_ADDR_GEN_MODE] = READ_ONCE(cnf->addr_gen_mode); + array[DEVCONF_DISABLE_POLICY] = READ_ONCE(cnf->disable_policy); + array[DEVCONF_NDISC_TCLASS] = READ_ONCE(cnf->ndisc_tclass); + array[DEVCONF_RPL_SEG_ENABLED] = READ_ONCE(cnf->rpl_seg_enabled); + array[DEVCONF_IOAM6_ENABLED] = READ_ONCE(cnf->ioam6_enabled); + array[DEVCONF_IOAM6_ID] = READ_ONCE(cnf->ioam6_id); + array[DEVCONF_IOAM6_ID_WIDE] = READ_ONCE(cnf->ioam6_id_wide); + array[DEVCONF_NDISC_EVICT_NOCARRIER] = + READ_ONCE(cnf->ndisc_evict_nocarrier); + array[DEVCONF_ACCEPT_UNTRACKED_NA] = + READ_ONCE(cnf->accept_untracked_na); + array[DEVCONF_ACCEPT_RA_MIN_LFT] = READ_ONCE(cnf->accept_ra_min_lft); } static inline size_t inet6_ifla6_size(void) @@ -5794,13 +5805,14 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev, u32 ext_filter_mask) { - struct nlattr *nla; struct ifla_cacheinfo ci; + struct nlattr *nla; + u32 ra_mtu; - if (nla_put_u32(skb, IFLA_INET6_FLAGS, idev->if_flags)) + if (nla_put_u32(skb, IFLA_INET6_FLAGS, READ_ONCE(idev->if_flags))) goto nla_put_failure; ci.max_reasm_len = IPV6_MAXPLEN; - ci.tstamp = cstamp_delta(idev->tstamp); + ci.tstamp = cstamp_delta(READ_ONCE(idev->tstamp)); ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time); ci.retrans_time = jiffies_to_msecs(NEIGH_VAR(idev->nd_parms, RETRANS_TIME)); if (nla_put(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci)) @@ -5832,11 +5844,12 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev, memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla)); read_unlock_bh(&idev->lock); - if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode)) + if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, + READ_ONCE(idev->cnf.addr_gen_mode))) goto nla_put_failure; - if (idev->ra_mtu && - nla_put_u32(skb, IFLA_INET6_RA_MTU, idev->ra_mtu)) + ra_mtu = READ_ONCE(idev->ra_mtu); + if (ra_mtu && nla_put_u32(skb, IFLA_INET6_RA_MTU, ra_mtu)) goto nla_put_failure; return 0; @@ -6037,7 +6050,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla, if (tb[IFLA_INET6_ADDR_GEN_MODE]) { u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]); - idev->cnf.addr_gen_mode = mode; + WRITE_ONCE(idev->cnf.addr_gen_mode, mode); } return 0; @@ -6516,7 +6529,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, } if (idev->cnf.addr_gen_mode != new_val) { - idev->cnf.addr_gen_mode = new_val; + WRITE_ONCE(idev->cnf.addr_gen_mode, new_val); addrconf_init_auto_addrs(idev->dev); } } else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) { @@ -6527,7 +6540,8 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, idev = __in6_dev_get(dev); if (idev && idev->cnf.addr_gen_mode != new_val) { - idev->cnf.addr_gen_mode = new_val; + WRITE_ONCE(idev->cnf.addr_gen_mode, + new_val); addrconf_init_auto_addrs(idev->dev); } } @@ -6592,14 +6606,15 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write, struct inet6_dev *idev = __in6_dev_get(dev); if (idev) { - idev->cnf.addr_gen_mode = - IN6_ADDR_GEN_MODE_STABLE_PRIVACY; + WRITE_ONCE(idev->cnf.addr_gen_mode, + IN6_ADDR_GEN_MODE_STABLE_PRIVACY); } } } else { struct inet6_dev *idev = ctl->extra1; - idev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; + WRITE_ONCE(idev->cnf.addr_gen_mode, + IN6_ADDR_GEN_MODE_STABLE_PRIVACY); } out: diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 73cb31afe9354..8523f0595b018 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1975,7 +1975,7 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void *buffer, if (ctl->data == &NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME)) idev->nd_parms->reachable_time = neigh_rand_reach_time(NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME)); - idev->tstamp = jiffies; + WRITE_ONCE(idev->tstamp, jiffies); inet6_ifinfo_notify(RTM_NEWLINK, idev); in6_dev_put(idev); } -- cgit 1.2.3-korg From 8afc7a78d55de726b2747d7775c54def79509ec5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2024 10:50:10 +0000 Subject: ipv6: prepare inet6_fill_ifinfo() for RCU protection We want to use RCU protection instead of RTNL for inet6_fill_ifinfo(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++-- net/core/dev.c | 4 ++-- net/ipv6/addrconf.c | 11 +++++++---- 3 files changed, 13 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f07c8374f29cb..09023e44db4e2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4354,8 +4354,10 @@ static inline bool netif_testing(const struct net_device *dev) */ static inline bool netif_oper_up(const struct net_device *dev) { - return (dev->operstate == IF_OPER_UP || - dev->operstate == IF_OPER_UNKNOWN /* backward compat */); + unsigned int operstate = READ_ONCE(dev->operstate); + + return operstate == IF_OPER_UP || + operstate == IF_OPER_UNKNOWN /* backward compat */; } /** diff --git a/net/core/dev.c b/net/core/dev.c index 0628d8ff1ed93..275fd5259a4a9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8632,12 +8632,12 @@ unsigned int dev_get_flags(const struct net_device *dev) { unsigned int flags; - flags = (dev->flags & ~(IFF_PROMISC | + flags = (READ_ONCE(dev->flags) & ~(IFF_PROMISC | IFF_ALLMULTI | IFF_RUNNING | IFF_LOWER_UP | IFF_DORMANT)) | - (dev->gflags & (IFF_PROMISC | + (READ_ONCE(dev->gflags) & (IFF_PROMISC | IFF_ALLMULTI)); if (netif_running(dev)) { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a56dad307fe31..daa81556d1180 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6062,6 +6062,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, struct net_device *dev = idev->dev; struct ifinfomsg *hdr; struct nlmsghdr *nlh; + int ifindex, iflink; void *protoinfo; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags); @@ -6072,16 +6073,18 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, hdr->ifi_family = AF_INET6; hdr->__ifi_pad = 0; hdr->ifi_type = dev->type; - hdr->ifi_index = dev->ifindex; + ifindex = READ_ONCE(dev->ifindex); + hdr->ifi_index = ifindex; hdr->ifi_flags = dev_get_flags(dev); hdr->ifi_change = 0; + iflink = dev_get_iflink(dev); if (nla_put_string(skb, IFLA_IFNAME, dev->name) || (dev->addr_len && nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || - nla_put_u32(skb, IFLA_MTU, dev->mtu) || - (dev->ifindex != dev_get_iflink(dev) && - nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) || + nla_put_u32(skb, IFLA_MTU, READ_ONCE(dev->mtu)) || + (ifindex != iflink && + nla_put_u32(skb, IFLA_LINK, iflink)) || nla_put_u8(skb, IFLA_OPERSTATE, netif_running(dev) ? READ_ONCE(dev->operstate) : IF_OPER_DOWN)) goto nla_put_failure; -- cgit 1.2.3-korg From ac14ad9755d4f8f286fd2cd710aa5dcf9a3c777a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2024 10:50:11 +0000 Subject: ipv6: use xarray iterator to implement inet6_dump_ifinfo() Prepare inet6_dump_ifinfo() to run with RCU protection instead of RTNL and use for_each_netdev_dump() interface. Also properly return 0 at the end of a dump, avoiding an extra recvmsg() system call and RTNL acquisition. Note that RTNL-less dumps need core changes, coming later in the series. Signed-off-by: Eric Dumazet Reviewed-by: Ido Schimmel Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 46 +++++++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index daa81556d1180..6556136d2b8dd 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6132,50 +6132,42 @@ static int inet6_valid_dump_ifinfo(const struct nlmsghdr *nlh, static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - int h, s_h; - int idx = 0, s_idx; + struct { + unsigned long ifindex; + } *ctx = (void *)cb->ctx; struct net_device *dev; struct inet6_dev *idev; - struct hlist_head *head; + int err; /* only requests using strict checking can pass data to * influence the dump */ if (cb->strict_check) { - int err = inet6_valid_dump_ifinfo(cb->nlh, cb->extack); + err = inet6_valid_dump_ifinfo(cb->nlh, cb->extack); if (err < 0) return err; } - s_h = cb->args[0]; - s_idx = cb->args[1]; - + err = 0; rcu_read_lock(); - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - idx = 0; - head = &net->dev_index_head[h]; - hlist_for_each_entry_rcu(dev, head, index_hlist) { - if (idx < s_idx) - goto cont; - idev = __in6_dev_get(dev); - if (!idev) - goto cont; - if (inet6_fill_ifinfo(skb, idev, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWLINK, NLM_F_MULTI) < 0) - goto out; -cont: - idx++; + for_each_netdev_dump(net, dev, ctx->ifindex) { + idev = __in6_dev_get(dev); + if (!idev) + continue; + err = inet6_fill_ifinfo(skb, idev, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWLINK, NLM_F_MULTI); + if (err < 0) { + if (likely(skb->len)) + err = skb->len; + break; } } -out: rcu_read_unlock(); - cb->args[1] = idx; - cb->args[0] = h; - return skb->len; + return err; } void inet6_ifinfo_notify(int event, struct inet6_dev *idev) -- cgit 1.2.3-korg From 6647b338fc5c6741736fe51a25fc2c0bec6398b8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2024 10:50:12 +0000 Subject: netlink: fix netlink_diag_dump() return value __netlink_diag_dump() returns 1 if the dump is not complete, zero if no error occurred. If err variable is zero, this means the dump is complete: We should not return skb->len in this case, but 0. This allows NLMSG_DONE to be appended to the skb. User space does not have to call us again only to get NLMSG_DONE. Signed-off-by: Eric Dumazet Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- net/netlink/diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netlink/diag.c b/net/netlink/diag.c index e12c90d5f6ad2..61981e01fd6ff 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -207,7 +207,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) err = __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num); } - return err < 0 ? err : skb->len; + return err <= 0 ? err : skb->len; } static int netlink_diag_dump_done(struct netlink_callback *cb) -- cgit 1.2.3-korg From b5590270068c4324dac4a2b5a4a156e02e21339f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2024 10:50:13 +0000 Subject: netlink: hold nlk->cb_mutex longer in __netlink_dump_start() __netlink_dump_start() releases nlk->cb_mutex right before calling netlink_dump() which grabs it again. This seems dangerous, even if KASAN did not bother yet. Add a @lock_taken parameter to netlink_dump() to let it grab the mutex if called from netlink_recvmsg() only. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 9c962347cf859..94f3860526bfa 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -130,7 +130,7 @@ static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = { "nlk_cb_mutex-MAX_LINKS" }; -static int netlink_dump(struct sock *sk); +static int netlink_dump(struct sock *sk, bool lock_taken); /* nl_table locking explained: * Lookup and traversal are protected with an RCU read-side lock. Insertion @@ -1987,7 +1987,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (READ_ONCE(nlk->cb_running) && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { - ret = netlink_dump(sk); + ret = netlink_dump(sk, false); if (ret) { WRITE_ONCE(sk->sk_err, -ret); sk_error_report(sk); @@ -2196,7 +2196,7 @@ static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb, return 0; } -static int netlink_dump(struct sock *sk) +static int netlink_dump(struct sock *sk, bool lock_taken) { struct netlink_sock *nlk = nlk_sk(sk); struct netlink_ext_ack extack = {}; @@ -2208,7 +2208,8 @@ static int netlink_dump(struct sock *sk) int alloc_min_size; int alloc_size; - mutex_lock(nlk->cb_mutex); + if (!lock_taken) + mutex_lock(nlk->cb_mutex); if (!nlk->cb_running) { err = -EINVAL; goto errout_skb; @@ -2365,9 +2366,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, WRITE_ONCE(nlk->cb_running, true); nlk->dump_done_errno = INT_MAX; - mutex_unlock(nlk->cb_mutex); - - ret = netlink_dump(sk); + ret = netlink_dump(sk, true); sock_put(sk); -- cgit 1.2.3-korg From e39951d965bf58b5aba7f61dc1140dcb8271af22 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2024 10:50:14 +0000 Subject: rtnetlink: change nlk->cb_mutex role In commit af65bdfce98d ("[NETLINK]: Switch cb_lock spinlock to mutex and allow to override it"), Patrick McHardy used a common mutex to protect both nlk->cb and the dump() operations. The override is used for rtnl dumps, registered with rntl_register() and rntl_register_module(). We want to be able to opt-out some dump() operations to not acquire RTNL, so we need to protect nlk->cb with a per socket mutex. This patch renames nlk->cb_def_mutex to nlk->nl_cb_mutex The optional pointer to the mutex used to protect dump() call is stored in nlk->dump_cb_mutex Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 32 ++++++++++++++++++-------------- net/netlink/af_netlink.h | 5 +++-- 2 files changed, 21 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 94f3860526bfa..84cad7be6d433 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -636,7 +636,7 @@ static struct proto netlink_proto = { }; static int __netlink_create(struct net *net, struct socket *sock, - struct mutex *cb_mutex, int protocol, + struct mutex *dump_cb_mutex, int protocol, int kern) { struct sock *sk; @@ -651,15 +651,11 @@ static int __netlink_create(struct net *net, struct socket *sock, sock_init_data(sock, sk); nlk = nlk_sk(sk); - if (cb_mutex) { - nlk->cb_mutex = cb_mutex; - } else { - nlk->cb_mutex = &nlk->cb_def_mutex; - mutex_init(nlk->cb_mutex); - lockdep_set_class_and_name(nlk->cb_mutex, + mutex_init(&nlk->nl_cb_mutex); + lockdep_set_class_and_name(&nlk->nl_cb_mutex, nlk_cb_mutex_keys + protocol, nlk_cb_mutex_key_strings[protocol]); - } + nlk->dump_cb_mutex = dump_cb_mutex; init_waitqueue_head(&nlk->wait); sk->sk_destruct = netlink_sock_destruct; @@ -2209,7 +2205,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken) int alloc_size; if (!lock_taken) - mutex_lock(nlk->cb_mutex); + mutex_lock(&nlk->nl_cb_mutex); if (!nlk->cb_running) { err = -EINVAL; goto errout_skb; @@ -2261,14 +2257,22 @@ static int netlink_dump(struct sock *sk, bool lock_taken) netlink_skb_set_owner_r(skb, sk); if (nlk->dump_done_errno > 0) { + struct mutex *extra_mutex = nlk->dump_cb_mutex; + cb->extack = &extack; + + if (extra_mutex) + mutex_lock(extra_mutex); nlk->dump_done_errno = cb->dump(skb, cb); + if (extra_mutex) + mutex_unlock(extra_mutex); + cb->extack = NULL; } if (nlk->dump_done_errno > 0 || skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) { - mutex_unlock(nlk->cb_mutex); + mutex_unlock(&nlk->nl_cb_mutex); if (sk_filter(sk, skb)) kfree_skb(skb); @@ -2302,13 +2306,13 @@ static int netlink_dump(struct sock *sk, bool lock_taken) WRITE_ONCE(nlk->cb_running, false); module = cb->module; skb = cb->skb; - mutex_unlock(nlk->cb_mutex); + mutex_unlock(&nlk->nl_cb_mutex); module_put(module); consume_skb(skb); return 0; errout_skb: - mutex_unlock(nlk->cb_mutex); + mutex_unlock(&nlk->nl_cb_mutex); kfree_skb(skb); return err; } @@ -2331,7 +2335,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, } nlk = nlk_sk(sk); - mutex_lock(nlk->cb_mutex); + mutex_lock(&nlk->nl_cb_mutex); /* A dump is in progress... */ if (nlk->cb_running) { ret = -EBUSY; @@ -2382,7 +2386,7 @@ error_put: module_put(control->module); error_unlock: sock_put(sk); - mutex_unlock(nlk->cb_mutex); + mutex_unlock(&nlk->nl_cb_mutex); error_free: kfree_skb(skb); return ret; diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 2145979b9986a..9751e29d4bbb9 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -39,8 +39,9 @@ struct netlink_sock { bool cb_running; int dump_done_errno; struct netlink_callback cb; - struct mutex *cb_mutex; - struct mutex cb_def_mutex; + struct mutex nl_cb_mutex; + + struct mutex *dump_cb_mutex; void (*netlink_rcv)(struct sk_buff *skb); int (*netlink_bind)(struct net *net, int group); void (*netlink_unbind)(struct net *net, int group); -- cgit 1.2.3-korg From 386520e0ecc01004d3a29c70c5a77d4bbf8a8420 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2024 10:50:15 +0000 Subject: rtnetlink: add RTNL_FLAG_DUMP_UNLOCKED flag Similarly to RTNL_FLAG_DOIT_UNLOCKED, this new flag allows dump operations registered via rtnl_register() or rtnl_register_module() to opt-out from RTNL protection. Signed-off-by: Eric Dumazet Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 ++ include/net/rtnetlink.h | 1 + net/core/rtnetlink.c | 2 ++ net/netlink/af_netlink.c | 3 +++ 4 files changed, 8 insertions(+) (limited to 'net') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 1a4445bf2ab9a..5df7340d4dabc 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -291,6 +291,7 @@ struct netlink_callback { u16 answer_flags; u32 min_dump_alloc; unsigned int prev_seq, seq; + int flags; bool strict_check; union { u8 ctx[48]; @@ -323,6 +324,7 @@ struct netlink_dump_control { void *data; struct module *module; u32 min_dump_alloc; + int flags; }; int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 6506221c5fe31..3bfb80bad1739 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -12,6 +12,7 @@ typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); enum rtnl_link_flags { RTNL_FLAG_DOIT_UNLOCKED = BIT(0), RTNL_FLAG_BULK_DEL_SUPPORTED = BIT(1), + RTNL_FLAG_DUMP_UNLOCKED = BIT(2), }; enum rtnl_kinds { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 060543fe7919c..1b26dfa5668d2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -6532,6 +6532,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, } owner = link->owner; dumpit = link->dumpit; + flags = link->flags; if (type == RTM_GETLINK - RTM_BASE) min_dump_alloc = rtnl_calcit(skb, nlh); @@ -6549,6 +6550,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, .dump = dumpit, .min_dump_alloc = min_dump_alloc, .module = owner, + .flags = flags, }; err = netlink_dump_start(rtnl, skb, nlh, &c); /* netlink_dump_start() will keep a reference on diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 84cad7be6d433..be5792b638aa5 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2261,6 +2261,8 @@ static int netlink_dump(struct sock *sk, bool lock_taken) cb->extack = &extack; + if (cb->flags & RTNL_FLAG_DUMP_UNLOCKED) + extra_mutex = NULL; if (extra_mutex) mutex_lock(extra_mutex); nlk->dump_done_errno = cb->dump(skb, cb); @@ -2355,6 +2357,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, cb->data = control->data; cb->module = control->module; cb->min_dump_alloc = control->min_dump_alloc; + cb->flags = control->flags; cb->skb = skb; cb->strict_check = nlk_test_bit(STRICT_CHK, NETLINK_CB(skb).sk); -- cgit 1.2.3-korg From 69fdb7e411b6d01eb08447e672302b69c9e176ad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2024 10:50:16 +0000 Subject: ipv6: switch inet6_dump_ifinfo() to RCU protection No longer hold RTNL while calling inet6_dump_ifinfo() Signed-off-by: Eric Dumazet Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6556136d2b8dd..a280614b37652 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -7462,7 +7462,7 @@ int __init addrconf_init(void) rtnl_af_register(&inet6_ops); err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETLINK, - NULL, inet6_dump_ifinfo, 0); + NULL, inet6_dump_ifinfo, RTNL_FLAG_DUMP_UNLOCKED); if (err < 0) goto errout; -- cgit 1.2.3-korg From 22e36ea9f5d7707ae3d64c497d172f4ef735c353 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2024 10:50:17 +0000 Subject: inet: allow ip_valid_fib_dump_req() to be called with RTNL or RCU Add a new field into struct fib_dump_filter, to let callers tell if they use RTNL locking or RCU. This is used in the following patch, when inet_dump_fib() no longer holds RTNL. Signed-off-by: Eric Dumazet Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 + net/ipv4/fib_frontend.c | 15 +++++++++++---- net/ipv4/ipmr.c | 4 +++- net/ipv6/ip6_fib.c | 7 +++++-- net/ipv6/ip6mr.c | 4 +++- net/mpls/af_mpls.c | 4 +++- 6 files changed, 26 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index d4667b7797e3e..9b2f69ba5e498 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -264,6 +264,7 @@ struct fib_dump_filter { bool filter_set; bool dump_routes; bool dump_exceptions; + bool rtnl_held; unsigned char protocol; unsigned char rt_type; unsigned int flags; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 390f4be7f7bec..39f67990e01c1 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -916,7 +916,8 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, struct rtmsg *rtm; int err, i; - ASSERT_RTNL(); + if (filter->rtnl_held) + ASSERT_RTNL(); if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { NL_SET_ERR_MSG(extack, "Invalid header for FIB dump request"); @@ -961,7 +962,10 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, break; case RTA_OIF: ifindex = nla_get_u32(tb[i]); - filter->dev = __dev_get_by_index(net, ifindex); + if (filter->rtnl_held) + filter->dev = __dev_get_by_index(net, ifindex); + else + filter->dev = dev_get_by_index_rcu(net, ifindex); if (!filter->dev) return -ENODEV; break; @@ -983,8 +987,11 @@ EXPORT_SYMBOL_GPL(ip_valid_fib_dump_req); static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { - struct fib_dump_filter filter = { .dump_routes = true, - .dump_exceptions = true }; + struct fib_dump_filter filter = { + .dump_routes = true, + .dump_exceptions = true, + .rtnl_held = true, + }; const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); unsigned int h, s_h; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5561bce3a37e8..0708ac6f6c582 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2587,7 +2587,9 @@ errout_free: static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) { - struct fib_dump_filter filter = {}; + struct fib_dump_filter filter = { + .rtnl_held = true, + }; int err; if (cb->strict_check) { diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 6540d877d3693..5c558dc1c6838 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -620,8 +620,11 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { - struct rt6_rtnl_dump_arg arg = { .filter.dump_exceptions = true, - .filter.dump_routes = true }; + struct rt6_rtnl_dump_arg arg = { + .filter.dump_exceptions = true, + .filter.dump_routes = true, + .filter.rtnl_held = true, + }; const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); unsigned int h, s_h; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 1f19743f25406..cb0ee81a068a4 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2592,7 +2592,9 @@ static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; - struct fib_dump_filter filter = {}; + struct fib_dump_filter filter = { + .rtnl_held = true, + }; int err; if (cb->strict_check) { diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 1af29af653885..6dab883a08dda 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -2179,7 +2179,9 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); struct mpls_route __rcu **platform_label; - struct fib_dump_filter filter = {}; + struct fib_dump_filter filter = { + .rtnl_held = true, + }; unsigned int flags = NLM_F_MULTI; size_t platform_labels; unsigned int index; -- cgit 1.2.3-korg From 4ce5dc9316de50e0da84beafe55d1344f829cece Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2024 10:50:19 +0000 Subject: inet: switch inet_dump_fib() to RCU protection No longer hold RTNL while calling inet_dump_fib(). Also change return value for a completed dump: Returning 0 instead of skb->len allows NLMSG_DONE to be appended to the skb. User space does not have to call us again to get a standalone NLMSG_DONE marker. Signed-off-by: Eric Dumazet Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 37 ++++++++++++++++++------------------- net/ipv4/fib_trie.c | 4 ++-- 2 files changed, 20 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 39f67990e01c1..bf3a2214fe29b 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -990,7 +990,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) struct fib_dump_filter filter = { .dump_routes = true, .dump_exceptions = true, - .rtnl_held = true, + .rtnl_held = false, }; const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); @@ -998,12 +998,13 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) unsigned int e = 0, s_e; struct fib_table *tb; struct hlist_head *head; - int dumped = 0, err; + int dumped = 0, err = 0; + rcu_read_lock(); if (cb->strict_check) { err = ip_valid_fib_dump_req(net, nlh, &filter, cb); if (err < 0) - return err; + goto unlock; } else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) { struct rtmsg *rtm = nlmsg_data(nlh); @@ -1012,29 +1013,28 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) /* ipv4 does not use prefix flag */ if (filter.flags & RTM_F_PREFIX) - return skb->len; + goto unlock; if (filter.table_id) { tb = fib_get_table(net, filter.table_id); if (!tb) { if (rtnl_msg_family(cb->nlh) != PF_INET) - return skb->len; + goto unlock; NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist"); - return -ENOENT; + err = -ENOENT; + goto unlock; } - - rcu_read_lock(); err = fib_table_dump(tb, skb, cb, &filter); - rcu_read_unlock(); - return skb->len ? : err; + if (err < 0 && skb->len) + err = skb->len; + goto unlock; } s_h = cb->args[0]; s_e = cb->args[1]; - rcu_read_lock(); - + err = 0; for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { e = 0; head = &net->ipv4.fib_table_hash[h]; @@ -1047,9 +1047,8 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) err = fib_table_dump(tb, skb, cb, &filter); if (err < 0) { if (likely(skb->len)) - goto out; - - goto out_err; + err = skb->len; + goto out; } dumped = 1; next: @@ -1057,13 +1056,12 @@ next: } } out: - err = skb->len; -out_err: - rcu_read_unlock(); cb->args[1] = e; cb->args[0] = h; +unlock: + rcu_read_unlock(); return err; } @@ -1666,5 +1664,6 @@ void __init ip_fib_init(void) rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0); rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0); - rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, 0); + rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, + RTNL_FLAG_DUMP_UNLOCKED); } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 0fc7ab5832d1a..f474106464d2f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2368,7 +2368,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, * and key == 0 means the dump has wrapped around and we are done. */ if (count && !key) - return skb->len; + return 0; while ((l = leaf_walk_rcu(&tp, key)) != NULL) { int err; @@ -2394,7 +2394,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, cb->args[3] = key; cb->args[2] = count; - return skb->len; + return 0; } void __init fib_trie_init(void) -- cgit 1.2.3-korg From 74808e72e0b2d7cac886151198c0330daadaee70 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2024 10:50:20 +0000 Subject: rtnetlink: make rtnl_fill_link_ifmap() RCU ready Use READ_ONCE() to read the following device fields: dev->mem_start dev->mem_end dev->base_addr dev->irq dev->dma dev->if_port Signed-off-by: Eric Dumazet Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1b26dfa5668d2..2d83ab76a3c95 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1455,17 +1455,18 @@ static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb, return 0; } -static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) +static int rtnl_fill_link_ifmap(struct sk_buff *skb, + const struct net_device *dev) { struct rtnl_link_ifmap map; memset(&map, 0, sizeof(map)); - map.mem_start = dev->mem_start; - map.mem_end = dev->mem_end; - map.base_addr = dev->base_addr; - map.irq = dev->irq; - map.dma = dev->dma; - map.port = dev->if_port; + map.mem_start = READ_ONCE(dev->mem_start); + map.mem_end = READ_ONCE(dev->mem_end); + map.base_addr = READ_ONCE(dev->base_addr); + map.irq = READ_ONCE(dev->irq); + map.dma = READ_ONCE(dev->dma); + map.port = READ_ONCE(dev->if_port); if (nla_put_64bit(skb, IFLA_MAP, sizeof(map), &map, IFLA_PAD)) return -EMSGSIZE; @@ -1875,9 +1876,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, goto nla_put_failure; } - if (rtnl_fill_link_ifmap(skb, dev)) - goto nla_put_failure; - if (dev->addr_len) { if (nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr) || nla_put(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast)) @@ -1927,6 +1925,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, rcu_read_lock(); if (rtnl_fill_link_af(skb, dev, ext_filter_mask)) goto nla_put_failure_rcu; + if (rtnl_fill_link_ifmap(skb, dev)) + goto nla_put_failure_rcu; + rcu_read_unlock(); if (rtnl_fill_prop_list(skb, dev)) -- cgit 1.2.3-korg From 0ec4e48c3a233820e0bce1f5ba9ed3e4520f90e9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2024 10:50:21 +0000 Subject: rtnetlink: provide RCU protection to rtnl_fill_prop_list() We want to be able to run rtnl_fill_ifinfo() under RCU protection instead of RTNL in the future. dev->name_node items are already rcu protected. Signed-off-by: Eric Dumazet Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2d83ab76a3c95..39f17d0b6ceaa 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1699,7 +1699,7 @@ static int rtnl_fill_alt_ifnames(struct sk_buff *skb, struct netdev_name_node *name_node; int count = 0; - list_for_each_entry(name_node, &dev->name_node->list, list) { + list_for_each_entry_rcu(name_node, &dev->name_node->list, list) { if (nla_put_string(skb, IFLA_ALT_IFNAME, name_node->name)) return -EMSGSIZE; count++; @@ -1707,6 +1707,7 @@ static int rtnl_fill_alt_ifnames(struct sk_buff *skb, return count; } +/* RCU protected. */ static int rtnl_fill_prop_list(struct sk_buff *skb, const struct net_device *dev) { @@ -1927,11 +1928,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, goto nla_put_failure_rcu; if (rtnl_fill_link_ifmap(skb, dev)) goto nla_put_failure_rcu; - - rcu_read_unlock(); - if (rtnl_fill_prop_list(skb, dev)) - goto nla_put_failure; + goto nla_put_failure_rcu; + rcu_read_unlock(); if (dev->dev.parent && nla_put_string(skb, IFLA_PARENT_DEV_NAME, -- cgit 1.2.3-korg From c3718936ec47ae811a7ce9a618b6cb1cda835bab Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 23 Feb 2024 20:10:54 +0000 Subject: ipv6: anycast: complete RCU handling of struct ifacaddr6 struct ifacaddr6 are already freed after RCU grace period. Add __rcu qualifier to aca_next pointer, and idev->ac_list Add relevant rcu_assign_pointer() and dereference accessors. ipv6_chk_acast_dev() no longer needs to acquire idev->lock. /proc/net/anycast6 is now purely RCU protected, it no longer acquires idev->lock. Similarly in6_dump_addrs() can use RCU protection to iterate through anycast addresses. It was relying on a mixture of RCU and RTNL but next patches will get rid of RTNL there. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240223201054.220534-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/if_inet6.h | 4 ++-- net/ipv6/addrconf.c | 4 ++-- net/ipv6/anycast.c | 61 +++++++++++++++++++------------------------------- 3 files changed, 27 insertions(+), 42 deletions(-) (limited to 'net') diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index f07642264c1eb..238ad3349456a 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -144,7 +144,7 @@ struct ipv6_ac_socklist { struct ifacaddr6 { struct in6_addr aca_addr; struct fib6_info *aca_rt; - struct ifacaddr6 *aca_next; + struct ifacaddr6 __rcu *aca_next; struct hlist_node aca_addr_lst; int aca_users; refcount_t aca_refcnt; @@ -196,7 +196,7 @@ struct inet6_dev { spinlock_t mc_report_lock; /* mld query report lock */ struct mutex mc_lock; /* mld global lock */ - struct ifacaddr6 *ac_list; + struct ifacaddr6 __rcu *ac_list; rwlock_t lock; refcount_t refcnt; __u32 if_flags; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a280614b37652..e27069ad938ca 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5314,8 +5314,8 @@ next: case ANYCAST_ADDR: fillargs->event = RTM_GETANYCAST; /* anycast address */ - for (ifaca = idev->ac_list; ifaca; - ifaca = ifaca->aca_next, ip_idx++) { + for (ifaca = rcu_dereference(idev->ac_list); ifaca; + ifaca = rcu_dereference(ifaca->aca_next), ip_idx++) { if (ip_idx < s_ip_idx) continue; err = inet6_fill_ifacaddr(skb, ifaca, fillargs); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index bb17f484ee2cd..0f2506e353592 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -296,7 +296,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) goto out; } - for (aca = idev->ac_list; aca; aca = aca->aca_next) { + for (aca = rtnl_dereference(idev->ac_list); aca; + aca = rtnl_dereference(aca->aca_next)) { if (ipv6_addr_equal(&aca->aca_addr, addr)) { aca->aca_users++; err = 0; @@ -317,13 +318,13 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) goto out; } - aca->aca_next = idev->ac_list; - idev->ac_list = aca; - /* Hold this for addrconf_join_solict() below before we unlock, * it is already exposed via idev->ac_list. */ aca_get(aca); + aca->aca_next = idev->ac_list; + rcu_assign_pointer(idev->ac_list, aca); + write_unlock_bh(&idev->lock); ipv6_add_acaddr_hash(net, aca); @@ -350,7 +351,8 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) write_lock_bh(&idev->lock); prev_aca = NULL; - for (aca = idev->ac_list; aca; aca = aca->aca_next) { + for (aca = rtnl_dereference(idev->ac_list); aca; + aca = rtnl_dereference(aca->aca_next)) { if (ipv6_addr_equal(&aca->aca_addr, addr)) break; prev_aca = aca; @@ -364,9 +366,9 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) return 0; } if (prev_aca) - prev_aca->aca_next = aca->aca_next; + rcu_assign_pointer(prev_aca->aca_next, aca->aca_next); else - idev->ac_list = aca->aca_next; + rcu_assign_pointer(idev->ac_list, aca->aca_next); write_unlock_bh(&idev->lock); ipv6_del_acaddr_hash(aca); addrconf_leave_solict(idev, &aca->aca_addr); @@ -392,8 +394,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev) struct ifacaddr6 *aca; write_lock_bh(&idev->lock); - while ((aca = idev->ac_list) != NULL) { - idev->ac_list = aca->aca_next; + while ((aca = rtnl_dereference(idev->ac_list)) != NULL) { + rcu_assign_pointer(idev->ac_list, aca->aca_next); write_unlock_bh(&idev->lock); ipv6_del_acaddr_hash(aca); @@ -420,11 +422,10 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad idev = __in6_dev_get(dev); if (idev) { - read_lock_bh(&idev->lock); - for (aca = idev->ac_list; aca; aca = aca->aca_next) + for (aca = rcu_dereference(idev->ac_list); aca; + aca = rcu_dereference(aca->aca_next)) if (ipv6_addr_equal(&aca->aca_addr, addr)) break; - read_unlock_bh(&idev->lock); return aca != NULL; } return false; @@ -477,30 +478,25 @@ bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, struct ac6_iter_state { struct seq_net_private p; struct net_device *dev; - struct inet6_dev *idev; }; #define ac6_seq_private(seq) ((struct ac6_iter_state *)(seq)->private) static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) { - struct ifacaddr6 *im = NULL; struct ac6_iter_state *state = ac6_seq_private(seq); struct net *net = seq_file_net(seq); + struct ifacaddr6 *im = NULL; - state->idev = NULL; for_each_netdev_rcu(net, state->dev) { struct inet6_dev *idev; + idev = __in6_dev_get(state->dev); if (!idev) continue; - read_lock_bh(&idev->lock); - im = idev->ac_list; - if (im) { - state->idev = idev; + im = rcu_dereference(idev->ac_list); + if (im) break; - } - read_unlock_bh(&idev->lock); } return im; } @@ -508,22 +504,17 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im) { struct ac6_iter_state *state = ac6_seq_private(seq); + struct inet6_dev *idev; - im = im->aca_next; + im = rcu_dereference(im->aca_next); while (!im) { - if (likely(state->idev != NULL)) - read_unlock_bh(&state->idev->lock); - state->dev = next_net_device_rcu(state->dev); - if (!state->dev) { - state->idev = NULL; + if (!state->dev) break; - } - state->idev = __in6_dev_get(state->dev); - if (!state->idev) + idev = __in6_dev_get(state->dev); + if (!idev) continue; - read_lock_bh(&state->idev->lock); - im = state->idev->ac_list; + im = rcu_dereference(idev->ac_list); } return im; } @@ -555,12 +546,6 @@ static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void ac6_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { - struct ac6_iter_state *state = ac6_seq_private(seq); - - if (likely(state->idev != NULL)) { - read_unlock_bh(&state->idev->lock); - state->idev = NULL; - } rcu_read_unlock(); } -- cgit 1.2.3-korg From 28de50eeb734bbdbc1c4397134fa3501d3490a62 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 23 Feb 2024 21:17:54 +0100 Subject: mptcp: token kunit: set protocol As it would be done when initiating an MPTCP sock. This is not strictly needed for this test, but it will be when a later patch will check if the right protocol is being used when calling mptcp_sk(). Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240223-upstream-net-next-20240223-misc-improvements-v1-2-b6c8a10396bd@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/token_test.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/token_test.c b/net/mptcp/token_test.c index bfff53e668da6..4fc39fa2e262d 100644 --- a/net/mptcp/token_test.c +++ b/net/mptcp/token_test.c @@ -52,14 +52,19 @@ static struct mptcp_subflow_context *build_ctx(struct kunit *test) static struct mptcp_sock *build_msk(struct kunit *test) { struct mptcp_sock *msk; + struct sock *sk; msk = kunit_kzalloc(test, sizeof(struct mptcp_sock), GFP_USER); KUNIT_EXPECT_NOT_ERR_OR_NULL(test, msk); refcount_set(&((struct sock *)msk)->sk_refcnt, 1); sock_net_set((struct sock *)msk, &init_net); + sk = (struct sock *)msk; + /* be sure the token helpers can dereference sk->sk_prot */ - ((struct sock *)msk)->sk_prot = &tcp_prot; + sk->sk_prot = &tcp_prot; + sk->sk_protocol = IPPROTO_MPTCP; + return msk; } -- cgit 1.2.3-korg From dcc03f270d1e2f4b9715537d8deb734bd019e187 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 23 Feb 2024 21:17:55 +0100 Subject: mptcp: check the protocol in tcp_sk() with DEBUG_NET Fuzzers and static checkers might not detect when tcp_sk() is used with a non tcp_sock structure. This kind of mistake already happened a few times with MPTCP: when wrongly using TCP-specific helpers with mptcp_sock pointers. On the other hand, there are many 'tcp_xxx()' helpers that are taking a 'struct sock' pointer as arguments, and some of them are only looking at fields from 'struct sock', and nothing from 'struct tcp_sock'. It is then tempting to use them with a 'struct mptcp_sock'. So a new simple check is done when CONFIG_DEBUG_NET is enabled to tell kernel devs when a non-TCP socket is being used as a TCP one. 'tcp_sk()' macro is then re-defined to add a WARN when an unexpected socket is being used. Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240223-upstream-net-next-20240223-misc-improvements-v1-3-b6c8a10396bd@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index f89b197a9f927..026ed360bd72d 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -348,6 +348,16 @@ static inline void msk_owned_by_me(const struct mptcp_sock *msk) sock_owned_by_me((const struct sock *)msk); } +#ifdef CONFIG_DEBUG_NET +/* MPTCP-specific: we might (indirectly) call this helper with the wrong sk */ +#undef tcp_sk +#define tcp_sk(ptr) ({ \ + typeof(ptr) _ptr = (ptr); \ + WARN_ON(_ptr->sk_protocol != IPPROTO_TCP); \ + container_of_const(_ptr, struct tcp_sock, inet_conn.icsk_inet.sk); \ +}) +#endif + #define mptcp_sk(ptr) container_of_const(ptr, struct mptcp_sock, sk.icsk_inet.sk) /* the msk socket don't use the backlog, also account for the bulk -- cgit 1.2.3-korg From 14d29ec5302caac945267b9586fad01ecddc700c Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 23 Feb 2024 21:17:56 +0100 Subject: mptcp: check the protocol in mptcp_sk() with DEBUG_NET Fuzzers and static checkers might not detect when mptcp_sk() is used with a non mptcp_sock structure. This is similar to the parent commit, where it is easy to use mptcp_sk() with a TCP sock, e.g. with a subflow sk. So a new simple check is done when CONFIG_DEBUG_NET is enabled to tell kernel devs when a non-MPTCP socket is being used as an MPTCP one. 'mptcp_sk()' macro is then defined differently: with an extra WARN to complain when an unexpected socket is being used. Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240223-upstream-net-next-20240223-misc-improvements-v1-4-b6c8a10396bd@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 026ed360bd72d..051b100d94031 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -356,9 +356,15 @@ static inline void msk_owned_by_me(const struct mptcp_sock *msk) WARN_ON(_ptr->sk_protocol != IPPROTO_TCP); \ container_of_const(_ptr, struct tcp_sock, inet_conn.icsk_inet.sk); \ }) -#endif +#define mptcp_sk(ptr) ({ \ + typeof(ptr) _ptr = (ptr); \ + WARN_ON(_ptr->sk_protocol != IPPROTO_MPTCP); \ + container_of_const(_ptr, struct mptcp_sock, sk.icsk_inet.sk); \ +}) +#else /* !CONFIG_DEBUG_NET */ #define mptcp_sk(ptr) container_of_const(ptr, struct mptcp_sock, sk.icsk_inet.sk) +#endif /* the msk socket don't use the backlog, also account for the bulk * free memory -- cgit 1.2.3-korg From aceb147b20a2eda9a10bdd4b5650718f731ef3e1 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 22 Feb 2024 06:41:17 -0800 Subject: xfrm: Do not allocate stats in the driver With commit 34d21de99cea9 ("net: Move {l,t,d}stats allocation to core and convert veth & vrf"), stats allocation could be done on net core instead of this driver. With this new approach, the driver doesn't have to bother with error handling (allocation failure checking, making sure free happens in the right spot, etc). This is core responsibility now. Remove the allocation in the xfrm driver and leverage the network core allocation. Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface_core.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index 21d50d75c2608..1a1f6613fda2f 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -240,7 +240,6 @@ static void xfrmi_dev_free(struct net_device *dev) struct xfrm_if *xi = netdev_priv(dev); gro_cells_destroy(&xi->gro_cells); - free_percpu(dev->tstats); } static int xfrmi_create(struct net_device *dev) @@ -749,6 +748,7 @@ static void xfrmi_dev_setup(struct net_device *dev) dev->flags = IFF_NOARP; dev->needs_free_netdev = true; dev->priv_destructor = xfrmi_dev_free; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; netif_keep_dst(dev); eth_broadcast_addr(dev->broadcast); @@ -765,15 +765,9 @@ static int xfrmi_dev_init(struct net_device *dev) struct net_device *phydev = __dev_get_by_index(xi->net, xi->p.link); int err; - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; - err = gro_cells_init(&xi->gro_cells, dev); - if (err) { - free_percpu(dev->tstats); + if (err) return err; - } dev->features |= NETIF_F_LLTX; dev->features |= XFRMI_FEATURES; -- cgit 1.2.3-korg From f8cbf6bde4c8d5d32330bcceafa7b139fec89f97 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 24 Feb 2024 09:06:30 +0000 Subject: netlink: use kvmalloc() in netlink_alloc_large_skb() This is a followup of commit 234ec0b6034b ("netlink: fix potential sleeping issue in mqueue_flush_file"), because vfree_atomic() overhead is unfortunate for medium sized allocations. 1) If the allocation is smaller than PAGE_SIZE, do not bother with vmalloc() at all. Some arches have 64KB PAGE_SIZE, while NLMSG_GOODSIZE is smaller than 8KB. 2) Use kvmalloc(), which might allocate one high order page instead of vmalloc if memory is not too fragmented. Signed-off-by: Eric Dumazet Reviewed-by: Zhengchao Shao Link: https://lore.kernel.org/r/20240224090630.605917-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/netlink/af_netlink.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index be5792b638aa5..765921cf11948 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1202,23 +1202,21 @@ struct sock *netlink_getsockbyfilp(struct file *filp) struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast) { + size_t head_size = SKB_HEAD_ALIGN(size); struct sk_buff *skb; void *data; - if (size <= NLMSG_GOODSIZE || broadcast) + if (head_size <= PAGE_SIZE || broadcast) return alloc_skb(size, GFP_KERNEL); - size = SKB_DATA_ALIGN(size) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - - data = vmalloc(size); - if (data == NULL) + data = kvmalloc(head_size, GFP_KERNEL); + if (!data) return NULL; - skb = __build_skb(data, size); - if (skb == NULL) - vfree(data); - else + skb = __build_skb(data, head_size); + if (!skb) + kvfree(data); + else if (is_vmalloc_addr(data)) skb->destructor = netlink_skb_destructor; return skb; -- cgit 1.2.3-korg From d75fe63a0708bd28eac5cda1212fa5fd037297dc Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Sat, 24 Feb 2024 16:41:21 +0800 Subject: ipv6: raw: remove useless input parameter in rawv6_err The input parameter 'opt' in rawv6_err() is not used. Therefore, remove it. Signed-off-by: Zhengchao Shao Reviewed-by: Eric Dumazet Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240224084121.2479603-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski --- net/ipv6/raw.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 03dbb874c363b..479c4c2c17858 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -288,8 +288,7 @@ out: } static void rawv6_err(struct sock *sk, struct sk_buff *skb, - struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { bool recverr = inet6_test_bit(RECVERR6, sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -344,7 +343,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, if (!raw_v6_match(net, sk, nexthdr, &ip6h->saddr, &ip6h->daddr, inet6_iif(skb), inet6_iif(skb))) continue; - rawv6_err(sk, skb, NULL, type, code, inner_offset, info); + rawv6_err(sk, skb, type, code, inner_offset, info); } rcu_read_unlock(); } -- cgit 1.2.3-korg From 12a686c2e761f1f1f6e6e2117a9ab9c6de2ac8a7 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Mon, 26 Feb 2024 02:24:52 +0000 Subject: net: make SK_MEMORY_PCPU_RESERV tunable This patch adds /proc/sys/net/core/mem_pcpu_rsv sysctl file, to make SK_MEMORY_PCPU_RESERV tunable. Commit 3cd3399dd7a8 ("net: implement per-cpu reserves for memory_allocated") introduced per-cpu forward alloc cache: "Implement a per-cpu cache of +1/-1 MB, to reduce number of changes to sk->sk_prot->memory_allocated, which would otherwise be cause of false sharing." sk_prot->memory_allocated points to global atomic variable: atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp; If increasing the per-cpu cache size from 1MB to e.g. 16MB, changes to sk->sk_prot->memory_allocated can be further reduced. Performance may be improved on system with many cores. Signed-off-by: Adam Li Reviewed-by: Christoph Lameter (Ampere) Signed-off-by: David S. Miller --- Documentation/admin-guide/sysctl/net.rst | 5 +++++ include/net/sock.h | 5 +++-- net/core/sock.c | 1 + net/core/sysctl_net_core.c | 9 +++++++++ 4 files changed, 18 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index 3960916519557..7250c0542828b 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -206,6 +206,11 @@ Will increase power usage. Default: 0 (off) +mem_pcpu_rsv +------------ + +Per-cpu reserved forward alloc cache size in page units. Default 1MB per CPU. + rmem_default ------------ diff --git a/include/net/sock.h b/include/net/sock.h index 796a902cf4c19..09a0cde8bf522 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1443,6 +1443,7 @@ sk_memory_allocated(const struct sock *sk) /* 1 MB per cpu, in page units */ #define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT)) +extern int sysctl_mem_pcpu_rsv; static inline void sk_memory_allocated_add(struct sock *sk, int amt) @@ -1451,7 +1452,7 @@ sk_memory_allocated_add(struct sock *sk, int amt) preempt_disable(); local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve >= SK_MEMORY_PCPU_RESERVE) { + if (local_reserve >= READ_ONCE(sysctl_mem_pcpu_rsv)) { __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); } @@ -1465,7 +1466,7 @@ sk_memory_allocated_sub(struct sock *sk, int amt) preempt_disable(); local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) { + if (local_reserve <= -READ_ONCE(sysctl_mem_pcpu_rsv)) { __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); } diff --git a/net/core/sock.c b/net/core/sock.c index 8d86886e39fa6..df2ac54a8f74e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -283,6 +283,7 @@ __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX; EXPORT_SYMBOL(sysctl_rmem_max); __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; +int sysctl_mem_pcpu_rsv __read_mostly = SK_MEMORY_PCPU_RESERVE; int sysctl_tstamp_allow_data __read_mostly = 1; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 0f0cb1465e089..986f15e5d6c41 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -30,6 +30,7 @@ static int int_3600 = 3600; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; static int max_skb_frags = MAX_SKB_FRAGS; +static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE; static int net_msg_warn; /* Unused, but still a sysctl */ @@ -407,6 +408,14 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &min_rcvbuf, }, + { + .procname = "mem_pcpu_rsv", + .data = &sysctl_mem_pcpu_rsv, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_mem_pcpu_rsv, + }, { .procname = "dev_weight", .data = &weight_p, -- cgit 1.2.3-korg From 65be4393f363c4bd5c388ddf3e3eb4abee2b1f79 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 26 Feb 2024 11:22:19 +0800 Subject: tcp: directly drop skb in cookie check for ipv4 Only move the skb drop from tcp_v4_do_rcv() to cookie_v4_check() itself, no other changes made. It can help us refine the specific drop reasons later. Signed-off-by: Jason Xing Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 4 ++++ net/ipv4/tcp_ipv4.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index be88bf586ff9f..38f331da6677f 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -408,6 +408,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) struct rtable *rt; __u8 rcv_wscale; int full_space; + SKB_DR(reason); if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) || !th->ack || th->rst) @@ -477,10 +478,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) */ if (ret) inet_sk(ret)->cork.fl.u.ip4 = fl4; + else + goto out_drop; out: return ret; out_free: reqsk_free(req); out_drop: + kfree_skb_reason(skb, reason); return NULL; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0c50c5a32b84a..0a944e1090886 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1915,7 +1915,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) struct sock *nsk = tcp_v4_cookie_check(sk, skb); if (!nsk) - goto discard; + return 0; if (nsk != sk) { if (tcp_child_process(sk, nsk, skb)) { rsk = nsk; -- cgit 1.2.3-korg From a4a69a3719ec55f677b8481f58215b6338b781e5 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 26 Feb 2024 11:22:20 +0800 Subject: tcp: use drop reasons in cookie check for ipv4 Now it's time to use the prepared definitions to refine this part. Four reasons used might enough for now, I think. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 38f331da6677f..7972ad3d7c73a 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -421,8 +421,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) if (IS_ERR(req)) goto out; } - if (!req) + if (!req) { + SKB_DR_SET(reason, NO_SOCKET); goto out_drop; + } ireq = inet_rsk(req); @@ -434,8 +436,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) */ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb)); - if (security_inet_conn_request(sk, skb, req)) + if (security_inet_conn_request(sk, skb, req)) { + SKB_DR_SET(reason, SECURITY_HOOK); goto out_free; + } tcp_ao_syncookie(sk, skb, req, AF_INET); @@ -452,8 +456,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi_common(&fl4)); rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) + if (IS_ERR(rt)) { + SKB_DR_SET(reason, IP_OUTNOROUTES); goto out_free; + } /* Try to redo what tcp_v4_send_synack did. */ req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW); @@ -476,10 +482,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) /* ip_queue_xmit() depends on our flow being setup * Normal sockets get it right from inet_csk_route_child_sock() */ - if (ret) - inet_sk(ret)->cork.fl.u.ip4 = fl4; - else + if (!ret) { + SKB_DR_SET(reason, NO_SOCKET); goto out_drop; + } + inet_sk(ret)->cork.fl.u.ip4 = fl4; out: return ret; out_free: -- cgit 1.2.3-korg From ed43e76cdcc497e2b27d84db27e7df5612be2643 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 26 Feb 2024 11:22:21 +0800 Subject: tcp: directly drop skb in cookie check for ipv6 Like previous patch does, only moving skb drop logical code to cookie_v6_check() for later refinement. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv6/syncookies.c | 4 ++++ net/ipv6/tcp_ipv6.c | 5 +---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 6b9c692788196..ea0d9954a29f2 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -177,6 +177,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) struct sock *ret = sk; __u8 rcv_wscale; int full_space; + SKB_DR(reason); if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) || !th->ack || th->rst) @@ -256,10 +257,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->ecn_ok &= cookie_ecn_ok(net, dst); ret = tcp_get_cookie_sock(sk, skb, req, dst); + if (!ret) + goto out_drop; out: return ret; out_free: reqsk_free(req); out_drop: + kfree_skb_reason(skb, reason); return NULL; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 57b25b1fc9d9d..0c180bb8187fa 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1653,11 +1653,8 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_LISTEN) { struct sock *nsk = tcp_v6_cookie_check(sk, skb); - if (!nsk) - goto discard; - if (nsk != sk) { - if (tcp_child_process(sk, nsk, skb)) + if (nsk && tcp_child_process(sk, nsk, skb)) goto reset; if (opt_skb) __kfree_skb(opt_skb); -- cgit 1.2.3-korg From 253541a3c1e43f6b42033f78d2289a216af58387 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 26 Feb 2024 11:22:22 +0800 Subject: tcp: use drop reasons in cookie check for ipv6 Like what I did to ipv4 mode, refine this part: adding more drop reasons for better tracing. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv6/syncookies.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index ea0d9954a29f2..8bad0a44a0a6c 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -190,16 +190,20 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) if (IS_ERR(req)) goto out; } - if (!req) + if (!req) { + SKB_DR_SET(reason, NO_SOCKET); goto out_drop; + } ireq = inet_rsk(req); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; - if (security_inet_conn_request(sk, skb, req)) + if (security_inet_conn_request(sk, skb, req)) { + SKB_DR_SET(reason, SECURITY_HOOK); goto out_free; + } if (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || @@ -236,8 +240,10 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) security_req_classify_flow(req, flowi6_to_flowi_common(&fl6)); dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p); - if (IS_ERR(dst)) + if (IS_ERR(dst)) { + SKB_DR_SET(reason, IP_OUTNOROUTES); goto out_free; + } } req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); @@ -257,8 +263,10 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->ecn_ok &= cookie_ecn_ok(net, dst); ret = tcp_get_cookie_sock(sk, skb, req, dst); - if (!ret) + if (!ret) { + SKB_DR_SET(reason, NO_SOCKET); goto out_drop; + } out: return ret; out_free: -- cgit 1.2.3-korg From e615e3a24ed6f1a501f9b5426ec0b476fded4d22 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 26 Feb 2024 11:22:24 +0800 Subject: tcp: add more specific possible drop reasons in tcp_rcv_synsent_state_process() This patch does two things: 1) add two more new reasons 2) only change the return value(1) to various drop reason values for the future use For now, we still cannot trace those two reasons. We'll implement the full function in the subsequent patch in this series. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 74c03f0a6c0c5..83308cca16108 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6361,6 +6361,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, TCP_TIMEOUT_MIN, TCP_RTO_MAX); + SKB_DR_SET(reason, TCP_INVALID_ACK_SEQUENCE); goto reset_and_undo; } @@ -6369,6 +6370,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_time_stamp_ts(tp))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED); + SKB_DR_SET(reason, TCP_RFC7323_PAWS); goto reset_and_undo; } @@ -6572,7 +6574,8 @@ discard_and_undo: reset_and_undo: tcp_clear_options(&tp->rx_opt); tp->rx_opt.mss_clamp = saved_clamp; - return 1; + /* we can reuse/return @reason to its caller to handle the exception */ + return reason; } static void tcp_rcv_synrecv_state_fastopen(struct sock *sk) -- cgit 1.2.3-korg From 7d6ed9afde8547723f6f96f81f984cc6c48eef52 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 26 Feb 2024 11:22:25 +0800 Subject: tcp: add dropreasons in tcp_rcv_state_process() In this patch, I equipped this function with more dropreasons, but it still doesn't work yet, which I will do later. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- net/ipv4/tcp_input.c | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 33bf92dff0aff..af2a4dcd45183 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -348,7 +348,7 @@ void tcp_wfree(struct sk_buff *skb); void tcp_write_timer_handler(struct sock *sk); void tcp_delack_timer_handler(struct sock *sk); int tcp_ioctl(struct sock *sk, int cmd, int *karg); -int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb); +enum skb_drop_reason tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb); void tcp_rcv_established(struct sock *sk, struct sk_buff *skb); void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 83308cca16108..5d874817a78db 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6619,7 +6619,8 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk) * address independent. */ -int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) +enum skb_drop_reason +tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); @@ -6635,7 +6636,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) case TCP_LISTEN: if (th->ack) - return 1; + return SKB_DROP_REASON_TCP_FLAGS; if (th->rst) { SKB_DR_SET(reason, TCP_RESET); @@ -6704,8 +6705,12 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) FLAG_NO_CHALLENGE_ACK); if ((int)reason <= 0) { - if (sk->sk_state == TCP_SYN_RECV) - return 1; /* send one RST */ + if (sk->sk_state == TCP_SYN_RECV) { + /* send one RST */ + if (!reason) + return SKB_DROP_REASON_TCP_OLD_ACK; + return -reason; + } /* accept old ack during closing */ if ((int)reason < 0) { tcp_send_challenge_ack(sk); @@ -6781,7 +6786,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (READ_ONCE(tp->linger2) < 0) { tcp_done(sk); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA); - return 1; + return SKB_DROP_REASON_TCP_ABORT_ON_DATA; } if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) { @@ -6790,7 +6795,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tcp_fastopen_active_disable(sk); tcp_done(sk); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA); - return 1; + return SKB_DROP_REASON_TCP_ABORT_ON_DATA; } tmo = tcp_fin_time(sk); @@ -6855,7 +6860,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA); tcp_reset(sk, skb); - return 1; + return SKB_DROP_REASON_TCP_ABORT_ON_DATA; } } fallthrough; -- cgit 1.2.3-korg From b9825695930546af725b1e686b8eaf4c71201728 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 26 Feb 2024 11:22:26 +0800 Subject: tcp: make the dropreason really work when calling tcp_rcv_state_process() Update three callers including both ipv4 and ipv6 and let the dropreason mechanism work in reality. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ++-- net/ipv4/tcp_ipv4.c | 3 ++- net/ipv4/tcp_minisocks.c | 10 +++++----- net/ipv6/tcp_ipv6.c | 3 ++- 4 files changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index af2a4dcd45183..6ae35199d3b3c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -396,8 +396,8 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, bool fastopen, bool *lost_race); -int tcp_child_process(struct sock *parent, struct sock *child, - struct sk_buff *skb); +enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child, + struct sk_buff *skb); void tcp_enter_loss(struct sock *sk); void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, int flag); void tcp_clear_retrans(struct tcp_sock *tp); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0a944e1090886..c79e255499727 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1926,7 +1926,8 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) } else sock_rps_save_rxhash(sk, skb); - if (tcp_rcv_state_process(sk, skb)) { + reason = tcp_rcv_state_process(sk, skb); + if (reason) { rsk = sk; goto reset; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 9e85f2a0bddd4..52040b0e2616b 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -911,11 +911,11 @@ EXPORT_SYMBOL(tcp_check_req); * be created. */ -int tcp_child_process(struct sock *parent, struct sock *child, - struct sk_buff *skb) +enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child, + struct sk_buff *skb) __releases(&((child)->sk_lock.slock)) { - int ret = 0; + enum skb_drop_reason reason = SKB_NOT_DROPPED_YET; int state = child->sk_state; /* record sk_napi_id and sk_rx_queue_mapping of child. */ @@ -923,7 +923,7 @@ int tcp_child_process(struct sock *parent, struct sock *child, tcp_segs_in(tcp_sk(child), skb); if (!sock_owned_by_user(child)) { - ret = tcp_rcv_state_process(child, skb); + reason = tcp_rcv_state_process(child, skb); /* Wakeup parent, send SIGIO */ if (state == TCP_SYN_RECV && child->sk_state != state) parent->sk_data_ready(parent); @@ -937,6 +937,6 @@ int tcp_child_process(struct sock *parent, struct sock *child, bh_unlock_sock(child); sock_put(child); - return ret; + return reason; } EXPORT_SYMBOL(tcp_child_process); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0c180bb8187fa..4f8464e04b7f1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1663,7 +1663,8 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) } else sock_rps_save_rxhash(sk, skb); - if (tcp_rcv_state_process(sk, skb)) + reason = tcp_rcv_state_process(sk, skb); + if (reason) goto reset; if (opt_skb) goto ipv6_pktoptions; -- cgit 1.2.3-korg From ee01defe25bad09a37b68dd051a7e931d1e4cd91 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 26 Feb 2024 11:22:27 +0800 Subject: tcp: make dropreason in tcp_child_process() work It's time to let it work right now. We've already prepared for this:) Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 12 +++++++----- net/ipv6/tcp_ipv6.c | 16 ++++++++++------ 2 files changed, 17 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c79e255499727..a22ee58387518 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1907,7 +1907,6 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; } - reason = SKB_DROP_REASON_NOT_SPECIFIED; if (tcp_checksum_complete(skb)) goto csum_err; @@ -1917,7 +1916,8 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) if (!nsk) return 0; if (nsk != sk) { - if (tcp_child_process(sk, nsk, skb)) { + reason = tcp_child_process(sk, nsk, skb); + if (reason) { rsk = nsk; goto reset; } @@ -2276,10 +2276,12 @@ process: if (nsk == sk) { reqsk_put(req); tcp_v4_restore_cb(skb); - } else if (tcp_child_process(sk, nsk, skb)) { - tcp_v4_send_reset(nsk, skb); - goto discard_and_relse; } else { + drop_reason = tcp_child_process(sk, nsk, skb); + if (drop_reason) { + tcp_v4_send_reset(nsk, skb); + goto discard_and_relse; + } sock_put(sk); return 0; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4f8464e04b7f1..f677f0fa51968 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1623,7 +1623,6 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (np->rxopt.all) opt_skb = skb_clone_and_charge_r(skb, sk); - reason = SKB_DROP_REASON_NOT_SPECIFIED; if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ struct dst_entry *dst; @@ -1654,8 +1653,11 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) struct sock *nsk = tcp_v6_cookie_check(sk, skb); if (nsk != sk) { - if (nsk && tcp_child_process(sk, nsk, skb)) - goto reset; + if (nsk) { + reason = tcp_child_process(sk, nsk, skb); + if (reason) + goto reset; + } if (opt_skb) __kfree_skb(opt_skb); return 0; @@ -1854,10 +1856,12 @@ process: if (nsk == sk) { reqsk_put(req); tcp_v6_restore_cb(skb); - } else if (tcp_child_process(sk, nsk, skb)) { - tcp_v6_send_reset(nsk, skb); - goto discard_and_relse; } else { + drop_reason = tcp_child_process(sk, nsk, skb); + if (drop_reason) { + tcp_v6_send_reset(nsk, skb); + goto discard_and_relse; + } sock_put(sk); return 0; } -- cgit 1.2.3-korg From 67c8e4bb4f54ad22ca0ce4700c8728ffd73acb66 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Mon, 26 Feb 2024 14:14:11 +0100 Subject: net: ioam6: multicast event Add a multicast group to the ioam6 generic netlink family and provide ioam6_event() to send an ioam6 event to the multicast group. Reviewed-by: David Ahern Signed-off-by: Justin Iurman Signed-off-by: David S. Miller --- include/net/ioam6.h | 4 ++++ net/ipv6/ioam6.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) (limited to 'net') diff --git a/include/net/ioam6.h b/include/net/ioam6.h index 781d2d8b2f29d..2cbbee6e806aa 100644 --- a/include/net/ioam6.h +++ b/include/net/ioam6.h @@ -12,6 +12,7 @@ #include #include #include +#include #include struct ioam6_namespace { @@ -65,4 +66,7 @@ void ioam6_exit(void); int ioam6_iptunnel_init(void); void ioam6_iptunnel_exit(void); +void ioam6_event(enum ioam6_event_type type, struct net *net, gfp_t gfp, + void *opt, unsigned int opt_len); + #endif /* _NET_IOAM6_H */ diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c index 571f0e4d9cf3d..5fa923f066321 100644 --- a/net/ipv6/ioam6.c +++ b/net/ipv6/ioam6.c @@ -612,6 +612,68 @@ static const struct genl_ops ioam6_genl_ops[] = { }, }; +#define IOAM6_GENL_EV_GRP_OFFSET 0 + +static const struct genl_multicast_group ioam6_mcgrps[] = { + [IOAM6_GENL_EV_GRP_OFFSET] = { .name = IOAM6_GENL_EV_GRP_NAME, + .flags = GENL_MCAST_CAP_NET_ADMIN }, +}; + +static int ioam6_event_put_trace(struct sk_buff *skb, + struct ioam6_trace_hdr *trace, + unsigned int len) +{ + if (nla_put_u16(skb, IOAM6_EVENT_ATTR_TRACE_NAMESPACE, + be16_to_cpu(trace->namespace_id)) || + nla_put_u8(skb, IOAM6_EVENT_ATTR_TRACE_NODELEN, trace->nodelen) || + nla_put_u32(skb, IOAM6_EVENT_ATTR_TRACE_TYPE, + be32_to_cpu(trace->type_be32)) || + nla_put(skb, IOAM6_EVENT_ATTR_TRACE_DATA, + len - sizeof(struct ioam6_trace_hdr) - trace->remlen * 4, + trace->data + trace->remlen * 4)) + return 1; + + return 0; +} + +void ioam6_event(enum ioam6_event_type type, struct net *net, gfp_t gfp, + void *opt, unsigned int opt_len) +{ + struct nlmsghdr *nlh; + struct sk_buff *skb; + + if (!genl_has_listeners(&ioam6_genl_family, net, + IOAM6_GENL_EV_GRP_OFFSET)) + return; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!skb) + return; + + nlh = genlmsg_put(skb, 0, 0, &ioam6_genl_family, 0, type); + if (!nlh) + goto nla_put_failure; + + switch (type) { + case IOAM6_EVENT_UNSPEC: + WARN_ON_ONCE(1); + break; + case IOAM6_EVENT_TRACE: + if (ioam6_event_put_trace(skb, (struct ioam6_trace_hdr *)opt, + opt_len)) + goto nla_put_failure; + break; + } + + genlmsg_end(skb, nlh); + genlmsg_multicast_netns(&ioam6_genl_family, net, skb, 0, + IOAM6_GENL_EV_GRP_OFFSET, gfp); + return; + +nla_put_failure: + nlmsg_free(skb); +} + static struct genl_family ioam6_genl_family __ro_after_init = { .name = IOAM6_GENL_NAME, .version = IOAM6_GENL_VERSION, @@ -620,6 +682,8 @@ static struct genl_family ioam6_genl_family __ro_after_init = { .ops = ioam6_genl_ops, .n_ops = ARRAY_SIZE(ioam6_genl_ops), .resv_start_op = IOAM6_CMD_NS_SET_SCHEMA + 1, + .mcgrps = ioam6_mcgrps, + .n_mcgrps = ARRAY_SIZE(ioam6_mcgrps), .module = THIS_MODULE, }; -- cgit 1.2.3-korg From f655c78d6225f585ef60a9d93ffb79d507ff3ad3 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Mon, 26 Feb 2024 14:14:12 +0100 Subject: net: exthdrs: ioam6: send trace event If we're processing an IOAM Pre-allocated Trace Option-Type (the only one supported currently), then send the trace as an ioam6 event to the netlink multicast group. This way, user space apps will be able to collect IOAM data. Reviewed-by: David Ahern Signed-off-by: Justin Iurman Signed-off-by: David S. Miller --- net/ipv6/exthdrs.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 02e9ffb63af19..727792907d6cb 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -50,6 +50,7 @@ #endif #include #include +#include #include #include @@ -954,6 +955,9 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) + optoff + sizeof(*hdr)); ioam6_fill_trace_data(skb, ns, trace, true); + + ioam6_event(IOAM6_EVENT_TRACE, dev_net(skb->dev), + GFP_ATOMIC, (void *)trace, hdr->opt_len - 2); break; default: break; -- cgit 1.2.3-korg From 2e26b6dfade4db750809b9231cce49498bb71767 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Tue, 27 Feb 2024 08:57:45 +0800 Subject: ipv6: raw: remove useless input parameter in rawv6_get/seticmpfilter The input parameter 'level' in rawv6_get/seticmpfilter is not used. Therefore, remove it. Signed-off-by: Zhengchao Shao Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/ipv6/raw.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 479c4c2c17858..76e6eb3b643d0 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -934,7 +934,7 @@ do_confirm: goto done; } -static int rawv6_seticmpfilter(struct sock *sk, int level, int optname, +static int rawv6_seticmpfilter(struct sock *sk, int optname, sockptr_t optval, int optlen) { switch (optname) { @@ -951,7 +951,7 @@ static int rawv6_seticmpfilter(struct sock *sk, int level, int optname, return 0; } -static int rawv6_geticmpfilter(struct sock *sk, int level, int optname, +static int rawv6_geticmpfilter(struct sock *sk, int optname, char __user *optval, int __user *optlen) { int len; @@ -1037,7 +1037,7 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname, case SOL_ICMPV6: if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return -EOPNOTSUPP; - return rawv6_seticmpfilter(sk, level, optname, optval, optlen); + return rawv6_seticmpfilter(sk, optname, optval, optlen); case SOL_IPV6: if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) @@ -1098,7 +1098,7 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname, case SOL_ICMPV6: if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return -EOPNOTSUPP; - return rawv6_geticmpfilter(sk, level, optname, optval, optlen); + return rawv6_geticmpfilter(sk, optname, optval, optlen); case SOL_IPV6: if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) -- cgit 1.2.3-korg From 292fac464b012200c4e99d08974fed3bc087b848 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 26 Feb 2024 19:29:15 -0600 Subject: net: ethtool: eee: Remove legacy _u32 from keee All MAC drivers have been converted to use the link mode members of keee. So remove the _u32 values, and the code in the ethtool core to convert the legacy _u32 values to link modes. Reviewed-by: Simon Horman Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/ethtool.h | 3 --- net/ethtool/eee.c | 31 ++++--------------------------- net/ethtool/ioctl.c | 29 ++++++++++------------------- 3 files changed, 14 insertions(+), 49 deletions(-) (limited to 'net') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index b90c33607594b..9901e563f706e 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -226,9 +226,6 @@ struct ethtool_keee { __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised); __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertised); - u32 supported_u32; - u32 advertised_u32; - u32 lp_advertised_u32; u32 tx_lpi_timer; bool tx_lpi_enabled; bool eee_active; diff --git a/net/ethtool/eee.c b/net/ethtool/eee.c index db6faa18fe413..bf398973eb8a0 100644 --- a/net/ethtool/eee.c +++ b/net/ethtool/eee.c @@ -4,9 +4,6 @@ #include "common.h" #include "bitset.h" -#define EEE_MODES_COUNT \ - (sizeof_field(struct ethtool_keee, supported_u32) * BITS_PER_BYTE) - struct eee_req_info { struct ethnl_req_info base; }; @@ -41,15 +38,6 @@ static int eee_prepare_data(const struct ethnl_req_info *req_base, ret = dev->ethtool_ops->get_eee(dev, eee); ethnl_ops_complete(dev); - if (!ret && !ethtool_eee_use_linkmodes(eee)) { - ethtool_convert_legacy_u32_to_link_mode(eee->supported, - eee->supported_u32); - ethtool_convert_legacy_u32_to_link_mode(eee->advertised, - eee->advertised_u32); - ethtool_convert_legacy_u32_to_link_mode(eee->lp_advertised, - eee->lp_advertised_u32); - } - return ret; } @@ -62,11 +50,6 @@ static int eee_reply_size(const struct ethnl_req_info *req_base, int len = 0; int ret; - BUILD_BUG_ON(sizeof(eee->advertised_u32) * BITS_PER_BYTE != - EEE_MODES_COUNT); - BUILD_BUG_ON(sizeof(eee->lp_advertised_u32) * BITS_PER_BYTE != - EEE_MODES_COUNT); - /* MODES_OURS */ ret = ethnl_bitset_size(eee->advertised, eee->supported, __ETHTOOL_LINK_MODE_MASK_NBITS, @@ -154,16 +137,10 @@ ethnl_set_eee(struct ethnl_req_info *req_info, struct genl_info *info) if (ret < 0) return ret; - if (ethtool_eee_use_linkmodes(&eee)) { - ret = ethnl_update_bitset(eee.advertised, - __ETHTOOL_LINK_MODE_MASK_NBITS, - tb[ETHTOOL_A_EEE_MODES_OURS], - link_mode_names, info->extack, &mod); - } else { - ret = ethnl_update_bitset32(&eee.advertised_u32, EEE_MODES_COUNT, - tb[ETHTOOL_A_EEE_MODES_OURS], - link_mode_names, info->extack, &mod); - } + ret = ethnl_update_bitset(eee.advertised, + __ETHTOOL_LINK_MODE_MASK_NBITS, + tb[ETHTOOL_A_EEE_MODES_OURS], + link_mode_names, info->extack, &mod); if (ret < 0) return ret; ethnl_update_bool(&eee.eee_enabled, tb[ETHTOOL_A_EEE_ENABLED], &mod); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index b419969c0dcbb..317308bdbda95 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1514,9 +1514,6 @@ static void eee_to_keee(struct ethtool_keee *keee, { memset(keee, 0, sizeof(*keee)); - keee->supported_u32 = eee->supported; - keee->advertised_u32 = eee->advertised; - keee->lp_advertised_u32 = eee->lp_advertised; keee->eee_active = eee->eee_active; keee->eee_enabled = eee->eee_enabled; keee->tx_lpi_enabled = eee->tx_lpi_enabled; @@ -1533,6 +1530,8 @@ static void eee_to_keee(struct ethtool_keee *keee, static void keee_to_eee(struct ethtool_eee *eee, const struct ethtool_keee *keee) { + bool overflow; + memset(eee, 0, sizeof(*eee)); eee->eee_active = keee->eee_active; @@ -1540,22 +1539,14 @@ static void keee_to_eee(struct ethtool_eee *eee, eee->tx_lpi_enabled = keee->tx_lpi_enabled; eee->tx_lpi_timer = keee->tx_lpi_timer; - if (ethtool_eee_use_linkmodes(keee)) { - bool overflow; - - overflow = !ethtool_convert_link_mode_to_legacy_u32(&eee->supported, - keee->supported); - ethtool_convert_link_mode_to_legacy_u32(&eee->advertised, - keee->advertised); - ethtool_convert_link_mode_to_legacy_u32(&eee->lp_advertised, - keee->lp_advertised); - if (overflow) - pr_warn("Ethtool ioctl interface doesn't support passing EEE linkmodes beyond bit 32\n"); - } else { - eee->supported = keee->supported_u32; - eee->advertised = keee->advertised_u32; - eee->lp_advertised = keee->lp_advertised_u32; - } + overflow = !ethtool_convert_link_mode_to_legacy_u32(&eee->supported, + keee->supported); + ethtool_convert_link_mode_to_legacy_u32(&eee->advertised, + keee->advertised); + ethtool_convert_link_mode_to_legacy_u32(&eee->lp_advertised, + keee->lp_advertised); + if (overflow) + pr_warn("Ethtool ioctl interface doesn't support passing EEE linkmodes beyond bit 32\n"); } static int ethtool_get_eee(struct net_device *dev, char __user *useraddr) -- cgit 1.2.3-korg From d4f01c5e477afecb0baede359e4b19b9882872fa Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Wed, 28 Feb 2024 03:06:58 +0000 Subject: net: remove SLAB_MEM_SPREAD flag usage The SLAB_MEM_SPREAD flag used to be implemented in SLAB, which was removed as of v6.8-rc1, so it became a dead flag since the commit 16a1d968358a ("mm/slab: remove mm/slab.c and slab_def.h"). And the series[1] went on to mark it obsolete to avoid confusion for users. Here we can just remove all its users, which has no functional change. [1] https://lore.kernel.org/all/20240223-slab-cleanup-flags-v2-1-02f1753e8303@suse.cz/ Signed-off-by: Chengming Zhou Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240228030658.3512782-1-chengming.zhou@linux.dev Signed-off-by: Jakub Kicinski --- net/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index ed3df2f749bfa..7e9c8fc9a5b45 100644 --- a/net/socket.c +++ b/net/socket.c @@ -343,7 +343,7 @@ static void init_inodecache(void) 0, (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | - SLAB_MEM_SPREAD | SLAB_ACCOUNT), + SLAB_ACCOUNT), init_once); BUG_ON(sock_inode_cachep == NULL); } -- cgit 1.2.3-korg From 0598f8f3bb77893a13105d47bb7dfe42f1dc1f4e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Feb 2024 09:24:09 +0000 Subject: inet: annotate devconf data-races Add READ_ONCE() in ipv4_devconf_get() and corresponding WRITE_ONCE() in ipv4_devconf_set() Add IPV4_DEVCONF_RO() and IPV4_DEVCONF_ALL_RO() macros, and use them when reading devconf fields. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240227092411.2315725-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/inetdevice.h | 14 ++++++++------ net/ipv4/devinet.c | 21 +++++++++++---------- net/ipv4/igmp.c | 4 ++-- net/ipv4/proc.c | 2 +- net/ipv4/route.c | 4 ++-- 5 files changed, 24 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index ddb27fc0ee8c8..cb5280e6cc219 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -53,13 +53,15 @@ struct in_device { }; #define IPV4_DEVCONF(cnf, attr) ((cnf).data[IPV4_DEVCONF_ ## attr - 1]) +#define IPV4_DEVCONF_RO(cnf, attr) READ_ONCE(IPV4_DEVCONF(cnf, attr)) #define IPV4_DEVCONF_ALL(net, attr) \ IPV4_DEVCONF((*(net)->ipv4.devconf_all), attr) +#define IPV4_DEVCONF_ALL_RO(net, attr) READ_ONCE(IPV4_DEVCONF_ALL(net, attr)) -static inline int ipv4_devconf_get(struct in_device *in_dev, int index) +static inline int ipv4_devconf_get(const struct in_device *in_dev, int index) { index--; - return in_dev->cnf.data[index]; + return READ_ONCE(in_dev->cnf.data[index]); } static inline void ipv4_devconf_set(struct in_device *in_dev, int index, @@ -67,7 +69,7 @@ static inline void ipv4_devconf_set(struct in_device *in_dev, int index, { index--; set_bit(index, in_dev->cnf.state); - in_dev->cnf.data[index] = val; + WRITE_ONCE(in_dev->cnf.data[index], val); } static inline void ipv4_devconf_setall(struct in_device *in_dev) @@ -81,18 +83,18 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) ipv4_devconf_set((in_dev), IPV4_DEVCONF_ ## attr, (val)) #define IN_DEV_ANDCONF(in_dev, attr) \ - (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr) && \ + (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), attr) && \ IN_DEV_CONF_GET((in_dev), attr)) #define IN_DEV_NET_ORCONF(in_dev, net, attr) \ - (IPV4_DEVCONF_ALL(net, attr) || \ + (IPV4_DEVCONF_ALL_RO(net, attr) || \ IN_DEV_CONF_GET((in_dev), attr)) #define IN_DEV_ORCONF(in_dev, attr) \ IN_DEV_NET_ORCONF(in_dev, dev_net(in_dev->dev), attr) #define IN_DEV_MAXCONF(in_dev, attr) \ - (max(IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr), \ + (max(IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), attr), \ IN_DEV_CONF_GET((in_dev), attr))) #define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index bc74f131fe4df..ca75d0fff1d1e 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1982,7 +1982,7 @@ static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev, return -EMSGSIZE; for (i = 0; i < IPV4_DEVCONF_MAX; i++) - ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i]; + ((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]); return 0; } @@ -2068,9 +2068,9 @@ static int inet_netconf_msgsize_devconf(int type) } static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, - struct ipv4_devconf *devconf, u32 portid, - u32 seq, int event, unsigned int flags, - int type) + const struct ipv4_devconf *devconf, + u32 portid, u32 seq, int event, + unsigned int flags, int type) { struct nlmsghdr *nlh; struct netconfmsg *ncm; @@ -2095,27 +2095,28 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, if ((all || type == NETCONFA_FORWARDING) && nla_put_s32(skb, NETCONFA_FORWARDING, - IPV4_DEVCONF(*devconf, FORWARDING)) < 0) + IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_RP_FILTER) && nla_put_s32(skb, NETCONFA_RP_FILTER, - IPV4_DEVCONF(*devconf, RP_FILTER)) < 0) + IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_MC_FORWARDING) && nla_put_s32(skb, NETCONFA_MC_FORWARDING, - IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0) + IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_BC_FORWARDING) && nla_put_s32(skb, NETCONFA_BC_FORWARDING, - IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0) + IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_PROXY_NEIGH) && nla_put_s32(skb, NETCONFA_PROXY_NEIGH, - IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0) + IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, - IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0) + IPV4_DEVCONF_RO(*devconf, + IGNORE_ROUTES_WITH_LINKDOWN)) < 0) goto nla_put_failure; out: diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index efeeca2b13285..717e97a389a8a 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -120,12 +120,12 @@ */ #define IGMP_V1_SEEN(in_dev) \ - (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \ + (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \ IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \ ((in_dev)->mr_v1_seen && \ time_before(jiffies, (in_dev)->mr_v1_seen))) #define IGMP_V2_SEEN(in_dev) \ - (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \ + (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \ IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \ ((in_dev)->mr_v2_seen && \ time_before(jiffies, (in_dev)->mr_v2_seen))) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 5f4654ebff487..914bc9c35cc70 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -395,7 +395,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); seq_printf(seq, "\nIp: %d %d", - IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2, + IPV4_DEVCONF_ALL_RO(net, FORWARDING) ? 1 : 2, READ_ONCE(net->ipv4.sysctl_ip_default_ttl)); BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b512288d6fcc6..c8f76f56dc165 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2313,7 +2313,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (IN_DEV_BFORWARD(in_dev)) goto make_route; /* not do cache if bc_forwarding is enabled */ - if (IPV4_DEVCONF_ALL(net, BC_FORWARDING)) + if (IPV4_DEVCONF_ALL_RO(net, BC_FORWARDING)) do_cache = false; goto brd_input; } @@ -2993,7 +2993,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, #ifdef CONFIG_IP_MROUTE if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && - IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { + IPV4_DEVCONF_ALL_RO(net, MC_FORWARDING)) { int err = ipmr_get_route(net, skb, fl4->saddr, fl4->daddr, r, portid); -- cgit 1.2.3-korg From bbcf91053bb622c4c26a9bfc998d3b0c59227f10 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Feb 2024 09:24:10 +0000 Subject: inet: do not use RTNL in inet_netconf_get_devconf() "ip -4 netconf show dev XXXX" no longer acquires RTNL. Return -ENODEV instead of -EINVAL if no netdev or idev can be found. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240227092411.2315725-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/devinet.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ca75d0fff1d1e..f045a34e90b97 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2205,21 +2205,20 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb, struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); - struct nlattr *tb[NETCONFA_MAX+1]; + struct nlattr *tb[NETCONFA_MAX + 1]; + const struct ipv4_devconf *devconf; + struct in_device *in_dev = NULL; + struct net_device *dev = NULL; struct sk_buff *skb; - struct ipv4_devconf *devconf; - struct in_device *in_dev; - struct net_device *dev; int ifindex; int err; err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack); if (err) - goto errout; + return err; - err = -EINVAL; if (!tb[NETCONFA_IFINDEX]) - goto errout; + return -EINVAL; ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]); switch (ifindex) { @@ -2230,10 +2229,10 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb, devconf = net->ipv4.devconf_dflt; break; default: - dev = __dev_get_by_index(net, ifindex); - if (!dev) - goto errout; - in_dev = __in_dev_get_rtnl(dev); + err = -ENODEV; + dev = dev_get_by_index(net, ifindex); + if (dev) + in_dev = in_dev_get(dev); if (!in_dev) goto errout; devconf = &in_dev->cnf; @@ -2257,6 +2256,9 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb, } err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout: + if (in_dev) + in_dev_put(in_dev); + dev_put(dev); return err; } @@ -2826,5 +2828,6 @@ void __init devinet_init(void) rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0); rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0); rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, - inet_netconf_dump_devconf, 0); + inet_netconf_dump_devconf, + RTNL_FLAG_DOIT_UNLOCKED); } -- cgit 1.2.3-korg From 167487070d644a285ed863516c80b3c35ec929d6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Feb 2024 09:24:11 +0000 Subject: inet: use xa_array iterator to implement inet_netconf_dump_devconf() 1) inet_netconf_dump_devconf() can run under RCU protection instead of RTNL. 2) properly return 0 at the end of a dump, avoiding an an extra recvmsg() system call. 3) Do not use inet_base_seq() anymore, for_each_netdev_dump() has nice properties. Restarting a GETDEVCONF dump if a device has been added/removed or if net->ipv4.dev_addr_genid has changed is moot. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240227092411.2315725-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/devinet.c | 101 +++++++++++++++++++++++------------------------------ 1 file changed, 43 insertions(+), 58 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index f045a34e90b97..af741af61830a 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2267,11 +2267,13 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb, { const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); - int h, s_h; - int idx, s_idx; + struct { + unsigned long ifindex; + unsigned int all_default; + } *ctx = (void *)cb->ctx; + const struct in_device *in_dev; struct net_device *dev; - struct in_device *in_dev; - struct hlist_head *head; + int err = 0; if (cb->strict_check) { struct netlink_ext_ack *extack = cb->extack; @@ -2288,64 +2290,47 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb, } } - s_h = cb->args[0]; - s_idx = idx = cb->args[1]; - - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - idx = 0; - head = &net->dev_index_head[h]; - rcu_read_lock(); - cb->seq = inet_base_seq(net); - hlist_for_each_entry_rcu(dev, head, index_hlist) { - if (idx < s_idx) - goto cont; - in_dev = __in_dev_get_rcu(dev); - if (!in_dev) - goto cont; - - if (inet_netconf_fill_devconf(skb, dev->ifindex, - &in_dev->cnf, - NETLINK_CB(cb->skb).portid, - nlh->nlmsg_seq, - RTM_NEWNETCONF, - NLM_F_MULTI, - NETCONFA_ALL) < 0) { - rcu_read_unlock(); - goto done; - } - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); -cont: - idx++; - } - rcu_read_unlock(); + rcu_read_lock(); + for_each_netdev_dump(net, dev, ctx->ifindex) { + in_dev = __in_dev_get_rcu(dev); + if (!in_dev) + continue; + err = inet_netconf_fill_devconf(skb, dev->ifindex, + &in_dev->cnf, + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + NETCONFA_ALL); + if (err < 0) + goto done; } - if (h == NETDEV_HASHENTRIES) { - if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, - net->ipv4.devconf_all, - NETLINK_CB(cb->skb).portid, - nlh->nlmsg_seq, - RTM_NEWNETCONF, NLM_F_MULTI, - NETCONFA_ALL) < 0) + if (ctx->all_default == 0) { + err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, + net->ipv4.devconf_all, + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + NETCONFA_ALL); + if (err < 0) goto done; - else - h++; - } - if (h == NETDEV_HASHENTRIES + 1) { - if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, - net->ipv4.devconf_dflt, - NETLINK_CB(cb->skb).portid, - nlh->nlmsg_seq, - RTM_NEWNETCONF, NLM_F_MULTI, - NETCONFA_ALL) < 0) + ctx->all_default++; + } + if (ctx->all_default == 1) { + err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, + net->ipv4.devconf_dflt, + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + NETCONFA_ALL); + if (err < 0) goto done; - else - h++; + ctx->all_default++; } done: - cb->args[0] = h; - cb->args[1] = idx; - - return skb->len; + if (err < 0 && likely(skb->len)) + err = skb->len; + rcu_read_unlock(); + return err; } #ifdef CONFIG_SYSCTL @@ -2829,5 +2814,5 @@ void __init devinet_init(void) rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0); rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, inet_netconf_dump_devconf, - RTNL_FLAG_DOIT_UNLOCKED); + RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED); } -- cgit 1.2.3-korg From 9ff74d77180a0b0b2705f62bd4dbd313e964ebf8 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Tue, 27 Feb 2024 17:36:04 +0800 Subject: netlabel: remove impossible return value in netlbl_bitmap_walk Since commit 446fda4f2682 ("[NetLabel]: CIPSOv4 engine"), *bitmap_walk function only returns -1. Nearly 18 years have passed, -2 scenes never come up, so there's no need to consider it. Signed-off-by: Zhengchao Shao Reviewed-by: Jiri Pirko Acked-by: Paul Moore Link: https://lore.kernel.org/r/20240227093604.3574241-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski --- net/ipv4/cipso_ipv4.c | 5 +---- net/ipv6/calipso.c | 5 +---- net/netlabel/netlabel_kapi.c | 2 +- 3 files changed, 3 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index d048aa8332938..8b17d83e5fde4 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -864,11 +864,8 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def, net_clen_bits, net_spot + 1, 1); - if (net_spot < 0) { - if (net_spot == -2) - return -EFAULT; + if (net_spot < 0) return 0; - } switch (doi_def->type) { case CIPSO_V4_MAP_PASS: diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index 1578ed9e97d89..eb8ee1e9373a7 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -657,11 +657,8 @@ static int calipso_map_cat_ntoh(const struct calipso_doi *doi_def, net_clen_bits, spot + 1, 1); - if (spot < 0) { - if (spot == -2) - return -EFAULT; + if (spot < 0) return 0; - } ret_val = netlbl_catmap_setbit(&secattr->attr.mls.cat, spot, diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 7b844581ebeea..1ba4f58e1d351 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -876,7 +876,7 @@ int netlbl_catmap_setlong(struct netlbl_lsm_catmap **catmap, * Description: * Starting at @offset, walk the bitmap from left to right until either the * desired bit is found or we reach the end. Return the bit offset, -1 if - * not found, or -2 if error. + * not found. */ int netlbl_bitmap_walk(const unsigned char *bitmap, u32 bitmap_len, u32 offset, u8 state) -- cgit 1.2.3-korg From d35150c79ffcd730e3c9f20cf128288606e3bf3b Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 27 Feb 2024 10:23:36 -0800 Subject: net: bridge: Do not allocate stats in the driver With commit 34d21de99cea9 ("net: Move {l,t,d}stats allocation to core and convert veth & vrf"), stats allocation could be done on net core instead of this driver. With this new approach, the driver doesn't have to bother with error handling (allocation failure checking, making sure free happens in the right spot, etc). This is core responsibility now. Remove the allocation in the bridge driver and leverage the network core allocation. Signed-off-by: Breno Leitao Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20240227182338.2739884-1-leitao@debian.org Signed-off-by: Jakub Kicinski --- net/bridge/br_device.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 874cec75a818e..4f636f7b05558 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -113,26 +113,18 @@ static int br_dev_init(struct net_device *dev) struct net_bridge *br = netdev_priv(dev); int err; - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; - err = br_fdb_hash_init(br); - if (err) { - free_percpu(dev->tstats); + if (err) return err; - } err = br_mdb_hash_init(br); if (err) { - free_percpu(dev->tstats); br_fdb_hash_fini(br); return err; } err = br_vlan_init(br); if (err) { - free_percpu(dev->tstats); br_mdb_hash_fini(br); br_fdb_hash_fini(br); return err; @@ -140,7 +132,6 @@ static int br_dev_init(struct net_device *dev) err = br_multicast_init_stats(br); if (err) { - free_percpu(dev->tstats); br_vlan_flush(br); br_mdb_hash_fini(br); br_fdb_hash_fini(br); @@ -159,7 +150,6 @@ static void br_dev_uninit(struct net_device *dev) br_vlan_flush(br); br_mdb_hash_fini(br); br_fdb_hash_fini(br); - free_percpu(dev->tstats); } static int br_dev_open(struct net_device *dev) @@ -496,6 +486,7 @@ void br_dev_setup(struct net_device *dev) dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; dev->vlan_features = COMMON_FEATURES; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; br->dev = dev; spin_lock_init(&br->lock); -- cgit 1.2.3-korg From 82a48affb36f1fee36fe415051a8947d42861108 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 27 Feb 2024 10:23:37 -0800 Subject: net: bridge: Exit if multicast_init_stats fails If br_multicast_init_stats() fails, there is no need to set lockdep classes. Just return from the error path. Signed-off-by: Breno Leitao Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20240227182338.2739884-2-leitao@debian.org Signed-off-by: Jakub Kicinski --- net/bridge/br_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 4f636f7b05558..c366ccc8b3db7 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -135,10 +135,11 @@ static int br_dev_init(struct net_device *dev) br_vlan_flush(br); br_mdb_hash_fini(br); br_fdb_hash_fini(br); + return err; } netdev_lockdep_set_classes(dev); - return err; + return 0; } static void br_dev_uninit(struct net_device *dev) -- cgit 1.2.3-korg From 1200097fa8f0d8e8ddfe5c554d8fa2bc03b2df92 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Feb 2024 21:01:04 +0000 Subject: net: call skb_defer_free_flush() from __napi_busy_loop() skb_defer_free_flush() is currently called from net_rx_action() and napi_threaded_poll(). We should also call it from __napi_busy_loop() otherwise there is the risk the percpu queue can grow until an IPI is forced from skb_attempt_defer_free() adding a latency spike. Signed-off-by: Eric Dumazet Cc: Samiullah Khawaja Acked-by: Stanislav Fomichev Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240227210105.3815474-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 275fd5259a4a9..053fac78305c7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6173,6 +6173,27 @@ struct napi_struct *napi_by_id(unsigned int napi_id) return NULL; } +static void skb_defer_free_flush(struct softnet_data *sd) +{ + struct sk_buff *skb, *next; + + /* Paired with WRITE_ONCE() in skb_attempt_defer_free() */ + if (!READ_ONCE(sd->defer_list)) + return; + + spin_lock(&sd->defer_lock); + skb = sd->defer_list; + sd->defer_list = NULL; + sd->defer_count = 0; + spin_unlock(&sd->defer_lock); + + while (skb != NULL) { + next = skb->next; + napi_consume_skb(skb, 1); + skb = next; + } +} + #if defined(CONFIG_NET_RX_BUSY_POLL) static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule) @@ -6297,6 +6318,7 @@ count: if (work > 0) __NET_ADD_STATS(dev_net(napi->dev), LINUX_MIB_BUSYPOLLRXPACKETS, work); + skb_defer_free_flush(this_cpu_ptr(&softnet_data)); local_bh_enable(); if (!loop_end || loop_end(loop_end_arg, start_time)) @@ -6726,27 +6748,6 @@ static int napi_thread_wait(struct napi_struct *napi) return -1; } -static void skb_defer_free_flush(struct softnet_data *sd) -{ - struct sk_buff *skb, *next; - - /* Paired with WRITE_ONCE() in skb_attempt_defer_free() */ - if (!READ_ONCE(sd->defer_list)) - return; - - spin_lock(&sd->defer_lock); - skb = sd->defer_list; - sd->defer_list = NULL; - sd->defer_count = 0; - spin_unlock(&sd->defer_lock); - - while (skb != NULL) { - next = skb->next; - napi_consume_skb(skb, 1); - skb = next; - } -} - static int napi_threaded_poll(void *data) { struct napi_struct *napi = data; -- cgit 1.2.3-korg From 67ea41d19d2aad2da2bcaeb7992c4fff9dc28a8f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Feb 2024 22:22:59 +0000 Subject: inet6: expand rcu_read_lock() scope in inet6_dump_addr() I missed that inet6_dump_addr() is calling in6_dump_addrs() from two points. First one under RTNL protection, and second one under rcu_read_lock(). Since we want to remove RTNL use from inet6_dump_addr() very soon, no longer assume in6_dump_addrs() is protected by RTNL (even if this is still the case). Use rcu_read_lock() earlier to fix this lockdep splat: WARNING: suspicious RCU usage 6.8.0-rc5-syzkaller-01618-gf8cbf6bde4c8 #0 Not tainted net/ipv6/addrconf.c:5317 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 3 locks held by syz-executor.2/8834: #0: ffff88802f554678 (nlk_cb_mutex-ROUTE){+.+.}-{3:3}, at: __netlink_dump_start+0x119/0x780 net/netlink/af_netlink.c:2338 #1: ffffffff8f377a88 (rtnl_mutex){+.+.}-{3:3}, at: netlink_dump+0x676/0xda0 net/netlink/af_netlink.c:2265 #2: ffff88807e5f0580 (&ndev->lock){++--}-{2:2}, at: in6_dump_addrs+0xb8/0x1de0 net/ipv6/addrconf.c:5279 stack backtrace: CPU: 1 PID: 8834 Comm: syz-executor.2 Not tainted 6.8.0-rc5-syzkaller-01618-gf8cbf6bde4c8 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1e7/0x2e0 lib/dump_stack.c:106 lockdep_rcu_suspicious+0x220/0x340 kernel/locking/lockdep.c:6712 in6_dump_addrs+0x1b47/0x1de0 net/ipv6/addrconf.c:5317 inet6_dump_addr+0x1597/0x1690 net/ipv6/addrconf.c:5428 netlink_dump+0x6a6/0xda0 net/netlink/af_netlink.c:2266 __netlink_dump_start+0x59d/0x780 net/netlink/af_netlink.c:2374 netlink_dump_start include/linux/netlink.h:340 [inline] rtnetlink_rcv_msg+0xcf7/0x10d0 net/core/rtnetlink.c:6555 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2547 netlink_unicast_kernel net/netlink/af_netlink.c:1335 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1361 netlink_sendmsg+0x8e0/0xcb0 net/netlink/af_netlink.c:1902 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 Fixes: c3718936ec47 ("ipv6: anycast: complete RCU handling of struct ifacaddr6") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240227222259.4081489-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e27069ad938ca..953a95898e4ad 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5300,9 +5300,9 @@ next: fillargs->event = RTM_GETMULTICAST; /* multicast address */ - for (ifmca = rtnl_dereference(idev->mc_list); + for (ifmca = rcu_dereference(idev->mc_list); ifmca; - ifmca = rtnl_dereference(ifmca->next), ip_idx++) { + ifmca = rcu_dereference(ifmca->next), ip_idx++) { if (ip_idx < s_ip_idx) continue; err = inet6_fill_ifmcaddr(skb, ifmca, fillargs); @@ -5410,6 +5410,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, s_idx = idx = cb->args[1]; s_ip_idx = cb->args[2]; + rcu_read_lock(); if (cb->strict_check) { err = inet6_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net, skb->sk, cb); @@ -5434,7 +5435,6 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, } } - rcu_read_lock(); cb->seq = inet6_base_seq(tgt_net); for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; @@ -5456,10 +5456,10 @@ cont: } } done: - rcu_read_unlock(); cb->args[0] = h; cb->args[1] = idx; put_tgt_net: + rcu_read_unlock(); if (fillargs.netnsid >= 0) put_net(tgt_net); -- cgit 1.2.3-korg From 8b2b1e62cdb9ce36886e77c90a9594e41f3a2112 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 28 Feb 2024 15:25:05 +0800 Subject: ipv4: raw: remove useless input parameter in do_raw_set/getsockopt The input parameter 'level' in do_raw_set/getsockopt() is not used. Therefore, remove it. Signed-off-by: Zhengchao Shao Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240228072505.640550-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni --- net/ipv4/raw.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index aea89326c6979..7d2bdfd7e7d70 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -815,7 +815,7 @@ static int raw_geticmpfilter(struct sock *sk, char __user *optval, int __user *o out: return ret; } -static int do_raw_setsockopt(struct sock *sk, int level, int optname, +static int do_raw_setsockopt(struct sock *sk, int optname, sockptr_t optval, unsigned int optlen) { if (optname == ICMP_FILTER) { @@ -832,11 +832,11 @@ static int raw_setsockopt(struct sock *sk, int level, int optname, { if (level != SOL_RAW) return ip_setsockopt(sk, level, optname, optval, optlen); - return do_raw_setsockopt(sk, level, optname, optval, optlen); + return do_raw_setsockopt(sk, optname, optval, optlen); } -static int do_raw_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) +static int do_raw_getsockopt(struct sock *sk, int optname, + char __user *optval, int __user *optlen) { if (optname == ICMP_FILTER) { if (inet_sk(sk)->inet_num != IPPROTO_ICMP) @@ -852,7 +852,7 @@ static int raw_getsockopt(struct sock *sk, int level, int optname, { if (level != SOL_RAW) return ip_getsockopt(sk, level, optname, optval, optlen); - return do_raw_getsockopt(sk, level, optname, optval, optlen); + return do_raw_getsockopt(sk, optname, optval, optlen); } static int raw_ioctl(struct sock *sk, int cmd, int *karg) -- cgit 1.2.3-korg From 3e2f544dd8a33b2f650b32920b9bef103da2a7cd Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 28 Feb 2024 03:31:21 -0800 Subject: net: get stats64 if device if driver is configured If the network driver is relying in the net core to do stats allocation, then we want to dev_get_tstats64() instead of netdev_stats_to_stats64(), since there are per-cpu stats that needs to be taken in consideration. This will also simplify the drivers in regard to statistics. Once the driver sets NETDEV_PCPU_STAT_TSTATS, it doesn't not need to allocate the stacks, neither it needs to set `.ndo_get_stats64 = dev_get_tstats64` for the generic stats collection function anymore. Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 053fac78305c7..34b39c03e97de 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10702,6 +10702,8 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, ops->ndo_get_stats64(dev, storage); } else if (ops->ndo_get_stats) { netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev)); + } else if (dev->pcpu_stat_type == NETDEV_PCPU_STAT_TSTATS) { + dev_get_tstats64(dev, storage); } else { netdev_stats_to_stats64(storage, &dev->stats); } -- cgit 1.2.3-korg From fa0cd90213695b928410484264b38982757a5c28 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 28 Feb 2024 03:31:22 -0800 Subject: net: sit: Do not set .ndo_get_stats64 If the driver is using the network core allocation mechanism, by setting NETDEV_PCPU_STAT_TSTATS, as this driver is, then, it doesn't need to set the dev_get_tstats64() generic .ndo_get_stats64 function pointer. Since the network core calls it automatically, and .ndo_get_stats64 should only be set if the driver needs special treatment. This simplifies the driver, since all the generic statistics is now handled by core. Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- net/ipv6/sit.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 5ad01480854d2..655c9b1a19b89 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1398,7 +1398,6 @@ static const struct net_device_ops ipip6_netdev_ops = { .ndo_uninit = ipip6_tunnel_uninit, .ndo_start_xmit = sit_tunnel_xmit, .ndo_siocdevprivate = ipip6_tunnel_siocdevprivate, - .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip_tunnel_get_iflink, .ndo_tunnel_ctl = ipip6_tunnel_ctl, }; -- cgit 1.2.3-korg From ba132d841d5637655273c28d30751c8d18ac189b Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 29 Jan 2024 13:51:30 +0000 Subject: rxrpc: Record the Tx serial in the rxrpc_txbuf and retransmit trace Each Rx protocol packet contains a per-connection monotonically increasing serial number used to correlate outgoing messages with their replies - something that can be used for RTT calculation. Note this value in the rxrpc_txbuf struct in addition to the wire header and then log it in the rxrpc_retransmit trace for reference. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- include/trace/events/rxrpc.h | 10 +++++++--- net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_event.c | 6 +++--- net/rxrpc/output.c | 36 +++++++++++++++++------------------- net/rxrpc/txbuf.c | 1 + 5 files changed, 29 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 87b8de9b6c1c4..9add56980485e 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -1506,25 +1506,29 @@ TRACE_EVENT(rxrpc_drop_ack, ); TRACE_EVENT(rxrpc_retransmit, - TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, s64 expiry), + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, + rxrpc_serial_t serial, s64 expiry), - TP_ARGS(call, seq, expiry), + TP_ARGS(call, seq, serial, expiry), TP_STRUCT__entry( __field(unsigned int, call) __field(rxrpc_seq_t, seq) + __field(rxrpc_serial_t, serial) __field(s64, expiry) ), TP_fast_assign( __entry->call = call->debug_id; __entry->seq = seq; + __entry->serial = serial; __entry->expiry = expiry; ), - TP_printk("c=%08x q=%x xp=%lld", + TP_printk("c=%08x q=%x r=%x xp=%lld", __entry->call, __entry->seq, + __entry->serial, __entry->expiry) ); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 7818aae1be8e0..f761257558101 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -794,6 +794,7 @@ struct rxrpc_txbuf { ktime_t last_sent; /* Time at which last transmitted */ refcount_t ref; rxrpc_seq_t seq; /* Sequence number of this packet */ + rxrpc_serial_t serial; /* Last serial number transmitted with */ unsigned int call_debug_id; unsigned int debug_id; unsigned int len; /* Amount of data in buffer */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 0f78544d043be..a4c3099767198 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -160,7 +160,7 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) goto no_further_resend; found_txb: - if (after(ntohl(txb->wire.serial), call->acks_highest_serial)) + if (after(txb->serial, call->acks_highest_serial)) continue; /* Ack point not yet reached */ rxrpc_see_txbuf(txb, rxrpc_txbuf_see_unacked); @@ -170,7 +170,7 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) set_bit(RXRPC_TXBUF_RESENT, &txb->flags); } - trace_rxrpc_retransmit(call, txb->seq, + trace_rxrpc_retransmit(call, txb->seq, txb->serial, ktime_to_ns(ktime_sub(txb->last_sent, max_age))); @@ -197,7 +197,7 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) break; /* Not transmitted yet */ if (ack && ack->reason == RXRPC_ACK_PING_RESPONSE && - before(ntohl(txb->wire.serial), ntohl(ack->serial))) + before(txb->serial, ntohl(ack->serial))) goto do_resend; /* Wasn't accounted for by a more recent ping. */ if (ktime_after(txb->last_sent, max_age)) { diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 4a292f860ae37..bad96a983e12f 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -189,7 +189,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) struct rxrpc_connection *conn; struct msghdr msg; struct kvec iov[1]; - rxrpc_serial_t serial; size_t len, n; int ret, rtt_slot = -1; u16 rwind; @@ -216,15 +215,15 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) iov[0].iov_len = sizeof(txb->wire) + sizeof(txb->ack) + n; len = iov[0].iov_len; - serial = rxrpc_get_next_serial(conn); - txb->wire.serial = htonl(serial); - trace_rxrpc_tx_ack(call->debug_id, serial, + txb->serial = rxrpc_get_next_serial(conn); + txb->wire.serial = htonl(txb->serial); + trace_rxrpc_tx_ack(call->debug_id, txb->serial, ntohl(txb->ack.firstPacket), ntohl(txb->ack.serial), txb->ack.reason, txb->ack.nAcks, rwind); if (txb->ack.reason == RXRPC_ACK_PING) - rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_ping); + rtt_slot = rxrpc_begin_rtt_probe(call, txb->serial, rxrpc_rtt_tx_ping); rxrpc_inc_stat(call->rxnet, stat_tx_ack_send); @@ -235,7 +234,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) ret = do_udp_sendmsg(conn->local->socket, &msg, len); call->peer->last_tx_at = ktime_get_seconds(); if (ret < 0) { - trace_rxrpc_tx_fail(call->debug_id, serial, ret, + trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, rxrpc_tx_point_call_ack); } else { trace_rxrpc_tx_packet(call->debug_id, &txb->wire, @@ -247,7 +246,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) if (!__rxrpc_call_is_complete(call)) { if (ret < 0) - rxrpc_cancel_rtt_probe(call, serial, rtt_slot); + rxrpc_cancel_rtt_probe(call, txb->serial, rtt_slot); rxrpc_set_keepalive(call); } @@ -327,15 +326,14 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) struct rxrpc_connection *conn = call->conn; struct msghdr msg; struct kvec iov[1]; - rxrpc_serial_t serial; size_t len; int ret, rtt_slot = -1; _enter("%x,{%d}", txb->seq, txb->len); - /* Each transmission of a Tx packet needs a new serial number */ - serial = rxrpc_get_next_serial(conn); - txb->wire.serial = htonl(serial); + /* Each transmission of a Tx packet+ needs a new serial number */ + txb->serial = rxrpc_get_next_serial(conn); + txb->wire.serial = htonl(txb->serial); if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) && txb->seq == 1) @@ -388,7 +386,7 @@ dont_set_request_ack: static int lose; if ((lose++ & 7) == 7) { ret = 0; - trace_rxrpc_tx_data(call, txb->seq, serial, + trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->wire.flags, test_bit(RXRPC_TXBUF_RESENT, &txb->flags), true); @@ -396,7 +394,7 @@ dont_set_request_ack: } } - trace_rxrpc_tx_data(call, txb->seq, serial, txb->wire.flags, + trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->wire.flags, test_bit(RXRPC_TXBUF_RESENT, &txb->flags), false); /* Track what we've attempted to transmit at least once so that the @@ -415,7 +413,7 @@ dont_set_request_ack: txb->last_sent = ktime_get_real(); if (txb->wire.flags & RXRPC_REQUEST_ACK) - rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data); + rtt_slot = rxrpc_begin_rtt_probe(call, txb->serial, rxrpc_rtt_tx_data); /* send the packet by UDP * - returns -EMSGSIZE if UDP would have to fragment the packet @@ -429,8 +427,8 @@ dont_set_request_ack: if (ret < 0) { rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail); - rxrpc_cancel_rtt_probe(call, serial, rtt_slot); - trace_rxrpc_tx_fail(call->debug_id, serial, ret, + rxrpc_cancel_rtt_probe(call, txb->serial, rtt_slot); + trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, rxrpc_tx_point_call_data_nofrag); } else { trace_rxrpc_tx_packet(call->debug_id, &txb->wire, @@ -489,7 +487,7 @@ send_fragmentable: txb->last_sent = ktime_get_real(); if (txb->wire.flags & RXRPC_REQUEST_ACK) - rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data); + rtt_slot = rxrpc_begin_rtt_probe(call, txb->serial, rxrpc_rtt_tx_data); switch (conn->local->srx.transport.family) { case AF_INET6: @@ -508,8 +506,8 @@ send_fragmentable: if (ret < 0) { rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail); - rxrpc_cancel_rtt_probe(call, serial, rtt_slot); - trace_rxrpc_tx_fail(call->debug_id, serial, ret, + rxrpc_cancel_rtt_probe(call, txb->serial, rtt_slot); + trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, rxrpc_tx_point_call_data_frag); } else { trace_rxrpc_tx_packet(call->debug_id, &txb->wire, diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index d43be85123864..f2903c81cf5b4 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -34,6 +34,7 @@ struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type, txb->flags = 0; txb->ack_why = 0; txb->seq = call->tx_prepared + 1; + txb->serial = 0; txb->wire.epoch = htonl(call->conn->proto.epoch); txb->wire.cid = htonl(call->cid); txb->wire.callNumber = htonl(call->call_id); -- cgit 1.2.3-korg From 12bdff73a147aebcea8efae2b395ef0c27448909 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 29 Jan 2024 15:01:10 +0000 Subject: rxrpc: Convert rxrpc_txbuf::flags into a mask and don't use atomics Convert the transmission buffer flags into a mask and use | and & rather than bitops functions (atomic ops are not required as only the I/O thread can manipulate them once submitted for transmission). The bottom byte can then correspond directly to the Rx protocol header flags. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- include/trace/events/rxrpc.h | 12 +++++------- net/rxrpc/ar-internal.h | 8 ++++---- net/rxrpc/call_event.c | 8 ++++---- net/rxrpc/input.c | 2 +- net/rxrpc/output.c | 27 +++++++++++++-------------- net/rxrpc/sendmsg.c | 10 ++++------ net/rxrpc/txbuf.c | 4 ++-- 7 files changed, 33 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 9add56980485e..33888f688325b 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -1084,9 +1084,9 @@ TRACE_EVENT(rxrpc_tx_packet, TRACE_EVENT(rxrpc_tx_data, TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, - rxrpc_serial_t serial, u8 flags, bool retrans, bool lose), + rxrpc_serial_t serial, unsigned int flags, bool lose), - TP_ARGS(call, seq, serial, flags, retrans, lose), + TP_ARGS(call, seq, serial, flags, lose), TP_STRUCT__entry( __field(unsigned int, call) @@ -1094,8 +1094,7 @@ TRACE_EVENT(rxrpc_tx_data, __field(rxrpc_serial_t, serial) __field(u32, cid) __field(u32, call_id) - __field(u8, flags) - __field(bool, retrans) + __field(u16, flags) __field(bool, lose) ), @@ -1106,7 +1105,6 @@ TRACE_EVENT(rxrpc_tx_data, __entry->seq = seq; __entry->serial = serial; __entry->flags = flags; - __entry->retrans = retrans; __entry->lose = lose; ), @@ -1116,8 +1114,8 @@ TRACE_EVENT(rxrpc_tx_data, __entry->call_id, __entry->serial, __entry->seq, - __entry->flags, - __entry->retrans ? " *RETRANS*" : "", + __entry->flags & RXRPC_TXBUF_WIRE_FLAGS, + __entry->flags & RXRPC_TXBUF_RESENT ? " *RETRANS*" : "", __entry->lose ? " *LOSE*" : "") ); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index f761257558101..54d1dc97cb0f7 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -800,9 +800,9 @@ struct rxrpc_txbuf { unsigned int len; /* Amount of data in buffer */ unsigned int space; /* Remaining data space */ unsigned int offset; /* Offset of fill point */ - unsigned long flags; -#define RXRPC_TXBUF_LAST 0 /* Set if last packet in Tx phase */ -#define RXRPC_TXBUF_RESENT 1 /* Set if has been resent */ + unsigned int flags; +#define RXRPC_TXBUF_WIRE_FLAGS 0xff /* The wire protocol flags */ +#define RXRPC_TXBUF_RESENT 0x100 /* Set if has been resent */ u8 /*enum rxrpc_propose_ack_trace*/ ack_why; /* If ack, why */ struct { /* The packet for encrypting and DMA'ing. We align it such @@ -822,7 +822,7 @@ struct rxrpc_txbuf { static inline bool rxrpc_sending_to_server(const struct rxrpc_txbuf *txb) { - return txb->wire.flags & RXRPC_CLIENT_INITIATED; + return txb->flags & RXRPC_CLIENT_INITIATED; } static inline bool rxrpc_sending_to_client(const struct rxrpc_txbuf *txb) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index a4c3099767198..77eacbfc5d456 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -84,7 +84,7 @@ void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, txb->ack_why = why; txb->wire.seq = 0; txb->wire.type = RXRPC_PACKET_TYPE_ACK; - txb->wire.flags |= RXRPC_SLOW_START_OK; + txb->flags |= RXRPC_SLOW_START_OK; txb->ack.bufferSpace = 0; txb->ack.maxSkew = 0; txb->ack.firstPacket = 0; @@ -167,7 +167,7 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) if (list_empty(&txb->tx_link)) { list_add_tail(&txb->tx_link, &retrans_queue); - set_bit(RXRPC_TXBUF_RESENT, &txb->flags); + txb->flags |= RXRPC_TXBUF_RESENT; } trace_rxrpc_retransmit(call, txb->seq, txb->serial, @@ -210,7 +210,7 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) unacked = true; if (list_empty(&txb->tx_link)) { list_add_tail(&txb->tx_link, &retrans_queue); - set_bit(RXRPC_TXBUF_RESENT, &txb->flags); + txb->flags |= RXRPC_TXBUF_RESENT; rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans); } } @@ -320,7 +320,7 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call) call->tx_top = txb->seq; list_add_tail(&txb->call_link, &call->tx_buffer); - if (txb->wire.flags & RXRPC_LAST_PACKET) + if (txb->flags & RXRPC_LAST_PACKET) rxrpc_close_tx_phase(call); rxrpc_transmit_one(call, txb); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 9691de00ade75..c435b50c33f4a 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -212,7 +212,7 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, list_for_each_entry_rcu(txb, &call->tx_buffer, call_link, false) { if (before_eq(txb->seq, call->acks_hard_ack)) continue; - if (test_bit(RXRPC_TXBUF_LAST, &txb->flags)) { + if (txb->flags & RXRPC_LAST_PACKET) { set_bit(RXRPC_CALL_TX_LAST, &call->flags); rot_last = true; } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index bad96a983e12f..8344ece5358ae 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -205,7 +205,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) msg.msg_flags = 0; if (txb->ack.reason == RXRPC_ACK_PING) - txb->wire.flags |= RXRPC_REQUEST_ACK; + txb->flags |= RXRPC_REQUEST_ACK; + txb->wire.flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS; n = rxrpc_fill_out_ack(conn, call, txb, &rwind); if (n == 0) @@ -239,7 +240,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) } else { trace_rxrpc_tx_packet(call->debug_id, &txb->wire, rxrpc_tx_point_call_ack); - if (txb->wire.flags & RXRPC_REQUEST_ACK) + if (txb->flags & RXRPC_REQUEST_ACK) call->peer->rtt_last_req = ktime_get_real(); } rxrpc_tx_backoff(call, ret); @@ -357,13 +358,13 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) * service call, lest OpenAFS incorrectly send us an ACK with some * soft-ACKs in it and then never follow up with a proper hard ACK. */ - if (txb->wire.flags & RXRPC_REQUEST_ACK) + if (txb->flags & RXRPC_REQUEST_ACK) why = rxrpc_reqack_already_on; - else if (test_bit(RXRPC_TXBUF_LAST, &txb->flags) && rxrpc_sending_to_client(txb)) + else if ((txb->flags & RXRPC_LAST_PACKET) && rxrpc_sending_to_client(txb)) why = rxrpc_reqack_no_srv_last; else if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) why = rxrpc_reqack_ack_lost; - else if (test_bit(RXRPC_TXBUF_RESENT, &txb->flags)) + else if (txb->flags & RXRPC_TXBUF_RESENT) why = rxrpc_reqack_retrans; else if (call->cong_mode == RXRPC_CALL_SLOW_START && call->cong_cwnd <= 2) why = rxrpc_reqack_slow_start; @@ -379,7 +380,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) rxrpc_inc_stat(call->rxnet, stat_why_req_ack[why]); trace_rxrpc_req_ack(call->debug_id, txb->seq, why); if (why != rxrpc_reqack_no_srv_last) - txb->wire.flags |= RXRPC_REQUEST_ACK; + txb->flags |= RXRPC_REQUEST_ACK; dont_set_request_ack: if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { @@ -387,15 +388,12 @@ dont_set_request_ack: if ((lose++ & 7) == 7) { ret = 0; trace_rxrpc_tx_data(call, txb->seq, txb->serial, - txb->wire.flags, - test_bit(RXRPC_TXBUF_RESENT, &txb->flags), - true); + txb->flags, true); goto done; } } - trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->wire.flags, - test_bit(RXRPC_TXBUF_RESENT, &txb->flags), false); + trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags, false); /* Track what we've attempted to transmit at least once so that the * retransmission algorithm doesn't try to resend what we haven't sent @@ -411,8 +409,9 @@ dont_set_request_ack: if (txb->len >= call->peer->maxdata) goto send_fragmentable; + txb->wire.flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS; txb->last_sent = ktime_get_real(); - if (txb->wire.flags & RXRPC_REQUEST_ACK) + if (txb->flags & RXRPC_REQUEST_ACK) rtt_slot = rxrpc_begin_rtt_probe(call, txb->serial, rxrpc_rtt_tx_data); /* send the packet by UDP @@ -442,7 +441,7 @@ dont_set_request_ack: done: if (ret >= 0) { call->tx_last_sent = txb->last_sent; - if (txb->wire.flags & RXRPC_REQUEST_ACK) { + if (txb->flags & RXRPC_REQUEST_ACK) { call->peer->rtt_last_req = txb->last_sent; if (call->peer->rtt_count > 1) { unsigned long nowj = jiffies, ack_lost_at; @@ -486,7 +485,7 @@ send_fragmentable: _debug("send fragment"); txb->last_sent = ktime_get_real(); - if (txb->wire.flags & RXRPC_REQUEST_ACK) + if (txb->flags & RXRPC_REQUEST_ACK) rtt_slot = rxrpc_begin_rtt_probe(call, txb->serial, rxrpc_rtt_tx_data); switch (conn->local->srx.transport.family) { diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 5677d5690a022..25c7c1d4f4c6d 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -240,7 +240,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, rxrpc_notify_end_tx_t notify_end_tx) { rxrpc_seq_t seq = txb->seq; - bool last = test_bit(RXRPC_TXBUF_LAST, &txb->flags), poke; + bool poke, last = txb->flags & RXRPC_LAST_PACKET; rxrpc_inc_stat(call->rxnet, stat_tx_data); @@ -394,13 +394,11 @@ reload: /* add the packet to the send queue if it's now full */ if (!txb->space || (msg_data_left(msg) == 0 && !more)) { - if (msg_data_left(msg) == 0 && !more) { - txb->wire.flags |= RXRPC_LAST_PACKET; - __set_bit(RXRPC_TXBUF_LAST, &txb->flags); - } + if (msg_data_left(msg) == 0 && !more) + txb->flags |= RXRPC_LAST_PACKET; else if (call->tx_top - call->acks_hard_ack < call->tx_winsize) - txb->wire.flags |= RXRPC_MORE_PACKETS; + txb->flags |= RXRPC_MORE_PACKETS; ret = call->security->secure_packet(call, txb); if (ret < 0) diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index f2903c81cf5b4..48d5a8f644e5b 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -31,7 +31,7 @@ struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type, txb->space = sizeof(txb->data); txb->len = 0; txb->offset = 0; - txb->flags = 0; + txb->flags = call->conn->out_clientflag; txb->ack_why = 0; txb->seq = call->tx_prepared + 1; txb->serial = 0; @@ -40,7 +40,7 @@ struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type, txb->wire.callNumber = htonl(call->call_id); txb->wire.seq = htonl(txb->seq); txb->wire.type = packet_type; - txb->wire.flags = call->conn->out_clientflag; + txb->wire.flags = 0; txb->wire.userStatus = 0; txb->wire.securityIndex = call->security_ix; txb->wire._rsvd = 0; -- cgit 1.2.3-korg From 41b8debba79ca4a4aee35f2e3c66bfc7bb6691ee Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 29 Jan 2024 14:37:52 +0000 Subject: rxrpc: Note cksum in txbuf Add a field to rxrpc_txbuf in which to store the checksum to go in the header as this may get overwritten in the wire header struct when transmitting as part of a jumbo packet. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/output.c | 1 + net/rxrpc/rxkad.c | 2 +- net/rxrpc/txbuf.c | 1 + 4 files changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 54d1dc97cb0f7..c9a2882627aac 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -803,6 +803,7 @@ struct rxrpc_txbuf { unsigned int flags; #define RXRPC_TXBUF_WIRE_FLAGS 0xff /* The wire protocol flags */ #define RXRPC_TXBUF_RESENT 0x100 /* Set if has been resent */ + __be16 cksum; /* Checksum to go in header */ u8 /*enum rxrpc_propose_ack_trace*/ ack_why; /* If ack, why */ struct { /* The packet for encrypting and DMA'ing. We align it such diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 8344ece5358ae..828b145edc560 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -335,6 +335,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) /* Each transmission of a Tx packet+ needs a new serial number */ txb->serial = rxrpc_get_next_serial(conn); txb->wire.serial = htonl(txb->serial); + txb->wire.cksum = txb->cksum; if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) && txb->seq == 1) diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 6b32d61d4cdc4..28c9ce763be4f 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -378,7 +378,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) y = (y >> 16) & 0xffff; if (y == 0) y = 1; /* zero checksums are not permitted */ - txb->wire.cksum = htons(y); + txb->cksum = htons(y); switch (call->conn->security_level) { case RXRPC_SECURITY_PLAIN: diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index 48d5a8f644e5b..7273615afe94d 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -35,6 +35,7 @@ struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type, txb->ack_why = 0; txb->seq = call->tx_prepared + 1; txb->serial = 0; + txb->cksum = 0; txb->wire.epoch = htonl(call->conn->proto.epoch); txb->wire.cid = htonl(call->cid); txb->wire.callNumber = htonl(call->call_id); -- cgit 1.2.3-korg From 17469ae0582aaacad36e8e858f58b86c369f21ef Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 26 Jan 2024 16:17:03 +0000 Subject: rxrpc: Fix the names of the fields in the ACK trailer struct From AFS-3.3 a trailer containing extra info was added to the ACK packet format - but AF_RXRPC has the names of some of the fields mixed up compared to other AFS implementations. Rename the struct and the fields to make them match. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- include/trace/events/rxrpc.h | 2 +- net/rxrpc/conn_event.c | 16 ++++++++-------- net/rxrpc/input.c | 22 +++++++++++----------- net/rxrpc/output.c | 14 +++++++------- net/rxrpc/protocol.h | 6 +++--- 5 files changed, 30 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 33888f688325b..c730cd732348e 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -83,7 +83,7 @@ EM(rxrpc_badmsg_bad_abort, "bad-abort") \ EM(rxrpc_badmsg_bad_jumbo, "bad-jumbo") \ EM(rxrpc_badmsg_short_ack, "short-ack") \ - EM(rxrpc_badmsg_short_ack_info, "short-ack-info") \ + EM(rxrpc_badmsg_short_ack_trailer, "short-ack-trailer") \ EM(rxrpc_badmsg_short_hdr, "short-hdr") \ EM(rxrpc_badmsg_unsupported_packet, "unsup-pkt") \ EM(rxrpc_badmsg_zero_call, "zero-call") \ diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 1f251d758cb9d..598b4ee389fc1 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -88,7 +88,7 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, struct rxrpc_ackpacket ack; }; } __attribute__((packed)) pkt; - struct rxrpc_ackinfo ack_info; + struct rxrpc_acktrailer trailer; size_t len; int ret, ioc; u32 serial, mtu, call_id, padding; @@ -122,8 +122,8 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, iov[0].iov_len = sizeof(pkt.whdr); iov[1].iov_base = &padding; iov[1].iov_len = 3; - iov[2].iov_base = &ack_info; - iov[2].iov_len = sizeof(ack_info); + iov[2].iov_base = &trailer; + iov[2].iov_len = sizeof(trailer); serial = rxrpc_get_next_serial(conn); @@ -158,14 +158,14 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, pkt.ack.serial = htonl(skb ? sp->hdr.serial : 0); pkt.ack.reason = skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE; pkt.ack.nAcks = 0; - ack_info.rxMTU = htonl(rxrpc_rx_mtu); - ack_info.maxMTU = htonl(mtu); - ack_info.rwind = htonl(rxrpc_rx_window_size); - ack_info.jumbo_max = htonl(rxrpc_rx_jumbo_max); + trailer.maxMTU = htonl(rxrpc_rx_mtu); + trailer.ifMTU = htonl(mtu); + trailer.rwind = htonl(rxrpc_rx_window_size); + trailer.jumbo_max = htonl(rxrpc_rx_jumbo_max); pkt.whdr.flags |= RXRPC_SLOW_START_OK; padding = 0; iov[0].iov_len += sizeof(pkt.ack); - len += sizeof(pkt.ack) + 3 + sizeof(ack_info); + len += sizeof(pkt.ack) + 3 + sizeof(trailer); ioc = 3; trace_rxrpc_tx_ack(chan->call_debug_id, serial, diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index c435b50c33f4a..ea2df62e05e79 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -670,14 +670,14 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call, /* * Process the extra information that may be appended to an ACK packet */ -static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, - struct rxrpc_ackinfo *ackinfo) +static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb, + struct rxrpc_acktrailer *trailer) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_peer *peer; unsigned int mtu; bool wake = false; - u32 rwind = ntohl(ackinfo->rwind); + u32 rwind = ntohl(trailer->rwind); if (rwind > RXRPC_TX_MAX_WINDOW) rwind = RXRPC_TX_MAX_WINDOW; @@ -691,7 +691,7 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, if (call->cong_ssthresh > rwind) call->cong_ssthresh = rwind; - mtu = min(ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU)); + mtu = min(ntohl(trailer->maxMTU), ntohl(trailer->ifMTU)); peer = call->peer; if (mtu < peer->maxdata) { @@ -837,7 +837,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) struct rxrpc_ack_summary summary = { 0 }; struct rxrpc_ackpacket ack; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_ackinfo info; + struct rxrpc_acktrailer trailer; rxrpc_serial_t ack_serial, acked_serial; rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt, since; int nr_acks, offset, ioffset; @@ -917,11 +917,11 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) goto send_response; } - info.rxMTU = 0; + trailer.maxMTU = 0; ioffset = offset + nr_acks + 3; - if (skb->len >= ioffset + sizeof(info) && - skb_copy_bits(skb, ioffset, &info, sizeof(info)) < 0) - return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack_info); + if (skb->len >= ioffset + sizeof(trailer) && + skb_copy_bits(skb, ioffset, &trailer, sizeof(trailer)) < 0) + return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack_trailer); if (nr_acks > 0) skb_condense(skb); @@ -950,8 +950,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) } /* Parse rwind and mtu sizes if provided. */ - if (info.rxMTU) - rxrpc_input_ackinfo(call, skb, &info); + if (trailer.maxMTU) + rxrpc_input_ack_trailer(call, skb, &trailer); if (first_soft_ack == 0) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_zero); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 828b145edc560..3803bf900a46e 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -83,7 +83,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, struct rxrpc_txbuf *txb, u16 *_rwind) { - struct rxrpc_ackinfo ackinfo; + struct rxrpc_acktrailer trailer; unsigned int qsize, sack, wrap, to; rxrpc_seq_t window, wtop; int rsize; @@ -126,16 +126,16 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, qsize = (window - 1) - call->rx_consumed; rsize = max_t(int, call->rx_winsize - qsize, 0); *_rwind = rsize; - ackinfo.rxMTU = htonl(rxrpc_rx_mtu); - ackinfo.maxMTU = htonl(mtu); - ackinfo.rwind = htonl(rsize); - ackinfo.jumbo_max = htonl(jmax); + trailer.maxMTU = htonl(rxrpc_rx_mtu); + trailer.ifMTU = htonl(mtu); + trailer.rwind = htonl(rsize); + trailer.jumbo_max = htonl(jmax); *ackp++ = 0; *ackp++ = 0; *ackp++ = 0; - memcpy(ackp, &ackinfo, sizeof(ackinfo)); - return txb->ack.nAcks + 3 + sizeof(ackinfo); + memcpy(ackp, &trailer, sizeof(trailer)); + return txb->ack.nAcks + 3 + sizeof(trailer); } /* diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h index e8ee4af43ca89..4fe6b4d20ada9 100644 --- a/net/rxrpc/protocol.h +++ b/net/rxrpc/protocol.h @@ -135,9 +135,9 @@ struct rxrpc_ackpacket { /* * ACK packets can have a further piece of information tagged on the end */ -struct rxrpc_ackinfo { - __be32 rxMTU; /* maximum Rx MTU size (bytes) [AFS 3.3] */ - __be32 maxMTU; /* maximum interface MTU size (bytes) [AFS 3.3] */ +struct rxrpc_acktrailer { + __be32 maxMTU; /* maximum Rx MTU size (bytes) [AFS 3.3] */ + __be32 ifMTU; /* maximum interface MTU size (bytes) [AFS 3.3] */ __be32 rwind; /* Rx window size (packets) [AFS 3.4] */ __be32 jumbo_max; /* max packets to stick into a jumbo packet [AFS 3.5] */ }; -- cgit 1.2.3-korg From d73f3a7488754b01a9b86154763f266dcde6ca4e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jan 2024 16:39:15 +0000 Subject: rxrpc: Strip barriers and atomics off of timer tracking Strip the atomic ops and barriering off of the call timer tracking as this is handled solely within the I/O thread, except for expect_term_by which is set by sendmsg(). Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- net/rxrpc/call_event.c | 32 ++++++++++++++++---------------- net/rxrpc/conn_client.c | 4 ++-- net/rxrpc/input.c | 15 ++++++--------- net/rxrpc/output.c | 18 ++++++++---------- 4 files changed, 32 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 77eacbfc5d456..84eedbb49fcb8 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -27,7 +27,7 @@ void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial, unsigned long ping_at = now + rxrpc_idle_ack_delay; if (time_before(ping_at, call->ping_at)) { - WRITE_ONCE(call->ping_at, ping_at); + call->ping_at = ping_at; rxrpc_reduce_call_timer(call, ping_at, now, rxrpc_timer_set_for_ping); trace_rxrpc_propose_ack(call, why, RXRPC_ACK_PING, serial); @@ -53,7 +53,7 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial, ack_at += READ_ONCE(call->tx_backoff); ack_at += now; if (time_before(ack_at, call->delay_ack_at)) { - WRITE_ONCE(call->delay_ack_at, ack_at); + call->delay_ack_at = ack_at; rxrpc_reduce_call_timer(call, ack_at, now, rxrpc_timer_set_for_ack); } @@ -220,7 +220,7 @@ no_resend: resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest))); resend_at += jiffies + rxrpc_get_rto_backoff(call->peer, !list_empty(&retrans_queue)); - WRITE_ONCE(call->resend_at, resend_at); + call->resend_at = resend_at; if (unacked) rxrpc_congestion_timeout(call); @@ -260,7 +260,7 @@ static void rxrpc_begin_service_reply(struct rxrpc_call *call) unsigned long now = jiffies; rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SEND_REPLY); - WRITE_ONCE(call->delay_ack_at, now + MAX_JIFFY_OFFSET); + call->delay_ack_at = now + MAX_JIFFY_OFFSET; if (call->ackr_reason == RXRPC_ACK_DELAY) call->ackr_reason = 0; trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now); @@ -399,13 +399,13 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) /* If we see our async-event poke, check for timeout trippage. */ now = jiffies; - t = READ_ONCE(call->expect_rx_by); + t = call->expect_rx_by; if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_normal, now); expired = true; } - t = READ_ONCE(call->expect_req_by); + t = call->expect_req_by; if (__rxrpc_call_state(call) == RXRPC_CALL_SERVER_RECV_REQUEST && time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now); @@ -418,41 +418,41 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) expired = true; } - t = READ_ONCE(call->delay_ack_at); + t = call->delay_ack_at; if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now); - cmpxchg(&call->delay_ack_at, t, now + MAX_JIFFY_OFFSET); + call->delay_ack_at = now + MAX_JIFFY_OFFSET; rxrpc_send_ACK(call, RXRPC_ACK_DELAY, 0, rxrpc_propose_ack_ping_for_lost_ack); } - t = READ_ONCE(call->ack_lost_at); + t = call->ack_lost_at; if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_lost_ack, now); - cmpxchg(&call->ack_lost_at, t, now + MAX_JIFFY_OFFSET); + call->ack_lost_at = now + MAX_JIFFY_OFFSET; set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events); } - t = READ_ONCE(call->keepalive_at); + t = call->keepalive_at; if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_keepalive, now); - cmpxchg(&call->keepalive_at, t, now + MAX_JIFFY_OFFSET); + call->keepalive_at = now + MAX_JIFFY_OFFSET; rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, rxrpc_propose_ack_ping_for_keepalive); } - t = READ_ONCE(call->ping_at); + t = call->ping_at; if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now); - cmpxchg(&call->ping_at, t, now + MAX_JIFFY_OFFSET); + call->ping_at = now + MAX_JIFFY_OFFSET; rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, rxrpc_propose_ack_ping_for_keepalive); } - t = READ_ONCE(call->resend_at); + t = call->resend_at; if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_resend, now); - cmpxchg(&call->resend_at, t, now + MAX_JIFFY_OFFSET); + call->resend_at = now + MAX_JIFFY_OFFSET; resend = true; } diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 3b9b267a4431e..d25bf1cf36700 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -636,7 +636,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { unsigned long final_ack_at = jiffies + 2; - WRITE_ONCE(chan->final_ack_at, final_ack_at); + chan->final_ack_at = final_ack_at; smp_wmb(); /* vs rxrpc_process_delayed_final_acks() */ set_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags); rxrpc_reduce_conn_timer(conn, final_ack_at); @@ -770,7 +770,7 @@ next: conn_expires_at = conn->idle_timestamp + expiry; - now = READ_ONCE(jiffies); + now = jiffies; if (time_after(conn_expires_at, now)) goto not_yet_expired; } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index ea2df62e05e79..e53a49accc16c 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -288,14 +288,12 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, static bool rxrpc_receiving_reply(struct rxrpc_call *call) { struct rxrpc_ack_summary summary = { 0 }; - unsigned long now, timo; + unsigned long now; rxrpc_seq_t top = READ_ONCE(call->tx_top); if (call->ackr_reason) { now = jiffies; - timo = now + MAX_JIFFY_OFFSET; - - WRITE_ONCE(call->delay_ack_at, timo); + call->delay_ack_at = now + MAX_JIFFY_OFFSET; trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now); } @@ -594,7 +592,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) if (timo) { now = jiffies; expect_req_by = now + timo; - WRITE_ONCE(call->expect_req_by, expect_req_by); + call->expect_req_by = now + timo; rxrpc_reduce_call_timer(call, expect_req_by, now, rxrpc_timer_set_for_idle); } @@ -1048,11 +1046,10 @@ void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb) timo = READ_ONCE(call->next_rx_timo); if (timo) { - unsigned long now = jiffies, expect_rx_by; + unsigned long now = jiffies; - expect_rx_by = now + timo; - WRITE_ONCE(call->expect_rx_by, expect_rx_by); - rxrpc_reduce_call_timer(call, expect_rx_by, now, + call->expect_rx_by = now + timo; + rxrpc_reduce_call_timer(call, call->expect_rx_by, now, rxrpc_timer_set_for_normal); } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 3803bf900a46e..2386b01b2231a 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -67,11 +67,10 @@ static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret) */ static void rxrpc_set_keepalive(struct rxrpc_call *call) { - unsigned long now = jiffies, keepalive_at = call->next_rx_timo / 6; + unsigned long now = jiffies; - keepalive_at += now; - WRITE_ONCE(call->keepalive_at, keepalive_at); - rxrpc_reduce_call_timer(call, keepalive_at, now, + call->keepalive_at = now + call->next_rx_timo / 6; + rxrpc_reduce_call_timer(call, call->keepalive_at, now, rxrpc_timer_set_for_keepalive); } @@ -449,7 +448,7 @@ done: ack_lost_at = rxrpc_get_rto_backoff(call->peer, false); ack_lost_at += nowj; - WRITE_ONCE(call->ack_lost_at, ack_lost_at); + call->ack_lost_at = ack_lost_at; rxrpc_reduce_call_timer(call, ack_lost_at, nowj, rxrpc_timer_set_for_lost_ack); } @@ -458,11 +457,10 @@ done: if (txb->seq == 1 && !test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) { - unsigned long nowj = jiffies, expect_rx_by; + unsigned long nowj = jiffies; - expect_rx_by = nowj + call->next_rx_timo; - WRITE_ONCE(call->expect_rx_by, expect_rx_by); - rxrpc_reduce_call_timer(call, expect_rx_by, nowj, + call->expect_rx_by = nowj + call->next_rx_timo; + rxrpc_reduce_call_timer(call, call->expect_rx_by, nowj, rxrpc_timer_set_for_normal); } @@ -724,7 +722,7 @@ void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb) unsigned long now = jiffies; unsigned long resend_at = now + call->peer->rto_j; - WRITE_ONCE(call->resend_at, resend_at); + call->resend_at = resend_at; rxrpc_reduce_call_timer(call, resend_at, now, rxrpc_timer_set_for_send); } -- cgit 1.2.3-korg From 693f9c13ec890b0ab36dfb9a5441fdce47255737 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 29 Jan 2024 22:29:58 +0000 Subject: rxrpc: Remove atomic handling on some fields only used in I/O thread call->tx_transmitted and call->acks_prev_seq don't need to be managed with cmpxchg() and barriers as it's only used within the singular I/O thread. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- net/rxrpc/call_event.c | 10 ++++------ net/rxrpc/output.c | 8 +++----- 2 files changed, 7 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 84eedbb49fcb8..1184518dcdb8b 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -115,7 +115,7 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) struct rxrpc_skb_priv *sp; struct rxrpc_txbuf *txb; unsigned long resend_at; - rxrpc_seq_t transmitted = READ_ONCE(call->tx_transmitted); + rxrpc_seq_t transmitted = call->tx_transmitted; ktime_t now, max_age, oldest, ack_ts; bool unacked = false; unsigned int i; @@ -184,16 +184,14 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) * seen. Anything between the soft-ACK table and that point will get * ACK'd or NACK'd in due course, so don't worry about it here; here we * need to consider retransmitting anything beyond that point. - * - * Note that ACK for a packet can beat the update of tx_transmitted. */ - if (after_eq(READ_ONCE(call->acks_prev_seq), READ_ONCE(call->tx_transmitted))) + if (after_eq(call->acks_prev_seq, call->tx_transmitted)) goto no_further_resend; list_for_each_entry_from(txb, &call->tx_buffer, call_link) { - if (before_eq(txb->seq, READ_ONCE(call->acks_prev_seq))) + if (before_eq(txb->seq, call->acks_prev_seq)) continue; - if (after(txb->seq, READ_ONCE(call->tx_transmitted))) + if (after(txb->seq, call->tx_transmitted)) break; /* Not transmitted yet */ if (ack && ack->reason == RXRPC_ACK_PING_RESPONSE && diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 2386b01b2231a..1e039b6f4494a 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -397,12 +397,10 @@ dont_set_request_ack: /* Track what we've attempted to transmit at least once so that the * retransmission algorithm doesn't try to resend what we haven't sent - * yet. However, this can race as we can receive an ACK before we get - * to this point. But, OTOH, if we won't get an ACK mentioning this - * packet unless the far side received it (though it could have - * discarded it anyway and NAK'd it). + * yet. */ - cmpxchg(&call->tx_transmitted, txb->seq - 1, txb->seq); + if (txb->seq == call->tx_transmitted + 1) + call->tx_transmitted = txb->seq; /* send the packet with the don't fragment bit set if we currently * think it's small enough */ -- cgit 1.2.3-korg From d32636982ce967110588c8445660b85685e26e5a Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 29 Jan 2024 22:38:31 +0000 Subject: rxrpc: Do lazy DF flag resetting Don't reset the DF flag after transmission, but rather set it when needed since it should be a fast op now that we call IP directly. This includes turning it off for RESPONSE packets and, for the moment, ACK packets. In future, we will need to turn it on for ACK packets used to do path MTU discovery. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- net/rxrpc/output.c | 4 ++-- net/rxrpc/rxkad.c | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 1e039b6f4494a..8aa8ba32eacc1 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -231,6 +231,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) txb->ack.previousPacket = htonl(call->rx_highest_seq); iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len); + rxrpc_local_dont_fragment(conn->local, false); ret = do_udp_sendmsg(conn->local->socket, &msg, len); call->peer->last_tx_at = ktime_get_seconds(); if (ret < 0) { @@ -406,6 +407,7 @@ dont_set_request_ack: * think it's small enough */ if (txb->len >= call->peer->maxdata) goto send_fragmentable; + rxrpc_local_dont_fragment(conn->local, true); txb->wire.flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS; txb->last_sent = ktime_get_real(); @@ -492,8 +494,6 @@ send_fragmentable: rxrpc_inc_stat(call->rxnet, stat_tx_data_send_frag); ret = do_udp_sendmsg(conn->local->socket, &msg, len); conn->peer->last_tx_at = ktime_get_seconds(); - - rxrpc_local_dont_fragment(conn->local, true); break; default: diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 28c9ce763be4f..e451ac90bfee5 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -726,7 +726,6 @@ static int rxkad_send_response(struct rxrpc_connection *conn, rxrpc_local_dont_fragment(conn->local, false); ret = kernel_sendmsg(conn->local->socket, &msg, iov, 3, len); - rxrpc_local_dont_fragment(conn->local, true); if (ret < 0) { trace_rxrpc_tx_fail(conn->debug_id, serial, ret, rxrpc_tx_point_rxkad_response); -- cgit 1.2.3-korg From 1ac6a8536c2cbb12b593f0f4ffd7816b15a484fa Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 29 Jan 2024 22:49:19 +0000 Subject: rxrpc: Merge together DF/non-DF branches of data Tx function Merge together the DF and non-DF branches of the transmission function and always set the flag to the right thing before transmitting. If we see -EMSGSIZE from udp_sendmsg(), turn off DF and retry. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- net/rxrpc/output.c | 54 +++++++++++++++--------------------------------------- 1 file changed, 15 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 8aa8ba32eacc1..e2c9e645fcfb9 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -323,8 +323,9 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) */ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) { - enum rxrpc_req_ack_trace why; struct rxrpc_connection *conn = call->conn; + enum rxrpc_req_ack_trace why; + enum rxrpc_tx_point frag; struct msghdr msg; struct kvec iov[1]; size_t len; @@ -405,11 +406,16 @@ dont_set_request_ack: /* send the packet with the don't fragment bit set if we currently * think it's small enough */ - if (txb->len >= call->peer->maxdata) - goto send_fragmentable; - rxrpc_local_dont_fragment(conn->local, true); + if (txb->len >= call->peer->maxdata) { + rxrpc_local_dont_fragment(conn->local, false); + frag = rxrpc_tx_point_call_data_frag; + } else { + rxrpc_local_dont_fragment(conn->local, true); + frag = rxrpc_tx_point_call_data_nofrag; + } txb->wire.flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS; +retry: txb->last_sent = ktime_get_real(); if (txb->flags & RXRPC_REQUEST_ACK) rtt_slot = rxrpc_begin_rtt_probe(call, txb->serial, rxrpc_rtt_tx_data); @@ -435,8 +441,11 @@ dont_set_request_ack: } rxrpc_tx_backoff(call, ret); - if (ret == -EMSGSIZE) - goto send_fragmentable; + if (ret == -EMSGSIZE && frag == rxrpc_tx_point_call_data_frag) { + rxrpc_local_dont_fragment(conn->local, false); + frag = rxrpc_tx_point_call_data_frag; + goto retry; + } done: if (ret >= 0) { @@ -478,39 +487,6 @@ done: _leave(" = %d [%u]", ret, call->peer->maxdata); return ret; - -send_fragmentable: - /* attempt to send this message with fragmentation enabled */ - _debug("send fragment"); - - txb->last_sent = ktime_get_real(); - if (txb->flags & RXRPC_REQUEST_ACK) - rtt_slot = rxrpc_begin_rtt_probe(call, txb->serial, rxrpc_rtt_tx_data); - - switch (conn->local->srx.transport.family) { - case AF_INET6: - case AF_INET: - rxrpc_local_dont_fragment(conn->local, false); - rxrpc_inc_stat(call->rxnet, stat_tx_data_send_frag); - ret = do_udp_sendmsg(conn->local->socket, &msg, len); - conn->peer->last_tx_at = ktime_get_seconds(); - break; - - default: - BUG(); - } - - if (ret < 0) { - rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail); - rxrpc_cancel_rtt_probe(call, txb->serial, rtt_slot); - trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, - rxrpc_tx_point_call_data_frag); - } else { - trace_rxrpc_tx_packet(call->debug_id, &txb->wire, - rxrpc_tx_point_call_data_frag); - } - rxrpc_tx_backoff(call, ret); - goto done; } /* -- cgit 1.2.3-korg From ff342bdc59f4a7431d0b58ce8bc2ef7d44cff15f Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 29 Jan 2024 23:07:43 +0000 Subject: rxrpc: Add a kvec[] to the rxrpc_txbuf struct Add a kvec[] to the rxrpc_txbuf struct to point to the contributory buffers for a packet. Start with just a single element for now, but this will be expanded later. Make the ACK sending function use it, which means that rxrpc_fill_out_ack() doesn't need to return the size of the sack table, padding and trailer. Make the data sending code use it, both in where sendmsg() packages code up into txbufs and where those txbufs are transmitted. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- net/rxrpc/ar-internal.h | 2 ++ net/rxrpc/output.c | 33 ++++++++++++--------------------- net/rxrpc/sendmsg.c | 8 +++++--- net/rxrpc/txbuf.c | 3 +++ 4 files changed, 22 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index c9a2882627aac..c6731f43a2d57 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -805,6 +805,8 @@ struct rxrpc_txbuf { #define RXRPC_TXBUF_RESENT 0x100 /* Set if has been resent */ __be16 cksum; /* Checksum to go in header */ u8 /*enum rxrpc_propose_ack_trace*/ ack_why; /* If ack, why */ + u8 nr_kvec; + struct kvec kvec[1]; struct { /* The packet for encrypting and DMA'ing. We align it such * that data[] aligns correctly for any crypto blocksize. diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index e2c9e645fcfb9..f2b10c3e4cc26 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -77,10 +77,10 @@ static void rxrpc_set_keepalive(struct rxrpc_call *call) /* * Fill out an ACK packet. */ -static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, - struct rxrpc_call *call, - struct rxrpc_txbuf *txb, - u16 *_rwind) +static void rxrpc_fill_out_ack(struct rxrpc_connection *conn, + struct rxrpc_call *call, + struct rxrpc_txbuf *txb, + u16 *_rwind) { struct rxrpc_acktrailer trailer; unsigned int qsize, sack, wrap, to; @@ -134,7 +134,9 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, *ackp++ = 0; *ackp++ = 0; memcpy(ackp, &trailer, sizeof(trailer)); - return txb->ack.nAcks + 3 + sizeof(trailer); + txb->kvec[0].iov_len = sizeof(txb->wire) + + sizeof(txb->ack) + txb->ack.nAcks + 3 + sizeof(trailer); + txb->len = txb->kvec[0].iov_len; } /* @@ -187,8 +189,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) { struct rxrpc_connection *conn; struct msghdr msg; - struct kvec iov[1]; - size_t len, n; int ret, rtt_slot = -1; u16 rwind; @@ -207,13 +207,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) txb->flags |= RXRPC_REQUEST_ACK; txb->wire.flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS; - n = rxrpc_fill_out_ack(conn, call, txb, &rwind); - if (n == 0) - return 0; - - iov[0].iov_base = &txb->wire; - iov[0].iov_len = sizeof(txb->wire) + sizeof(txb->ack) + n; - len = iov[0].iov_len; + rxrpc_fill_out_ack(conn, call, txb, &rwind); txb->serial = rxrpc_get_next_serial(conn); txb->wire.serial = htonl(txb->serial); @@ -230,9 +224,9 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) /* Grab the highest received seq as late as possible */ txb->ack.previousPacket = htonl(call->rx_highest_seq); - iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len); + iov_iter_kvec(&msg.msg_iter, WRITE, txb->kvec, txb->nr_kvec, txb->len); rxrpc_local_dont_fragment(conn->local, false); - ret = do_udp_sendmsg(conn->local->socket, &msg, len); + ret = do_udp_sendmsg(conn->local->socket, &msg, txb->len); call->peer->last_tx_at = ktime_get_seconds(); if (ret < 0) { trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, @@ -327,7 +321,6 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) enum rxrpc_req_ack_trace why; enum rxrpc_tx_point frag; struct msghdr msg; - struct kvec iov[1]; size_t len; int ret, rtt_slot = -1; @@ -342,10 +335,8 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) txb->seq == 1) txb->wire.userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE; - iov[0].iov_base = &txb->wire; - iov[0].iov_len = sizeof(txb->wire) + txb->len; - len = iov[0].iov_len; - iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len); + len = txb->kvec[0].iov_len; + iov_iter_kvec(&msg.msg_iter, WRITE, txb->kvec, txb->nr_kvec, len); msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 25c7c1d4f4c6d..1e81046ea8a60 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -362,7 +362,7 @@ reload: if (!txb) goto maybe_error; - txb->offset = offset; + txb->offset = offset + sizeof(struct rxrpc_wire_header); txb->space -= offset; txb->space = min_t(size_t, chunk, txb->space); } @@ -374,8 +374,8 @@ reload: size_t copy = min_t(size_t, txb->space, msg_data_left(msg)); _debug("add %zu", copy); - if (!copy_from_iter_full(txb->data + txb->offset, copy, - &msg->msg_iter)) + if (!copy_from_iter_full(txb->kvec[0].iov_base + txb->offset, + copy, &msg->msg_iter)) goto efault; _debug("added"); txb->space -= copy; @@ -404,6 +404,8 @@ reload: if (ret < 0) goto out; + txb->kvec[0].iov_len += txb->len; + txb->len = txb->kvec[0].iov_len; rxrpc_queue_packet(rx, call, txb, notify_end_tx); txb = NULL; } diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index 7273615afe94d..91e96cda6dc70 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -36,6 +36,9 @@ struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type, txb->seq = call->tx_prepared + 1; txb->serial = 0; txb->cksum = 0; + txb->nr_kvec = 1; + txb->kvec[0].iov_base = &txb->wire; + txb->kvec[0].iov_len = sizeof(txb->wire); txb->wire.epoch = htonl(call->conn->proto.epoch); txb->wire.cid = htonl(call->cid); txb->wire.callNumber = htonl(call->call_id); -- cgit 1.2.3-korg From 44125d5aaddadfe37a97e642218df184871187be Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 26 Jan 2024 10:47:39 +0000 Subject: rxrpc: Split up the DATA packet transmission function Split (sub)packet preparation and timestamping out of the DATA packet transmission function to make it easier to glue multiple txbufs together into a jumbo DATA packet. This will require preparation and timestamping of all the subpackets in a txbuf, and these functions provide convenient points to place the required iteration. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- net/rxrpc/ar-internal.h | 1 - net/rxrpc/output.c | 98 ++++++++++++++++++++++++++++++++++--------------- 2 files changed, 69 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index c6731f43a2d57..54550ab62adce 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -1166,7 +1166,6 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net) */ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb); int rxrpc_send_abort_packet(struct rxrpc_call *); -int rxrpc_send_data_packet(struct rxrpc_call *, struct rxrpc_txbuf *); void rxrpc_send_conn_abort(struct rxrpc_connection *conn); void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb); void rxrpc_send_keepalive(struct rxrpc_peer *); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index f2b10c3e4cc26..25b8fc9aef978 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -313,37 +313,23 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) } /* - * send a packet through the transport endpoint + * Prepare a (sub)packet for transmission. */ -int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) +static void rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_txbuf *txb, + rxrpc_serial_t serial) { - struct rxrpc_connection *conn = call->conn; + struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; enum rxrpc_req_ack_trace why; - enum rxrpc_tx_point frag; - struct msghdr msg; - size_t len; - int ret, rtt_slot = -1; + struct rxrpc_connection *conn = call->conn; _enter("%x,{%d}", txb->seq, txb->len); - /* Each transmission of a Tx packet+ needs a new serial number */ - txb->serial = rxrpc_get_next_serial(conn); - txb->wire.serial = htonl(txb->serial); - txb->wire.cksum = txb->cksum; + txb->serial = serial; if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) && txb->seq == 1) txb->wire.userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE; - len = txb->kvec[0].iov_len; - iov_iter_kvec(&msg.msg_iter, WRITE, txb->kvec, txb->nr_kvec, len); - - msg.msg_name = &call->peer->srx.transport; - msg.msg_namelen = call->peer->srx.transport_len; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; - /* If our RTT cache needs working on, request an ACK. Also request * ACKs if a DATA packet appears to have been lost. * @@ -376,6 +362,59 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) txb->flags |= RXRPC_REQUEST_ACK; dont_set_request_ack: + whdr->flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS; + whdr->serial = htonl(txb->serial); + whdr->cksum = txb->cksum; + + trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags, false); +} + +/* + * Prepare a packet for transmission. + */ +static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) +{ + rxrpc_serial_t serial; + + /* Each transmission of a Tx packet needs a new serial number */ + serial = rxrpc_get_next_serial(call->conn); + + rxrpc_prepare_data_subpacket(call, txb, serial); + + return txb->len; +} + +/* + * Set the times on a packet before transmission + */ +static int rxrpc_tstamp_data_packets(struct rxrpc_call *call, struct rxrpc_txbuf *txb) +{ + ktime_t tstamp = ktime_get_real(); + int rtt_slot = -1; + + txb->last_sent = tstamp; + if (txb->flags & RXRPC_REQUEST_ACK) + rtt_slot = rxrpc_begin_rtt_probe(call, txb->serial, rxrpc_rtt_tx_data); + + return rtt_slot; +} + +/* + * send a packet through the transport endpoint + */ +static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) +{ + struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; + struct rxrpc_connection *conn = call->conn; + enum rxrpc_tx_point frag; + struct msghdr msg; + size_t len; + int ret, rtt_slot = -1; + + _enter("%x,{%d}", txb->seq, txb->len); + + len = rxrpc_prepare_data_packet(call, txb); + if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { static int lose; if ((lose++ & 7) == 7) { @@ -386,7 +425,13 @@ dont_set_request_ack: } } - trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags, false); + iov_iter_kvec(&msg.msg_iter, WRITE, txb->kvec, txb->nr_kvec, len); + + msg.msg_name = &call->peer->srx.transport; + msg.msg_namelen = call->peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; /* Track what we've attempted to transmit at least once so that the * retransmission algorithm doesn't try to resend what we haven't sent @@ -405,11 +450,8 @@ dont_set_request_ack: frag = rxrpc_tx_point_call_data_nofrag; } - txb->wire.flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS; retry: - txb->last_sent = ktime_get_real(); - if (txb->flags & RXRPC_REQUEST_ACK) - rtt_slot = rxrpc_begin_rtt_probe(call, txb->serial, rxrpc_rtt_tx_data); + rtt_slot = rxrpc_tstamp_data_packets(call, txb); /* send the packet by UDP * - returns -EMSGSIZE if UDP would have to fragment the packet @@ -424,11 +466,9 @@ retry: if (ret < 0) { rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail); rxrpc_cancel_rtt_probe(call, txb->serial, rtt_slot); - trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, - rxrpc_tx_point_call_data_nofrag); + trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, frag); } else { - trace_rxrpc_tx_packet(call->debug_id, &txb->wire, - rxrpc_tx_point_call_data_nofrag); + trace_rxrpc_tx_packet(call->debug_id, whdr, frag); } rxrpc_tx_backoff(call, ret); -- cgit 1.2.3-korg From a1c9af4d4467354132417c2d8db10d6e928a7f77 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jan 2024 08:24:58 +0000 Subject: rxrpc: Don't pick values out of the wire header when setting up security Don't pick values out of the wire header in rxkad when setting up DATA packet security, but rather use other sources. This makes it easier to get rid of txb->wire. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- net/rxrpc/rxkad.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index e451ac90bfee5..ef0849c8329ca 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -259,7 +259,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, _enter(""); - check = txb->seq ^ ntohl(txb->wire.callNumber); + check = txb->seq ^ call->call_id; hdr->data_size = htonl((u32)check << 16 | txb->len); txb->len += sizeof(struct rxkad_level1_hdr); @@ -302,7 +302,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, _enter(""); - check = txb->seq ^ ntohl(txb->wire.callNumber); + check = txb->seq ^ call->call_id; rxkhdr->data_size = htonl(txb->len | (u32)check << 16); rxkhdr->checksum = 0; @@ -362,9 +362,9 @@ static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) memcpy(&iv, call->conn->rxkad.csum_iv.x, sizeof(iv)); /* calculate the security checksum */ - x = (ntohl(txb->wire.cid) & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); + x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); x |= txb->seq & 0x3fffffff; - crypto.buf[0] = txb->wire.callNumber; + crypto.buf[0] = htonl(call->call_id); crypto.buf[1] = htonl(x); sg_init_one(&sg, crypto.buf, 8); -- cgit 1.2.3-korg From 99afb28c676cc36747cdb18ffaa6692a60888853 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jan 2024 08:35:25 +0000 Subject: rxrpc: Move rxrpc_send_ACK() to output.c with rxrpc_send_ack_packet() Move rxrpc_send_ACK() to output.c to so that it is with rxrpc_send_ack_packet() prior to merging the two. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- net/rxrpc/ar-internal.h | 4 ++-- net/rxrpc/call_event.c | 37 ------------------------------------- net/rxrpc/output.c | 39 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 40 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 54550ab62adce..a8795ef0d669b 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -873,7 +873,6 @@ int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long); */ void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial, enum rxrpc_propose_ack_trace why); -void rxrpc_send_ACK(struct rxrpc_call *, u8, rxrpc_serial_t, enum rxrpc_propose_ack_trace); void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t, enum rxrpc_propose_ack_trace); void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *); @@ -1164,7 +1163,8 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net) /* * output.c */ -int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb); +void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, + rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why); int rxrpc_send_abort_packet(struct rxrpc_call *); void rxrpc_send_conn_abort(struct rxrpc_connection *conn); void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 1184518dcdb8b..e19ea54dce545 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -61,43 +61,6 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial, trace_rxrpc_propose_ack(call, why, RXRPC_ACK_DELAY, serial); } -/* - * Queue an ACK for immediate transmission. - */ -void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, - rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why) -{ - struct rxrpc_txbuf *txb; - - if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) - return; - - rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]); - - txb = rxrpc_alloc_txbuf(call, RXRPC_PACKET_TYPE_ACK, - rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS); - if (!txb) { - kleave(" = -ENOMEM"); - return; - } - - txb->ack_why = why; - txb->wire.seq = 0; - txb->wire.type = RXRPC_PACKET_TYPE_ACK; - txb->flags |= RXRPC_SLOW_START_OK; - txb->ack.bufferSpace = 0; - txb->ack.maxSkew = 0; - txb->ack.firstPacket = 0; - txb->ack.previousPacket = 0; - txb->ack.serial = htonl(serial); - txb->ack.reason = ack_reason; - txb->ack.nAcks = 0; - - trace_rxrpc_send_ack(call, why, ack_reason, serial); - rxrpc_send_ack_packet(call, txb); - rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx); -} - /* * Handle congestion being detected by the retransmit timeout. */ diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 25b8fc9aef978..ec9ae9c6c492f 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -185,7 +185,7 @@ static void rxrpc_cancel_rtt_probe(struct rxrpc_call *call, /* * Transmit an ACK packet. */ -int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) +static int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) { struct rxrpc_connection *conn; struct msghdr msg; @@ -248,6 +248,43 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) return ret; } +/* + * Queue an ACK for immediate transmission. + */ +void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, + rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why) +{ + struct rxrpc_txbuf *txb; + + if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) + return; + + rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]); + + txb = rxrpc_alloc_txbuf(call, RXRPC_PACKET_TYPE_ACK, + rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS); + if (!txb) { + kleave(" = -ENOMEM"); + return; + } + + txb->ack_why = why; + txb->wire.seq = 0; + txb->wire.type = RXRPC_PACKET_TYPE_ACK; + txb->flags |= RXRPC_SLOW_START_OK; + txb->ack.bufferSpace = 0; + txb->ack.maxSkew = 0; + txb->ack.firstPacket = 0; + txb->ack.previousPacket = 0; + txb->ack.serial = htonl(serial); + txb->ack.reason = ack_reason; + txb->ack.nAcks = 0; + + trace_rxrpc_send_ack(call, why, ack_reason, serial); + rxrpc_send_ack_packet(call, txb); + rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx); +} + /* * Send an ABORT call packet. */ -- cgit 1.2.3-korg From d289ab65b89c1d4d88417cb6c03e923f21f95fae Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:26 +0000 Subject: ipv6: annotate data-races around cnf.disable_ipv6 disable_ipv6 is read locklessly, add appropriate READ_ONCE() and WRITE_ONCE() annotations. v2: do not preload net before rtnl_trylock() in addrconf_disable_ipv6() (Jiri) Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 9 +++++---- net/ipv6/ip6_input.c | 4 ++-- net/ipv6/ip6_output.c | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9e949403c1363..c965400fb9340 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4214,7 +4214,7 @@ static void addrconf_dad_work(struct work_struct *w) if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) && ipv6_addr_equal(&ifp->addr, &addr)) { /* DAD failed for link-local based on MAC */ - idev->cnf.disable_ipv6 = 1; + WRITE_ONCE(idev->cnf.disable_ipv6, 1); pr_info("%s: IPv6 being disabled!\n", ifp->idev->dev->name); @@ -6389,7 +6389,8 @@ static void addrconf_disable_change(struct net *net, __s32 newf) idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.disable_ipv6) ^ (!newf); - idev->cnf.disable_ipv6 = newf; + + WRITE_ONCE(idev->cnf.disable_ipv6, newf); if (changed) dev_disable_change(idev); } @@ -6406,7 +6407,7 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) net = (struct net *)table->extra2; old = *p; - *p = newf; + WRITE_ONCE(*p, newf); if (p == &net->ipv6.devconf_dflt->disable_ipv6) { rtnl_unlock(); @@ -6414,7 +6415,7 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) } if (p == &net->ipv6.devconf_all->disable_ipv6) { - net->ipv6.devconf_dflt->disable_ipv6 = newf; + WRITE_ONCE(net->ipv6.devconf_dflt->disable_ipv6, newf); addrconf_disable_change(net, newf); } else if ((!newf) ^ (!old)) dev_disable_change((struct inet6_dev *)table->extra1); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index b8378814532ce..1ba97933c74fb 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -168,9 +168,9 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, SKB_DR_SET(reason, NOT_SPECIFIED); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL || - !idev || unlikely(idev->cnf.disable_ipv6)) { + !idev || unlikely(READ_ONCE(idev->cnf.disable_ipv6))) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); - if (idev && unlikely(idev->cnf.disable_ipv6)) + if (idev && unlikely(READ_ONCE(idev->cnf.disable_ipv6))) SKB_DR_SET(reason, IPV6DISABLED); goto drop; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 31b86fe661aa6..0559bd0005858 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -234,7 +234,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; - if (unlikely(idev->cnf.disable_ipv6)) { + if (unlikely(READ_ONCE(idev->cnf.disable_ipv6))) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED); return 0; -- cgit 1.2.3-korg From 553ced03b227c99f0bf68d86f7b45ed7125563f2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:27 +0000 Subject: ipv6: addrconf_disable_ipv6() optimization Writing over /proc/sys/net/ipv6/conf/default/disable_ipv6 does not need to hold RTNL. v3: remove a wrong change (Jakub Kicinski feedback) Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c965400fb9340..6975685f55ea9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6399,21 +6399,20 @@ static void addrconf_disable_change(struct net *net, __s32 newf) static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) { - struct net *net; + struct net *net = (struct net *)table->extra2; int old; + if (p == &net->ipv6.devconf_dflt->disable_ipv6) { + WRITE_ONCE(*p, newf); + return 0; + } + if (!rtnl_trylock()) return restart_syscall(); - net = (struct net *)table->extra2; old = *p; WRITE_ONCE(*p, newf); - if (p == &net->ipv6.devconf_dflt->disable_ipv6) { - rtnl_unlock(); - return 0; - } - if (p == &net->ipv6.devconf_all->disable_ipv6) { WRITE_ONCE(net->ipv6.devconf_dflt->disable_ipv6, newf); addrconf_disable_change(net, newf); -- cgit 1.2.3-korg From e7135f484994494a38071e3653a83d21d305f50c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:28 +0000 Subject: ipv6: annotate data-races around cnf.mtu6 idev->cnf.mtu6 might be read locklessly, add appropriate READ_ONCE() and WRITE_ONCE() annotations. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 +- net/ipv6/addrconf.c | 4 ++-- net/ipv6/ndisc.c | 4 ++-- net/ipv6/route.c | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 52a51c69aa9de..a30c6aa9e5cf3 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -332,7 +332,7 @@ static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst rcu_read_lock(); idev = __in6_dev_get(dst->dev); if (idev) - mtu = idev->cnf.mtu6; + mtu = READ_ONCE(idev->cnf.mtu6); rcu_read_unlock(); out: diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6975685f55ea9..1b534edc7ca2c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3671,7 +3671,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (idev) { rt6_mtu_change(dev, dev->mtu); - idev->cnf.mtu6 = dev->mtu; + WRITE_ONCE(idev->cnf.mtu6, dev->mtu); break; } @@ -3763,7 +3763,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) { rt6_mtu_change(dev, dev->mtu); - idev->cnf.mtu6 = dev->mtu; + WRITE_ONCE(idev->cnf.mtu6, dev->mtu); } WRITE_ONCE(idev->tstamp, jiffies); inet6_ifinfo_notify(RTM_NEWLINK, idev); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 8523f0595b018..e96d79cd34d27 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1578,8 +1578,8 @@ skip_routeinfo: if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) { ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu); - } else if (in6_dev->cnf.mtu6 != mtu) { - in6_dev->cnf.mtu6 = mtu; + } else if (READ_ONCE(in6_dev->cnf.mtu6) != mtu) { + WRITE_ONCE(in6_dev->cnf.mtu6, mtu); fib6_metric_set(rt, RTAX_MTU, mtu); rt6_mtu_change(skb->dev, mtu); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 707d65bc9c0e5..66c685b0b6199 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1596,7 +1596,7 @@ static unsigned int fib6_mtu(const struct fib6_result *res) rcu_read_lock(); idev = __in6_dev_get(dev); - mtu = idev->cnf.mtu6; + mtu = READ_ONCE(idev->cnf.mtu6); rcu_read_unlock(); } @@ -3249,8 +3249,8 @@ u32 ip6_mtu_from_fib6(const struct fib6_result *res, mtu = IPV6_MIN_MTU; idev = __in6_dev_get(dev); - if (idev && idev->cnf.mtu6 > mtu) - mtu = idev->cnf.mtu6; + if (idev) + mtu = max_t(u32, mtu, READ_ONCE(idev->cnf.mtu6)); } mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); -- cgit 1.2.3-korg From e0bb2675fea2783c45bb95d74f00c55156720863 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:29 +0000 Subject: ipv6: annotate data-races around cnf.hop_limit idev->cnf.hop_limit and net->ipv6.devconf_all->hop_limit might be read locklessly, add appropriate READ_ONCE() and WRITE_ONCE() annotations. Signed-off-by: Eric Dumazet Acked-by: Florian Westphal # for netfilter parts Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/action.c | 2 +- net/ipv6/ipv6_sockglue.c | 2 +- net/ipv6/ndisc.c | 2 +- net/ipv6/netfilter/nf_reject_ipv6.c | 4 ++-- net/ipv6/output_core.c | 4 ++-- net/netfilter/nf_synproxy_core.c | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 2b383d92d7f57..2c3f629079584 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -460,7 +460,7 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, set_tun->ttl = ip6_dst_hoplimit(dst); dst_release(dst); } else { - set_tun->ttl = net->ipv6.devconf_all->hop_limit; + set_tun->ttl = READ_ONCE(net->ipv6.devconf_all->hop_limit); } #endif } else { diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 56c3c467f9deb..f61d977ac0528 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1346,7 +1346,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } if (val < 0) - val = sock_net(sk)->ipv6.devconf_all->hop_limit; + val = READ_ONCE(sock_net(sk)->ipv6.devconf_all->hop_limit); break; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index e96d79cd34d27..9c9c31268432e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1423,7 +1423,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) if (in6_dev->cnf.accept_ra_min_hop_limit < 256 && ra_msg->icmph.icmp6_hop_limit) { if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) { - in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; + WRITE_ONCE(in6_dev->cnf.hop_limit, ra_msg->icmph.icmp6_hop_limit); fib6_metric_set(rt, RTAX_HOPLIMIT, ra_msg->icmph.icmp6_hop_limit); } else { diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 196dd4ecb5e21..dedee264b8f6c 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -83,7 +83,7 @@ struct sk_buff *nf_reject_skb_v6_tcp_reset(struct net *net, skb_reserve(nskb, LL_MAX_HEADER); nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, - net->ipv6.devconf_all->hop_limit); + READ_ONCE(net->ipv6.devconf_all->hop_limit)); nf_reject_ip6_tcphdr_put(nskb, oldskb, oth, otcplen); nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr)); @@ -124,7 +124,7 @@ struct sk_buff *nf_reject_skb_v6_unreach(struct net *net, skb_reserve(nskb, LL_MAX_HEADER); nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_ICMPV6, - net->ipv6.devconf_all->hop_limit); + READ_ONCE(net->ipv6.devconf_all->hop_limit)); skb_reset_transport_header(nskb); icmp6h = skb_put_zero(nskb, sizeof(struct icmp6hdr)); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index b5205311f372b..806d4b5dd1e60 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -111,9 +111,9 @@ int ip6_dst_hoplimit(struct dst_entry *dst) rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) - hoplimit = idev->cnf.hop_limit; + hoplimit = READ_ONCE(idev->cnf.hop_limit); else - hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; + hoplimit = READ_ONCE(dev_net(dev)->ipv6.devconf_all->hop_limit); rcu_read_unlock(); } return hoplimit; diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index fbbc4fd373495..5b140c12b7dfa 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -800,7 +800,7 @@ synproxy_build_ip_ipv6(struct net *net, struct sk_buff *skb, skb_reset_network_header(skb); iph = skb_put(skb, sizeof(*iph)); ip6_flow_hdr(iph, 0, 0); - iph->hop_limit = net->ipv6.devconf_all->hop_limit; + iph->hop_limit = READ_ONCE(net->ipv6.devconf_all->hop_limit); iph->nexthdr = IPPROTO_TCP; iph->saddr = *saddr; iph->daddr = *daddr; -- cgit 1.2.3-korg From 32f754176e889cdfe989ef08ece19859427755df Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:30 +0000 Subject: ipv6: annotate data-races around cnf.forwarding idev->cnf.forwarding and net->ipv6.devconf_all->forwarding might be read locklessly, add appropriate READ_ONCE() and WRITE_ONCE() annotations. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/usb/cdc_mbim.c | 2 +- include/net/ipv6.h | 8 +++++--- net/core/filter.c | 2 +- net/ipv6/addrconf.c | 10 ++++++---- net/ipv6/ip6_output.c | 2 +- net/ipv6/ndisc.c | 11 ++++++----- net/ipv6/route.c | 4 ++-- 7 files changed, 22 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index cd4083e0b3b9e..e13e4920ee9b2 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -339,7 +339,7 @@ static void do_neigh_solicit(struct usbnet *dev, u8 *buf, u16 tci) in6_dev = in6_dev_get(netdev); if (!in6_dev) goto out; - is_router = !!in6_dev->cnf.forwarding; + is_router = !!READ_ONCE(in6_dev->cnf.forwarding); in6_dev_put(in6_dev); /* ipv6_stub != NULL if in6_dev_get returned an inet6_dev */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index cf25ea21d770d..88a8e554f7a12 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -534,13 +534,15 @@ static inline int ipv6_hopopt_jumbo_remove(struct sk_buff *skb) return 0; } -static inline bool ipv6_accept_ra(struct inet6_dev *idev) +static inline bool ipv6_accept_ra(const struct inet6_dev *idev) { + s32 accept_ra = READ_ONCE(idev->cnf.accept_ra); + /* If forwarding is enabled, RA are not accepted unless the special * hybrid mode (accept_ra=2) is enabled. */ - return idev->cnf.forwarding ? idev->cnf.accept_ra == 2 : - idev->cnf.accept_ra; + return READ_ONCE(idev->cnf.forwarding) ? accept_ra == 2 : + accept_ra; } #define IPV6_FRAG_HIGH_THRESH (4 * 1024*1024) /* 4194304 */ diff --git a/net/core/filter.c b/net/core/filter.c index 358870408a51e..58e8e1a70aa75 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5988,7 +5988,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, return -ENODEV; idev = __in6_dev_get_safely(dev); - if (unlikely(!idev || !idev->cnf.forwarding)) + if (unlikely(!idev || !READ_ONCE(idev->cnf.forwarding))) return BPF_FIB_LKUP_RET_FWD_DISABLED; if (flags & BPF_FIB_LOOKUP_OUTPUT) { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1b534edc7ca2c..ec99e393f6a91 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -551,7 +551,8 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, goto out; if ((all || type == NETCONFA_FORWARDING) && - nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0) + nla_put_s32(skb, NETCONFA_FORWARDING, + READ_ONCE(devconf->forwarding)) < 0) goto nla_put_failure; #ifdef CONFIG_IPV6_MROUTE if ((all || type == NETCONFA_MC_FORWARDING) && @@ -869,7 +870,8 @@ static void addrconf_forward_change(struct net *net, __s32 newf) idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.forwarding) ^ (!newf); - idev->cnf.forwarding = newf; + + WRITE_ONCE(idev->cnf.forwarding, newf); if (changed) dev_forward_change(idev); } @@ -886,7 +888,7 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) net = (struct net *)table->extra2; old = *p; - *p = newf; + WRITE_ONCE(*p, newf); if (p == &net->ipv6.devconf_dflt->forwarding) { if ((!newf) ^ (!old)) @@ -901,7 +903,7 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) if (p == &net->ipv6.devconf_all->forwarding) { int old_dflt = net->ipv6.devconf_dflt->forwarding; - net->ipv6.devconf_dflt->forwarding = newf; + WRITE_ONCE(net->ipv6.devconf_dflt->forwarding, newf); if ((!newf) ^ (!old_dflt)) inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_FORWARDING, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 0559bd0005858..444be8c84cc57 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -501,7 +501,7 @@ int ip6_forward(struct sk_buff *skb) u32 mtu; idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); - if (net->ipv6.devconf_all->forwarding == 0) + if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0) goto error; if (skb->pkt_type != PACKET_HOST) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 9c9c31268432e..1fb5e37bc78be 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -903,7 +903,7 @@ have_ifp: } if (ipv6_chk_acast_addr(net, dev, &msg->target) || - (idev->cnf.forwarding && + (READ_ONCE(idev->cnf.forwarding) && (net->ipv6.devconf_all->proxy_ndp || idev->cnf.proxy_ndp) && (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && @@ -929,7 +929,7 @@ have_ifp: } if (is_router < 0) - is_router = idev->cnf.forwarding; + is_router = READ_ONCE(idev->cnf.forwarding); if (dad) { ndisc_send_na(dev, &in6addr_linklocal_allnodes, &msg->target, @@ -1080,7 +1080,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) * Note that we don't do a (daddr == all-routers-mcast) check. */ new_state = msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE; - if (!neigh && lladdr && idev && idev->cnf.forwarding) { + if (!neigh && lladdr && idev && READ_ONCE(idev->cnf.forwarding)) { if (accept_untracked_na(dev, saddr)) { neigh = neigh_create(&nd_tbl, &msg->target, dev); new_state = NUD_STALE; @@ -1100,7 +1100,8 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) * has already sent a NA to us. */ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && - net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp && + READ_ONCE(net->ipv6.devconf_all->forwarding) && + net->ipv6.devconf_all->proxy_ndp && pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) { /* XXX: idev->cnf.proxy_ndp */ goto out; @@ -1148,7 +1149,7 @@ static enum skb_drop_reason ndisc_recv_rs(struct sk_buff *skb) } /* Don't accept RS if we're not in router mode */ - if (!idev->cnf.forwarding) + if (!READ_ONCE(idev->cnf.forwarding)) goto out; /* diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 66c685b0b6199..6a2b53de48182 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2220,7 +2220,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, strict |= flags & RT6_LOOKUP_F_IFACE; strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE; - if (net->ipv6.devconf_all->forwarding == 0) + if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0) strict |= RT6_LOOKUP_F_REACHABLE; rcu_read_lock(); @@ -4149,7 +4149,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu in6_dev = __in6_dev_get(skb->dev); if (!in6_dev) return; - if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) + if (READ_ONCE(in6_dev->cnf.forwarding) || !in6_dev->cnf.accept_redirects) return; /* RFC2461 8.1: -- cgit 1.2.3-korg From ddea75d344dd87cf7f5d79efb1aab80b493f8393 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:31 +0000 Subject: ipv6: annotate data-races in ndisc_router_discovery() Annotate reads from in6_dev->cnf.XXX fields, as they could change concurrently. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 1fb5e37bc78be..f6430db249401 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1319,7 +1319,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) if (old_if_flags != in6_dev->if_flags) send_ifinfo_notify = true; - if (!in6_dev->cnf.accept_ra_defrtr) { + if (!READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) { ND_PRINTK(2, info, "RA: %s, defrtr is false for dev: %s\n", __func__, skb->dev->name); @@ -1327,7 +1327,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) } lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime); - if (lifetime != 0 && lifetime < in6_dev->cnf.accept_ra_min_lft) { + if (lifetime != 0 && + lifetime < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) { ND_PRINTK(2, info, "RA: router lifetime (%ds) is too short: %s\n", lifetime, skb->dev->name); @@ -1338,7 +1339,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) * accept_ra_from_local is set to true. */ net = dev_net(in6_dev->dev); - if (!in6_dev->cnf.accept_ra_from_local && + if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) && ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) { ND_PRINTK(2, info, "RA from local address detected on dev: %s: default router ignored\n", @@ -1350,7 +1351,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) pref = ra_msg->icmph.icmp6_router_pref; /* 10b is handled as if it were 00b (medium) */ if (pref == ICMPV6_ROUTER_PREF_INVALID || - !in6_dev->cnf.accept_ra_rtr_pref) + !READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref)) pref = ICMPV6_ROUTER_PREF_MEDIUM; #endif /* routes added from RAs do not use nexthop objects */ @@ -1421,10 +1422,12 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) spin_unlock_bh(&table->tb6_lock); } - if (in6_dev->cnf.accept_ra_min_hop_limit < 256 && + if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) < 256 && ra_msg->icmph.icmp6_hop_limit) { - if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) { - WRITE_ONCE(in6_dev->cnf.hop_limit, ra_msg->icmph.icmp6_hop_limit); + if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) <= + ra_msg->icmph.icmp6_hop_limit) { + WRITE_ONCE(in6_dev->cnf.hop_limit, + ra_msg->icmph.icmp6_hop_limit); fib6_metric_set(rt, RTAX_HOPLIMIT, ra_msg->icmph.icmp6_hop_limit); } else { @@ -1506,7 +1509,7 @@ skip_linkparms: } #ifdef CONFIG_IPV6_ROUTE_INFO - if (!in6_dev->cnf.accept_ra_from_local && + if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) && ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) { ND_PRINTK(2, info, @@ -1515,7 +1518,7 @@ skip_linkparms: goto skip_routeinfo; } - if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) { + if (READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref) && ndopts.nd_opts_ri) { struct nd_opt_hdr *p; for (p = ndopts.nd_opts_ri; p; @@ -1527,14 +1530,14 @@ skip_linkparms: continue; #endif if (ri->prefix_len == 0 && - !in6_dev->cnf.accept_ra_defrtr) + !READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) continue; if (ri->lifetime != 0 && - ntohl(ri->lifetime) < in6_dev->cnf.accept_ra_min_lft) + ntohl(ri->lifetime) < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) continue; - if (ri->prefix_len < in6_dev->cnf.accept_ra_rt_info_min_plen) + if (ri->prefix_len < READ_ONCE(in6_dev->cnf.accept_ra_rt_info_min_plen)) continue; - if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen) + if (ri->prefix_len > READ_ONCE(in6_dev->cnf.accept_ra_rt_info_max_plen)) continue; rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3, &ipv6_hdr(skb)->saddr); @@ -1554,7 +1557,7 @@ skip_routeinfo: } #endif - if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) { + if (READ_ONCE(in6_dev->cnf.accept_ra_pinfo) && ndopts.nd_opts_pi) { struct nd_opt_hdr *p; for (p = ndopts.nd_opts_pi; p; @@ -1565,7 +1568,7 @@ skip_routeinfo: } } - if (ndopts.nd_opts_mtu && in6_dev->cnf.accept_ra_mtu) { + if (ndopts.nd_opts_mtu && READ_ONCE(in6_dev->cnf.accept_ra_mtu)) { __be32 n; u32 mtu; -- cgit 1.2.3-korg From fca34cc075996767fbbdb6252be9ddd21c34c920 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:32 +0000 Subject: ipv6: annotate data-races around idev->cnf.ignore_routes_with_linkdown idev->cnf.ignore_routes_with_linkdown can be used without any locks, add appropriate annotations. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/addrconf.h | 2 +- net/ipv6/addrconf.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 30d6f1e84e465..9d06eb945509e 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -417,7 +417,7 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev) if (unlikely(!idev)) return true; - return !!idev->cnf.ignore_routes_with_linkdown; + return !!READ_ONCE(idev->cnf.ignore_routes_with_linkdown); } void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ec99e393f6a91..82b8d90afb6aa 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -566,7 +566,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, - devconf->ignore_routes_with_linkdown) < 0) + READ_ONCE(devconf->ignore_routes_with_linkdown)) < 0) goto nla_put_failure; out: @@ -935,7 +935,7 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf) if (idev) { int changed = (!idev->cnf.ignore_routes_with_linkdown) ^ (!newf); - idev->cnf.ignore_routes_with_linkdown = newf; + WRITE_ONCE(idev->cnf.ignore_routes_with_linkdown, newf); if (changed) inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, @@ -956,7 +956,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf) net = (struct net *)table->extra2; old = *p; - *p = newf; + WRITE_ONCE(*p, newf); if (p == &net->ipv6.devconf_dflt->ignore_routes_with_linkdown) { if ((!newf) ^ (!old)) @@ -970,7 +970,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf) } if (p == &net->ipv6.devconf_all->ignore_routes_with_linkdown) { - net->ipv6.devconf_dflt->ignore_routes_with_linkdown = newf; + WRITE_ONCE(net->ipv6.devconf_dflt->ignore_routes_with_linkdown, newf); addrconf_linkdown_change(net, newf); if ((!newf) ^ (!old)) inet6_netconf_notify_devconf(net, -- cgit 1.2.3-korg From e248948a4471c6efca19f7c5e657b30ee67a6485 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:33 +0000 Subject: ipv6: annotate data-races in rt6_probe() Use READ_ONCE() while reading idev->cnf.rtr_probe_interval while its value could be changed. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/route.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6a2b53de48182..1b897c57c55fe 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -645,14 +645,15 @@ static void rt6_probe(struct fib6_nh *fib6_nh) write_lock_bh(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, - neigh->updated + idev->cnf.rtr_probe_interval)) { + neigh->updated + + READ_ONCE(idev->cnf.rtr_probe_interval))) { work = kmalloc(sizeof(*work), GFP_ATOMIC); if (work) __neigh_set_probe_once(neigh); } write_unlock_bh(&neigh->lock); } else if (time_after(jiffies, last_probe + - idev->cnf.rtr_probe_interval)) { + READ_ONCE(idev->cnf.rtr_probe_interval))) { work = kmalloc(sizeof(*work), GFP_ATOMIC); } -- cgit 1.2.3-korg From a8fbd4d90720b6c930661ed593d54aba77cec3c2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:34 +0000 Subject: ipv6: annotate data-races around devconf->proxy_ndp devconf->proxy_ndp can be read and written locklessly, add appropriate annotations. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 ++- net/ipv6/ip6_output.c | 2 +- net/ipv6/ndisc.c | 5 +++-- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 82b8d90afb6aa..409aa5ddb8569 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -561,7 +561,8 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, goto nla_put_failure; #endif if ((all || type == NETCONFA_PROXY_NEIGH) && - nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0) + nla_put_s32(skb, NETCONFA_PROXY_NEIGH, + READ_ONCE(devconf->proxy_ndp)) < 0) goto nla_put_failure; if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 444be8c84cc57..f08af3f4e54f5 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -552,7 +552,7 @@ int ip6_forward(struct sk_buff *skb) } /* XXX: idev->cnf.proxy_ndp? */ - if (net->ipv6.devconf_all->proxy_ndp && + if (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) && pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { int proxied = ip6_forward_proxy_check(skb); if (proxied > 0) { diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f6430db249401..4114918f12c88 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -904,7 +904,8 @@ have_ifp: if (ipv6_chk_acast_addr(net, dev, &msg->target) || (READ_ONCE(idev->cnf.forwarding) && - (net->ipv6.devconf_all->proxy_ndp || idev->cnf.proxy_ndp) && + (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) || + READ_ONCE(idev->cnf.proxy_ndp)) && (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && @@ -1101,7 +1102,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) */ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && READ_ONCE(net->ipv6.devconf_all->forwarding) && - net->ipv6.devconf_all->proxy_ndp && + READ_ONCE(net->ipv6.devconf_all->proxy_ndp) && pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) { /* XXX: idev->cnf.proxy_ndp */ goto out; -- cgit 1.2.3-korg From 624d5aec487cf8c2955d9c5880685714f7fe8e6f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:35 +0000 Subject: ipv6: annotate data-races around devconf->disable_policy idev->cnf.disable_policy and net->ipv6.devconf_all->disable_policy can be read locklessly. Add appropriate annotations on reads and writes. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 +- net/ipv6/ip6_output.c | 4 ++-- net/ipv6/route.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 409aa5ddb8569..44340ff82da53 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6699,7 +6699,7 @@ int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val) if (!rtnl_trylock()) return restart_syscall(); - *valp = val; + WRITE_ONCE(*valp, val); net = (struct net *)ctl->extra2; if (valp == &net->ipv6.devconf_dflt->disable_policy) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f08af3f4e54f5..b9dd3a66e4236 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -513,8 +513,8 @@ int ip6_forward(struct sk_buff *skb) if (skb_warn_if_lro(skb)) goto drop; - if (!net->ipv6.devconf_all->disable_policy && - (!idev || !idev->cnf.disable_policy) && + if (!READ_ONCE(net->ipv6.devconf_all->disable_policy) && + (!idev || !READ_ONCE(idev->cnf.disable_policy)) && !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1b897c57c55fe..a92fcac902aea 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4584,8 +4584,8 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, f6i->dst_nocount = true; if (!anycast && - (net->ipv6.devconf_all->disable_policy || - idev->cnf.disable_policy)) + (READ_ONCE(net->ipv6.devconf_all->disable_policy) || + READ_ONCE(idev->cnf.disable_policy))) f6i->dst_nopolicy = true; } -- cgit 1.2.3-korg From 45b90ec9a214ff7fbde8190126301a07387c46e5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:36 +0000 Subject: ipv6: addrconf_disable_policy() optimization Writing over /proc/sys/net/ipv6/conf/default/disable_policy does not need to hold RTNL. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 44340ff82da53..0e7135a206b09 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6693,20 +6693,19 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val) static int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val) { + struct net *net = (struct net *)ctl->extra2; struct inet6_dev *idev; - struct net *net; + + if (valp == &net->ipv6.devconf_dflt->disable_policy) { + WRITE_ONCE(*valp, val); + return 0; + } if (!rtnl_trylock()) return restart_syscall(); WRITE_ONCE(*valp, val); - net = (struct net *)ctl->extra2; - if (valp == &net->ipv6.devconf_dflt->disable_policy) { - rtnl_unlock(); - return 0; - } - if (valp == &net->ipv6.devconf_all->disable_policy) { struct net_device *dev; -- cgit 1.2.3-korg From 2aba913f99debce6cd1c7f606d69d094d1202559 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:37 +0000 Subject: ipv6/addrconf: annotate data-races around devconf fields (I) Annotate lockless reads and writes on following devconf fields: - regen_min_advance - regen_max_retry - dad_transmits - use_tempaddr - max_addresses - max_desync_factor - temp_valid_lft - rtr_solicits - rtr_solicit_max_interval - rtr_solicit_interval - rtr_solicit_delay - enhanced_dad - accept_redirects Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 67 +++++++++++++++++++++++++++++------------------------ net/ipv6/route.c | 3 ++- 2 files changed, 39 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0e7135a206b09..bcfe725f8fbd4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1359,11 +1359,12 @@ out: in6_ifa_put(ifp); } -static unsigned long ipv6_get_regen_advance(struct inet6_dev *idev) +static unsigned long ipv6_get_regen_advance(const struct inet6_dev *idev) { - return idev->cnf.regen_min_advance + idev->cnf.regen_max_retry * - idev->cnf.dad_transmits * - max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ; + return READ_ONCE(idev->cnf.regen_min_advance) + + READ_ONCE(idev->cnf.regen_max_retry) * + READ_ONCE(idev->cnf.dad_transmits) * + max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ; } static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block) @@ -1384,7 +1385,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block) retry: in6_dev_hold(idev); - if (idev->cnf.use_tempaddr <= 0) { + if (READ_ONCE(idev->cnf.use_tempaddr) <= 0) { write_unlock_bh(&idev->lock); pr_info("%s: use_tempaddr is disabled\n", __func__); in6_dev_put(idev); @@ -1392,8 +1393,8 @@ retry: goto out; } spin_lock_bh(&ifp->lock); - if (ifp->regen_count++ >= idev->cnf.regen_max_retry) { - idev->cnf.use_tempaddr = -1; /*XXX*/ + if (ifp->regen_count++ >= READ_ONCE(idev->cnf.regen_max_retry)) { + WRITE_ONCE(idev->cnf.use_tempaddr, -1); /*XXX*/ spin_unlock_bh(&ifp->lock); write_unlock_bh(&idev->lock); pr_warn("%s: regeneration time exceeded - disabled temporary address support\n", @@ -1415,7 +1416,7 @@ retry: */ cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft); max_desync_factor = min_t(long, - idev->cnf.max_desync_factor, + READ_ONCE(idev->cnf.max_desync_factor), cnf_temp_preferred_lft - regen_advance); if (unlikely(idev->desync_factor > max_desync_factor)) { @@ -1432,7 +1433,7 @@ retry: memset(&cfg, 0, sizeof(cfg)); cfg.valid_lft = min_t(__u32, ifp->valid_lft, - idev->cnf.temp_valid_lft + age); + READ_ONCE(idev->cnf.temp_valid_lft) + age); cfg.preferred_lft = cnf_temp_preferred_lft + age - idev->desync_factor; cfg.preferred_lft = min_t(__u32, if_public_preferred_lft, cfg.preferred_lft); cfg.preferred_lft = min_t(__u32, cfg.valid_lft, cfg.preferred_lft); @@ -1685,7 +1686,7 @@ static int ipv6_get_saddr_eval(struct net *net, */ int preftmp = dst->prefs & (IPV6_PREFER_SRC_PUBLIC|IPV6_PREFER_SRC_TMP) ? !!(dst->prefs & IPV6_PREFER_SRC_TMP) : - score->ifa->idev->cnf.use_tempaddr >= 2; + READ_ONCE(score->ifa->idev->cnf.use_tempaddr) >= 2; ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp; break; } @@ -2168,6 +2169,7 @@ void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net *net = dev_net(idev->dev); + int max_addresses; if (addrconf_dad_end(ifp)) { in6_ifa_put(ifp); @@ -2205,9 +2207,9 @@ void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp) spin_unlock_bh(&ifp->lock); - if (idev->cnf.max_addresses && - ipv6_count_addresses(idev) >= - idev->cnf.max_addresses) + max_addresses = READ_ONCE(idev->cnf.max_addresses); + if (max_addresses && + ipv6_count_addresses(idev) >= max_addresses) goto lock_errdad; net_info_ratelimited("%s: generating new stable privacy address because of DAD conflict\n", @@ -2604,11 +2606,11 @@ static void manage_tempaddrs(struct inet6_dev *idev, * (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR), respectively. */ age = (now - ift->cstamp) / HZ; - max_valid = idev->cnf.temp_valid_lft - age; + max_valid = READ_ONCE(idev->cnf.temp_valid_lft) - age; if (max_valid < 0) max_valid = 0; - max_prefered = idev->cnf.temp_prefered_lft - + max_prefered = READ_ONCE(idev->cnf.temp_prefered_lft) - idev->desync_factor - age; if (max_prefered < 0) max_prefered = 0; @@ -2641,7 +2643,7 @@ static void manage_tempaddrs(struct inet6_dev *idev, if (list_empty(&idev->tempaddr_list) && (valid_lft || prefered_lft)) create = true; - if (create && idev->cnf.use_tempaddr > 0) { + if (create && READ_ONCE(idev->cnf.use_tempaddr) > 0) { /* When a new public address is created as described * in [ADDRCONF], also create a new temporary address. */ @@ -2669,7 +2671,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, int create = 0, update_lft = 0; if (!ifp && valid_lft) { - int max_addresses = in6_dev->cnf.max_addresses; + int max_addresses = READ_ONCE(in6_dev->cnf.max_addresses); struct ifa6_config cfg = { .pfx = addr, .plen = pinfo->prefix_len, @@ -4028,6 +4030,7 @@ static void addrconf_rs_timer(struct timer_list *t) struct inet6_dev *idev = from_timer(idev, t, rs_timer); struct net_device *dev = idev->dev; struct in6_addr lladdr; + int rtr_solicits; write_lock(&idev->lock); if (idev->dead || !(idev->if_flags & IF_READY)) @@ -4040,7 +4043,9 @@ static void addrconf_rs_timer(struct timer_list *t) if (idev->if_flags & IF_RA_RCVD) goto out; - if (idev->rs_probes++ < idev->cnf.rtr_solicits || idev->cnf.rtr_solicits < 0) { + rtr_solicits = READ_ONCE(idev->cnf.rtr_solicits); + + if (idev->rs_probes++ < rtr_solicits || rtr_solicits < 0) { write_unlock(&idev->lock); if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE)) ndisc_send_rs(dev, &lladdr, @@ -4050,11 +4055,12 @@ static void addrconf_rs_timer(struct timer_list *t) write_lock(&idev->lock); idev->rs_interval = rfc3315_s14_backoff_update( - idev->rs_interval, idev->cnf.rtr_solicit_max_interval); + idev->rs_interval, + READ_ONCE(idev->cnf.rtr_solicit_max_interval)); /* The wait after the last probe can be shorter */ addrconf_mod_rs_timer(idev, (idev->rs_probes == - idev->cnf.rtr_solicits) ? - idev->cnf.rtr_solicit_delay : + READ_ONCE(idev->cnf.rtr_solicits)) ? + READ_ONCE(idev->cnf.rtr_solicit_delay) : idev->rs_interval); } else { /* @@ -4075,24 +4081,25 @@ put: */ static void addrconf_dad_kick(struct inet6_ifaddr *ifp) { - unsigned long rand_num; struct inet6_dev *idev = ifp->idev; + unsigned long rand_num; u64 nonce; if (ifp->flags & IFA_F_OPTIMISTIC) rand_num = 0; else - rand_num = get_random_u32_below(idev->cnf.rtr_solicit_delay ? : 1); + rand_num = get_random_u32_below( + READ_ONCE(idev->cnf.rtr_solicit_delay) ? : 1); nonce = 0; - if (idev->cnf.enhanced_dad || - dev_net(idev->dev)->ipv6.devconf_all->enhanced_dad) { + if (READ_ONCE(idev->cnf.enhanced_dad) || + READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->enhanced_dad)) { do get_random_bytes(&nonce, 6); while (nonce == 0); } ifp->dad_nonce = nonce; - ifp->dad_probes = idev->cnf.dad_transmits; + ifp->dad_probes = READ_ONCE(idev->cnf.dad_transmits); addrconf_mod_dad_work(ifp, rand_num); } @@ -4331,7 +4338,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp); send_rs = send_mld && ipv6_accept_ra(ifp->idev) && - ifp->idev->cnf.rtr_solicits != 0 && + READ_ONCE(ifp->idev->cnf.rtr_solicits) != 0 && (dev->flags & IFF_LOOPBACK) == 0 && (dev->type != ARPHRD_TUNNEL) && !netif_is_team_port(dev); @@ -4366,7 +4373,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, write_lock_bh(&ifp->idev->lock); spin_lock(&ifp->lock); ifp->idev->rs_interval = rfc3315_s14_backoff_init( - ifp->idev->cnf.rtr_solicit_interval); + READ_ONCE(ifp->idev->cnf.rtr_solicit_interval)); ifp->idev->rs_probes = 1; ifp->idev->if_flags |= IF_RS_SENT; addrconf_mod_rs_timer(ifp->idev, ifp->idev->rs_interval); @@ -5915,7 +5922,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token, return -EINVAL; } - if (idev->cnf.rtr_solicits == 0) { + if (READ_ONCE(idev->cnf.rtr_solicits) == 0) { NL_SET_ERR_MSG(extack, "Router solicitation is disabled on device"); return -EINVAL; @@ -5948,7 +5955,7 @@ update_lft: if (update_rs) { idev->if_flags |= IF_RS_SENT; idev->rs_interval = rfc3315_s14_backoff_init( - idev->cnf.rtr_solicit_interval); + READ_ONCE(idev->cnf.rtr_solicit_interval)); idev->rs_probes = 1; addrconf_mod_rs_timer(idev, idev->rs_interval); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a92fcac902aea..2cecb1c5a58f6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4150,7 +4150,8 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu in6_dev = __in6_dev_get(skb->dev); if (!in6_dev) return; - if (READ_ONCE(in6_dev->cnf.forwarding) || !in6_dev->cnf.accept_redirects) + if (READ_ONCE(in6_dev->cnf.forwarding) || + !READ_ONCE(in6_dev->cnf.accept_redirects)) return; /* RFC2461 8.1: -- cgit 1.2.3-korg From 2f0ff05a44302c91af54a5f9efe1b65b7681540e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:38 +0000 Subject: ipv6/addrconf: annotate data-races around devconf fields (II) Final (?) round of this series. Annotate lockless reads on following devconf fields, because they be changed concurrently from /proc/net/ipv6/conf. - accept_dad - optimistic_dad - use_optimistic - use_oif_addrs_only - ra_honor_pio_life - keep_addr_on_down - ndisc_notify - ndisc_evict_nocarrier - suppress_frag_ndisc - addr_gen_mode - seg6_enabled - ioam6_enabled - ioam6_id - ioam6_id_wide - drop_unicast_in_l2_multicast - mldv[12]_unsolicited_report_interval - force_mld_version - force_tllao - accept_untracked_na - drop_unsolicited_na - accept_source_route Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 49 ++++++++++++++++++++++++++----------------------- net/ipv6/exthdrs.c | 16 +++++++++------- net/ipv6/ioam6.c | 8 ++++---- net/ipv6/ip6_input.c | 2 +- net/ipv6/mcast.c | 14 +++++++------- net/ipv6/ndisc.c | 18 +++++++++--------- net/ipv6/seg6_hmac.c | 8 +++++--- 7 files changed, 61 insertions(+), 54 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index bcfe725f8fbd4..436d9a6e6a82e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1557,15 +1557,17 @@ static inline int ipv6_saddr_preferred(int type) return 0; } -static bool ipv6_use_optimistic_addr(struct net *net, - struct inet6_dev *idev) +static bool ipv6_use_optimistic_addr(const struct net *net, + const struct inet6_dev *idev) { #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (!idev) return false; - if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad) + if (!READ_ONCE(net->ipv6.devconf_all->optimistic_dad) && + !READ_ONCE(idev->cnf.optimistic_dad)) return false; - if (!net->ipv6.devconf_all->use_optimistic && !idev->cnf.use_optimistic) + if (!READ_ONCE(net->ipv6.devconf_all->use_optimistic) && + !READ_ONCE(idev->cnf.use_optimistic)) return false; return true; @@ -1574,13 +1576,14 @@ static bool ipv6_use_optimistic_addr(struct net *net, #endif } -static bool ipv6_allow_optimistic_dad(struct net *net, - struct inet6_dev *idev) +static bool ipv6_allow_optimistic_dad(const struct net *net, + const struct inet6_dev *idev) { #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (!idev) return false; - if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad) + if (!READ_ONCE(net->ipv6.devconf_all->optimistic_dad) && + !READ_ONCE(idev->cnf.optimistic_dad)) return false; return true; @@ -1862,7 +1865,7 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, idev = __in6_dev_get(dst_dev); if ((dst_type & IPV6_ADDR_MULTICAST) || dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL || - (idev && idev->cnf.use_oif_addrs_only)) { + (idev && READ_ONCE(idev->cnf.use_oif_addrs_only))) { use_oif_addr = true; } } @@ -2683,8 +2686,8 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, }; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD - if ((net->ipv6.devconf_all->optimistic_dad || - in6_dev->cnf.optimistic_dad) && + if ((READ_ONCE(net->ipv6.devconf_all->optimistic_dad) || + READ_ONCE(in6_dev->cnf.optimistic_dad)) && !net->ipv6.devconf_all->forwarding && sllao) cfg.ifa_flags |= IFA_F_OPTIMISTIC; #endif @@ -2733,7 +2736,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, */ update_lft = !create && stored_lft; - if (update_lft && !in6_dev->cnf.ra_honor_pio_life) { + if (update_lft && !READ_ONCE(in6_dev->cnf.ra_honor_pio_life)) { const u32 minimum_lft = min_t(u32, stored_lft, MIN_VALID_LIFETIME); valid_lft = max(valid_lft, minimum_lft); @@ -3317,8 +3320,8 @@ void addrconf_add_linklocal(struct inet6_dev *idev, struct inet6_ifaddr *ifp; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD - if ((dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad || - idev->cnf.optimistic_dad) && + if ((READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad) || + READ_ONCE(idev->cnf.optimistic_dad)) && !dev_net(idev->dev)->ipv6.devconf_all->forwarding) cfg.ifa_flags |= IFA_F_OPTIMISTIC; #endif @@ -3890,10 +3893,10 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister) */ if (!unregister && !idev->cnf.disable_ipv6) { /* aggregate the system setting and interface setting */ - int _keep_addr = net->ipv6.devconf_all->keep_addr_on_down; + int _keep_addr = READ_ONCE(net->ipv6.devconf_all->keep_addr_on_down); if (!_keep_addr) - _keep_addr = idev->cnf.keep_addr_on_down; + _keep_addr = READ_ONCE(idev->cnf.keep_addr_on_down); keep_addr = (_keep_addr > 0); } @@ -4119,8 +4122,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) net = dev_net(dev); if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || - (net->ipv6.devconf_all->accept_dad < 1 && - idev->cnf.accept_dad < 1) || + (READ_ONCE(net->ipv6.devconf_all->accept_dad) < 1 && + READ_ONCE(idev->cnf.accept_dad) < 1) || !(ifp->flags&IFA_F_TENTATIVE) || ifp->flags & IFA_F_NODAD) { bool send_na = false; @@ -4212,8 +4215,8 @@ static void addrconf_dad_work(struct work_struct *w) action = DAD_ABORT; ifp->state = INET6_IFADDR_STATE_POSTDAD; - if ((dev_net(idev->dev)->ipv6.devconf_all->accept_dad > 1 || - idev->cnf.accept_dad > 1) && + if ((READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->accept_dad) > 1 || + READ_ONCE(idev->cnf.accept_dad) > 1) && !idev->cnf.disable_ipv6 && !(ifp->flags & IFA_F_STABLE_PRIVACY)) { struct in6_addr addr; @@ -4352,8 +4355,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, /* send unsolicited NA if enabled */ if (send_na && - (ifp->idev->cnf.ndisc_notify || - dev_net(dev)->ipv6.devconf_all->ndisc_notify)) { + (READ_ONCE(ifp->idev->cnf.ndisc_notify) || + READ_ONCE(dev_net(dev)->ipv6.devconf_all->ndisc_notify))) { ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifp->addr, /*router=*/ !!ifp->idev->cnf.forwarding, /*solicited=*/ false, /*override=*/ true, @@ -6541,7 +6544,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, } else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) { struct net_device *dev; - net->ipv6.devconf_dflt->addr_gen_mode = new_val; + WRITE_ONCE(net->ipv6.devconf_dflt->addr_gen_mode, new_val); for_each_netdev(net, dev) { idev = __in6_dev_get(dev); if (idev && @@ -6553,7 +6556,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, } } - *((u32 *)ctl->data) = new_val; + WRITE_ONCE(*((u32 *)ctl->data), new_val); } out: diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 727792907d6cb..26f1e4a5ade05 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -379,9 +379,8 @@ static int ipv6_srh_rcv(struct sk_buff *skb) idev = __in6_dev_get(skb->dev); - accept_seg6 = net->ipv6.devconf_all->seg6_enabled; - if (accept_seg6 > idev->cnf.seg6_enabled) - accept_seg6 = idev->cnf.seg6_enabled; + accept_seg6 = min(READ_ONCE(net->ipv6.devconf_all->seg6_enabled), + READ_ONCE(idev->cnf.seg6_enabled)); if (!accept_seg6) { kfree_skb(skb); @@ -655,10 +654,13 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) struct ipv6_rt_hdr *hdr; struct rt0_hdr *rthdr; struct net *net = dev_net(skb->dev); - int accept_source_route = net->ipv6.devconf_all->accept_source_route; + int accept_source_route; - if (idev && accept_source_route > idev->cnf.accept_source_route) - accept_source_route = idev->cnf.accept_source_route; + accept_source_route = READ_ONCE(net->ipv6.devconf_all->accept_source_route); + + if (idev) + accept_source_route = min(accept_source_route, + READ_ONCE(idev->cnf.accept_source_route)); if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + @@ -919,7 +921,7 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) goto drop; /* Ignore if IOAM is not enabled on ingress */ - if (!__in6_dev_get(skb->dev)->cnf.ioam6_enabled) + if (!READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_enabled)) goto ignore; /* Truncated Option header */ diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c index 5fa923f066321..08c9295130657 100644 --- a/net/ipv6/ioam6.c +++ b/net/ipv6/ioam6.c @@ -727,7 +727,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (!skb->dev) raw16 = IOAM6_U16_UNAVAILABLE; else - raw16 = (__force u16)__in6_dev_get(skb->dev)->cnf.ioam6_id; + raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id); *(__be16 *)data = cpu_to_be16(raw16); data += sizeof(__be16); @@ -735,7 +735,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) raw16 = IOAM6_U16_UNAVAILABLE; else - raw16 = (__force u16)__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id; + raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id); *(__be16 *)data = cpu_to_be16(raw16); data += sizeof(__be16); @@ -822,7 +822,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (!skb->dev) raw32 = IOAM6_U32_UNAVAILABLE; else - raw32 = __in6_dev_get(skb->dev)->cnf.ioam6_id_wide; + raw32 = READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id_wide); *(__be32 *)data = cpu_to_be32(raw32); data += sizeof(__be32); @@ -830,7 +830,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) raw32 = IOAM6_U32_UNAVAILABLE; else - raw32 = __in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide; + raw32 = READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide); *(__be32 *)data = cpu_to_be32(raw32); data += sizeof(__be32); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 1ba97933c74fb..133610a49da6b 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -236,7 +236,7 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, if (!ipv6_addr_is_multicast(&hdr->daddr) && (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) && - idev->cnf.drop_unicast_in_l2_multicast) { + READ_ONCE(idev->cnf.drop_unicast_in_l2_multicast)) { SKB_DR_SET(reason, UNICAST_IN_L2_MULTICAST); goto err; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 76ee1615ff2a0..7ba01d8cfbae8 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -159,9 +159,9 @@ static int unsolicited_report_interval(struct inet6_dev *idev) int iv; if (mld_in_v1_mode(idev)) - iv = idev->cnf.mldv1_unsolicited_report_interval; + iv = READ_ONCE(idev->cnf.mldv1_unsolicited_report_interval); else - iv = idev->cnf.mldv2_unsolicited_report_interval; + iv = READ_ONCE(idev->cnf.mldv2_unsolicited_report_interval); return iv > 0 ? iv : 1; } @@ -1202,15 +1202,15 @@ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, static int mld_force_mld_version(const struct inet6_dev *idev) { + const struct net *net = dev_net(idev->dev); + int all_force; + + all_force = READ_ONCE(net->ipv6.devconf_all->force_mld_version); /* Normally, both are 0 here. If enforcement to a particular is * being used, individual device enforcement will have a lower * precedence over 'all' device (.../conf/all/force_mld_version). */ - - if (dev_net(idev->dev)->ipv6.devconf_all->force_mld_version != 0) - return dev_net(idev->dev)->ipv6.devconf_all->force_mld_version; - else - return idev->cnf.force_mld_version; + return all_force ?: READ_ONCE(idev->cnf.force_mld_version); } static bool mld_in_v2_mode_only(const struct inet6_dev *idev) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 4114918f12c88..ae134634c323c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -451,7 +451,7 @@ static void ip6_nd_hdr(struct sk_buff *skb, rcu_read_lock(); idev = __in6_dev_get(skb->dev); - tclass = idev ? idev->cnf.ndisc_tclass : 0; + tclass = idev ? READ_ONCE(idev->cnf.ndisc_tclass) : 0; rcu_read_unlock(); skb_push(skb, sizeof(*hdr)); @@ -535,7 +535,7 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, src_addr = solicited_addr; if (ifp->flags & IFA_F_OPTIMISTIC) override = false; - inc_opt |= ifp->idev->cnf.force_tllao; + inc_opt |= READ_ONCE(ifp->idev->cnf.force_tllao); in6_ifa_put(ifp); } else { if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr, @@ -974,7 +974,7 @@ static int accept_untracked_na(struct net_device *dev, struct in6_addr *saddr) { struct inet6_dev *idev = __in6_dev_get(dev); - switch (idev->cnf.accept_untracked_na) { + switch (READ_ONCE(idev->cnf.accept_untracked_na)) { case 0: /* Don't accept untracked na (absent in neighbor cache) */ return 0; case 1: /* Create new entries from na if currently untracked */ @@ -1025,7 +1025,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) * drop_unsolicited_na takes precedence over accept_untracked_na */ if (!msg->icmph.icmp6_solicited && idev && - idev->cnf.drop_unsolicited_na) + READ_ONCE(idev->cnf.drop_unsolicited_na)) return reason; if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) @@ -1818,7 +1818,7 @@ static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb) if (!idev) return true; if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED && - idev->cnf.suppress_frag_ndisc) { + READ_ONCE(idev->cnf.suppress_frag_ndisc)) { net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n"); return true; } @@ -1895,8 +1895,8 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, idev = in6_dev_get(dev); if (!idev) break; - if (idev->cnf.ndisc_notify || - net->ipv6.devconf_all->ndisc_notify) + if (READ_ONCE(idev->cnf.ndisc_notify) || + READ_ONCE(net->ipv6.devconf_all->ndisc_notify)) ndisc_send_unsol_na(dev); in6_dev_put(idev); break; @@ -1905,8 +1905,8 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, if (!idev) evict_nocarrier = true; else { - evict_nocarrier = idev->cnf.ndisc_evict_nocarrier && - net->ipv6.devconf_all->ndisc_evict_nocarrier; + evict_nocarrier = READ_ONCE(idev->cnf.ndisc_evict_nocarrier) && + READ_ONCE(net->ipv6.devconf_all->ndisc_evict_nocarrier); in6_dev_put(idev); } diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index d43c50a7310d6..861e0366f549d 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -241,6 +241,7 @@ bool seg6_hmac_validate_skb(struct sk_buff *skb) struct sr6_tlv_hmac *tlv; struct ipv6_sr_hdr *srh; struct inet6_dev *idev; + int require_hmac; idev = __in6_dev_get(skb->dev); @@ -248,16 +249,17 @@ bool seg6_hmac_validate_skb(struct sk_buff *skb) tlv = seg6_get_tlv_hmac(srh); + require_hmac = READ_ONCE(idev->cnf.seg6_require_hmac); /* mandatory check but no tlv */ - if (idev->cnf.seg6_require_hmac > 0 && !tlv) + if (require_hmac > 0 && !tlv) return false; /* no check */ - if (idev->cnf.seg6_require_hmac < 0) + if (require_hmac < 0) return true; /* check only if present */ - if (idev->cnf.seg6_require_hmac == 0 && !tlv) + if (require_hmac == 0 && !tlv) return true; /* now, seg6_require_hmac >= 0 && tlv */ -- cgit 1.2.3-korg From 2a02f8379bde89472cddb49dc8e55a17e0b68eed Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:39 +0000 Subject: ipv6: use xa_array iterator to implement inet6_netconf_dump_devconf() 1) inet6_netconf_dump_devconf() can run under RCU protection instead of RTNL. 2) properly return 0 at the end of a dump, avoiding an an extra recvmsg() system call. 3) Do not use inet6_base_seq() anymore, for_each_netdev_dump() has nice properties. Restarting a GETDEVCONF dump if a device has been added/removed or if net->ipv6.dev_addr_genid has changed is moot. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 102 +++++++++++++++++++++++----------------------------- 1 file changed, 44 insertions(+), 58 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 436d9a6e6a82e..2f84e6ecf19f4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -727,17 +727,18 @@ static u32 inet6_base_seq(const struct net *net) return res; } - static int inet6_netconf_dump_devconf(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); - int h, s_h; - int idx, s_idx; + struct { + unsigned long ifindex; + unsigned int all_default; + } *ctx = (void *)cb->ctx; struct net_device *dev; struct inet6_dev *idev; - struct hlist_head *head; + int err = 0; if (cb->strict_check) { struct netlink_ext_ack *extack = cb->extack; @@ -754,64 +755,48 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, } } - s_h = cb->args[0]; - s_idx = idx = cb->args[1]; - - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - idx = 0; - head = &net->dev_index_head[h]; - rcu_read_lock(); - cb->seq = inet6_base_seq(net); - hlist_for_each_entry_rcu(dev, head, index_hlist) { - if (idx < s_idx) - goto cont; - idev = __in6_dev_get(dev); - if (!idev) - goto cont; - - if (inet6_netconf_fill_devconf(skb, dev->ifindex, - &idev->cnf, - NETLINK_CB(cb->skb).portid, - nlh->nlmsg_seq, - RTM_NEWNETCONF, - NLM_F_MULTI, - NETCONFA_ALL) < 0) { - rcu_read_unlock(); - goto done; - } - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); -cont: - idx++; - } - rcu_read_unlock(); + rcu_read_lock(); + for_each_netdev_dump(net, dev, ctx->ifindex) { + idev = __in6_dev_get(dev); + if (!idev) + continue; + err = inet6_netconf_fill_devconf(skb, dev->ifindex, + &idev->cnf, + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, + RTM_NEWNETCONF, + NLM_F_MULTI, + NETCONFA_ALL); + if (err < 0) + goto done; } - if (h == NETDEV_HASHENTRIES) { - if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, - net->ipv6.devconf_all, - NETLINK_CB(cb->skb).portid, - nlh->nlmsg_seq, - RTM_NEWNETCONF, NLM_F_MULTI, - NETCONFA_ALL) < 0) + if (ctx->all_default == 0) { + err = inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all, + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + NETCONFA_ALL); + if (err < 0) goto done; - else - h++; - } - if (h == NETDEV_HASHENTRIES + 1) { - if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, - net->ipv6.devconf_dflt, - NETLINK_CB(cb->skb).portid, - nlh->nlmsg_seq, - RTM_NEWNETCONF, NLM_F_MULTI, - NETCONFA_ALL) < 0) + ctx->all_default++; + } + if (ctx->all_default == 1) { + err = inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, + net->ipv6.devconf_dflt, + NETLINK_CB(cb->skb).portid, + nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + NETCONFA_ALL); + if (err < 0) goto done; - else - h++; + ctx->all_default++; } done: - cb->args[0] = h; - cb->args[1] = idx; - - return skb->len; + if (err < 0 && likely(skb->len)) + err = skb->len; + rcu_read_unlock(); + return err; } #ifdef CONFIG_SYSCTL @@ -7503,7 +7488,8 @@ int __init addrconf_init(void) err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf, inet6_netconf_dump_devconf, - RTNL_FLAG_DOIT_UNLOCKED); + RTNL_FLAG_DOIT_UNLOCKED | + RTNL_FLAG_DUMP_UNLOCKED); if (err < 0) goto errout; err = ipv6_addr_label_rtnl_register(); -- cgit 1.2.3-korg From 3cd3e72ccb3aade0e8fe037ef07a44b341ab577c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Feb 2024 11:40:11 +0000 Subject: inet: annotate data-races around ifa->ifa_tstamp and ifa->ifa_cstamp ifa->ifa_tstamp can be read locklessly. Add appropriate READ_ONCE()/WRITE_ONCE() annotations. Do the same for ifa->ifa_cstamp to prepare upcoming changes. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index af741af61830a..1316046d5f289 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -713,13 +713,14 @@ static void check_lifetime(struct work_struct *work) rcu_read_lock(); hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) { - unsigned long age; + unsigned long age, tstamp; if (ifa->ifa_flags & IFA_F_PERMANENT) continue; + tstamp = READ_ONCE(ifa->ifa_tstamp); /* We try to batch several events at once. */ - age = (now - ifa->ifa_tstamp + + age = (now - tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && @@ -729,17 +730,17 @@ static void check_lifetime(struct work_struct *work) INFINITY_LIFE_TIME) { continue; } else if (age >= ifa->ifa_preferred_lft) { - if (time_before(ifa->ifa_tstamp + + if (time_before(tstamp + ifa->ifa_valid_lft * HZ, next)) - next = ifa->ifa_tstamp + + next = tstamp + ifa->ifa_valid_lft * HZ; if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) change_needed = true; - } else if (time_before(ifa->ifa_tstamp + + } else if (time_before(tstamp + ifa->ifa_preferred_lft * HZ, next)) { - next = ifa->ifa_tstamp + + next = tstamp + ifa->ifa_preferred_lft * HZ; } } @@ -819,9 +820,9 @@ static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft, ifa->ifa_flags |= IFA_F_DEPRECATED; ifa->ifa_preferred_lft = timeout; } - ifa->ifa_tstamp = jiffies; + WRITE_ONCE(ifa->ifa_tstamp, jiffies); if (!ifa->ifa_cstamp) - ifa->ifa_cstamp = ifa->ifa_tstamp; + WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp); } static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, @@ -1676,6 +1677,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; + unsigned long tstamp; u32 preferred, valid; nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm), @@ -1694,11 +1696,12 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) goto nla_put_failure; + tstamp = READ_ONCE(ifa->ifa_tstamp); if (!(ifm->ifa_flags & IFA_F_PERMANENT)) { preferred = ifa->ifa_preferred_lft; valid = ifa->ifa_valid_lft; if (preferred != INFINITY_LIFE_TIME) { - long tval = (jiffies - ifa->ifa_tstamp) / HZ; + long tval = (jiffies - tstamp) / HZ; if (preferred > tval) preferred -= tval; @@ -1728,7 +1731,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) || (ifa->ifa_rt_priority && nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) || - put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp, + put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp, preferred, valid)) goto nla_put_failure; -- cgit 1.2.3-korg From a5fcf74d80bec9948701ff0f7529ae96a0c4a41c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Feb 2024 11:40:12 +0000 Subject: inet: annotate data-races around ifa->ifa_valid_lft ifa->ifa_valid_lft can be read locklessly. Add appropriate READ_ONCE()/WRITE_ONCE() annotations. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 1316046d5f289..99f3e7c57d36f 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -714,26 +714,26 @@ static void check_lifetime(struct work_struct *work) rcu_read_lock(); hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) { unsigned long age, tstamp; + u32 valid_lft; if (ifa->ifa_flags & IFA_F_PERMANENT) continue; + valid_lft = READ_ONCE(ifa->ifa_valid_lft); tstamp = READ_ONCE(ifa->ifa_tstamp); /* We try to batch several events at once. */ age = (now - tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; - if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && - age >= ifa->ifa_valid_lft) { + if (valid_lft != INFINITY_LIFE_TIME && + age >= valid_lft) { change_needed = true; } else if (ifa->ifa_preferred_lft == INFINITY_LIFE_TIME) { continue; } else if (age >= ifa->ifa_preferred_lft) { - if (time_before(tstamp + - ifa->ifa_valid_lft * HZ, next)) - next = tstamp + - ifa->ifa_valid_lft * HZ; + if (time_before(tstamp + valid_lft * HZ, next)) + next = tstamp + valid_lft * HZ; if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) change_needed = true; @@ -810,7 +810,7 @@ static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft, timeout = addrconf_timeout_fixup(valid_lft, HZ); if (addrconf_finite_timeout(timeout)) - ifa->ifa_valid_lft = timeout; + WRITE_ONCE(ifa->ifa_valid_lft, timeout); else ifa->ifa_flags |= IFA_F_PERMANENT; @@ -1699,7 +1699,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, tstamp = READ_ONCE(ifa->ifa_tstamp); if (!(ifm->ifa_flags & IFA_F_PERMANENT)) { preferred = ifa->ifa_preferred_lft; - valid = ifa->ifa_valid_lft; + valid = READ_ONCE(ifa->ifa_valid_lft); if (preferred != INFINITY_LIFE_TIME) { long tval = (jiffies - tstamp) / HZ; -- cgit 1.2.3-korg From 9f6fa3c4e722cbb9a007c3b85797bebfcdee84e9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Feb 2024 11:40:13 +0000 Subject: inet: annotate data-races around ifa->ifa_preferred_lft ifa->ifa_preferred_lft can be read locklessly. Add appropriate READ_ONCE()/WRITE_ONCE() annotations. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 99f3e7c57d36f..368fb56e1f1b2 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -714,11 +714,13 @@ static void check_lifetime(struct work_struct *work) rcu_read_lock(); hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) { unsigned long age, tstamp; + u32 preferred_lft; u32 valid_lft; if (ifa->ifa_flags & IFA_F_PERMANENT) continue; + preferred_lft = READ_ONCE(ifa->ifa_preferred_lft); valid_lft = READ_ONCE(ifa->ifa_valid_lft); tstamp = READ_ONCE(ifa->ifa_tstamp); /* We try to batch several events at once. */ @@ -728,20 +730,18 @@ static void check_lifetime(struct work_struct *work) if (valid_lft != INFINITY_LIFE_TIME && age >= valid_lft) { change_needed = true; - } else if (ifa->ifa_preferred_lft == + } else if (preferred_lft == INFINITY_LIFE_TIME) { continue; - } else if (age >= ifa->ifa_preferred_lft) { + } else if (age >= preferred_lft) { if (time_before(tstamp + valid_lft * HZ, next)) next = tstamp + valid_lft * HZ; if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) change_needed = true; - } else if (time_before(tstamp + - ifa->ifa_preferred_lft * HZ, + } else if (time_before(tstamp + preferred_lft * HZ, next)) { - next = tstamp + - ifa->ifa_preferred_lft * HZ; + next = tstamp + preferred_lft * HZ; } } rcu_read_unlock(); @@ -818,7 +818,7 @@ static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft, if (addrconf_finite_timeout(timeout)) { if (timeout == 0) ifa->ifa_flags |= IFA_F_DEPRECATED; - ifa->ifa_preferred_lft = timeout; + WRITE_ONCE(ifa->ifa_preferred_lft, timeout); } WRITE_ONCE(ifa->ifa_tstamp, jiffies); if (!ifa->ifa_cstamp) @@ -1698,7 +1698,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, tstamp = READ_ONCE(ifa->ifa_tstamp); if (!(ifm->ifa_flags & IFA_F_PERMANENT)) { - preferred = ifa->ifa_preferred_lft; + preferred = READ_ONCE(ifa->ifa_preferred_lft); valid = READ_ONCE(ifa->ifa_valid_lft); if (preferred != INFINITY_LIFE_TIME) { long tval = (jiffies - tstamp) / HZ; -- cgit 1.2.3-korg From 3ddc2231c8108302a8229d3c5849ee792a63230d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Feb 2024 11:40:14 +0000 Subject: inet: annotate data-races around ifa->ifa_flags ifa->ifa_flags can be read locklessly. Add appropriate READ_ONCE()/WRITE_ONCE() annotations. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 368fb56e1f1b2..550b775cbbf3c 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -716,8 +716,10 @@ static void check_lifetime(struct work_struct *work) unsigned long age, tstamp; u32 preferred_lft; u32 valid_lft; + u32 flags; - if (ifa->ifa_flags & IFA_F_PERMANENT) + flags = READ_ONCE(ifa->ifa_flags); + if (flags & IFA_F_PERMANENT) continue; preferred_lft = READ_ONCE(ifa->ifa_preferred_lft); @@ -737,7 +739,7 @@ static void check_lifetime(struct work_struct *work) if (time_before(tstamp + valid_lft * HZ, next)) next = tstamp + valid_lft * HZ; - if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) + if (!(flags & IFA_F_DEPRECATED)) change_needed = true; } else if (time_before(tstamp + preferred_lft * HZ, next)) { @@ -805,21 +807,23 @@ static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft, __u32 prefered_lft) { unsigned long timeout; + u32 flags; - ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED); + flags = ifa->ifa_flags & ~(IFA_F_PERMANENT | IFA_F_DEPRECATED); timeout = addrconf_timeout_fixup(valid_lft, HZ); if (addrconf_finite_timeout(timeout)) WRITE_ONCE(ifa->ifa_valid_lft, timeout); else - ifa->ifa_flags |= IFA_F_PERMANENT; + flags |= IFA_F_PERMANENT; timeout = addrconf_timeout_fixup(prefered_lft, HZ); if (addrconf_finite_timeout(timeout)) { if (timeout == 0) - ifa->ifa_flags |= IFA_F_DEPRECATED; + flags |= IFA_F_DEPRECATED; WRITE_ONCE(ifa->ifa_preferred_lft, timeout); } + WRITE_ONCE(ifa->ifa_flags, flags); WRITE_ONCE(ifa->ifa_tstamp, jiffies); if (!ifa->ifa_cstamp) WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp); @@ -1313,7 +1317,7 @@ static __be32 in_dev_select_addr(const struct in_device *in_dev, const struct in_ifaddr *ifa; in_dev_for_each_ifa_rcu(ifa, in_dev) { - if (ifa->ifa_flags & IFA_F_SECONDARY) + if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY) continue; if (ifa->ifa_scope != RT_SCOPE_LINK && ifa->ifa_scope <= scope) @@ -1341,7 +1345,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) localnet_scope = RT_SCOPE_LINK; in_dev_for_each_ifa_rcu(ifa, in_dev) { - if (ifa->ifa_flags & IFA_F_SECONDARY) + if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY) continue; if (min(ifa->ifa_scope, localnet_scope) > scope) continue; @@ -1688,7 +1692,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, ifm = nlmsg_data(nlh); ifm->ifa_family = AF_INET; ifm->ifa_prefixlen = ifa->ifa_prefixlen; - ifm->ifa_flags = ifa->ifa_flags; + ifm->ifa_flags = READ_ONCE(ifa->ifa_flags); ifm->ifa_scope = ifa->ifa_scope; ifm->ifa_index = ifa->ifa_dev->dev->ifindex; @@ -1728,7 +1732,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || (ifa->ifa_proto && nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) || - nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) || + nla_put_u32(skb, IFA_FLAGS, ifm->ifa_flags) || (ifa->ifa_rt_priority && nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) || put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp, -- cgit 1.2.3-korg From 590e92cdc835fcf435d8611f2477fff0e16877c7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Feb 2024 11:40:15 +0000 Subject: inet: prepare inet_base_seq() to run without RTNL In the following patch, inet_base_seq() will no longer be called with RTNL held. Add READ_ONCE()/WRITE_ONCE() annotations in dev_base_seq_inc() and inet_base_seq(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 5 +++-- net/ipv4/devinet.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 4868e67345096..fe054cbd41e92 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -180,8 +180,9 @@ static DECLARE_RWSEM(devnet_rename_sem); static inline void dev_base_seq_inc(struct net *net) { - while (++net->dev_base_seq == 0) - ; + unsigned int val = net->dev_base_seq + 1; + + WRITE_ONCE(net->dev_base_seq, val ?: 1); } static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 550b775cbbf3c..2afe78dfc3c2f 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1837,7 +1837,7 @@ done: static u32 inet_base_seq(const struct net *net) { u32 res = atomic_read(&net->ipv4.dev_addr_genid) + - net->dev_base_seq; + READ_ONCE(net->dev_base_seq); /* Must not return 0 (see nl_dump_check_consistent()). * Chose a value far away from 0. -- cgit 1.2.3-korg From cdb2f80f1c10654efc66c1624f66df2b87eabf06 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Feb 2024 11:40:16 +0000 Subject: inet: use xa_array iterator to implement inet_dump_ifaddr() 1) inet_dump_ifaddr() can can run under RCU protection instead of RTNL. 2) properly return 0 at the end of a dump, avoiding an an extra recvmsg() system call. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 95 +++++++++++++++++++++--------------------------------- 1 file changed, 37 insertions(+), 58 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 2afe78dfc3c2f..4daa8124f247c 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1676,7 +1676,7 @@ static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); } -static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, +static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa, struct inet_fill_args *args) { struct ifaddrmsg *ifm; @@ -1805,15 +1805,15 @@ static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh, } static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb, - struct netlink_callback *cb, int s_ip_idx, + struct netlink_callback *cb, int *s_ip_idx, struct inet_fill_args *fillargs) { struct in_ifaddr *ifa; int ip_idx = 0; int err; - in_dev_for_each_ifa_rtnl(ifa, in_dev) { - if (ip_idx < s_ip_idx) { + in_dev_for_each_ifa_rcu(ifa, in_dev) { + if (ip_idx < *s_ip_idx) { ip_idx++; continue; } @@ -1825,9 +1825,9 @@ static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb, ip_idx++; } err = 0; - + ip_idx = 0; done: - cb->args[2] = ip_idx; + *s_ip_idx = ip_idx; return err; } @@ -1859,75 +1859,53 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) }; struct net *net = sock_net(skb->sk); struct net *tgt_net = net; - int h, s_h; - int idx, s_idx; - int s_ip_idx; - struct net_device *dev; + struct { + unsigned long ifindex; + int ip_idx; + } *ctx = (void *)cb->ctx; struct in_device *in_dev; - struct hlist_head *head; + struct net_device *dev; int err = 0; - s_h = cb->args[0]; - s_idx = idx = cb->args[1]; - s_ip_idx = cb->args[2]; - + rcu_read_lock(); if (cb->strict_check) { err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net, skb->sk, cb); if (err < 0) - goto put_tgt_net; + goto done; - err = 0; if (fillargs.ifindex) { - dev = __dev_get_by_index(tgt_net, fillargs.ifindex); - if (!dev) { - err = -ENODEV; - goto put_tgt_net; - } - - in_dev = __in_dev_get_rtnl(dev); - if (in_dev) { - err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx, - &fillargs); - } - goto put_tgt_net; - } - } - - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - idx = 0; - head = &tgt_net->dev_index_head[h]; - rcu_read_lock(); - cb->seq = inet_base_seq(tgt_net); - hlist_for_each_entry_rcu(dev, head, index_hlist) { - if (idx < s_idx) - goto cont; - if (h > s_h || idx > s_idx) - s_ip_idx = 0; + err = -ENODEV; + dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex); + if (!dev) + goto done; in_dev = __in_dev_get_rcu(dev); if (!in_dev) - goto cont; - - err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx, - &fillargs); - if (err < 0) { - rcu_read_unlock(); goto done; - } -cont: - idx++; + err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx, + &fillargs); + goto done; } - rcu_read_unlock(); } + cb->seq = inet_base_seq(tgt_net); + + for_each_netdev_dump(net, dev, ctx->ifindex) { + in_dev = __in_dev_get_rcu(dev); + if (!in_dev) + continue; + err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx, + &fillargs); + if (err < 0) + goto done; + } done: - cb->args[0] = h; - cb->args[1] = idx; -put_tgt_net: + if (err < 0 && likely(skb->len)) + err = skb->len; if (fillargs.netnsid >= 0) put_net(tgt_net); - - return skb->len ? : err; + rcu_read_unlock(); + return err; } static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, @@ -2818,7 +2796,8 @@ void __init devinet_init(void) rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0); rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0); - rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0); + rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, + RTNL_FLAG_DUMP_UNLOCKED); rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, inet_netconf_dump_devconf, RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED); -- cgit 1.2.3-korg From 8985f2b09b3303b935b9ab4814d801251f0c7c22 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jan 2024 08:44:40 +0000 Subject: rxrpc: Use rxrpc_txbuf::kvec[0] instead of rxrpc_txbuf::wire Use rxrpc_txbuf::kvec[0] instead of rxrpc_txbuf::wire to gain access to the Rx protocol header. In future, the wire header will be stored in a page frag, not in the rxrpc_txbuf struct making it possible to use MSG_SPLICE_PAGES when sending it. Similarly, access the ack header as being immediately after the wire header when filling out an ACK packet. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- net/rxrpc/ar-internal.h | 5 ++-- net/rxrpc/output.c | 79 ++++++++++++++++++++++++------------------------- net/rxrpc/txbuf.c | 27 +++++++++-------- 3 files changed, 56 insertions(+), 55 deletions(-) (limited to 'net') diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index a8795ef0d669b..9ea4e7e9d9f75 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -804,6 +804,7 @@ struct rxrpc_txbuf { #define RXRPC_TXBUF_WIRE_FLAGS 0xff /* The wire protocol flags */ #define RXRPC_TXBUF_RESENT 0x100 /* Set if has been resent */ __be16 cksum; /* Checksum to go in header */ + unsigned short ack_rwind; /* ACK receive window */ u8 /*enum rxrpc_propose_ack_trace*/ ack_why; /* If ack, why */ u8 nr_kvec; struct kvec kvec[1]; @@ -812,11 +813,11 @@ struct rxrpc_txbuf { * that data[] aligns correctly for any crypto blocksize. */ u8 pad[64 - sizeof(struct rxrpc_wire_header)]; - struct rxrpc_wire_header wire; /* Network-ready header */ + struct rxrpc_wire_header _wire; /* Network-ready header */ union { u8 data[RXRPC_JUMBO_DATALEN]; /* Data packet */ struct { - struct rxrpc_ackpacket ack; + struct rxrpc_ackpacket _ack; DECLARE_FLEX_ARRAY(u8, acks); }; }; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index ec9ae9c6c492f..5398aa24bb8e3 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -77,11 +77,13 @@ static void rxrpc_set_keepalive(struct rxrpc_call *call) /* * Fill out an ACK packet. */ -static void rxrpc_fill_out_ack(struct rxrpc_connection *conn, - struct rxrpc_call *call, +static void rxrpc_fill_out_ack(struct rxrpc_call *call, struct rxrpc_txbuf *txb, - u16 *_rwind) + u8 ack_reason, + rxrpc_serial_t serial) { + struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; + struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1); struct rxrpc_acktrailer trailer; unsigned int qsize, sack, wrap, to; rxrpc_seq_t window, wtop; @@ -97,15 +99,24 @@ static void rxrpc_fill_out_ack(struct rxrpc_connection *conn, window = call->ackr_window; wtop = call->ackr_wtop; sack = call->ackr_sack_base % RXRPC_SACK_SIZE; - txb->ack.firstPacket = htonl(window); - txb->ack.nAcks = wtop - window; + + whdr->seq = 0; + whdr->type = RXRPC_PACKET_TYPE_ACK; + txb->flags |= RXRPC_SLOW_START_OK; + ack->bufferSpace = 0; + ack->maxSkew = 0; + ack->firstPacket = htonl(window); + ack->previousPacket = htonl(call->rx_highest_seq); + ack->serial = htonl(serial); + ack->reason = ack_reason; + ack->nAcks = wtop - window; if (after(wtop, window)) { wrap = RXRPC_SACK_SIZE - sack; - to = min_t(unsigned int, txb->ack.nAcks, RXRPC_SACK_SIZE); + to = min_t(unsigned int, ack->nAcks, RXRPC_SACK_SIZE); - if (sack + txb->ack.nAcks <= RXRPC_SACK_SIZE) { - memcpy(txb->acks, call->ackr_sack_table + sack, txb->ack.nAcks); + if (sack + ack->nAcks <= RXRPC_SACK_SIZE) { + memcpy(txb->acks, call->ackr_sack_table + sack, ack->nAcks); } else { memcpy(txb->acks, call->ackr_sack_table + sack, wrap); memcpy(txb->acks + wrap, call->ackr_sack_table, @@ -115,16 +126,16 @@ static void rxrpc_fill_out_ack(struct rxrpc_connection *conn, ackp += to; } else if (before(wtop, window)) { pr_warn("ack window backward %x %x", window, wtop); - } else if (txb->ack.reason == RXRPC_ACK_DELAY) { - txb->ack.reason = RXRPC_ACK_IDLE; + } else if (ack->reason == RXRPC_ACK_DELAY) { + ack->reason = RXRPC_ACK_IDLE; } - mtu = conn->peer->if_mtu; - mtu -= conn->peer->hdrsize; + mtu = call->peer->if_mtu; + mtu -= call->peer->hdrsize; jmax = rxrpc_rx_jumbo_max; qsize = (window - 1) - call->rx_consumed; rsize = max_t(int, call->rx_winsize - qsize, 0); - *_rwind = rsize; + txb->ack_rwind = rsize; trailer.maxMTU = htonl(rxrpc_rx_mtu); trailer.ifMTU = htonl(mtu); trailer.rwind = htonl(rsize); @@ -134,8 +145,7 @@ static void rxrpc_fill_out_ack(struct rxrpc_connection *conn, *ackp++ = 0; *ackp++ = 0; memcpy(ackp, &trailer, sizeof(trailer)); - txb->kvec[0].iov_len = sizeof(txb->wire) + - sizeof(txb->ack) + txb->ack.nAcks + 3 + sizeof(trailer); + txb->kvec[0].iov_len += sizeof(*ack) + ack->nAcks + 3 + sizeof(trailer); txb->len = txb->kvec[0].iov_len; } @@ -187,10 +197,11 @@ static void rxrpc_cancel_rtt_probe(struct rxrpc_call *call, */ static int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) { + struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; struct rxrpc_connection *conn; + struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1); struct msghdr msg; int ret, rtt_slot = -1; - u16 rwind; if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) return -ECONNRESET; @@ -203,27 +214,22 @@ static int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *tx msg.msg_controllen = 0; msg.msg_flags = 0; - if (txb->ack.reason == RXRPC_ACK_PING) + if (ack->reason == RXRPC_ACK_PING) txb->flags |= RXRPC_REQUEST_ACK; - txb->wire.flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS; - - rxrpc_fill_out_ack(conn, call, txb, &rwind); + whdr->flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS; txb->serial = rxrpc_get_next_serial(conn); - txb->wire.serial = htonl(txb->serial); + whdr->serial = htonl(txb->serial); trace_rxrpc_tx_ack(call->debug_id, txb->serial, - ntohl(txb->ack.firstPacket), - ntohl(txb->ack.serial), txb->ack.reason, txb->ack.nAcks, - rwind); + ntohl(ack->firstPacket), + ntohl(ack->serial), ack->reason, ack->nAcks, + txb->ack_rwind); - if (txb->ack.reason == RXRPC_ACK_PING) + if (ack->reason == RXRPC_ACK_PING) rtt_slot = rxrpc_begin_rtt_probe(call, txb->serial, rxrpc_rtt_tx_ping); rxrpc_inc_stat(call->rxnet, stat_tx_ack_send); - /* Grab the highest received seq as late as possible */ - txb->ack.previousPacket = htonl(call->rx_highest_seq); - iov_iter_kvec(&msg.msg_iter, WRITE, txb->kvec, txb->nr_kvec, txb->len); rxrpc_local_dont_fragment(conn->local, false); ret = do_udp_sendmsg(conn->local->socket, &msg, txb->len); @@ -232,7 +238,7 @@ static int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *tx trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, rxrpc_tx_point_call_ack); } else { - trace_rxrpc_tx_packet(call->debug_id, &txb->wire, + trace_rxrpc_tx_packet(call->debug_id, whdr, rxrpc_tx_point_call_ack); if (txb->flags & RXRPC_REQUEST_ACK) call->peer->rtt_last_req = ktime_get_real(); @@ -268,18 +274,9 @@ void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, return; } - txb->ack_why = why; - txb->wire.seq = 0; - txb->wire.type = RXRPC_PACKET_TYPE_ACK; - txb->flags |= RXRPC_SLOW_START_OK; - txb->ack.bufferSpace = 0; - txb->ack.maxSkew = 0; - txb->ack.firstPacket = 0; - txb->ack.previousPacket = 0; - txb->ack.serial = htonl(serial); - txb->ack.reason = ack_reason; - txb->ack.nAcks = 0; + rxrpc_fill_out_ack(call, txb, ack_reason, serial); + txb->ack_why = why; trace_rxrpc_send_ack(call, why, ack_reason, serial); rxrpc_send_ack_packet(call, txb); rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx); @@ -365,7 +362,7 @@ static void rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_t if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) && txb->seq == 1) - txb->wire.userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE; + whdr->userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE; /* If our RTT cache needs working on, request an ACK. Also request * ACKs if a DATA packet appears to have been lost. diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index 91e96cda6dc70..2e8c5b15a84f3 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -19,10 +19,13 @@ atomic_t rxrpc_nr_txbuf; struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type, gfp_t gfp) { + struct rxrpc_wire_header *whdr; struct rxrpc_txbuf *txb; txb = kmalloc(sizeof(*txb), gfp); if (txb) { + whdr = &txb->_wire; + INIT_LIST_HEAD(&txb->call_link); INIT_LIST_HEAD(&txb->tx_link); refcount_set(&txb->ref, 1); @@ -37,18 +40,18 @@ struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type, txb->serial = 0; txb->cksum = 0; txb->nr_kvec = 1; - txb->kvec[0].iov_base = &txb->wire; - txb->kvec[0].iov_len = sizeof(txb->wire); - txb->wire.epoch = htonl(call->conn->proto.epoch); - txb->wire.cid = htonl(call->cid); - txb->wire.callNumber = htonl(call->call_id); - txb->wire.seq = htonl(txb->seq); - txb->wire.type = packet_type; - txb->wire.flags = 0; - txb->wire.userStatus = 0; - txb->wire.securityIndex = call->security_ix; - txb->wire._rsvd = 0; - txb->wire.serviceId = htons(call->dest_srx.srx_service); + txb->kvec[0].iov_base = whdr; + txb->kvec[0].iov_len = sizeof(*whdr); + whdr->epoch = htonl(call->conn->proto.epoch); + whdr->cid = htonl(call->cid); + whdr->callNumber = htonl(call->call_id); + whdr->seq = htonl(txb->seq); + whdr->type = packet_type; + whdr->flags = 0; + whdr->userStatus = 0; + whdr->securityIndex = call->security_ix; + whdr->_rsvd = 0; + whdr->serviceId = htons(call->dest_srx.srx_service); trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 1, -- cgit 1.2.3-korg From 80bfab79b8351c8d858e6928a091b57c103dce29 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Feb 2024 09:39:07 +0000 Subject: net: adopt skb_network_offset() and similar helpers This is a cleanup patch, making code a bit more concise. 1) Use skb_network_offset(skb) in place of (skb_network_header(skb) - skb->data) 2) Use -skb_network_offset(skb) in place of (skb->data - skb_network_header(skb)) 3) Use skb_transport_offset(skb) in place of (skb_transport_header(skb) - skb->data) 4) Use skb_inner_transport_offset(skb) in place of (skb_inner_transport_header(skb) - skb->data) Signed-off-by: Eric Dumazet Acked-by: Edward Cree # for sfc Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 11 +++++------ drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 4 ++-- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- drivers/net/ethernet/intel/iavf/iavf_main.c | 2 +- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- drivers/net/ethernet/intel/igbvf/netdev.c | 2 +- drivers/net/ethernet/intel/igc/igc_main.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2 +- drivers/net/ethernet/qlogic/qede/qede_fp.c | 2 +- drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 3 +-- drivers/net/ethernet/sfc/siena/tx_common.c | 5 ++--- drivers/net/ethernet/sfc/tx_common.c | 5 ++--- drivers/net/ethernet/sfc/tx_tso.c | 4 ++-- drivers/net/ethernet/sun/sunvnet_common.c | 4 ++-- drivers/net/ethernet/wangxun/libwx/wx_lib.c | 2 +- drivers/net/wireguard/receive.c | 2 +- kernel/bpf/cgroup.c | 2 +- net/ipv4/raw.c | 2 +- net/ipv4/xfrm4_input.c | 2 +- net/ipv6/exthdrs.c | 4 ++-- net/ipv6/netfilter/nf_conntrack_reasm.c | 4 ++-- net/ipv6/reassembly.c | 4 ++-- net/ipv6/xfrm6_input.c | 2 +- 24 files changed, 36 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 528441b28c4ef..adcee8d9d6d9a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -3538,7 +3538,7 @@ static u8 bnx2x_set_pbd_csum_enc(struct bnx2x *bp, struct sk_buff *skb, u32 *parsing_data, u32 xmit_type) { *parsing_data |= - ((((u8 *)skb_inner_transport_header(skb) - skb->data) >> 1) << + ((skb_inner_transport_offset(skb) >> 1) << ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W_SHIFT) & ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W; @@ -3570,7 +3570,7 @@ static u8 bnx2x_set_pbd_csum_e2(struct bnx2x *bp, struct sk_buff *skb, u32 *parsing_data, u32 xmit_type) { *parsing_data |= - ((((u8 *)skb_transport_header(skb) - skb->data) >> 1) << + ((skb_transport_offset(skb) >> 1) << ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W_SHIFT) & ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W; @@ -3613,7 +3613,7 @@ static u8 bnx2x_set_pbd_csum(struct bnx2x *bp, struct sk_buff *skb, struct eth_tx_parse_bd_e1x *pbd, u32 xmit_type) { - u8 hlen = (skb_network_header(skb) - skb->data) >> 1; + u8 hlen = skb_network_offset(skb) >> 1; /* for now NS flag is not used in Linux */ pbd->global_data = @@ -3667,8 +3667,7 @@ static void bnx2x_update_pbds_gso_enc(struct sk_buff *skb, u8 outerip_off, outerip_len = 0; /* from outer IP to transport */ - hlen_w = (skb_inner_transport_header(skb) - - skb_network_header(skb)) >> 1; + hlen_w = skb_inner_transport_offset(skb) >> 1; /* transport len */ hlen_w += inner_tcp_hdrlen(skb) >> 1; @@ -3714,7 +3713,7 @@ static void bnx2x_update_pbds_gso_enc(struct sk_buff *skb, 0, IPPROTO_TCP, 0)); } - outerip_off = (skb_network_header(skb) - skb->data) >> 1; + outerip_off = (skb_network_offset(skb)) >> 1; *global_data |= outerip_off | diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index f1695c889d3a0..19668a8d22f76 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2473,9 +2473,9 @@ static netdev_features_t hns3_features_check(struct sk_buff *skb, return features; if (skb->encapsulation) - len = skb_inner_transport_header(skb) - skb->data; + len = skb_inner_transport_offset(skb); else - len = skb_transport_header(skb) - skb->data; + len = skb_transport_offset(skb); /* Assume L4 is 60 byte as TCP is the only protocol with a * a flexible value, and it's max len is 60 bytes. diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f12092cdb1f0a..a67b13869016b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -13208,7 +13208,7 @@ static netdev_features_t i40e_features_check(struct sk_buff *skb, features &= ~NETIF_F_GSO_MASK; /* MACLEN can support at most 63 words */ - len = skb_network_header(skb) - skb->data; + len = skb_network_offset(skb); if (len & ~(63 * 2)) goto out_err; diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 335fd13e86f71..245c458e38aee 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -4423,7 +4423,7 @@ static netdev_features_t iavf_features_check(struct sk_buff *skb, features &= ~NETIF_F_GSO_MASK; /* MACLEN can support at most 63 words */ - len = skb_network_header(skb) - skb->data; + len = skb_network_offset(skb); if (len & ~(63 * 2)) goto out_err; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index cebb44f51d5f5..b355565505034 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2538,7 +2538,7 @@ igb_features_check(struct sk_buff *skb, struct net_device *dev, unsigned int network_hdr_len, mac_hdr_len; /* Make certain the headers can be described by a context descriptor */ - mac_hdr_len = skb_network_header(skb) - skb->data; + mac_hdr_len = skb_network_offset(skb); if (unlikely(mac_hdr_len > IGB_MAX_MAC_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index a4d4f00e6a876..b0cf310e6f7bd 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2655,7 +2655,7 @@ igbvf_features_check(struct sk_buff *skb, struct net_device *dev, unsigned int network_hdr_len, mac_hdr_len; /* Make certain the headers can be described by a context descriptor */ - mac_hdr_len = skb_network_header(skb) - skb->data; + mac_hdr_len = skb_network_offset(skb); if (unlikely(mac_hdr_len > IGBVF_MAX_MAC_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 3af52d238f3bf..34820f6a78b92 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -5277,7 +5277,7 @@ igc_features_check(struct sk_buff *skb, struct net_device *dev, unsigned int network_hdr_len, mac_hdr_len; /* Make certain the headers can be described by a context descriptor */ - mac_hdr_len = skb_network_header(skb) - skb->data; + mac_hdr_len = skb_network_offset(skb); if (unlikely(mac_hdr_len > IGC_MAX_MAC_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index e23c3614fb104..1e93edc967ed8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -10205,7 +10205,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev, unsigned int network_hdr_len, mac_hdr_len; /* Make certain the headers can be described by a context descriptor */ - mac_hdr_len = skb_network_header(skb) - skb->data; + mac_hdr_len = skb_network_offset(skb); if (unlikely(mac_hdr_len > IXGBE_MAX_MAC_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index a44e4bd561421..9c960017a6de5 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4413,7 +4413,7 @@ ixgbevf_features_check(struct sk_buff *skb, struct net_device *dev, unsigned int network_hdr_len, mac_hdr_len; /* Make certain the headers can be described by a context descriptor */ - mac_hdr_len = skb_network_header(skb) - skb->data; + mac_hdr_len = skb_network_offset(skb); if (unlikely(mac_hdr_len > IXGBEVF_MAX_MAC_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index cb1746bc0e0c5..847fa62c80df8 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -215,7 +215,7 @@ static void qede_set_params_for_ipv6_ext(struct sk_buff *skb, bd2_bits1 |= (1 << ETH_TX_DATA_2ND_BD_IPV6_EXT_SHIFT); - bd2_bits2 |= ((((u8 *)skb_transport_header(skb) - skb->data) >> 1) & + bd2_bits2 |= ((skb_transport_offset(skb) >> 1) & ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_MASK) << ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_SHIFT; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 41894d154013b..b9dc0071c5de4 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -446,8 +446,7 @@ static int qlcnic_tx_encap_pkt(struct qlcnic_adapter *adapter, encap_descr |= skb_network_offset(skb) << 10; first_desc->encap_descr = cpu_to_le16(encap_descr); - first_desc->tcp_hdr_offset = skb_inner_transport_header(skb) - - skb->data; + first_desc->tcp_hdr_offset = skb_inner_transport_offset(skb); first_desc->ip_hdr_offset = skb_inner_network_offset(skb); qlcnic_set_tx_flags_opcode(first_desc, flags, opcode); diff --git a/drivers/net/ethernet/sfc/siena/tx_common.c b/drivers/net/ethernet/sfc/siena/tx_common.c index a7a9ab304e136..71f9b5ec5ae46 100644 --- a/drivers/net/ethernet/sfc/siena/tx_common.c +++ b/drivers/net/ethernet/sfc/siena/tx_common.c @@ -317,11 +317,10 @@ static int efx_tx_tso_header_length(struct sk_buff *skb) size_t header_len; if (skb->encapsulation) - header_len = skb_inner_transport_header(skb) - - skb->data + + header_len = skb_inner_transport_offset(skb) + (inner_tcp_hdr(skb)->doff << 2u); else - header_len = skb_transport_header(skb) - skb->data + + header_len = skb_transport_offset(skb) + (tcp_hdr(skb)->doff << 2u); return header_len; } diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c index 9f2393d343715..2adb132b2f7e4 100644 --- a/drivers/net/ethernet/sfc/tx_common.c +++ b/drivers/net/ethernet/sfc/tx_common.c @@ -336,11 +336,10 @@ int efx_tx_tso_header_length(struct sk_buff *skb) size_t header_len; if (skb->encapsulation) - header_len = skb_inner_transport_header(skb) - - skb->data + + header_len = skb_inner_transport_offset(skb) + (inner_tcp_hdr(skb)->doff << 2u); else - header_len = skb_transport_header(skb) - skb->data + + header_len = skb_transport_offset(skb) + (tcp_hdr(skb)->doff << 2u); return header_len; } diff --git a/drivers/net/ethernet/sfc/tx_tso.c b/drivers/net/ethernet/sfc/tx_tso.c index 64a6768f75ea1..ddf149db81801 100644 --- a/drivers/net/ethernet/sfc/tx_tso.c +++ b/drivers/net/ethernet/sfc/tx_tso.c @@ -174,8 +174,8 @@ static int tso_start(struct tso_state *st, struct efx_nic *efx, unsigned int header_len, in_len; dma_addr_t dma_addr; - st->ip_off = skb_network_header(skb) - skb->data; - st->tcp_off = skb_transport_header(skb) - skb->data; + st->ip_off = skb_network_offset(skb); + st->tcp_off = skb_transport_offset(skb); header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u); in_len = skb_headlen(skb) - header_len; st->header_len = header_len; diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index 3525d5c0d694c..351609f4f011d 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -1144,9 +1144,9 @@ static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies) nskb->protocol = skb->protocol; offset = skb_mac_header(skb) - skb->data; skb_set_mac_header(nskb, offset); - offset = skb_network_header(skb) - skb->data; + offset = skb_network_offset(skb); skb_set_network_header(nskb, offset); - offset = skb_transport_header(skb) - skb->data; + offset = skb_transport_offset(skb); skb_set_transport_header(nskb, offset); offset = 0; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 7cf02ab6de68b..6dff2c85682d8 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -1257,7 +1257,7 @@ static int wx_tso(struct wx_ring *tx_ring, struct wx_tx_buffer *first, /* compute header lengths */ l4len = enc ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb); - *hdr_len = enc ? (skb_inner_transport_header(skb) - skb->data) : + *hdr_len = enc ? skb_inner_transport_offset(skb) : skb_transport_offset(skb); *hdr_len += l4len; diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index a176653c88616..df275b4fccb6d 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -263,7 +263,7 @@ static bool decrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair) * call skb_cow_data, so that there's no chance that data is removed * from the skb, so that later we can extract the original endpoint. */ - offset = skb->data - skb_network_header(skb); + offset = -skb_network_offset(skb); skb_push(skb, offset); num_frags = skb_cow_data(skb, 0, &trailer); offset += sizeof(struct message_data); diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 5a568bbbeaebd..82243cb6c54db 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1358,7 +1358,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, enum cgroup_bpf_attach_type atype) { - unsigned int offset = skb->data - skb_network_header(skb); + unsigned int offset = -skb_network_offset(skb); struct sock *save_sk; void *saved_data_end; struct cgroup *cgrp; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 7d2bdfd7e7d70..a82fd102df05a 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -310,7 +310,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) } nf_reset_ct(skb); - skb_push(skb, skb->data - skb_network_header(skb)); + skb_push(skb, -skb_network_offset(skb)); raw_rcv_skb(sk, skb); return 0; diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index c54676998eb60..dae35101d1895 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -58,7 +58,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) return -iph->protocol; #endif - __skb_push(skb, skb->data - skb_network_header(skb)); + __skb_push(skb, -skb_network_offset(skb)); iph->tot_len = htons(skb->len); ip_send_check(iph); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 26f1e4a5ade05..25a5b394481b7 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -804,7 +804,7 @@ looped_back: ip6_route_input(skb); if (skb_dst(skb)->error) { - skb_push(skb, skb->data - skb_network_header(skb)); + skb_push(skb, -skb_network_offset(skb)); dst_input(skb); return -1; } @@ -821,7 +821,7 @@ looped_back: goto looped_back; } - skb_push(skb, skb->data - skb_network_header(skb)); + skb_push(skb, -skb_network_offset(skb)); dst_input(skb); return -1; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index b2dd48911c8d6..1a51a44571c37 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -327,9 +327,9 @@ static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, if (!reasm_data) goto err; - payload_len = ((skb->data - skb_network_header(skb)) - + payload_len = -skb_network_offset(skb) - sizeof(struct ipv6hdr) + fq->q.len - - sizeof(struct frag_hdr)); + sizeof(struct frag_hdr); if (payload_len > IPV6_MAXPLEN) { net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n", payload_len); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 5ebc47da1000c..acb4f119e11f0 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -272,9 +272,9 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, if (!reasm_data) goto out_oom; - payload_len = ((skb->data - skb_network_header(skb)) - + payload_len = -skb_network_offset(skb) - sizeof(struct ipv6hdr) + fq->q.len - - sizeof(struct frag_hdr)); + sizeof(struct frag_hdr); if (payload_len > IPV6_MAXPLEN) goto out_oversize; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 6e36e5047fbab..a17d783dc7c0d 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -43,7 +43,7 @@ static int xfrm6_transport_finish2(struct net *net, struct sock *sk, int xfrm6_transport_finish(struct sk_buff *skb, int async) { struct xfrm_offload *xo = xfrm_offload(skb); - int nhlen = skb->data - skb_network_header(skb); + int nhlen = -skb_network_offset(skb); skb_network_header(skb)[IP6CB(skb)->nhoff] = XFRM_MODE_SKB_CB(skb)->protocol; -- cgit 1.2.3-korg From 6752fb18dc5763e9d6efe5aebf03e4fc69e6086b Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 28 Feb 2024 10:03:17 -0800 Subject: net: ip6_tunnel: Leverage core stats allocator With commit 34d21de99cea9 ("net: Move {l,t,d}stats allocation to core and convert veth & vrf"), stats allocation could be done on net core instead of in this driver. With this new approach, the driver doesn't have to bother with error handling (allocation failure checking, making sure free happens in the right spot, etc). This is core responsibility now. Remove the allocation in the ip6_tunnel driver and leverage the network core allocation instead. Signed-off-by: Breno Leitao Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 5fd07581efafe..e9cc315832cb3 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -247,7 +247,6 @@ static void ip6_dev_free(struct net_device *dev) gro_cells_destroy(&t->gro_cells); dst_cache_destroy(&t->dst_cache); - free_percpu(dev->tstats); } static int ip6_tnl_create2(struct net_device *dev) @@ -1848,6 +1847,7 @@ static void ip6_tnl_dev_setup(struct net_device *dev) dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); dev->features |= NETIF_F_LLTX; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; netif_keep_dst(dev); dev->features |= IPXIPX_FEATURES; @@ -1873,13 +1873,10 @@ ip6_tnl_dev_init_gen(struct net_device *dev) t->dev = dev; t->net = dev_net(dev); - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; ret = dst_cache_init(&t->dst_cache, GFP_KERNEL); if (ret) - goto free_stats; + return ret; ret = gro_cells_init(&t->gro_cells, dev); if (ret) @@ -1903,9 +1900,6 @@ ip6_tnl_dev_init_gen(struct net_device *dev) destroy_dst: dst_cache_destroy(&t->dst_cache); -free_stats: - free_percpu(dev->tstats); - dev->tstats = NULL; return ret; } -- cgit 1.2.3-korg From 037db6ea57da7a134a8183dead92d64ef92babee Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 1 Mar 2024 18:43:44 +0100 Subject: mptcp: cleanup writer wake-up After commit 5cf92bbadc58 ("mptcp: re-enable sndbuf autotune"), the MPTCP_NOSPACE bit is redundant: it is always set and cleared together with SOCK_NOSPACE. Let's drop the first and always relay on the latter, dropping a bunch of useless code. Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 15 +++------------ net/mptcp/protocol.h | 16 ++++++---------- 2 files changed, 9 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 76c8861a852b1..a3d79e9d06942 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1692,15 +1692,6 @@ out: } } -static void mptcp_set_nospace(struct sock *sk) -{ - /* enable autotune */ - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - - /* will be cleared on avail space */ - set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags); -} - static int mptcp_disconnect(struct sock *sk, int flags); static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, @@ -1874,7 +1865,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) continue; wait_for_memory: - mptcp_set_nospace(sk); + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); __mptcp_push_pending(sk, msg->msg_flags); ret = sk_stream_wait_memory(sk, &timeo); if (ret) @@ -3945,8 +3936,8 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk) if (sk_stream_is_writeable(sk)) return EPOLLOUT | EPOLLWRNORM; - mptcp_set_nospace(sk); - smp_mb__after_atomic(); /* msk->flags is changed by write_space cb */ + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + smp_mb__after_atomic(); /* NOSPACE is changed by mptcp_write_space() */ if (sk_stream_is_writeable(sk)) return EPOLLOUT | EPOLLWRNORM; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index d0a7955b96c4e..f0c634e843e67 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -113,10 +113,9 @@ #define MPTCP_RST_TRANSIENT BIT(0) /* MPTCP socket atomic flags */ -#define MPTCP_NOSPACE 1 -#define MPTCP_WORK_RTX 2 -#define MPTCP_FALLBACK_DONE 4 -#define MPTCP_WORK_CLOSE_SUBFLOW 5 +#define MPTCP_WORK_RTX 1 +#define MPTCP_FALLBACK_DONE 2 +#define MPTCP_WORK_CLOSE_SUBFLOW 3 /* MPTCP socket release cb flags */ #define MPTCP_PUSH_PENDING 1 @@ -810,12 +809,9 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk) static inline void mptcp_write_space(struct sock *sk) { - if (sk_stream_is_writeable(sk)) { - /* pairs with memory barrier in mptcp_poll */ - smp_mb(); - if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags)) - sk_stream_write_space(sk); - } + /* pairs with memory barrier in mptcp_poll */ + smp_mb(); + sk_stream_write_space(sk); } static inline void __mptcp_sync_sndbuf(struct sock *sk) -- cgit 1.2.3-korg From a74762675f700a5473ebe54a671a0788a5b23cc9 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 1 Mar 2024 18:43:45 +0100 Subject: mptcp: avoid some duplicate code in socket option handling The mptcp_get_int_option() helper is needless open-coded in a couple of places, replace the duplicate code with the helper call. Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/sockopt.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index da37e4541a5d7..ac37f6c5e2ed6 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -629,13 +629,11 @@ static int mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, sockptr_t optva { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; - int val; - - if (optlen < sizeof(int)) - return -EINVAL; + int val, ret; - if (copy_from_sockptr(&val, optval, sizeof(val))) - return -EFAULT; + ret = mptcp_get_int_option(msk, optval, optlen, &val); + if (ret) + return ret; lock_sock(sk); sockopt_seq_inc(msk); @@ -659,13 +657,11 @@ static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t op { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; - int val; - - if (optlen < sizeof(int)) - return -EINVAL; + int val, ret; - if (copy_from_sockptr(&val, optval, sizeof(val))) - return -EFAULT; + ret = mptcp_get_int_option(msk, optval, optlen, &val); + if (ret) + return ret; lock_sock(sk); sockopt_seq_inc(msk); -- cgit 1.2.3-korg From 29b5e5ef87397963ca38d3eec0d296ad1c979bbc Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 1 Mar 2024 18:43:46 +0100 Subject: mptcp: implement TCP_NOTSENT_LOWAT support Add support for such socket option storing the user-space provided value in a new msk field, and using such data to implement the _mptcp_stream_memory_free() helper, similar to the TCP one. To avoid adding more indirect calls in the fast path, open-code a variant of sk_stream_memory_free() in mptcp_sendmsg() and add direct calls to the mptcp stream memory free helper where possible. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/464 Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 39 ++++++++++++++++++++++++++++++++++----- net/mptcp/protocol.h | 28 +++++++++++++++++++++++++++- net/mptcp/sockopt.c | 12 ++++++++++++ 3 files changed, 73 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a3d79e9d06942..99367c40de0d1 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1762,6 +1762,30 @@ static int do_copy_data_nocache(struct sock *sk, int copy, return 0; } +/* open-code sk_stream_memory_free() plus sent limit computation to + * avoid indirect calls in fast-path. + * Called under the msk socket lock, so we can avoid a bunch of ONCE + * annotations. + */ +static u32 mptcp_send_limit(const struct sock *sk) +{ + const struct mptcp_sock *msk = mptcp_sk(sk); + u32 limit, not_sent; + + if (sk->sk_wmem_queued >= READ_ONCE(sk->sk_sndbuf)) + return 0; + + limit = mptcp_notsent_lowat(sk); + if (limit == UINT_MAX) + return UINT_MAX; + + not_sent = msk->write_seq - msk->snd_nxt; + if (not_sent >= limit) + return 0; + + return limit - not_sent; +} + static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -1806,6 +1830,12 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct mptcp_data_frag *dfrag; bool dfrag_collapsed; size_t psize, offset; + u32 copy_limit; + + /* ensure fitting the notsent_lowat() constraint */ + copy_limit = mptcp_send_limit(sk); + if (!copy_limit) + goto wait_for_memory; /* reuse tail pfrag, if possible, or carve a new one from the * page allocator @@ -1813,9 +1843,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) dfrag = mptcp_pending_tail(sk); dfrag_collapsed = mptcp_frag_can_collapse_to(msk, pfrag, dfrag); if (!dfrag_collapsed) { - if (!sk_stream_memory_free(sk)) - goto wait_for_memory; - if (!mptcp_page_frag_refill(sk, pfrag)) goto wait_for_memory; @@ -1830,6 +1857,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) offset = dfrag->offset + dfrag->data_len; psize = pfrag->size - offset; psize = min_t(size_t, psize, msg_data_left(msg)); + psize = min_t(size_t, psize, copy_limit); total_ts = psize + frag_truesize; if (!sk_wmem_schedule(sk, total_ts)) @@ -3760,6 +3788,7 @@ static struct proto mptcp_prot = { .unhash = mptcp_unhash, .get_port = mptcp_get_port, .forward_alloc_get = mptcp_forward_alloc_get, + .stream_memory_free = mptcp_stream_memory_free, .sockets_allocated = &mptcp_sockets_allocated, .memory_allocated = &tcp_memory_allocated, @@ -3933,12 +3962,12 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; - if (sk_stream_is_writeable(sk)) + if (__mptcp_stream_is_writeable(sk, 1)) return EPOLLOUT | EPOLLWRNORM; set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); smp_mb__after_atomic(); /* NOSPACE is changed by mptcp_write_space() */ - if (sk_stream_is_writeable(sk)) + if (__mptcp_stream_is_writeable(sk, 1)) return EPOLLOUT | EPOLLWRNORM; return 0; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index f0c634e843e67..7cb502260dea9 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -307,6 +307,7 @@ struct mptcp_sock { in_accept_queue:1, free_first:1, rcvspace_init:1; + u32 notsent_lowat; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; @@ -807,11 +808,36 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk) READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt); } +static inline u32 mptcp_notsent_lowat(const struct sock *sk) +{ + struct net *net = sock_net(sk); + u32 val; + + val = READ_ONCE(mptcp_sk(sk)->notsent_lowat); + return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); +} + +static inline bool mptcp_stream_memory_free(const struct sock *sk, int wake) +{ + const struct mptcp_sock *msk = mptcp_sk(sk); + u32 notsent_bytes; + + notsent_bytes = READ_ONCE(msk->write_seq) - READ_ONCE(msk->snd_nxt); + return (notsent_bytes << wake) < mptcp_notsent_lowat(sk); +} + +static inline bool __mptcp_stream_is_writeable(const struct sock *sk, int wake) +{ + return mptcp_stream_memory_free(sk, wake) && + __sk_stream_is_writeable(sk, wake); +} + static inline void mptcp_write_space(struct sock *sk) { /* pairs with memory barrier in mptcp_poll */ smp_mb(); - sk_stream_write_space(sk); + if (mptcp_stream_memory_free(sk, 1)) + sk_stream_write_space(sk); } static inline void __mptcp_sync_sndbuf(struct sock *sk) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index ac37f6c5e2ed6..1b38dac70719b 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -812,6 +812,16 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, return 0; case TCP_ULP: return -EOPNOTSUPP; + case TCP_NOTSENT_LOWAT: + ret = mptcp_get_int_option(msk, optval, optlen, &val); + if (ret) + return ret; + + lock_sock(sk); + WRITE_ONCE(msk->notsent_lowat, val); + mptcp_write_space(sk); + release_sock(sk); + return 0; case TCP_CONGESTION: return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); case TCP_CORK: @@ -1345,6 +1355,8 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, return mptcp_put_int_option(msk, optval, optlen, msk->cork); case TCP_NODELAY: return mptcp_put_int_option(msk, optval, optlen, msk->nodelay); + case TCP_NOTSENT_LOWAT: + return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat); } return -EOPNOTSUPP; } -- cgit 1.2.3-korg From 7f71a337b5152ea0e7bef408d1af53778a919316 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 1 Mar 2024 18:43:47 +0100 Subject: mptcp: cleanup SOL_TCP handling Most TCP-level socket options get an integer from user space, and set the corresponding field under the msk-level socket lock. Reduce the code duplication moving such operations in the common code. Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/sockopt.c | 75 +++++++++++++++++++++-------------------------------- 1 file changed, 30 insertions(+), 45 deletions(-) (limited to 'net') diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 1b38dac70719b..dcd1c76d2a3ba 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -624,18 +624,11 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t return ret; } -static int mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, sockptr_t optval, - unsigned int optlen) +static int __mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, int val) { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; - int val, ret; - ret = mptcp_get_int_option(msk, optval, optlen, &val); - if (ret) - return ret; - - lock_sock(sk); sockopt_seq_inc(msk); msk->cork = !!val; mptcp_for_each_subflow(msk, subflow) { @@ -647,23 +640,15 @@ static int mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, sockptr_t optva } if (!val) mptcp_check_and_set_pending(sk); - release_sock(sk); return 0; } -static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t optval, - unsigned int optlen) +static int __mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, int val) { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; - int val, ret; - ret = mptcp_get_int_option(msk, optval, optlen, &val); - if (ret) - return ret; - - lock_sock(sk); sockopt_seq_inc(msk); msk->nodelay = !!val; mptcp_for_each_subflow(msk, subflow) { @@ -675,8 +660,6 @@ static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t op } if (val) mptcp_check_and_set_pending(sk); - release_sock(sk); - return 0; } @@ -799,35 +782,10 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, int ret, val; switch (optname) { - case TCP_INQ: - ret = mptcp_get_int_option(msk, optval, optlen, &val); - if (ret) - return ret; - if (val < 0 || val > 1) - return -EINVAL; - - lock_sock(sk); - msk->recvmsg_inq = !!val; - release_sock(sk); - return 0; case TCP_ULP: return -EOPNOTSUPP; - case TCP_NOTSENT_LOWAT: - ret = mptcp_get_int_option(msk, optval, optlen, &val); - if (ret) - return ret; - - lock_sock(sk); - WRITE_ONCE(msk->notsent_lowat, val); - mptcp_write_space(sk); - release_sock(sk); - return 0; case TCP_CONGESTION: return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); - case TCP_CORK: - return mptcp_setsockopt_sol_tcp_cork(msk, optval, optlen); - case TCP_NODELAY: - return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen); case TCP_DEFER_ACCEPT: /* See tcp.c: TCP_DEFER_ACCEPT does not fail */ mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); @@ -840,7 +798,34 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, optval, optlen); } - return -EOPNOTSUPP; + ret = mptcp_get_int_option(msk, optval, optlen, &val); + if (ret) + return ret; + + lock_sock(sk); + switch (optname) { + case TCP_INQ: + if (val < 0 || val > 1) + ret = -EINVAL; + else + msk->recvmsg_inq = !!val; + break; + case TCP_NOTSENT_LOWAT: + WRITE_ONCE(msk->notsent_lowat, val); + mptcp_write_space(sk); + break; + case TCP_CORK: + ret = __mptcp_setsockopt_sol_tcp_cork(msk, val); + break; + case TCP_NODELAY: + ret = __mptcp_setsockopt_sol_tcp_nodelay(msk, val); + break; + default: + ret = -ENOPROTOOPT; + } + + release_sock(sk); + return ret; } int mptcp_setsockopt(struct sock *sk, int level, int optname, -- cgit 1.2.3-korg From e38b117d7f3b4a5d810f6d0069ad0f643e503796 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 1 Mar 2024 19:18:25 +0100 Subject: mptcp: make pm_remove_addrs_and_subflows static mptcp_pm_remove_addrs_and_subflows() is only used in pm_netlink.c, it's no longer used in pm_userspace.c any more since the commit 8b1c94da1e48 ("mptcp: only send RM_ADDR in nl_cmd_remove"). So this patch changes it to a static function. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 4 ++-- net/mptcp/protocol.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index d5a942b9ab295..80c537659922f 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1550,8 +1550,8 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) } } -void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, - struct list_head *rm_list) +static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, + struct list_head *rm_list) { struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 }; struct mptcp_pm_addr_entry *entry; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 7cb502260dea9..5d034e8ca85af 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -981,8 +981,6 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list); -void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, - struct list_head *rm_list); void mptcp_free_local_addr_list(struct mptcp_sock *msk); -- cgit 1.2.3-korg From 34ca91e15e69917d2dbb6a76fdf8d28c5c7dc05e Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 1 Mar 2024 19:18:26 +0100 Subject: mptcp: export mptcp_genl_family & mptcp_nl_fill_addr This patch exports struct mptcp_genl_family and mptcp_nl_fill_addr() helper to allow them can be used in pm_userspace.c. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 9 +++------ net/mptcp/protocol.h | 4 ++++ 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 80c537659922f..e8cb887561e04 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -18,9 +18,6 @@ #include "protocol.h" #include "mib.h" -/* forward declaration */ -static struct genl_family mptcp_genl_family; - static int pm_nl_pernet_id; struct mptcp_pm_add_entry { @@ -1636,8 +1633,8 @@ int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info) return 0; } -static int mptcp_nl_fill_addr(struct sk_buff *skb, - struct mptcp_pm_addr_entry *entry) +int mptcp_nl_fill_addr(struct sk_buff *skb, + struct mptcp_pm_addr_entry *entry) { struct mptcp_addr_info *addr = &entry->addr; struct nlattr *attr; @@ -2281,7 +2278,7 @@ nla_put_failure: nlmsg_free(skb); } -static struct genl_family mptcp_genl_family __ro_after_init = { +struct genl_family mptcp_genl_family __ro_after_init = { .name = MPTCP_PM_NAME, .version = MPTCP_PM_VER, .netnsok = true, diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 5d034e8ca85af..10117715c57f1 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -343,6 +343,8 @@ struct mptcp_sock { #define mptcp_for_each_subflow_safe(__msk, __subflow, __tmp) \ list_for_each_entry_safe(__subflow, __tmp, &((__msk)->conn_list), node) +extern struct genl_family mptcp_genl_family; + static inline void msk_owned_by_me(const struct mptcp_sock *msk) { sock_owned_by_me((const struct sock *)msk); @@ -996,6 +998,8 @@ void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflo const struct mptcp_options_received *mp_opt); void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow, struct request_sock *req); +int mptcp_nl_fill_addr(struct sk_buff *skb, + struct mptcp_pm_addr_entry *entry); static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { -- cgit 1.2.3-korg From 34e74a5cf3b7d0ab5e54725be19bd27c8b367a63 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 1 Mar 2024 19:18:27 +0100 Subject: mptcp: implement mptcp_userspace_pm_dump_addr This patch implements mptcp_userspace_pm_dump_addr() to dump addresses from userspace pm address list. Use mptcp_token_get_sock() to get the msk from the given token, if userspace PM is enabled in it, traverse each address entry in address list, put every entry to userspace using mptcp_pm_nl_put_entry_msg(). Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/pm_userspace.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++ net/mptcp/protocol.h | 2 ++ 2 files changed, 62 insertions(+) (limited to 'net') diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index bc97cc30f013a..d6b7be3afbe51 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -572,3 +572,63 @@ set_flags_err: sock_put(sk); return ret; } + +int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct id_bitmap { + DECLARE_BITMAP(map, MPTCP_PM_MAX_ADDR_ID + 1); + } *bitmap; + const struct genl_info *info = genl_info_dump(cb); + struct net *net = sock_net(msg->sk); + struct mptcp_pm_addr_entry *entry; + struct mptcp_sock *msk; + struct nlattr *token; + int ret = -EINVAL; + struct sock *sk; + void *hdr; + + bitmap = (struct id_bitmap *)cb->ctx; + token = info->attrs[MPTCP_PM_ATTR_TOKEN]; + + msk = mptcp_token_get_sock(net, nla_get_u32(token)); + if (!msk) { + NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + return ret; + } + + sk = (struct sock *)msk; + + if (!mptcp_pm_is_userspace(msk)) { + GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); + goto out; + } + + lock_sock(sk); + spin_lock_bh(&msk->pm.lock); + list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { + if (test_bit(entry->addr.id, bitmap->map)) + continue; + + hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, &mptcp_genl_family, + NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR); + if (!hdr) + break; + + if (mptcp_nl_fill_addr(msg, entry) < 0) { + genlmsg_cancel(msg, hdr); + break; + } + + __set_bit(entry->addr.id, bitmap->map); + genlmsg_end(msg, hdr); + } + spin_unlock_bh(&msk->pm.lock); + release_sock(sk); + ret = msg->len; + +out: + sock_put(sk); + return ret; +} diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 10117715c57f1..829a492c8d19d 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1064,6 +1064,8 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, + struct netlink_callback *cb); static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow) { -- cgit 1.2.3-korg From 9e6c88e2f05b05892113fc56fabc652ac1ee7043 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 1 Mar 2024 19:18:28 +0100 Subject: mptcp: add token for get-addr in yaml This patch adds token parameter together with addr in get-addr section in mptcp_pm.yaml, then use the following commands to update mptcp_pm_gen.c and mptcp_pm_gen.h: ./tools/net/ynl/ynl-gen-c.py --mode kernel \ --spec Documentation/netlink/specs/mptcp_pm.yaml --source \ -o net/mptcp/mptcp_pm_gen.c ./tools/net/ynl/ynl-gen-c.py --mode kernel \ --spec Documentation/netlink/specs/mptcp_pm.yaml --header \ -o net/mptcp/mptcp_pm_gen.h Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- Documentation/netlink/specs/mptcp_pm.yaml | 3 ++- net/mptcp/mptcp_pm_gen.c | 7 ++++--- net/mptcp/mptcp_pm_gen.h | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/Documentation/netlink/specs/mptcp_pm.yaml b/Documentation/netlink/specs/mptcp_pm.yaml index 49f90cfb46989..af525ed297923 100644 --- a/Documentation/netlink/specs/mptcp_pm.yaml +++ b/Documentation/netlink/specs/mptcp_pm.yaml @@ -292,13 +292,14 @@ operations: - name: get-addr doc: Get endpoint information - attribute-set: endpoint + attribute-set: attr dont-validate: [ strict ] flags: [ uns-admin-perm ] do: &get-addr-attrs request: attributes: - addr + - token reply: attributes: - addr diff --git a/net/mptcp/mptcp_pm_gen.c b/net/mptcp/mptcp_pm_gen.c index 670da7822e6c9..c30a2a90a1925 100644 --- a/net/mptcp/mptcp_pm_gen.c +++ b/net/mptcp/mptcp_pm_gen.c @@ -32,8 +32,9 @@ const struct nla_policy mptcp_pm_del_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] }; /* MPTCP_PM_CMD_GET_ADDR - do */ -const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] = { - [MPTCP_PM_ENDPOINT_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), +const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ATTR_TOKEN + 1] = { + [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), + [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, }; /* MPTCP_PM_CMD_FLUSH_ADDRS - do */ @@ -110,7 +111,7 @@ const struct genl_ops mptcp_pm_nl_ops[11] = { .doit = mptcp_pm_nl_get_addr_doit, .dumpit = mptcp_pm_nl_get_addr_dumpit, .policy = mptcp_pm_get_addr_nl_policy, - .maxattr = MPTCP_PM_ENDPOINT_ADDR, + .maxattr = MPTCP_PM_ATTR_TOKEN, .flags = GENL_UNS_ADMIN_PERM, }, { diff --git a/net/mptcp/mptcp_pm_gen.h b/net/mptcp/mptcp_pm_gen.h index ac9fc7225b6a0..e24258f6f819b 100644 --- a/net/mptcp/mptcp_pm_gen.h +++ b/net/mptcp/mptcp_pm_gen.h @@ -18,7 +18,7 @@ extern const struct nla_policy mptcp_pm_add_addr_nl_policy[MPTCP_PM_ENDPOINT_ADD extern const struct nla_policy mptcp_pm_del_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1]; -extern const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1]; +extern const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ATTR_TOKEN + 1]; extern const struct nla_policy mptcp_pm_flush_addrs_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1]; -- cgit 1.2.3-korg From 9ae7846c4b6b3c7ccd0c9c96012b92f443295a7d Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 1 Mar 2024 19:18:29 +0100 Subject: mptcp: dump addrs in userspace pm list This patch renames mptcp_pm_nl_get_addr_dumpit() as a dedicated in-kernel netlink PM dump addrs function mptcp_pm_nl_dump_addr(), and invoke a newly added wrapper mptcp_pm_dump_addr() in mptcp_pm_nl_get_addr_dumpit(). Invoke in-kernel PM dump addrs function mptcp_pm_nl_dump_addr() or userspace PM dump addrs function mptcp_userspace_pm_dump_addr() based on whether the token parameter is passed in or not in the wrapper. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/pm.c | 9 +++++++++ net/mptcp/pm_netlink.c | 10 ++++++++-- net/mptcp/protocol.h | 3 +++ 3 files changed, 20 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 53e0b08b11232..193198cec74a3 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -441,6 +441,15 @@ int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex); } +int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb) +{ + const struct genl_info *info = genl_info_dump(cb); + + if (info->attrs[MPTCP_PM_ATTR_TOKEN]) + return mptcp_userspace_pm_dump_addr(msg, cb); + return mptcp_pm_nl_dump_addr(msg, cb); +} + int mptcp_pm_set_flags(struct net *net, struct nlattr *token, struct mptcp_pm_addr_entry *loc, struct mptcp_pm_addr_entry *rem, u8 bkup) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index e8cb887561e04..5fae35b6b305c 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1722,8 +1722,8 @@ fail: return ret; } -int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) +int mptcp_pm_nl_dump_addr(struct sk_buff *msg, + struct netlink_callback *cb) { struct net *net = sock_net(msg->sk); struct mptcp_pm_addr_entry *entry; @@ -1765,6 +1765,12 @@ int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg, return msg->len; } +int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + return mptcp_pm_dump_addr(msg, cb); +} + static int parse_limit(struct genl_info *info, int id, unsigned int *limit) { struct nlattr *attr = info->attrs[id]; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 829a492c8d19d..f23ca7645e96f 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1064,6 +1064,9 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); +int mptcp_pm_nl_dump_addr(struct sk_buff *msg, + struct netlink_callback *cb); int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); -- cgit 1.2.3-korg From c19ee3c7e38857b7d266d09129fa509d34716ec3 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 1 Mar 2024 19:18:30 +0100 Subject: mptcp: check userspace pm flags Just like MPTCP_PM_ADDR_FLAG_SIGNAL flag is checked in userspace PM announce mptcp_pm_nl_announce_doit(), PM flags should be checked in mptcp_pm_nl_subflow_create_doit() too. If MPTCP_PM_ADDR_FLAG_SUBFLOW flag is not set, there's no flags field in the output of dump_addr. This looks a bit strange: id 10 flags 10.0.3.2 This patch uses mptcp_pm_parse_entry() instead of mptcp_pm_parse_addr() to get the PM flags of the entry and check it. MPTCP_PM_ADDR_FLAG_SIGNAL flag shouldn't be set here, and if MPTCP_PM_ADDR_FLAG_SUBFLOW flag is missing from the netlink attribute, always set this flag. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/pm_userspace.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index d6b7be3afbe51..3bd13e94b5687 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -334,7 +334,6 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; struct mptcp_pm_addr_entry local = { 0 }; struct mptcp_addr_info addr_r; - struct mptcp_addr_info addr_l; struct mptcp_sock *msk; int err = -EINVAL; struct sock *sk; @@ -360,25 +359,31 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) goto create_err; } - err = mptcp_pm_parse_addr(laddr, info, &addr_l); + err = mptcp_pm_parse_entry(laddr, info, true, &local); if (err < 0) { NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr"); goto create_err; } + if (local.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { + GENL_SET_ERR_MSG(info, "invalid addr flags"); + err = -EINVAL; + goto create_err; + } + local.flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW; + err = mptcp_pm_parse_addr(raddr, info, &addr_r); if (err < 0) { NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr"); goto create_err; } - if (!mptcp_pm_addr_families_match(sk, &addr_l, &addr_r)) { + if (!mptcp_pm_addr_families_match(sk, &local.addr, &addr_r)) { GENL_SET_ERR_MSG(info, "families mismatch"); err = -EINVAL; goto create_err; } - local.addr = addr_l; err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false); if (err < 0) { GENL_SET_ERR_MSG(info, "did not match address and id"); @@ -387,7 +392,7 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) lock_sock(sk); - err = __mptcp_subflow_connect(sk, &addr_l, &addr_r); + err = __mptcp_subflow_connect(sk, &local.addr, &addr_r); release_sock(sk); -- cgit 1.2.3-korg From 06afe09091ee69dc7ab058b4be9917ae59cc81e5 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 1 Mar 2024 19:18:35 +0100 Subject: mptcp: add userspace_pm_lookup_addr_by_id helper Corresponding __lookup_addr_by_id() helper in the in-kernel netlink PM, this patch adds a new helper mptcp_userspace_pm_lookup_addr_by_id() to lookup the address entry with the given id on the userspace pm local address list. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/pm_userspace.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 3bd13e94b5687..20cbcb62cd8c5 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -106,19 +106,26 @@ static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk, return -EINVAL; } +static struct mptcp_pm_addr_entry * +mptcp_userspace_pm_lookup_addr_by_id(struct mptcp_sock *msk, unsigned int id) +{ + struct mptcp_pm_addr_entry *entry; + + list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { + if (entry->addr.id == id) + return entry; + } + return NULL; +} + int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, u8 *flags, int *ifindex) { - struct mptcp_pm_addr_entry *entry, *match = NULL; + struct mptcp_pm_addr_entry *match; spin_lock_bh(&msk->pm.lock); - list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { - if (id == entry->addr.id) { - match = entry; - break; - } - } + match = mptcp_userspace_pm_lookup_addr_by_id(msk, id); spin_unlock_bh(&msk->pm.lock); if (match) { *flags = match->flags; @@ -261,7 +268,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct nlattr *id = info->attrs[MPTCP_PM_ATTR_LOC_ID]; - struct mptcp_pm_addr_entry *match = NULL; + struct mptcp_pm_addr_entry *match; struct mptcp_pm_addr_entry *entry; struct mptcp_sock *msk; LIST_HEAD(free_list); @@ -298,13 +305,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) lock_sock(sk); - list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { - if (entry->addr.id == id_val) { - match = entry; - break; - } - } - + match = mptcp_userspace_pm_lookup_addr_by_id(msk, id_val); if (!match) { GENL_SET_ERR_MSG(info, "address with specified id not found"); release_sock(sk); -- cgit 1.2.3-korg From d32c8fb1c881478e4af8e6ac3c922d35c8ba3ca8 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 1 Mar 2024 19:18:36 +0100 Subject: mptcp: implement mptcp_userspace_pm_get_addr This patch implements mptcp_userspace_pm_get_addr() to get an address from userspace pm address list according the given 'token' and 'id'. Use nla_get_u32() to get the u32 value of 'token', then pass it to mptcp_token_get_sock() to get the msk. Pass 'msk' and 'id' to the helper mptcp_userspace_pm_lookup_addr_by_id() to get the address entry. Put this entry to userspace using mptcp_pm_nl_put_entry_info(). Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/pm_userspace.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++ net/mptcp/protocol.h | 2 ++ 2 files changed, 76 insertions(+) (limited to 'net') diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 20cbcb62cd8c5..b9809d988693b 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -638,3 +638,77 @@ out: sock_put(sk); return ret; } + +int mptcp_userspace_pm_get_addr(struct sk_buff *skb, + struct genl_info *info) +{ + struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; + struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; + struct mptcp_pm_addr_entry addr, *entry; + struct net *net = sock_net(skb->sk); + struct mptcp_sock *msk; + struct sk_buff *msg; + int ret = -EINVAL; + struct sock *sk; + void *reply; + + msk = mptcp_token_get_sock(net, nla_get_u32(token)); + if (!msk) { + NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + return ret; + } + + sk = (struct sock *)msk; + + if (!mptcp_pm_is_userspace(msk)) { + GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); + goto out; + } + + ret = mptcp_pm_parse_entry(attr, info, false, &addr); + if (ret < 0) + goto out; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto out; + } + + reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, + info->genlhdr->cmd); + if (!reply) { + GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); + ret = -EMSGSIZE; + goto fail; + } + + lock_sock(sk); + spin_lock_bh(&msk->pm.lock); + entry = mptcp_userspace_pm_lookup_addr_by_id(msk, addr.addr.id); + if (!entry) { + GENL_SET_ERR_MSG(info, "address not found"); + ret = -EINVAL; + goto unlock_fail; + } + + ret = mptcp_nl_fill_addr(msg, entry); + if (ret) + goto unlock_fail; + + genlmsg_end(msg, reply); + ret = genlmsg_reply(msg, info); + spin_unlock_bh(&msk->pm.lock); + release_sock(sk); + sock_put(sk); + return ret; + +unlock_fail: + spin_unlock_bh(&msk->pm.lock); + release_sock(sk); +fail: + nlmsg_free(msg); +out: + sock_put(sk); + return ret; +} diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index f23ca7645e96f..f848e0203e889 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1069,6 +1069,8 @@ int mptcp_pm_nl_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); +int mptcp_userspace_pm_get_addr(struct sk_buff *skb, + struct genl_info *info); static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow) { -- cgit 1.2.3-korg From 564ae6794ec5f050bb6df4446820777e7253f960 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 1 Mar 2024 19:18:37 +0100 Subject: mptcp: get addr in userspace pm list This patch renames mptcp_pm_nl_get_addr_doit() as a dedicated in-kernel netlink PM get addr function mptcp_pm_nl_get_addr(). and invoke a new wrapper mptcp_pm_get_addr() in mptcp_pm_nl_get_addr_doit. If a token is gotten in the wrapper, that means a userspace PM is used. So invoke mptcp_userspace_pm_get_addr() to get addr in userspace PM list. Otherwise, invoke mptcp_pm_nl_get_addr(). Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/pm.c | 7 +++++++ net/mptcp/pm_netlink.c | 7 ++++++- net/mptcp/protocol.h | 2 ++ 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 193198cec74a3..b4bdd92a56482 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -441,6 +441,13 @@ int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex); } +int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info) +{ + if (info->attrs[MPTCP_PM_ATTR_TOKEN]) + return mptcp_userspace_pm_get_addr(skb, info); + return mptcp_pm_nl_get_addr(skb, info); +} + int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb) { const struct genl_info *info = genl_info_dump(cb); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 5fae35b6b305c..16f8bd47f4b8f 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1672,7 +1672,7 @@ nla_put_failure: return -EMSGSIZE; } -int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); @@ -1722,6 +1722,11 @@ fail: return ret; } +int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info) +{ + return mptcp_pm_get_addr(skb, info); +} + int mptcp_pm_nl_dump_addr(struct sk_buff *msg, struct netlink_callback *cb) { diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index f848e0203e889..de9f0ff6dd309 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1069,6 +1069,8 @@ int mptcp_pm_nl_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); +int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info); int mptcp_userspace_pm_get_addr(struct sk_buff *skb, struct genl_info *info); -- cgit 1.2.3-korg From 01ad6b7be1d259cdf638eaed5c1b122d043f82bd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 1 Mar 2024 19:36:32 +0100 Subject: wifi: mac80211: always initialize match_auth My previous patch only initialized match_auth when the ifmgd->auth_data exists, but that was wrong, it should always be set. Fix that. Fixes: 310c8387c638 ("wifi: mac80211: clean up connection process") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/3151f5d0-c18f-413d-b34b-b94f095b947c@moroto.mountain Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240301193633.1a3fc370f211.I979dd222b3b5eb5e1437886e5f7c2355eeccb9f7@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5f2e9f5e17790..89c4165f27530 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -8129,10 +8129,10 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, sizeof(ifmgd->s1g_capa_mask)); /* keep some setup (AP STA, channel, ...) if matching */ - if (ifmgd->auth_data) - match_auth = ether_addr_equal(ifmgd->auth_data->ap_addr, - assoc_data->ap_addr) && - ifmgd->auth_data->link_id == req->link_id; + match_auth = ifmgd->auth_data && + ether_addr_equal(ifmgd->auth_data->ap_addr, + assoc_data->ap_addr) && + ifmgd->auth_data->link_id == req->link_id; if (req->ap_mld_addr) { uapsd_supported = true; -- cgit 1.2.3-korg From 2a705bc314961e9b3eb8561645b7704ff278f032 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 1 Mar 2024 19:36:33 +0100 Subject: wifi: mac80211: check link exists before use If the assoc link doesn't exist yet at this point, we cannot use it yet. This isn't normally the case, but e.g. in case of FT-DS (or just broken userspace) the link might not be set up yet and will only be created later in this function. Check that the link exists. Fixes: 310c8387c638 ("wifi: mac80211: clean up connection process") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/3151f5d0-c18f-413d-b34b-b94f095b947c@moroto.mountain Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240301193633.c886d300e90a.Ie1d5e23b2a033d934d343c37249f6f4dfddcc5fe@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 89c4165f27530..c60d23da91fe3 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -8173,7 +8173,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, eth_random_addr(assoc_data->link[i].addr); sband = local->hw.wiphy->bands[link_cbss->channel->band]; - if (match_auth && i == assoc_link_id) + if (match_auth && i == assoc_link_id && link) assoc_data->link[i].conn = link->u.mgd.conn; else assoc_data->link[i].conn = -- cgit 1.2.3-korg From b73229331ed5dad6479cc3ae9c61d861d5c1f2a2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 1 Mar 2024 17:54:51 +0100 Subject: wifi: mac80211: fix supported rate masking in scan We have an nl80211 attribute to mask supported rates from probe responses in scanning, e.g. for use in P2P, but in the refactoring I stopped applying this mask. Restore it. Fixes: 07095d167749 ("wifi: mac80211: start building elements in SKBs") Link: https://msgid.link/20240301175451.7ad8ab0bd90c.I46b49e4fc27fe60b75d4559c01104e55ed381c37@changeid Signed-off-by: Johannes Berg --- net/mac80211/util.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 627bd5a8bda52..f810b83a390af 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1216,8 +1216,8 @@ static int ieee80211_put_preq_ies_band(struct sk_buff *skb, if (band == NL80211_BAND_S1GHZ) return ieee80211_put_s1g_cap(skb, &sband->s1g_cap); - err = ieee80211_put_srates_elem(skb, sband, 0, rate_flags, 0, - WLAN_EID_SUPP_RATES); + err = ieee80211_put_srates_elem(skb, sband, 0, rate_flags, + ~rate_mask, WLAN_EID_SUPP_RATES); if (err) return err; @@ -1238,8 +1238,8 @@ static int ieee80211_put_preq_ies_band(struct sk_buff *skb, *offset = noffset; } - err = ieee80211_put_srates_elem(skb, sband, 0, rate_flags, 0, - WLAN_EID_EXT_SUPP_RATES); + err = ieee80211_put_srates_elem(skb, sband, 0, rate_flags, + ~rate_mask, WLAN_EID_EXT_SUPP_RATES); if (err) return err; -- cgit 1.2.3-korg From a8bca3e9371dc5e276af4168be099b2a05554c2a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 12:01:57 +0100 Subject: wifi: mac80211: track capability/opmode NSS separately We're currently tracking rx_nss for each station, and that is meant to be initialized to the capability NSS and later reduced by the operating mode notification NSS. However, we're mixing up capabilities and operating mode NSS in the same variable. This forces us to recalculate the NSS capability on operating mode notification RX, which is a bit strange; due to the previous fix I had to never keep rx_nss as zero, it also means that the capa is never taken into account properly. Fix all this by storing the capability value, that can be recalculated unconditionally whenever needed, and storing the operating mode notification NSS separately, taking it into account when assigning the final rx_nss value. Cc: stable@vger.kernel.org Fixes: dd6c064cfc3f ("wifi: mac80211: set station RX-NSS on reconfig") Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240228120157.0e1c41924d1d.I0acaa234e0267227b7e3ef81a59117c8792116bc@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/rate.c | 2 +- net/mac80211/sta_info.h | 6 +++++- net/mac80211/vht.c | 46 ++++++++++++++++++++++------------------------ 5 files changed, 30 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 0744113f3535d..3aa96898a250d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1867,7 +1867,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, sband->band); } - ieee80211_sta_set_rx_nss(link_sta); + ieee80211_sta_init_nss(link_sta); return ret; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 4bec625a84d12..32b5ac8947bdd 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2153,7 +2153,7 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta); enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta); -void ieee80211_sta_set_rx_nss(struct link_sta_info *link_sta); +void ieee80211_sta_init_nss(struct link_sta_info *link_sta); enum ieee80211_sta_rx_bandwidth ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width); enum nl80211_chan_width diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 34e03b9522c8a..192c5f8b9e086 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -37,7 +37,7 @@ void rate_control_rate_init(struct sta_info *sta) struct ieee80211_supported_band *sband; struct ieee80211_chanctx_conf *chanctx_conf; - ieee80211_sta_set_rx_nss(&sta->deflink); + ieee80211_sta_init_nss(&sta->deflink); if (!ref) return; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index f03731a5bbee2..a52fb76386d01 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -3,7 +3,7 @@ * Copyright 2002-2005, Devicescape Software, Inc. * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright(c) 2020-2023 Intel Corporation + * Copyright(c) 2020-2024 Intel Corporation */ #ifndef STA_INFO_H @@ -482,6 +482,8 @@ struct ieee80211_fragment_cache { * same for non-MLD STA. This is used as key for searching link STA * @link_id: Link ID uniquely identifying the link STA. This is 0 for non-MLD * and set to the corresponding vif LinkId for MLD STA + * @op_mode_nss: NSS limit as set by operating mode notification, or 0 + * @capa_nss: NSS limit as determined by local and peer capabilities * @link_hash_node: hash node for rhashtable * @sta: Points to the STA info * @gtk: group keys negotiated with this station, if any @@ -518,6 +520,8 @@ struct link_sta_info { u8 addr[ETH_ALEN]; u8 link_id; + u8 op_mode_nss, capa_nss; + struct rhlist_head link_hash_node; struct sta_info *sta; diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 2c475c439ba9b..642891cafbaf2 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -4,7 +4,7 @@ * * Portions of this file * Copyright(c) 2015 - 2016 Intel Deutschland GmbH - * Copyright (C) 2018 - 2023 Intel Corporation + * Copyright (C) 2018 - 2024 Intel Corporation */ #include @@ -541,15 +541,11 @@ ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta) return bw; } -void ieee80211_sta_set_rx_nss(struct link_sta_info *link_sta) +void ieee80211_sta_init_nss(struct link_sta_info *link_sta) { u8 ht_rx_nss = 0, vht_rx_nss = 0, he_rx_nss = 0, eht_rx_nss = 0, rx_nss; bool support_160; - /* if we received a notification already don't overwrite it */ - if (link_sta->pub->rx_nss) - return; - if (link_sta->pub->eht_cap.has_eht) { int i; const u8 *rx_nss_mcs = (void *)&link_sta->pub->eht_cap.eht_mcs_nss_supp; @@ -627,7 +623,15 @@ void ieee80211_sta_set_rx_nss(struct link_sta_info *link_sta) rx_nss = max(vht_rx_nss, ht_rx_nss); rx_nss = max(he_rx_nss, rx_nss); rx_nss = max(eht_rx_nss, rx_nss); - link_sta->pub->rx_nss = max_t(u8, 1, rx_nss); + rx_nss = max_t(u8, 1, rx_nss); + link_sta->capa_nss = rx_nss; + + /* that shouldn't be set yet, but we can handle it anyway */ + if (link_sta->op_mode_nss) + link_sta->pub->rx_nss = + min_t(u8, rx_nss, link_sta->op_mode_nss); + else + link_sta->pub->rx_nss = rx_nss; } u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, @@ -637,7 +641,7 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, enum ieee80211_sta_rx_bandwidth new_bw; struct sta_opmode_info sta_opmode = {}; u32 changed = 0; - u8 nss, cur_nss; + u8 nss; /* ignore - no support for BF yet */ if (opmode & IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF) @@ -647,23 +651,17 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, nss >>= IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT; nss += 1; - if (link_sta->pub->rx_nss != nss) { - cur_nss = link_sta->pub->rx_nss; - /* Reset rx_nss and call ieee80211_sta_set_rx_nss() which - * will set the same to max nss value calculated based on capability. - */ - link_sta->pub->rx_nss = 0; - ieee80211_sta_set_rx_nss(link_sta); - /* Do not allow an nss change to rx_nss greater than max_nss - * negotiated and capped to APs capability during association. - */ - if (nss <= link_sta->pub->rx_nss) { - link_sta->pub->rx_nss = nss; - sta_opmode.rx_nss = nss; - changed |= IEEE80211_RC_NSS_CHANGED; - sta_opmode.changed |= STA_OPMODE_N_SS_CHANGED; + if (link_sta->op_mode_nss != nss) { + if (nss <= link_sta->capa_nss) { + link_sta->op_mode_nss = nss; + + if (nss != link_sta->pub->rx_nss) { + link_sta->pub->rx_nss = nss; + changed |= IEEE80211_RC_NSS_CHANGED; + sta_opmode.rx_nss = link_sta->pub->rx_nss; + sta_opmode.changed |= STA_OPMODE_N_SS_CHANGED; + } } else { - link_sta->pub->rx_nss = cur_nss; pr_warn_ratelimited("Ignoring NSS change in VHT Operating Mode Notification from %pM with invalid nss %d", link_sta->pub->addr, nss); } -- cgit 1.2.3-korg From 9ad7974856926129f190ffbe3beea78460b3b7cc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 26 Feb 2024 20:34:06 +0100 Subject: wifi: cfg80211: check A-MSDU format more carefully If it looks like there's another subframe in the A-MSDU but the header isn't fully there, we can end up reading data out of bounds, only to discard later. Make this a bit more careful and check if the subframe header can even be present. Reported-by: syzbot+d050d437fe47d479d210@syzkaller.appspotmail.com Link: https://msgid.link/20240226203405.a731e2c95e38.I82ce7d8c0cc8970ce29d0a39fdc07f1ffc425be4@changeid Signed-off-by: Johannes Berg --- net/wireless/util.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/wireless/util.c b/net/wireless/util.c index 379f742fd7415..2bde8a3546313 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -791,15 +791,19 @@ ieee80211_amsdu_subframe_length(void *field, u8 mesh_flags, u8 hdr_type) bool ieee80211_is_valid_amsdu(struct sk_buff *skb, u8 mesh_hdr) { - int offset = 0, remaining, subframe_len, padding; + int offset = 0, subframe_len, padding; for (offset = 0; offset < skb->len; offset += subframe_len + padding) { + int remaining = skb->len - offset; struct { __be16 len; u8 mesh_flags; } hdr; u16 len; + if (sizeof(hdr) > remaining) + return false; + if (skb_copy_bits(skb, offset + 2 * ETH_ALEN, &hdr, sizeof(hdr)) < 0) return false; @@ -807,7 +811,6 @@ bool ieee80211_is_valid_amsdu(struct sk_buff *skb, u8 mesh_hdr) mesh_hdr); subframe_len = sizeof(struct ethhdr) + len; padding = (4 - subframe_len) & 0x3; - remaining = skb->len - offset; if (subframe_len > remaining) return false; @@ -825,7 +828,7 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, { unsigned int hlen = ALIGN(extra_headroom, 4); struct sk_buff *frame = NULL; - int offset = 0, remaining; + int offset = 0; struct { struct ethhdr eth; uint8_t flags; @@ -839,10 +842,14 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, copy_len = sizeof(hdr); while (!last) { + int remaining = skb->len - offset; unsigned int subframe_len; int len, mesh_len = 0; u8 padding; + if (copy_len > remaining) + goto purge; + skb_copy_bits(skb, offset, &hdr, copy_len); if (iftype == NL80211_IFTYPE_MESH_POINT) mesh_len = __ieee80211_get_mesh_hdrlen(hdr.flags); @@ -852,7 +859,6 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, padding = (4 - subframe_len) & 0x3; /* the last MSDU has no padding */ - remaining = skb->len - offset; if (subframe_len > remaining) goto purge; /* mitigate A-MSDU aggregation injection attacks */ -- cgit 1.2.3-korg From 1c0d21c4b33a41be9090e73f8225813d72ac88d9 Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Tue, 27 Feb 2024 09:52:50 +0530 Subject: wifi: mac80211: remove only link keys during stopping link AP Currently while stopping a link AP, all keys from the interface were removed. However with MLO there is a requirement to free only the link keys. Add changes to remove keys which are associated with the link AP which is going to be stopped. Signed-off-by: Rameshkumar Sundaram Signed-off-by: Aditya Kumar Singh Link: https://msgid.link/20240227042251.1511122-2-quic_adisi@quicinc.com Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 3aa96898a250d..bc0c1300e4047 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1563,6 +1563,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_link_data *link = sdata_dereference(sdata->link[link_id], sdata); struct ieee80211_bss_conf *link_conf = link->conf; + LIST_HEAD(keys); lockdep_assert_wiphy(local->hw.wiphy); @@ -1617,7 +1618,12 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, link_conf->bssid_indicator = 0; __sta_info_flush(sdata, true, link_id); - ieee80211_free_keys(sdata, true); + + ieee80211_remove_link_keys(link, &keys); + if (!list_empty(&keys)) { + synchronize_net(); + ieee80211_free_key_list(local, &keys); + } link_conf->enable_beacon = false; sdata->beacon_rate_set = false; -- cgit 1.2.3-korg From 5fcc7c51f9e72d1e62991f8b32be4a5adf44d556 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Tue, 27 Feb 2024 09:52:51 +0530 Subject: wifi: mac80211: handle netif carrier up/down with link AP during MLO Currently whenever link AP is started, netif_carrier_up() function is called and whenever it is brought down, netif_carrier_down() function is called. However, with MLO, all the links of the same MLD would use the same netdev. Hence there is no need to indicate for each link up/down. Also, calling it down when only one of the links went down is not desirable. Add changes to call the netif_carrier_up() function only when first link is brought up. Similarly, add changes to call the netif_carrier_down() function only when last link is brought down. In order to check the number of beaconing links in the given interface, introduce a new helper function ieee80211_num_beaconing_links(). Signed-off-by: Aditya Kumar Singh Link: https://msgid.link/20240227042251.1511122-3-quic_adisi@quicinc.com Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index bc0c1300e4047..734368fd3f97f 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1241,6 +1241,30 @@ ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, return 0; } +static u8 ieee80211_num_beaconing_links(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_link_data *link; + u8 link_id, num = 0; + + if (sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_P2P_GO) + return num; + + if (!sdata->vif.valid_links) + return num; + + for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { + link = sdata_dereference(sdata->link[link_id], sdata); + if (!link) + continue; + + if (sdata_dereference(link->u.ap.beacon, sdata)) + num++; + } + + return num; +} + static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ap_settings *params) { @@ -1470,7 +1494,9 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_SSID); ieee80211_link_info_change_notify(sdata, link, changed); - netif_carrier_on(dev); + if (ieee80211_num_beaconing_links(sdata) <= 1) + netif_carrier_on(dev); + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) netif_carrier_on(vlan->dev); @@ -1592,7 +1618,9 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, /* turn off carrier for this interface and dependent VLANs */ list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) netif_carrier_off(vlan->dev); - netif_carrier_off(dev); + + if (ieee80211_num_beaconing_links(sdata) <= 1) + netif_carrier_off(dev); /* remove beacon and probe response */ sdata->u.ap.active = false; -- cgit 1.2.3-korg From bf7bc8c5974b78a09f266be3a5a84ed3865f30a6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:40:51 +0100 Subject: wifi: mac80211: don't add VHT capa on links without them When a link doesn't have VHT capability, before the rework we'd have set IEEE80211_CONN_DISABLE_VHT, but now with the linear progression of 'mode', we no longer have that. Add an explicit check for VHT being supported, so we don't add a zeroed VHT capabilities element where it shouldn't be. Fixes: 310c8387c638 ("wifi: mac80211: clean up connection process") Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240228094207.bfe4283bcde7.Ib70a558bc6bdbcec3d9e663079229dfcc2493682@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c60d23da91fe3..a2dca15d9c2e7 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1319,7 +1319,8 @@ static size_t ieee80211_assoc_link_elems(struct ieee80211_sub_if_data *sdata, offset); if (sband->band != NL80211_BAND_6GHZ && - assoc_data->link[link_id].conn.mode >= IEEE80211_CONN_MODE_VHT) { + assoc_data->link[link_id].conn.mode >= IEEE80211_CONN_MODE_VHT && + sband->vht_cap.vht_supported) { bool mu_mimo_owner = ieee80211_add_vht_ie(sdata, skb, sband, &assoc_data->link[link_id].ap_vht_cap, -- cgit 1.2.3-korg From 7d8b02592d528b073805af752458a589862b5eb0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:40:52 +0100 Subject: wifi: mac80211: obtain AP HT/VHT data for assoc request In the association request, we make some parameters depend on the AP's HT/VHT information. This was broken by my code because it no longer filled that information, making it all zero. For HT that meant we wouldn't reduce our capabilities to 20 MHz if needed, and for VHT we lost beamforming capabilities. Fix this. It seems like it may even have been broken for all but the assoc link before. Fixes: 310c8387c638 ("wifi: mac80211: clean up connection process") Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240228094207.7dc812c2060a.Ibd591f9c214b4e166cf7171db3cf63bda8e3c9fd@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a2dca15d9c2e7..2c72d4d0fc259 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -8049,6 +8049,67 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata, } } +static int +ieee80211_mgd_get_ap_ht_vht_capa(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgd_assoc_data *assoc_data, + int link_id) +{ + struct cfg80211_bss *cbss = assoc_data->link[link_id].bss; + enum nl80211_band band = cbss->channel->band; + struct ieee80211_supported_band *sband; + const struct element *elem; + int err; + + /* neither HT nor VHT elements used on 6 GHz */ + if (band == NL80211_BAND_6GHZ) + return 0; + + if (assoc_data->link[link_id].conn.mode < IEEE80211_CONN_MODE_HT) + return 0; + + rcu_read_lock(); + elem = ieee80211_bss_get_elem(cbss, WLAN_EID_HT_OPERATION); + if (!elem || elem->datalen < sizeof(struct ieee80211_ht_operation)) { + mlme_link_id_dbg(sdata, link_id, "no HT operation on BSS %pM\n", + cbss->bssid); + err = -EINVAL; + goto out_rcu; + } + assoc_data->link[link_id].ap_ht_param = + ((struct ieee80211_ht_operation *)(elem->data))->ht_param; + rcu_read_unlock(); + + if (assoc_data->link[link_id].conn.mode < IEEE80211_CONN_MODE_VHT) + return 0; + + /* some drivers want to support VHT on 2.4 GHz even */ + sband = sdata->local->hw.wiphy->bands[band]; + if (!sband->vht_cap.vht_supported) + return 0; + + rcu_read_lock(); + elem = ieee80211_bss_get_elem(cbss, WLAN_EID_VHT_CAPABILITY); + /* but even then accept it not being present on the AP */ + if (!elem && band == NL80211_BAND_2GHZ) { + err = 0; + goto out_rcu; + } + if (!elem || elem->datalen < sizeof(struct ieee80211_vht_cap)) { + mlme_link_id_dbg(sdata, link_id, "no VHT capa on BSS %pM\n", + cbss->bssid); + err = -EINVAL; + goto out_rcu; + } + memcpy(&assoc_data->link[link_id].ap_vht_cap, elem->data, + sizeof(struct ieee80211_vht_cap)); + rcu_read_unlock(); + + return 0; +out_rcu: + rcu_read_unlock(); + return err; +} + int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, struct cfg80211_assoc_request *req) { @@ -8193,6 +8254,14 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, req->links[i].error = err; goto err_free; } + + err = ieee80211_mgd_get_ap_ht_vht_capa(sdata, + assoc_data, i); + if (err) { + err = -EINVAL; + req->links[i].error = err; + goto err_free; + } } assoc_data->wmm = true; @@ -8228,6 +8297,10 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, &assoc_data->link[0].conn); uapsd_supported = bss->uapsd_supported; + + err = ieee80211_mgd_get_ap_ht_vht_capa(sdata, assoc_data, 0); + if (err) + goto err_free; } assoc_data->spp_amsdu = req->flags & ASSOC_REQ_SPP_AMSDU; -- cgit 1.2.3-korg From b2edc721716f44e2a7e46eb592321960a1227c7b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:42:54 +0100 Subject: wifi: cfg80211: print flags in tracing in hex It's confusing to see decimal, e.g. 20, here. Printing the flags in hex (0x14 == 20) is much clearer. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240228094254.5f7f4ab2e137.Id5e665104bbc51377b4591289e32f8c1d4711dce@changeid Signed-off-by: Johannes Berg --- net/wireless/trace.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 361331c29116f..e039e66ab3777 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -818,8 +818,8 @@ DECLARE_EVENT_CLASS(station_add_change, params->link_sta_params.opmode_notif_used; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM" - ", station flags mask: %u, station flags set: %u, " - "station modify mask: %u, listen interval: %d, aid: %u, " + ", station flags mask: 0x%x, station flags set: 0x%x, " + "station modify mask: 0x%x, listen interval: %d, aid: %u, " "plink action: %u, plink state: %u, uapsd queues: %u, vlan:%s", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac, __entry->sta_flags_mask, __entry->sta_flags_set, @@ -1075,7 +1075,7 @@ TRACE_EVENT(rdev_return_int_mpath_info, ), TP_printk(WIPHY_PR_FMT ", returned %d. mpath info - generation: %d, " "filled: %u, frame qlen: %u, sn: %u, metric: %u, exptime: %u," - " discovery timeout: %u, discovery retries: %u, flags: %u", + " discovery timeout: %u, discovery retries: %u, flags: 0x%x", WIPHY_PR_ARG, __entry->ret, __entry->generation, __entry->filled, __entry->frame_qlen, __entry->sn, __entry->metric, __entry->exptime, __entry->discovery_timeout, @@ -1317,7 +1317,7 @@ TRACE_EVENT(rdev_assoc, req->fils_nonces, 2 * FILS_NONCE_LEN); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM" - ", previous bssid: %pM, use mfp: %s, flags: %u", + ", previous bssid: %pM, use mfp: %s, flags: 0x%x", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid, __entry->prev_bssid, BOOL_TO_STR(__entry->use_mfp), __entry->flags) @@ -1439,7 +1439,7 @@ TRACE_EVENT(rdev_connect, ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM" ", ssid: %s, auth type: %d, privacy: %s, wpa versions: %u, " - "flags: %u, previous bssid: %pM", + "flags: 0x%x, previous bssid: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid, __entry->ssid, __entry->auth_type, BOOL_TO_STR(__entry->privacy), __entry->wpa_versions, __entry->flags, __entry->prev_bssid) -- cgit 1.2.3-korg From 04577bfa99ac8cfb7a43dbbba09584e0b1086cee Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Wed, 28 Feb 2024 09:44:56 +0100 Subject: wifi: mac80211: add link id to ieee80211_gtk_rekey_add() In MLO, we need the link id in the GTK key to be given by the driver after rekeying in wowlan, so add that. Signed-off-by: Shaul Triebitz Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240228094500.ce1bfc83a680.I43a6f8ab2804ee07116a37d5b9ec601b843464b1@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 12 +++++++----- include/net/mac80211.h | 4 +++- net/mac80211/key.c | 16 ++++++++++++---- 3 files changed, 22 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 6d5ed79b9fff0..ca2c6d0b605ef 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -1976,6 +1976,7 @@ static bool iwl_mvm_gtk_rekey(struct iwl_wowlan_status_data *status, } conf = { .conf.cipher = gtk_cipher, }; + int link_id = vif->active_links ? __ffs(vif->active_links) : -1; BUILD_BUG_ON(WLAN_KEY_LEN_CCMP != WLAN_KEY_LEN_GCMP); BUILD_BUG_ON(sizeof(conf.key) < WLAN_KEY_LEN_CCMP); @@ -2009,7 +2010,7 @@ static bool iwl_mvm_gtk_rekey(struct iwl_wowlan_status_data *status, memcpy(conf.conf.key, status->gtk[i].key, sizeof(status->gtk[i].key)); - key = ieee80211_gtk_rekey_add(vif, &conf.conf); + key = ieee80211_gtk_rekey_add(vif, &conf.conf, link_id); if (IS_ERR(key)) return false; @@ -2040,6 +2041,7 @@ iwl_mvm_d3_igtk_bigtk_rekey_add(struct iwl_wowlan_status_data *status, .conf.keyidx = key_data->id, }; struct ieee80211_key_seq seq; + int link_id = vif->active_links ? __ffs(vif->active_links) : -1; if (!key_data->len) return true; @@ -2065,17 +2067,17 @@ iwl_mvm_d3_igtk_bigtk_rekey_add(struct iwl_wowlan_status_data *status, BUILD_BUG_ON(sizeof(conf.key) < sizeof(key_data->key)); memcpy(conf.conf.key, key_data->key, conf.conf.keylen); - key_config = ieee80211_gtk_rekey_add(vif, &conf.conf); + key_config = ieee80211_gtk_rekey_add(vif, &conf.conf, link_id); if (IS_ERR(key_config)) return false; ieee80211_set_key_rx_seq(key_config, 0, &seq); if (key_config->keyidx == 4 || key_config->keyidx == 5) { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - int link_id = vif->active_links ? __ffs(vif->active_links) : 0; - struct iwl_mvm_vif_link_info *mvm_link = - mvmvif->link[link_id]; + struct iwl_mvm_vif_link_info *mvm_link; + link_id = link_id < 0 ? 0 : link_id; + mvm_link = mvmvif->link[link_id]; mvm_link->igtk = key_config; } diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 56c6ecb2c10a2..34d66d0a24b15 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5912,6 +5912,7 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf); * ieee80211_gtk_rekey_add - add a GTK key from rekeying during WoWLAN * @vif: the virtual interface to add the key on * @keyconf: new key data + * @link_id: the link id of the key or -1 for non-MLO * * When GTK rekeying was done while the system was suspended, (a) new * key(s) will be available. These will be needed by mac80211 for proper @@ -5939,7 +5940,8 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf); */ struct ieee80211_key_conf * ieee80211_gtk_rekey_add(struct ieee80211_vif *vif, - struct ieee80211_key_conf *keyconf); + struct ieee80211_key_conf *keyconf, + int link_id); /** * ieee80211_gtk_rekey_notify - notify userspace supplicant of rekeying diff --git a/net/mac80211/key.c b/net/mac80211/key.c index a2cce62c97b70..eecdd2265eaa6 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -6,7 +6,7 @@ * Copyright 2007-2008 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright 2018-2020, 2022-2023 Intel Corporation + * Copyright 2018-2020, 2022-2024 Intel Corporation */ #include @@ -1372,12 +1372,19 @@ EXPORT_SYMBOL_GPL(ieee80211_remove_key); struct ieee80211_key_conf * ieee80211_gtk_rekey_add(struct ieee80211_vif *vif, - struct ieee80211_key_conf *keyconf) + struct ieee80211_key_conf *keyconf, + int link_id) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct ieee80211_key *key; int err; + struct ieee80211_link_data *link_data = + link_id < 0 ? &sdata->deflink : + sdata_dereference(sdata->link[link_id], sdata); + + if (WARN_ON(!link_data)) + return ERR_PTR(-EINVAL); if (WARN_ON(!local->wowlan)) return ERR_PTR(-EINVAL); @@ -1394,8 +1401,9 @@ ieee80211_gtk_rekey_add(struct ieee80211_vif *vif, if (sdata->u.mgd.mfp != IEEE80211_MFP_DISABLED) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; - /* FIXME: this function needs to get a link ID */ - err = ieee80211_key_link(key, &sdata->deflink, NULL); + key->conf.link_id = link_id; + + err = ieee80211_key_link(key, link_data, NULL); if (err) return ERR_PTR(err); -- cgit 1.2.3-korg From ddf82e752f8a3253e03fe4c0a957792701f8b2e6 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Wed, 28 Feb 2024 09:47:39 +0100 Subject: wifi: mac80211: Allow beacons to update BSS table regardless of scan When a beacon is received use it to update the BSS table regardless of the scanning state. Do so only when there are active non-monitor interfaces. Also, while at it, in any case accept beacons only with broadcast address. Reviewed-by: Miriam Rachel Korenblit Signed-off-by: Ilan Peer Link: https://msgid.link/20240228094742.e508605f495b.I3ab24ab3543319e31165111b28bcdcc622b5cf02@changeid Signed-off-by: Johannes Berg --- net/mac80211/scan.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index bca2a259fda6b..0429e59ba387c 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -257,7 +257,6 @@ static bool ieee80211_scan_accept_presp(struct ieee80211_sub_if_data *sdata, void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) { struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); - struct ieee80211_sub_if_data *sdata1, *sdata2; struct ieee80211_mgmt *mgmt = (void *)skb->data; struct ieee80211_bss *bss; struct ieee80211_channel *channel; @@ -281,12 +280,6 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) if (skb->len < min_hdr_len) return; - sdata1 = rcu_dereference(local->scan_sdata); - sdata2 = rcu_dereference(local->sched_scan_sdata); - - if (likely(!sdata1 && !sdata2)) - return; - if (test_and_clear_bit(SCAN_BEACON_WAIT, &local->scanning)) { /* * we were passive scanning because of radar/no-IR, but @@ -304,10 +297,17 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) return; if (ieee80211_is_probe_resp(mgmt->frame_control)) { + struct ieee80211_sub_if_data *sdata1, *sdata2; struct cfg80211_scan_request *scan_req; struct cfg80211_sched_scan_request *sched_scan_req; u32 scan_req_flags = 0, sched_scan_req_flags = 0; + sdata1 = rcu_dereference(local->scan_sdata); + sdata2 = rcu_dereference(local->sched_scan_sdata); + + if (likely(!sdata1 && !sdata2)) + return; + scan_req = rcu_dereference(local->scan_req); sched_scan_req = rcu_dereference(local->sched_scan_req); @@ -327,8 +327,16 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) sched_scan_req_flags, mgmt->da)) return; + } else { + /* Beacons are expected only with broadcast address */ + if (!is_broadcast_ether_addr(mgmt->da)) + return; } + /* Do not update the BSS table in case of only monitor interfaces */ + if (local->open_count == local->monitors) + return; + bss = ieee80211_bss_info_update(local, rx_status, mgmt, skb->len, channel); -- cgit 1.2.3-korg From 0e3a22389defd20215657f0442ae109b160f2eee Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Wed, 28 Feb 2024 09:47:54 +0100 Subject: wifi: mac80211: Adjust CQM handling for MLO The CQM handling did not consider the MLO case and thus notified a not-existing link with the CQM change. To fix this, propagate the CQM notification to all the active links (handling both the non-MLO and MLO cases). TODO: this currently propagates the same configuration to all links regardless of the band. This might not be the correct approach as different links might need to be configured with different values. Signed-off-by: Ilan Peer Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240228094753.bf6a3fefe553.Id738810b73e1087e01d5885508b70a3631707627@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 79 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 734368fd3f97f..b9c1ec2f9cfcc 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3290,33 +3290,57 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, return 0; } +static void ieee80211_set_cqm_rssi_link(struct ieee80211_sub_if_data *sdata, + struct ieee80211_link_data *link, + s32 rssi_thold, u32 rssi_hyst, + s32 rssi_low, s32 rssi_high) +{ + struct ieee80211_bss_conf *conf; + + if (!link || !link->conf) + return; + + conf = link->conf; + + if (rssi_thold && rssi_hyst && + rssi_thold == conf->cqm_rssi_thold && + rssi_hyst == conf->cqm_rssi_hyst) + return; + + conf->cqm_rssi_thold = rssi_thold; + conf->cqm_rssi_hyst = rssi_hyst; + conf->cqm_rssi_low = rssi_low; + conf->cqm_rssi_high = rssi_high; + link->u.mgd.last_cqm_event_signal = 0; + + if (!ieee80211_vif_link_active(&sdata->vif, link->link_id)) + return; + + if (sdata->u.mgd.associated && + (sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)) + ieee80211_link_info_change_notify(sdata, link, BSS_CHANGED_CQM); +} + static int ieee80211_set_cqm_rssi_config(struct wiphy *wiphy, struct net_device *dev, s32 rssi_thold, u32 rssi_hyst) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_vif *vif = &sdata->vif; - struct ieee80211_bss_conf *bss_conf = &vif->bss_conf; - - if (rssi_thold == bss_conf->cqm_rssi_thold && - rssi_hyst == bss_conf->cqm_rssi_hyst) - return 0; + int link_id; - if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER && - !(sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)) + if (vif->driver_flags & IEEE80211_VIF_BEACON_FILTER && + !(vif->driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)) return -EOPNOTSUPP; - bss_conf->cqm_rssi_thold = rssi_thold; - bss_conf->cqm_rssi_hyst = rssi_hyst; - bss_conf->cqm_rssi_low = 0; - bss_conf->cqm_rssi_high = 0; - sdata->deflink.u.mgd.last_cqm_event_signal = 0; + /* For MLD, handle CQM change on all the active links */ + for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { + struct ieee80211_link_data *link = + sdata_dereference(sdata->link[link_id], sdata); - /* tell the driver upon association, unless already associated */ - if (sdata->u.mgd.associated && - sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI) - ieee80211_link_info_change_notify(sdata, &sdata->deflink, - BSS_CHANGED_CQM); + ieee80211_set_cqm_rssi_link(sdata, link, rssi_thold, rssi_hyst, + 0, 0); + } return 0; } @@ -3327,22 +3351,19 @@ static int ieee80211_set_cqm_rssi_range_config(struct wiphy *wiphy, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_vif *vif = &sdata->vif; - struct ieee80211_bss_conf *bss_conf = &vif->bss_conf; + int link_id; - if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER) + if (vif->driver_flags & IEEE80211_VIF_BEACON_FILTER) return -EOPNOTSUPP; - bss_conf->cqm_rssi_low = rssi_low; - bss_conf->cqm_rssi_high = rssi_high; - bss_conf->cqm_rssi_thold = 0; - bss_conf->cqm_rssi_hyst = 0; - sdata->deflink.u.mgd.last_cqm_event_signal = 0; + /* For MLD, handle CQM change on all the active links */ + for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { + struct ieee80211_link_data *link = + sdata_dereference(sdata->link[link_id], sdata); - /* tell the driver upon association, unless already associated */ - if (sdata->u.mgd.associated && - sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI) - ieee80211_link_info_change_notify(sdata, &sdata->deflink, - BSS_CHANGED_CQM); + ieee80211_set_cqm_rssi_link(sdata, link, 0, 0, + rssi_low, rssi_high); + } return 0; } -- cgit 1.2.3-korg From 6810ee918d23f67170f127a848cf30c257adcaff Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:48:09 +0100 Subject: wifi: mac80211: update scratch_pos after defrag The scratch_pos update here was lost after defrag, so any other uses of the scratch buffer might overwrite it. Fixes: a286de1aa38f ("wifi: mac80211: Rename multi_link") Reviewed-by: Benjamin Berg Reviewed-by: Ilan Peer Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240228094901.9da35f39eeb7.I7127f2918ec4cba416fcbc35eacaea10262c1268@changeid Signed-off-by: Johannes Berg --- net/mac80211/parse.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index 196a882e4c190..233c761823d3b 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -800,6 +800,7 @@ static void ieee80211_mle_parse_link(struct ieee802_11_elems *elems, elems->ml_basic = (const void *)elems->scratch_pos; elems->ml_basic_len = ml_len; + elems->scratch_pos += ml_len; ieee80211_mle_get_sta_prof(elems, params->link_id); prof = elems->prof; -- cgit 1.2.3-korg From 0217972f9684b924b2cbd9219e5ed8c53564782b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:48:10 +0100 Subject: wifi: mac80211: remove unnecessary ML element type check At this point, since it's taken from elems->ml_basic which is stored only if it's of type basic, we don't really need to check again if it's basic. Reviewed-by: Ilan Peer Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240228094901.ad1d4a09a6eb.Ib96fa75b1a6db21dd4182dcfa11fe9aff78fa3ed@changeid Signed-off-by: Johannes Berg --- net/mac80211/parse.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index 233c761823d3b..ae0f14bd952a8 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -723,10 +723,6 @@ static void ieee80211_mle_get_sta_prof(struct ieee802_11_elems *elems, if (!ml || !ml_len) return; - if (le16_get_bits(ml->control, IEEE80211_ML_CONTROL_TYPE) != - IEEE80211_ML_CONTROL_TYPE_BASIC) - return; - for_each_mle_subelement(sub, (u8 *)ml, ml_len) { struct ieee80211_mle_per_sta_profile *prof = (void *)sub->data; ssize_t sta_prof_len; -- cgit 1.2.3-korg From 68f6c6afbcebdc3acdc6084abfe453f4cba6b9dc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:48:11 +0100 Subject: wifi: mac80211: add ieee80211_vif_link_active() helper We sometimes need to check if a link is active, and this is complicated by the fact that active_links has no bits set when the vif isn't (acting as) an MLD. Add a small new helper ieee80211_vif_link_active() to make that a bit easier, and use it in a few places. Reviewed-by: Ilan Peer Reviewed-by: Emmanuel Grumbach Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240228094901.688760aff5f7.I06892a503f5ecb9563fbd678d35d08daf7a044b0@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 15 +++++++++++++++ net/mac80211/cfg.c | 3 +-- net/mac80211/chan.c | 3 +-- net/mac80211/driver-ops.c | 14 +++++--------- net/mac80211/util.c | 3 +-- 5 files changed, 23 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 34d66d0a24b15..6c6d8210d6374 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2011,6 +2011,21 @@ static inline bool ieee80211_vif_is_mld(const struct ieee80211_vif *vif) return vif->valid_links != 0; } +/** + * ieee80211_vif_link_active - check if a given link is active + * @vif: the vif + * @link_id: the link ID to check + * Return: %true if the vif is an MLD and the link is active, or if + * the vif is not an MLD and the link ID is 0; %false otherwise. + */ +static inline bool ieee80211_vif_link_active(const struct ieee80211_vif *vif, + unsigned int link_id) +{ + if (!ieee80211_vif_is_mld(vif)) + return link_id == 0; + return vif->active_links & BIT(link_id); +} + #define for_each_vif_active_link(vif, link, link_id) \ for (link_id = 0; link_id < ARRAY_SIZE((vif)->link_conf); link_id++) \ if ((!(vif)->active_links || \ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b9c1ec2f9cfcc..358593eeb5000 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3189,8 +3189,7 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata, if (WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION)) return -EINVAL; - if (ieee80211_vif_is_mld(&sdata->vif) && - !(sdata->vif.active_links & BIT(link->link_id))) + if (!ieee80211_vif_link_active(&sdata->vif, link->link_id)) return 0; old_req = link->u.mgd.req_smps; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 38acdc458c7c9..80e4b9784131d 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1701,8 +1701,7 @@ int ieee80211_link_use_channel(struct ieee80211_link_data *link, lockdep_assert_wiphy(local->hw.wiphy); - if (sdata->vif.active_links && - !(sdata->vif.active_links & BIT(link->link_id))) { + if (!ieee80211_vif_link_active(&sdata->vif, link->link_id)) { ieee80211_link_update_chanreq(link, chanreq); return 0; } diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 3b7f70073fc37..dce37ba8ebe37 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2015 Intel Deutschland GmbH - * Copyright (C) 2022-2023 Intel Corporation + * Copyright (C) 2022-2024 Intel Corporation */ #include #include "ieee80211_i.h" @@ -214,8 +214,7 @@ int drv_conf_tx(struct ieee80211_local *local, if (!check_sdata_in_driver(sdata)) return -EIO; - if (sdata->vif.active_links && - !(sdata->vif.active_links & BIT(link->link_id))) + if (!ieee80211_vif_link_active(&sdata->vif, link->link_id)) return 0; if (params->cw_min == 0 || params->cw_min > params->cw_max) { @@ -315,8 +314,7 @@ int drv_assign_vif_chanctx(struct ieee80211_local *local, if (!check_sdata_in_driver(sdata)) return -EIO; - if (sdata->vif.active_links && - !(sdata->vif.active_links & BIT(link_conf->link_id))) + if (!ieee80211_vif_link_active(&sdata->vif, link_conf->link_id)) return 0; trace_drv_assign_vif_chanctx(local, sdata, link_conf, ctx); @@ -343,8 +341,7 @@ void drv_unassign_vif_chanctx(struct ieee80211_local *local, if (!check_sdata_in_driver(sdata)) return; - if (sdata->vif.active_links && - !(sdata->vif.active_links & BIT(link_conf->link_id))) + if (!ieee80211_vif_link_active(&sdata->vif, link_conf->link_id)) return; trace_drv_unassign_vif_chanctx(local, sdata, link_conf, ctx); @@ -461,8 +458,7 @@ void drv_link_info_changed(struct ieee80211_local *local, if (!check_sdata_in_driver(sdata)) return; - if (sdata->vif.active_links && - !(sdata->vif.active_links & BIT(link_id))) + if (!ieee80211_vif_link_active(&sdata->vif, link_id)) return; trace_drv_link_info_changed(local, sdata, info, changed); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index f810b83a390af..a237cbcf7b491 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1935,8 +1935,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) for (link_id = 0; link_id < ARRAY_SIZE(sdata->vif.link_conf); link_id++) { - if (ieee80211_vif_is_mld(&sdata->vif) && - !(sdata->vif.active_links & BIT(link_id))) + if (!ieee80211_vif_link_active(&sdata->vif, link_id)) continue; link = sdata_dereference(sdata->link[link_id], sdata); -- cgit 1.2.3-korg From 2015d2d6391bf08115566c80fe2964b434cf0681 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:48:12 +0100 Subject: wifi: mac80211: remove unnecessary ML element checks Given the prior changes to ieee80211_mle_size_ok(), we can now pass NULL to for_each_mle_subelement(), so no longer need to check for that here explicitly. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240228094901.9e32c4b63875.Ia2ee0aafdc8a48bd21b485cc36a9866f950d781b@changeid Signed-off-by: Johannes Berg --- net/mac80211/parse.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index ae0f14bd952a8..d231aaecc2199 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -720,9 +720,6 @@ static void ieee80211_mle_get_sta_prof(struct ieee802_11_elems *elems, ssize_t ml_len = elems->ml_basic_len; const struct element *sub; - if (!ml || !ml_len) - return; - for_each_mle_subelement(sub, (u8 *)ml, ml_len) { struct ieee80211_mle_per_sta_profile *prof = (void *)sub->data; ssize_t sta_prof_len; -- cgit 1.2.3-korg From 508c423d9444a5eeeebd66fcff19ebe346a05150 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:48:13 +0100 Subject: wifi: mac80211: simplify multi-link element parsing We shouldn't assign elems->ml_basic{,len} before defragmentation, and we don't need elems->ml_reconf{,len} at all since we don't do defragmentation. Clean that up a bit. This does require always defragmention even when it may not be needed, but that's easier to reason about. Reviewed-by: Ilan Peer Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240228094902.e0115da4d2a6.I89a80f7387eabef8df3955485d4a583ed024c5b1@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 6 ++---- net/mac80211/mlme.c | 6 ++---- net/mac80211/parse.c | 12 ++++-------- 3 files changed, 8 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 32b5ac8947bdd..26cf24a315830 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1736,7 +1736,6 @@ struct ieee802_11_elems { const struct ieee80211_eht_cap_elem *eht_cap; const struct ieee80211_eht_operation *eht_operation; const struct ieee80211_multi_link_elem *ml_basic; - const struct ieee80211_multi_link_elem *ml_reconf; const struct ieee80211_bandwidth_indication *bandwidth_indication; const struct ieee80211_ttlm_elem *ttlm[IEEE80211_TTLM_MAX_CNT]; @@ -1764,12 +1763,11 @@ struct ieee802_11_elems { /* mult-link element can be de-fragmented and thus u8 is not sufficient */ size_t ml_basic_len; - size_t ml_reconf_len; - /* The basic Multi-Link element in the original IEs */ + /* The basic Multi-Link element in the original elements */ const struct element *ml_basic_elem; - /* The reconfiguration Multi-Link element in the original IEs */ + /* The reconfiguration Multi-Link element in the original elements */ const struct element *ml_reconf_elem; u8 ttlm_num; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2c72d4d0fc259..3be4f4cf12c34 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5760,7 +5760,7 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, u8 link_id; u32 delay; - if (!ieee80211_vif_is_mld(&sdata->vif) || !elems->ml_reconf) + if (!ieee80211_vif_is_mld(&sdata->vif) || !elems->ml_reconf_elem) return; ml_len = cfg80211_defragment_element(elems->ml_reconf_elem, @@ -5773,9 +5773,7 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, if (ml_len < 0) return; - elems->ml_reconf = (const void *)elems->scratch_pos; - elems->ml_reconf_len = ml_len; - ml = elems->ml_reconf; + ml = (const void *)elems->scratch_pos; /* Directly parse the sub elements as the common information doesn't * hold any useful information. diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index d231aaecc2199..8bdf6e7efa58b 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -129,19 +129,15 @@ ieee80211_parse_extension_element(u32 *crc, switch (le16_get_bits(mle->control, IEEE80211_ML_CONTROL_TYPE)) { case IEEE80211_ML_CONTROL_TYPE_BASIC: - if (elems->ml_basic) { + if (elems->ml_basic_elem) { elems->parse_error |= IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC; break; } elems->ml_basic_elem = (void *)elem; - elems->ml_basic = data; - elems->ml_basic_len = len; break; case IEEE80211_ML_CONTROL_TYPE_RECONF: elems->ml_reconf_elem = (void *)elem; - elems->ml_reconf = data; - elems->ml_reconf_len = len; break; default: break; @@ -776,9 +772,6 @@ static void ieee80211_mle_parse_link(struct ieee802_11_elems *elems, const struct element *non_inherit = NULL; const u8 *end; - if (params->link_id == -1) - return; - ml_len = cfg80211_defragment_element(elems->ml_basic_elem, elems->ie_start, elems->total_len, @@ -795,6 +788,9 @@ static void ieee80211_mle_parse_link(struct ieee802_11_elems *elems, elems->ml_basic_len = ml_len; elems->scratch_pos += ml_len; + if (params->link_id == -1) + return; + ieee80211_mle_get_sta_prof(elems, params->link_id); prof = elems->prof; -- cgit 1.2.3-korg From 4d70e9c5488dd57ff5fcabe4d4ecf3d9dd4555ff Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:48:14 +0100 Subject: wifi: mac80211: defragment reconfiguration MLE when parsing Using the scratch buffer (without advancing it) here in the mlme.c code seems somewhat wrong, defragment the reconfig multi-link element already when parsing. This might be a bit more work in certain cases, but makes the whole thing more regular. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240228094902.92936a3ce216.I4b736ce4fdc199fa1d6b00d00032f448c873a8b4@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/mlme.c | 19 +++---------------- net/mac80211/parse.c | 22 ++++++++++++++++++++++ 3 files changed, 27 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 26cf24a315830..caee68cc8f14f 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1736,6 +1736,7 @@ struct ieee802_11_elems { const struct ieee80211_eht_cap_elem *eht_cap; const struct ieee80211_eht_operation *eht_operation; const struct ieee80211_multi_link_elem *ml_basic; + const struct ieee80211_multi_link_elem *ml_reconf; const struct ieee80211_bandwidth_indication *bandwidth_indication; const struct ieee80211_ttlm_elem *ttlm[IEEE80211_TTLM_MAX_CNT]; @@ -1763,6 +1764,7 @@ struct ieee802_11_elems { /* mult-link element can be de-fragmented and thus u8 is not sufficient */ size_t ml_basic_len; + size_t ml_reconf_len; /* The basic Multi-Link element in the original elements */ const struct element *ml_basic_elem; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3be4f4cf12c34..90e505778db61 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5752,33 +5752,20 @@ out: static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems) { - const struct ieee80211_multi_link_elem *ml; const struct element *sub; - ssize_t ml_len; unsigned long removed_links = 0; u16 link_removal_timeout[IEEE80211_MLD_MAX_NUM_LINKS] = {}; u8 link_id; u32 delay; - if (!ieee80211_vif_is_mld(&sdata->vif) || !elems->ml_reconf_elem) + if (!ieee80211_vif_is_mld(&sdata->vif) || !elems->ml_reconf) return; - ml_len = cfg80211_defragment_element(elems->ml_reconf_elem, - elems->ie_start, - elems->total_len, - elems->scratch_pos, - elems->scratch + elems->scratch_len - - elems->scratch_pos, - WLAN_EID_FRAGMENT); - if (ml_len < 0) - return; - - ml = (const void *)elems->scratch_pos; - /* Directly parse the sub elements as the common information doesn't * hold any useful information. */ - for_each_mle_subelement(sub, (u8 *)ml, ml_len) { + for_each_mle_subelement(sub, (const u8 *)elems->ml_reconf, + elems->ml_reconf_len) { struct ieee80211_mle_per_sta_profile *prof = (void *)sub->data; u8 *pos = prof->variable; u16 control; diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index 8bdf6e7efa58b..804323858977e 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -819,6 +819,26 @@ static void ieee80211_mle_parse_link(struct ieee802_11_elems *elems, _ieee802_11_parse_elems_full(&sub, elems, non_inherit); } +static void +ieee80211_mle_defrag_reconf(struct ieee802_11_elems *elems) +{ + ssize_t ml_len; + + ml_len = cfg80211_defragment_element(elems->ml_reconf_elem, + elems->ie_start, + elems->total_len, + elems->scratch_pos, + elems->scratch + + elems->scratch_len - + elems->scratch_pos, + WLAN_EID_FRAGMENT); + if (ml_len < 0) + return; + elems->ml_reconf = (void *)elems->scratch_pos; + elems->ml_reconf_len = ml_len; + elems->scratch_pos += ml_len; +} + struct ieee802_11_elems * ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) { @@ -864,6 +884,8 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) ieee80211_mle_parse_link(elems, params); + ieee80211_mle_defrag_reconf(elems); + if (elems->tim && !elems->parse_error) { const struct ieee80211_tim_ie *tim_ie = elems->tim; -- cgit 1.2.3-korg From b413c0bd9ccc6bf705b51ae268c3a7ebba1cd11f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:48:15 +0100 Subject: wifi: mac80211: remove unneeded scratch_len subtraction We're always using "scratch + len - pos", so we don't need to subtract here to calculate the remaining length. Remove the unnecessary subtraction. Reviewed-by: Miriam Rachel Korenblit Reviewed-by: Ilan Peer Link: https://msgid.link/20240228094902.44e07cfa9e63.I7a9758fb9bc6b726aac49804f2f05cd521bc4128@changeid Signed-off-by: Johannes Berg --- net/mac80211/parse.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index 804323858977e..73e52504ed97c 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -862,7 +862,6 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) elems, params->bss, nontransmitted_profile); elems->scratch_pos += nontransmitted_profile_len; - elems->scratch_len -= nontransmitted_profile_len; non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, nontransmitted_profile, nontransmitted_profile_len); -- cgit 1.2.3-korg From 5a21f0eae13515b9a0b6cc66dc5f1903c18afb17 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:48:16 +0100 Subject: wifi: mac80211: hide element parsing internals Rework the data structures to hide element parsing internals from the users. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240228094902.19c610b529e2.Ie7ea2dcb6713911590ace6583a4748f32dc37df2@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 14 ------ net/mac80211/parse.c | 118 +++++++++++++++++++++++++++++---------------- net/mac80211/tests/elems.c | 4 +- 3 files changed, 78 insertions(+), 58 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index caee68cc8f14f..da320799fbff8 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1766,12 +1766,6 @@ struct ieee802_11_elems { size_t ml_basic_len; size_t ml_reconf_len; - /* The basic Multi-Link element in the original elements */ - const struct element *ml_basic_elem; - - /* The reconfiguration Multi-Link element in the original elements */ - const struct element *ml_reconf_elem; - u8 ttlm_num; /* @@ -1784,14 +1778,6 @@ struct ieee802_11_elems { /* whether/which parse error occurred while retrieving these elements */ u8 parse_error; - - /* - * scratch buffer that can be used for various element parsing related - * tasks, e.g., element de-fragmentation etc. - */ - size_t scratch_len; - u8 *scratch_pos; - u8 scratch[] __counted_by(scratch_len); }; static inline struct ieee80211_local *hw_to_local( diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index 73e52504ed97c..55e5497f89781 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -34,12 +34,32 @@ #include "led.h" #include "wep.h" +struct ieee80211_elems_parse { + /* must be first for kfree to work */ + struct ieee802_11_elems elems; + + /* The basic Multi-Link element in the original elements */ + const struct element *ml_basic_elem; + + /* The reconfiguration Multi-Link element in the original elements */ + const struct element *ml_reconf_elem; + + /* + * scratch buffer that can be used for various element parsing related + * tasks, e.g., element de-fragmentation etc. + */ + size_t scratch_len; + u8 *scratch_pos; + u8 scratch[] __counted_by(scratch_len); +}; + static void ieee80211_parse_extension_element(u32 *crc, const struct element *elem, - struct ieee802_11_elems *elems, + struct ieee80211_elems_parse *elems_parse, struct ieee80211_elems_parse_params *params) { + struct ieee802_11_elems *elems = &elems_parse->elems; const void *data = elem->data + 1; bool calc_crc = false; u8 len; @@ -129,15 +149,15 @@ ieee80211_parse_extension_element(u32 *crc, switch (le16_get_bits(mle->control, IEEE80211_ML_CONTROL_TYPE)) { case IEEE80211_ML_CONTROL_TYPE_BASIC: - if (elems->ml_basic_elem) { + if (elems_parse->ml_basic_elem) { elems->parse_error |= IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC; break; } - elems->ml_basic_elem = (void *)elem; + elems_parse->ml_basic_elem = elem; break; case IEEE80211_ML_CONTROL_TYPE_RECONF: - elems->ml_reconf_elem = (void *)elem; + elems_parse->ml_reconf_elem = elem; break; default: break; @@ -169,9 +189,10 @@ ieee80211_parse_extension_element(u32 *crc, static u32 _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, - struct ieee802_11_elems *elems, + struct ieee80211_elems_parse *elems_parse, const struct element *check_inherit) { + struct ieee802_11_elems *elems = &elems_parse->elems; const struct element *elem; bool calc_crc = params->filter != 0; DECLARE_BITMAP(seen_elems, 256); @@ -586,7 +607,8 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, case WLAN_EID_EXTENSION: ieee80211_parse_extension_element(calc_crc ? &crc : NULL, - elem, elems, params); + elem, elems_parse, + params); break; case WLAN_EID_S1G_CAPABILITIES: if (params->mode != IEEE80211_CONN_MODE_S1G) @@ -709,9 +731,11 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len, return found ? profile_len : 0; } -static void ieee80211_mle_get_sta_prof(struct ieee802_11_elems *elems, - u8 link_id) +static void +ieee80211_mle_get_sta_prof(struct ieee80211_elems_parse *elems_parse, + u8 link_id) { + struct ieee802_11_elems *elems = &elems_parse->elems; const struct ieee80211_multi_link_elem *ml = elems->ml_basic; ssize_t ml_len = elems->ml_basic_len; const struct element *sub; @@ -741,26 +765,27 @@ static void ieee80211_mle_get_sta_prof(struct ieee802_11_elems *elems, sta_prof_len = cfg80211_defragment_element(sub, (u8 *)ml, ml_len, - elems->scratch_pos, - elems->scratch + - elems->scratch_len - - elems->scratch_pos, + elems_parse->scratch_pos, + elems_parse->scratch + + elems_parse->scratch_len - + elems_parse->scratch_pos, IEEE80211_MLE_SUBELEM_FRAGMENT); if (sta_prof_len < 0) return; - elems->prof = (void *)elems->scratch_pos; + elems->prof = (void *)elems_parse->scratch_pos; elems->sta_prof_len = sta_prof_len; - elems->scratch_pos += sta_prof_len; + elems_parse->scratch_pos += sta_prof_len; return; } } -static void ieee80211_mle_parse_link(struct ieee802_11_elems *elems, +static void ieee80211_mle_parse_link(struct ieee80211_elems_parse *elems_parse, struct ieee80211_elems_parse_params *params) { + struct ieee802_11_elems *elems = &elems_parse->elems; struct ieee80211_mle_per_sta_profile *prof; struct ieee80211_elems_parse_params sub = { .mode = params->mode, @@ -772,26 +797,26 @@ static void ieee80211_mle_parse_link(struct ieee802_11_elems *elems, const struct element *non_inherit = NULL; const u8 *end; - ml_len = cfg80211_defragment_element(elems->ml_basic_elem, + ml_len = cfg80211_defragment_element(elems_parse->ml_basic_elem, elems->ie_start, elems->total_len, - elems->scratch_pos, - elems->scratch + - elems->scratch_len - - elems->scratch_pos, + elems_parse->scratch_pos, + elems_parse->scratch + + elems_parse->scratch_len - + elems_parse->scratch_pos, WLAN_EID_FRAGMENT); if (ml_len < 0) return; - elems->ml_basic = (const void *)elems->scratch_pos; + elems->ml_basic = (const void *)elems_parse->scratch_pos; elems->ml_basic_len = ml_len; - elems->scratch_pos += ml_len; + elems_parse->scratch_pos += ml_len; if (params->link_id == -1) return; - ieee80211_mle_get_sta_prof(elems, params->link_id); + ieee80211_mle_get_sta_prof(elems_parse, params->link_id); prof = elems->prof; if (!prof) @@ -816,57 +841,66 @@ static void ieee80211_mle_parse_link(struct ieee802_11_elems *elems, non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, sub.start, sub.len); - _ieee802_11_parse_elems_full(&sub, elems, non_inherit); + _ieee802_11_parse_elems_full(&sub, elems_parse, non_inherit); } static void -ieee80211_mle_defrag_reconf(struct ieee802_11_elems *elems) +ieee80211_mle_defrag_reconf(struct ieee80211_elems_parse *elems_parse) { + struct ieee802_11_elems *elems = &elems_parse->elems; ssize_t ml_len; - ml_len = cfg80211_defragment_element(elems->ml_reconf_elem, + ml_len = cfg80211_defragment_element(elems_parse->ml_reconf_elem, elems->ie_start, elems->total_len, - elems->scratch_pos, - elems->scratch + - elems->scratch_len - - elems->scratch_pos, + elems_parse->scratch_pos, + elems_parse->scratch + + elems_parse->scratch_len - + elems_parse->scratch_pos, WLAN_EID_FRAGMENT); if (ml_len < 0) return; - elems->ml_reconf = (void *)elems->scratch_pos; + elems->ml_reconf = (void *)elems_parse->scratch_pos; elems->ml_reconf_len = ml_len; - elems->scratch_pos += ml_len; + elems_parse->scratch_pos += ml_len; } struct ieee802_11_elems * ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) { + struct ieee80211_elems_parse *elems_parse; struct ieee802_11_elems *elems; const struct element *non_inherit = NULL; u8 *nontransmitted_profile; int nontransmitted_profile_len = 0; size_t scratch_len = 3 * params->len; - elems = kzalloc(struct_size(elems, scratch, scratch_len), GFP_ATOMIC); - if (!elems) + BUILD_BUG_ON(offsetof(typeof(*elems_parse), elems) != 0); + + elems_parse = kzalloc(struct_size(elems_parse, scratch, scratch_len), + GFP_ATOMIC); + if (!elems_parse) return NULL; + + elems_parse->scratch_len = scratch_len; + elems_parse->scratch_pos = elems_parse->scratch; + + elems = &elems_parse->elems; elems->ie_start = params->start; elems->total_len = params->len; - elems->scratch_len = scratch_len; - elems->scratch_pos = elems->scratch; - nontransmitted_profile = elems->scratch_pos; + nontransmitted_profile = elems_parse->scratch_pos; nontransmitted_profile_len = ieee802_11_find_bssid_profile(params->start, params->len, elems, params->bss, nontransmitted_profile); - elems->scratch_pos += nontransmitted_profile_len; + elems_parse->scratch_pos += nontransmitted_profile_len; non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, nontransmitted_profile, nontransmitted_profile_len); - elems->crc = _ieee802_11_parse_elems_full(params, elems, non_inherit); + elems->crc = _ieee802_11_parse_elems_full(params, elems_parse, + non_inherit); /* Override with nontransmitted profile, if found */ if (nontransmitted_profile_len) { @@ -878,12 +912,12 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) .link_id = params->link_id, }; - _ieee802_11_parse_elems_full(&sub, elems, NULL); + _ieee802_11_parse_elems_full(&sub, elems_parse, NULL); } - ieee80211_mle_parse_link(elems, params); + ieee80211_mle_parse_link(elems_parse, params); - ieee80211_mle_defrag_reconf(elems); + ieee80211_mle_defrag_reconf(elems_parse); if (elems->tim && !elems->parse_error) { const struct ieee80211_tim_ie *tim_ie = elems->tim; diff --git a/net/mac80211/tests/elems.c b/net/mac80211/tests/elems.c index 30fc0acb7ac29..a413ba29f7592 100644 --- a/net/mac80211/tests/elems.c +++ b/net/mac80211/tests/elems.c @@ -2,7 +2,7 @@ /* * KUnit tests for element parsing * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2024 Intel Corporation */ #include #include "../ieee80211_i.h" @@ -69,7 +69,7 @@ static void mle_defrag(struct kunit *test) if (IS_ERR_OR_NULL(parsed)) goto free_skb; - KUNIT_EXPECT_NOT_NULL(test, parsed->ml_basic_elem); + KUNIT_EXPECT_NOT_NULL(test, parsed->ml_basic); KUNIT_EXPECT_EQ(test, parsed->ml_basic_len, 2 /* control */ + -- cgit 1.2.3-korg From 22667035e5ddb7b68c7d473693b321fb9e20a397 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:55:41 +0100 Subject: wifi: cfg80211: expose cfg80211_iter_rnr() to drivers In mac80211 we'll need to look at reduced neighbor report entries for channel switch purposes, so export the iteration function to make that simpler. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240228095718.0954809964ef.I53e95c017aa71f14e8d1057afbbc75982ddb43df@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 32 ++++++++++++++++++++++++++++++++ net/wireless/scan.c | 20 +++++++------------- 2 files changed, 39 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f9eada2a26ec0..53653d234d398 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6870,6 +6870,38 @@ cfg80211_find_vendor_ie(unsigned int oui, int oui_type, return (const void *)cfg80211_find_vendor_elem(oui, oui_type, ies, len); } +/** + * enum cfg80211_rnr_iter_ret - reduced neighbor report iteration state + * @RNR_ITER_CONTINUE: continue iterating with the next entry + * @RNR_ITER_BREAK: break iteration and return success + * @RNR_ITER_ERROR: break iteration and return error + */ +enum cfg80211_rnr_iter_ret { + RNR_ITER_CONTINUE, + RNR_ITER_BREAK, + RNR_ITER_ERROR, +}; + +/** + * cfg80211_iter_rnr - iterate reduced neighbor report entries + * @elems: the frame elements to iterate RNR elements and then + * their entries in + * @elems_len: length of the elements + * @iter: iteration function, see also &enum cfg80211_rnr_iter_ret + * for the return value + * @iter_data: additional data passed to the iteration function + * Return: %true on success (after successfully iterating all entries + * or if the iteration function returned %RNR_ITER_BREAK), + * %false on error (iteration function returned %RNR_ITER_ERROR + * or elements were malformed.) + */ +bool cfg80211_iter_rnr(const u8 *elems, size_t elems_len, + enum cfg80211_rnr_iter_ret + (*iter)(void *data, u8 type, + const struct ieee80211_neighbor_ap_info *info, + const u8 *tbtt_info, u8 tbtt_info_len), + void *iter_data); + /** * cfg80211_defragment_element - Defrag the given element data into a buffer * diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 7cf36b8d3ae74..9377a43aa5f78 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -611,19 +611,12 @@ static int cfg80211_parse_ap_info(struct cfg80211_colocated_ap *entry, return 0; } -enum cfg80211_rnr_iter_ret { - RNR_ITER_CONTINUE, - RNR_ITER_BREAK, - RNR_ITER_ERROR, -}; - -static bool -cfg80211_iter_rnr(const u8 *elems, size_t elems_len, - enum cfg80211_rnr_iter_ret - (*iter)(void *data, u8 type, - const struct ieee80211_neighbor_ap_info *info, - const u8 *tbtt_info, u8 tbtt_info_len), - void *iter_data) +bool cfg80211_iter_rnr(const u8 *elems, size_t elems_len, + enum cfg80211_rnr_iter_ret + (*iter)(void *data, u8 type, + const struct ieee80211_neighbor_ap_info *info, + const u8 *tbtt_info, u8 tbtt_info_len), + void *iter_data) { const struct element *rnr; const u8 *pos, *end; @@ -675,6 +668,7 @@ cfg80211_iter_rnr(const u8 *elems, size_t elems_len, return true; } +EXPORT_SYMBOL_GPL(cfg80211_iter_rnr); struct colocated_ap_data { const struct element *ssid_elem; -- cgit 1.2.3-korg From 8ade3356b25ab2522892a21832a709e7ad5f8168 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:55:42 +0100 Subject: wifi: cfg80211: allow cfg80211_defragment_element() without output If we just want to determine the length of the fragmented data, we basically need the same logic, and really we want it to be _literally_ the same logic, so it cannot be out of sync in any way. Allow calling cfg80211_defragment_element() without an output buffer, where it then just returns the required output size. Also add this to the tests, just to exercise it, using the pre-calculated length to really do the defragmentation, which checks that this is sufficient. Reviewed-by: Miriam Rachel Korenblit Reviewed-by: Benjamin Berg Link: https://msgid.link/20240228095718.6d6565b9e3f2.Ib441903f4b8644ba04b1c766f90580ee6f54fc66@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++-- net/wireless/scan.c | 27 ++++++++++++++++++--------- net/wireless/tests/fragmentation.c | 30 +++++++++++++++++++++++++----- 3 files changed, 45 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 53653d234d398..2e2be4fd2bb65 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6908,8 +6908,8 @@ bool cfg80211_iter_rnr(const u8 *elems, size_t elems_len, * @elem: the element to defragment * @ies: elements where @elem is contained * @ieslen: length of @ies - * @data: buffer to store element data - * @data_len: length of @data + * @data: buffer to store element data, or %NULL to just determine size + * @data_len: length of @data, or 0 * @frag_id: the element ID of fragments * * Return: length of @data, or -EINVAL on error diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 9377a43aa5f78..5a5dd3ce497fc 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2504,16 +2504,22 @@ ssize_t cfg80211_defragment_element(const struct element *elem, const u8 *ies, if (elem->id == WLAN_EID_EXTENSION) { copied = elem->datalen - 1; - if (copied > data_len) - return -ENOSPC; - memmove(data, elem->data + 1, copied); + if (data) { + if (copied > data_len) + return -ENOSPC; + + memmove(data, elem->data + 1, copied); + } } else { copied = elem->datalen; - if (copied > data_len) - return -ENOSPC; - memmove(data, elem->data, copied); + if (data) { + if (copied > data_len) + return -ENOSPC; + + memmove(data, elem->data, copied); + } } /* Fragmented elements must have 255 bytes */ @@ -2532,10 +2538,13 @@ ssize_t cfg80211_defragment_element(const struct element *elem, const u8 *ies, elem_datalen = elem->datalen; - if (copied + elem_datalen > data_len) - return -ENOSPC; + if (data) { + if (copied + elem_datalen > data_len) + return -ENOSPC; + + memmove(data + copied, elem->data, elem_datalen); + } - memmove(data + copied, elem->data, elem_datalen); copied += elem_datalen; /* Only the last fragment may be short */ diff --git a/net/wireless/tests/fragmentation.c b/net/wireless/tests/fragmentation.c index 49a339ca88807..411fae18cd882 100644 --- a/net/wireless/tests/fragmentation.c +++ b/net/wireless/tests/fragmentation.c @@ -2,7 +2,7 @@ /* * KUnit tests for element fragmentation * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2024 Intel Corporation */ #include #include @@ -27,7 +27,12 @@ static void defragment_0(struct kunit *test) ret = cfg80211_defragment_element((void *)input, input, sizeof(input), - data, sizeof(input), + NULL, 0, + WLAN_EID_FRAGMENT); + KUNIT_EXPECT_EQ(test, ret, 253); + ret = cfg80211_defragment_element((void *)input, + input, sizeof(input), + data, ret, WLAN_EID_FRAGMENT); KUNIT_EXPECT_EQ(test, ret, 253); KUNIT_EXPECT_MEMEQ(test, data, input + 3, 253); @@ -63,7 +68,12 @@ static void defragment_1(struct kunit *test) ret = cfg80211_defragment_element((void *)input, input, sizeof(input), - data, sizeof(input), + NULL, 0, + WLAN_EID_FRAGMENT); + KUNIT_EXPECT_EQ(test, ret, 254 + 7); + ret = cfg80211_defragment_element((void *)input, + input, sizeof(input), + data, ret, WLAN_EID_FRAGMENT); /* this means the last fragment was not used */ KUNIT_EXPECT_EQ(test, ret, 254 + 7); @@ -106,10 +116,15 @@ static void defragment_2(struct kunit *test) ret = cfg80211_defragment_element((void *)input, input, sizeof(input), - data, sizeof(input), + NULL, 0, WLAN_EID_FRAGMENT); /* this means the last fragment was not used */ KUNIT_EXPECT_EQ(test, ret, 254 + 255 + 1); + ret = cfg80211_defragment_element((void *)input, + input, sizeof(input), + data, ret, + WLAN_EID_FRAGMENT); + KUNIT_EXPECT_EQ(test, ret, 254 + 255 + 1); KUNIT_EXPECT_MEMEQ(test, data, input + 3, 254); KUNIT_EXPECT_MEMEQ(test, data + 254, input + 257 + 2, 255); KUNIT_EXPECT_MEMEQ(test, data + 254 + 255, input + 2 * 257 + 2, 1); @@ -134,7 +149,12 @@ static void defragment_at_end(struct kunit *test) ret = cfg80211_defragment_element((void *)input, input, sizeof(input), - data, sizeof(input), + NULL, 0, + WLAN_EID_FRAGMENT); + KUNIT_EXPECT_EQ(test, ret, 254 + 7); + ret = cfg80211_defragment_element((void *)input, + input, sizeof(input), + data, ret, WLAN_EID_FRAGMENT); KUNIT_EXPECT_EQ(test, ret, 254 + 7); KUNIT_EXPECT_MEMEQ(test, data, input + 3, 254); -- cgit 1.2.3-korg From e6ee3a3713fe38e4ae94318e9cb18b39ec30da4a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:55:43 +0100 Subject: wifi: mac80211: pass link_id to channel switch ops For CSA to work correctly in multi-link scenarios, pass the link_id to the relevant callbacks. While at it, unify/deduplicate the tracing for them. Reviewed-by: Miriam Rachel Korenblit Reviewed-by: Emmanuel Grumbach Link: https://msgid.link/20240228095718.b7726635c054.I0be5d00af4acb48cfbd23a9dbf067f9aeb66469d@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++-- net/mac80211/cfg.c | 4 ++- net/mac80211/mlme.c | 4 ++- net/mac80211/trace.h | 82 +++++++++++--------------------------------------- 4 files changed, 28 insertions(+), 68 deletions(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 6c6d8210d6374..8ea9fa81e68c5 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1763,8 +1763,9 @@ struct ieee80211_conf { * @chandef: the new channel to switch to * @count: the number of TBTT's until the channel switch event * @delay: maximum delay between the time the AP transmitted the last beacon in - * current channel and the expected time of the first beacon in the new - * channel, expressed in TU. + * current channel and the expected time of the first beacon in the new + * channel, expressed in TU. + * @link_id: the link ID of the link doing the channel switch, 0 for non-MLO */ struct ieee80211_channel_switch { u64 timestamp; @@ -1772,6 +1773,7 @@ struct ieee80211_channel_switch { bool block_tx; struct cfg80211_chan_def chandef; u8 count; + u8 link_id; u32 delay; }; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 358593eeb5000..4bc0cc2dff83a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3931,7 +3931,9 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_chan_req chanreq = { .oper = params->chandef }; struct ieee80211_local *local = sdata->local; - struct ieee80211_channel_switch ch_switch; + struct ieee80211_channel_switch ch_switch = { + .link_id = params->link_id, + }; struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *chanctx; struct ieee80211_bss_conf *link_conf; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 90e505778db61..d1d4480c0352d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2021,7 +2021,9 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, struct ieee80211_chanctx *chanctx; enum nl80211_band current_band; struct ieee80211_csa_ie csa_ie; - struct ieee80211_channel_switch ch_switch; + struct ieee80211_channel_switch ch_switch = { + .link_id = link->link_id, + }; struct ieee80211_bss *bss; unsigned long timeout; int res; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 478b32d2520a1..8e758b5074bde 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1209,7 +1209,7 @@ DEFINE_EVENT(sta_event, drv_flush_sta, TP_ARGS(local, sdata, sta) ); -TRACE_EVENT(drv_channel_switch, +DECLARE_EVENT_CLASS(chanswitch_evt, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel_switch *ch_switch), @@ -1224,6 +1224,7 @@ TRACE_EVENT(drv_channel_switch, __field(u32, device_timestamp) __field(bool, block_tx) __field(u8, count) + __field(u8, link_id) ), TP_fast_assign( @@ -1234,14 +1235,24 @@ TRACE_EVENT(drv_channel_switch, __entry->device_timestamp = ch_switch->device_timestamp; __entry->block_tx = ch_switch->block_tx; __entry->count = ch_switch->count; + __entry->link_id = ch_switch->link_id; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT " new " CHANDEF_PR_FMT " count:%d", - LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG, __entry->count + LOCAL_PR_FMT VIF_PR_FMT CHANDEF_PR_FMT " count:%d block_tx:%d timestamp:%llu device_ts:%u link_id:%d", + LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG, __entry->count, + __entry->block_tx, __entry->timestamp, + __entry->device_timestamp, __entry->link_id ) ); +DEFINE_EVENT(chanswitch_evt, drv_channel_switch, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_channel_switch *ch_switch), + TP_ARGS(local, sdata, ch_switch) +); + TRACE_EVENT(drv_set_antenna, TP_PROTO(struct ieee80211_local *local, u32 tx_ant, u32 rx_ant, int ret), @@ -2121,39 +2132,11 @@ TRACE_EVENT(drv_channel_switch_beacon, ) ); -TRACE_EVENT(drv_pre_channel_switch, +DEFINE_EVENT(chanswitch_evt, drv_pre_channel_switch, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel_switch *ch_switch), - - TP_ARGS(local, sdata, ch_switch), - - TP_STRUCT__entry( - LOCAL_ENTRY - VIF_ENTRY - CHANDEF_ENTRY - __field(u64, timestamp) - __field(u32, device_timestamp) - __field(bool, block_tx) - __field(u8, count) - ), - - TP_fast_assign( - LOCAL_ASSIGN; - VIF_ASSIGN; - CHANDEF_ASSIGN(&ch_switch->chandef) - __entry->timestamp = ch_switch->timestamp; - __entry->device_timestamp = ch_switch->device_timestamp; - __entry->block_tx = ch_switch->block_tx; - __entry->count = ch_switch->count; - ), - - TP_printk( - LOCAL_PR_FMT VIF_PR_FMT " prepare channel switch to " - CHANDEF_PR_FMT " count:%d block_tx:%d timestamp:%llu", - LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG, __entry->count, - __entry->block_tx, __entry->timestamp - ) + TP_ARGS(local, sdata, ch_switch) ); DEFINE_EVENT(local_sdata_evt, drv_post_channel_switch, @@ -2168,40 +2151,11 @@ DEFINE_EVENT(local_sdata_evt, drv_abort_channel_switch, TP_ARGS(local, sdata) ); -TRACE_EVENT(drv_channel_switch_rx_beacon, +DEFINE_EVENT(chanswitch_evt, drv_channel_switch_rx_beacon, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel_switch *ch_switch), - - TP_ARGS(local, sdata, ch_switch), - - TP_STRUCT__entry( - LOCAL_ENTRY - VIF_ENTRY - CHANDEF_ENTRY - __field(u64, timestamp) - __field(u32, device_timestamp) - __field(bool, block_tx) - __field(u8, count) - ), - - TP_fast_assign( - LOCAL_ASSIGN; - VIF_ASSIGN; - CHANDEF_ASSIGN(&ch_switch->chandef) - __entry->timestamp = ch_switch->timestamp; - __entry->device_timestamp = ch_switch->device_timestamp; - __entry->block_tx = ch_switch->block_tx; - __entry->count = ch_switch->count; - ), - - TP_printk( - LOCAL_PR_FMT VIF_PR_FMT - " received a channel switch beacon to " - CHANDEF_PR_FMT " count:%d block_tx:%d timestamp:%llu", - LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG, __entry->count, - __entry->block_tx, __entry->timestamp - ) + TP_ARGS(local, sdata, ch_switch) ); TRACE_EVENT(drv_get_txpower, -- cgit 1.2.3-korg From 5ecd5d82b17ee2818548aeb5eb52f3a5b5cae18c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:55:44 +0100 Subject: wifi: mac80211: pass link conf to abort_channel_switch Pass the link conf to the abort_channel_switch driver method so the driver can handle things correctly. Reviewed-by: Emmanuel Grumbach Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240228095718.27f621106ddd.Iadd3d69b722ffe5934779a32a0e4e596a4e33ed4@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 13 +++++++++++-- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 3 ++- include/net/mac80211.h | 5 +++-- net/mac80211/driver-ops.h | 8 +++++--- net/mac80211/mlme.c | 2 +- 5 files changed, 22 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 69f6a96b0cfbe..1935630d3def0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -1470,7 +1470,8 @@ out_unlock: } void iwl_mvm_abort_channel_switch(struct ieee80211_hw *hw, - struct ieee80211_vif *vif) + struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf) { struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); @@ -5551,8 +5552,16 @@ void iwl_mvm_channel_switch_rx_beacon(struct ieee80211_hw *hw, if (chsw->count >= mvmvif->csa_count && chsw->block_tx) { if (mvmvif->csa_misbehave) { + struct ieee80211_bss_conf *link_conf; + /* Second time, give up on this AP*/ - iwl_mvm_abort_channel_switch(hw, vif); + + link_conf = wiphy_dereference(hw->wiphy, + vif->link_conf[chsw->link_id]); + if (WARN_ON(!link_conf)) + return; + + iwl_mvm_abort_channel_switch(hw, vif, link_conf); ieee80211_chswitch_done(vif, false, 0); mvmvif->csa_misbehave = false; return; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index c85d9e460ad2e..63c8027125964 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -2709,7 +2709,8 @@ int iwl_mvm_pre_channel_switch(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_channel_switch *chsw); void iwl_mvm_abort_channel_switch(struct ieee80211_hw *hw, - struct ieee80211_vif *vif); + struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf); void iwl_mvm_channel_switch_rx_beacon(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_channel_switch *chsw); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8ea9fa81e68c5..c622450ac0125 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4268,7 +4268,7 @@ struct ieee80211_prep_tx_info { * after a channel switch procedure is completed, allowing the * driver to go back to a normal configuration. * @abort_channel_switch: This is an optional callback that is called - * when channel switch procedure was completed, allowing the + * when channel switch procedure was aborted, allowing the * driver to go back to a normal configuration. * @channel_switch_rx_beacon: This is an optional callback that is called * when channel switch procedure is in progress and additional beacon with @@ -4664,7 +4664,8 @@ struct ieee80211_ops { struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf); void (*abort_channel_switch)(struct ieee80211_hw *hw, - struct ieee80211_vif *vif); + struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf); void (*channel_switch_rx_beacon)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_channel_switch *ch_switch); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index e20c64edb880c..5d078c0a23236 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -2,7 +2,7 @@ /* * Portions of this file * Copyright(c) 2016 Intel Deutschland GmbH -* Copyright (C) 2018 - 2019, 2021 - 2023 Intel Corporation +* Copyright (C) 2018-2019, 2021-2024 Intel Corporation */ #ifndef __MAC80211_DRIVER_OPS @@ -1180,8 +1180,9 @@ drv_post_channel_switch(struct ieee80211_link_data *link) } static inline void -drv_abort_channel_switch(struct ieee80211_sub_if_data *sdata) +drv_abort_channel_switch(struct ieee80211_link_data *link) { + struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; might_sleep(); @@ -1193,7 +1194,8 @@ drv_abort_channel_switch(struct ieee80211_sub_if_data *sdata) trace_drv_abort_channel_switch(local, sdata); if (local->ops->abort_channel_switch) - local->ops->abort_channel_switch(&local->hw, &sdata->vif); + local->ops->abort_channel_switch(&local->hw, &sdata->vif, + link->conf); } static inline void diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d1d4480c0352d..c5c7575141796 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2004,7 +2004,7 @@ ieee80211_sta_abort_chanswitch(struct ieee80211_link_data *link) link->csa_block_tx = false; link->conf->csa_active = false; - drv_abort_channel_switch(sdata); + drv_abort_channel_switch(link); } static void -- cgit 1.2.3-korg From 6f0107d195a812074c6f977d492ffc99ba3ff2bb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:55:45 +0100 Subject: wifi: mac80211: introduce a feature flag for quiet in CSA When doing CSA in multi-link, there really isn't a need to stop transmissions entirely. Add a feature flag for drivers to indicate they can handle quiet in CSA (be it by parsing themselves, or by implementing drv_pre_channel_switch()), to make that possible. Also clean up the csa_block_tx handling: it clearly cannot handle multi-link due to the way queues are stopped, move it to the sdata. Drivers should be doing it themselves for working properly during CSA in MLO anyway. Also rename it to indicate that it reflects TX was blocked at mac80211. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240228095719.258439191541.I2469d206e2bf5cb244cfde2b4bbc2ae6d1cd3dd9@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++++++ net/mac80211/cfg.c | 16 +++++++++------- net/mac80211/debugfs.c | 3 ++- net/mac80211/ieee80211_i.h | 3 ++- net/mac80211/iface.c | 6 +++--- net/mac80211/mlme.c | 35 +++++++++++++++++++++-------------- 6 files changed, 43 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c622450ac0125..353488ab94a29 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2778,6 +2778,11 @@ struct ieee80211_txq { * @IEEE80211_HW_DISALLOW_PUNCTURING: HW requires disabling puncturing in EHT * and connecting with a lower bandwidth instead * + * @IEEE80211_HW_HANDLES_QUIET_CSA: HW/driver handles quieting for CSA, so + * no need to stop queues. This really should be set by a driver that + * implements MLO, so operation can continue on other links when one + * link is switching. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2836,6 +2841,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_DETECTS_COLOR_COLLISION, IEEE80211_HW_MLO_MCAST_MULTI_LINK_TX, IEEE80211_HW_DISALLOW_PUNCTURING, + IEEE80211_HW_HANDLES_QUIET_CSA, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4bc0cc2dff83a..3a9d2ceec7523 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1607,10 +1607,10 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, /* abort any running channel switch or color change */ link_conf->csa_active = false; link_conf->color_change_active = false; - if (link->csa_block_tx) { + if (sdata->csa_blocked_tx) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); - link->csa_block_tx = false; + sdata->csa_blocked_tx = false; } ieee80211_free_next_beacon(link); @@ -3649,7 +3649,7 @@ void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif, bool block_t struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; - sdata->deflink.csa_block_tx = block_tx; + sdata->csa_blocked_tx = block_tx; sdata_info(sdata, "channel switch failed, disconnecting\n"); wiphy_work_queue(local->hw.wiphy, &ifmgd->csa_connection_drop_work); } @@ -3735,10 +3735,10 @@ static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data) ieee80211_link_info_change_notify(sdata, link_data, changed); - if (link_data->csa_block_tx) { + if (sdata->csa_blocked_tx) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); - link_data->csa_block_tx = false; + sdata->csa_blocked_tx = false; } err = drv_post_channel_switch(link_data); @@ -4014,12 +4014,14 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, } link_data->csa_chanreq = chanreq; - link_data->csa_block_tx = params->block_tx; link_conf->csa_active = true; - if (link_data->csa_block_tx) + if (params->block_tx && + !ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA)) { ieee80211_stop_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); + sdata->csa_blocked_tx = true; + } cfg80211_ch_switch_started_notify(sdata->dev, &link_data->csa_chanreq.oper, 0, diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 74be49191e704..2f68e92a7404b 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -4,7 +4,7 @@ * * Copyright 2007 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2018 - 2019, 2021-2023 Intel Corporation + * Copyright (C) 2018 - 2019, 2021-2024 Intel Corporation */ #include @@ -498,6 +498,7 @@ static const char *hw_flag_names[] = { FLAG(DETECTS_COLOR_COLLISION), FLAG(MLO_MCAST_MULTI_LINK_TX), FLAG(DISALLOW_PUNCTURING), + FLAG(HANDLES_QUIET_CSA), #undef FLAG }; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index da320799fbff8..cfd0c03e4bd6a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1034,7 +1034,6 @@ struct ieee80211_link_data { struct ieee80211_key __rcu *default_beacon_key; struct wiphy_work csa_finalize_work; - bool csa_block_tx; bool operating_11g_mode; @@ -1093,6 +1092,8 @@ struct ieee80211_sub_if_data { unsigned long state; + bool csa_blocked_tx; + char name[IFNAMSIZ]; struct ieee80211_fragment_cache frags; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index b75b83a5142be..395de62d9cb2d 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -8,7 +8,7 @@ * Copyright 2008, Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (c) 2016 Intel Deutschland GmbH - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #include #include @@ -544,10 +544,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do sdata->vif.bss_conf.csa_active = false; if (sdata->vif.type == NL80211_IFTYPE_STATION) sdata->deflink.u.mgd.csa_waiting_bcn = false; - if (sdata->deflink.csa_block_tx) { + if (sdata->csa_blocked_tx) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->deflink.csa_block_tx = false; + sdata->csa_blocked_tx = false; } wiphy_work_cancel(local->hw.wiphy, &sdata->deflink.csa_finalize_work); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c5c7575141796..3d5cc3dd92388 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1926,10 +1926,10 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link) WARN_ON(!link->conf->csa_active); - if (link->csa_block_tx) { + if (sdata->csa_blocked_tx) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); - link->csa_block_tx = false; + sdata->csa_blocked_tx = false; } link->conf->csa_active = false; @@ -1997,11 +1997,12 @@ ieee80211_sta_abort_chanswitch(struct ieee80211_link_data *link) ieee80211_link_unreserve_chanctx(link); - if (link->csa_block_tx) + if (sdata->csa_blocked_tx) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); + sdata->csa_blocked_tx = false; + } - link->csa_block_tx = false; link->conf->csa_active = false; drv_abort_channel_switch(link); @@ -2145,13 +2146,15 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, link->conf->csa_active = true; link->csa_chanreq = csa_ie.chanreq; - link->csa_block_tx = csa_ie.mode; link->u.mgd.csa_ignored_same_chan = false; link->u.mgd.beacon_crc_valid = false; - if (link->csa_block_tx) + if (csa_ie.mode && + !ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA)) { ieee80211_stop_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); + sdata->csa_blocked_tx = true; + } cfg80211_ch_switch_started_notify(sdata->dev, &csa_ie.chanreq.oper, link->link_id, csa_ie.count, @@ -2179,7 +2182,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, * reset when the disconnection worker runs. */ link->conf->csa_active = true; - link->csa_block_tx = csa_ie.mode; + sdata->csa_blocked_tx = + csa_ie.mode && !ieee80211_hw_check(&local->hw, HANDLES_QUIET_CSA); wiphy_work_queue(sdata->local->hw.wiphy, &ifmgd->csa_connection_drop_work); @@ -3233,10 +3237,10 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.csa_active = false; sdata->deflink.u.mgd.csa_waiting_bcn = false; sdata->deflink.u.mgd.csa_ignored_same_chan = false; - if (sdata->deflink.csa_block_tx) { + if (sdata->csa_blocked_tx) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->deflink.csa_block_tx = false; + sdata->csa_blocked_tx = false; } /* existing TX TSPEC sessions no longer exist */ @@ -3549,9 +3553,12 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) if (!ifmgd->associated) return; - /* in MLO assume we have a link where we can TX the frame */ - tx = ieee80211_vif_is_mld(&sdata->vif) || - !sdata->deflink.csa_block_tx; + /* + * MLO drivers should have HANDLES_QUIET_CSA, so that csa_blocked_tx + * is always false; if they don't then this may try to transmit the + * frame but queues will be stopped. + */ + tx = !sdata->csa_blocked_tx; if (!ifmgd->driver_disconnect) { unsigned int link_id; @@ -3584,10 +3591,10 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) /* the other links will be destroyed */ sdata->vif.bss_conf.csa_active = false; sdata->deflink.u.mgd.csa_waiting_bcn = false; - if (sdata->deflink.csa_block_tx) { + if (sdata->csa_blocked_tx) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->deflink.csa_block_tx = false; + sdata->csa_blocked_tx = false; } ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), tx, -- cgit 1.2.3-korg From f3dee30c6791e22633d9d7e43e48c1dff946cc0b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:55:46 +0100 Subject: wifi: mac80211: mlme: unify CSA handling Unify all the CSA handling, including handling of a beacon after the CSA, into ieee80211_sta_process_chanswitch(). The CRC of the beacon will change due to changes in the CSA/ECSA elements, so there's really no need to have the 'beacon after CSA' handling before the CRC processing or to change the beacon_crc_valid value here. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240228095719.e269c0e02905.I9dc68ff1e84d51349822bc7d3b33b578fcf8e360@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3d5cc3dd92388..2180b1ba0e420 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1934,11 +1934,6 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link) link->conf->csa_active = false; link->u.mgd.csa_waiting_bcn = false; - /* - * If the CSA IE is still present on the beacon after the switch, - * we need to consider it as a new CSA (possibly to self). - */ - link->u.mgd.beacon_crc_valid = false; ret = drv_post_channel_switch(link); if (ret) { @@ -2053,18 +2048,32 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, if (res < 0) goto drop_connection; - if (beacon && link->conf->csa_active && - !link->u.mgd.csa_waiting_bcn) { - if (res) + if (link->conf->csa_active) { + /* already processing - disregard action frames */ + if (!beacon) + return; + + if (link->u.mgd.csa_waiting_bcn) { + ieee80211_chswitch_post_beacon(link); + /* + * If the CSA IE is still present in the beacon after + * the switch, we need to consider it as a new CSA + * (possibly to self) - this happens by not returning + * here so we'll get to the check below. + */ + } else if (res) { ieee80211_sta_abort_chanswitch(link); - else + return; + } else { drv_channel_switch_rx_beacon(sdata, &ch_switch); - return; - } else if (link->conf->csa_active || res) { - /* disregard subsequent announcements if already processing */ - return; + return; + } } + /* nothing to do at all - no active CSA nor a new one */ + if (res) + return; + if (link->conf->chanreq.oper.chan->band != csa_ie.chanreq.oper.chan->band) { sdata_info(sdata, @@ -6293,9 +6302,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, } } - if (link->u.mgd.csa_waiting_bcn) - ieee80211_chswitch_post_beacon(link); - /* * Update beacon timing and dtim count on every beacon appearance. This * will allow the driver to use the most updated values. Do it before -- cgit 1.2.3-korg From 85977fc0aa489420709779cbc859966db94be68f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:55:47 +0100 Subject: wifi: mac80211: remove TDLS peers only on affected link If a link does CSA, or if it changes SMPS mode, we need to drop the TDLS peers, but we really should drop them only on the affected link. Fix that. Reviewed-by: Miriam Rachel Korenblit Reviewed-by: Emmanuel Grumbach Link: https://msgid.link/20240228095719.00d1d793f5b8.Ia9971316c6b3922dd371d64ac2198f91ed5ad9d2@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/mlme.c | 7 ++++--- net/mac80211/tdls.c | 6 +++++- 4 files changed, 11 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 3a9d2ceec7523..f03452dc716d5 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3242,7 +3242,7 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata, if (err) link->u.mgd.req_smps = old_req; else if (smps_mode != IEEE80211_SMPS_OFF && tdls_peer_found) - ieee80211_teardown_tdls_peers(sdata); + ieee80211_teardown_tdls_peers(link); return err; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index cfd0c03e4bd6a..b6fead612b66b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2611,7 +2611,7 @@ int ieee80211_tdls_channel_switch(struct wiphy *wiphy, struct net_device *dev, void ieee80211_tdls_cancel_channel_switch(struct wiphy *wiphy, struct net_device *dev, const u8 *addr); -void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata); +void ieee80211_teardown_tdls_peers(struct ieee80211_link_data *link); void ieee80211_tdls_handle_disconnect(struct ieee80211_sub_if_data *sdata, const u8 *peer, u16 reason); void diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2180b1ba0e420..47a2cba8313f0 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2115,12 +2115,13 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, } /* - * Drop all TDLS peers - either we disconnect or move to a different - * channel from this point on. There's no telling what our peer will do. + * Drop all TDLS peers on the affected link - either we disconnect or + * move to a different channel from this point on. There's no telling + * what our peer will do. * The TDLS WIDER_BW scenario is also problematic, as peers might now * have an incompatible wider chandef. */ - ieee80211_teardown_tdls_peers(sdata); + ieee80211_teardown_tdls_peers(link); conf = rcu_dereference_protected(link->conf->chanctx_conf, lockdep_is_held(&local->hw.wiphy->mtx)); diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 42d9c06cbb845..f07b409164854 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -2028,8 +2028,9 @@ ieee80211_process_tdls_channel_switch(struct ieee80211_sub_if_data *sdata, } } -void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata) +void ieee80211_teardown_tdls_peers(struct ieee80211_link_data *link) { + struct ieee80211_sub_if_data *sdata = link->sdata; struct sta_info *sta; u16 reason = WLAN_REASON_TDLS_TEARDOWN_UNSPECIFIED; @@ -2039,6 +2040,9 @@ void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata) !test_sta_flag(sta, WLAN_STA_AUTHORIZED)) continue; + if (sta->deflink.link_id != link->link_id) + continue; + ieee80211_tdls_oper_request(&sdata->vif, sta->sta.addr, NL80211_TDLS_TEARDOWN, reason, GFP_ATOMIC); -- cgit 1.2.3-korg From 07fba2277fcedd0948bec657f76ef374d0157d93 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:55:48 +0100 Subject: wifi: mac80211: remove TDLS peers on link deactivation If a link is deactivated, we really cannot sustain any TDLS connections on that link any more. With the API now changed, fix this issue and remove TDLS connections. Reviewed-by: Miriam Rachel Korenblit Reviewed-by: Emmanuel Grumbach Link: https://msgid.link/20240228095719.a7dd812c37bf.I3474dbde79e9e7a539d47f6f81f32e6c3e459080@changeid Signed-off-by: Johannes Berg --- net/mac80211/link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 87a413374ecee..685ec66b42642 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -356,7 +356,7 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, link = sdata_dereference(sdata->link[link_id], sdata); - /* FIXME: kill TDLS connections on the link */ + ieee80211_teardown_tdls_peers(link); ieee80211_link_release_channel(link); } -- cgit 1.2.3-korg From 73e4f9e615d7b99f39663d4722dc73e8fa5db5f9 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Sat, 24 Feb 2024 14:34:16 -0800 Subject: bpf, net: validate struct_ops when updating value. Perform all validations when updating values of struct_ops maps. Doing validation in st_ops->reg() and st_ops->update() is not necessary anymore. However, tcp_register_congestion_control() has been called in various places. It still needs to do validations. Cc: netdev@vger.kernel.org Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240224223418.526631-2-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/bpf_struct_ops.c | 11 ++++++----- net/ipv4/tcp_cong.c | 6 +----- 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index a6019087b467c..07e554c191d1f 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -672,13 +672,14 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, *(unsigned long *)(udata + moff) = prog->aux->id; } + if (st_ops->validate) { + err = st_ops->validate(kdata); + if (err) + goto reset_unlock; + } + if (st_map->map.map_flags & BPF_F_LINK) { err = 0; - if (st_ops->validate) { - err = st_ops->validate(kdata); - if (err) - goto reset_unlock; - } arch_protect_bpf_trampoline(st_map->image, PAGE_SIZE); /* Let bpf_link handle registration & unregistration. * diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 1b34050a7538b..28ffcfbeef14e 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -146,11 +146,7 @@ EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); int tcp_update_congestion_control(struct tcp_congestion_ops *ca, struct tcp_congestion_ops *old_ca) { struct tcp_congestion_ops *existing; - int ret; - - ret = tcp_validate_congestion_control(ca); - if (ret) - return ret; + int ret = 0; ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name)); -- cgit 1.2.3-korg From 187e2af05abe6bf80581490239c449456627d17a Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Sat, 24 Feb 2024 14:34:17 -0800 Subject: bpf: struct_ops supports more than one page for trampolines. The BPF struct_ops previously only allowed one page of trampolines. Each function pointer of a struct_ops is implemented by a struct_ops bpf program. Each struct_ops bpf program requires a trampoline. The following selftest patch shows each page can hold a little more than 20 trampolines. While one page is more than enough for the tcp-cc usecase, the sched_ext use case shows that one page is not always enough and hits the one page limit. This patch overcomes the one page limit by allocating another page when needed and it is limited to a total of MAX_IMAGE_PAGES (8) pages which is more than enough for reasonable usages. The variable st_map->image has been changed to st_map->image_pages, and its type has been changed to an array of pointers to pages. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240224223418.526631-3-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 4 +- kernel/bpf/bpf_struct_ops.c | 130 ++++++++++++++++++++++++++++------------- net/bpf/bpf_dummy_struct_ops.c | 12 ++-- 3 files changed, 96 insertions(+), 50 deletions(-) (limited to 'net') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 814dc913a9685..785660810e6a7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1763,7 +1763,9 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, struct bpf_tramp_link *link, const struct btf_func_model *model, void *stub_func, - void *image, void *image_end); + void **image, u32 *image_off, + bool allow_alloc); +void bpf_struct_ops_image_free(void *image); static inline bool bpf_try_module_get(const void *data, struct module *owner) { if (owner == BPF_MODULE_OWNER) diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 07e554c191d1f..43356faaa0578 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -18,6 +18,8 @@ struct bpf_struct_ops_value { char data[] ____cacheline_aligned_in_smp; }; +#define MAX_TRAMP_IMAGE_PAGES 8 + struct bpf_struct_ops_map { struct bpf_map map; struct rcu_head rcu; @@ -30,12 +32,11 @@ struct bpf_struct_ops_map { */ struct bpf_link **links; u32 links_cnt; - /* image is a page that has all the trampolines + u32 image_pages_cnt; + /* image_pages is an array of pages that has all the trampolines * that stores the func args before calling the bpf_prog. - * A PAGE_SIZE "image" is enough to store all trampoline for - * "links[]". */ - void *image; + void *image_pages[MAX_TRAMP_IMAGE_PAGES]; /* The owner moduler's btf. */ struct btf *btf; /* uvalue->data stores the kernel struct @@ -116,6 +117,31 @@ static bool is_valid_value_type(struct btf *btf, s32 value_id, return true; } +static void *bpf_struct_ops_image_alloc(void) +{ + void *image; + int err; + + err = bpf_jit_charge_modmem(PAGE_SIZE); + if (err) + return ERR_PTR(err); + image = arch_alloc_bpf_trampoline(PAGE_SIZE); + if (!image) { + bpf_jit_uncharge_modmem(PAGE_SIZE); + return ERR_PTR(-ENOMEM); + } + + return image; +} + +void bpf_struct_ops_image_free(void *image) +{ + if (image) { + arch_free_bpf_trampoline(image, PAGE_SIZE); + bpf_jit_uncharge_modmem(PAGE_SIZE); + } +} + #define MAYBE_NULL_SUFFIX "__nullable" #define MAX_STUB_NAME 128 @@ -461,6 +487,15 @@ static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map) } } +static void bpf_struct_ops_map_free_image(struct bpf_struct_ops_map *st_map) +{ + int i; + + for (i = 0; i < st_map->image_pages_cnt; i++) + bpf_struct_ops_image_free(st_map->image_pages[i]); + st_map->image_pages_cnt = 0; +} + static int check_zero_holes(const struct btf *btf, const struct btf_type *t, void *data) { const struct btf_member *member; @@ -506,9 +541,12 @@ const struct bpf_link_ops bpf_struct_ops_link_lops = { int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, struct bpf_tramp_link *link, const struct btf_func_model *model, - void *stub_func, void *image, void *image_end) + void *stub_func, + void **_image, u32 *_image_off, + bool allow_alloc) { - u32 flags = BPF_TRAMP_F_INDIRECT; + u32 image_off = *_image_off, flags = BPF_TRAMP_F_INDIRECT; + void *image = *_image; int size; tlinks[BPF_TRAMP_FENTRY].links[0] = link; @@ -518,12 +556,32 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, flags |= BPF_TRAMP_F_RET_FENTRY_RET; size = arch_bpf_trampoline_size(model, flags, tlinks, NULL); - if (size < 0) - return size; - if (size > (unsigned long)image_end - (unsigned long)image) - return -E2BIG; - return arch_prepare_bpf_trampoline(NULL, image, image_end, + if (size <= 0) + return size ? : -EFAULT; + + /* Allocate image buffer if necessary */ + if (!image || size > PAGE_SIZE - image_off) { + if (!allow_alloc) + return -E2BIG; + + image = bpf_struct_ops_image_alloc(); + if (IS_ERR(image)) + return PTR_ERR(image); + image_off = 0; + } + + size = arch_prepare_bpf_trampoline(NULL, image + image_off, + image + PAGE_SIZE, model, flags, tlinks, stub_func); + if (size <= 0) { + if (image != *_image) + bpf_struct_ops_image_free(image); + return size ? : -EFAULT; + } + + *_image = image; + *_image_off = image_off + size; + return 0; } static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, @@ -539,8 +597,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, struct bpf_tramp_links *tlinks; void *udata, *kdata; int prog_fd, err; - void *image, *image_end; - u32 i; + u32 i, trampoline_start, image_off = 0; + void *cur_image = NULL, *image = NULL; if (flags) return -EINVAL; @@ -578,8 +636,6 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, udata = &uvalue->data; kdata = &kvalue->data; - image = st_map->image; - image_end = st_map->image + PAGE_SIZE; module_type = btf_type_by_id(btf_vmlinux, st_ops_ids[IDX_MODULE_ID]); for_each_member(i, t, member) { @@ -658,15 +714,24 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, &bpf_struct_ops_link_lops, prog); st_map->links[i] = &link->link; + trampoline_start = image_off; err = bpf_struct_ops_prepare_trampoline(tlinks, link, - &st_ops->func_models[i], - *(void **)(st_ops->cfi_stubs + moff), - image, image_end); + &st_ops->func_models[i], + *(void **)(st_ops->cfi_stubs + moff), + &image, &image_off, + st_map->image_pages_cnt < MAX_TRAMP_IMAGE_PAGES); + if (err) + goto reset_unlock; + + if (cur_image != image) { + st_map->image_pages[st_map->image_pages_cnt++] = image; + cur_image = image; + trampoline_start = 0; + } if (err < 0) goto reset_unlock; - *(void **)(kdata + moff) = image + cfi_get_offset(); - image += err; + *(void **)(kdata + moff) = image + trampoline_start + cfi_get_offset(); /* put prog_id to udata */ *(unsigned long *)(udata + moff) = prog->aux->id; @@ -677,10 +742,11 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, if (err) goto reset_unlock; } + for (i = 0; i < st_map->image_pages_cnt; i++) + arch_protect_bpf_trampoline(st_map->image_pages[i], PAGE_SIZE); if (st_map->map.map_flags & BPF_F_LINK) { err = 0; - arch_protect_bpf_trampoline(st_map->image, PAGE_SIZE); /* Let bpf_link handle registration & unregistration. * * Pair with smp_load_acquire() during lookup_elem(). @@ -689,7 +755,6 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, goto unlock; } - arch_protect_bpf_trampoline(st_map->image, PAGE_SIZE); err = st_ops->reg(kdata); if (likely(!err)) { /* This refcnt increment on the map here after @@ -712,9 +777,9 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, * there was a race in registering the struct_ops (under the same name) to * a sub-system through different struct_ops's maps. */ - arch_unprotect_bpf_trampoline(st_map->image, PAGE_SIZE); reset_unlock: + bpf_struct_ops_map_free_image(st_map); bpf_struct_ops_map_put_progs(st_map); memset(uvalue, 0, map->value_size); memset(kvalue, 0, map->value_size); @@ -781,10 +846,7 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map) if (st_map->links) bpf_struct_ops_map_put_progs(st_map); bpf_map_area_free(st_map->links); - if (st_map->image) { - arch_free_bpf_trampoline(st_map->image, PAGE_SIZE); - bpf_jit_uncharge_modmem(PAGE_SIZE); - } + bpf_struct_ops_map_free_image(st_map); bpf_map_area_free(st_map->uvalue); bpf_map_area_free(st_map); } @@ -894,20 +956,6 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) st_map->st_ops_desc = st_ops_desc; map = &st_map->map; - ret = bpf_jit_charge_modmem(PAGE_SIZE); - if (ret) - goto errout_free; - - st_map->image = arch_alloc_bpf_trampoline(PAGE_SIZE); - if (!st_map->image) { - /* __bpf_struct_ops_map_free() uses st_map->image as flag - * for "charged or not". In this case, we need to unchange - * here. - */ - bpf_jit_uncharge_modmem(PAGE_SIZE); - ret = -ENOMEM; - goto errout_free; - } st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE); st_map->links_cnt = btf_type_vlen(t); st_map->links = diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index 02de71719aeda..1b5f812e6972c 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -91,6 +91,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, struct bpf_tramp_link *link = NULL; void *image = NULL; unsigned int op_idx; + u32 image_off = 0; int prog_ret; s32 type_id; int err; @@ -114,12 +115,6 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, goto out; } - image = arch_alloc_bpf_trampoline(PAGE_SIZE); - if (!image) { - err = -ENOMEM; - goto out; - } - link = kzalloc(sizeof(*link), GFP_USER); if (!link) { err = -ENOMEM; @@ -133,7 +128,8 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, err = bpf_struct_ops_prepare_trampoline(tlinks, link, &st_ops->func_models[op_idx], &dummy_ops_test_ret_function, - image, image + PAGE_SIZE); + &image, &image_off, + true); if (err < 0) goto out; @@ -147,7 +143,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, err = -EFAULT; out: kfree(args); - arch_free_bpf_trampoline(image, PAGE_SIZE); + bpf_struct_ops_image_free(image); if (link) bpf_link_put(&link->link); kfree(tlinks); -- cgit 1.2.3-korg From 345a6e2631c1267221b684e110bba03e4c26ece0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 Mar 2024 17:19:45 +0000 Subject: tcp: align tcp_sock_write_rx group Stephen Rothwell and kernel test robot reported that some arches (parisc, hexagon) and/or compilers would not like blamed commit. Lets make sure tcp_sock_write_rx group does not start with a hole. While we are at it, correct tcp_sock_write_tx CACHELINE_ASSERT_GROUP_SIZE() since after the blamed commit, we went to 105 bytes. Fixes: 99123622050f ("tcp: remove some holes in struct tcp_sock") Reported-by: Stephen Rothwell Reported-by: kernel test robot Link: https://lore.kernel.org/netdev/20240301121108.5d39e4f9@canb.auug.org.au/ Closes: https://lore.kernel.org/oe-kbuild-all/202403011451.csPYOS3C-lkp@intel.com/ Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Tested-by: Simon Horman # build-tested Link: https://lore.kernel.org/r/20240301171945.2958176-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/tcp.h | 2 +- net/ipv4/tcp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 988a30ef6bfe9..55399ee2a57e7 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -304,7 +304,7 @@ struct tcp_sock { __cacheline_group_end(tcp_sock_write_txrx); /* RX read-write hotpath cache lines */ - __cacheline_group_begin(tcp_sock_write_rx); + __cacheline_group_begin(tcp_sock_write_rx) __aligned(8); u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived * sum(delta(rcv_nxt)), or how many bytes diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c82dc42f57c65..7e1b848398d04 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4651,7 +4651,7 @@ static void __init tcp_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tsorted_sent_queue); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, highest_sack); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, ecn_flags); - CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 113); + CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 105); /* TXRX read-write hotpath cache lines */ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, pred_flags); -- cgit 1.2.3-korg From 411c5f36805c02c7c412f1ad6bfa4459a1148011 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 28 Feb 2024 17:30:08 +0800 Subject: mm/page_alloc: modify page_frag_alloc_align() to accept align as an argument napi_alloc_frag_align() and netdev_alloc_frag_align() accept align as an argument, and they are thin wrappers around the __napi_alloc_frag_align() and __netdev_alloc_frag_align() APIs doing the alignment checking and align mask conversion, in order to call page_frag_alloc_align() directly. The intention here is to keep the alignment checking and the alignmask conversion in in-line wrapper to avoid those kind of operations during execution time since it can usually be handled during compile time. We are going to use page_frag_alloc_align() in vhost_net.c, it need the same kind of alignment checking and alignmask conversion, so split up page_frag_alloc_align into an inline wrapper doing the above operation, and add __page_frag_alloc_align() which is passed with the align mask the original function expected as suggested by Alexander. Signed-off-by: Yunsheng Lin CC: Alexander Duyck Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Abeni --- include/linux/gfp.h | 15 +++++++++++---- mm/page_alloc.c | 8 ++++---- net/core/skbuff.c | 9 ++++++--- 3 files changed, 21 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index de292a0071389..28aea17fa59b5 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -312,14 +312,21 @@ extern void free_pages(unsigned long addr, unsigned int order); struct page_frag_cache; extern void __page_frag_cache_drain(struct page *page, unsigned int count); -extern void *page_frag_alloc_align(struct page_frag_cache *nc, - unsigned int fragsz, gfp_t gfp_mask, - unsigned int align_mask); +void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, + gfp_t gfp_mask, unsigned int align_mask); + +static inline void *page_frag_alloc_align(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask, + unsigned int align) +{ + WARN_ON_ONCE(!is_power_of_2(align)); + return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align); +} static inline void *page_frag_alloc(struct page_frag_cache *nc, unsigned int fragsz, gfp_t gfp_mask) { - return page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); + return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); } extern void page_frag_free(void *addr); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 150d4f23b0104..c0f7e67c42500 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4708,9 +4708,9 @@ void __page_frag_cache_drain(struct page *page, unsigned int count) } EXPORT_SYMBOL(__page_frag_cache_drain); -void *page_frag_alloc_align(struct page_frag_cache *nc, - unsigned int fragsz, gfp_t gfp_mask, - unsigned int align_mask) +void *__page_frag_alloc_align(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask, + unsigned int align_mask) { unsigned int size = PAGE_SIZE; struct page *page; @@ -4779,7 +4779,7 @@ refill: return nc->va + offset; } -EXPORT_SYMBOL(page_frag_alloc_align); +EXPORT_SYMBOL(__page_frag_alloc_align); /* * Frees a page fragment allocated out of either a compound or order 0 page. diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1f918e602bc4f..43d7fc150acc9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -315,7 +315,8 @@ void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) fragsz = SKB_DATA_ALIGN(fragsz); - return page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask); + return __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, + align_mask); } EXPORT_SYMBOL(__napi_alloc_frag_align); @@ -327,13 +328,15 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) if (in_hardirq() || irqs_disabled()) { struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache); - data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask); + data = __page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, + align_mask); } else { struct napi_alloc_cache *nc; local_bh_disable(); nc = this_cpu_ptr(&napi_alloc_cache); - data = page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask); + data = __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, + align_mask); local_bh_enable(); } return data; -- cgit 1.2.3-korg From 93e16ea025d234d0ed01d9dc9c819257a2159bb6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 Mar 2024 19:37:37 +0000 Subject: net: gro: rename skb_gro_header_hard() skb_gro_header_hard() is renamed to skb_gro_may_pull() to match the convention used by common helpers like pskb_may_pull(). This means the condition is inverted: if (skb_gro_header_hard(skb, hlen)) slow_path(); becomes: if (!skb_gro_may_pull(skb, hlen)) slow_path(); Signed-off-by: Eric Dumazet Acked-by: Paolo Abeni Signed-off-by: Paolo Abeni --- drivers/net/geneve.c | 2 +- include/net/gro.h | 7 ++++--- net/core/gro.c | 2 +- net/ipv4/fou_core.c | 2 +- net/ipv4/gre_offload.c | 2 +- net/ipv4/tcp_offload.c | 2 +- 6 files changed, 9 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 6f3f9b446b1d2..e25e0a31126c1 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -508,7 +508,7 @@ static struct sk_buff *geneve_gro_receive(struct sock *sk, gh_len = geneve_hlen(gh); hlen = off_gnv + gh_len; - if (skb_gro_header_hard(skb, hlen)) { + if (!skb_gro_may_pull(skb, hlen)) { gh = skb_gro_header_slow(skb, hlen, off_gnv); if (unlikely(!gh)) goto out; diff --git a/include/net/gro.h b/include/net/gro.h index b435f0ddbf64f..ffc2c96d263b0 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -145,9 +145,10 @@ static inline void *skb_gro_header_fast(struct sk_buff *skb, return NAPI_GRO_CB(skb)->frag0 + offset; } -static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen) +static inline bool skb_gro_may_pull(const struct sk_buff *skb, + unsigned int hlen) { - return NAPI_GRO_CB(skb)->frag0_len < hlen; + return hlen <= NAPI_GRO_CB(skb)->frag0_len; } static inline void skb_gro_frag0_invalidate(struct sk_buff *skb) @@ -172,7 +173,7 @@ static inline void *skb_gro_header(struct sk_buff *skb, void *ptr; ptr = skb_gro_header_fast(skb, offset); - if (skb_gro_header_hard(skb, hlen)) + if (!skb_gro_may_pull(skb, hlen)) ptr = skb_gro_header_slow(skb, hlen, offset); return ptr; } diff --git a/net/core/gro.c b/net/core/gro.c index 0759277dc14ee..927ccf6814909 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -700,7 +700,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi) skb_reset_mac_header(skb); skb_gro_reset_offset(skb, hlen); - if (unlikely(skb_gro_header_hard(skb, hlen))) { + if (unlikely(!skb_gro_may_pull(skb, hlen))) { eth = skb_gro_header_slow(skb, hlen, 0); if (unlikely(!eth)) { net_warn_ratelimited("%s: dropping impossible skb from %s\n", diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c index 0c41076e31eda..a8494f796dca3 100644 --- a/net/ipv4/fou_core.c +++ b/net/ipv4/fou_core.c @@ -351,7 +351,7 @@ static struct sk_buff *gue_gro_receive(struct sock *sk, optlen = guehdr->hlen << 2; len += optlen; - if (skb_gro_header_hard(skb, len)) { + if (!skb_gro_may_pull(skb, len)) { guehdr = skb_gro_header_slow(skb, len, off); if (unlikely(!guehdr)) goto out; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 311e70bfce407..5028c72d494ab 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -174,7 +174,7 @@ static struct sk_buff *gre_gro_receive(struct list_head *head, grehlen += GRE_HEADER_SECTION; hlen = off + grehlen; - if (skb_gro_header_hard(skb, hlen)) { + if (!skb_gro_may_pull(skb, hlen)) { greh = skb_gro_header_slow(skb, hlen, off); if (unlikely(!greh)) goto out; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 8311c38267b55..875456efc92dd 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -204,7 +204,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb) goto out; hlen = off + thlen; - if (skb_gro_header_hard(skb, hlen)) { + if (!skb_gro_may_pull(skb, hlen)) { th = skb_gro_header_slow(skb, hlen, off); if (unlikely(!th)) goto out; -- cgit 1.2.3-korg From c7583e9f768eeb82f2531c8372584ba89cfade8b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 Mar 2024 19:37:39 +0000 Subject: net: gro: enable fast path for more cases Currently the so-called GRO fast path is only enabled for napi_frags_skb() callers. After the prior patch, we no longer have to clear frag0 whenever we pulled bytes to skb->head. We therefore can initialize frag0 to skb->data so that GRO fast path can be used in the following additional cases: - Drivers using header split (populating skb->data with headers, and having payload in one or more page fragments). - Drivers not using any page frag (entire packet is in skb->data) Add a likely() in skb_gro_may_pull() to help the compiler to generate better code if possible. Signed-off-by: Eric Dumazet Acked-by: Paolo Abeni Signed-off-by: Paolo Abeni --- include/net/gro.h | 2 +- net/core/gro.c | 23 ++++++++++++++++------- 2 files changed, 17 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/net/gro.h b/include/net/gro.h index 3c3666e46b305..2b58671a65492 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -148,7 +148,7 @@ static inline void *skb_gro_header_fast(const struct sk_buff *skb, static inline bool skb_gro_may_pull(const struct sk_buff *skb, unsigned int hlen) { - return hlen <= NAPI_GRO_CB(skb)->frag0_len; + return likely(hlen <= NAPI_GRO_CB(skb)->frag0_len); } static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, diff --git a/net/core/gro.c b/net/core/gro.c index 927ccf6814909..6a0edbd826a17 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -369,15 +369,21 @@ static void gro_list_prepare(const struct list_head *head, static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff) { - const struct skb_shared_info *pinfo = skb_shinfo(skb); - const skb_frag_t *frag0 = &pinfo->frags[0]; + const struct skb_shared_info *pinfo; + const skb_frag_t *frag0; + unsigned int headlen; NAPI_GRO_CB(skb)->data_offset = 0; - NAPI_GRO_CB(skb)->frag0 = NULL; - NAPI_GRO_CB(skb)->frag0_len = 0; + headlen = skb_headlen(skb); + NAPI_GRO_CB(skb)->frag0 = skb->data; + NAPI_GRO_CB(skb)->frag0_len = headlen; + if (headlen) + return; + + pinfo = skb_shinfo(skb); + frag0 = &pinfo->frags[0]; - if (!skb_headlen(skb) && pinfo->nr_frags && - !PageHighMem(skb_frag_page(frag0)) && + if (pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0)) && (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int, @@ -710,7 +716,10 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi) } } else { eth = (const struct ethhdr *)skb->data; - gro_pull_from_frag0(skb, hlen); + + if (NAPI_GRO_CB(skb)->frag0 != skb->data) + gro_pull_from_frag0(skb, hlen); + NAPI_GRO_CB(skb)->frag0 += hlen; NAPI_GRO_CB(skb)->frag0_len -= hlen; } -- cgit 1.2.3-korg From 8f78010b701d8fdc290063f29fefc09aaaca4085 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 Mar 2024 19:37:40 +0000 Subject: tcp: gro: micro optimizations in tcp[4]_gro_complete() In tcp_gro_complete() : Moving the skb->inner_transport_header setting allows the compiler to reuse the previously loaded value of skb->transport_header. Caching skb_shinfo() avoids duplications as well. In tcp4_gro_complete(), doing a single change on skb_shinfo(skb)->gso_type also generates better code. Signed-off-by: Eric Dumazet Acked-by: Paolo Abeni Signed-off-by: Paolo Abeni --- net/ipv4/tcp_offload.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 875456efc92dd..b955ab3b236d9 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -299,18 +299,20 @@ out: void tcp_gro_complete(struct sk_buff *skb) { struct tcphdr *th = tcp_hdr(skb); + struct skb_shared_info *shinfo; + + if (skb->encapsulation) + skb->inner_transport_header = skb->transport_header; skb->csum_start = (unsigned char *)th - skb->head; skb->csum_offset = offsetof(struct tcphdr, check); skb->ip_summed = CHECKSUM_PARTIAL; - skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + shinfo = skb_shinfo(skb); + shinfo->gso_segs = NAPI_GRO_CB(skb)->count; if (th->cwr) - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; - - if (skb->encapsulation) - skb->inner_transport_header = skb->transport_header; + shinfo->gso_type |= SKB_GSO_TCP_ECN; } EXPORT_SYMBOL(tcp_gro_complete); @@ -335,10 +337,9 @@ INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff) th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr, iph->daddr, 0); - skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; - if (NAPI_GRO_CB(skb)->is_atomic) - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4 | + (NAPI_GRO_CB(skb)->is_atomic * SKB_GSO_TCP_FIXEDID); tcp_gro_complete(skb); return 0; -- cgit 1.2.3-korg From 885c36e59f46375c138de18ff1692f18eff67b7f Mon Sep 17 00:00:00 2001 From: Abhishek Chauhan Date: Fri, 1 Mar 2024 12:13:48 -0800 Subject: net: Re-use and set mono_delivery_time bit for userspace tstamp packets Bridge driver today has no support to forward the userspace timestamp packets and ends up resetting the timestamp. ETF qdisc checks the packet coming from userspace and encounters to be 0 thereby dropping time sensitive packets. These changes will allow userspace timestamps packets to be forwarded from the bridge to NIC drivers. Setting the same bit (mono_delivery_time) to avoid dropping of userspace tstamp packets in the forwarding path. Existing functionality of mono_delivery_time remains unaltered here, instead just extended with userspace tstamp support for bridge forwarding path. Signed-off-by: Abhishek Chauhan Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20240301201348.2815102-1-quic_abchauha@quicinc.com Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 6 +++--- net/ipv4/ip_output.c | 1 + net/ipv4/raw.c | 1 + net/ipv6/ip6_output.c | 2 +- net/ipv6/raw.c | 2 +- net/packet/af_packet.c | 4 +++- 6 files changed, 10 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d577e0bee18d2..3013355b63f5f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -822,9 +822,9 @@ typedef unsigned char *sk_buff_data_t; * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required * @mono_delivery_time: When set, skb->tstamp has the - * delivery_time in mono clock base (i.e. EDT). Otherwise, the - * skb->tstamp has the (rcv) timestamp at ingress and - * delivery_time at egress. + * delivery_time in mono clock base (i.e., EDT) or a clock base chosen + * by SO_TXTIME. If zero, skb->tstamp has the (rcv) timestamp at + * ingress. * @napi_id: id of the NAPI struct this skb came from * @sender_cpu: (aka @napi_id) source CPU in XPS * @alloc_cpu: CPU which did the skb allocation. diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 1fe794967211e..33f93dc730a33 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1458,6 +1458,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority); skb->mark = cork->mark; skb->tstamp = cork->transmit_time; + skb->mono_delivery_time = !!skb->tstamp; /* * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec * on dst refcount diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index a82fd102df05a..494a6284bd7ee 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -353,6 +353,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; skb->tstamp = sockc->transmit_time; + skb->mono_delivery_time = !!skb->tstamp; skb_dst_set(skb, &rt->dst); *rtp = NULL; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index b9dd3a66e4236..02eeca5492cd4 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1925,7 +1925,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, skb->priority = READ_ONCE(sk->sk_priority); skb->mark = cork->base.mark; skb->tstamp = cork->base.transmit_time; - + skb->mono_delivery_time = !!skb->tstamp; ip6_cork_steal_dst(skb, cork); IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); if (proto == IPPROTO_ICMPV6) { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 76e6eb3b643d0..779274055abf9 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -615,7 +615,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; skb->tstamp = sockc->transmit_time; - + skb->mono_delivery_time = !!skb->tstamp; skb_put(skb, length); skb_reset_network_header(skb); iph = ipv6_hdr(skb); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c9bbc26866907..0db31ca4982da 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2057,7 +2057,7 @@ retry: skb->priority = READ_ONCE(sk->sk_priority); skb->mark = READ_ONCE(sk->sk_mark); skb->tstamp = sockc.transmit_time; - + skb->mono_delivery_time = !!skb->tstamp; skb_setup_tx_timestamp(skb, sockc.tsflags); if (unlikely(extra_len == 4)) @@ -2586,6 +2586,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->priority = READ_ONCE(po->sk.sk_priority); skb->mark = READ_ONCE(po->sk.sk_mark); skb->tstamp = sockc->transmit_time; + skb->mono_delivery_time = !!skb->tstamp; skb_setup_tx_timestamp(skb, sockc->tsflags); skb_zcopy_set_nouarg(skb, ph.raw); @@ -3064,6 +3065,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc.mark; skb->tstamp = sockc.transmit_time; + skb->mono_delivery_time = !!skb->tstamp; if (unlikely(extra_len == 4)) skb->no_fcs = 1; -- cgit 1.2.3-korg From 00af2aa93b76b1bade471ad0d0525d4d29ca5cc0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 2 Mar 2024 10:07:44 +0000 Subject: net/smc: reduce rtnl pressure in smc_pnet_create_pnetids_list() Many syzbot reports show extreme rtnl pressure, and many of them hint that smc acquires rtnl in netns creation for no good reason [1] This patch returns early from smc_pnet_net_init() if there is no netdevice yet. I am not even sure why smc_pnet_create_pnetids_list() even exists, because smc_pnet_netdev_event() is also calling smc_pnet_add_base_pnetid() when handling NETDEV_UP event. [1] extract of typical syzbot reports 2 locks held by syz-executor.3/12252: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.4/12253: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.1/12257: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.2/12261: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.0/12265: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.3/12268: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.4/12271: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.1/12274: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 2 locks held by syz-executor.2/12280: #0: ffffffff8f369610 (pernet_ops_rwsem){++++}-{3:3}, at: copy_net_ns+0x4c7/0x7b0 net/core/net_namespace.c:491 #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_create_pnetids_list net/smc/smc_pnet.c:809 [inline] #1: ffffffff8f375b88 (rtnl_mutex){+.+.}-{3:3}, at: smc_pnet_net_init+0x10a/0x1e0 net/smc/smc_pnet.c:878 Signed-off-by: Eric Dumazet Cc: Wenjia Zhang Cc: Jan Karcher Cc: "D. Wythe" Cc: Tony Lu Cc: Wen Gu Reviewed-by: Wenjia Zhang Link: https://lore.kernel.org/r/20240302100744.3868021-1-edumazet@google.com Signed-off-by: Paolo Abeni --- net/smc/smc_pnet.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 9f2c58c5a86b7..2adb92b8c4699 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -806,6 +806,16 @@ static void smc_pnet_create_pnetids_list(struct net *net) u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; struct net_device *dev; + /* Newly created netns do not have devices. + * Do not even acquire rtnl. + */ + if (list_empty(&net->dev_base_head)) + return; + + /* Note: This might not be needed, because smc_pnet_netdev_event() + * is also calling smc_pnet_add_base_pnetid() when handling + * NETDEV_UP event. + */ rtnl_lock(); for_each_netdev(net, dev) smc_pnet_add_base_pnetid(net, dev, ndev_pnetid); -- cgit 1.2.3-korg From e5560011692981bc8bfeae7fc0673c65a02badba Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marliere" Date: Sat, 2 Mar 2024 14:06:02 -0300 Subject: nfc: core: make nfc_class constant Since commit 43a7206b0963 ("driver core: class: make class_register() take a const *"), the driver core allows for struct class to be in read-only memory, so move the nfc_class structure to be declared at build time placing it into read-only memory, instead of having to be dynamically allocated at boot time. Suggested-by: Greg Kroah-Hartman Signed-off-by: Ricardo B. Marliere Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240302-class_cleanup-net-next-v1-6-8fa378595b93@marliere.net Signed-off-by: Jakub Kicinski --- include/net/nfc/nfc.h | 2 +- net/nfc/core.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 5dee575fbe86a..3d07abacf08bd 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -196,7 +196,7 @@ struct nfc_dev { }; #define to_nfc_dev(_dev) container_of(_dev, struct nfc_dev, dev) -extern struct class nfc_class; +extern const struct class nfc_class; struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops, u32 supported_protocols, diff --git a/net/nfc/core.c b/net/nfc/core.c index eb2c0959e5b6a..e58dc64050545 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -1015,7 +1015,7 @@ static void nfc_check_pres_timeout(struct timer_list *t) schedule_work(&dev->check_pres_work); } -struct class nfc_class = { +const struct class nfc_class = { .name = "nfc", .dev_release = nfc_release, }; -- cgit 1.2.3-korg From 49489bb03a501547450e8fdc6d85d023d8a3b2c4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 29 Jan 2024 23:47:57 +0000 Subject: rxrpc: Do zerocopy using MSG_SPLICE_PAGES and page frags Switch from keeping the transmission buffers in the rxrpc_txbuf struct and allocated from the slab, to allocating them using page fragment allocators (which uses raw pages), thereby allowing them to be passed to MSG_SPLICE_PAGES and avoid copying into the UDP buffers. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- net/rxrpc/ar-internal.h | 32 +++------ net/rxrpc/conn_object.c | 4 ++ net/rxrpc/insecure.c | 11 ++- net/rxrpc/local_object.c | 3 + net/rxrpc/output.c | 65 ++++++++--------- net/rxrpc/rxkad.c | 46 ++++++------ net/rxrpc/sendmsg.c | 22 ++---- net/rxrpc/txbuf.c | 180 ++++++++++++++++++++++++++++++++++++----------- 8 files changed, 219 insertions(+), 144 deletions(-) (limited to 'net') diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 9ea4e7e9d9f75..47f4689379ca2 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -248,10 +248,9 @@ struct rxrpc_security { struct rxrpc_key_token *); /* Work out how much data we can store in a packet, given an estimate - * of the amount of data remaining. + * of the amount of data remaining and allocate a data buffer. */ - int (*how_much_data)(struct rxrpc_call *, size_t, - size_t *, size_t *, size_t *); + struct rxrpc_txbuf *(*alloc_txbuf)(struct rxrpc_call *call, size_t remaining, gfp_t gfp); /* impose security on a packet */ int (*secure_packet)(struct rxrpc_call *, struct rxrpc_txbuf *); @@ -292,6 +291,7 @@ struct rxrpc_local { struct socket *socket; /* my UDP socket */ struct task_struct *io_thread; struct completion io_thread_ready; /* Indication that the I/O thread started */ + struct page_frag_cache tx_alloc; /* Tx control packet allocation (I/O thread only) */ struct rxrpc_sock *service; /* Service(s) listening on this endpoint */ #ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY struct sk_buff_head rx_delay_queue; /* Delay injection queue */ @@ -500,6 +500,8 @@ struct rxrpc_connection { struct list_head proc_link; /* link in procfs list */ struct list_head link; /* link in master connection list */ struct sk_buff_head rx_queue; /* received conn-level packets */ + struct page_frag_cache tx_data_alloc; /* Tx DATA packet allocation */ + struct mutex tx_data_alloc_lock; struct mutex security_lock; /* Lock for security management */ const struct rxrpc_security *security; /* applied security module */ @@ -788,7 +790,6 @@ struct rxrpc_send_params { * Buffer of data to be output as a packet. */ struct rxrpc_txbuf { - struct rcu_head rcu; struct list_head call_link; /* Link in call->tx_sendmsg/tx_buffer */ struct list_head tx_link; /* Link in live Enc queue or Tx queue */ ktime_t last_sent; /* Time at which last transmitted */ @@ -806,22 +807,8 @@ struct rxrpc_txbuf { __be16 cksum; /* Checksum to go in header */ unsigned short ack_rwind; /* ACK receive window */ u8 /*enum rxrpc_propose_ack_trace*/ ack_why; /* If ack, why */ - u8 nr_kvec; - struct kvec kvec[1]; - struct { - /* The packet for encrypting and DMA'ing. We align it such - * that data[] aligns correctly for any crypto blocksize. - */ - u8 pad[64 - sizeof(struct rxrpc_wire_header)]; - struct rxrpc_wire_header _wire; /* Network-ready header */ - union { - u8 data[RXRPC_JUMBO_DATALEN]; /* Data packet */ - struct { - struct rxrpc_ackpacket _ack; - DECLARE_FLEX_ARRAY(u8, acks); - }; - }; - } __aligned(64); + u8 nr_kvec; /* Amount of kvec[] used */ + struct kvec kvec[3]; }; static inline bool rxrpc_sending_to_server(const struct rxrpc_txbuf *txb) @@ -1299,8 +1286,9 @@ static inline void rxrpc_sysctl_exit(void) {} * txbuf.c */ extern atomic_t rxrpc_nr_txbuf; -struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type, - gfp_t gfp); +struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_size, + size_t data_align, gfp_t gfp); +struct rxrpc_txbuf *rxrpc_alloc_ack_txbuf(struct rxrpc_call *call, size_t sack_size); void rxrpc_get_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what); void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what); void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index df8a271948a1c..0af4642aeec4b 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -68,6 +68,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet, INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); mutex_init(&conn->security_lock); + mutex_init(&conn->tx_data_alloc_lock); skb_queue_head_init(&conn->rx_queue); conn->rxnet = rxnet; conn->security = &rxrpc_no_security; @@ -341,6 +342,9 @@ static void rxrpc_clean_up_connection(struct work_struct *work) */ rxrpc_purge_queue(&conn->rx_queue); + if (conn->tx_data_alloc.va) + __page_frag_cache_drain(virt_to_page(conn->tx_data_alloc.va), + conn->tx_data_alloc.pagecnt_bias); call_rcu(&conn->rcu, rxrpc_rcu_free_connection); } diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index 34353b6e584bc..f2701068ed9e4 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -15,14 +15,11 @@ static int none_init_connection_security(struct rxrpc_connection *conn, } /* - * Work out how much data we can put in an unsecured packet. + * Allocate an appropriately sized buffer for the amount of data remaining. */ -static int none_how_much_data(struct rxrpc_call *call, size_t remain, - size_t *_buf_size, size_t *_data_size, size_t *_offset) +static struct rxrpc_txbuf *none_alloc_txbuf(struct rxrpc_call *call, size_t remain, gfp_t gfp) { - *_buf_size = *_data_size = min_t(size_t, remain, RXRPC_JUMBO_DATALEN); - *_offset = 0; - return 0; + return rxrpc_alloc_data_txbuf(call, min_t(size_t, remain, RXRPC_JUMBO_DATALEN), 0, gfp); } static int none_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) @@ -79,7 +76,7 @@ const struct rxrpc_security rxrpc_no_security = { .exit = none_exit, .init_connection_security = none_init_connection_security, .free_call_crypto = none_free_call_crypto, - .how_much_data = none_how_much_data, + .alloc_txbuf = none_alloc_txbuf, .secure_packet = none_secure_packet, .verify_packet = none_verify_packet, .respond_to_challenge = none_respond_to_challenge, diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 34d3073681353..504453c688d75 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -452,6 +452,9 @@ void rxrpc_destroy_local(struct rxrpc_local *local) #endif rxrpc_purge_queue(&local->rx_queue); rxrpc_purge_client_connections(local); + if (local->tx_alloc.va) + __page_frag_cache_drain(virt_to_page(local->tx_alloc.va), + local->tx_alloc.pagecnt_bias); } /* diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 5398aa24bb8e3..0a317498b8e06 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -83,18 +83,16 @@ static void rxrpc_fill_out_ack(struct rxrpc_call *call, rxrpc_serial_t serial) { struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; + struct rxrpc_acktrailer *trailer = txb->kvec[2].iov_base + 3; struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1); - struct rxrpc_acktrailer trailer; unsigned int qsize, sack, wrap, to; rxrpc_seq_t window, wtop; int rsize; u32 mtu, jmax; - u8 *ackp = txb->acks; + u8 *filler = txb->kvec[2].iov_base; + u8 *sackp = txb->kvec[1].iov_base; - call->ackr_nr_unacked = 0; - atomic_set(&call->ackr_nr_consumed, 0); rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill); - clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags); window = call->ackr_window; wtop = call->ackr_wtop; @@ -110,20 +108,27 @@ static void rxrpc_fill_out_ack(struct rxrpc_call *call, ack->serial = htonl(serial); ack->reason = ack_reason; ack->nAcks = wtop - window; + filler[0] = 0; + filler[1] = 0; + filler[2] = 0; + + if (ack_reason == RXRPC_ACK_PING) + txb->flags |= RXRPC_REQUEST_ACK; if (after(wtop, window)) { + txb->len += ack->nAcks; + txb->kvec[1].iov_base = sackp; + txb->kvec[1].iov_len = ack->nAcks; + wrap = RXRPC_SACK_SIZE - sack; to = min_t(unsigned int, ack->nAcks, RXRPC_SACK_SIZE); if (sack + ack->nAcks <= RXRPC_SACK_SIZE) { - memcpy(txb->acks, call->ackr_sack_table + sack, ack->nAcks); + memcpy(sackp, call->ackr_sack_table + sack, ack->nAcks); } else { - memcpy(txb->acks, call->ackr_sack_table + sack, wrap); - memcpy(txb->acks + wrap, call->ackr_sack_table, - to - wrap); + memcpy(sackp, call->ackr_sack_table + sack, wrap); + memcpy(sackp + wrap, call->ackr_sack_table, to - wrap); } - - ackp += to; } else if (before(wtop, window)) { pr_warn("ack window backward %x %x", window, wtop); } else if (ack->reason == RXRPC_ACK_DELAY) { @@ -135,18 +140,11 @@ static void rxrpc_fill_out_ack(struct rxrpc_call *call, jmax = rxrpc_rx_jumbo_max; qsize = (window - 1) - call->rx_consumed; rsize = max_t(int, call->rx_winsize - qsize, 0); - txb->ack_rwind = rsize; - trailer.maxMTU = htonl(rxrpc_rx_mtu); - trailer.ifMTU = htonl(mtu); - trailer.rwind = htonl(rsize); - trailer.jumbo_max = htonl(jmax); - - *ackp++ = 0; - *ackp++ = 0; - *ackp++ = 0; - memcpy(ackp, &trailer, sizeof(trailer)); - txb->kvec[0].iov_len += sizeof(*ack) + ack->nAcks + 3 + sizeof(trailer); - txb->len = txb->kvec[0].iov_len; + txb->ack_rwind = rsize; + trailer->maxMTU = htonl(rxrpc_rx_mtu); + trailer->ifMTU = htonl(mtu); + trailer->rwind = htonl(rsize); + trailer->jumbo_max = htonl(jmax); } /* @@ -195,7 +193,7 @@ static void rxrpc_cancel_rtt_probe(struct rxrpc_call *call, /* * Transmit an ACK packet. */ -static int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) +static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) { struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; struct rxrpc_connection *conn; @@ -204,7 +202,7 @@ static int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *tx int ret, rtt_slot = -1; if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) - return -ECONNRESET; + return; conn = call->conn; @@ -212,10 +210,8 @@ static int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *tx msg.msg_namelen = call->peer->srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; - msg.msg_flags = 0; + msg.msg_flags = MSG_SPLICE_PAGES; - if (ack->reason == RXRPC_ACK_PING) - txb->flags |= RXRPC_REQUEST_ACK; whdr->flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS; txb->serial = rxrpc_get_next_serial(conn); @@ -250,8 +246,6 @@ static int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *tx rxrpc_cancel_rtt_probe(call, txb->serial, rtt_slot); rxrpc_set_keepalive(call); } - - return ret; } /* @@ -267,16 +261,19 @@ void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]); - txb = rxrpc_alloc_txbuf(call, RXRPC_PACKET_TYPE_ACK, - rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS); + txb = rxrpc_alloc_ack_txbuf(call, call->ackr_wtop - call->ackr_window); if (!txb) { kleave(" = -ENOMEM"); return; } + txb->ack_why = why; + rxrpc_fill_out_ack(call, txb, ack_reason, serial); + call->ackr_nr_unacked = 0; + atomic_set(&call->ackr_nr_consumed, 0); + clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags); - txb->ack_why = why; trace_rxrpc_send_ack(call, why, ack_reason, serial); rxrpc_send_ack_packet(call, txb); rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx); @@ -465,7 +462,7 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t msg.msg_namelen = call->peer->srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; - msg.msg_flags = 0; + msg.msg_flags = MSG_SPLICE_PAGES; /* Track what we've attempted to transmit at least once so that the * retransmission algorithm doesn't try to resend what we haven't sent diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index ef0849c8329ca..f1a68270862db 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -145,16 +145,17 @@ error: /* * Work out how much data we can put in a packet. */ -static int rxkad_how_much_data(struct rxrpc_call *call, size_t remain, - size_t *_buf_size, size_t *_data_size, size_t *_offset) +static struct rxrpc_txbuf *rxkad_alloc_txbuf(struct rxrpc_call *call, size_t remain, gfp_t gfp) { - size_t shdr, buf_size, chunk; + struct rxrpc_txbuf *txb; + size_t shdr, space; + + remain = min(remain, 65535 - sizeof(struct rxrpc_wire_header)); switch (call->conn->security_level) { default: - buf_size = chunk = min_t(size_t, remain, RXRPC_JUMBO_DATALEN); - shdr = 0; - goto out; + space = min_t(size_t, remain, RXRPC_JUMBO_DATALEN); + return rxrpc_alloc_data_txbuf(call, space, 0, gfp); case RXRPC_SECURITY_AUTH: shdr = sizeof(struct rxkad_level1_hdr); break; @@ -163,17 +164,16 @@ static int rxkad_how_much_data(struct rxrpc_call *call, size_t remain, break; } - buf_size = round_down(RXRPC_JUMBO_DATALEN, RXKAD_ALIGN); + space = min_t(size_t, round_down(RXRPC_JUMBO_DATALEN, RXKAD_ALIGN), remain + shdr); + space = round_up(space, RXKAD_ALIGN); - chunk = buf_size - shdr; - if (remain < chunk) - buf_size = round_up(shdr + remain, RXKAD_ALIGN); + txb = rxrpc_alloc_data_txbuf(call, space, RXKAD_ALIGN, gfp); + if (!txb) + return NULL; -out: - *_buf_size = buf_size; - *_data_size = chunk; - *_offset = shdr; - return 0; + txb->offset += shdr; + txb->space -= shdr; + return txb; } /* @@ -251,7 +251,8 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, struct rxrpc_txbuf *txb, struct skcipher_request *req) { - struct rxkad_level1_hdr *hdr = (void *)txb->data; + struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; + struct rxkad_level1_hdr *hdr = (void *)(whdr + 1); struct rxrpc_crypt iv; struct scatterlist sg; size_t pad; @@ -267,14 +268,14 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, pad = RXKAD_ALIGN - pad; pad &= RXKAD_ALIGN - 1; if (pad) { - memset(txb->data + txb->offset, 0, pad); + memset(txb->kvec[0].iov_base + txb->offset, 0, pad); txb->len += pad; } /* start the encryption afresh */ memset(&iv, 0, sizeof(iv)); - sg_init_one(&sg, txb->data, 8); + sg_init_one(&sg, hdr, 8); skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x); @@ -293,7 +294,8 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, struct skcipher_request *req) { const struct rxrpc_key_token *token; - struct rxkad_level2_hdr *rxkhdr = (void *)txb->data; + struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; + struct rxkad_level2_hdr *rxkhdr = (void *)(whdr + 1); struct rxrpc_crypt iv; struct scatterlist sg; size_t pad; @@ -312,7 +314,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, pad = RXKAD_ALIGN - pad; pad &= RXKAD_ALIGN - 1; if (pad) { - memset(txb->data + txb->offset, 0, pad); + memset(txb->kvec[0].iov_base + txb->offset, 0, pad); txb->len += pad; } @@ -320,7 +322,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, token = call->conn->key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv)); - sg_init_one(&sg, txb->data, txb->len); + sg_init_one(&sg, rxkhdr, txb->len); skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, txb->len, iv.x); @@ -1255,7 +1257,7 @@ const struct rxrpc_security rxkad = { .free_preparse_server_key = rxkad_free_preparse_server_key, .destroy_server_key = rxkad_destroy_server_key, .init_connection_security = rxkad_init_connection_security, - .how_much_data = rxkad_how_much_data, + .alloc_txbuf = rxkad_alloc_txbuf, .secure_packet = rxkad_secure_packet, .verify_packet = rxkad_verify_packet, .free_call_crypto = rxkad_free_call_crypto, diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 1e81046ea8a60..4d152f06b039c 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -336,7 +336,7 @@ reload: do { if (!txb) { - size_t remain, bufsize, chunk, offset; + size_t remain; _debug("alloc"); @@ -348,23 +348,11 @@ reload: * region (enc blocksize), but the trailer is not. */ remain = more ? INT_MAX : msg_data_left(msg); - ret = call->conn->security->how_much_data(call, remain, - &bufsize, &chunk, &offset); - if (ret < 0) - goto maybe_error; - - _debug("SIZE: %zu/%zu @%zu", chunk, bufsize, offset); - - /* create a buffer that we can retain until it's ACK'd */ - ret = -ENOMEM; - txb = rxrpc_alloc_txbuf(call, RXRPC_PACKET_TYPE_DATA, - GFP_KERNEL); - if (!txb) + txb = call->conn->security->alloc_txbuf(call, remain, sk->sk_allocation); + if (IS_ERR(txb)) { + ret = PTR_ERR(txb); goto maybe_error; - - txb->offset = offset + sizeof(struct rxrpc_wire_header); - txb->space -= offset; - txb->space = min_t(size_t, chunk, txb->space); + } } _debug("append"); diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index 2e8c5b15a84f3..b2a82ab756c24 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -14,53 +14,146 @@ static atomic_t rxrpc_txbuf_debug_ids; atomic_t rxrpc_nr_txbuf; /* - * Allocate and partially initialise an I/O request structure. + * Allocate and partially initialise a data transmission buffer. */ -struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type, - gfp_t gfp) +struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_size, + size_t data_align, gfp_t gfp) { struct rxrpc_wire_header *whdr; struct rxrpc_txbuf *txb; + size_t total, hoff = 0; + void *buf; txb = kmalloc(sizeof(*txb), gfp); - if (txb) { - whdr = &txb->_wire; - - INIT_LIST_HEAD(&txb->call_link); - INIT_LIST_HEAD(&txb->tx_link); - refcount_set(&txb->ref, 1); - txb->call_debug_id = call->debug_id; - txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids); - txb->space = sizeof(txb->data); - txb->len = 0; - txb->offset = 0; - txb->flags = call->conn->out_clientflag; - txb->ack_why = 0; - txb->seq = call->tx_prepared + 1; - txb->serial = 0; - txb->cksum = 0; - txb->nr_kvec = 1; - txb->kvec[0].iov_base = whdr; - txb->kvec[0].iov_len = sizeof(*whdr); - whdr->epoch = htonl(call->conn->proto.epoch); - whdr->cid = htonl(call->cid); - whdr->callNumber = htonl(call->call_id); - whdr->seq = htonl(txb->seq); - whdr->type = packet_type; - whdr->flags = 0; - whdr->userStatus = 0; - whdr->securityIndex = call->security_ix; - whdr->_rsvd = 0; - whdr->serviceId = htons(call->dest_srx.srx_service); - - trace_rxrpc_txbuf(txb->debug_id, - txb->call_debug_id, txb->seq, 1, - packet_type == RXRPC_PACKET_TYPE_DATA ? - rxrpc_txbuf_alloc_data : - rxrpc_txbuf_alloc_ack); - atomic_inc(&rxrpc_nr_txbuf); + if (!txb) + return NULL; + + if (data_align) + hoff = round_up(sizeof(*whdr), data_align) - sizeof(*whdr); + total = hoff + sizeof(*whdr) + data_size; + + mutex_lock(&call->conn->tx_data_alloc_lock); + buf = page_frag_alloc_align(&call->conn->tx_data_alloc, total, gfp, + ~(data_align - 1) & ~(L1_CACHE_BYTES - 1)); + mutex_unlock(&call->conn->tx_data_alloc_lock); + if (!buf) { + kfree(txb); + return NULL; + } + + whdr = buf + hoff; + + INIT_LIST_HEAD(&txb->call_link); + INIT_LIST_HEAD(&txb->tx_link); + refcount_set(&txb->ref, 1); + txb->last_sent = KTIME_MIN; + txb->call_debug_id = call->debug_id; + txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids); + txb->space = data_size; + txb->len = 0; + txb->offset = sizeof(*whdr); + txb->flags = call->conn->out_clientflag; + txb->ack_why = 0; + txb->seq = call->tx_prepared + 1; + txb->serial = 0; + txb->cksum = 0; + txb->nr_kvec = 1; + txb->kvec[0].iov_base = whdr; + txb->kvec[0].iov_len = sizeof(*whdr); + + whdr->epoch = htonl(call->conn->proto.epoch); + whdr->cid = htonl(call->cid); + whdr->callNumber = htonl(call->call_id); + whdr->seq = htonl(txb->seq); + whdr->type = RXRPC_PACKET_TYPE_DATA; + whdr->flags = 0; + whdr->userStatus = 0; + whdr->securityIndex = call->security_ix; + whdr->_rsvd = 0; + whdr->serviceId = htons(call->dest_srx.srx_service); + + trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 1, + rxrpc_txbuf_alloc_data); + + atomic_inc(&rxrpc_nr_txbuf); + return txb; +} + +/* + * Allocate and partially initialise an ACK packet. + */ +struct rxrpc_txbuf *rxrpc_alloc_ack_txbuf(struct rxrpc_call *call, size_t sack_size) +{ + struct rxrpc_wire_header *whdr; + struct rxrpc_acktrailer *trailer; + struct rxrpc_ackpacket *ack; + struct rxrpc_txbuf *txb; + gfp_t gfp = rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS; + void *buf, *buf2 = NULL; + u8 *filler; + + txb = kmalloc(sizeof(*txb), gfp); + if (!txb) + return NULL; + + buf = page_frag_alloc(&call->local->tx_alloc, + sizeof(*whdr) + sizeof(*ack) + 1 + 3 + sizeof(*trailer), gfp); + if (!buf) { + kfree(txb); + return NULL; + } + + if (sack_size) { + buf2 = page_frag_alloc(&call->local->tx_alloc, sack_size, gfp); + if (!buf2) { + page_frag_free(buf); + kfree(txb); + return NULL; + } } + whdr = buf; + ack = buf + sizeof(*whdr); + filler = buf + sizeof(*whdr) + sizeof(*ack) + 1; + trailer = buf + sizeof(*whdr) + sizeof(*ack) + 1 + 3; + + INIT_LIST_HEAD(&txb->call_link); + INIT_LIST_HEAD(&txb->tx_link); + refcount_set(&txb->ref, 1); + txb->call_debug_id = call->debug_id; + txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids); + txb->space = 0; + txb->len = sizeof(*whdr) + sizeof(*ack) + 3 + sizeof(*trailer); + txb->offset = 0; + txb->flags = call->conn->out_clientflag; + txb->ack_rwind = 0; + txb->seq = 0; + txb->serial = 0; + txb->cksum = 0; + txb->nr_kvec = 3; + txb->kvec[0].iov_base = whdr; + txb->kvec[0].iov_len = sizeof(*whdr) + sizeof(*ack); + txb->kvec[1].iov_base = buf2; + txb->kvec[1].iov_len = sack_size; + txb->kvec[2].iov_base = filler; + txb->kvec[2].iov_len = 3 + sizeof(*trailer); + + whdr->epoch = htonl(call->conn->proto.epoch); + whdr->cid = htonl(call->cid); + whdr->callNumber = htonl(call->call_id); + whdr->seq = 0; + whdr->type = RXRPC_PACKET_TYPE_ACK; + whdr->flags = 0; + whdr->userStatus = 0; + whdr->securityIndex = call->security_ix; + whdr->_rsvd = 0; + whdr->serviceId = htons(call->dest_srx.srx_service); + + get_page(virt_to_head_page(trailer)); + + trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 1, + rxrpc_txbuf_alloc_ack); + atomic_inc(&rxrpc_nr_txbuf); return txb; } @@ -79,12 +172,15 @@ void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what) trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, r, what); } -static void rxrpc_free_txbuf(struct rcu_head *rcu) +static void rxrpc_free_txbuf(struct rxrpc_txbuf *txb) { - struct rxrpc_txbuf *txb = container_of(rcu, struct rxrpc_txbuf, rcu); + int i; trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 0, rxrpc_txbuf_free); + for (i = 0; i < txb->nr_kvec; i++) + if (txb->kvec[i].iov_base) + page_frag_free(txb->kvec[i].iov_base); kfree(txb); atomic_dec(&rxrpc_nr_txbuf); } @@ -103,7 +199,7 @@ void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what) dead = __refcount_dec_and_test(&txb->ref, &r); trace_rxrpc_txbuf(debug_id, call_debug_id, seq, r - 1, what); if (dead) - call_rcu(&txb->rcu, rxrpc_free_txbuf); + rxrpc_free_txbuf(txb); } } -- cgit 1.2.3-korg From 3e0b83ee535d44befddb3f0a6c75a531cf47f869 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 29 Feb 2024 16:46:21 +0000 Subject: rxrpc: Parse received packets before dealing with timeouts Parse the received packets before going and processing timeouts as the timeouts may be reset by the reception of a packet. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- net/rxrpc/call_event.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index e19ea54dce545..58826710322de 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -358,6 +358,9 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) if (skb && skb->mark == RXRPC_SKB_MARK_ERROR) goto out; + if (skb) + rxrpc_input_call_packet(call, skb); + /* If we see our async-event poke, check for timeout trippage. */ now = jiffies; t = call->expect_rx_by; @@ -417,9 +420,6 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) resend = true; } - if (skb) - rxrpc_input_call_packet(call, skb); - rxrpc_transmit_some_data(call); if (skb) { -- cgit 1.2.3-korg From a711d976e1cd7a58a51ecf2816e705fd01fe3489 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 29 Feb 2024 16:50:52 +0000 Subject: rxrpc: Don't permit resending after all Tx packets acked Once all the packets transmitted as part of a call have been acked, don't permit any resending. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- net/rxrpc/call_event.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 58826710322de..ef28ebf37c7d1 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -450,7 +450,9 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, rxrpc_propose_ack_ping_for_lost_ack); - if (resend && __rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY) + if (resend && + __rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY && + !test_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags)) rxrpc_resend(call, NULL); if (test_and_clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags)) -- cgit 1.2.3-korg From 12a66e77c4999b97a2c2b8f5e4e7533c656cee12 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jan 2024 20:30:03 +0000 Subject: rxrpc: Differentiate PING ACK transmission traces. There are three points that transmit PING ACKs and all of them use the same trace string. Change two of them to use different strings. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- include/trace/events/rxrpc.h | 2 ++ net/rxrpc/call_event.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index c730cd732348e..3b726a6c8c426 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -364,11 +364,13 @@ #define rxrpc_propose_ack_traces \ EM(rxrpc_propose_ack_client_tx_end, "ClTxEnd") \ + EM(rxrpc_propose_ack_delayed_ack, "DlydAck") \ EM(rxrpc_propose_ack_input_data, "DataIn ") \ EM(rxrpc_propose_ack_input_data_hole, "DataInH") \ EM(rxrpc_propose_ack_ping_for_keepalive, "KeepAlv") \ EM(rxrpc_propose_ack_ping_for_lost_ack, "LostAck") \ EM(rxrpc_propose_ack_ping_for_lost_reply, "LostRpl") \ + EM(rxrpc_propose_ack_ping_for_0_retrans, "0-Retrn") \ EM(rxrpc_propose_ack_ping_for_old_rtt, "OldRtt ") \ EM(rxrpc_propose_ack_ping_for_params, "Params ") \ EM(rxrpc_propose_ack_ping_for_rtt, "Rtt ") \ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index ef28ebf37c7d1..bc1a5abb7d6fb 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -197,7 +197,7 @@ no_resend: if (ktime_to_us(ack_ts) < (call->peer->srtt_us >> 3)) goto out; rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, - rxrpc_propose_ack_ping_for_lost_ack); + rxrpc_propose_ack_ping_for_0_retrans); goto out; } @@ -387,7 +387,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now); call->delay_ack_at = now + MAX_JIFFY_OFFSET; rxrpc_send_ACK(call, RXRPC_ACK_DELAY, 0, - rxrpc_propose_ack_ping_for_lost_ack); + rxrpc_propose_ack_delayed_ack); } t = call->ack_lost_at; -- cgit 1.2.3-korg From 153f90a066dd4a91ef7edc0df3964dd097a5e2a5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jan 2024 21:37:16 +0000 Subject: rxrpc: Use ktimes for call timeout tracking and set the timer lazily Track the call timeouts as ktimes rather than jiffies as the latter's granularity is too high and only set the timer at the end of the event handling function. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- include/trace/events/rxrpc.h | 174 ++++++++++++++++++++------------------- net/rxrpc/af_rxrpc.c | 12 +-- net/rxrpc/ar-internal.h | 35 ++++---- net/rxrpc/call_event.c | 189 +++++++++++++++++++++---------------------- net/rxrpc/call_object.c | 56 ++++++------- net/rxrpc/input.c | 28 +++---- net/rxrpc/misc.c | 8 +- net/rxrpc/output.c | 40 ++++----- net/rxrpc/proc.c | 10 +-- net/rxrpc/rtt.c | 36 ++++----- net/rxrpc/sendmsg.c | 25 +++--- net/rxrpc/sysctl.c | 16 ++-- 12 files changed, 307 insertions(+), 322 deletions(-) (limited to 'net') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 3b726a6c8c426..a1b126a6b0d72 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -119,6 +119,7 @@ EM(rxrpc_call_poke_complete, "Compl") \ EM(rxrpc_call_poke_error, "Error") \ EM(rxrpc_call_poke_idle, "Idle") \ + EM(rxrpc_call_poke_set_timeout, "Set-timo") \ EM(rxrpc_call_poke_start, "Start") \ EM(rxrpc_call_poke_timer, "Timer") \ E_(rxrpc_call_poke_timer_now, "Timer-now") @@ -340,27 +341,16 @@ E_(rxrpc_rtt_rx_requested_ack, "RACK") #define rxrpc_timer_traces \ - EM(rxrpc_timer_begin, "Begin ") \ - EM(rxrpc_timer_exp_ack, "ExpAck") \ - EM(rxrpc_timer_exp_hard, "ExpHrd") \ - EM(rxrpc_timer_exp_idle, "ExpIdl") \ - EM(rxrpc_timer_exp_keepalive, "ExpKA ") \ - EM(rxrpc_timer_exp_lost_ack, "ExpLoA") \ - EM(rxrpc_timer_exp_normal, "ExpNml") \ - EM(rxrpc_timer_exp_ping, "ExpPng") \ - EM(rxrpc_timer_exp_resend, "ExpRsn") \ - EM(rxrpc_timer_init_for_reply, "IniRpl") \ - EM(rxrpc_timer_init_for_send_reply, "SndRpl") \ - EM(rxrpc_timer_restart, "Restrt") \ - EM(rxrpc_timer_set_for_ack, "SetAck") \ - EM(rxrpc_timer_set_for_hard, "SetHrd") \ - EM(rxrpc_timer_set_for_idle, "SetIdl") \ - EM(rxrpc_timer_set_for_keepalive, "KeepAl") \ - EM(rxrpc_timer_set_for_lost_ack, "SetLoA") \ - EM(rxrpc_timer_set_for_normal, "SetNml") \ - EM(rxrpc_timer_set_for_ping, "SetPng") \ - EM(rxrpc_timer_set_for_resend, "SetRTx") \ - E_(rxrpc_timer_set_for_send, "SetSnd") + EM(rxrpc_timer_trace_delayed_ack, "DelayAck ") \ + EM(rxrpc_timer_trace_expect_rx, "ExpectRx ") \ + EM(rxrpc_timer_trace_hard, "HardLimit") \ + EM(rxrpc_timer_trace_idle, "IdleLimit") \ + EM(rxrpc_timer_trace_keepalive, "KeepAlive") \ + EM(rxrpc_timer_trace_lost_ack, "LostAck ") \ + EM(rxrpc_timer_trace_ping, "DelayPing") \ + EM(rxrpc_timer_trace_resend, "Resend ") \ + EM(rxrpc_timer_trace_resend_reset, "ResendRst") \ + E_(rxrpc_timer_trace_resend_tx, "ResendTx ") #define rxrpc_propose_ack_traces \ EM(rxrpc_propose_ack_client_tx_end, "ClTxEnd") \ @@ -1314,90 +1304,112 @@ TRACE_EVENT(rxrpc_rtt_rx, __entry->rto) ); -TRACE_EVENT(rxrpc_timer, - TP_PROTO(struct rxrpc_call *call, enum rxrpc_timer_trace why, - unsigned long now), +TRACE_EVENT(rxrpc_timer_set, + TP_PROTO(struct rxrpc_call *call, ktime_t delay, + enum rxrpc_timer_trace why), - TP_ARGS(call, why, now), + TP_ARGS(call, delay, why), TP_STRUCT__entry( __field(unsigned int, call) __field(enum rxrpc_timer_trace, why) - __field(long, now) - __field(long, ack_at) - __field(long, ack_lost_at) - __field(long, resend_at) - __field(long, ping_at) - __field(long, expect_rx_by) - __field(long, expect_req_by) - __field(long, expect_term_by) - __field(long, timer) + __field(ktime_t, delay) ), TP_fast_assign( __entry->call = call->debug_id; __entry->why = why; - __entry->now = now; - __entry->ack_at = call->delay_ack_at; - __entry->ack_lost_at = call->ack_lost_at; - __entry->resend_at = call->resend_at; - __entry->expect_rx_by = call->expect_rx_by; - __entry->expect_req_by = call->expect_req_by; - __entry->expect_term_by = call->expect_term_by; - __entry->timer = call->timer.expires; + __entry->delay = delay; ), - TP_printk("c=%08x %s a=%ld la=%ld r=%ld xr=%ld xq=%ld xt=%ld t=%ld", + TP_printk("c=%08x %s to=%lld", __entry->call, __print_symbolic(__entry->why, rxrpc_timer_traces), - __entry->ack_at - __entry->now, - __entry->ack_lost_at - __entry->now, - __entry->resend_at - __entry->now, - __entry->expect_rx_by - __entry->now, - __entry->expect_req_by - __entry->now, - __entry->expect_term_by - __entry->now, - __entry->timer - __entry->now) + ktime_to_us(__entry->delay)) + ); + +TRACE_EVENT(rxrpc_timer_exp, + TP_PROTO(struct rxrpc_call *call, ktime_t delay, + enum rxrpc_timer_trace why), + + TP_ARGS(call, delay, why), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(enum rxrpc_timer_trace, why) + __field(ktime_t, delay) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->why = why; + __entry->delay = delay; + ), + + TP_printk("c=%08x %s to=%lld", + __entry->call, + __print_symbolic(__entry->why, rxrpc_timer_traces), + ktime_to_us(__entry->delay)) + ); + +TRACE_EVENT(rxrpc_timer_can, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_timer_trace why), + + TP_ARGS(call, why), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(enum rxrpc_timer_trace, why) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->why = why; + ), + + TP_printk("c=%08x %s", + __entry->call, + __print_symbolic(__entry->why, rxrpc_timer_traces)) + ); + +TRACE_EVENT(rxrpc_timer_restart, + TP_PROTO(struct rxrpc_call *call, ktime_t delay, unsigned long delayj), + + TP_ARGS(call, delay, delayj), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(unsigned long, delayj) + __field(ktime_t, delay) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->delayj = delayj; + __entry->delay = delay; + ), + + TP_printk("c=%08x to=%lld j=%ld", + __entry->call, + ktime_to_us(__entry->delay), + __entry->delayj) ); TRACE_EVENT(rxrpc_timer_expired, - TP_PROTO(struct rxrpc_call *call, unsigned long now), + TP_PROTO(struct rxrpc_call *call), - TP_ARGS(call, now), + TP_ARGS(call), TP_STRUCT__entry( __field(unsigned int, call) - __field(long, now) - __field(long, ack_at) - __field(long, ack_lost_at) - __field(long, resend_at) - __field(long, ping_at) - __field(long, expect_rx_by) - __field(long, expect_req_by) - __field(long, expect_term_by) - __field(long, timer) ), TP_fast_assign( __entry->call = call->debug_id; - __entry->now = now; - __entry->ack_at = call->delay_ack_at; - __entry->ack_lost_at = call->ack_lost_at; - __entry->resend_at = call->resend_at; - __entry->expect_rx_by = call->expect_rx_by; - __entry->expect_req_by = call->expect_req_by; - __entry->expect_term_by = call->expect_term_by; - __entry->timer = call->timer.expires; ), - TP_printk("c=%08x EXPIRED a=%ld la=%ld r=%ld xr=%ld xq=%ld xt=%ld t=%ld", - __entry->call, - __entry->ack_at - __entry->now, - __entry->ack_lost_at - __entry->now, - __entry->resend_at - __entry->now, - __entry->expect_rx_by - __entry->now, - __entry->expect_req_by - __entry->now, - __entry->expect_term_by - __entry->now, - __entry->timer - __entry->now) + TP_printk("c=%08x EXPIRED", + __entry->call) ); TRACE_EVENT(rxrpc_rx_lose, @@ -1507,7 +1519,7 @@ TRACE_EVENT(rxrpc_drop_ack, TRACE_EVENT(rxrpc_retransmit, TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, - rxrpc_serial_t serial, s64 expiry), + rxrpc_serial_t serial, ktime_t expiry), TP_ARGS(call, seq, serial, expiry), @@ -1515,7 +1527,7 @@ TRACE_EVENT(rxrpc_retransmit, __field(unsigned int, call) __field(rxrpc_seq_t, seq) __field(rxrpc_serial_t, serial) - __field(s64, expiry) + __field(ktime_t, expiry) ), TP_fast_assign( @@ -1529,7 +1541,7 @@ TRACE_EVENT(rxrpc_retransmit, __entry->call, __entry->seq, __entry->serial, - __entry->expiry) + ktime_to_us(__entry->expiry)) ); TRACE_EVENT(rxrpc_congest, diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 465bfe5eb0617..5222bc97d192e 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -487,7 +487,7 @@ EXPORT_SYMBOL(rxrpc_kernel_new_call_notification); * rxrpc_kernel_set_max_life - Set maximum lifespan on a call * @sock: The socket the call is on * @call: The call to configure - * @hard_timeout: The maximum lifespan of the call in jiffies + * @hard_timeout: The maximum lifespan of the call in ms * * Set the maximum lifespan of a call. The call will end with ETIME or * ETIMEDOUT if it takes longer than this. @@ -495,14 +495,14 @@ EXPORT_SYMBOL(rxrpc_kernel_new_call_notification); void rxrpc_kernel_set_max_life(struct socket *sock, struct rxrpc_call *call, unsigned long hard_timeout) { - unsigned long now; + ktime_t delay = ms_to_ktime(hard_timeout), expect_term_by; mutex_lock(&call->user_mutex); - now = jiffies; - hard_timeout += now; - WRITE_ONCE(call->expect_term_by, hard_timeout); - rxrpc_reduce_call_timer(call, hard_timeout, now, rxrpc_timer_set_for_hard); + expect_term_by = ktime_add(ktime_get_real(), delay); + WRITE_ONCE(call->expect_term_by, expect_term_by); + trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_hard); + rxrpc_poke_call(call, rxrpc_call_poke_set_timeout); mutex_unlock(&call->user_mutex); } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 47f4689379ca2..21ecac22b51dc 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -352,8 +352,8 @@ struct rxrpc_peer { u32 mdev_us; /* medium deviation */ u32 mdev_max_us; /* maximal mdev for the last rtt period */ u32 rttvar_us; /* smoothed mdev_max */ - u32 rto_j; /* Retransmission timeout in jiffies */ - u8 backoff; /* Backoff timeout */ + u32 rto_us; /* Retransmission timeout in usec */ + u8 backoff; /* Backoff timeout (as shift) */ u8 cong_ssthresh; /* Congestion slow-start threshold */ }; @@ -620,17 +620,17 @@ struct rxrpc_call { const struct rxrpc_security *security; /* applied security module */ struct mutex user_mutex; /* User access mutex */ struct sockaddr_rxrpc dest_srx; /* Destination address */ - unsigned long delay_ack_at; /* When DELAY ACK needs to happen */ - unsigned long ack_lost_at; /* When ACK is figured as lost */ - unsigned long resend_at; /* When next resend needs to happen */ - unsigned long ping_at; /* When next to send a ping */ - unsigned long keepalive_at; /* When next to send a keepalive ping */ - unsigned long expect_rx_by; /* When we expect to get a packet by */ - unsigned long expect_req_by; /* When we expect to get a request DATA packet by */ - unsigned long expect_term_by; /* When we expect call termination by */ - u32 next_rx_timo; /* Timeout for next Rx packet (jif) */ - u32 next_req_timo; /* Timeout for next Rx request packet (jif) */ - u32 hard_timo; /* Maximum lifetime or 0 (jif) */ + ktime_t delay_ack_at; /* When DELAY ACK needs to happen */ + ktime_t ack_lost_at; /* When ACK is figured as lost */ + ktime_t resend_at; /* When next resend needs to happen */ + ktime_t ping_at; /* When next to send a ping */ + ktime_t keepalive_at; /* When next to send a keepalive ping */ + ktime_t expect_rx_by; /* When we expect to get a packet by */ + ktime_t expect_req_by; /* When we expect to get a request DATA packet by */ + ktime_t expect_term_by; /* When we expect call termination by */ + u32 next_rx_timo; /* Timeout for next Rx packet (ms) */ + u32 next_req_timo; /* Timeout for next Rx request packet (ms) */ + u32 hard_timo; /* Maximum lifetime or 0 (s) */ struct timer_list timer; /* Combined event timer */ struct work_struct destroyer; /* In-process-context destroyer */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ @@ -675,7 +675,7 @@ struct rxrpc_call { rxrpc_seq_t tx_transmitted; /* Highest packet transmitted */ rxrpc_seq_t tx_prepared; /* Highest Tx slot prepared. */ rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */ - u16 tx_backoff; /* Delay to insert due to Tx failure */ + u16 tx_backoff; /* Delay to insert due to Tx failure (ms) */ u8 tx_winsize; /* Maximum size of Tx window */ #define RXRPC_TX_MAX_WINDOW 128 ktime_t tx_last_sent; /* Last time a transmission occurred */ @@ -866,11 +866,6 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t, void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *); void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb); -void rxrpc_reduce_call_timer(struct rxrpc_call *call, - unsigned long expire_at, - unsigned long now, - enum rxrpc_timer_trace why); - bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb); /* @@ -1214,7 +1209,7 @@ static inline int rxrpc_abort_eproto(struct rxrpc_call *call, */ void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, int, rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t); -unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *, bool); +ktime_t rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans); void rxrpc_peer_init_rtt(struct rxrpc_peer *); /* diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index bc1a5abb7d6fb..2a9f74eb7c462 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -23,14 +23,14 @@ void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial, enum rxrpc_propose_ack_trace why) { - unsigned long now = jiffies; - unsigned long ping_at = now + rxrpc_idle_ack_delay; + ktime_t delay = ms_to_ktime(READ_ONCE(rxrpc_idle_ack_delay)); + ktime_t now = ktime_get_real(); + ktime_t ping_at = ktime_add(now, delay); - if (time_before(ping_at, call->ping_at)) { + trace_rxrpc_propose_ack(call, why, RXRPC_ACK_PING, serial); + if (ktime_before(ping_at, call->ping_at)) { call->ping_at = ping_at; - rxrpc_reduce_call_timer(call, ping_at, now, - rxrpc_timer_set_for_ping); - trace_rxrpc_propose_ack(call, why, RXRPC_ACK_PING, serial); + trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_ping); } } @@ -40,25 +40,18 @@ void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial, void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why) { - unsigned long expiry = rxrpc_soft_ack_delay; - unsigned long now = jiffies, ack_at; + ktime_t now = ktime_get_real(), delay; - if (rxrpc_soft_ack_delay < expiry) - expiry = rxrpc_soft_ack_delay; - if (call->peer->srtt_us != 0) - ack_at = usecs_to_jiffies(call->peer->srtt_us >> 3); + trace_rxrpc_propose_ack(call, why, RXRPC_ACK_DELAY, serial); + + if (call->peer->srtt_us) + delay = (call->peer->srtt_us >> 3) * NSEC_PER_USEC; else - ack_at = expiry; - - ack_at += READ_ONCE(call->tx_backoff); - ack_at += now; - if (time_before(ack_at, call->delay_ack_at)) { - call->delay_ack_at = ack_at; - rxrpc_reduce_call_timer(call, ack_at, now, - rxrpc_timer_set_for_ack); - } + delay = ms_to_ktime(READ_ONCE(rxrpc_soft_ack_delay)); + ktime_add_ms(delay, call->tx_backoff); - trace_rxrpc_propose_ack(call, why, RXRPC_ACK_DELAY, serial); + call->delay_ack_at = ktime_add(now, delay); + trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_delayed_ack); } /* @@ -77,9 +70,8 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) struct rxrpc_ackpacket *ack = NULL; struct rxrpc_skb_priv *sp; struct rxrpc_txbuf *txb; - unsigned long resend_at; rxrpc_seq_t transmitted = call->tx_transmitted; - ktime_t now, max_age, oldest, ack_ts; + ktime_t now, max_age, oldest, ack_ts, delay; bool unacked = false; unsigned int i; LIST_HEAD(retrans_queue); @@ -87,7 +79,7 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) _enter("{%d,%d}", call->acks_hard_ack, call->tx_top); now = ktime_get_real(); - max_age = ktime_sub_us(now, jiffies_to_usecs(call->peer->rto_j)); + max_age = ktime_sub_us(now, call->peer->rto_us); oldest = now; if (list_empty(&call->tx_buffer)) @@ -178,10 +170,8 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) no_further_resend: no_resend: - resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest))); - resend_at += jiffies + rxrpc_get_rto_backoff(call->peer, - !list_empty(&retrans_queue)); - call->resend_at = resend_at; + delay = rxrpc_get_rto_backoff(call->peer, !list_empty(&retrans_queue)); + call->resend_at = ktime_add(oldest, delay); if (unacked) rxrpc_congestion_timeout(call); @@ -191,8 +181,7 @@ no_resend: * retransmitting data. */ if (list_empty(&retrans_queue)) { - rxrpc_reduce_call_timer(call, resend_at, jiffies, - rxrpc_timer_set_for_resend); + trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_resend_reset); ack_ts = ktime_sub(now, call->acks_latest_ts); if (ktime_to_us(ack_ts) < (call->peer->srtt_us >> 3)) goto out; @@ -218,13 +207,11 @@ out: */ static void rxrpc_begin_service_reply(struct rxrpc_call *call) { - unsigned long now = jiffies; - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SEND_REPLY); - call->delay_ack_at = now + MAX_JIFFY_OFFSET; if (call->ackr_reason == RXRPC_ACK_DELAY) call->ackr_reason = 0; - trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now); + call->delay_ack_at = KTIME_MAX; + trace_rxrpc_timer_can(call, rxrpc_timer_trace_delayed_ack); } /* @@ -333,8 +320,8 @@ static void rxrpc_send_initial_ping(struct rxrpc_call *call) */ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) { - unsigned long now, next, t; - bool resend = false, expired = false; + ktime_t now, t; + bool resend = false; s32 abort_code; rxrpc_see_call(call, rxrpc_call_see_input); @@ -362,66 +349,69 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_input_call_packet(call, skb); /* If we see our async-event poke, check for timeout trippage. */ - now = jiffies; - t = call->expect_rx_by; - if (time_after_eq(now, t)) { - trace_rxrpc_timer(call, rxrpc_timer_exp_normal, now); - expired = true; + now = ktime_get_real(); + t = ktime_sub(call->expect_rx_by, now); + if (t <= 0) { + trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_expect_rx); + goto expired; } - t = call->expect_req_by; - if (__rxrpc_call_state(call) == RXRPC_CALL_SERVER_RECV_REQUEST && - time_after_eq(now, t)) { - trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now); - expired = true; + t = ktime_sub(call->expect_req_by, now); + if (t <= 0) { + call->expect_req_by = KTIME_MAX; + if (__rxrpc_call_state(call) == RXRPC_CALL_SERVER_RECV_REQUEST) { + trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_idle); + goto expired; + } } - t = READ_ONCE(call->expect_term_by); - if (time_after_eq(now, t)) { - trace_rxrpc_timer(call, rxrpc_timer_exp_hard, now); - expired = true; + t = ktime_sub(READ_ONCE(call->expect_term_by), now); + if (t <= 0) { + trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_hard); + goto expired; } - t = call->delay_ack_at; - if (time_after_eq(now, t)) { - trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now); - call->delay_ack_at = now + MAX_JIFFY_OFFSET; + t = ktime_sub(call->delay_ack_at, now); + if (t <= 0) { + trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_delayed_ack); + call->delay_ack_at = KTIME_MAX; rxrpc_send_ACK(call, RXRPC_ACK_DELAY, 0, rxrpc_propose_ack_delayed_ack); } - t = call->ack_lost_at; - if (time_after_eq(now, t)) { - trace_rxrpc_timer(call, rxrpc_timer_exp_lost_ack, now); - call->ack_lost_at = now + MAX_JIFFY_OFFSET; + t = ktime_sub(call->ack_lost_at, now); + if (t <= 0) { + trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_lost_ack); + call->ack_lost_at = KTIME_MAX; set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events); } - t = call->keepalive_at; - if (time_after_eq(now, t)) { - trace_rxrpc_timer(call, rxrpc_timer_exp_keepalive, now); - call->keepalive_at = now + MAX_JIFFY_OFFSET; + t = ktime_sub(call->ping_at, now); + if (t <= 0) { + trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_ping); + call->ping_at = KTIME_MAX; rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, rxrpc_propose_ack_ping_for_keepalive); } - t = call->ping_at; - if (time_after_eq(now, t)) { - trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now); - call->ping_at = now + MAX_JIFFY_OFFSET; - rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, - rxrpc_propose_ack_ping_for_keepalive); - } - - t = call->resend_at; - if (time_after_eq(now, t)) { - trace_rxrpc_timer(call, rxrpc_timer_exp_resend, now); - call->resend_at = now + MAX_JIFFY_OFFSET; + t = ktime_sub(call->resend_at, now); + if (t <= 0) { + trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_resend); + call->resend_at = KTIME_MAX; resend = true; } rxrpc_transmit_some_data(call); + now = ktime_get_real(); + t = ktime_sub(call->keepalive_at, now); + if (t <= 0) { + trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_keepalive); + call->keepalive_at = KTIME_MAX; + rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, + rxrpc_propose_ack_ping_for_keepalive); + } + if (skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); @@ -433,19 +423,6 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_send_initial_ping(call); /* Process events */ - if (expired) { - if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) && - (int)call->conn->hi_serial - (int)call->rx_serial > 0) { - trace_rxrpc_call_reset(call); - rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ECONNRESET, - rxrpc_abort_call_reset); - } else { - rxrpc_abort_call(call, 0, RX_CALL_TIMEOUT, -ETIME, - rxrpc_abort_call_timeout); - } - goto out; - } - if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, rxrpc_propose_ack_ping_for_lost_ack); @@ -474,23 +451,33 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) /* Make sure the timer is restarted */ if (!__rxrpc_call_is_complete(call)) { - next = call->expect_rx_by; + ktime_t next = READ_ONCE(call->expect_term_by), delay; -#define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; } +#define set(T) { ktime_t _t = (T); if (ktime_before(_t, next)) next = _t; } set(call->expect_req_by); - set(call->expect_term_by); + set(call->expect_rx_by); set(call->delay_ack_at); set(call->ack_lost_at); set(call->resend_at); set(call->keepalive_at); set(call->ping_at); - now = jiffies; - if (time_after_eq(now, next)) + now = ktime_get_real(); + delay = ktime_sub(next, now); + if (delay <= 0) { rxrpc_poke_call(call, rxrpc_call_poke_timer_now); - - rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart); + } else { + unsigned long nowj = jiffies, delayj, nextj; + + delayj = max(nsecs_to_jiffies(delay), 1); + nextj = nowj + delayj; + if (time_before(nextj, call->timer.expires) || + !timer_pending(&call->timer)) { + trace_rxrpc_timer_restart(call, delay, delayj); + timer_reduce(&call->timer, nextj); + } + } } out: @@ -505,4 +492,16 @@ out: rxrpc_shrink_call_tx_buffer(call); _leave(""); return true; + +expired: + if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) && + (int)call->conn->hi_serial - (int)call->rx_serial > 0) { + trace_rxrpc_call_reset(call); + rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ECONNRESET, + rxrpc_abort_call_reset); + } else { + rxrpc_abort_call(call, 0, RX_CALL_TIMEOUT, -ETIME, + rxrpc_abort_call_timeout); + } + goto out; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 9fc9a6c3f6858..01fa71e8b1f72 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -70,20 +70,11 @@ static void rxrpc_call_timer_expired(struct timer_list *t) _enter("%d", call->debug_id); if (!__rxrpc_call_is_complete(call)) { - trace_rxrpc_timer_expired(call, jiffies); + trace_rxrpc_timer_expired(call); rxrpc_poke_call(call, rxrpc_call_poke_timer); } } -void rxrpc_reduce_call_timer(struct rxrpc_call *call, - unsigned long expire_at, - unsigned long now, - enum rxrpc_timer_trace why) -{ - trace_rxrpc_timer(call, why, now); - timer_reduce(&call->timer, expire_at); -} - static struct lock_class_key rxrpc_call_user_mutex_lock_class_key; static void rxrpc_destroy_call(struct work_struct *); @@ -163,12 +154,20 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, spin_lock_init(&call->notify_lock); spin_lock_init(&call->tx_lock); refcount_set(&call->ref, 1); - call->debug_id = debug_id; - call->tx_total_len = -1; - call->next_rx_timo = 20 * HZ; - call->next_req_timo = 1 * HZ; - call->ackr_window = 1; - call->ackr_wtop = 1; + call->debug_id = debug_id; + call->tx_total_len = -1; + call->next_rx_timo = 20 * HZ; + call->next_req_timo = 1 * HZ; + call->ackr_window = 1; + call->ackr_wtop = 1; + call->delay_ack_at = KTIME_MAX; + call->ack_lost_at = KTIME_MAX; + call->resend_at = KTIME_MAX; + call->ping_at = KTIME_MAX; + call->keepalive_at = KTIME_MAX; + call->expect_rx_by = KTIME_MAX; + call->expect_req_by = KTIME_MAX; + call->expect_term_by = KTIME_MAX; memset(&call->sock_node, 0xed, sizeof(call->sock_node)); @@ -226,11 +225,11 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, __set_bit(RXRPC_CALL_EXCLUSIVE, &call->flags); if (p->timeouts.normal) - call->next_rx_timo = min(msecs_to_jiffies(p->timeouts.normal), 1UL); + call->next_rx_timo = min(p->timeouts.normal, 1); if (p->timeouts.idle) - call->next_req_timo = min(msecs_to_jiffies(p->timeouts.idle), 1UL); + call->next_req_timo = min(p->timeouts.idle, 1); if (p->timeouts.hard) - call->hard_timo = p->timeouts.hard * HZ; + call->hard_timo = p->timeouts.hard; ret = rxrpc_init_client_call_security(call); if (ret < 0) { @@ -253,18 +252,13 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, */ void rxrpc_start_call_timer(struct rxrpc_call *call) { - unsigned long now = jiffies; - unsigned long j = now + MAX_JIFFY_OFFSET; - - call->delay_ack_at = j; - call->ack_lost_at = j; - call->resend_at = j; - call->ping_at = j; - call->keepalive_at = j; - call->expect_rx_by = j; - call->expect_req_by = j; - call->expect_term_by = j + call->hard_timo; - call->timer.expires = now; + if (call->hard_timo) { + ktime_t delay = ms_to_ktime(call->hard_timo * 1000); + + call->expect_term_by = ktime_add(ktime_get_real(), delay); + trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_hard); + } + call->timer.expires = jiffies; } /* diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index e53a49accc16c..09cce1d5d6055 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -252,6 +252,9 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, { ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags)); + call->resend_at = KTIME_MAX; + trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend); + if (unlikely(call->cong_last_nack)) { rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); call->cong_last_nack = NULL; @@ -288,13 +291,11 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, static bool rxrpc_receiving_reply(struct rxrpc_call *call) { struct rxrpc_ack_summary summary = { 0 }; - unsigned long now; rxrpc_seq_t top = READ_ONCE(call->tx_top); if (call->ackr_reason) { - now = jiffies; - call->delay_ack_at = now + MAX_JIFFY_OFFSET; - trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now); + call->delay_ack_at = KTIME_MAX; + trace_rxrpc_timer_can(call, rxrpc_timer_trace_delayed_ack); } if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { @@ -327,7 +328,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) case RXRPC_CALL_SERVER_RECV_REQUEST: rxrpc_set_call_state(call, RXRPC_CALL_SERVER_ACK_REQUEST); - call->expect_req_by = jiffies + MAX_JIFFY_OFFSET; + call->expect_req_by = KTIME_MAX; rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_processing_op); break; @@ -587,14 +588,12 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) case RXRPC_CALL_SERVER_RECV_REQUEST: { unsigned long timo = READ_ONCE(call->next_req_timo); - unsigned long now, expect_req_by; if (timo) { - now = jiffies; - expect_req_by = now + timo; - call->expect_req_by = now + timo; - rxrpc_reduce_call_timer(call, expect_req_by, now, - rxrpc_timer_set_for_idle); + ktime_t delay = ms_to_ktime(timo); + + call->expect_req_by = ktime_add(ktime_get_real(), delay); + trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_idle); } break; } @@ -1046,11 +1045,10 @@ void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb) timo = READ_ONCE(call->next_rx_timo); if (timo) { - unsigned long now = jiffies; + ktime_t delay = ms_to_ktime(timo); - call->expect_rx_by = now + timo; - rxrpc_reduce_call_timer(call, call->expect_rx_by, now, - rxrpc_timer_set_for_normal); + call->expect_rx_by = ktime_add(ktime_get_real(), delay); + trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx); } switch (sp->hdr.type) { diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 825b81183046e..657cf35089a62 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -17,22 +17,22 @@ unsigned int rxrpc_max_backlog __read_mostly = 10; /* - * How long to wait before scheduling an ACK with subtype DELAY (in jiffies). + * How long to wait before scheduling an ACK with subtype DELAY (in ms). * * We use this when we've received new data packets. If those packets aren't * all consumed within this time we will send a DELAY ACK if an ACK was not * requested to let the sender know it doesn't need to resend. */ -unsigned long rxrpc_soft_ack_delay = HZ; +unsigned long rxrpc_soft_ack_delay = 1000; /* - * How long to wait before scheduling an ACK with subtype IDLE (in jiffies). + * How long to wait before scheduling an ACK with subtype IDLE (in ms). * * We use this when we've consumed some previously soft-ACK'd packets when * further packets aren't immediately received to decide when to send an IDLE * ACK let the other end know that it can free up its Tx buffer space. */ -unsigned long rxrpc_idle_ack_delay = HZ / 2; +unsigned long rxrpc_idle_ack_delay = 500; /* * Receive window size in packets. This indicates the maximum number of diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 0a317498b8e06..ec82193e5681e 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -48,12 +48,10 @@ static const char rxrpc_keepalive_string[] = ""; static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret) { if (ret < 0) { - u16 tx_backoff = READ_ONCE(call->tx_backoff); - - if (tx_backoff < HZ) - WRITE_ONCE(call->tx_backoff, tx_backoff + 1); + if (call->tx_backoff < 1000) + call->tx_backoff += 100; } else { - WRITE_ONCE(call->tx_backoff, 0); + call->tx_backoff = 0; } } @@ -67,11 +65,10 @@ static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret) */ static void rxrpc_set_keepalive(struct rxrpc_call *call) { - unsigned long now = jiffies; + ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo) / 6); - call->keepalive_at = now + call->next_rx_timo / 6; - rxrpc_reduce_call_timer(call, call->keepalive_at, now, - rxrpc_timer_set_for_keepalive); + call->keepalive_at = ktime_add(ktime_get_real(), delay); + trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_keepalive); } /* @@ -515,24 +512,21 @@ done: if (txb->flags & RXRPC_REQUEST_ACK) { call->peer->rtt_last_req = txb->last_sent; if (call->peer->rtt_count > 1) { - unsigned long nowj = jiffies, ack_lost_at; + ktime_t delay = rxrpc_get_rto_backoff(call->peer, false); + ktime_t now = ktime_get_real(); - ack_lost_at = rxrpc_get_rto_backoff(call->peer, false); - ack_lost_at += nowj; - call->ack_lost_at = ack_lost_at; - rxrpc_reduce_call_timer(call, ack_lost_at, nowj, - rxrpc_timer_set_for_lost_ack); + call->ack_lost_at = ktime_add(now, delay); + trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_lost_ack); } } if (txb->seq == 1 && !test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) { - unsigned long nowj = jiffies; + ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo)); - call->expect_rx_by = nowj + call->next_rx_timo; - rxrpc_reduce_call_timer(call, call->expect_rx_by, nowj, - rxrpc_timer_set_for_normal); + call->expect_rx_by = ktime_add(ktime_get_real(), delay); + trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx); } rxrpc_set_keepalive(call); @@ -755,11 +749,9 @@ void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb) rxrpc_instant_resend(call, txb); } } else { - unsigned long now = jiffies; - unsigned long resend_at = now + call->peer->rto_j; + ktime_t delay = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC); - call->resend_at = resend_at; - rxrpc_reduce_call_timer(call, resend_at, now, - rxrpc_timer_set_for_send); + call->resend_at = ktime_add(ktime_get_real(), delay); + trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_resend_tx); } } diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 26dc2f26d92d8..263a2251e3d24 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -52,9 +52,9 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) struct rxrpc_call *call; struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); enum rxrpc_call_state state; - unsigned long timeout = 0; rxrpc_seq_t acks_hard_ack; char lbuff[50], rbuff[50]; + long timeout = 0; if (v == &rxnet->calls) { seq_puts(seq, @@ -76,10 +76,8 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) sprintf(rbuff, "%pISpc", &call->dest_srx.transport); state = rxrpc_call_state(call); - if (state != RXRPC_CALL_SERVER_PREALLOC) { - timeout = READ_ONCE(call->expect_rx_by); - timeout -= jiffies; - } + if (state != RXRPC_CALL_SERVER_PREALLOC) + timeout = ktime_ms_delta(READ_ONCE(call->expect_rx_by), ktime_get_real()); acks_hard_ack = READ_ONCE(call->acks_hard_ack); seq_printf(seq, @@ -309,7 +307,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) peer->mtu, now - peer->last_tx_at, peer->srtt_us >> 3, - jiffies_to_usecs(peer->rto_j)); + peer->rto_us); return 0; } diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c index be61d6f5be8d1..cdab7b7d08a00 100644 --- a/net/rxrpc/rtt.c +++ b/net/rxrpc/rtt.c @@ -11,8 +11,8 @@ #include #include "ar-internal.h" -#define RXRPC_RTO_MAX ((unsigned)(120 * HZ)) -#define RXRPC_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */ +#define RXRPC_RTO_MAX (120 * USEC_PER_SEC) +#define RXRPC_TIMEOUT_INIT ((unsigned int)(1 * MSEC_PER_SEC)) /* RFC6298 2.1 initial RTO value */ #define rxrpc_jiffies32 ((u32)jiffies) /* As rxrpc_jiffies32 */ static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer) @@ -22,7 +22,7 @@ static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer) static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer) { - return usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us); + return (peer->srtt_us >> 3) + peer->rttvar_us; } static u32 rxrpc_bound_rto(u32 rto) @@ -124,7 +124,7 @@ static void rxrpc_set_rto(struct rxrpc_peer *peer) /* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo * guarantees that rto is higher. */ - peer->rto_j = rxrpc_bound_rto(rto); + peer->rto_us = rxrpc_bound_rto(rto); } static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, long rtt_us) @@ -163,33 +163,33 @@ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, spin_unlock(&peer->rtt_input_lock); trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial, - peer->srtt_us >> 3, peer->rto_j); + peer->srtt_us >> 3, peer->rto_us); } /* - * Get the retransmission timeout to set in jiffies, backing it off each time - * we retransmit. + * Get the retransmission timeout to set in nanoseconds, backing it off each + * time we retransmit. */ -unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans) +ktime_t rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans) { - u64 timo_j; - u8 backoff = READ_ONCE(peer->backoff); + u64 timo_us; + u32 backoff = READ_ONCE(peer->backoff); - timo_j = peer->rto_j; - timo_j <<= backoff; - if (retrans && timo_j * 2 <= RXRPC_RTO_MAX) + timo_us = peer->rto_us; + timo_us <<= backoff; + if (retrans && timo_us * 2 <= RXRPC_RTO_MAX) WRITE_ONCE(peer->backoff, backoff + 1); - if (timo_j < 1) - timo_j = 1; + if (timo_us < 1) + timo_us = 1; - return timo_j; + return ns_to_ktime(timo_us * NSEC_PER_USEC); } void rxrpc_peer_init_rtt(struct rxrpc_peer *peer) { - peer->rto_j = RXRPC_TIMEOUT_INIT; - peer->mdev_us = jiffies_to_usecs(RXRPC_TIMEOUT_INIT); + peer->rto_us = RXRPC_TIMEOUT_INIT; + peer->mdev_us = RXRPC_TIMEOUT_INIT; peer->backoff = 0; //minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U); } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 4d152f06b039c..6f765768c49c5 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -609,7 +609,6 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) __releases(&rx->sk.sk_lock.slock) { struct rxrpc_call *call; - unsigned long now, j; bool dropped_lock = false; int ret; @@ -687,25 +686,21 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) switch (p.call.nr_timeouts) { case 3: - j = msecs_to_jiffies(p.call.timeouts.normal); - if (p.call.timeouts.normal > 0 && j == 0) - j = 1; - WRITE_ONCE(call->next_rx_timo, j); + WRITE_ONCE(call->next_rx_timo, p.call.timeouts.normal); fallthrough; case 2: - j = msecs_to_jiffies(p.call.timeouts.idle); - if (p.call.timeouts.idle > 0 && j == 0) - j = 1; - WRITE_ONCE(call->next_req_timo, j); + WRITE_ONCE(call->next_req_timo, p.call.timeouts.idle); fallthrough; case 1: if (p.call.timeouts.hard > 0) { - j = p.call.timeouts.hard * HZ; - now = jiffies; - j += now; - WRITE_ONCE(call->expect_term_by, j); - rxrpc_reduce_call_timer(call, j, now, - rxrpc_timer_set_for_hard); + ktime_t delay = ms_to_ktime(p.call.timeouts.hard * MSEC_PER_SEC); + + WRITE_ONCE(call->expect_term_by, + ktime_add(p.call.timeouts.hard, + ktime_get_real())); + trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_hard); + rxrpc_poke_call(call, rxrpc_call_poke_set_timeout); + } break; } diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index ecaeb4ecfb58a..c9bedd0e2d867 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -15,6 +15,8 @@ static const unsigned int four = 4; static const unsigned int max_backlog = RXRPC_BACKLOG_MAX - 1; static const unsigned int n_65535 = 65535; static const unsigned int n_max_acks = 255; +static const unsigned long one_ms = 1; +static const unsigned long max_ms = 1000; static const unsigned long one_jiffy = 1; static const unsigned long max_jiffies = MAX_JIFFY_OFFSET; #ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY @@ -28,24 +30,24 @@ static const unsigned long max_500 = 500; * information on the individual parameters. */ static struct ctl_table rxrpc_sysctl_table[] = { - /* Values measured in milliseconds but used in jiffies */ + /* Values measured in milliseconds */ { .procname = "soft_ack_delay", .data = &rxrpc_soft_ack_delay, .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_doulongvec_ms_jiffies_minmax, - .extra1 = (void *)&one_jiffy, - .extra2 = (void *)&max_jiffies, + .proc_handler = proc_doulongvec_minmax, + .extra1 = (void *)&one_ms, + .extra2 = (void *)&max_ms, }, { .procname = "idle_ack_delay", .data = &rxrpc_idle_ack_delay, .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_doulongvec_ms_jiffies_minmax, - .extra1 = (void *)&one_jiffy, - .extra2 = (void *)&max_jiffies, + .proc_handler = proc_doulongvec_minmax, + .extra1 = (void *)&one_ms, + .extra2 = (void *)&max_ms, }, { .procname = "idle_conn_expiry", -- cgit 1.2.3-korg From 4d267ad6fd566c58d33ac899fefd5357b9a71908 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 31 Jan 2024 11:03:52 +0000 Subject: rxrpc: Record probes after transmission and reduce number of time-gets Move the recording of a successfully transmitted DATA or ACK packet that will provide RTT probing to after the transmission. With the I/O thread model, this can be done because parsing of the responding ACK can no longer race with the post-transmission code. Move the various timeout-settings done after successfully transmitting a DATA packet into rxrpc_tstamp_data_packets() and eliminate a number of calls to get the current time. As a consequence we no longer need to cancel a proposed RTT probe on transmission failure. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- net/rxrpc/output.c | 105 ++++++++++++++++++++--------------------------------- 1 file changed, 40 insertions(+), 65 deletions(-) (limited to 'net') diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index ec82193e5681e..5ea9601efd05a 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -63,7 +63,7 @@ static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret) * Receiving a response to the ping will prevent the ->expect_rx_by timer from * expiring. */ -static void rxrpc_set_keepalive(struct rxrpc_call *call) +static void rxrpc_set_keepalive(struct rxrpc_call *call, ktime_t now) { ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo) / 6); @@ -147,8 +147,8 @@ static void rxrpc_fill_out_ack(struct rxrpc_call *call, /* * Record the beginning of an RTT probe. */ -static int rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial, - enum rxrpc_rtt_tx_trace why) +static void rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial, + ktime_t now, enum rxrpc_rtt_tx_trace why) { unsigned long avail = call->rtt_avail; int rtt_slot = 9; @@ -161,30 +161,15 @@ static int rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial, goto no_slot; call->rtt_serial[rtt_slot] = serial; - call->rtt_sent_at[rtt_slot] = ktime_get_real(); + call->rtt_sent_at[rtt_slot] = now; smp_wmb(); /* Write data before avail bit */ set_bit(rtt_slot + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail); trace_rxrpc_rtt_tx(call, why, rtt_slot, serial); - return rtt_slot; + return; no_slot: trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_no_slot, rtt_slot, serial); - return -1; -} - -/* - * Cancel an RTT probe. - */ -static void rxrpc_cancel_rtt_probe(struct rxrpc_call *call, - rxrpc_serial_t serial, int rtt_slot) -{ - if (rtt_slot != -1) { - clear_bit(rtt_slot + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail); - smp_wmb(); /* Clear pending bit before setting slot */ - set_bit(rtt_slot, &call->rtt_avail); - trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_cancel, rtt_slot, serial); - } } /* @@ -196,7 +181,8 @@ static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t struct rxrpc_connection *conn; struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1); struct msghdr msg; - int ret, rtt_slot = -1; + ktime_t now; + int ret; if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) return; @@ -218,9 +204,6 @@ static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t ntohl(ack->serial), ack->reason, ack->nAcks, txb->ack_rwind); - if (ack->reason == RXRPC_ACK_PING) - rtt_slot = rxrpc_begin_rtt_probe(call, txb->serial, rxrpc_rtt_tx_ping); - rxrpc_inc_stat(call->rxnet, stat_tx_ack_send); iov_iter_kvec(&msg.msg_iter, WRITE, txb->kvec, txb->nr_kvec, txb->len); @@ -233,16 +216,14 @@ static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t } else { trace_rxrpc_tx_packet(call->debug_id, whdr, rxrpc_tx_point_call_ack); + now = ktime_get_real(); + if (ack->reason == RXRPC_ACK_PING) + rxrpc_begin_rtt_probe(call, txb->serial, now, rxrpc_rtt_tx_ping); if (txb->flags & RXRPC_REQUEST_ACK) - call->peer->rtt_last_req = ktime_get_real(); + call->peer->rtt_last_req = now; + rxrpc_set_keepalive(call, now); } rxrpc_tx_backoff(call, ret); - - if (!__rxrpc_call_is_complete(call)) { - if (ret < 0) - rxrpc_cancel_rtt_probe(call, txb->serial, rtt_slot); - rxrpc_set_keepalive(call); - } } /* @@ -413,18 +394,36 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_tx } /* - * Set the times on a packet before transmission + * Set timeouts after transmitting a packet. */ -static int rxrpc_tstamp_data_packets(struct rxrpc_call *call, struct rxrpc_txbuf *txb) +static void rxrpc_tstamp_data_packets(struct rxrpc_call *call, struct rxrpc_txbuf *txb) { - ktime_t tstamp = ktime_get_real(); - int rtt_slot = -1; + ktime_t now = ktime_get_real(); + bool ack_requested = txb->flags & RXRPC_REQUEST_ACK; - txb->last_sent = tstamp; - if (txb->flags & RXRPC_REQUEST_ACK) - rtt_slot = rxrpc_begin_rtt_probe(call, txb->serial, rxrpc_rtt_tx_data); + call->tx_last_sent = now; + txb->last_sent = now; + + if (ack_requested) { + rxrpc_begin_rtt_probe(call, txb->serial, now, rxrpc_rtt_tx_data); + + call->peer->rtt_last_req = now; + if (call->peer->rtt_count > 1) { + ktime_t delay = rxrpc_get_rto_backoff(call->peer, false); - return rtt_slot; + call->ack_lost_at = ktime_add(now, delay); + trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_lost_ack); + } + } + + if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) { + ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo)); + + call->expect_rx_by = ktime_add(now, delay); + trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx); + } + + rxrpc_set_keepalive(call, now); } /* @@ -437,7 +436,7 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t enum rxrpc_tx_point frag; struct msghdr msg; size_t len; - int ret, rtt_slot = -1; + int ret; _enter("%x,{%d}", txb->seq, txb->len); @@ -479,8 +478,6 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t } retry: - rtt_slot = rxrpc_tstamp_data_packets(call, txb); - /* send the packet by UDP * - returns -EMSGSIZE if UDP would have to fragment the packet * to go out of the interface @@ -493,7 +490,6 @@ retry: if (ret < 0) { rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail); - rxrpc_cancel_rtt_probe(call, txb->serial, rtt_slot); trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, frag); } else { trace_rxrpc_tx_packet(call->debug_id, whdr, frag); @@ -508,28 +504,7 @@ retry: done: if (ret >= 0) { - call->tx_last_sent = txb->last_sent; - if (txb->flags & RXRPC_REQUEST_ACK) { - call->peer->rtt_last_req = txb->last_sent; - if (call->peer->rtt_count > 1) { - ktime_t delay = rxrpc_get_rto_backoff(call->peer, false); - ktime_t now = ktime_get_real(); - - call->ack_lost_at = ktime_add(now, delay); - trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_lost_ack); - } - } - - if (txb->seq == 1 && - !test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, - &call->flags)) { - ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo)); - - call->expect_rx_by = ktime_add(ktime_get_real(), delay); - trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx); - } - - rxrpc_set_keepalive(call); + rxrpc_tstamp_data_packets(call, txb); } else { /* Cancel the call if the initial transmission fails, * particularly if that's due to network routing issues that -- cgit 1.2.3-korg From 37473e41623409286ab2f5db628a59d4da9a79d7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 31 Jan 2024 14:58:40 +0000 Subject: rxrpc: Clean up the resend algorithm Clean up the DATA packet resending algorithm to retransmit packets as we come across them whilst walking the transmission buffer rather than queuing them for retransmission at the end. This can be done as ACK parsing - and thus the discarding of successful packets - is now done in the same thread rather than separately in softirq context and a locked section is no longer required. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- net/rxrpc/call_event.c | 79 ++++++++++++++++++++++++-------------------------- 1 file changed, 38 insertions(+), 41 deletions(-) (limited to 'net') diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 2a9f74eb7c462..6c5e3054209b3 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -71,23 +71,18 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) struct rxrpc_skb_priv *sp; struct rxrpc_txbuf *txb; rxrpc_seq_t transmitted = call->tx_transmitted; - ktime_t now, max_age, oldest, ack_ts, delay; - bool unacked = false; + ktime_t next_resend = KTIME_MAX, rto = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC); + ktime_t resend_at = KTIME_MAX, now, delay; + bool unacked = false, did_send = false; unsigned int i; - LIST_HEAD(retrans_queue); _enter("{%d,%d}", call->acks_hard_ack, call->tx_top); now = ktime_get_real(); - max_age = ktime_sub_us(now, call->peer->rto_us); - oldest = now; if (list_empty(&call->tx_buffer)) goto no_resend; - if (list_empty(&call->tx_buffer)) - goto no_further_resend; - trace_rxrpc_resend(call, ack_skb); txb = list_first_entry(&call->tx_buffer, struct rxrpc_txbuf, call_link); @@ -115,19 +110,23 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) goto no_further_resend; found_txb: - if (after(txb->serial, call->acks_highest_serial)) + resend_at = ktime_add(txb->last_sent, rto); + if (after(txb->serial, call->acks_highest_serial)) { + if (ktime_after(resend_at, now) && + ktime_before(resend_at, next_resend)) + next_resend = resend_at; continue; /* Ack point not yet reached */ + } rxrpc_see_txbuf(txb, rxrpc_txbuf_see_unacked); - if (list_empty(&txb->tx_link)) { - list_add_tail(&txb->tx_link, &retrans_queue); - txb->flags |= RXRPC_TXBUF_RESENT; - } - trace_rxrpc_retransmit(call, txb->seq, txb->serial, - ktime_to_ns(ktime_sub(txb->last_sent, - max_age))); + ktime_sub(resend_at, now)); + + txb->flags |= RXRPC_TXBUF_RESENT; + rxrpc_transmit_one(call, txb); + did_send = true; + now = ktime_get_real(); if (list_is_last(&txb->call_link, &call->tx_buffer)) goto no_further_resend; @@ -144,6 +143,8 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) goto no_further_resend; list_for_each_entry_from(txb, &call->tx_buffer, call_link) { + resend_at = ktime_add(txb->last_sent, rto); + if (before_eq(txb->seq, call->acks_prev_seq)) continue; if (after(txb->seq, call->tx_transmitted)) @@ -153,25 +154,30 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) before(txb->serial, ntohl(ack->serial))) goto do_resend; /* Wasn't accounted for by a more recent ping. */ - if (ktime_after(txb->last_sent, max_age)) { - if (ktime_before(txb->last_sent, oldest)) - oldest = txb->last_sent; + if (ktime_after(resend_at, now)) { + if (ktime_before(resend_at, next_resend)) + next_resend = resend_at; continue; } do_resend: unacked = true; - if (list_empty(&txb->tx_link)) { - list_add_tail(&txb->tx_link, &retrans_queue); - txb->flags |= RXRPC_TXBUF_RESENT; - rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans); - } + + txb->flags |= RXRPC_TXBUF_RESENT; + rxrpc_transmit_one(call, txb); + did_send = true; + rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans); + now = ktime_get_real(); } no_further_resend: no_resend: - delay = rxrpc_get_rto_backoff(call->peer, !list_empty(&retrans_queue)); - call->resend_at = ktime_add(oldest, delay); + if (resend_at < KTIME_MAX) { + delay = rxrpc_get_rto_backoff(call->peer, did_send); + resend_at = ktime_add(resend_at, delay); + trace_rxrpc_timer_set(call, resend_at - now, rxrpc_timer_trace_resend_reset); + } + call->resend_at = resend_at; if (unacked) rxrpc_congestion_timeout(call); @@ -180,24 +186,15 @@ no_resend: * that an ACK got lost somewhere. Send a ping to find out instead of * retransmitting data. */ - if (list_empty(&retrans_queue)) { - trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_resend_reset); - ack_ts = ktime_sub(now, call->acks_latest_ts); - if (ktime_to_us(ack_ts) < (call->peer->srtt_us >> 3)) - goto out; - rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, - rxrpc_propose_ack_ping_for_0_retrans); - goto out; - } + if (!did_send) { + ktime_t next_ping = ktime_add_us(call->acks_latest_ts, + call->peer->srtt_us >> 3); - /* Retransmit the queue */ - while ((txb = list_first_entry_or_null(&retrans_queue, - struct rxrpc_txbuf, tx_link))) { - list_del_init(&txb->tx_link); - rxrpc_transmit_one(call, txb); + if (ktime_sub(next_ping, now) <= 0) + rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, + rxrpc_propose_ack_ping_for_0_retrans); } -out: _leave(""); } -- cgit 1.2.3-korg From 4b68137a20bc88fbbdf32301ed604ef000c94e5c Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 1 Feb 2024 10:29:36 +0000 Subject: rxrpc: Extract useful fields from a received ACK to skb priv data Extract useful fields from a received ACK packet into the skb private data early on in the process of parsing incoming packets. This makes the ACK fields available even before we've matched the ACK up to a call and will allow us to deal with path MTU discovery probe responses even after the relevant call has been completed. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org --- net/rxrpc/ar-internal.h | 7 ++++-- net/rxrpc/call_event.c | 4 ++-- net/rxrpc/input.c | 61 ++++++++++++++++++++++--------------------------- net/rxrpc/io_thread.c | 11 +++++++++ 4 files changed, 45 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 21ecac22b51dc..08c0a32db8c74 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -198,8 +198,8 @@ struct rxrpc_host_header { * - max 48 bytes (struct sk_buff::cb) */ struct rxrpc_skb_priv { - struct rxrpc_connection *conn; /* Connection referred to (poke packet) */ union { + struct rxrpc_connection *conn; /* Connection referred to (poke packet) */ struct { u16 offset; /* Offset of data */ u16 len; /* Length of data */ @@ -208,9 +208,12 @@ struct rxrpc_skb_priv { }; struct { rxrpc_seq_t first_ack; /* First packet in acks table */ + rxrpc_seq_t prev_ack; /* Highest seq seen */ + rxrpc_serial_t acked_serial; /* Packet in response to (or 0) */ + u8 reason; /* Reason for ack */ u8 nr_acks; /* Number of acks+nacks */ u8 nr_nacks; /* Number of nacks */ - }; + } ack; }; struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */ }; diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 6c5e3054209b3..7bbb685047667 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -93,12 +93,12 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) sp = rxrpc_skb(ack_skb); ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header); - for (i = 0; i < sp->nr_acks; i++) { + for (i = 0; i < sp->ack.nr_acks; i++) { rxrpc_seq_t seq; if (ack->acks[i] & 1) continue; - seq = sp->first_ack + i; + seq = sp->ack.first_ack + i; if (after(txb->seq, transmitted)) break; if (after(txb->seq, seq)) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 09cce1d5d6055..3dedb8c0618ca 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -710,20 +710,19 @@ static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call, rxrpc_seq_t seq) { struct sk_buff *skb = call->cong_last_nack; - struct rxrpc_ackpacket ack; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned int i, new_acks = 0, retained_nacks = 0; - rxrpc_seq_t old_seq = sp->first_ack; - u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(ack); + rxrpc_seq_t old_seq = sp->ack.first_ack; + u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); - if (after_eq(seq, old_seq + sp->nr_acks)) { - summary->nr_new_acks += sp->nr_nacks; - summary->nr_new_acks += seq - (old_seq + sp->nr_acks); + if (after_eq(seq, old_seq + sp->ack.nr_acks)) { + summary->nr_new_acks += sp->ack.nr_nacks; + summary->nr_new_acks += seq - (old_seq + sp->ack.nr_acks); summary->nr_retained_nacks = 0; } else if (seq == old_seq) { - summary->nr_retained_nacks = sp->nr_nacks; + summary->nr_retained_nacks = sp->ack.nr_nacks; } else { - for (i = 0; i < sp->nr_acks; i++) { + for (i = 0; i < sp->ack.nr_acks; i++) { if (acks[i] == RXRPC_ACK_TYPE_NACK) { if (before(old_seq + i, seq)) new_acks++; @@ -736,7 +735,7 @@ static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call, summary->nr_retained_nacks = retained_nacks; } - return old_seq + sp->nr_acks; + return old_seq + sp->ack.nr_acks; } /* @@ -756,10 +755,10 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned int i, old_nacks = 0; - rxrpc_seq_t lowest_nak = seq + sp->nr_acks; + rxrpc_seq_t lowest_nak = seq + sp->ack.nr_acks; u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); - for (i = 0; i < sp->nr_acks; i++) { + for (i = 0; i < sp->ack.nr_acks; i++) { if (acks[i] == RXRPC_ACK_TYPE_ACK) { summary->nr_acks++; if (after_eq(seq, since)) @@ -771,7 +770,7 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, old_nacks++; } else { summary->nr_new_nacks++; - sp->nr_nacks++; + sp->ack.nr_nacks++; } if (before(seq, lowest_nak)) @@ -832,7 +831,6 @@ static bool rxrpc_is_ack_valid(struct rxrpc_call *call, static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_ack_summary summary = { 0 }; - struct rxrpc_ackpacket ack; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_acktrailer trailer; rxrpc_serial_t ack_serial, acked_serial; @@ -841,29 +839,24 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) _enter(""); - offset = sizeof(struct rxrpc_wire_header); - if (skb_copy_bits(skb, offset, &ack, sizeof(ack)) < 0) - return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack); - offset += sizeof(ack); - - ack_serial = sp->hdr.serial; - acked_serial = ntohl(ack.serial); - first_soft_ack = ntohl(ack.firstPacket); - prev_pkt = ntohl(ack.previousPacket); - hard_ack = first_soft_ack - 1; - nr_acks = ack.nAcks; - sp->first_ack = first_soft_ack; - sp->nr_acks = nr_acks; - summary.ack_reason = (ack.reason < RXRPC_ACK__INVALID ? - ack.reason : RXRPC_ACK__INVALID); + offset = sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); + + ack_serial = sp->hdr.serial; + acked_serial = sp->ack.acked_serial; + first_soft_ack = sp->ack.first_ack; + prev_pkt = sp->ack.prev_ack; + nr_acks = sp->ack.nr_acks; + hard_ack = first_soft_ack - 1; + summary.ack_reason = (sp->ack.reason < RXRPC_ACK__INVALID ? + sp->ack.reason : RXRPC_ACK__INVALID); trace_rxrpc_rx_ack(call, ack_serial, acked_serial, first_soft_ack, prev_pkt, summary.ack_reason, nr_acks); - rxrpc_inc_stat(call->rxnet, stat_rx_acks[ack.reason]); + rxrpc_inc_stat(call->rxnet, stat_rx_acks[summary.ack_reason]); if (acked_serial != 0) { - switch (ack.reason) { + switch (summary.ack_reason) { case RXRPC_ACK_PING_RESPONSE: rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, rxrpc_rtt_rx_ping_response); @@ -883,7 +876,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) * indicates that the client address changed due to NAT. The server * lost the call because it switched to a different peer. */ - if (unlikely(ack.reason == RXRPC_ACK_EXCEEDS_WINDOW) && + if (unlikely(summary.ack_reason == RXRPC_ACK_EXCEEDS_WINDOW) && first_soft_ack == 1 && prev_pkt == 0 && rxrpc_is_client_call(call)) { @@ -896,7 +889,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) * indicate a change of address. However, we can retransmit the call * if we still have it buffered to the beginning. */ - if (unlikely(ack.reason == RXRPC_ACK_OUT_OF_SEQUENCE) && + if (unlikely(summary.ack_reason == RXRPC_ACK_OUT_OF_SEQUENCE) && first_soft_ack == 1 && prev_pkt == 0 && call->acks_hard_ack == 0 && @@ -937,7 +930,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) call->acks_first_seq = first_soft_ack; call->acks_prev_seq = prev_pkt; - switch (ack.reason) { + switch (summary.ack_reason) { case RXRPC_ACK_PING: break; default: @@ -994,7 +987,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_congestion_management(call, skb, &summary, acked_serial); send_response: - if (ack.reason == RXRPC_ACK_PING) + if (summary.ack_reason == RXRPC_ACK_PING) rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, ack_serial, rxrpc_propose_ack_respond_to_ping); else if (sp->hdr.flags & RXRPC_REQUEST_ACK) diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index 4a3a08a0e2cd0..0300baa9afcd3 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -124,6 +124,7 @@ static bool rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) { struct rxrpc_wire_header whdr; + struct rxrpc_ackpacket ack; /* dig out the RxRPC connection details */ if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) @@ -141,6 +142,16 @@ static bool rxrpc_extract_header(struct rxrpc_skb_priv *sp, sp->hdr.securityIndex = whdr.securityIndex; sp->hdr._rsvd = ntohs(whdr._rsvd); sp->hdr.serviceId = ntohs(whdr.serviceId); + + if (sp->hdr.type == RXRPC_PACKET_TYPE_ACK) { + if (skb_copy_bits(skb, sizeof(whdr), &ack, sizeof(ack)) < 0) + return rxrpc_bad_message(skb, rxrpc_badmsg_short_ack); + sp->ack.first_ack = ntohl(ack.firstPacket); + sp->ack.prev_ack = ntohl(ack.previousPacket); + sp->ack.acked_serial = ntohl(ack.serial); + sp->ack.reason = ack.reason; + sp->ack.nr_acks = ack.nAcks; + } return true; } -- cgit 1.2.3-korg From ff73f8344e58e7557819f92c88f289ffa6116be7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 4 Mar 2024 13:29:31 -0800 Subject: sock: Use unsafe_memcpy() for sock_copy() While testing for places where zero-sized destinations were still showing up in the kernel, sock_copy() and inet_reqsk_clone() were found, which are using very specific memcpy() offsets for both avoiding a portion of struct sock, and copying beyond the end of it (since struct sock is really just a common header before the protocol-specific allocation). Instead of trying to unravel this historical lack of container_of(), just switch to unsafe_memcpy(), since that's effectively what was happening already (memcpy() wasn't checking 0-sized destinations while the code base was being converted away from fake flexible arrays). Avoid the following false positive warning with future changes to CONFIG_FORTIFY_SOURCE: memcpy: detected field-spanning write (size 3068) of destination "&nsk->__sk_common.skc_dontcopy_end" at net/core/sock.c:2057 (size 0) Signed-off-by: Kees Cook Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240304212928.make.772-kees@kernel.org Signed-off-by: Jakub Kicinski --- net/core/sock.c | 5 +++-- net/ipv4/inet_connection_sock.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index df2ac54a8f74e..43bf3818c19e8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2053,8 +2053,9 @@ static void sock_copy(struct sock *nsk, const struct sock *osk) memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin)); - memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end, - prot->obj_size - offsetof(struct sock, sk_dontcopy_end)); + unsafe_memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end, + prot->obj_size - offsetof(struct sock, sk_dontcopy_end), + /* alloc is larger than struct, see sk_prot_alloc() */); #ifdef CONFIG_SECURITY_NETWORK nsk->sk_security = sptr; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 747ed7344cbe4..7d8090f109ef4 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -906,8 +906,9 @@ static struct request_sock *inet_reqsk_clone(struct request_sock *req, memcpy(nreq_sk, req_sk, offsetof(struct sock, sk_dontcopy_begin)); - memcpy(&nreq_sk->sk_dontcopy_end, &req_sk->sk_dontcopy_end, - req->rsk_ops->obj_size - offsetof(struct sock, sk_dontcopy_end)); + unsafe_memcpy(&nreq_sk->sk_dontcopy_end, &req_sk->sk_dontcopy_end, + req->rsk_ops->obj_size - offsetof(struct sock, sk_dontcopy_end), + /* alloc is larger than struct, see above */); sk_node_init(&nreq_sk->sk_node); nreq_sk->sk_tx_queue_mapping = req_sk->sk_tx_queue_mapping; -- cgit 1.2.3-korg From 344f7a4651497ffc62166ec6318b33f79d71c3df Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 2 Mar 2024 15:18:27 +0100 Subject: ethtool: ignore unused/unreliable fields in set_eee op This function is used with the set_eee() ethtool operation. Certain fields of struct ethtool_keee() are relevant only for the get_eee() operation. In addition, in case of the ioctl interface, we have no guarantee that userspace sends sane values in struct ethtool_eee. Therefore explicitly ignore all fields not needed for set_eee(). This protects from drivers trying to use unchecked and unreliable data, relying on specific userspace behavior. Note: Such unsafe driver behavior has been found and fixed in the tg3 driver. Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/ad7ee11e-eb7a-4975-9122-547e13a161d8@gmail.com Signed-off-by: Jakub Kicinski --- net/ethtool/ioctl.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'net') diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 317308bdbda95..5a55270aa86e8 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1514,17 +1514,12 @@ static void eee_to_keee(struct ethtool_keee *keee, { memset(keee, 0, sizeof(*keee)); - keee->eee_active = eee->eee_active; keee->eee_enabled = eee->eee_enabled; keee->tx_lpi_enabled = eee->tx_lpi_enabled; keee->tx_lpi_timer = eee->tx_lpi_timer; - ethtool_convert_legacy_u32_to_link_mode(keee->supported, - eee->supported); ethtool_convert_legacy_u32_to_link_mode(keee->advertised, eee->advertised); - ethtool_convert_legacy_u32_to_link_mode(keee->lp_advertised, - eee->lp_advertised); } static void keee_to_eee(struct ethtool_eee *eee, -- cgit 1.2.3-korg From b5a899154aa94cc573db3ae1f61dabe7bfe8b579 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 2 Mar 2024 21:24:06 -0800 Subject: netlink: handle EMSGSIZE errors in the core Eric points out that our current suggested way of handling EMSGSIZE errors ((err == -EMSGSIZE) ? skb->len : err) will break if we didn't fit even a single object into the buffer provided by the user. This should not happen for well behaved applications, but we can fix that, and free netlink families from dealing with that completely by moving error handling into the core. Let's assume from now on that all EMSGSIZE errors in dumps are because we run out of skb space. Families can now propagate the error nla_put_*() etc generated and not worry about any return value magic. If some family really wants to send EMSGSIZE to user space, assuming it generates the same error on the next dump iteration the skb->len should be 0, and user space should still see the EMSGSIZE. This should simplify families and prevent mistakes in return values which lead to DONE being forced into a separate recv() call as discovered by Ido some time ago. Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index ad7b645e3ae7e..da846212fb9b2 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2267,6 +2267,15 @@ static int netlink_dump(struct sock *sk, bool lock_taken) if (extra_mutex) mutex_unlock(extra_mutex); + /* EMSGSIZE plus something already in the skb means + * that there's more to dump but current skb has filled up. + * If the callback really wants to return EMSGSIZE to user space + * it needs to do so again, on the next cb->dump() call, + * without putting data in the skb. + */ + if (nlk->dump_done_errno == -EMSGSIZE && skb->len) + nlk->dump_done_errno = skb->len; + cb->extack = NULL; } -- cgit 1.2.3-korg From 0b11b1c5c320555483e8a94c44549db24c289987 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 2 Mar 2024 21:24:07 -0800 Subject: netdev: let netlink core handle -EMSGSIZE errors Previous change added -EMSGSIZE handling to af_netlink, we don't have to hide these errors any longer. Theoretically the error handling changes from: if (err == -EMSGSIZE) to if (err == -EMSGSIZE && skb->len) everywhere, but in practice it doesn't matter. All messages fit into NLMSG_GOODSIZE, so overflow of an empty skb cannot happen. Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- net/core/netdev-genl.c | 15 +++------------ net/core/page_pool_user.c | 2 -- 2 files changed, 3 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index fd98936da3aec..918b109e0cf40 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -152,10 +152,7 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) } rtnl_unlock(); - if (err != -EMSGSIZE) - return err; - - return skb->len; + return err; } static int @@ -287,10 +284,7 @@ int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) } rtnl_unlock(); - if (err != -EMSGSIZE) - return err; - - return skb->len; + return err; } static int @@ -463,10 +457,7 @@ int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) } rtnl_unlock(); - if (err != -EMSGSIZE) - return err; - - return skb->len; + return err; } static int netdev_genl_netdevice_event(struct notifier_block *nb, diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c index ffe5244e5597e..53ad96f71b636 100644 --- a/net/core/page_pool_user.c +++ b/net/core/page_pool_user.c @@ -102,8 +102,6 @@ netdev_nl_page_pool_get_dump(struct sk_buff *skb, struct netlink_callback *cb, mutex_unlock(&page_pools_lock); rtnl_unlock(); - if (skb->len && err == -EMSGSIZE) - return skb->len; return err; } -- cgit 1.2.3-korg From 87d381973e49404f658d6923a617932eeda9415f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 2 Mar 2024 21:24:08 -0800 Subject: genetlink: fit NLMSG_DONE into same read() as families Make sure ctrl_fill_info() returns sensible error codes and propagate them out to netlink core. Let netlink core decide when to return skb->len and when to treat the exit as an error. Netlink core does better job at it, if we always return skb->len the core doesn't know when we're done dumping and NLMSG_DONE ends up in a separate read(). Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 50ec599a5cff5..3b7666944b11c 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1232,7 +1232,7 @@ static int ctrl_fill_info(const struct genl_family *family, u32 portid, u32 seq, hdr = genlmsg_put(skb, portid, seq, &genl_ctrl, flags, cmd); if (hdr == NULL) - return -1; + return -EMSGSIZE; if (nla_put_string(skb, CTRL_ATTR_FAMILY_NAME, family->name) || nla_put_u16(skb, CTRL_ATTR_FAMILY_ID, family->id) || @@ -1355,6 +1355,7 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) struct net *net = sock_net(skb->sk); int fams_to_skip = cb->args[0]; unsigned int id; + int err = 0; idr_for_each_entry(&genl_fam_idr, rt, id) { if (!rt->netnsok && !net_eq(net, &init_net)) @@ -1363,16 +1364,17 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) if (n++ < fams_to_skip) continue; - if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - skb, CTRL_CMD_NEWFAMILY) < 0) { + err = ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + skb, CTRL_CMD_NEWFAMILY); + if (err) { n--; break; } } cb->args[0] = n; - return skb->len; + return err; } static struct sk_buff *ctrl_build_family_msg(const struct genl_family *family, -- cgit 1.2.3-korg From eeb78df4063c0b162324a9408ef573b24791871f Mon Sep 17 00:00:00 2001 From: Juntong Deng Date: Mon, 4 Mar 2024 11:32:08 +0000 Subject: inet: Add getsockopt support for IP_ROUTER_ALERT and IPV6_ROUTER_ALERT Currently getsockopt does not support IP_ROUTER_ALERT and IPV6_ROUTER_ALERT, and we are unable to get the values of these two socket options through getsockopt. This patch adds getsockopt support for IP_ROUTER_ALERT and IPV6_ROUTER_ALERT. Signed-off-by: Juntong Deng Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_sock.h | 1 + net/ipv4/ip_sockglue.c | 13 ++++++++++--- net/ipv6/ipv6_sockglue.c | 6 ++++++ 3 files changed, 17 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index d94c242eb3ed2..f9ddd47dc4f8d 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -274,6 +274,7 @@ enum { INET_FLAGS_REPFLOW = 27, INET_FLAGS_RTALERT_ISOLATE = 28, INET_FLAGS_SNDFLOW = 29, + INET_FLAGS_RTALERT = 30, }; /* cmsg flags for inet */ diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 21d2ffa919e98..cf377377b52d8 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -894,7 +894,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, { struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); - int val = 0, err; + int val = 0, err, retv; bool needs_rtnl = setsockopt_needs_rtnl(optname); switch (optname) { @@ -938,8 +938,12 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, /* If optlen==0, it is equivalent to val == 0 */ - if (optname == IP_ROUTER_ALERT) - return ip_ra_control(sk, val ? 1 : 0, NULL); + if (optname == IP_ROUTER_ALERT) { + retv = ip_ra_control(sk, val ? 1 : 0, NULL); + if (retv == 0) + inet_assign_bit(RTALERT, sk, val); + return retv; + } if (ip_mroute_opt(optname)) return ip_mroute_setsockopt(sk, optname, optval, optlen); @@ -1575,6 +1579,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_BIND_ADDRESS_NO_PORT: val = inet_test_bit(BIND_ADDRESS_NO_PORT, sk); goto copyval; + case IP_ROUTER_ALERT: + val = inet_test_bit(RTALERT, sk); + goto copyval; case IP_TTL: val = READ_ONCE(inet->uc_ttl); if (val < 0) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index f61d977ac0528..d4c28ec1bc517 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -948,6 +948,8 @@ done: if (optlen < sizeof(int)) goto e_inval; retv = ip6_ra_control(sk, val); + if (retv == 0) + inet6_assign_bit(RTALERT, sk, valbool); break; case IPV6_FLOWLABEL_MGR: retv = ipv6_flowlabel_opt(sk, optval, optlen); @@ -1445,6 +1447,10 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->rxopt.bits.recvfragsize; break; + case IPV6_ROUTER_ALERT: + val = inet6_test_bit(RTALERT, sk); + break; + case IPV6_ROUTER_ALERT_ISOLATE: val = inet6_test_bit(RTALERT_ISOLATE, sk); break; -- cgit 1.2.3-korg From e8a1e58345cf40b7b272e08ac7b32328b2543e40 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Wed, 28 Feb 2024 19:38:39 +0300 Subject: mac802154: fix llsec key resources release in mac802154_llsec_key_del mac802154_llsec_key_del() can free resources of a key directly without following the RCU rules for waiting before the end of a grace period. This may lead to use-after-free in case llsec_lookup_key() is traversing the list of keys in parallel with a key deletion: refcount_t: addition on 0; use-after-free. WARNING: CPU: 4 PID: 16000 at lib/refcount.c:25 refcount_warn_saturate+0x162/0x2a0 Modules linked in: CPU: 4 PID: 16000 Comm: wpan-ping Not tainted 6.7.0 #19 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 RIP: 0010:refcount_warn_saturate+0x162/0x2a0 Call Trace: llsec_lookup_key.isra.0+0x890/0x9e0 mac802154_llsec_encrypt+0x30c/0x9c0 ieee802154_subif_start_xmit+0x24/0x1e0 dev_hard_start_xmit+0x13e/0x690 sch_direct_xmit+0x2ae/0xbc0 __dev_queue_xmit+0x11dd/0x3c20 dgram_sendmsg+0x90b/0xd60 __sys_sendto+0x466/0x4c0 __x64_sys_sendto+0xe0/0x1c0 do_syscall_64+0x45/0xf0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 Also, ieee802154_llsec_key_entry structures are not freed by mac802154_llsec_key_del(): unreferenced object 0xffff8880613b6980 (size 64): comm "iwpan", pid 2176, jiffies 4294761134 (age 60.475s) hex dump (first 32 bytes): 78 0d 8f 18 80 88 ff ff 22 01 00 00 00 00 ad de x......."....... 00 00 00 00 00 00 00 00 03 00 cd ab 00 00 00 00 ................ backtrace: [] __kmem_cache_alloc_node+0x1e2/0x2d0 [] kmalloc_trace+0x25/0xc0 [] mac802154_llsec_key_add+0xac9/0xcf0 [] ieee802154_add_llsec_key+0x5a/0x80 [] nl802154_add_llsec_key+0x426/0x5b0 [] genl_family_rcv_msg_doit+0x1fe/0x2f0 [] genl_rcv_msg+0x531/0x7d0 [] netlink_rcv_skb+0x169/0x440 [] genl_rcv+0x28/0x40 [] netlink_unicast+0x53c/0x820 [] netlink_sendmsg+0x93b/0xe60 [] ____sys_sendmsg+0xac5/0xca0 [] ___sys_sendmsg+0x11d/0x1c0 [] __sys_sendmsg+0xfa/0x1d0 [] do_syscall_64+0x45/0xf0 [] entry_SYSCALL_64_after_hwframe+0x6e/0x76 Handle the proper resource release in the RCU callback function mac802154_llsec_key_del_rcu(). Note that if llsec_lookup_key() finds a key, it gets a refcount via llsec_key_get() and locally copies key id from key_entry (which is a list element). So it's safe to call llsec_key_put() and free the list entry after the RCU grace period elapses. Found by Linux Verification Center (linuxtesting.org). Fixes: 5d637d5aabd8 ("mac802154: add llsec structures and mutators") Cc: stable@vger.kernel.org Signed-off-by: Fedor Pchelkin Acked-by: Alexander Aring Message-ID: <20240228163840.6667-1-pchelkin@ispras.ru> Signed-off-by: Stefan Schmidt --- include/net/cfg802154.h | 1 + net/mac802154/llsec.c | 18 +++++++++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index cd95711b12b8e..76d2cd2e2b309 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -401,6 +401,7 @@ struct ieee802154_llsec_key { struct ieee802154_llsec_key_entry { struct list_head list; + struct rcu_head rcu; struct ieee802154_llsec_key_id id; struct ieee802154_llsec_key *key; diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c index 8d2eabc71bbeb..f13b07ebfb98a 100644 --- a/net/mac802154/llsec.c +++ b/net/mac802154/llsec.c @@ -265,19 +265,27 @@ fail: return -ENOMEM; } +static void mac802154_llsec_key_del_rcu(struct rcu_head *rcu) +{ + struct ieee802154_llsec_key_entry *pos; + struct mac802154_llsec_key *mkey; + + pos = container_of(rcu, struct ieee802154_llsec_key_entry, rcu); + mkey = container_of(pos->key, struct mac802154_llsec_key, key); + + llsec_key_put(mkey); + kfree_sensitive(pos); +} + int mac802154_llsec_key_del(struct mac802154_llsec *sec, const struct ieee802154_llsec_key_id *key) { struct ieee802154_llsec_key_entry *pos; list_for_each_entry(pos, &sec->table.keys, list) { - struct mac802154_llsec_key *mkey; - - mkey = container_of(pos->key, struct mac802154_llsec_key, key); - if (llsec_key_id_equal(&pos->id, key)) { list_del_rcu(&pos->list); - llsec_key_put(mkey); + call_rcu(&pos->rcu, mac802154_llsec_key_del_rcu); return 0; } } -- cgit 1.2.3-korg From b2d23256615c8f8b3215f0155b0234f0e310dfde Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marliere" Date: Tue, 5 Mar 2024 16:55:24 -0300 Subject: ieee802154: cfg802154: make wpan_phy_class constant Since commit 43a7206b0963 ("driver core: class: make class_register() take a const *"), the driver core allows for struct class to be in read-only memory, so move the wpan_phy_class structure to be declared at build time placing it into read-only memory, instead of having to be dynamically allocated at boot time. Cc: Greg Kroah-Hartman Suggested-by: Greg Kroah-Hartman Signed-off-by: Ricardo B. Marliere Acked-by: Miquel Raynal Message-ID: <20240305-class_cleanup-wpan-v1-1-376f751fd481@marliere.net> [changed prefix from wifi to ieee802154 by stefan@datenfreihafen.org] Signed-off-by: Stefan Schmidt --- net/ieee802154/sysfs.c | 2 +- net/ieee802154/sysfs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ieee802154/sysfs.c b/net/ieee802154/sysfs.c index d2903933805c5..6708160ebf9f6 100644 --- a/net/ieee802154/sysfs.c +++ b/net/ieee802154/sysfs.c @@ -93,7 +93,7 @@ static SIMPLE_DEV_PM_OPS(wpan_phy_pm_ops, wpan_phy_suspend, wpan_phy_resume); #define WPAN_PHY_PM_OPS NULL #endif -struct class wpan_phy_class = { +const struct class wpan_phy_class = { .name = "ieee802154", .dev_release = wpan_phy_release, .dev_groups = pmib_groups, diff --git a/net/ieee802154/sysfs.h b/net/ieee802154/sysfs.h index 337545b639e9c..69961e1662575 100644 --- a/net/ieee802154/sysfs.h +++ b/net/ieee802154/sysfs.h @@ -5,6 +5,6 @@ int wpan_phy_sysfs_init(void); void wpan_phy_sysfs_exit(void); -extern struct class wpan_phy_class; +extern const struct class wpan_phy_class; #endif /* __IEEE802154_SYSFS_H */ -- cgit 1.2.3-korg From fee054b7579fe252f8b9e6c17b9c5bfdaa84dd7e Mon Sep 17 00:00:00 2001 From: Jonas Dreßler Date: Sun, 7 Jan 2024 19:02:48 +0100 Subject: Bluetooth: mgmt: Remove leftover queuing of power_off work MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Queuing of power_off work was introduced in these functions with commits 8b064a3ad377 ("Bluetooth: Clean up HCI state when doing power off") and c9910d0fb4fc ("Bluetooth: Fix disconnecting connections in non-connected states") in an effort to clean up state and do things like disconnecting devices before actually powering off the device. After that, commit a3172b7eb4a2 ("Bluetooth: Add timer to force power off") introduced a timeout to ensure that the device actually got powered off, even if some of the cleanup work would never complete. This code later got refactored with commit cf75ad8b41d2 ("Bluetooth: hci_sync: Convert MGMT_SET_POWERED"), which made powering off the device synchronous and removed the need for initiating the power_off work from other places. The timeout mentioned above got removed too, because we now also made use of the command timeout during power on/off. These days the power_off work still exists, but it only seems to only be used for HCI_AUTO_OFF functionality, which is why we never noticed those two leftover places where we queue power_off work. So let's remove that code. Fixes: cf75ad8b41d2 ("Bluetooth: hci_sync: Convert MGMT_SET_POWERED") Signed-off-by: Jonas Dreßler Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ee3b4aad8bd8d..688890f581cba 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -9766,14 +9766,6 @@ void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, struct mgmt_ev_device_disconnected ev; struct sock *sk = NULL; - /* The connection is still in hci_conn_hash so test for 1 - * instead of 0 to know if this is the last one. - */ - if (mgmt_powering_down(hdev) && hci_conn_count(hdev) == 1) { - cancel_delayed_work(&hdev->power_off); - queue_work(hdev->req_workqueue, &hdev->power_off.work); - } - if (!mgmt_connected) return; @@ -9830,14 +9822,6 @@ void mgmt_connect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, { struct mgmt_ev_connect_failed ev; - /* The connection is still in hci_conn_hash so test for 1 - * instead of 0 to know if this is the last one. - */ - if (mgmt_powering_down(hdev) && hci_conn_count(hdev) == 1) { - cancel_delayed_work(&hdev->power_off); - queue_work(hdev->req_workqueue, &hdev->power_off.work); - } - bacpy(&ev.addr.bdaddr, bdaddr); ev.addr.type = link_to_bdaddr(link_type, addr_type); ev.status = mgmt_status(status); -- cgit 1.2.3-korg From b14202aff5acba3b672704d792e821f02f8f562a Mon Sep 17 00:00:00 2001 From: Jonas Dreßler Date: Sun, 7 Jan 2024 19:02:49 +0100 Subject: Bluetooth: Add new state HCI_POWERING_DOWN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new state HCI_POWERING_DOWN that indicates that the device is currently powering down, this will be useful for the next commit. Signed-off-by: Jonas Dreßler Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 1 + net/bluetooth/hci_sync.c | 16 +++++++++++----- net/bluetooth/mgmt.c | 14 ++++++++++++++ 3 files changed, 26 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index f7918c7551834..a94a8491ec7a1 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -372,6 +372,7 @@ enum { HCI_SETUP, HCI_CONFIG, HCI_DEBUGFS_CREATED, + HCI_POWERING_DOWN, HCI_AUTO_OFF, HCI_RFKILLED, HCI_MGMT, diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 5716345a26dfb..b146562a65fc4 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -5403,27 +5403,33 @@ static int hci_power_off_sync(struct hci_dev *hdev) if (!test_bit(HCI_UP, &hdev->flags)) return 0; + hci_dev_set_flag(hdev, HCI_POWERING_DOWN); + if (test_bit(HCI_ISCAN, &hdev->flags) || test_bit(HCI_PSCAN, &hdev->flags)) { err = hci_write_scan_enable_sync(hdev, 0x00); if (err) - return err; + goto out; } err = hci_clear_adv_sync(hdev, NULL, false); if (err) - return err; + goto out; err = hci_stop_discovery_sync(hdev); if (err) - return err; + goto out; /* Terminated due to Power Off */ err = hci_disconnect_all_sync(hdev, HCI_ERROR_REMOTE_POWER_OFF); if (err) - return err; + goto out; + + err = hci_dev_close_sync(hdev); - return hci_dev_close_sync(hdev); +out: + hci_dev_clear_flag(hdev, HCI_POWERING_DOWN); + return err; } int hci_set_powered_sync(struct hci_dev *hdev, u8 val) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 688890f581cba..3fe6db8a0ec02 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1390,6 +1390,14 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_lock(hdev); + if (!cp->val) { + if (hci_dev_test_flag(hdev, HCI_POWERING_DOWN)) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED, + MGMT_STATUS_BUSY); + goto failed; + } + } + if (pending_find(MGMT_OP_SET_POWERED, hdev)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED, MGMT_STATUS_BUSY); @@ -9748,6 +9756,9 @@ bool mgmt_powering_down(struct hci_dev *hdev) struct mgmt_pending_cmd *cmd; struct mgmt_mode *cp; + if (hci_dev_test_flag(hdev, HCI_POWERING_DOWN)) + return true; + cmd = pending_find(MGMT_OP_SET_POWERED, hdev); if (!cmd) return false; @@ -10055,6 +10066,9 @@ void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status) /* If this is a HCI command related to powering on the * HCI dev don't send any mgmt signals. */ + if (hci_dev_test_flag(hdev, HCI_POWERING_DOWN)) + return; + if (pending_find(MGMT_OP_SET_POWERED, hdev)) return; } -- cgit 1.2.3-korg From d77433cdd2524cb924a5ec0476429330e9ee9f0e Mon Sep 17 00:00:00 2001 From: Jonas Dreßler Date: Sun, 7 Jan 2024 19:02:50 +0100 Subject: Bluetooth: Disconnect connected devices before rfkilling adapter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On a lot of platforms (at least the MS Surface devices, M1 macbooks, and a few ThinkPads) firmware doesn't do its job when rfkilling a device and the bluetooth adapter is not actually shut down properly on rfkill. This leads to connected devices remaining in connected state and the bluetooth connection eventually timing out after rfkilling an adapter. Use the rfkill hook in the HCI driver to go through the full power-off sequence (including stopping scans and disconnecting devices) before rfkilling it, just like MGMT_OP_SET_POWERED would do. In case anything during the larger power-off sequence fails, make sure the device is still closed and the rfkill ends up being effective in the end. Signed-off-by: Jonas Dreßler Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_core.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2821a42cefdc6..e5cb618fa6d39 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -940,20 +940,51 @@ int hci_get_dev_info(void __user *arg) /* ---- Interface to HCI drivers ---- */ +static int hci_dev_do_poweroff(struct hci_dev *hdev) +{ + int err; + + BT_DBG("%s %p", hdev->name, hdev); + + hci_req_sync_lock(hdev); + + err = hci_set_powered_sync(hdev, false); + + hci_req_sync_unlock(hdev); + + return err; +} + static int hci_rfkill_set_block(void *data, bool blocked) { struct hci_dev *hdev = data; + int err; BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked); if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) return -EBUSY; + if (blocked == hci_dev_test_flag(hdev, HCI_RFKILLED)) + return 0; + if (blocked) { hci_dev_set_flag(hdev, HCI_RFKILLED); + if (!hci_dev_test_flag(hdev, HCI_SETUP) && - !hci_dev_test_flag(hdev, HCI_CONFIG)) - hci_dev_do_close(hdev); + !hci_dev_test_flag(hdev, HCI_CONFIG)) { + err = hci_dev_do_poweroff(hdev); + if (err) { + bt_dev_err(hdev, "Error when powering off device on rfkill (%d)", + err); + + /* Make sure the device is still closed even if + * anything during power off sequence (eg. + * disconnecting devices) failed. + */ + hci_dev_do_close(hdev); + } + } } else { hci_dev_clear_flag(hdev, HCI_RFKILLED); } -- cgit 1.2.3-korg From 78e3639fc8031275010c3287ac548c0bc8de83b1 Mon Sep 17 00:00:00 2001 From: Jonas Dreßler Date: Mon, 8 Jan 2024 23:46:06 +0100 Subject: Bluetooth: Remove superfluous call to hci_conn_check_pending() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "pending connections" feature was originally introduced with commit 4c67bc74f016 ("[Bluetooth] Support concurrent connect requests") and 6bd57416127e ("[Bluetooth] Handling pending connect attempts after inquiry") to handle controllers supporting only a single connection request at a time. Later things were extended to also cancel ongoing inquiries on connect() with commit 89e65975fea5 ("Bluetooth: Cancel Inquiry before Create Connection"). With commit a9de9248064b ("[Bluetooth] Switch from OGF+OCF to using only opcodes"), hci_conn_check_pending() was introduced as a helper to consolidate a few places where we check for pending connections (indicated by the BT_CONNECT2 flag) and then try to connect. This refactoring commit also snuck in two more calls to hci_conn_check_pending(): - One is in the failure callback of hci_cs_inquiry(), this one probably makes sense: If we send an "HCI Inquiry" command and then immediately after a "Create Connection" command, the "Create Connection" command might fail before the "HCI Inquiry" command, and then we want to retry the "Create Connection" on failure of the "HCI Inquiry". - The other added call to hci_conn_check_pending() is in the event handler for the "Remote Name" event, this seems unrelated and is possibly a copy-paste error, so remove that one. Fixes: a9de9248064b ("[Bluetooth] Switch from OGF+OCF to using only opcodes") Signed-off-by: Jonas Dreßler Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 2a5f5a7d2412b..1f63f77661dce 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3556,8 +3556,6 @@ static void hci_remote_name_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); - hci_conn_check_pending(hdev); - hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); -- cgit 1.2.3-korg From 79c0868ad65a8fc7cdfaa5f2b77a4b70d0b0ea16 Mon Sep 17 00:00:00 2001 From: Jonas Dreßler Date: Mon, 8 Jan 2024 23:46:07 +0100 Subject: Bluetooth: hci_event: Use HCI error defines instead of magic values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have error defines already, so let's use them. Signed-off-by: Jonas Dreßler Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 2 ++ net/bluetooth/hci_event.c | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index a94a8491ec7a1..1cd212bb37891 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -653,6 +653,7 @@ enum { #define HCI_ERROR_PIN_OR_KEY_MISSING 0x06 #define HCI_ERROR_MEMORY_EXCEEDED 0x07 #define HCI_ERROR_CONNECTION_TIMEOUT 0x08 +#define HCI_ERROR_COMMAND_DISALLOWED 0x0c #define HCI_ERROR_REJ_LIMITED_RESOURCES 0x0d #define HCI_ERROR_REJ_BAD_ADDR 0x0f #define HCI_ERROR_INVALID_PARAMETERS 0x12 @@ -661,6 +662,7 @@ enum { #define HCI_ERROR_REMOTE_POWER_OFF 0x15 #define HCI_ERROR_LOCAL_HOST_TERM 0x16 #define HCI_ERROR_PAIRING_NOT_ALLOWED 0x18 +#define HCI_ERROR_UNSUPPORTED_REMOTE_FEATURE 0x1e #define HCI_ERROR_INVALID_LL_PARAMS 0x1e #define HCI_ERROR_UNSPECIFIED 0x1f #define HCI_ERROR_ADVERTISING_TIMEOUT 0x3c diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 1f63f77661dce..31df5f5b79945 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -95,11 +95,11 @@ static u8 hci_cc_inquiry_cancel(struct hci_dev *hdev, void *data, /* It is possible that we receive Inquiry Complete event right * before we receive Inquiry Cancel Command Complete event, in * which case the latter event should have status of Command - * Disallowed (0x0c). This should not be treated as error, since + * Disallowed. This should not be treated as error, since * we actually achieve what Inquiry Cancel wants to achieve, * which is to end the last Inquiry session. */ - if (rp->status == 0x0c && !test_bit(HCI_INQUIRY, &hdev->flags)) { + if (rp->status == HCI_ERROR_COMMAND_DISALLOWED && !test_bit(HCI_INQUIRY, &hdev->flags)) { bt_dev_warn(hdev, "Ignoring error of Inquiry Cancel command"); rp->status = 0x00; } @@ -2342,7 +2342,7 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) if (status) { if (conn && conn->state == BT_CONNECT) { - if (status != 0x0c || conn->attempt > 2) { + if (status != HCI_ERROR_COMMAND_DISALLOWED || conn->attempt > 2) { conn->state = BT_CLOSED; hci_connect_cfm(conn, status); hci_conn_del(conn); @@ -6682,7 +6682,7 @@ static void hci_le_remote_feat_complete_evt(struct hci_dev *hdev, void *data, * transition into connected state and mark it as * successful. */ - if (!conn->out && ev->status == 0x1a && + if (!conn->out && ev->status == HCI_ERROR_UNSUPPORTED_REMOTE_FEATURE && (hdev->le_features[0] & HCI_LE_PERIPHERAL_FEATURES)) status = 0x00; else -- cgit 1.2.3-korg From 63298d6e752fc0ec7f5093860af8bc9f047b30c8 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 9 Jan 2024 13:45:40 -0500 Subject: Bluetooth: hci_core: Cancel request on command timeout If command has timed out call __hci_cmd_sync_cancel to notify the hci_req since it will inevitably cause a timeout. This also rework the code around __hci_cmd_sync_cancel since it was wrongly assuming it needs to cancel timer as well, but sometimes the timers have not been started or in fact they already had timed out in which case they don't need to be cancel yet again. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 2 +- net/bluetooth/hci_core.c | 84 +++++++++++++++++++++++++++------------- net/bluetooth/hci_request.c | 2 +- net/bluetooth/hci_sync.c | 20 +++++----- net/bluetooth/mgmt.c | 2 +- 5 files changed, 71 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 6efbc2152146b..e2582c2425449 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -42,7 +42,7 @@ int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen, void hci_cmd_sync_init(struct hci_dev *hdev); void hci_cmd_sync_clear(struct hci_dev *hdev); void hci_cmd_sync_cancel(struct hci_dev *hdev, int err); -void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err); +void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err); int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index e5cb618fa6d39..de730d210ccb4 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1523,10 +1523,11 @@ static void hci_cmd_timeout(struct work_struct *work) cmd_timer.work); if (hdev->sent_cmd) { - struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data; - u16 opcode = __le16_to_cpu(sent->opcode); + u16 opcode = hci_skb_opcode(hdev->sent_cmd); bt_dev_err(hdev, "command 0x%4.4x tx timeout", opcode); + + hci_cmd_sync_cancel_sync(hdev, ETIMEDOUT); } else { bt_dev_err(hdev, "command tx timeout"); } @@ -2857,6 +2858,23 @@ int hci_unregister_suspend_notifier(struct hci_dev *hdev) return ret; } +/* Cancel ongoing command synchronously: + * + * - Cancel command timer + * - Reset command counter + * - Cancel command request + */ +static void hci_cancel_cmd_sync(struct hci_dev *hdev, int err) +{ + bt_dev_dbg(hdev, "err 0x%2.2x", err); + + cancel_delayed_work_sync(&hdev->cmd_timer); + cancel_delayed_work_sync(&hdev->ncmd_timer); + atomic_set(&hdev->cmd_cnt, 1); + + hci_cmd_sync_cancel_sync(hdev, -err); +} + /* Suspend HCI device */ int hci_suspend_dev(struct hci_dev *hdev) { @@ -2874,7 +2892,7 @@ int hci_suspend_dev(struct hci_dev *hdev) return 0; /* Cancel potentially blocking sync operation before suspend */ - __hci_cmd_sync_cancel(hdev, -EHOSTDOWN); + hci_cancel_cmd_sync(hdev, -EHOSTDOWN); hci_req_sync_lock(hdev); ret = hci_suspend_sync(hdev); @@ -4159,6 +4177,33 @@ static void hci_rx_work(struct work_struct *work) } } +static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) +{ + int err; + + bt_dev_dbg(hdev, "skb %p", skb); + + kfree_skb(hdev->sent_cmd); + + hdev->sent_cmd = skb_clone(skb, GFP_KERNEL); + if (!hdev->sent_cmd) { + skb_queue_head(&hdev->cmd_q, skb); + queue_work(hdev->workqueue, &hdev->cmd_work); + return; + } + + err = hci_send_frame(hdev, skb); + if (err < 0) { + hci_cmd_sync_cancel_sync(hdev, err); + return; + } + + if (hci_req_status_pend(hdev)) + hci_dev_set_flag(hdev, HCI_CMD_PENDING); + + atomic_dec(&hdev->cmd_cnt); +} + static void hci_cmd_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_work); @@ -4173,30 +4218,15 @@ static void hci_cmd_work(struct work_struct *work) if (!skb) return; - kfree_skb(hdev->sent_cmd); - - hdev->sent_cmd = skb_clone(skb, GFP_KERNEL); - if (hdev->sent_cmd) { - int res; - if (hci_req_status_pend(hdev)) - hci_dev_set_flag(hdev, HCI_CMD_PENDING); - atomic_dec(&hdev->cmd_cnt); + hci_send_cmd_sync(hdev, skb); - res = hci_send_frame(hdev, skb); - if (res < 0) - __hci_cmd_sync_cancel(hdev, -res); - - rcu_read_lock(); - if (test_bit(HCI_RESET, &hdev->flags) || - hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) - cancel_delayed_work(&hdev->cmd_timer); - else - queue_delayed_work(hdev->workqueue, &hdev->cmd_timer, - HCI_CMD_TIMEOUT); - rcu_read_unlock(); - } else { - skb_queue_head(&hdev->cmd_q, skb); - queue_work(hdev->workqueue, &hdev->cmd_work); - } + rcu_read_lock(); + if (test_bit(HCI_RESET, &hdev->flags) || + hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) + cancel_delayed_work(&hdev->cmd_timer); + else + queue_delayed_work(hdev->workqueue, &hdev->cmd_timer, + HCI_CMD_TIMEOUT); + rcu_read_unlock(); } } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 6e023b0104b03..00e02138003ec 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -895,7 +895,7 @@ void hci_request_setup(struct hci_dev *hdev) void hci_request_cancel_all(struct hci_dev *hdev) { - __hci_cmd_sync_cancel(hdev, ENODEV); + hci_cmd_sync_cancel_sync(hdev, ENODEV); cancel_interleave_scan(hdev); } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index b146562a65fc4..1122296ce3fa3 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -584,7 +584,7 @@ void hci_cmd_sync_clear(struct hci_dev *hdev) mutex_unlock(&hdev->cmd_sync_work_lock); } -void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err) +void hci_cmd_sync_cancel(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); @@ -592,15 +592,17 @@ void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err) hdev->req_result = err; hdev->req_status = HCI_REQ_CANCELED; - cancel_delayed_work_sync(&hdev->cmd_timer); - cancel_delayed_work_sync(&hdev->ncmd_timer); - atomic_set(&hdev->cmd_cnt, 1); - - wake_up_interruptible(&hdev->req_wait_q); + queue_work(hdev->workqueue, &hdev->cmd_sync_cancel_work); } } +EXPORT_SYMBOL(hci_cmd_sync_cancel); -void hci_cmd_sync_cancel(struct hci_dev *hdev, int err) +/* Cancel ongoing command request synchronously: + * + * - Set result and mark status to HCI_REQ_CANCELED + * - Wakeup command sync thread + */ +void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err) { bt_dev_dbg(hdev, "err 0x%2.2x", err); @@ -608,10 +610,10 @@ void hci_cmd_sync_cancel(struct hci_dev *hdev, int err) hdev->req_result = err; hdev->req_status = HCI_REQ_CANCELED; - queue_work(hdev->workqueue, &hdev->cmd_sync_cancel_work); + wake_up_interruptible(&hdev->req_wait_q); } } -EXPORT_SYMBOL(hci_cmd_sync_cancel); +EXPORT_SYMBOL(hci_cmd_sync_cancel_sync); /* Submit HCI command to be run in as cmd_sync_work: * diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3fe6db8a0ec02..8c4493255f92a 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1417,7 +1417,7 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, /* Cancel potentially blocking sync operation before power off */ if (cp->val == 0x00) { - __hci_cmd_sync_cancel(hdev, -EHOSTDOWN); + hci_cmd_sync_cancel_sync(hdev, -EHOSTDOWN); err = hci_cmd_sync_queue(hdev, set_powered_sync, cmd, mgmt_set_powered_complete); } else { -- cgit 1.2.3-korg From 9c16d0c8d93e3d2a95c5ed927b061f244db75579 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 15 Jan 2024 21:12:19 +0100 Subject: Bluetooth: Remove usage of the deprecated ida_simple_xx() API ida_alloc() and ida_free() should be preferred to the deprecated ida_simple_get() and ida_simple_remove(). Note that the upper limit of ida_simple_get() is exclusive, but the one of ida_alloc_max() is inclusive. So a -1 has been added when needed. Signed-off-by: Christophe JAILLET Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_core.c | 9 +++++---- net/bluetooth/hci_sock.c | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index de730d210ccb4..34c8dca2069f6 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2640,10 +2640,11 @@ int hci_register_dev(struct hci_dev *hdev) */ switch (hdev->dev_type) { case HCI_PRIMARY: - id = ida_simple_get(&hci_index_ida, 0, HCI_MAX_ID, GFP_KERNEL); + id = ida_alloc_max(&hci_index_ida, HCI_MAX_ID - 1, GFP_KERNEL); break; case HCI_AMP: - id = ida_simple_get(&hci_index_ida, 1, HCI_MAX_ID, GFP_KERNEL); + id = ida_alloc_range(&hci_index_ida, 1, HCI_MAX_ID - 1, + GFP_KERNEL); break; default: return -EINVAL; @@ -2742,7 +2743,7 @@ err_wqueue: destroy_workqueue(hdev->workqueue); destroy_workqueue(hdev->req_workqueue); err: - ida_simple_remove(&hci_index_ida, hdev->id); + ida_free(&hci_index_ida, hdev->id); return error; } @@ -2825,7 +2826,7 @@ void hci_release_dev(struct hci_dev *hdev) hci_dev_unlock(hdev); ida_destroy(&hdev->unset_handle_ida); - ida_simple_remove(&hci_index_ida, hdev->id); + ida_free(&hci_index_ida, hdev->id); kfree_skb(hdev->sent_cmd); kfree_skb(hdev->recv_event); kfree(hdev); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 3e7cd330d731a..4ee1b976678b2 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -101,7 +101,7 @@ static bool hci_sock_gen_cookie(struct sock *sk) int id = hci_pi(sk)->cookie; if (!id) { - id = ida_simple_get(&sock_cookie_ida, 1, 0, GFP_KERNEL); + id = ida_alloc_min(&sock_cookie_ida, 1, GFP_KERNEL); if (id < 0) id = 0xffffffff; @@ -119,7 +119,7 @@ static void hci_sock_free_cookie(struct sock *sk) if (id) { hci_pi(sk)->cookie = 0xffffffff; - ida_simple_remove(&sock_cookie_ida, id); + ida_free(&sock_cookie_ida, id); } } -- cgit 1.2.3-korg From e7b02296fb400ee64822fbdd81a0718449066333 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 1 Feb 2024 11:18:58 -0500 Subject: Bluetooth: Remove BT_HS High Speed, Alternate MAC and PHY (AMP) extension, has been removed from Bluetooth Core specification on 5.3: https://www.bluetooth.com/blog/new-core-specification-v5-3-feature-enhancements/ Fixes: 244bc377591c ("Bluetooth: Add BT_HS config option") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 1 - include/net/bluetooth/l2cap.h | 42 -- net/bluetooth/Kconfig | 8 - net/bluetooth/Makefile | 1 - net/bluetooth/a2mp.c | 1054 ---------------------------------------- net/bluetooth/a2mp.h | 154 ------ net/bluetooth/amp.c | 590 ----------------------- net/bluetooth/amp.h | 60 --- net/bluetooth/hci_conn.c | 4 - net/bluetooth/hci_event.c | 2 - net/bluetooth/l2cap_core.c | 1069 +---------------------------------------- net/bluetooth/l2cap_sock.c | 18 +- net/bluetooth/mgmt.c | 73 +-- 13 files changed, 20 insertions(+), 3056 deletions(-) delete mode 100644 net/bluetooth/a2mp.c delete mode 100644 net/bluetooth/a2mp.h delete mode 100644 net/bluetooth/amp.c delete mode 100644 net/bluetooth/amp.h (limited to 'net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 1cd212bb37891..aa6c69053d7cd 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -394,7 +394,6 @@ enum { HCI_LIMITED_PRIVACY, HCI_RPA_EXPIRED, HCI_RPA_RESOLVING, - HCI_HS_ENABLED, HCI_LE_ENABLED, HCI_ADVERTISING, HCI_ADVERTISING_CONNECTABLE, diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index cf393e72d6ed6..92d7197f9a563 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -59,8 +59,6 @@ #define L2CAP_WAIT_ACK_POLL_PERIOD msecs_to_jiffies(200) #define L2CAP_WAIT_ACK_TIMEOUT msecs_to_jiffies(10000) -#define L2CAP_A2MP_DEFAULT_MTU 670 - /* L2CAP socket address */ struct sockaddr_l2 { sa_family_t l2_family; @@ -109,12 +107,6 @@ struct l2cap_conninfo { #define L2CAP_ECHO_RSP 0x09 #define L2CAP_INFO_REQ 0x0a #define L2CAP_INFO_RSP 0x0b -#define L2CAP_CREATE_CHAN_REQ 0x0c -#define L2CAP_CREATE_CHAN_RSP 0x0d -#define L2CAP_MOVE_CHAN_REQ 0x0e -#define L2CAP_MOVE_CHAN_RSP 0x0f -#define L2CAP_MOVE_CHAN_CFM 0x10 -#define L2CAP_MOVE_CHAN_CFM_RSP 0x11 #define L2CAP_CONN_PARAM_UPDATE_REQ 0x12 #define L2CAP_CONN_PARAM_UPDATE_RSP 0x13 #define L2CAP_LE_CONN_REQ 0x14 @@ -144,7 +136,6 @@ struct l2cap_conninfo { /* L2CAP fixed channels */ #define L2CAP_FC_SIG_BREDR 0x02 #define L2CAP_FC_CONNLESS 0x04 -#define L2CAP_FC_A2MP 0x08 #define L2CAP_FC_ATT 0x10 #define L2CAP_FC_SIG_LE 0x20 #define L2CAP_FC_SMP_LE 0x40 @@ -267,7 +258,6 @@ struct l2cap_conn_rsp { /* channel identifier */ #define L2CAP_CID_SIGNALING 0x0001 #define L2CAP_CID_CONN_LESS 0x0002 -#define L2CAP_CID_A2MP 0x0003 #define L2CAP_CID_ATT 0x0004 #define L2CAP_CID_LE_SIGNALING 0x0005 #define L2CAP_CID_SMP 0x0006 @@ -282,7 +272,6 @@ struct l2cap_conn_rsp { #define L2CAP_CR_BAD_PSM 0x0002 #define L2CAP_CR_SEC_BLOCK 0x0003 #define L2CAP_CR_NO_MEM 0x0004 -#define L2CAP_CR_BAD_AMP 0x0005 #define L2CAP_CR_INVALID_SCID 0x0006 #define L2CAP_CR_SCID_IN_USE 0x0007 @@ -404,29 +393,6 @@ struct l2cap_info_rsp { __u8 data[]; } __packed; -struct l2cap_create_chan_req { - __le16 psm; - __le16 scid; - __u8 amp_id; -} __packed; - -struct l2cap_create_chan_rsp { - __le16 dcid; - __le16 scid; - __le16 result; - __le16 status; -} __packed; - -struct l2cap_move_chan_req { - __le16 icid; - __u8 dest_amp_id; -} __packed; - -struct l2cap_move_chan_rsp { - __le16 icid; - __le16 result; -} __packed; - #define L2CAP_MR_SUCCESS 0x0000 #define L2CAP_MR_PEND 0x0001 #define L2CAP_MR_BAD_ID 0x0002 @@ -539,8 +505,6 @@ struct l2cap_seq_list { struct l2cap_chan { struct l2cap_conn *conn; - struct hci_conn *hs_hcon; - struct hci_chan *hs_hchan; struct kref kref; atomic_t nesting; @@ -591,12 +555,6 @@ struct l2cap_chan { unsigned long conn_state; unsigned long flags; - __u8 remote_amp_id; - __u8 local_amp_id; - __u8 move_id; - __u8 move_state; - __u8 move_role; - __u16 next_tx_seq; __u16 expected_ack_seq; __u16 expected_tx_seq; diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index da7cac0a1b716..6b2b65a667008 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -62,14 +62,6 @@ source "net/bluetooth/cmtp/Kconfig" source "net/bluetooth/hidp/Kconfig" -config BT_HS - bool "Bluetooth High Speed (HS) features" - depends on BT_BREDR - help - Bluetooth High Speed includes support for off-loading - Bluetooth connections via 802.11 (wifi) physical layer - available with Bluetooth version 3.0 or later. - config BT_LE bool "Bluetooth Low Energy (LE) features" depends on BT diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index 141ac1fda0bfa..628d448d78be3 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -21,7 +21,6 @@ bluetooth-$(CONFIG_DEV_COREDUMP) += coredump.o bluetooth-$(CONFIG_BT_BREDR) += sco.o bluetooth-$(CONFIG_BT_LE) += iso.o -bluetooth-$(CONFIG_BT_HS) += a2mp.o amp.o bluetooth-$(CONFIG_BT_LEDS) += leds.o bluetooth-$(CONFIG_BT_MSFTEXT) += msft.o bluetooth-$(CONFIG_BT_AOSPEXT) += aosp.o diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c deleted file mode 100644 index e7adb8a98cf90..0000000000000 --- a/net/bluetooth/a2mp.c +++ /dev/null @@ -1,1054 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - Copyright (c) 2010,2011 Code Aurora Forum. All rights reserved. - Copyright (c) 2011,2012 Intel Corp. - -*/ - -#include -#include -#include - -#include "hci_request.h" -#include "a2mp.h" -#include "amp.h" - -#define A2MP_FEAT_EXT 0x8000 - -/* Global AMP Manager list */ -static LIST_HEAD(amp_mgr_list); -static DEFINE_MUTEX(amp_mgr_list_lock); - -/* A2MP build & send command helper functions */ -static struct a2mp_cmd *__a2mp_build(u8 code, u8 ident, u16 len, void *data) -{ - struct a2mp_cmd *cmd; - int plen; - - plen = sizeof(*cmd) + len; - cmd = kzalloc(plen, GFP_KERNEL); - if (!cmd) - return NULL; - - cmd->code = code; - cmd->ident = ident; - cmd->len = cpu_to_le16(len); - - memcpy(cmd->data, data, len); - - return cmd; -} - -static void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *data) -{ - struct l2cap_chan *chan = mgr->a2mp_chan; - struct a2mp_cmd *cmd; - u16 total_len = len + sizeof(*cmd); - struct kvec iv; - struct msghdr msg; - - cmd = __a2mp_build(code, ident, len, data); - if (!cmd) - return; - - iv.iov_base = cmd; - iv.iov_len = total_len; - - memset(&msg, 0, sizeof(msg)); - - iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &iv, 1, total_len); - - l2cap_chan_send(chan, &msg, total_len); - - kfree(cmd); -} - -static u8 __next_ident(struct amp_mgr *mgr) -{ - if (++mgr->ident == 0) - mgr->ident = 1; - - return mgr->ident; -} - -static struct amp_mgr *amp_mgr_lookup_by_state(u8 state) -{ - struct amp_mgr *mgr; - - mutex_lock(&_mgr_list_lock); - list_for_each_entry(mgr, &_mgr_list, list) { - if (test_and_clear_bit(state, &mgr->state)) { - amp_mgr_get(mgr); - mutex_unlock(&_mgr_list_lock); - return mgr; - } - } - mutex_unlock(&_mgr_list_lock); - - return NULL; -} - -/* hci_dev_list shall be locked */ -static void __a2mp_add_cl(struct amp_mgr *mgr, struct a2mp_cl *cl) -{ - struct hci_dev *hdev; - int i = 1; - - cl[0].id = AMP_ID_BREDR; - cl[0].type = AMP_TYPE_BREDR; - cl[0].status = AMP_STATUS_BLUETOOTH_ONLY; - - list_for_each_entry(hdev, &hci_dev_list, list) { - if (hdev->dev_type == HCI_AMP) { - cl[i].id = hdev->id; - cl[i].type = hdev->amp_type; - if (test_bit(HCI_UP, &hdev->flags)) - cl[i].status = hdev->amp_status; - else - cl[i].status = AMP_STATUS_POWERED_DOWN; - i++; - } - } -} - -/* Processing A2MP messages */ -static int a2mp_command_rej(struct amp_mgr *mgr, struct sk_buff *skb, - struct a2mp_cmd *hdr) -{ - struct a2mp_cmd_rej *rej = (void *) skb->data; - - if (le16_to_cpu(hdr->len) < sizeof(*rej)) - return -EINVAL; - - BT_DBG("ident %u reason %d", hdr->ident, le16_to_cpu(rej->reason)); - - skb_pull(skb, sizeof(*rej)); - - return 0; -} - -static int a2mp_discover_req(struct amp_mgr *mgr, struct sk_buff *skb, - struct a2mp_cmd *hdr) -{ - struct a2mp_discov_req *req = (void *) skb->data; - u16 len = le16_to_cpu(hdr->len); - struct a2mp_discov_rsp *rsp; - u16 ext_feat; - u8 num_ctrl; - struct hci_dev *hdev; - - if (len < sizeof(*req)) - return -EINVAL; - - skb_pull(skb, sizeof(*req)); - - ext_feat = le16_to_cpu(req->ext_feat); - - BT_DBG("mtu %d efm 0x%4.4x", le16_to_cpu(req->mtu), ext_feat); - - /* check that packet is not broken for now */ - while (ext_feat & A2MP_FEAT_EXT) { - if (len < sizeof(ext_feat)) - return -EINVAL; - - ext_feat = get_unaligned_le16(skb->data); - BT_DBG("efm 0x%4.4x", ext_feat); - len -= sizeof(ext_feat); - skb_pull(skb, sizeof(ext_feat)); - } - - read_lock(&hci_dev_list_lock); - - /* at minimum the BR/EDR needs to be listed */ - num_ctrl = 1; - - list_for_each_entry(hdev, &hci_dev_list, list) { - if (hdev->dev_type == HCI_AMP) - num_ctrl++; - } - - len = struct_size(rsp, cl, num_ctrl); - rsp = kmalloc(len, GFP_ATOMIC); - if (!rsp) { - read_unlock(&hci_dev_list_lock); - return -ENOMEM; - } - - rsp->mtu = cpu_to_le16(L2CAP_A2MP_DEFAULT_MTU); - rsp->ext_feat = 0; - - __a2mp_add_cl(mgr, rsp->cl); - - read_unlock(&hci_dev_list_lock); - - a2mp_send(mgr, A2MP_DISCOVER_RSP, hdr->ident, len, rsp); - - kfree(rsp); - return 0; -} - -static int a2mp_discover_rsp(struct amp_mgr *mgr, struct sk_buff *skb, - struct a2mp_cmd *hdr) -{ - struct a2mp_discov_rsp *rsp = (void *) skb->data; - u16 len = le16_to_cpu(hdr->len); - struct a2mp_cl *cl; - u16 ext_feat; - bool found = false; - - if (len < sizeof(*rsp)) - return -EINVAL; - - len -= sizeof(*rsp); - skb_pull(skb, sizeof(*rsp)); - - ext_feat = le16_to_cpu(rsp->ext_feat); - - BT_DBG("mtu %d efm 0x%4.4x", le16_to_cpu(rsp->mtu), ext_feat); - - /* check that packet is not broken for now */ - while (ext_feat & A2MP_FEAT_EXT) { - if (len < sizeof(ext_feat)) - return -EINVAL; - - ext_feat = get_unaligned_le16(skb->data); - BT_DBG("efm 0x%4.4x", ext_feat); - len -= sizeof(ext_feat); - skb_pull(skb, sizeof(ext_feat)); - } - - cl = (void *) skb->data; - while (len >= sizeof(*cl)) { - BT_DBG("Remote AMP id %u type %u status %u", cl->id, cl->type, - cl->status); - - if (cl->id != AMP_ID_BREDR && cl->type != AMP_TYPE_BREDR) { - struct a2mp_info_req req; - - found = true; - - memset(&req, 0, sizeof(req)); - - req.id = cl->id; - a2mp_send(mgr, A2MP_GETINFO_REQ, __next_ident(mgr), - sizeof(req), &req); - } - - len -= sizeof(*cl); - cl = skb_pull(skb, sizeof(*cl)); - } - - /* Fall back to L2CAP init sequence */ - if (!found) { - struct l2cap_conn *conn = mgr->l2cap_conn; - struct l2cap_chan *chan; - - mutex_lock(&conn->chan_lock); - - list_for_each_entry(chan, &conn->chan_l, list) { - - BT_DBG("chan %p state %s", chan, - state_to_string(chan->state)); - - if (chan->scid == L2CAP_CID_A2MP) - continue; - - l2cap_chan_lock(chan); - - if (chan->state == BT_CONNECT) - l2cap_send_conn_req(chan); - - l2cap_chan_unlock(chan); - } - - mutex_unlock(&conn->chan_lock); - } - - return 0; -} - -static int a2mp_change_notify(struct amp_mgr *mgr, struct sk_buff *skb, - struct a2mp_cmd *hdr) -{ - struct a2mp_cl *cl = (void *) skb->data; - - while (skb->len >= sizeof(*cl)) { - BT_DBG("Controller id %u type %u status %u", cl->id, cl->type, - cl->status); - cl = skb_pull(skb, sizeof(*cl)); - } - - /* TODO send A2MP_CHANGE_RSP */ - - return 0; -} - -static void read_local_amp_info_complete(struct hci_dev *hdev, u8 status, - u16 opcode) -{ - BT_DBG("%s status 0x%2.2x", hdev->name, status); - - a2mp_send_getinfo_rsp(hdev); -} - -static int a2mp_getinfo_req(struct amp_mgr *mgr, struct sk_buff *skb, - struct a2mp_cmd *hdr) -{ - struct a2mp_info_req *req = (void *) skb->data; - struct hci_dev *hdev; - struct hci_request hreq; - int err = 0; - - if (le16_to_cpu(hdr->len) < sizeof(*req)) - return -EINVAL; - - BT_DBG("id %u", req->id); - - hdev = hci_dev_get(req->id); - if (!hdev || hdev->dev_type != HCI_AMP) { - struct a2mp_info_rsp rsp; - - memset(&rsp, 0, sizeof(rsp)); - - rsp.id = req->id; - rsp.status = A2MP_STATUS_INVALID_CTRL_ID; - - a2mp_send(mgr, A2MP_GETINFO_RSP, hdr->ident, sizeof(rsp), - &rsp); - - goto done; - } - - set_bit(READ_LOC_AMP_INFO, &mgr->state); - hci_req_init(&hreq, hdev); - hci_req_add(&hreq, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL); - err = hci_req_run(&hreq, read_local_amp_info_complete); - if (err < 0) - a2mp_send_getinfo_rsp(hdev); - -done: - if (hdev) - hci_dev_put(hdev); - - skb_pull(skb, sizeof(*req)); - return 0; -} - -static int a2mp_getinfo_rsp(struct amp_mgr *mgr, struct sk_buff *skb, - struct a2mp_cmd *hdr) -{ - struct a2mp_info_rsp *rsp = (struct a2mp_info_rsp *) skb->data; - struct a2mp_amp_assoc_req req; - struct amp_ctrl *ctrl; - - if (le16_to_cpu(hdr->len) < sizeof(*rsp)) - return -EINVAL; - - BT_DBG("id %u status 0x%2.2x", rsp->id, rsp->status); - - if (rsp->status) - return -EINVAL; - - ctrl = amp_ctrl_add(mgr, rsp->id); - if (!ctrl) - return -ENOMEM; - - memset(&req, 0, sizeof(req)); - - req.id = rsp->id; - a2mp_send(mgr, A2MP_GETAMPASSOC_REQ, __next_ident(mgr), sizeof(req), - &req); - - skb_pull(skb, sizeof(*rsp)); - return 0; -} - -static int a2mp_getampassoc_req(struct amp_mgr *mgr, struct sk_buff *skb, - struct a2mp_cmd *hdr) -{ - struct a2mp_amp_assoc_req *req = (void *) skb->data; - struct hci_dev *hdev; - struct amp_mgr *tmp; - - if (le16_to_cpu(hdr->len) < sizeof(*req)) - return -EINVAL; - - BT_DBG("id %u", req->id); - - /* Make sure that other request is not processed */ - tmp = amp_mgr_lookup_by_state(READ_LOC_AMP_ASSOC); - - hdev = hci_dev_get(req->id); - if (!hdev || hdev->amp_type == AMP_TYPE_BREDR || tmp) { - struct a2mp_amp_assoc_rsp rsp; - - memset(&rsp, 0, sizeof(rsp)); - rsp.id = req->id; - - if (tmp) { - rsp.status = A2MP_STATUS_COLLISION_OCCURED; - amp_mgr_put(tmp); - } else { - rsp.status = A2MP_STATUS_INVALID_CTRL_ID; - } - - a2mp_send(mgr, A2MP_GETAMPASSOC_RSP, hdr->ident, sizeof(rsp), - &rsp); - - goto done; - } - - amp_read_loc_assoc(hdev, mgr); - -done: - if (hdev) - hci_dev_put(hdev); - - skb_pull(skb, sizeof(*req)); - return 0; -} - -static int a2mp_getampassoc_rsp(struct amp_mgr *mgr, struct sk_buff *skb, - struct a2mp_cmd *hdr) -{ - struct a2mp_amp_assoc_rsp *rsp = (void *) skb->data; - u16 len = le16_to_cpu(hdr->len); - struct hci_dev *hdev; - struct amp_ctrl *ctrl; - struct hci_conn *hcon; - size_t assoc_len; - - if (len < sizeof(*rsp)) - return -EINVAL; - - assoc_len = len - sizeof(*rsp); - - BT_DBG("id %u status 0x%2.2x assoc len %zu", rsp->id, rsp->status, - assoc_len); - - if (rsp->status) - return -EINVAL; - - /* Save remote ASSOC data */ - ctrl = amp_ctrl_lookup(mgr, rsp->id); - if (ctrl) { - u8 *assoc; - - assoc = kmemdup(rsp->amp_assoc, assoc_len, GFP_KERNEL); - if (!assoc) { - amp_ctrl_put(ctrl); - return -ENOMEM; - } - - ctrl->assoc = assoc; - ctrl->assoc_len = assoc_len; - ctrl->assoc_rem_len = assoc_len; - ctrl->assoc_len_so_far = 0; - - amp_ctrl_put(ctrl); - } - - /* Create Phys Link */ - hdev = hci_dev_get(rsp->id); - if (!hdev) - return -EINVAL; - - hcon = phylink_add(hdev, mgr, rsp->id, true); - if (!hcon) - goto done; - - BT_DBG("Created hcon %p: loc:%u -> rem:%u", hcon, hdev->id, rsp->id); - - mgr->bredr_chan->remote_amp_id = rsp->id; - - amp_create_phylink(hdev, mgr, hcon); - -done: - hci_dev_put(hdev); - skb_pull(skb, len); - return 0; -} - -static int a2mp_createphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb, - struct a2mp_cmd *hdr) -{ - struct a2mp_physlink_req *req = (void *) skb->data; - struct a2mp_physlink_rsp rsp; - struct hci_dev *hdev; - struct hci_conn *hcon; - struct amp_ctrl *ctrl; - - if (le16_to_cpu(hdr->len) < sizeof(*req)) - return -EINVAL; - - BT_DBG("local_id %u, remote_id %u", req->local_id, req->remote_id); - - memset(&rsp, 0, sizeof(rsp)); - - rsp.local_id = req->remote_id; - rsp.remote_id = req->local_id; - - hdev = hci_dev_get(req->remote_id); - if (!hdev || hdev->amp_type == AMP_TYPE_BREDR) { - rsp.status = A2MP_STATUS_INVALID_CTRL_ID; - goto send_rsp; - } - - ctrl = amp_ctrl_lookup(mgr, rsp.remote_id); - if (!ctrl) { - ctrl = amp_ctrl_add(mgr, rsp.remote_id); - if (ctrl) { - amp_ctrl_get(ctrl); - } else { - rsp.status = A2MP_STATUS_UNABLE_START_LINK_CREATION; - goto send_rsp; - } - } - - if (ctrl) { - size_t assoc_len = le16_to_cpu(hdr->len) - sizeof(*req); - u8 *assoc; - - assoc = kmemdup(req->amp_assoc, assoc_len, GFP_KERNEL); - if (!assoc) { - amp_ctrl_put(ctrl); - hci_dev_put(hdev); - return -ENOMEM; - } - - ctrl->assoc = assoc; - ctrl->assoc_len = assoc_len; - ctrl->assoc_rem_len = assoc_len; - ctrl->assoc_len_so_far = 0; - - amp_ctrl_put(ctrl); - } - - hcon = phylink_add(hdev, mgr, req->local_id, false); - if (hcon) { - amp_accept_phylink(hdev, mgr, hcon); - rsp.status = A2MP_STATUS_SUCCESS; - } else { - rsp.status = A2MP_STATUS_UNABLE_START_LINK_CREATION; - } - -send_rsp: - if (hdev) - hci_dev_put(hdev); - - /* Reply error now and success after HCI Write Remote AMP Assoc - command complete with success status - */ - if (rsp.status != A2MP_STATUS_SUCCESS) { - a2mp_send(mgr, A2MP_CREATEPHYSLINK_RSP, hdr->ident, - sizeof(rsp), &rsp); - } else { - set_bit(WRITE_REMOTE_AMP_ASSOC, &mgr->state); - mgr->ident = hdr->ident; - } - - skb_pull(skb, le16_to_cpu(hdr->len)); - return 0; -} - -static int a2mp_discphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb, - struct a2mp_cmd *hdr) -{ - struct a2mp_physlink_req *req = (void *) skb->data; - struct a2mp_physlink_rsp rsp; - struct hci_dev *hdev; - struct hci_conn *hcon; - - if (le16_to_cpu(hdr->len) < sizeof(*req)) - return -EINVAL; - - BT_DBG("local_id %u remote_id %u", req->local_id, req->remote_id); - - memset(&rsp, 0, sizeof(rsp)); - - rsp.local_id = req->remote_id; - rsp.remote_id = req->local_id; - rsp.status = A2MP_STATUS_SUCCESS; - - hdev = hci_dev_get(req->remote_id); - if (!hdev) { - rsp.status = A2MP_STATUS_INVALID_CTRL_ID; - goto send_rsp; - } - - hcon = hci_conn_hash_lookup_ba(hdev, AMP_LINK, - &mgr->l2cap_conn->hcon->dst); - if (!hcon) { - bt_dev_err(hdev, "no phys link exist"); - rsp.status = A2MP_STATUS_NO_PHYSICAL_LINK_EXISTS; - goto clean; - } - - /* TODO Disconnect Phys Link here */ - -clean: - hci_dev_put(hdev); - -send_rsp: - a2mp_send(mgr, A2MP_DISCONNPHYSLINK_RSP, hdr->ident, sizeof(rsp), &rsp); - - skb_pull(skb, sizeof(*req)); - return 0; -} - -static inline int a2mp_cmd_rsp(struct amp_mgr *mgr, struct sk_buff *skb, - struct a2mp_cmd *hdr) -{ - BT_DBG("ident %u code 0x%2.2x", hdr->ident, hdr->code); - - skb_pull(skb, le16_to_cpu(hdr->len)); - return 0; -} - -/* Handle A2MP signalling */ -static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) -{ - struct a2mp_cmd *hdr; - struct amp_mgr *mgr = chan->data; - int err = 0; - - amp_mgr_get(mgr); - - while (skb->len >= sizeof(*hdr)) { - u16 len; - - hdr = (void *) skb->data; - len = le16_to_cpu(hdr->len); - - BT_DBG("code 0x%2.2x id %u len %u", hdr->code, hdr->ident, len); - - skb_pull(skb, sizeof(*hdr)); - - if (len > skb->len || !hdr->ident) { - err = -EINVAL; - break; - } - - mgr->ident = hdr->ident; - - switch (hdr->code) { - case A2MP_COMMAND_REJ: - a2mp_command_rej(mgr, skb, hdr); - break; - - case A2MP_DISCOVER_REQ: - err = a2mp_discover_req(mgr, skb, hdr); - break; - - case A2MP_CHANGE_NOTIFY: - err = a2mp_change_notify(mgr, skb, hdr); - break; - - case A2MP_GETINFO_REQ: - err = a2mp_getinfo_req(mgr, skb, hdr); - break; - - case A2MP_GETAMPASSOC_REQ: - err = a2mp_getampassoc_req(mgr, skb, hdr); - break; - - case A2MP_CREATEPHYSLINK_REQ: - err = a2mp_createphyslink_req(mgr, skb, hdr); - break; - - case A2MP_DISCONNPHYSLINK_REQ: - err = a2mp_discphyslink_req(mgr, skb, hdr); - break; - - case A2MP_DISCOVER_RSP: - err = a2mp_discover_rsp(mgr, skb, hdr); - break; - - case A2MP_GETINFO_RSP: - err = a2mp_getinfo_rsp(mgr, skb, hdr); - break; - - case A2MP_GETAMPASSOC_RSP: - err = a2mp_getampassoc_rsp(mgr, skb, hdr); - break; - - case A2MP_CHANGE_RSP: - case A2MP_CREATEPHYSLINK_RSP: - case A2MP_DISCONNPHYSLINK_RSP: - err = a2mp_cmd_rsp(mgr, skb, hdr); - break; - - default: - BT_ERR("Unknown A2MP sig cmd 0x%2.2x", hdr->code); - err = -EINVAL; - break; - } - } - - if (err) { - struct a2mp_cmd_rej rej; - - memset(&rej, 0, sizeof(rej)); - - rej.reason = cpu_to_le16(0); - hdr = (void *) skb->data; - - BT_DBG("Send A2MP Rej: cmd 0x%2.2x err %d", hdr->code, err); - - a2mp_send(mgr, A2MP_COMMAND_REJ, hdr->ident, sizeof(rej), - &rej); - } - - /* Always free skb and return success error code to prevent - from sending L2CAP Disconnect over A2MP channel */ - kfree_skb(skb); - - amp_mgr_put(mgr); - - return 0; -} - -static void a2mp_chan_close_cb(struct l2cap_chan *chan) -{ - l2cap_chan_put(chan); -} - -static void a2mp_chan_state_change_cb(struct l2cap_chan *chan, int state, - int err) -{ - struct amp_mgr *mgr = chan->data; - - if (!mgr) - return; - - BT_DBG("chan %p state %s", chan, state_to_string(state)); - - chan->state = state; - - switch (state) { - case BT_CLOSED: - if (mgr) - amp_mgr_put(mgr); - break; - } -} - -static struct sk_buff *a2mp_chan_alloc_skb_cb(struct l2cap_chan *chan, - unsigned long hdr_len, - unsigned long len, int nb) -{ - struct sk_buff *skb; - - skb = bt_skb_alloc(hdr_len + len, GFP_KERNEL); - if (!skb) - return ERR_PTR(-ENOMEM); - - return skb; -} - -static const struct l2cap_ops a2mp_chan_ops = { - .name = "L2CAP A2MP channel", - .recv = a2mp_chan_recv_cb, - .close = a2mp_chan_close_cb, - .state_change = a2mp_chan_state_change_cb, - .alloc_skb = a2mp_chan_alloc_skb_cb, - - /* Not implemented for A2MP */ - .new_connection = l2cap_chan_no_new_connection, - .teardown = l2cap_chan_no_teardown, - .ready = l2cap_chan_no_ready, - .defer = l2cap_chan_no_defer, - .resume = l2cap_chan_no_resume, - .set_shutdown = l2cap_chan_no_set_shutdown, - .get_sndtimeo = l2cap_chan_no_get_sndtimeo, -}; - -static struct l2cap_chan *a2mp_chan_open(struct l2cap_conn *conn, bool locked) -{ - struct l2cap_chan *chan; - int err; - - chan = l2cap_chan_create(); - if (!chan) - return NULL; - - BT_DBG("chan %p", chan); - - chan->chan_type = L2CAP_CHAN_FIXED; - chan->scid = L2CAP_CID_A2MP; - chan->dcid = L2CAP_CID_A2MP; - chan->omtu = L2CAP_A2MP_DEFAULT_MTU; - chan->imtu = L2CAP_A2MP_DEFAULT_MTU; - chan->flush_to = L2CAP_DEFAULT_FLUSH_TO; - - chan->ops = &a2mp_chan_ops; - - l2cap_chan_set_defaults(chan); - chan->remote_max_tx = chan->max_tx; - chan->remote_tx_win = chan->tx_win; - - chan->retrans_timeout = L2CAP_DEFAULT_RETRANS_TO; - chan->monitor_timeout = L2CAP_DEFAULT_MONITOR_TO; - - skb_queue_head_init(&chan->tx_q); - - chan->mode = L2CAP_MODE_ERTM; - - err = l2cap_ertm_init(chan); - if (err < 0) { - l2cap_chan_del(chan, 0); - return NULL; - } - - chan->conf_state = 0; - - if (locked) - __l2cap_chan_add(conn, chan); - else - l2cap_chan_add(conn, chan); - - chan->remote_mps = chan->omtu; - chan->mps = chan->omtu; - - chan->state = BT_CONNECTED; - - return chan; -} - -/* AMP Manager functions */ -struct amp_mgr *amp_mgr_get(struct amp_mgr *mgr) -{ - BT_DBG("mgr %p orig refcnt %d", mgr, kref_read(&mgr->kref)); - - kref_get(&mgr->kref); - - return mgr; -} - -static void amp_mgr_destroy(struct kref *kref) -{ - struct amp_mgr *mgr = container_of(kref, struct amp_mgr, kref); - - BT_DBG("mgr %p", mgr); - - mutex_lock(&_mgr_list_lock); - list_del(&mgr->list); - mutex_unlock(&_mgr_list_lock); - - amp_ctrl_list_flush(mgr); - kfree(mgr); -} - -int amp_mgr_put(struct amp_mgr *mgr) -{ - BT_DBG("mgr %p orig refcnt %d", mgr, kref_read(&mgr->kref)); - - return kref_put(&mgr->kref, &_mgr_destroy); -} - -static struct amp_mgr *amp_mgr_create(struct l2cap_conn *conn, bool locked) -{ - struct amp_mgr *mgr; - struct l2cap_chan *chan; - - mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); - if (!mgr) - return NULL; - - BT_DBG("conn %p mgr %p", conn, mgr); - - mgr->l2cap_conn = conn; - - chan = a2mp_chan_open(conn, locked); - if (!chan) { - kfree(mgr); - return NULL; - } - - mgr->a2mp_chan = chan; - chan->data = mgr; - - conn->hcon->amp_mgr = mgr; - - kref_init(&mgr->kref); - - /* Remote AMP ctrl list initialization */ - INIT_LIST_HEAD(&mgr->amp_ctrls); - mutex_init(&mgr->amp_ctrls_lock); - - mutex_lock(&_mgr_list_lock); - list_add(&mgr->list, &_mgr_list); - mutex_unlock(&_mgr_list_lock); - - return mgr; -} - -struct l2cap_chan *a2mp_channel_create(struct l2cap_conn *conn, - struct sk_buff *skb) -{ - struct amp_mgr *mgr; - - if (conn->hcon->type != ACL_LINK) - return NULL; - - mgr = amp_mgr_create(conn, false); - if (!mgr) { - BT_ERR("Could not create AMP manager"); - return NULL; - } - - BT_DBG("mgr: %p chan %p", mgr, mgr->a2mp_chan); - - return mgr->a2mp_chan; -} - -void a2mp_send_getinfo_rsp(struct hci_dev *hdev) -{ - struct amp_mgr *mgr; - struct a2mp_info_rsp rsp; - - mgr = amp_mgr_lookup_by_state(READ_LOC_AMP_INFO); - if (!mgr) - return; - - BT_DBG("%s mgr %p", hdev->name, mgr); - - memset(&rsp, 0, sizeof(rsp)); - - rsp.id = hdev->id; - rsp.status = A2MP_STATUS_INVALID_CTRL_ID; - - if (hdev->amp_type != AMP_TYPE_BREDR) { - rsp.status = 0; - rsp.total_bw = cpu_to_le32(hdev->amp_total_bw); - rsp.max_bw = cpu_to_le32(hdev->amp_max_bw); - rsp.min_latency = cpu_to_le32(hdev->amp_min_latency); - rsp.pal_cap = cpu_to_le16(hdev->amp_pal_cap); - rsp.assoc_size = cpu_to_le16(hdev->amp_assoc_size); - } - - a2mp_send(mgr, A2MP_GETINFO_RSP, mgr->ident, sizeof(rsp), &rsp); - amp_mgr_put(mgr); -} - -void a2mp_send_getampassoc_rsp(struct hci_dev *hdev, u8 status) -{ - struct amp_mgr *mgr; - struct amp_assoc *loc_assoc = &hdev->loc_assoc; - struct a2mp_amp_assoc_rsp *rsp; - size_t len; - - mgr = amp_mgr_lookup_by_state(READ_LOC_AMP_ASSOC); - if (!mgr) - return; - - BT_DBG("%s mgr %p", hdev->name, mgr); - - len = sizeof(struct a2mp_amp_assoc_rsp) + loc_assoc->len; - rsp = kzalloc(len, GFP_KERNEL); - if (!rsp) { - amp_mgr_put(mgr); - return; - } - - rsp->id = hdev->id; - - if (status) { - rsp->status = A2MP_STATUS_INVALID_CTRL_ID; - } else { - rsp->status = A2MP_STATUS_SUCCESS; - memcpy(rsp->amp_assoc, loc_assoc->data, loc_assoc->len); - } - - a2mp_send(mgr, A2MP_GETAMPASSOC_RSP, mgr->ident, len, rsp); - amp_mgr_put(mgr); - kfree(rsp); -} - -void a2mp_send_create_phy_link_req(struct hci_dev *hdev, u8 status) -{ - struct amp_mgr *mgr; - struct amp_assoc *loc_assoc = &hdev->loc_assoc; - struct a2mp_physlink_req *req; - struct l2cap_chan *bredr_chan; - size_t len; - - mgr = amp_mgr_lookup_by_state(READ_LOC_AMP_ASSOC_FINAL); - if (!mgr) - return; - - len = sizeof(*req) + loc_assoc->len; - - BT_DBG("%s mgr %p assoc_len %zu", hdev->name, mgr, len); - - req = kzalloc(len, GFP_KERNEL); - if (!req) { - amp_mgr_put(mgr); - return; - } - - bredr_chan = mgr->bredr_chan; - if (!bredr_chan) - goto clean; - - req->local_id = hdev->id; - req->remote_id = bredr_chan->remote_amp_id; - memcpy(req->amp_assoc, loc_assoc->data, loc_assoc->len); - - a2mp_send(mgr, A2MP_CREATEPHYSLINK_REQ, __next_ident(mgr), len, req); - -clean: - amp_mgr_put(mgr); - kfree(req); -} - -void a2mp_send_create_phy_link_rsp(struct hci_dev *hdev, u8 status) -{ - struct amp_mgr *mgr; - struct a2mp_physlink_rsp rsp; - struct hci_conn *hs_hcon; - - mgr = amp_mgr_lookup_by_state(WRITE_REMOTE_AMP_ASSOC); - if (!mgr) - return; - - memset(&rsp, 0, sizeof(rsp)); - - hs_hcon = hci_conn_hash_lookup_state(hdev, AMP_LINK, BT_CONNECT); - if (!hs_hcon) { - rsp.status = A2MP_STATUS_UNABLE_START_LINK_CREATION; - } else { - rsp.remote_id = hs_hcon->remote_id; - rsp.status = A2MP_STATUS_SUCCESS; - } - - BT_DBG("%s mgr %p hs_hcon %p status %u", hdev->name, mgr, hs_hcon, - status); - - rsp.local_id = hdev->id; - a2mp_send(mgr, A2MP_CREATEPHYSLINK_RSP, mgr->ident, sizeof(rsp), &rsp); - amp_mgr_put(mgr); -} - -void a2mp_discover_amp(struct l2cap_chan *chan) -{ - struct l2cap_conn *conn = chan->conn; - struct amp_mgr *mgr = conn->hcon->amp_mgr; - struct a2mp_discov_req req; - - BT_DBG("chan %p conn %p mgr %p", chan, conn, mgr); - - if (!mgr) { - mgr = amp_mgr_create(conn, true); - if (!mgr) - return; - } - - mgr->bredr_chan = chan; - - memset(&req, 0, sizeof(req)); - - req.mtu = cpu_to_le16(L2CAP_A2MP_DEFAULT_MTU); - req.ext_feat = 0; - a2mp_send(mgr, A2MP_DISCOVER_REQ, 1, sizeof(req), &req); -} diff --git a/net/bluetooth/a2mp.h b/net/bluetooth/a2mp.h deleted file mode 100644 index 2fd253a61a2a1..0000000000000 --- a/net/bluetooth/a2mp.h +++ /dev/null @@ -1,154 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - Copyright (c) 2010,2011 Code Aurora Forum. All rights reserved. - Copyright (c) 2011,2012 Intel Corp. - -*/ - -#ifndef __A2MP_H -#define __A2MP_H - -#include - -enum amp_mgr_state { - READ_LOC_AMP_INFO, - READ_LOC_AMP_ASSOC, - READ_LOC_AMP_ASSOC_FINAL, - WRITE_REMOTE_AMP_ASSOC, -}; - -struct amp_mgr { - struct list_head list; - struct l2cap_conn *l2cap_conn; - struct l2cap_chan *a2mp_chan; - struct l2cap_chan *bredr_chan; - struct kref kref; - __u8 ident; - __u8 handle; - unsigned long state; - unsigned long flags; - - struct list_head amp_ctrls; - struct mutex amp_ctrls_lock; -}; - -struct a2mp_cmd { - __u8 code; - __u8 ident; - __le16 len; - __u8 data[]; -} __packed; - -/* A2MP command codes */ -#define A2MP_COMMAND_REJ 0x01 -struct a2mp_cmd_rej { - __le16 reason; - __u8 data[]; -} __packed; - -#define A2MP_DISCOVER_REQ 0x02 -struct a2mp_discov_req { - __le16 mtu; - __le16 ext_feat; -} __packed; - -struct a2mp_cl { - __u8 id; - __u8 type; - __u8 status; -} __packed; - -#define A2MP_DISCOVER_RSP 0x03 -struct a2mp_discov_rsp { - __le16 mtu; - __le16 ext_feat; - struct a2mp_cl cl[]; -} __packed; - -#define A2MP_CHANGE_NOTIFY 0x04 -#define A2MP_CHANGE_RSP 0x05 - -#define A2MP_GETINFO_REQ 0x06 -struct a2mp_info_req { - __u8 id; -} __packed; - -#define A2MP_GETINFO_RSP 0x07 -struct a2mp_info_rsp { - __u8 id; - __u8 status; - __le32 total_bw; - __le32 max_bw; - __le32 min_latency; - __le16 pal_cap; - __le16 assoc_size; -} __packed; - -#define A2MP_GETAMPASSOC_REQ 0x08 -struct a2mp_amp_assoc_req { - __u8 id; -} __packed; - -#define A2MP_GETAMPASSOC_RSP 0x09 -struct a2mp_amp_assoc_rsp { - __u8 id; - __u8 status; - __u8 amp_assoc[]; -} __packed; - -#define A2MP_CREATEPHYSLINK_REQ 0x0A -#define A2MP_DISCONNPHYSLINK_REQ 0x0C -struct a2mp_physlink_req { - __u8 local_id; - __u8 remote_id; - __u8 amp_assoc[]; -} __packed; - -#define A2MP_CREATEPHYSLINK_RSP 0x0B -#define A2MP_DISCONNPHYSLINK_RSP 0x0D -struct a2mp_physlink_rsp { - __u8 local_id; - __u8 remote_id; - __u8 status; -} __packed; - -/* A2MP response status */ -#define A2MP_STATUS_SUCCESS 0x00 -#define A2MP_STATUS_INVALID_CTRL_ID 0x01 -#define A2MP_STATUS_UNABLE_START_LINK_CREATION 0x02 -#define A2MP_STATUS_NO_PHYSICAL_LINK_EXISTS 0x02 -#define A2MP_STATUS_COLLISION_OCCURED 0x03 -#define A2MP_STATUS_DISCONN_REQ_RECVD 0x04 -#define A2MP_STATUS_PHYS_LINK_EXISTS 0x05 -#define A2MP_STATUS_SECURITY_VIOLATION 0x06 - -struct amp_mgr *amp_mgr_get(struct amp_mgr *mgr); - -#if IS_ENABLED(CONFIG_BT_HS) -int amp_mgr_put(struct amp_mgr *mgr); -struct l2cap_chan *a2mp_channel_create(struct l2cap_conn *conn, - struct sk_buff *skb); -void a2mp_discover_amp(struct l2cap_chan *chan); -#else -static inline int amp_mgr_put(struct amp_mgr *mgr) -{ - return 0; -} - -static inline struct l2cap_chan *a2mp_channel_create(struct l2cap_conn *conn, - struct sk_buff *skb) -{ - return NULL; -} - -static inline void a2mp_discover_amp(struct l2cap_chan *chan) -{ -} -#endif - -void a2mp_send_getinfo_rsp(struct hci_dev *hdev); -void a2mp_send_getampassoc_rsp(struct hci_dev *hdev, u8 status); -void a2mp_send_create_phy_link_req(struct hci_dev *hdev, u8 status); -void a2mp_send_create_phy_link_rsp(struct hci_dev *hdev, u8 status); - -#endif /* __A2MP_H */ diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c deleted file mode 100644 index 5d698f19868c5..0000000000000 --- a/net/bluetooth/amp.c +++ /dev/null @@ -1,590 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - Copyright (c) 2011,2012 Intel Corp. - -*/ - -#include -#include -#include -#include - -#include "hci_request.h" -#include "a2mp.h" -#include "amp.h" - -/* Remote AMP Controllers interface */ -void amp_ctrl_get(struct amp_ctrl *ctrl) -{ - BT_DBG("ctrl %p orig refcnt %d", ctrl, - kref_read(&ctrl->kref)); - - kref_get(&ctrl->kref); -} - -static void amp_ctrl_destroy(struct kref *kref) -{ - struct amp_ctrl *ctrl = container_of(kref, struct amp_ctrl, kref); - - BT_DBG("ctrl %p", ctrl); - - kfree(ctrl->assoc); - kfree(ctrl); -} - -int amp_ctrl_put(struct amp_ctrl *ctrl) -{ - BT_DBG("ctrl %p orig refcnt %d", ctrl, - kref_read(&ctrl->kref)); - - return kref_put(&ctrl->kref, &_ctrl_destroy); -} - -struct amp_ctrl *amp_ctrl_add(struct amp_mgr *mgr, u8 id) -{ - struct amp_ctrl *ctrl; - - ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); - if (!ctrl) - return NULL; - - kref_init(&ctrl->kref); - ctrl->id = id; - - mutex_lock(&mgr->amp_ctrls_lock); - list_add(&ctrl->list, &mgr->amp_ctrls); - mutex_unlock(&mgr->amp_ctrls_lock); - - BT_DBG("mgr %p ctrl %p", mgr, ctrl); - - return ctrl; -} - -void amp_ctrl_list_flush(struct amp_mgr *mgr) -{ - struct amp_ctrl *ctrl, *n; - - BT_DBG("mgr %p", mgr); - - mutex_lock(&mgr->amp_ctrls_lock); - list_for_each_entry_safe(ctrl, n, &mgr->amp_ctrls, list) { - list_del(&ctrl->list); - amp_ctrl_put(ctrl); - } - mutex_unlock(&mgr->amp_ctrls_lock); -} - -struct amp_ctrl *amp_ctrl_lookup(struct amp_mgr *mgr, u8 id) -{ - struct amp_ctrl *ctrl; - - BT_DBG("mgr %p id %u", mgr, id); - - mutex_lock(&mgr->amp_ctrls_lock); - list_for_each_entry(ctrl, &mgr->amp_ctrls, list) { - if (ctrl->id == id) { - amp_ctrl_get(ctrl); - mutex_unlock(&mgr->amp_ctrls_lock); - return ctrl; - } - } - mutex_unlock(&mgr->amp_ctrls_lock); - - return NULL; -} - -/* Physical Link interface */ -static u8 __next_handle(struct amp_mgr *mgr) -{ - if (++mgr->handle == 0) - mgr->handle = 1; - - return mgr->handle; -} - -struct hci_conn *phylink_add(struct hci_dev *hdev, struct amp_mgr *mgr, - u8 remote_id, bool out) -{ - bdaddr_t *dst = &mgr->l2cap_conn->hcon->dst; - struct hci_conn *hcon; - u8 role = out ? HCI_ROLE_MASTER : HCI_ROLE_SLAVE; - - hcon = hci_conn_add(hdev, AMP_LINK, dst, role, __next_handle(mgr)); - if (!hcon) - return NULL; - - BT_DBG("hcon %p dst %pMR", hcon, dst); - - hcon->state = BT_CONNECT; - hcon->attempt++; - hcon->remote_id = remote_id; - hcon->amp_mgr = amp_mgr_get(mgr); - - return hcon; -} - -/* AMP crypto key generation interface */ -static int hmac_sha256(u8 *key, u8 ksize, char *plaintext, u8 psize, u8 *output) -{ - struct crypto_shash *tfm; - struct shash_desc *shash; - int ret; - - if (!ksize) - return -EINVAL; - - tfm = crypto_alloc_shash("hmac(sha256)", 0, 0); - if (IS_ERR(tfm)) { - BT_DBG("crypto_alloc_ahash failed: err %ld", PTR_ERR(tfm)); - return PTR_ERR(tfm); - } - - ret = crypto_shash_setkey(tfm, key, ksize); - if (ret) { - BT_DBG("crypto_ahash_setkey failed: err %d", ret); - goto failed; - } - - shash = kzalloc(sizeof(*shash) + crypto_shash_descsize(tfm), - GFP_KERNEL); - if (!shash) { - ret = -ENOMEM; - goto failed; - } - - shash->tfm = tfm; - - ret = crypto_shash_digest(shash, plaintext, psize, output); - - kfree(shash); - -failed: - crypto_free_shash(tfm); - return ret; -} - -int phylink_gen_key(struct hci_conn *conn, u8 *data, u8 *len, u8 *type) -{ - struct hci_dev *hdev = conn->hdev; - struct link_key *key; - u8 keybuf[HCI_AMP_LINK_KEY_SIZE]; - u8 gamp_key[HCI_AMP_LINK_KEY_SIZE]; - int err; - - if (!hci_conn_check_link_mode(conn)) - return -EACCES; - - BT_DBG("conn %p key_type %d", conn, conn->key_type); - - /* Legacy key */ - if (conn->key_type < 3) { - bt_dev_err(hdev, "legacy key type %u", conn->key_type); - return -EACCES; - } - - *type = conn->key_type; - *len = HCI_AMP_LINK_KEY_SIZE; - - key = hci_find_link_key(hdev, &conn->dst); - if (!key) { - BT_DBG("No Link key for conn %p dst %pMR", conn, &conn->dst); - return -EACCES; - } - - /* BR/EDR Link Key concatenated together with itself */ - memcpy(&keybuf[0], key->val, HCI_LINK_KEY_SIZE); - memcpy(&keybuf[HCI_LINK_KEY_SIZE], key->val, HCI_LINK_KEY_SIZE); - - /* Derive Generic AMP Link Key (gamp) */ - err = hmac_sha256(keybuf, HCI_AMP_LINK_KEY_SIZE, "gamp", 4, gamp_key); - if (err) { - bt_dev_err(hdev, "could not derive Generic AMP Key: err %d", err); - return err; - } - - if (conn->key_type == HCI_LK_DEBUG_COMBINATION) { - BT_DBG("Use Generic AMP Key (gamp)"); - memcpy(data, gamp_key, HCI_AMP_LINK_KEY_SIZE); - return err; - } - - /* Derive Dedicated AMP Link Key: "802b" is 802.11 PAL keyID */ - return hmac_sha256(gamp_key, HCI_AMP_LINK_KEY_SIZE, "802b", 4, data); -} - -static void read_local_amp_assoc_complete(struct hci_dev *hdev, u8 status, - u16 opcode, struct sk_buff *skb) -{ - struct hci_rp_read_local_amp_assoc *rp = (void *)skb->data; - struct amp_assoc *assoc = &hdev->loc_assoc; - size_t rem_len, frag_len; - - BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - - if (rp->status) - goto send_rsp; - - frag_len = skb->len - sizeof(*rp); - rem_len = __le16_to_cpu(rp->rem_len); - - if (rem_len > frag_len) { - BT_DBG("frag_len %zu rem_len %zu", frag_len, rem_len); - - memcpy(assoc->data + assoc->offset, rp->frag, frag_len); - assoc->offset += frag_len; - - /* Read other fragments */ - amp_read_loc_assoc_frag(hdev, rp->phy_handle); - - return; - } - - memcpy(assoc->data + assoc->offset, rp->frag, rem_len); - assoc->len = assoc->offset + rem_len; - assoc->offset = 0; - -send_rsp: - /* Send A2MP Rsp when all fragments are received */ - a2mp_send_getampassoc_rsp(hdev, rp->status); - a2mp_send_create_phy_link_req(hdev, rp->status); -} - -void amp_read_loc_assoc_frag(struct hci_dev *hdev, u8 phy_handle) -{ - struct hci_cp_read_local_amp_assoc cp; - struct amp_assoc *loc_assoc = &hdev->loc_assoc; - struct hci_request req; - int err; - - BT_DBG("%s handle %u", hdev->name, phy_handle); - - cp.phy_handle = phy_handle; - cp.max_len = cpu_to_le16(hdev->amp_assoc_size); - cp.len_so_far = cpu_to_le16(loc_assoc->offset); - - hci_req_init(&req, hdev); - hci_req_add(&req, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp); - err = hci_req_run_skb(&req, read_local_amp_assoc_complete); - if (err < 0) - a2mp_send_getampassoc_rsp(hdev, A2MP_STATUS_INVALID_CTRL_ID); -} - -void amp_read_loc_assoc(struct hci_dev *hdev, struct amp_mgr *mgr) -{ - struct hci_cp_read_local_amp_assoc cp; - struct hci_request req; - int err; - - memset(&hdev->loc_assoc, 0, sizeof(struct amp_assoc)); - memset(&cp, 0, sizeof(cp)); - - cp.max_len = cpu_to_le16(hdev->amp_assoc_size); - - set_bit(READ_LOC_AMP_ASSOC, &mgr->state); - hci_req_init(&req, hdev); - hci_req_add(&req, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp); - err = hci_req_run_skb(&req, read_local_amp_assoc_complete); - if (err < 0) - a2mp_send_getampassoc_rsp(hdev, A2MP_STATUS_INVALID_CTRL_ID); -} - -void amp_read_loc_assoc_final_data(struct hci_dev *hdev, - struct hci_conn *hcon) -{ - struct hci_cp_read_local_amp_assoc cp; - struct amp_mgr *mgr = hcon->amp_mgr; - struct hci_request req; - int err; - - if (!mgr) - return; - - cp.phy_handle = hcon->handle; - cp.len_so_far = cpu_to_le16(0); - cp.max_len = cpu_to_le16(hdev->amp_assoc_size); - - set_bit(READ_LOC_AMP_ASSOC_FINAL, &mgr->state); - - /* Read Local AMP Assoc final link information data */ - hci_req_init(&req, hdev); - hci_req_add(&req, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp); - err = hci_req_run_skb(&req, read_local_amp_assoc_complete); - if (err < 0) - a2mp_send_getampassoc_rsp(hdev, A2MP_STATUS_INVALID_CTRL_ID); -} - -static void write_remote_amp_assoc_complete(struct hci_dev *hdev, u8 status, - u16 opcode, struct sk_buff *skb) -{ - struct hci_rp_write_remote_amp_assoc *rp = (void *)skb->data; - - BT_DBG("%s status 0x%2.2x phy_handle 0x%2.2x", - hdev->name, rp->status, rp->phy_handle); - - if (rp->status) - return; - - amp_write_rem_assoc_continue(hdev, rp->phy_handle); -} - -/* Write AMP Assoc data fragments, returns true with last fragment written*/ -static bool amp_write_rem_assoc_frag(struct hci_dev *hdev, - struct hci_conn *hcon) -{ - struct hci_cp_write_remote_amp_assoc *cp; - struct amp_mgr *mgr = hcon->amp_mgr; - struct amp_ctrl *ctrl; - struct hci_request req; - u16 frag_len, len; - - ctrl = amp_ctrl_lookup(mgr, hcon->remote_id); - if (!ctrl) - return false; - - if (!ctrl->assoc_rem_len) { - BT_DBG("all fragments are written"); - ctrl->assoc_rem_len = ctrl->assoc_len; - ctrl->assoc_len_so_far = 0; - - amp_ctrl_put(ctrl); - return true; - } - - frag_len = min_t(u16, 248, ctrl->assoc_rem_len); - len = frag_len + sizeof(*cp); - - cp = kzalloc(len, GFP_KERNEL); - if (!cp) { - amp_ctrl_put(ctrl); - return false; - } - - BT_DBG("hcon %p ctrl %p frag_len %u assoc_len %u rem_len %u", - hcon, ctrl, frag_len, ctrl->assoc_len, ctrl->assoc_rem_len); - - cp->phy_handle = hcon->handle; - cp->len_so_far = cpu_to_le16(ctrl->assoc_len_so_far); - cp->rem_len = cpu_to_le16(ctrl->assoc_rem_len); - memcpy(cp->frag, ctrl->assoc, frag_len); - - ctrl->assoc_len_so_far += frag_len; - ctrl->assoc_rem_len -= frag_len; - - amp_ctrl_put(ctrl); - - hci_req_init(&req, hdev); - hci_req_add(&req, HCI_OP_WRITE_REMOTE_AMP_ASSOC, len, cp); - hci_req_run_skb(&req, write_remote_amp_assoc_complete); - - kfree(cp); - - return false; -} - -void amp_write_rem_assoc_continue(struct hci_dev *hdev, u8 handle) -{ - struct hci_conn *hcon; - - BT_DBG("%s phy handle 0x%2.2x", hdev->name, handle); - - hcon = hci_conn_hash_lookup_handle(hdev, handle); - if (!hcon) - return; - - /* Send A2MP create phylink rsp when all fragments are written */ - if (amp_write_rem_assoc_frag(hdev, hcon)) - a2mp_send_create_phy_link_rsp(hdev, 0); -} - -void amp_write_remote_assoc(struct hci_dev *hdev, u8 handle) -{ - struct hci_conn *hcon; - - BT_DBG("%s phy handle 0x%2.2x", hdev->name, handle); - - hcon = hci_conn_hash_lookup_handle(hdev, handle); - if (!hcon) - return; - - BT_DBG("%s phy handle 0x%2.2x hcon %p", hdev->name, handle, hcon); - - amp_write_rem_assoc_frag(hdev, hcon); -} - -static void create_phylink_complete(struct hci_dev *hdev, u8 status, - u16 opcode) -{ - struct hci_cp_create_phy_link *cp; - - BT_DBG("%s status 0x%2.2x", hdev->name, status); - - cp = hci_sent_cmd_data(hdev, HCI_OP_CREATE_PHY_LINK); - if (!cp) - return; - - hci_dev_lock(hdev); - - if (status) { - struct hci_conn *hcon; - - hcon = hci_conn_hash_lookup_handle(hdev, cp->phy_handle); - if (hcon) - hci_conn_del(hcon); - } else { - amp_write_remote_assoc(hdev, cp->phy_handle); - } - - hci_dev_unlock(hdev); -} - -void amp_create_phylink(struct hci_dev *hdev, struct amp_mgr *mgr, - struct hci_conn *hcon) -{ - struct hci_cp_create_phy_link cp; - struct hci_request req; - - cp.phy_handle = hcon->handle; - - BT_DBG("%s hcon %p phy handle 0x%2.2x", hdev->name, hcon, - hcon->handle); - - if (phylink_gen_key(mgr->l2cap_conn->hcon, cp.key, &cp.key_len, - &cp.key_type)) { - BT_DBG("Cannot create link key"); - return; - } - - hci_req_init(&req, hdev); - hci_req_add(&req, HCI_OP_CREATE_PHY_LINK, sizeof(cp), &cp); - hci_req_run(&req, create_phylink_complete); -} - -static void accept_phylink_complete(struct hci_dev *hdev, u8 status, - u16 opcode) -{ - struct hci_cp_accept_phy_link *cp; - - BT_DBG("%s status 0x%2.2x", hdev->name, status); - - if (status) - return; - - cp = hci_sent_cmd_data(hdev, HCI_OP_ACCEPT_PHY_LINK); - if (!cp) - return; - - amp_write_remote_assoc(hdev, cp->phy_handle); -} - -void amp_accept_phylink(struct hci_dev *hdev, struct amp_mgr *mgr, - struct hci_conn *hcon) -{ - struct hci_cp_accept_phy_link cp; - struct hci_request req; - - cp.phy_handle = hcon->handle; - - BT_DBG("%s hcon %p phy handle 0x%2.2x", hdev->name, hcon, - hcon->handle); - - if (phylink_gen_key(mgr->l2cap_conn->hcon, cp.key, &cp.key_len, - &cp.key_type)) { - BT_DBG("Cannot create link key"); - return; - } - - hci_req_init(&req, hdev); - hci_req_add(&req, HCI_OP_ACCEPT_PHY_LINK, sizeof(cp), &cp); - hci_req_run(&req, accept_phylink_complete); -} - -void amp_physical_cfm(struct hci_conn *bredr_hcon, struct hci_conn *hs_hcon) -{ - struct hci_dev *bredr_hdev = hci_dev_hold(bredr_hcon->hdev); - struct amp_mgr *mgr = hs_hcon->amp_mgr; - struct l2cap_chan *bredr_chan; - - BT_DBG("bredr_hcon %p hs_hcon %p mgr %p", bredr_hcon, hs_hcon, mgr); - - if (!bredr_hdev || !mgr || !mgr->bredr_chan) - return; - - bredr_chan = mgr->bredr_chan; - - l2cap_chan_lock(bredr_chan); - - set_bit(FLAG_EFS_ENABLE, &bredr_chan->flags); - bredr_chan->remote_amp_id = hs_hcon->remote_id; - bredr_chan->local_amp_id = hs_hcon->hdev->id; - bredr_chan->hs_hcon = hs_hcon; - bredr_chan->conn->mtu = hs_hcon->hdev->block_mtu; - - __l2cap_physical_cfm(bredr_chan, 0); - - l2cap_chan_unlock(bredr_chan); - - hci_dev_put(bredr_hdev); -} - -void amp_create_logical_link(struct l2cap_chan *chan) -{ - struct hci_conn *hs_hcon = chan->hs_hcon; - struct hci_cp_create_accept_logical_link cp; - struct hci_dev *hdev; - - BT_DBG("chan %p hs_hcon %p dst %pMR", chan, hs_hcon, - &chan->conn->hcon->dst); - - if (!hs_hcon) - return; - - hdev = hci_dev_hold(chan->hs_hcon->hdev); - if (!hdev) - return; - - cp.phy_handle = hs_hcon->handle; - - cp.tx_flow_spec.id = chan->local_id; - cp.tx_flow_spec.stype = chan->local_stype; - cp.tx_flow_spec.msdu = cpu_to_le16(chan->local_msdu); - cp.tx_flow_spec.sdu_itime = cpu_to_le32(chan->local_sdu_itime); - cp.tx_flow_spec.acc_lat = cpu_to_le32(chan->local_acc_lat); - cp.tx_flow_spec.flush_to = cpu_to_le32(chan->local_flush_to); - - cp.rx_flow_spec.id = chan->remote_id; - cp.rx_flow_spec.stype = chan->remote_stype; - cp.rx_flow_spec.msdu = cpu_to_le16(chan->remote_msdu); - cp.rx_flow_spec.sdu_itime = cpu_to_le32(chan->remote_sdu_itime); - cp.rx_flow_spec.acc_lat = cpu_to_le32(chan->remote_acc_lat); - cp.rx_flow_spec.flush_to = cpu_to_le32(chan->remote_flush_to); - - if (hs_hcon->out) - hci_send_cmd(hdev, HCI_OP_CREATE_LOGICAL_LINK, sizeof(cp), - &cp); - else - hci_send_cmd(hdev, HCI_OP_ACCEPT_LOGICAL_LINK, sizeof(cp), - &cp); - - hci_dev_put(hdev); -} - -void amp_disconnect_logical_link(struct hci_chan *hchan) -{ - struct hci_conn *hcon = hchan->conn; - struct hci_cp_disconn_logical_link cp; - - if (hcon->state != BT_CONNECTED) { - BT_DBG("hchan %p not connected", hchan); - return; - } - - cp.log_handle = cpu_to_le16(hchan->handle); - hci_send_cmd(hcon->hdev, HCI_OP_DISCONN_LOGICAL_LINK, sizeof(cp), &cp); -} - -void amp_destroy_logical_link(struct hci_chan *hchan, u8 reason) -{ - BT_DBG("hchan %p", hchan); - - hci_chan_del(hchan); -} diff --git a/net/bluetooth/amp.h b/net/bluetooth/amp.h deleted file mode 100644 index 97c87abd129f6..0000000000000 --- a/net/bluetooth/amp.h +++ /dev/null @@ -1,60 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - Copyright (c) 2011,2012 Intel Corp. - -*/ - -#ifndef __AMP_H -#define __AMP_H - -struct amp_ctrl { - struct list_head list; - struct kref kref; - __u8 id; - __u16 assoc_len_so_far; - __u16 assoc_rem_len; - __u16 assoc_len; - __u8 *assoc; -}; - -int amp_ctrl_put(struct amp_ctrl *ctrl); -void amp_ctrl_get(struct amp_ctrl *ctrl); -struct amp_ctrl *amp_ctrl_add(struct amp_mgr *mgr, u8 id); -struct amp_ctrl *amp_ctrl_lookup(struct amp_mgr *mgr, u8 id); -void amp_ctrl_list_flush(struct amp_mgr *mgr); - -struct hci_conn *phylink_add(struct hci_dev *hdev, struct amp_mgr *mgr, - u8 remote_id, bool out); - -int phylink_gen_key(struct hci_conn *hcon, u8 *data, u8 *len, u8 *type); - -void amp_read_loc_assoc_frag(struct hci_dev *hdev, u8 phy_handle); -void amp_read_loc_assoc(struct hci_dev *hdev, struct amp_mgr *mgr); -void amp_read_loc_assoc_final_data(struct hci_dev *hdev, - struct hci_conn *hcon); -void amp_create_phylink(struct hci_dev *hdev, struct amp_mgr *mgr, - struct hci_conn *hcon); -void amp_accept_phylink(struct hci_dev *hdev, struct amp_mgr *mgr, - struct hci_conn *hcon); - -#if IS_ENABLED(CONFIG_BT_HS) -void amp_create_logical_link(struct l2cap_chan *chan); -void amp_disconnect_logical_link(struct hci_chan *hchan); -#else -static inline void amp_create_logical_link(struct l2cap_chan *chan) -{ -} - -static inline void amp_disconnect_logical_link(struct hci_chan *hchan) -{ -} -#endif - -void amp_write_remote_assoc(struct hci_dev *hdev, u8 handle); -void amp_write_rem_assoc_continue(struct hci_dev *hdev, u8 handle); -void amp_physical_cfm(struct hci_conn *bredr_hcon, struct hci_conn *hs_hcon); -void amp_create_logical_link(struct l2cap_chan *chan); -void amp_disconnect_logical_link(struct hci_chan *hchan); -void amp_destroy_logical_link(struct hci_chan *hchan, u8 reason); - -#endif /* __AMP_H */ diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index a41d2693f4d8c..fc4d72f83ac25 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -36,7 +36,6 @@ #include "hci_request.h" #include "smp.h" -#include "a2mp.h" #include "eir.h" struct sco_param { @@ -1175,9 +1174,6 @@ void hci_conn_del(struct hci_conn *conn) } } - if (conn->amp_mgr) - amp_mgr_put(conn->amp_mgr); - skb_queue_purge(&conn->data_q); /* Remove the connection from the list and cleanup its remaining diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 31df5f5b79945..11b55d1f97727 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -36,8 +36,6 @@ #include "hci_request.h" #include "hci_debugfs.h" #include "hci_codec.h" -#include "a2mp.h" -#include "amp.h" #include "smp.h" #include "msft.h" #include "eir.h" diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 656f49b299d20..ab5a9d42fae71 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -39,8 +39,6 @@ #include #include "smp.h" -#include "a2mp.h" -#include "amp.h" #define LE_FLOWCTL_MAX_CREDITS 65535 @@ -167,24 +165,6 @@ static struct l2cap_chan *__l2cap_get_chan_by_ident(struct l2cap_conn *conn, return NULL; } -static struct l2cap_chan *l2cap_get_chan_by_ident(struct l2cap_conn *conn, - u8 ident) -{ - struct l2cap_chan *c; - - mutex_lock(&conn->chan_lock); - c = __l2cap_get_chan_by_ident(conn, ident); - if (c) { - /* Only lock if chan reference is not 0 */ - c = l2cap_chan_hold_unless_zero(c); - if (c) - l2cap_chan_lock(c); - } - mutex_unlock(&conn->chan_lock); - - return c; -} - static struct l2cap_chan *__l2cap_global_chan_by_addr(__le16 psm, bdaddr_t *src, u8 src_type) { @@ -651,7 +631,6 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err) chan->ops->teardown(chan, err); if (conn) { - struct amp_mgr *mgr = conn->hcon->amp_mgr; /* Delete from channel list */ list_del(&chan->list); @@ -666,16 +645,6 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err) if (chan->chan_type != L2CAP_CHAN_FIXED || test_bit(FLAG_HOLD_HCI_CONN, &chan->flags)) hci_conn_drop(conn->hcon); - - if (mgr && mgr->bredr_chan == chan) - mgr->bredr_chan = NULL; - } - - if (chan->hs_hchan) { - struct hci_chan *hs_hchan = chan->hs_hchan; - - BT_DBG("chan %p disconnect hs_hchan %p", chan, hs_hchan); - amp_disconnect_logical_link(hs_hchan); } if (test_bit(CONF_NOT_COMPLETE, &chan->conf_state)) @@ -977,12 +946,6 @@ static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, hci_send_acl(conn->hchan, skb, flags); } -static bool __chan_is_moving(struct l2cap_chan *chan) -{ - return chan->move_state != L2CAP_MOVE_STABLE && - chan->move_state != L2CAP_MOVE_WAIT_PREPARE; -} - static void l2cap_do_send(struct l2cap_chan *chan, struct sk_buff *skb) { struct hci_conn *hcon = chan->conn->hcon; @@ -991,15 +954,6 @@ static void l2cap_do_send(struct l2cap_chan *chan, struct sk_buff *skb) BT_DBG("chan %p, skb %p len %d priority %u", chan, skb, skb->len, skb->priority); - if (chan->hs_hcon && !__chan_is_moving(chan)) { - if (chan->hs_hchan) - hci_send_acl(chan->hs_hchan, skb, ACL_COMPLETE); - else - kfree_skb(skb); - - return; - } - /* Use NO_FLUSH for LE links (where this is the only option) or * if the BR/EDR link supports it and flushing has not been * explicitly requested (through FLAG_FLUSHABLE). @@ -1180,9 +1134,6 @@ static void l2cap_send_sframe(struct l2cap_chan *chan, if (!control->sframe) return; - if (__chan_is_moving(chan)) - return; - if (test_and_clear_bit(CONN_SEND_FBIT, &chan->conn_state) && !control->poll) control->final = 1; @@ -1237,40 +1188,6 @@ static inline int __l2cap_no_conn_pending(struct l2cap_chan *chan) return !test_bit(CONF_CONNECT_PEND, &chan->conf_state); } -static bool __amp_capable(struct l2cap_chan *chan) -{ - struct l2cap_conn *conn = chan->conn; - struct hci_dev *hdev; - bool amp_available = false; - - if (!(conn->local_fixed_chan & L2CAP_FC_A2MP)) - return false; - - if (!(conn->remote_fixed_chan & L2CAP_FC_A2MP)) - return false; - - read_lock(&hci_dev_list_lock); - list_for_each_entry(hdev, &hci_dev_list, list) { - if (hdev->amp_type != AMP_TYPE_BREDR && - test_bit(HCI_UP, &hdev->flags)) { - amp_available = true; - break; - } - } - read_unlock(&hci_dev_list_lock); - - if (chan->chan_policy == BT_CHANNEL_POLICY_AMP_PREFERRED) - return amp_available; - - return false; -} - -static bool l2cap_check_efs(struct l2cap_chan *chan) -{ - /* Check EFS parameters */ - return true; -} - void l2cap_send_conn_req(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; @@ -1286,76 +1203,6 @@ void l2cap_send_conn_req(struct l2cap_chan *chan) l2cap_send_cmd(conn, chan->ident, L2CAP_CONN_REQ, sizeof(req), &req); } -static void l2cap_send_create_chan_req(struct l2cap_chan *chan, u8 amp_id) -{ - struct l2cap_create_chan_req req; - req.scid = cpu_to_le16(chan->scid); - req.psm = chan->psm; - req.amp_id = amp_id; - - chan->ident = l2cap_get_ident(chan->conn); - - l2cap_send_cmd(chan->conn, chan->ident, L2CAP_CREATE_CHAN_REQ, - sizeof(req), &req); -} - -static void l2cap_move_setup(struct l2cap_chan *chan) -{ - struct sk_buff *skb; - - BT_DBG("chan %p", chan); - - if (chan->mode != L2CAP_MODE_ERTM) - return; - - __clear_retrans_timer(chan); - __clear_monitor_timer(chan); - __clear_ack_timer(chan); - - chan->retry_count = 0; - skb_queue_walk(&chan->tx_q, skb) { - if (bt_cb(skb)->l2cap.retries) - bt_cb(skb)->l2cap.retries = 1; - else - break; - } - - chan->expected_tx_seq = chan->buffer_seq; - - clear_bit(CONN_REJ_ACT, &chan->conn_state); - clear_bit(CONN_SREJ_ACT, &chan->conn_state); - l2cap_seq_list_clear(&chan->retrans_list); - l2cap_seq_list_clear(&chan->srej_list); - skb_queue_purge(&chan->srej_q); - - chan->tx_state = L2CAP_TX_STATE_XMIT; - chan->rx_state = L2CAP_RX_STATE_MOVE; - - set_bit(CONN_REMOTE_BUSY, &chan->conn_state); -} - -static void l2cap_move_done(struct l2cap_chan *chan) -{ - u8 move_role = chan->move_role; - BT_DBG("chan %p", chan); - - chan->move_state = L2CAP_MOVE_STABLE; - chan->move_role = L2CAP_MOVE_ROLE_NONE; - - if (chan->mode != L2CAP_MODE_ERTM) - return; - - switch (move_role) { - case L2CAP_MOVE_ROLE_INITIATOR: - l2cap_tx(chan, NULL, NULL, L2CAP_EV_EXPLICIT_POLL); - chan->rx_state = L2CAP_RX_STATE_WAIT_F; - break; - case L2CAP_MOVE_ROLE_RESPONDER: - chan->rx_state = L2CAP_RX_STATE_WAIT_P; - break; - } -} - static void l2cap_chan_ready(struct l2cap_chan *chan) { /* The channel may have already been flagged as connected in @@ -1505,10 +1352,7 @@ static void l2cap_le_start(struct l2cap_chan *chan) static void l2cap_start_connection(struct l2cap_chan *chan) { - if (__amp_capable(chan)) { - BT_DBG("chan %p AMP capable: discover AMPs", chan); - a2mp_discover_amp(chan); - } else if (chan->conn->hcon->type == LE_LINK) { + if (chan->conn->hcon->type == LE_LINK) { l2cap_le_start(chan); } else { l2cap_send_conn_req(chan); @@ -1611,11 +1455,6 @@ static void l2cap_send_disconn_req(struct l2cap_chan *chan, int err) __clear_ack_timer(chan); } - if (chan->scid == L2CAP_CID_A2MP) { - l2cap_state_change(chan, BT_DISCONN); - return; - } - req.dcid = cpu_to_le16(chan->dcid); req.scid = cpu_to_le16(chan->scid); l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_DISCONN_REQ, @@ -1754,11 +1593,6 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) l2cap_chan_lock(chan); - if (chan->scid == L2CAP_CID_A2MP) { - l2cap_chan_unlock(chan); - continue; - } - if (hcon->type == LE_LINK) { l2cap_le_start(chan); } else if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) { @@ -2067,9 +1901,6 @@ static void l2cap_streaming_send(struct l2cap_chan *chan, BT_DBG("chan %p, skbs %p", chan, skbs); - if (__chan_is_moving(chan)) - return; - skb_queue_splice_tail_init(skbs, &chan->tx_q); while (!skb_queue_empty(&chan->tx_q)) { @@ -2112,9 +1943,6 @@ static int l2cap_ertm_send(struct l2cap_chan *chan) if (test_bit(CONN_REMOTE_BUSY, &chan->conn_state)) return 0; - if (__chan_is_moving(chan)) - return 0; - while (chan->tx_send_head && chan->unacked_frames < chan->remote_tx_win && chan->tx_state == L2CAP_TX_STATE_XMIT) { @@ -2180,9 +2008,6 @@ static void l2cap_ertm_resend(struct l2cap_chan *chan) if (test_bit(CONN_REMOTE_BUSY, &chan->conn_state)) return; - if (__chan_is_moving(chan)) - return; - while (chan->retrans_list.head != L2CAP_SEQ_LIST_CLEAR) { seq = l2cap_seq_list_pop(&chan->retrans_list); @@ -2522,8 +2347,7 @@ static int l2cap_segment_sdu(struct l2cap_chan *chan, pdu_len = chan->conn->mtu; /* Constrain PDU size for BR/EDR connections */ - if (!chan->hs_hcon) - pdu_len = min_t(size_t, pdu_len, L2CAP_BREDR_MAX_PAYLOAD); + pdu_len = min_t(size_t, pdu_len, L2CAP_BREDR_MAX_PAYLOAD); /* Adjust for largest possible L2CAP overhead. */ if (chan->fcs) @@ -3287,11 +3111,6 @@ int l2cap_ertm_init(struct l2cap_chan *chan) skb_queue_head_init(&chan->tx_q); - chan->local_amp_id = AMP_ID_BREDR; - chan->move_id = AMP_ID_BREDR; - chan->move_state = L2CAP_MOVE_STABLE; - chan->move_role = L2CAP_MOVE_ROLE_NONE; - if (chan->mode != L2CAP_MODE_ERTM) return 0; @@ -3326,52 +3145,19 @@ static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask) static inline bool __l2cap_ews_supported(struct l2cap_conn *conn) { - return ((conn->local_fixed_chan & L2CAP_FC_A2MP) && - (conn->feat_mask & L2CAP_FEAT_EXT_WINDOW)); + return (conn->feat_mask & L2CAP_FEAT_EXT_WINDOW); } static inline bool __l2cap_efs_supported(struct l2cap_conn *conn) { - return ((conn->local_fixed_chan & L2CAP_FC_A2MP) && - (conn->feat_mask & L2CAP_FEAT_EXT_FLOW)); + return (conn->feat_mask & L2CAP_FEAT_EXT_FLOW); } static void __l2cap_set_ertm_timeouts(struct l2cap_chan *chan, struct l2cap_conf_rfc *rfc) { - if (chan->local_amp_id != AMP_ID_BREDR && chan->hs_hcon) { - u64 ertm_to = chan->hs_hcon->hdev->amp_be_flush_to; - - /* Class 1 devices have must have ERTM timeouts - * exceeding the Link Supervision Timeout. The - * default Link Supervision Timeout for AMP - * controllers is 10 seconds. - * - * Class 1 devices use 0xffffffff for their - * best-effort flush timeout, so the clamping logic - * will result in a timeout that meets the above - * requirement. ERTM timeouts are 16-bit values, so - * the maximum timeout is 65.535 seconds. - */ - - /* Convert timeout to milliseconds and round */ - ertm_to = DIV_ROUND_UP_ULL(ertm_to, 1000); - - /* This is the recommended formula for class 2 devices - * that start ERTM timers when packets are sent to the - * controller. - */ - ertm_to = 3 * ertm_to + 500; - - if (ertm_to > 0xffff) - ertm_to = 0xffff; - - rfc->retrans_timeout = cpu_to_le16((u16) ertm_to); - rfc->monitor_timeout = rfc->retrans_timeout; - } else { - rfc->retrans_timeout = cpu_to_le16(L2CAP_DEFAULT_RETRANS_TO); - rfc->monitor_timeout = cpu_to_le16(L2CAP_DEFAULT_MONITOR_TO); - } + rfc->retrans_timeout = cpu_to_le16(L2CAP_DEFAULT_RETRANS_TO); + rfc->monitor_timeout = cpu_to_le16(L2CAP_DEFAULT_MONITOR_TO); } static inline void l2cap_txwin_setup(struct l2cap_chan *chan) @@ -3623,13 +3409,7 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data case L2CAP_CONF_EWS: if (olen != 2) break; - if (!(chan->conn->local_fixed_chan & L2CAP_FC_A2MP)) - return -ECONNREFUSED; - set_bit(FLAG_EXT_CTRL, &chan->flags); - set_bit(CONF_EWS_RECV, &chan->conf_state); - chan->tx_win_max = L2CAP_DEFAULT_EXT_WINDOW; - chan->remote_tx_win = val; - break; + return -ECONNREFUSED; default: if (hint) @@ -4027,11 +3807,7 @@ void __l2cap_connect_rsp_defer(struct l2cap_chan *chan) rsp.dcid = cpu_to_le16(chan->scid); rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS); rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); - - if (chan->hs_hcon) - rsp_code = L2CAP_CREATE_CHAN_RSP; - else - rsp_code = L2CAP_CONN_RSP; + rsp_code = L2CAP_CONN_RSP; BT_DBG("chan %p rsp_code %u", chan, rsp_code); @@ -4190,7 +3966,6 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, chan->dst_type = bdaddr_dst_type(conn->hcon); chan->psm = psm; chan->dcid = scid; - chan->local_amp_id = amp_id; __l2cap_chan_add(conn, chan); @@ -4516,10 +4291,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, /* check compatibility */ /* Send rsp for BR/EDR channel */ - if (!chan->hs_hcon) - l2cap_send_efs_conf_rsp(chan, rsp, cmd->ident, flags); - else - chan->ident = cmd->ident; + l2cap_send_efs_conf_rsp(chan, rsp, cmd->ident, flags); } unlock: @@ -4571,15 +4343,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, goto done; } - if (!chan->hs_hcon) { - l2cap_send_efs_conf_rsp(chan, buf, cmd->ident, - 0); - } else { - if (l2cap_check_efs(chan)) { - amp_create_logical_link(chan); - chan->ident = cmd->ident; - } - } + l2cap_send_efs_conf_rsp(chan, buf, cmd->ident, 0); } goto done; @@ -4750,9 +4514,6 @@ static inline int l2cap_information_req(struct l2cap_conn *conn, if (!disable_ertm) feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING | L2CAP_FEAT_FCS; - if (conn->local_fixed_chan & L2CAP_FC_A2MP) - feat_mask |= L2CAP_FEAT_EXT_FLOW - | L2CAP_FEAT_EXT_WINDOW; put_unaligned_le32(feat_mask, rsp->data); l2cap_send_cmd(conn, cmd->ident, L2CAP_INFO_RSP, sizeof(buf), @@ -4841,751 +4602,6 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, return 0; } -static int l2cap_create_channel_req(struct l2cap_conn *conn, - struct l2cap_cmd_hdr *cmd, - u16 cmd_len, void *data) -{ - struct l2cap_create_chan_req *req = data; - struct l2cap_create_chan_rsp rsp; - struct l2cap_chan *chan; - struct hci_dev *hdev; - u16 psm, scid; - - if (cmd_len != sizeof(*req)) - return -EPROTO; - - if (!(conn->local_fixed_chan & L2CAP_FC_A2MP)) - return -EINVAL; - - psm = le16_to_cpu(req->psm); - scid = le16_to_cpu(req->scid); - - BT_DBG("psm 0x%2.2x, scid 0x%4.4x, amp_id %d", psm, scid, req->amp_id); - - /* For controller id 0 make BR/EDR connection */ - if (req->amp_id == AMP_ID_BREDR) { - l2cap_connect(conn, cmd, data, L2CAP_CREATE_CHAN_RSP, - req->amp_id); - return 0; - } - - /* Validate AMP controller id */ - hdev = hci_dev_get(req->amp_id); - if (!hdev) - goto error; - - if (hdev->dev_type != HCI_AMP || !test_bit(HCI_UP, &hdev->flags)) { - hci_dev_put(hdev); - goto error; - } - - chan = l2cap_connect(conn, cmd, data, L2CAP_CREATE_CHAN_RSP, - req->amp_id); - if (chan) { - struct amp_mgr *mgr = conn->hcon->amp_mgr; - struct hci_conn *hs_hcon; - - hs_hcon = hci_conn_hash_lookup_ba(hdev, AMP_LINK, - &conn->hcon->dst); - if (!hs_hcon) { - hci_dev_put(hdev); - cmd_reject_invalid_cid(conn, cmd->ident, chan->scid, - chan->dcid); - return 0; - } - - BT_DBG("mgr %p bredr_chan %p hs_hcon %p", mgr, chan, hs_hcon); - - mgr->bredr_chan = chan; - chan->hs_hcon = hs_hcon; - chan->fcs = L2CAP_FCS_NONE; - conn->mtu = hdev->block_mtu; - } - - hci_dev_put(hdev); - - return 0; - -error: - rsp.dcid = 0; - rsp.scid = cpu_to_le16(scid); - rsp.result = cpu_to_le16(L2CAP_CR_BAD_AMP); - rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); - - l2cap_send_cmd(conn, cmd->ident, L2CAP_CREATE_CHAN_RSP, - sizeof(rsp), &rsp); - - return 0; -} - -static void l2cap_send_move_chan_req(struct l2cap_chan *chan, u8 dest_amp_id) -{ - struct l2cap_move_chan_req req; - u8 ident; - - BT_DBG("chan %p, dest_amp_id %d", chan, dest_amp_id); - - ident = l2cap_get_ident(chan->conn); - chan->ident = ident; - - req.icid = cpu_to_le16(chan->scid); - req.dest_amp_id = dest_amp_id; - - l2cap_send_cmd(chan->conn, ident, L2CAP_MOVE_CHAN_REQ, sizeof(req), - &req); - - __set_chan_timer(chan, L2CAP_MOVE_TIMEOUT); -} - -static void l2cap_send_move_chan_rsp(struct l2cap_chan *chan, u16 result) -{ - struct l2cap_move_chan_rsp rsp; - - BT_DBG("chan %p, result 0x%4.4x", chan, result); - - rsp.icid = cpu_to_le16(chan->dcid); - rsp.result = cpu_to_le16(result); - - l2cap_send_cmd(chan->conn, chan->ident, L2CAP_MOVE_CHAN_RSP, - sizeof(rsp), &rsp); -} - -static void l2cap_send_move_chan_cfm(struct l2cap_chan *chan, u16 result) -{ - struct l2cap_move_chan_cfm cfm; - - BT_DBG("chan %p, result 0x%4.4x", chan, result); - - chan->ident = l2cap_get_ident(chan->conn); - - cfm.icid = cpu_to_le16(chan->scid); - cfm.result = cpu_to_le16(result); - - l2cap_send_cmd(chan->conn, chan->ident, L2CAP_MOVE_CHAN_CFM, - sizeof(cfm), &cfm); - - __set_chan_timer(chan, L2CAP_MOVE_TIMEOUT); -} - -static void l2cap_send_move_chan_cfm_icid(struct l2cap_conn *conn, u16 icid) -{ - struct l2cap_move_chan_cfm cfm; - - BT_DBG("conn %p, icid 0x%4.4x", conn, icid); - - cfm.icid = cpu_to_le16(icid); - cfm.result = cpu_to_le16(L2CAP_MC_UNCONFIRMED); - - l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_MOVE_CHAN_CFM, - sizeof(cfm), &cfm); -} - -static void l2cap_send_move_chan_cfm_rsp(struct l2cap_conn *conn, u8 ident, - u16 icid) -{ - struct l2cap_move_chan_cfm_rsp rsp; - - BT_DBG("icid 0x%4.4x", icid); - - rsp.icid = cpu_to_le16(icid); - l2cap_send_cmd(conn, ident, L2CAP_MOVE_CHAN_CFM_RSP, sizeof(rsp), &rsp); -} - -static void __release_logical_link(struct l2cap_chan *chan) -{ - chan->hs_hchan = NULL; - chan->hs_hcon = NULL; - - /* Placeholder - release the logical link */ -} - -static void l2cap_logical_fail(struct l2cap_chan *chan) -{ - /* Logical link setup failed */ - if (chan->state != BT_CONNECTED) { - /* Create channel failure, disconnect */ - l2cap_send_disconn_req(chan, ECONNRESET); - return; - } - - switch (chan->move_role) { - case L2CAP_MOVE_ROLE_RESPONDER: - l2cap_move_done(chan); - l2cap_send_move_chan_rsp(chan, L2CAP_MR_NOT_SUPP); - break; - case L2CAP_MOVE_ROLE_INITIATOR: - if (chan->move_state == L2CAP_MOVE_WAIT_LOGICAL_COMP || - chan->move_state == L2CAP_MOVE_WAIT_LOGICAL_CFM) { - /* Remote has only sent pending or - * success responses, clean up - */ - l2cap_move_done(chan); - } - - /* Other amp move states imply that the move - * has already aborted - */ - l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED); - break; - } -} - -static void l2cap_logical_finish_create(struct l2cap_chan *chan, - struct hci_chan *hchan) -{ - struct l2cap_conf_rsp rsp; - - chan->hs_hchan = hchan; - chan->hs_hcon->l2cap_data = chan->conn; - - l2cap_send_efs_conf_rsp(chan, &rsp, chan->ident, 0); - - if (test_bit(CONF_INPUT_DONE, &chan->conf_state)) { - int err; - - set_default_fcs(chan); - - err = l2cap_ertm_init(chan); - if (err < 0) - l2cap_send_disconn_req(chan, -err); - else - l2cap_chan_ready(chan); - } -} - -static void l2cap_logical_finish_move(struct l2cap_chan *chan, - struct hci_chan *hchan) -{ - chan->hs_hcon = hchan->conn; - chan->hs_hcon->l2cap_data = chan->conn; - - BT_DBG("move_state %d", chan->move_state); - - switch (chan->move_state) { - case L2CAP_MOVE_WAIT_LOGICAL_COMP: - /* Move confirm will be sent after a success - * response is received - */ - chan->move_state = L2CAP_MOVE_WAIT_RSP_SUCCESS; - break; - case L2CAP_MOVE_WAIT_LOGICAL_CFM: - if (test_bit(CONN_LOCAL_BUSY, &chan->conn_state)) { - chan->move_state = L2CAP_MOVE_WAIT_LOCAL_BUSY; - } else if (chan->move_role == L2CAP_MOVE_ROLE_INITIATOR) { - chan->move_state = L2CAP_MOVE_WAIT_CONFIRM_RSP; - l2cap_send_move_chan_cfm(chan, L2CAP_MC_CONFIRMED); - } else if (chan->move_role == L2CAP_MOVE_ROLE_RESPONDER) { - chan->move_state = L2CAP_MOVE_WAIT_CONFIRM; - l2cap_send_move_chan_rsp(chan, L2CAP_MR_SUCCESS); - } - break; - default: - /* Move was not in expected state, free the channel */ - __release_logical_link(chan); - - chan->move_state = L2CAP_MOVE_STABLE; - } -} - -/* Call with chan locked */ -void l2cap_logical_cfm(struct l2cap_chan *chan, struct hci_chan *hchan, - u8 status) -{ - BT_DBG("chan %p, hchan %p, status %d", chan, hchan, status); - - if (status) { - l2cap_logical_fail(chan); - __release_logical_link(chan); - return; - } - - if (chan->state != BT_CONNECTED) { - /* Ignore logical link if channel is on BR/EDR */ - if (chan->local_amp_id != AMP_ID_BREDR) - l2cap_logical_finish_create(chan, hchan); - } else { - l2cap_logical_finish_move(chan, hchan); - } -} - -void l2cap_move_start(struct l2cap_chan *chan) -{ - BT_DBG("chan %p", chan); - - if (chan->local_amp_id == AMP_ID_BREDR) { - if (chan->chan_policy != BT_CHANNEL_POLICY_AMP_PREFERRED) - return; - chan->move_role = L2CAP_MOVE_ROLE_INITIATOR; - chan->move_state = L2CAP_MOVE_WAIT_PREPARE; - /* Placeholder - start physical link setup */ - } else { - chan->move_role = L2CAP_MOVE_ROLE_INITIATOR; - chan->move_state = L2CAP_MOVE_WAIT_RSP_SUCCESS; - chan->move_id = 0; - l2cap_move_setup(chan); - l2cap_send_move_chan_req(chan, 0); - } -} - -static void l2cap_do_create(struct l2cap_chan *chan, int result, - u8 local_amp_id, u8 remote_amp_id) -{ - BT_DBG("chan %p state %s %u -> %u", chan, state_to_string(chan->state), - local_amp_id, remote_amp_id); - - chan->fcs = L2CAP_FCS_NONE; - - /* Outgoing channel on AMP */ - if (chan->state == BT_CONNECT) { - if (result == L2CAP_CR_SUCCESS) { - chan->local_amp_id = local_amp_id; - l2cap_send_create_chan_req(chan, remote_amp_id); - } else { - /* Revert to BR/EDR connect */ - l2cap_send_conn_req(chan); - } - - return; - } - - /* Incoming channel on AMP */ - if (__l2cap_no_conn_pending(chan)) { - struct l2cap_conn_rsp rsp; - char buf[128]; - rsp.scid = cpu_to_le16(chan->dcid); - rsp.dcid = cpu_to_le16(chan->scid); - - if (result == L2CAP_CR_SUCCESS) { - /* Send successful response */ - rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS); - rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); - } else { - /* Send negative response */ - rsp.result = cpu_to_le16(L2CAP_CR_NO_MEM); - rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); - } - - l2cap_send_cmd(chan->conn, chan->ident, L2CAP_CREATE_CHAN_RSP, - sizeof(rsp), &rsp); - - if (result == L2CAP_CR_SUCCESS) { - l2cap_state_change(chan, BT_CONFIG); - set_bit(CONF_REQ_SENT, &chan->conf_state); - l2cap_send_cmd(chan->conn, l2cap_get_ident(chan->conn), - L2CAP_CONF_REQ, - l2cap_build_conf_req(chan, buf, sizeof(buf)), buf); - chan->num_conf_req++; - } - } -} - -static void l2cap_do_move_initiate(struct l2cap_chan *chan, u8 local_amp_id, - u8 remote_amp_id) -{ - l2cap_move_setup(chan); - chan->move_id = local_amp_id; - chan->move_state = L2CAP_MOVE_WAIT_RSP; - - l2cap_send_move_chan_req(chan, remote_amp_id); -} - -static void l2cap_do_move_respond(struct l2cap_chan *chan, int result) -{ - struct hci_chan *hchan = NULL; - - /* Placeholder - get hci_chan for logical link */ - - if (hchan) { - if (hchan->state == BT_CONNECTED) { - /* Logical link is ready to go */ - chan->hs_hcon = hchan->conn; - chan->hs_hcon->l2cap_data = chan->conn; - chan->move_state = L2CAP_MOVE_WAIT_CONFIRM; - l2cap_send_move_chan_rsp(chan, L2CAP_MR_SUCCESS); - - l2cap_logical_cfm(chan, hchan, L2CAP_MR_SUCCESS); - } else { - /* Wait for logical link to be ready */ - chan->move_state = L2CAP_MOVE_WAIT_LOGICAL_CFM; - } - } else { - /* Logical link not available */ - l2cap_send_move_chan_rsp(chan, L2CAP_MR_NOT_ALLOWED); - } -} - -static void l2cap_do_move_cancel(struct l2cap_chan *chan, int result) -{ - if (chan->move_role == L2CAP_MOVE_ROLE_RESPONDER) { - u8 rsp_result; - if (result == -EINVAL) - rsp_result = L2CAP_MR_BAD_ID; - else - rsp_result = L2CAP_MR_NOT_ALLOWED; - - l2cap_send_move_chan_rsp(chan, rsp_result); - } - - chan->move_role = L2CAP_MOVE_ROLE_NONE; - chan->move_state = L2CAP_MOVE_STABLE; - - /* Restart data transmission */ - l2cap_ertm_send(chan); -} - -/* Invoke with locked chan */ -void __l2cap_physical_cfm(struct l2cap_chan *chan, int result) -{ - u8 local_amp_id = chan->local_amp_id; - u8 remote_amp_id = chan->remote_amp_id; - - BT_DBG("chan %p, result %d, local_amp_id %d, remote_amp_id %d", - chan, result, local_amp_id, remote_amp_id); - - if (chan->state == BT_DISCONN || chan->state == BT_CLOSED) - return; - - if (chan->state != BT_CONNECTED) { - l2cap_do_create(chan, result, local_amp_id, remote_amp_id); - } else if (result != L2CAP_MR_SUCCESS) { - l2cap_do_move_cancel(chan, result); - } else { - switch (chan->move_role) { - case L2CAP_MOVE_ROLE_INITIATOR: - l2cap_do_move_initiate(chan, local_amp_id, - remote_amp_id); - break; - case L2CAP_MOVE_ROLE_RESPONDER: - l2cap_do_move_respond(chan, result); - break; - default: - l2cap_do_move_cancel(chan, result); - break; - } - } -} - -static inline int l2cap_move_channel_req(struct l2cap_conn *conn, - struct l2cap_cmd_hdr *cmd, - u16 cmd_len, void *data) -{ - struct l2cap_move_chan_req *req = data; - struct l2cap_move_chan_rsp rsp; - struct l2cap_chan *chan; - u16 icid = 0; - u16 result = L2CAP_MR_NOT_ALLOWED; - - if (cmd_len != sizeof(*req)) - return -EPROTO; - - icid = le16_to_cpu(req->icid); - - BT_DBG("icid 0x%4.4x, dest_amp_id %d", icid, req->dest_amp_id); - - if (!(conn->local_fixed_chan & L2CAP_FC_A2MP)) - return -EINVAL; - - chan = l2cap_get_chan_by_dcid(conn, icid); - if (!chan) { - rsp.icid = cpu_to_le16(icid); - rsp.result = cpu_to_le16(L2CAP_MR_NOT_ALLOWED); - l2cap_send_cmd(conn, cmd->ident, L2CAP_MOVE_CHAN_RSP, - sizeof(rsp), &rsp); - return 0; - } - - chan->ident = cmd->ident; - - if (chan->scid < L2CAP_CID_DYN_START || - chan->chan_policy == BT_CHANNEL_POLICY_BREDR_ONLY || - (chan->mode != L2CAP_MODE_ERTM && - chan->mode != L2CAP_MODE_STREAMING)) { - result = L2CAP_MR_NOT_ALLOWED; - goto send_move_response; - } - - if (chan->local_amp_id == req->dest_amp_id) { - result = L2CAP_MR_SAME_ID; - goto send_move_response; - } - - if (req->dest_amp_id != AMP_ID_BREDR) { - struct hci_dev *hdev; - hdev = hci_dev_get(req->dest_amp_id); - if (!hdev || hdev->dev_type != HCI_AMP || - !test_bit(HCI_UP, &hdev->flags)) { - if (hdev) - hci_dev_put(hdev); - - result = L2CAP_MR_BAD_ID; - goto send_move_response; - } - hci_dev_put(hdev); - } - - /* Detect a move collision. Only send a collision response - * if this side has "lost", otherwise proceed with the move. - * The winner has the larger bd_addr. - */ - if ((__chan_is_moving(chan) || - chan->move_role != L2CAP_MOVE_ROLE_NONE) && - bacmp(&conn->hcon->src, &conn->hcon->dst) > 0) { - result = L2CAP_MR_COLLISION; - goto send_move_response; - } - - chan->move_role = L2CAP_MOVE_ROLE_RESPONDER; - l2cap_move_setup(chan); - chan->move_id = req->dest_amp_id; - - if (req->dest_amp_id == AMP_ID_BREDR) { - /* Moving to BR/EDR */ - if (test_bit(CONN_LOCAL_BUSY, &chan->conn_state)) { - chan->move_state = L2CAP_MOVE_WAIT_LOCAL_BUSY; - result = L2CAP_MR_PEND; - } else { - chan->move_state = L2CAP_MOVE_WAIT_CONFIRM; - result = L2CAP_MR_SUCCESS; - } - } else { - chan->move_state = L2CAP_MOVE_WAIT_PREPARE; - /* Placeholder - uncomment when amp functions are available */ - /*amp_accept_physical(chan, req->dest_amp_id);*/ - result = L2CAP_MR_PEND; - } - -send_move_response: - l2cap_send_move_chan_rsp(chan, result); - - l2cap_chan_unlock(chan); - l2cap_chan_put(chan); - - return 0; -} - -static void l2cap_move_continue(struct l2cap_conn *conn, u16 icid, u16 result) -{ - struct l2cap_chan *chan; - struct hci_chan *hchan = NULL; - - chan = l2cap_get_chan_by_scid(conn, icid); - if (!chan) { - l2cap_send_move_chan_cfm_icid(conn, icid); - return; - } - - __clear_chan_timer(chan); - if (result == L2CAP_MR_PEND) - __set_chan_timer(chan, L2CAP_MOVE_ERTX_TIMEOUT); - - switch (chan->move_state) { - case L2CAP_MOVE_WAIT_LOGICAL_COMP: - /* Move confirm will be sent when logical link - * is complete. - */ - chan->move_state = L2CAP_MOVE_WAIT_LOGICAL_CFM; - break; - case L2CAP_MOVE_WAIT_RSP_SUCCESS: - if (result == L2CAP_MR_PEND) { - break; - } else if (test_bit(CONN_LOCAL_BUSY, - &chan->conn_state)) { - chan->move_state = L2CAP_MOVE_WAIT_LOCAL_BUSY; - } else { - /* Logical link is up or moving to BR/EDR, - * proceed with move - */ - chan->move_state = L2CAP_MOVE_WAIT_CONFIRM_RSP; - l2cap_send_move_chan_cfm(chan, L2CAP_MC_CONFIRMED); - } - break; - case L2CAP_MOVE_WAIT_RSP: - /* Moving to AMP */ - if (result == L2CAP_MR_SUCCESS) { - /* Remote is ready, send confirm immediately - * after logical link is ready - */ - chan->move_state = L2CAP_MOVE_WAIT_LOGICAL_CFM; - } else { - /* Both logical link and move success - * are required to confirm - */ - chan->move_state = L2CAP_MOVE_WAIT_LOGICAL_COMP; - } - - /* Placeholder - get hci_chan for logical link */ - if (!hchan) { - /* Logical link not available */ - l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED); - break; - } - - /* If the logical link is not yet connected, do not - * send confirmation. - */ - if (hchan->state != BT_CONNECTED) - break; - - /* Logical link is already ready to go */ - - chan->hs_hcon = hchan->conn; - chan->hs_hcon->l2cap_data = chan->conn; - - if (result == L2CAP_MR_SUCCESS) { - /* Can confirm now */ - l2cap_send_move_chan_cfm(chan, L2CAP_MC_CONFIRMED); - } else { - /* Now only need move success - * to confirm - */ - chan->move_state = L2CAP_MOVE_WAIT_RSP_SUCCESS; - } - - l2cap_logical_cfm(chan, hchan, L2CAP_MR_SUCCESS); - break; - default: - /* Any other amp move state means the move failed. */ - chan->move_id = chan->local_amp_id; - l2cap_move_done(chan); - l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED); - } - - l2cap_chan_unlock(chan); - l2cap_chan_put(chan); -} - -static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid, - u16 result) -{ - struct l2cap_chan *chan; - - chan = l2cap_get_chan_by_ident(conn, ident); - if (!chan) { - /* Could not locate channel, icid is best guess */ - l2cap_send_move_chan_cfm_icid(conn, icid); - return; - } - - __clear_chan_timer(chan); - - if (chan->move_role == L2CAP_MOVE_ROLE_INITIATOR) { - if (result == L2CAP_MR_COLLISION) { - chan->move_role = L2CAP_MOVE_ROLE_RESPONDER; - } else { - /* Cleanup - cancel move */ - chan->move_id = chan->local_amp_id; - l2cap_move_done(chan); - } - } - - l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED); - - l2cap_chan_unlock(chan); - l2cap_chan_put(chan); -} - -static int l2cap_move_channel_rsp(struct l2cap_conn *conn, - struct l2cap_cmd_hdr *cmd, - u16 cmd_len, void *data) -{ - struct l2cap_move_chan_rsp *rsp = data; - u16 icid, result; - - if (cmd_len != sizeof(*rsp)) - return -EPROTO; - - icid = le16_to_cpu(rsp->icid); - result = le16_to_cpu(rsp->result); - - BT_DBG("icid 0x%4.4x, result 0x%4.4x", icid, result); - - if (result == L2CAP_MR_SUCCESS || result == L2CAP_MR_PEND) - l2cap_move_continue(conn, icid, result); - else - l2cap_move_fail(conn, cmd->ident, icid, result); - - return 0; -} - -static int l2cap_move_channel_confirm(struct l2cap_conn *conn, - struct l2cap_cmd_hdr *cmd, - u16 cmd_len, void *data) -{ - struct l2cap_move_chan_cfm *cfm = data; - struct l2cap_chan *chan; - u16 icid, result; - - if (cmd_len != sizeof(*cfm)) - return -EPROTO; - - icid = le16_to_cpu(cfm->icid); - result = le16_to_cpu(cfm->result); - - BT_DBG("icid 0x%4.4x, result 0x%4.4x", icid, result); - - chan = l2cap_get_chan_by_dcid(conn, icid); - if (!chan) { - /* Spec requires a response even if the icid was not found */ - l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid); - return 0; - } - - if (chan->move_state == L2CAP_MOVE_WAIT_CONFIRM) { - if (result == L2CAP_MC_CONFIRMED) { - chan->local_amp_id = chan->move_id; - if (chan->local_amp_id == AMP_ID_BREDR) - __release_logical_link(chan); - } else { - chan->move_id = chan->local_amp_id; - } - - l2cap_move_done(chan); - } - - l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid); - - l2cap_chan_unlock(chan); - l2cap_chan_put(chan); - - return 0; -} - -static inline int l2cap_move_channel_confirm_rsp(struct l2cap_conn *conn, - struct l2cap_cmd_hdr *cmd, - u16 cmd_len, void *data) -{ - struct l2cap_move_chan_cfm_rsp *rsp = data; - struct l2cap_chan *chan; - u16 icid; - - if (cmd_len != sizeof(*rsp)) - return -EPROTO; - - icid = le16_to_cpu(rsp->icid); - - BT_DBG("icid 0x%4.4x", icid); - - chan = l2cap_get_chan_by_scid(conn, icid); - if (!chan) - return 0; - - __clear_chan_timer(chan); - - if (chan->move_state == L2CAP_MOVE_WAIT_CONFIRM_RSP) { - chan->local_amp_id = chan->move_id; - - if (chan->local_amp_id == AMP_ID_BREDR && chan->hs_hchan) - __release_logical_link(chan); - - l2cap_move_done(chan); - } - - l2cap_chan_unlock(chan); - l2cap_chan_put(chan); - - return 0; -} - static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) @@ -5745,7 +4761,6 @@ static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn, break; case L2CAP_CONN_RSP: - case L2CAP_CREATE_CHAN_RSP: l2cap_connect_create_rsp(conn, cmd, cmd_len, data); break; @@ -5780,26 +4795,6 @@ static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn, l2cap_information_rsp(conn, cmd, cmd_len, data); break; - case L2CAP_CREATE_CHAN_REQ: - err = l2cap_create_channel_req(conn, cmd, cmd_len, data); - break; - - case L2CAP_MOVE_CHAN_REQ: - err = l2cap_move_channel_req(conn, cmd, cmd_len, data); - break; - - case L2CAP_MOVE_CHAN_RSP: - l2cap_move_channel_rsp(conn, cmd, cmd_len, data); - break; - - case L2CAP_MOVE_CHAN_CFM: - err = l2cap_move_channel_confirm(conn, cmd, cmd_len, data); - break; - - case L2CAP_MOVE_CHAN_CFM_RSP: - l2cap_move_channel_confirm_rsp(conn, cmd, cmd_len, data); - break; - default: BT_ERR("Unknown BR/EDR signaling command 0x%2.2x", cmd->code); err = -EINVAL; @@ -7051,8 +6046,8 @@ static int l2cap_rx_state_recv(struct l2cap_chan *chan, if (control->final) { clear_bit(CONN_REMOTE_BUSY, &chan->conn_state); - if (!test_and_clear_bit(CONN_REJ_ACT, &chan->conn_state) && - !__chan_is_moving(chan)) { + if (!test_and_clear_bit(CONN_REJ_ACT, + &chan->conn_state)) { control->final = 0; l2cap_retransmit_all(chan, control); } @@ -7245,11 +6240,7 @@ static int l2cap_finish_move(struct l2cap_chan *chan) BT_DBG("chan %p", chan); chan->rx_state = L2CAP_RX_STATE_RECV; - - if (chan->hs_hcon) - chan->conn->mtu = chan->hs_hcon->hdev->block_mtu; - else - chan->conn->mtu = chan->conn->hcon->hdev->acl_mtu; + chan->conn->mtu = chan->conn->hcon->hdev->acl_mtu; return l2cap_resegment(chan); } @@ -7316,11 +6307,7 @@ static int l2cap_rx_state_wait_f(struct l2cap_chan *chan, */ chan->next_tx_seq = control->reqseq; chan->unacked_frames = 0; - - if (chan->hs_hcon) - chan->conn->mtu = chan->hs_hcon->hdev->block_mtu; - else - chan->conn->mtu = chan->conn->hcon->hdev->acl_mtu; + chan->conn->mtu = chan->conn->hcon->hdev->acl_mtu; err = l2cap_resegment(chan); @@ -7672,21 +6659,10 @@ static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid, chan = l2cap_get_chan_by_scid(conn, cid); if (!chan) { - if (cid == L2CAP_CID_A2MP) { - chan = a2mp_channel_create(conn, skb); - if (!chan) { - kfree_skb(skb); - return; - } - - l2cap_chan_hold(chan); - l2cap_chan_lock(chan); - } else { - BT_DBG("unknown cid 0x%4.4x", cid); - /* Drop packet and return */ - kfree_skb(skb); - return; - } + BT_DBG("unknown cid 0x%4.4x", cid); + /* Drop packet and return */ + kfree_skb(skb); + return; } BT_DBG("chan %p, len %d", chan, skb->len); @@ -7887,10 +6863,6 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) conn->local_fixed_chan = L2CAP_FC_SIG_BREDR | L2CAP_FC_CONNLESS; - if (hcon->type == ACL_LINK && - hci_dev_test_flag(hcon->hdev, HCI_HS_ENABLED)) - conn->local_fixed_chan |= L2CAP_FC_A2MP; - if (hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED) && (bredr_sc_enabled(hcon->hdev) || hci_dev_test_flag(hcon->hdev, HCI_FORCE_BREDR_SMP))) @@ -8355,11 +7327,6 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) BT_DBG("chan %p scid 0x%4.4x state %s", chan, chan->scid, state_to_string(chan->state)); - if (chan->scid == L2CAP_CID_A2MP) { - l2cap_chan_unlock(chan); - continue; - } - if (!status && encrypt) chan->sec_level = hcon->sec_level; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index e50d3d102078e..ee7a41d6994fc 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1027,23 +1027,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (opt > BT_CHANNEL_POLICY_AMP_PREFERRED) { - err = -EINVAL; - break; - } - - if (chan->mode != L2CAP_MODE_ERTM && - chan->mode != L2CAP_MODE_STREAMING) { - err = -EOPNOTSUPP; - break; - } - - chan->chan_policy = (u8) opt; - - if (sk->sk_state == BT_CONNECTED && - chan->move_role == L2CAP_MOVE_ROLE_NONE) - l2cap_move_start(chan); - + err = -EOPNOTSUPP; break; case BT_SNDMTU: diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 8c4493255f92a..7713e2cdf9e12 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -835,8 +835,6 @@ static u32 get_supported_settings(struct hci_dev *hdev) if (lmp_ssp_capable(hdev)) { settings |= MGMT_SETTING_SSP; - if (IS_ENABLED(CONFIG_BT_HS)) - settings |= MGMT_SETTING_HS; } if (lmp_sc_capable(hdev)) @@ -901,9 +899,6 @@ static u32 get_current_settings(struct hci_dev *hdev) if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) settings |= MGMT_SETTING_SSP; - if (hci_dev_test_flag(hdev, HCI_HS_ENABLED)) - settings |= MGMT_SETTING_HS; - if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) settings |= MGMT_SETTING_ADVERTISING; @@ -1938,7 +1933,6 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err) if (enable && hci_dev_test_and_clear_flag(hdev, HCI_SSP_ENABLED)) { - hci_dev_clear_flag(hdev, HCI_HS_ENABLED); new_settings(hdev, NULL); } @@ -1951,12 +1945,6 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err) changed = !hci_dev_test_and_set_flag(hdev, HCI_SSP_ENABLED); } else { changed = hci_dev_test_and_clear_flag(hdev, HCI_SSP_ENABLED); - - if (!changed) - changed = hci_dev_test_and_clear_flag(hdev, - HCI_HS_ENABLED); - else - hci_dev_clear_flag(hdev, HCI_HS_ENABLED); } mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, settings_rsp, &match); @@ -2020,11 +2008,6 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) } else { changed = hci_dev_test_and_clear_flag(hdev, HCI_SSP_ENABLED); - if (!changed) - changed = hci_dev_test_and_clear_flag(hdev, - HCI_HS_ENABLED); - else - hci_dev_clear_flag(hdev, HCI_HS_ENABLED); } err = send_settings_rsp(sk, MGMT_OP_SET_SSP, hdev); @@ -2070,63 +2053,10 @@ failed: static int set_hs(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { - struct mgmt_mode *cp = data; - bool changed; - u8 status; - int err; - bt_dev_dbg(hdev, "sock %p", sk); - if (!IS_ENABLED(CONFIG_BT_HS)) - return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, MGMT_STATUS_NOT_SUPPORTED); - - status = mgmt_bredr_support(hdev); - if (status) - return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, status); - - if (!lmp_ssp_capable(hdev)) - return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, - MGMT_STATUS_NOT_SUPPORTED); - - if (!hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) - return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, - MGMT_STATUS_REJECTED); - - if (cp->val != 0x00 && cp->val != 0x01) - return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, - MGMT_STATUS_INVALID_PARAMS); - - hci_dev_lock(hdev); - - if (pending_find(MGMT_OP_SET_SSP, hdev)) { - err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, - MGMT_STATUS_BUSY); - goto unlock; - } - - if (cp->val) { - changed = !hci_dev_test_and_set_flag(hdev, HCI_HS_ENABLED); - } else { - if (hdev_is_powered(hdev)) { - err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, - MGMT_STATUS_REJECTED); - goto unlock; - } - - changed = hci_dev_test_and_clear_flag(hdev, HCI_HS_ENABLED); - } - - err = send_settings_rsp(sk, MGMT_OP_SET_HS, hdev); - if (err < 0) - goto unlock; - - if (changed) - err = new_settings(hdev, sk); - -unlock: - hci_dev_unlock(hdev); - return err; } static void set_le_complete(struct hci_dev *hdev, void *data, int err) @@ -6774,7 +6704,6 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_dev_clear_flag(hdev, HCI_SSP_ENABLED); hci_dev_clear_flag(hdev, HCI_LINK_SECURITY); hci_dev_clear_flag(hdev, HCI_FAST_CONNECTABLE); - hci_dev_clear_flag(hdev, HCI_HS_ENABLED); } hci_dev_change_flag(hdev, HCI_BREDR_ENABLED); -- cgit 1.2.3-korg From eeda1bf97bb500a901f7a9ee5615bad2160f2378 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 31 Jan 2024 11:24:19 -0500 Subject: Bluetooth: hci_event: Fix not indicating new connection for BIG Sync BIG Sync (aka. Broadcast sink) requires to inform that the device is connected when a data path is active otherwise userspace could attempt to free resources allocated to the device object while scanning. Fixes: 1d11d70d1f6b ("Bluetooth: ISO: Pass BIG encryption info through QoS") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 21 ++++++++++++++------- net/bluetooth/mgmt.c | 4 ++++ 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 11b55d1f97727..271c00792801c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2524,9 +2524,7 @@ static void hci_check_pending_name(struct hci_dev *hdev, struct hci_conn *conn, * Only those in BT_CONFIG or BT_CONNECTED states can be * considered connected. */ - if (conn && - (conn->state == BT_CONFIG || conn->state == BT_CONNECTED) && - !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) + if (conn && (conn->state == BT_CONFIG || conn->state == BT_CONNECTED)) mgmt_device_connected(hdev, conn, name, name_len); if (discov->state == DISCOVERY_STOPPED) @@ -3758,8 +3756,9 @@ static void hci_remote_features_evt(struct hci_dev *hdev, void *data, bacpy(&cp.bdaddr, &conn->dst); cp.pscan_rep_mode = 0x02; hci_send_cmd(hdev, HCI_OP_REMOTE_NAME_REQ, sizeof(cp), &cp); - } else if (!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) + } else { mgmt_device_connected(hdev, conn, NULL, 0); + } if (!hci_outgoing_auth_needed(hdev, conn)) { conn->state = BT_CONNECTED; @@ -3932,6 +3931,11 @@ static u8 hci_cc_le_setup_iso_path(struct hci_dev *hdev, void *data, * last. */ hci_connect_cfm(conn, rp->status); + + /* Notify device connected in case it is a BIG Sync */ + if (!rp->status && test_bit(HCI_CONN_BIG_SYNC, &conn->flags)) + mgmt_device_connected(hdev, conn, NULL, 0); + break; } @@ -5006,8 +5010,9 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev, void *data, bacpy(&cp.bdaddr, &conn->dst); cp.pscan_rep_mode = 0x02; hci_send_cmd(hdev, HCI_OP_REMOTE_NAME_REQ, sizeof(cp), &cp); - } else if (!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) + } else { mgmt_device_connected(hdev, conn, NULL, 0); + } if (!hci_outgoing_auth_needed(hdev, conn)) { conn->state = BT_CONNECTED; @@ -5980,8 +5985,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, goto unlock; } - if (!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) - mgmt_device_connected(hdev, conn, NULL, 0); + mgmt_device_connected(hdev, conn, NULL, 0); conn->sec_level = BT_SECURITY_LOW; conn->state = BT_CONFIG; @@ -7210,6 +7214,9 @@ static void hci_le_big_info_adv_report_evt(struct hci_dev *hdev, void *data, /* Notify iso layer */ hci_connect_cfm(pa_sync, 0x00); + /* Notify MGMT layer */ + mgmt_device_connected(hdev, pa_sync, NULL, 0); + unlock: hci_dev_unlock(hdev); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7713e2cdf9e12..064a67157d438 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3126,6 +3126,7 @@ failed: static u8 link_to_bdaddr(u8 link_type, u8 addr_type) { switch (link_type) { + case ISO_LINK: case LE_LINK: switch (addr_type) { case ADDR_LE_DEV_PUBLIC: @@ -9618,6 +9619,9 @@ void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, u16 eir_len = 0; u32 flags = 0; + if (test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) + return; + /* allocate buff for LE or BR/EDR adv */ if (conn->le_adv_data_len > 0) skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_CONNECTED, -- cgit 1.2.3-korg From 45340097ce6ea7e875674a5a7d24c95ecbc93ef9 Mon Sep 17 00:00:00 2001 From: Jonas Dreßler Date: Tue, 6 Feb 2024 12:08:13 +0100 Subject: Bluetooth: hci_conn: Only do ACL connections sequentially MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pretty much all bluetooth chipsets only support paging a single device at a time, and if they don't reject a secondary "Create Connection" request while another is still ongoing, they'll most likely serialize those requests in the firware. With commit 4c67bc74f016 ("[Bluetooth] Support concurrent connect requests") we started adding some serialization of our own in case the adapter returns "Command Disallowed" HCI error. This commit was using the BT_CONNECT2 state for the serialization, this state is also used for a few more things (most notably to indicate we're waiting for an inquiry to cancel) and therefore a bit unreliable. Also not all BT firwares would respond with "Command Disallowed" on too many connection requests, some will also respond with "Hardware Failure" (BCM4378), and others will error out later and send a "Connect Complete" event with error "Rejected Limited Resources" (Marvell 88W8897). We can clean things up a bit and also make the serialization more reliable by using our hci_sync machinery to always do "Create Connection" requests in a sequential manner. This is very similar to what we're already doing for establishing LE connections, and it works well there. Note that this causes a test failure in mgmt-tester (test "Pair Device - Power off 1") because the hci_abort_conn_sync() changes the error we return on timeout of the "Create Connection". We'll fix this on the mgmt-tester side by adjusting the expected error for the test. Signed-off-by: Jonas Dreßler Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 1 + include/net/bluetooth/hci_sync.h | 3 ++ net/bluetooth/hci_conn.c | 69 ++++++--------------------------------- net/bluetooth/hci_sync.c | 70 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 83 insertions(+), 60 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index aa6c69053d7cd..08cb5cb249a49 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -437,6 +437,7 @@ enum { #define HCI_NCMD_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */ #define HCI_ACL_TX_TIMEOUT msecs_to_jiffies(45000) /* 45 seconds */ #define HCI_AUTO_OFF_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ +#define HCI_ACL_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */ #define HCI_LE_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */ #define HCI_LE_AUTOCONN_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */ diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index e2582c2425449..824660f8f30da 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -138,3 +138,6 @@ int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason); int hci_le_big_terminate_sync(struct hci_dev *hdev, u8 handle); int hci_le_pa_terminate_sync(struct hci_dev *hdev, u16 handle); + +int hci_acl_create_connection_sync(struct hci_dev *hdev, + struct hci_conn *conn); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index fc4d72f83ac25..816be7667a8cf 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -178,64 +178,6 @@ static void hci_conn_cleanup(struct hci_conn *conn) hci_dev_put(hdev); } -static void hci_acl_create_connection(struct hci_conn *conn) -{ - struct hci_dev *hdev = conn->hdev; - struct inquiry_entry *ie; - struct hci_cp_create_conn cp; - - BT_DBG("hcon %p", conn); - - /* Many controllers disallow HCI Create Connection while it is doing - * HCI Inquiry. So we cancel the Inquiry first before issuing HCI Create - * Connection. This may cause the MGMT discovering state to become false - * without user space's request but it is okay since the MGMT Discovery - * APIs do not promise that discovery should be done forever. Instead, - * the user space monitors the status of MGMT discovering and it may - * request for discovery again when this flag becomes false. - */ - if (test_bit(HCI_INQUIRY, &hdev->flags)) { - /* Put this connection to "pending" state so that it will be - * executed after the inquiry cancel command complete event. - */ - conn->state = BT_CONNECT2; - hci_send_cmd(hdev, HCI_OP_INQUIRY_CANCEL, 0, NULL); - return; - } - - conn->state = BT_CONNECT; - conn->out = true; - conn->role = HCI_ROLE_MASTER; - - conn->attempt++; - - conn->link_policy = hdev->link_policy; - - memset(&cp, 0, sizeof(cp)); - bacpy(&cp.bdaddr, &conn->dst); - cp.pscan_rep_mode = 0x02; - - ie = hci_inquiry_cache_lookup(hdev, &conn->dst); - if (ie) { - if (inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) { - cp.pscan_rep_mode = ie->data.pscan_rep_mode; - cp.pscan_mode = ie->data.pscan_mode; - cp.clock_offset = ie->data.clock_offset | - cpu_to_le16(0x8000); - } - - memcpy(conn->dev_class, ie->data.dev_class, 3); - } - - cp.pkt_type = cpu_to_le16(conn->pkt_type); - if (lmp_rswitch_capable(hdev) && !(hdev->link_mode & HCI_LM_MASTER)) - cp.role_switch = 0x01; - else - cp.role_switch = 0x00; - - hci_send_cmd(hdev, HCI_OP_CREATE_CONN, sizeof(cp), &cp); -} - int hci_disconnect(struct hci_conn *conn, __u8 reason) { BT_DBG("hcon %p", conn); @@ -1696,10 +1638,17 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, acl->conn_reason = conn_reason; if (acl->state == BT_OPEN || acl->state == BT_CLOSED) { + int err; + acl->sec_level = BT_SECURITY_LOW; acl->pending_sec_level = sec_level; acl->auth_type = auth_type; - hci_acl_create_connection(acl); + + err = hci_acl_create_connection_sync(hdev, acl); + if (err) { + hci_conn_del(acl); + return ERR_PTR(err); + } } return acl; @@ -2654,7 +2603,7 @@ void hci_conn_check_pending(struct hci_dev *hdev) conn = hci_conn_hash_lookup_state(hdev, ACL_LINK, BT_CONNECT2); if (conn) - hci_acl_create_connection(conn); + hci_acl_create_connection_sync(hdev, conn); hci_dev_unlock(hdev); } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 1122296ce3fa3..617407b81ffec 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6492,3 +6492,73 @@ int hci_update_adv_data(struct hci_dev *hdev, u8 instance) return hci_cmd_sync_queue(hdev, _update_adv_data_sync, UINT_PTR(instance), NULL); } + +static int __hci_acl_create_connection_sync(struct hci_dev *hdev, void *data) +{ + struct hci_conn *conn = data; + struct inquiry_entry *ie; + struct hci_cp_create_conn cp; + int err; + + /* Many controllers disallow HCI Create Connection while it is doing + * HCI Inquiry. So we cancel the Inquiry first before issuing HCI Create + * Connection. This may cause the MGMT discovering state to become false + * without user space's request but it is okay since the MGMT Discovery + * APIs do not promise that discovery should be done forever. Instead, + * the user space monitors the status of MGMT discovering and it may + * request for discovery again when this flag becomes false. + */ + if (test_bit(HCI_INQUIRY, &hdev->flags)) { + err = __hci_cmd_sync_status(hdev, HCI_OP_INQUIRY_CANCEL, 0, + NULL, HCI_CMD_TIMEOUT); + if (err) + bt_dev_warn(hdev, "Failed to cancel inquiry %d", err); + } + + conn->state = BT_CONNECT; + conn->out = true; + conn->role = HCI_ROLE_MASTER; + + conn->attempt++; + + conn->link_policy = hdev->link_policy; + + memset(&cp, 0, sizeof(cp)); + bacpy(&cp.bdaddr, &conn->dst); + cp.pscan_rep_mode = 0x02; + + ie = hci_inquiry_cache_lookup(hdev, &conn->dst); + if (ie) { + if (inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) { + cp.pscan_rep_mode = ie->data.pscan_rep_mode; + cp.pscan_mode = ie->data.pscan_mode; + cp.clock_offset = ie->data.clock_offset | + cpu_to_le16(0x8000); + } + + memcpy(conn->dev_class, ie->data.dev_class, 3); + } + + cp.pkt_type = cpu_to_le16(conn->pkt_type); + if (lmp_rswitch_capable(hdev) && !(hdev->link_mode & HCI_LM_MASTER)) + cp.role_switch = 0x01; + else + cp.role_switch = 0x00; + + err = __hci_cmd_sync_status_sk(hdev, HCI_OP_CREATE_CONN, + sizeof(cp), &cp, + HCI_EV_CONN_COMPLETE, + HCI_ACL_CONN_TIMEOUT, NULL); + + if (err == -ETIMEDOUT) + hci_abort_conn_sync(hdev, conn, HCI_ERROR_LOCAL_HOST_TERM); + + return err; +} + +int hci_acl_create_connection_sync(struct hci_dev *hdev, + struct hci_conn *conn) +{ + return hci_cmd_sync_queue(hdev, __hci_acl_create_connection_sync, + conn, NULL); +} -- cgit 1.2.3-korg From 4aa42119d971603dc9e4d8cf4f53d5fcf082ea7d Mon Sep 17 00:00:00 2001 From: Jonas Dreßler Date: Tue, 6 Feb 2024 12:08:14 +0100 Subject: Bluetooth: Remove pending ACL connection attempts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the last commit we moved to using the hci_sync queue for "Create Connection" requests, removing the need for retrying the paging after finished/failed "Create Connection" requests and after the end of inquiries. hci_conn_check_pending() was used to trigger this retry, we can remove it now. Note that we can also remove the special handling for COMMAND_DISALLOWED errors in the completion handler of "Create Connection", because "Create Connection" requests are now always serialized. This is somewhat reverting commit 4c67bc74f016 ("[Bluetooth] Support concurrent connect requests"). With this, the BT_CONNECT2 state of ACL hci_conn objects should now be back to meaning only one thing: That we received a "Connection Request" from another device (see hci_conn_request_evt), but the response to that is going to be deferred. Signed-off-by: Jonas Dreßler Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 - net/bluetooth/hci_conn.c | 16 ---------------- net/bluetooth/hci_event.c | 21 ++++----------------- 3 files changed, 4 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 8f8dd91737142..34aa9d0290fee 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1480,7 +1480,6 @@ struct hci_conn *hci_conn_add_unset(struct hci_dev *hdev, int type, bdaddr_t *dst, u8 role); void hci_conn_del(struct hci_conn *conn); void hci_conn_hash_flush(struct hci_dev *hdev); -void hci_conn_check_pending(struct hci_dev *hdev); struct hci_chan *hci_chan_create(struct hci_conn *conn); void hci_chan_del(struct hci_chan *chan); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 816be7667a8cf..a4beed8587eb7 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -2592,22 +2592,6 @@ void hci_conn_hash_flush(struct hci_dev *hdev) } } -/* Check pending connect attempts */ -void hci_conn_check_pending(struct hci_dev *hdev) -{ - struct hci_conn *conn; - - BT_DBG("hdev %s", hdev->name); - - hci_dev_lock(hdev); - - conn = hci_conn_hash_lookup_state(hdev, ACL_LINK, BT_CONNECT2); - if (conn) - hci_acl_create_connection_sync(hdev, conn); - - hci_dev_unlock(hdev); -} - static u32 get_link_mode(struct hci_conn *conn) { u32 link_mode = 0; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 271c00792801c..e7887bae334b2 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -118,8 +118,6 @@ static u8 hci_cc_inquiry_cancel(struct hci_dev *hdev, void *data, hci_discovery_set_state(hdev, DISCOVERY_STOPPED); hci_dev_unlock(hdev); - hci_conn_check_pending(hdev); - return rp->status; } @@ -150,8 +148,6 @@ static u8 hci_cc_exit_periodic_inq(struct hci_dev *hdev, void *data, hci_dev_clear_flag(hdev, HCI_PERIODIC_INQ); - hci_conn_check_pending(hdev); - return rp->status; } @@ -2312,10 +2308,8 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) { bt_dev_dbg(hdev, "status 0x%2.2x", status); - if (status) { - hci_conn_check_pending(hdev); + if (status) return; - } if (hci_sent_cmd_data(hdev, HCI_OP_INQUIRY)) set_bit(HCI_INQUIRY, &hdev->flags); @@ -2340,12 +2334,9 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) if (status) { if (conn && conn->state == BT_CONNECT) { - if (status != HCI_ERROR_COMMAND_DISALLOWED || conn->attempt > 2) { - conn->state = BT_CLOSED; - hci_connect_cfm(conn, status); - hci_conn_del(conn); - } else - conn->state = BT_CONNECT2; + conn->state = BT_CLOSED; + hci_connect_cfm(conn, status); + hci_conn_del(conn); } } else { if (!conn) { @@ -3035,8 +3026,6 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); - hci_conn_check_pending(hdev); - if (!test_and_clear_bit(HCI_INQUIRY, &hdev->flags)) return; @@ -3258,8 +3247,6 @@ done: unlock: hci_dev_unlock(hdev); - - hci_conn_check_pending(hdev); } static void hci_reject_conn(struct hci_dev *hdev, bdaddr_t *bdaddr) -- cgit 1.2.3-korg From f4b0c2b4cd78b75acde56c2ee5aa732b6fb2a6a9 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 7 Feb 2024 14:42:11 +0100 Subject: Bluetooth: hci_event: Remove code to removed CONFIG_BT_HS Commit cec9f3c5561d ("Bluetooth: Remove BT_HS") removes config BT_HS, but misses two "ifdef BT_HS" blocks in hci_event.c. Remove this dead code from this removed config option. Signed-off-by: Lukas Bulwahn Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 163 ---------------------------------------------- 1 file changed, 163 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index e7887bae334b2..6071a1226e1b4 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5663,150 +5663,6 @@ unlock: hci_dev_unlock(hdev); } -#if IS_ENABLED(CONFIG_BT_HS) -static void hci_chan_selected_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_ev_channel_selected *ev = data; - struct hci_conn *hcon; - - bt_dev_dbg(hdev, "handle 0x%2.2x", ev->phy_handle); - - hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle); - if (!hcon) - return; - - amp_read_loc_assoc_final_data(hdev, hcon); -} - -static void hci_phy_link_complete_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_ev_phy_link_complete *ev = data; - struct hci_conn *hcon, *bredr_hcon; - - bt_dev_dbg(hdev, "handle 0x%2.2x status 0x%2.2x", ev->phy_handle, - ev->status); - - hci_dev_lock(hdev); - - hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle); - if (!hcon) - goto unlock; - - if (!hcon->amp_mgr) - goto unlock; - - if (ev->status) { - hci_conn_del(hcon); - goto unlock; - } - - bredr_hcon = hcon->amp_mgr->l2cap_conn->hcon; - - hcon->state = BT_CONNECTED; - bacpy(&hcon->dst, &bredr_hcon->dst); - - hci_conn_hold(hcon); - hcon->disc_timeout = HCI_DISCONN_TIMEOUT; - hci_conn_drop(hcon); - - hci_debugfs_create_conn(hcon); - hci_conn_add_sysfs(hcon); - - amp_physical_cfm(bredr_hcon, hcon); - -unlock: - hci_dev_unlock(hdev); -} - -static void hci_loglink_complete_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_ev_logical_link_complete *ev = data; - struct hci_conn *hcon; - struct hci_chan *hchan; - struct amp_mgr *mgr; - - bt_dev_dbg(hdev, "log_handle 0x%4.4x phy_handle 0x%2.2x status 0x%2.2x", - le16_to_cpu(ev->handle), ev->phy_handle, ev->status); - - hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle); - if (!hcon) - return; - - /* Create AMP hchan */ - hchan = hci_chan_create(hcon); - if (!hchan) - return; - - hchan->handle = le16_to_cpu(ev->handle); - hchan->amp = true; - - BT_DBG("hcon %p mgr %p hchan %p", hcon, hcon->amp_mgr, hchan); - - mgr = hcon->amp_mgr; - if (mgr && mgr->bredr_chan) { - struct l2cap_chan *bredr_chan = mgr->bredr_chan; - - l2cap_chan_lock(bredr_chan); - - bredr_chan->conn->mtu = hdev->block_mtu; - l2cap_logical_cfm(bredr_chan, hchan, 0); - hci_conn_hold(hcon); - - l2cap_chan_unlock(bredr_chan); - } -} - -static void hci_disconn_loglink_complete_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_ev_disconn_logical_link_complete *ev = data; - struct hci_chan *hchan; - - bt_dev_dbg(hdev, "handle 0x%4.4x status 0x%2.2x", - le16_to_cpu(ev->handle), ev->status); - - if (ev->status) - return; - - hci_dev_lock(hdev); - - hchan = hci_chan_lookup_handle(hdev, le16_to_cpu(ev->handle)); - if (!hchan || !hchan->amp) - goto unlock; - - amp_destroy_logical_link(hchan, ev->reason); - -unlock: - hci_dev_unlock(hdev); -} - -static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_ev_disconn_phy_link_complete *ev = data; - struct hci_conn *hcon; - - bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); - - if (ev->status) - return; - - hci_dev_lock(hdev); - - hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle); - if (hcon && hcon->type == AMP_LINK) { - hcon->state = BT_CLOSED; - hci_disconn_cfm(hcon, ev->reason); - hci_conn_del(hcon); - } - - hci_dev_unlock(hdev); -} -#endif - static void le_conn_update_addr(struct hci_conn *conn, bdaddr_t *bdaddr, u8 bdaddr_type, bdaddr_t *local_rpa) { @@ -7616,25 +7472,6 @@ static const struct hci_ev { /* [0x3e = HCI_EV_LE_META] */ HCI_EV_REQ_VL(HCI_EV_LE_META, hci_le_meta_evt, sizeof(struct hci_ev_le_meta), HCI_MAX_EVENT_SIZE), -#if IS_ENABLED(CONFIG_BT_HS) - /* [0x40 = HCI_EV_PHY_LINK_COMPLETE] */ - HCI_EV(HCI_EV_PHY_LINK_COMPLETE, hci_phy_link_complete_evt, - sizeof(struct hci_ev_phy_link_complete)), - /* [0x41 = HCI_EV_CHANNEL_SELECTED] */ - HCI_EV(HCI_EV_CHANNEL_SELECTED, hci_chan_selected_evt, - sizeof(struct hci_ev_channel_selected)), - /* [0x42 = HCI_EV_DISCONN_PHY_LINK_COMPLETE] */ - HCI_EV(HCI_EV_DISCONN_LOGICAL_LINK_COMPLETE, - hci_disconn_loglink_complete_evt, - sizeof(struct hci_ev_disconn_logical_link_complete)), - /* [0x45 = HCI_EV_LOGICAL_LINK_COMPLETE] */ - HCI_EV(HCI_EV_LOGICAL_LINK_COMPLETE, hci_loglink_complete_evt, - sizeof(struct hci_ev_logical_link_complete)), - /* [0x46 = HCI_EV_DISCONN_LOGICAL_LINK_COMPLETE] */ - HCI_EV(HCI_EV_DISCONN_PHY_LINK_COMPLETE, - hci_disconn_phylink_complete_evt, - sizeof(struct hci_ev_disconn_phy_link_complete)), -#endif /* [0x48 = HCI_EV_NUM_COMP_BLOCKS] */ HCI_EV(HCI_EV_NUM_COMP_BLOCKS, hci_num_comp_blocks_evt, sizeof(struct hci_ev_num_comp_blocks)), -- cgit 1.2.3-korg From bf98feea5b65ced367a871cf35fc044dedbcfb85 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 7 Feb 2024 15:26:20 -0500 Subject: Bluetooth: hci_conn: Always use sk_timeo as conn_timeout This aligns the use socket sk_timeo as conn_timeout when initiating a connection and then use it when scheduling the resulting HCI command, that way the command is actually aborted synchronously thus not blocking commands generated by hci_abort_conn_sync to inform the controller the connection is to be aborted. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 5 +++-- include/net/bluetooth/l2cap.h | 2 +- net/bluetooth/6lowpan.c | 2 +- net/bluetooth/hci_conn.c | 8 +++++--- net/bluetooth/hci_sync.c | 2 +- net/bluetooth/l2cap_core.c | 10 ++++------ net/bluetooth/l2cap_sock.c | 3 ++- net/bluetooth/mgmt.c | 3 ++- net/bluetooth/sco.c | 3 ++- 9 files changed, 21 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 34aa9d0290fee..2bdea85b7c447 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1495,9 +1495,10 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u16 conn_timeout, u8 role); struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, u8 sec_level, u8 auth_type, - enum conn_reasons conn_reason); + enum conn_reasons conn_reason, u16 timeout); struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, - __u16 setting, struct bt_codec *codec); + __u16 setting, struct bt_codec *codec, + u16 timeout); struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, struct bt_iso_qos *qos); struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 92d7197f9a563..a4278aa618ab1 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -939,7 +939,7 @@ int l2cap_add_scid(struct l2cap_chan *chan, __u16 scid); struct l2cap_chan *l2cap_chan_create(void); void l2cap_chan_close(struct l2cap_chan *chan, int reason); int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, - bdaddr_t *dst, u8 dst_type); + bdaddr_t *dst, u8 dst_type, u16 timeout); int l2cap_chan_reconfigure(struct l2cap_chan *chan, __u16 mtu); int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len); void l2cap_chan_busy(struct l2cap_chan *chan, int busy); diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 4eb1b3ced0d27..715cbafbf6631 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -892,7 +892,7 @@ static int bt_6lowpan_connect(bdaddr_t *addr, u8 dst_type) chan->ops = &bt_6lowpan_chan_ops; err = l2cap_chan_connect(chan, cpu_to_le16(L2CAP_PSM_IPSP), 0, - addr, dst_type); + addr, dst_type, L2CAP_CONN_TIMEOUT); BT_DBG("chan %p err %d", chan, err); if (err < 0) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index a4beed8587eb7..8164502234c55 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1607,7 +1607,7 @@ done: struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, u8 sec_level, u8 auth_type, - enum conn_reasons conn_reason) + enum conn_reasons conn_reason, u16 timeout) { struct hci_conn *acl; @@ -1643,6 +1643,7 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, acl->sec_level = BT_SECURITY_LOW; acl->pending_sec_level = sec_level; acl->auth_type = auth_type; + acl->conn_timeout = timeout; err = hci_acl_create_connection_sync(hdev, acl); if (err) { @@ -1683,14 +1684,15 @@ static struct hci_link *hci_conn_link(struct hci_conn *parent, } struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, - __u16 setting, struct bt_codec *codec) + __u16 setting, struct bt_codec *codec, + u16 timeout) { struct hci_conn *acl; struct hci_conn *sco; struct hci_link *link; acl = hci_connect_acl(hdev, dst, BT_SECURITY_LOW, HCI_AT_NO_BONDING, - CONN_REASON_SCO_CONNECT); + CONN_REASON_SCO_CONNECT, timeout); if (IS_ERR(acl)) return acl; diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 617407b81ffec..788a889210d86 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6548,7 +6548,7 @@ static int __hci_acl_create_connection_sync(struct hci_dev *hdev, void *data) err = __hci_cmd_sync_status_sk(hdev, HCI_OP_CREATE_CONN, sizeof(cp), &cp, HCI_EV_CONN_COMPLETE, - HCI_ACL_CONN_TIMEOUT, NULL); + conn->conn_timeout, NULL); if (err == -ETIMEDOUT) hci_abort_conn_sync(hdev, conn, HCI_ERROR_LOCAL_HOST_TERM); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index ab5a9d42fae71..467b242d8be07 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6925,7 +6925,7 @@ static void l2cap_chan_by_pid(struct l2cap_chan *chan, void *data) } int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, - bdaddr_t *dst, u8 dst_type) + bdaddr_t *dst, u8 dst_type, u16 timeout) { struct l2cap_conn *conn; struct hci_conn *hcon; @@ -7018,19 +7018,17 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) hcon = hci_connect_le(hdev, dst, dst_type, false, - chan->sec_level, - HCI_LE_CONN_TIMEOUT, + chan->sec_level, timeout, HCI_ROLE_SLAVE); else hcon = hci_connect_le_scan(hdev, dst, dst_type, - chan->sec_level, - HCI_LE_CONN_TIMEOUT, + chan->sec_level, timeout, CONN_REASON_L2CAP_CHAN); } else { u8 auth_type = l2cap_get_auth_type(chan); hcon = hci_connect_acl(hdev, dst, chan->sec_level, auth_type, - CONN_REASON_L2CAP_CHAN); + CONN_REASON_L2CAP_CHAN, timeout); } if (IS_ERR(hcon)) { diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index ee7a41d6994fc..4287aa6cc988e 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -254,7 +254,8 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, chan->mode = L2CAP_MODE_LE_FLOWCTL; err = l2cap_chan_connect(chan, la.l2_psm, __le16_to_cpu(la.l2_cid), - &la.l2_bdaddr, la.l2_bdaddr_type); + &la.l2_bdaddr, la.l2_bdaddr_type, + sk->sk_sndtimeo); if (err) return err; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 064a67157d438..78ab562807d0f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3444,7 +3444,8 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, if (cp->addr.type == BDADDR_BREDR) { conn = hci_connect_acl(hdev, &cp->addr.bdaddr, sec_level, - auth_type, CONN_REASON_PAIR_DEVICE); + auth_type, CONN_REASON_PAIR_DEVICE, + HCI_ACL_CONN_TIMEOUT); } else { u8 addr_type = le_addr_type(cp->addr.type); struct hci_conn_params *p; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index c736186aba26b..43daf965a01e4 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -264,7 +264,8 @@ static int sco_connect(struct sock *sk) } hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst, - sco_pi(sk)->setting, &sco_pi(sk)->codec); + sco_pi(sk)->setting, &sco_pi(sk)->codec, + sk->sk_sndtimeo); if (IS_ERR(hcon)) { err = PTR_ERR(hcon); goto unlock; -- cgit 1.2.3-korg From 5f641f03abccddd1a37233ff1b8e774b9ff1f4e8 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 9 Feb 2024 09:08:06 -0500 Subject: Bluetooth: hci_conn: Fix UAF Write in __hci_acl_create_connection_sync This fixes the UAF on __hci_acl_create_connection_sync caused by connection abortion, it uses the same logic as to LE_LINK which uses hci_cmd_sync_cancel to prevent the callback to run if the connection is abort prematurely. Reported-by: syzbot+3f0a39be7a2035700868@syzkaller.appspotmail.com Fixes: 45340097ce6e ("Bluetooth: hci_conn: Only do ACL connections sequentially") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 3 +-- net/bluetooth/hci_conn.c | 3 ++- net/bluetooth/hci_sync.c | 16 ++++++++++------ 3 files changed, 13 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 824660f8f30da..ed334c253ebcd 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -139,5 +139,4 @@ int hci_le_big_terminate_sync(struct hci_dev *hdev, u8 handle); int hci_le_pa_terminate_sync(struct hci_dev *hdev, u16 handle); -int hci_acl_create_connection_sync(struct hci_dev *hdev, - struct hci_conn *conn); +int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 8164502234c55..587eb27f374c9 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1645,7 +1645,7 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, acl->auth_type = auth_type; acl->conn_timeout = timeout; - err = hci_acl_create_connection_sync(hdev, acl); + err = hci_connect_acl_sync(hdev, acl); if (err) { hci_conn_del(acl); return ERR_PTR(err); @@ -2942,6 +2942,7 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) */ if (conn->state == BT_CONNECT && hdev->req_status == HCI_REQ_PEND) { switch (hci_skb_event(hdev->sent_cmd)) { + case HCI_EV_CONN_COMPLETE: case HCI_EV_LE_CONN_COMPLETE: case HCI_EV_LE_ENHANCED_CONN_COMPLETE: case HCI_EVT_LE_CIS_ESTABLISHED: diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 788a889210d86..e1fdcb3c27062 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6493,13 +6493,18 @@ int hci_update_adv_data(struct hci_dev *hdev, u8 instance) UINT_PTR(instance), NULL); } -static int __hci_acl_create_connection_sync(struct hci_dev *hdev, void *data) +static int hci_acl_create_conn_sync(struct hci_dev *hdev, void *data) { - struct hci_conn *conn = data; + struct hci_conn *conn; + u16 handle = PTR_UINT(data); struct inquiry_entry *ie; struct hci_cp_create_conn cp; int err; + conn = hci_conn_hash_lookup_handle(hdev, handle); + if (!conn) + return 0; + /* Many controllers disallow HCI Create Connection while it is doing * HCI Inquiry. So we cancel the Inquiry first before issuing HCI Create * Connection. This may cause the MGMT discovering state to become false @@ -6556,9 +6561,8 @@ static int __hci_acl_create_connection_sync(struct hci_dev *hdev, void *data) return err; } -int hci_acl_create_connection_sync(struct hci_dev *hdev, - struct hci_conn *conn) +int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn) { - return hci_cmd_sync_queue(hdev, __hci_acl_create_connection_sync, - conn, NULL); + return hci_cmd_sync_queue(hdev, hci_acl_create_conn_sync, + UINT_PTR(conn->handle), NULL); } -- cgit 1.2.3-korg From 505ea2b295929e7be2b4e1bc86ee31cb7862fb01 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 9 Aug 2023 13:43:53 -0700 Subject: Bluetooth: hci_sync: Add helper functions to manipulate cmd_sync queue This adds functions to queue, dequeue and lookup into the cmd_sync list. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 12 ++++ net/bluetooth/hci_sync.c | 132 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 136 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index ed334c253ebcd..4ff4aa68ee196 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -48,6 +48,18 @@ int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); +struct hci_cmd_sync_work_entry * +hci_cmd_sync_lookup_entry(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); +int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); +void hci_cmd_sync_cancel_entry(struct hci_dev *hdev, + struct hci_cmd_sync_work_entry *entry); +bool hci_cmd_sync_dequeue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); +bool hci_cmd_sync_dequeue_once(struct hci_dev *hdev, + hci_cmd_sync_work_func_t func, void *data, + hci_cmd_sync_work_destroy_t destroy); int hci_update_eir_sync(struct hci_dev *hdev); int hci_update_class_sync(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index e1fdcb3c27062..5b314bf844f84 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -566,6 +566,17 @@ void hci_cmd_sync_init(struct hci_dev *hdev) INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); } +static void _hci_cmd_sync_cancel_entry(struct hci_dev *hdev, + struct hci_cmd_sync_work_entry *entry, + int err) +{ + if (entry->destroy) + entry->destroy(hdev, entry->data, err); + + list_del(&entry->list); + kfree(entry); +} + void hci_cmd_sync_clear(struct hci_dev *hdev) { struct hci_cmd_sync_work_entry *entry, *tmp; @@ -574,13 +585,8 @@ void hci_cmd_sync_clear(struct hci_dev *hdev) cancel_work_sync(&hdev->reenable_adv_work); mutex_lock(&hdev->cmd_sync_work_lock); - list_for_each_entry_safe(entry, tmp, &hdev->cmd_sync_work_list, list) { - if (entry->destroy) - entry->destroy(hdev, entry->data, -ECANCELED); - - list_del(&entry->list); - kfree(entry); - } + list_for_each_entry_safe(entry, tmp, &hdev->cmd_sync_work_list, list) + _hci_cmd_sync_cancel_entry(hdev, entry, -ECANCELED); mutex_unlock(&hdev->cmd_sync_work_lock); } @@ -669,6 +675,115 @@ int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, } EXPORT_SYMBOL(hci_cmd_sync_queue); +static struct hci_cmd_sync_work_entry * +_hci_cmd_sync_lookup_entry(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy) +{ + struct hci_cmd_sync_work_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &hdev->cmd_sync_work_list, list) { + if (func && entry->func != func) + continue; + + if (data && entry->data != data) + continue; + + if (destroy && entry->destroy != destroy) + continue; + + return entry; + } + + return NULL; +} + +/* Queue HCI command entry once: + * + * - Lookup if an entry already exist and only if it doesn't creates a new entry + * and queue it. + */ +int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy) +{ + if (hci_cmd_sync_lookup_entry(hdev, func, data, destroy)) + return 0; + + return hci_cmd_sync_queue(hdev, func, data, destroy); +} +EXPORT_SYMBOL(hci_cmd_sync_queue_once); + +/* Lookup HCI command entry: + * + * - Return first entry that matches by function callback or data or + * destroy callback. + */ +struct hci_cmd_sync_work_entry * +hci_cmd_sync_lookup_entry(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy) +{ + struct hci_cmd_sync_work_entry *entry; + + mutex_lock(&hdev->cmd_sync_work_lock); + entry = _hci_cmd_sync_lookup_entry(hdev, func, data, destroy); + mutex_unlock(&hdev->cmd_sync_work_lock); + + return entry; +} +EXPORT_SYMBOL(hci_cmd_sync_lookup_entry); + +/* Cancel HCI command entry */ +void hci_cmd_sync_cancel_entry(struct hci_dev *hdev, + struct hci_cmd_sync_work_entry *entry) +{ + mutex_lock(&hdev->cmd_sync_work_lock); + _hci_cmd_sync_cancel_entry(hdev, entry, -ECANCELED); + mutex_unlock(&hdev->cmd_sync_work_lock); +} +EXPORT_SYMBOL(hci_cmd_sync_cancel_entry); + +/* Dequeue one HCI command entry: + * + * - Lookup and cancel first entry that matches. + */ +bool hci_cmd_sync_dequeue_once(struct hci_dev *hdev, + hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy) +{ + struct hci_cmd_sync_work_entry *entry; + + entry = hci_cmd_sync_lookup_entry(hdev, func, data, destroy); + if (!entry) + return false; + + hci_cmd_sync_cancel_entry(hdev, entry); + + return true; +} +EXPORT_SYMBOL(hci_cmd_sync_dequeue_once); + +/* Dequeue HCI command entry: + * + * - Lookup and cancel any entry that matches by function callback or data or + * destroy callback. + */ +bool hci_cmd_sync_dequeue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy) +{ + struct hci_cmd_sync_work_entry *entry; + bool ret = false; + + mutex_lock(&hdev->cmd_sync_work_lock); + while ((entry = _hci_cmd_sync_lookup_entry(hdev, func, data, + destroy))) { + _hci_cmd_sync_cancel_entry(hdev, entry, -ECANCELED); + ret = true; + } + mutex_unlock(&hdev->cmd_sync_work_lock); + + return ret; +} +EXPORT_SYMBOL(hci_cmd_sync_dequeue); + int hci_update_eir_sync(struct hci_dev *hdev) { struct hci_cp_write_eir cp; @@ -2881,7 +2996,8 @@ int hci_update_passive_scan(struct hci_dev *hdev) hci_dev_test_flag(hdev, HCI_UNREGISTER)) return 0; - return hci_cmd_sync_queue(hdev, update_passive_scan_sync, NULL, NULL); + return hci_cmd_sync_queue_once(hdev, update_passive_scan_sync, NULL, + NULL); } int hci_write_sc_support_sync(struct hci_dev *hdev, u8 val) -- cgit 1.2.3-korg From 881559af5f5c545f6828e7c74d79813eb886d523 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 13 Feb 2024 09:59:32 -0500 Subject: Bluetooth: hci_sync: Attempt to dequeue connection attempt If connection is still queued/pending in the cmd_sync queue it means no command has been generated and it should be safe to just dequeue the callback when it is being aborted. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 19 +++++++++++ include/net/bluetooth/hci_sync.h | 10 +++--- net/bluetooth/hci_conn.c | 70 +++++++------------------------------ net/bluetooth/hci_sync.c | 74 +++++++++++++++++++++++++++++++++++----- 4 files changed, 102 insertions(+), 71 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 2bdea85b7c447..317d495cfcf5e 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1083,6 +1083,24 @@ static inline unsigned int hci_conn_count(struct hci_dev *hdev) return c->acl_num + c->amp_num + c->sco_num + c->le_num + c->iso_num; } +static inline bool hci_conn_valid(struct hci_dev *hdev, struct hci_conn *conn) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (c == conn) { + rcu_read_unlock(); + return true; + } + } + rcu_read_unlock(); + + return false; +} + static inline __u8 hci_conn_lookup_type(struct hci_dev *hdev, __u16 handle) { struct hci_conn_hash *h = &hdev->conn_hash; @@ -1493,6 +1511,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, bool dst_resolved, u8 sec_level, u16 conn_timeout, u8 role); +void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status); struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, u8 sec_level, u8 auth_type, enum conn_reasons conn_reason, u16 timeout); diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 4ff4aa68ee196..6a9d063e9f472 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -48,11 +48,11 @@ int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); +int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); struct hci_cmd_sync_work_entry * hci_cmd_sync_lookup_entry(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); -int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, - void *data, hci_cmd_sync_work_destroy_t destroy); void hci_cmd_sync_cancel_entry(struct hci_dev *hdev, struct hci_cmd_sync_work_entry *entry); bool hci_cmd_sync_dequeue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, @@ -139,8 +139,6 @@ struct hci_conn; int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason); -int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn); - int hci_le_create_cis_sync(struct hci_dev *hdev); int hci_le_remove_cig_sync(struct hci_dev *hdev, u8 handle); @@ -152,3 +150,7 @@ int hci_le_big_terminate_sync(struct hci_dev *hdev, u8 handle); int hci_le_pa_terminate_sync(struct hci_dev *hdev, u16 handle); int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn); + +int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn); + +int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 587eb27f374c9..21e0b4064d05d 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -68,7 +68,7 @@ static const struct sco_param esco_param_msbc[] = { }; /* This function requires the caller holds hdev->lock */ -static void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status) +void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status) { struct hci_conn_params *params; struct hci_dev *hdev = conn->hdev; @@ -1124,6 +1124,9 @@ void hci_conn_del(struct hci_conn *conn) * rest of hci_conn_del. */ hci_conn_cleanup(conn); + + /* Dequeue callbacks using connection pointer as data */ + hci_cmd_sync_dequeue(hdev, NULL, conn, NULL); } struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type) @@ -1258,53 +1261,6 @@ u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle) return 0; } -static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err) -{ - struct hci_conn *conn; - u16 handle = PTR_UINT(data); - - conn = hci_conn_hash_lookup_handle(hdev, handle); - if (!conn) - return; - - bt_dev_dbg(hdev, "err %d", err); - - hci_dev_lock(hdev); - - if (!err) { - hci_connect_le_scan_cleanup(conn, 0x00); - goto done; - } - - /* Check if connection is still pending */ - if (conn != hci_lookup_le_connect(hdev)) - goto done; - - /* Flush to make sure we send create conn cancel command if needed */ - flush_delayed_work(&conn->le_conn_timeout); - hci_conn_failed(conn, bt_status(err)); - -done: - hci_dev_unlock(hdev); -} - -static int hci_connect_le_sync(struct hci_dev *hdev, void *data) -{ - struct hci_conn *conn; - u16 handle = PTR_UINT(data); - - conn = hci_conn_hash_lookup_handle(hdev, handle); - if (!conn) - return 0; - - bt_dev_dbg(hdev, "conn %p", conn); - - clear_bit(HCI_CONN_SCANNING, &conn->flags); - conn->state = BT_CONNECT; - - return hci_le_create_conn_sync(hdev, conn); -} - struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, bool dst_resolved, u8 sec_level, u16 conn_timeout, u8 role) @@ -1371,9 +1327,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, conn->sec_level = BT_SECURITY_LOW; conn->conn_timeout = conn_timeout; - err = hci_cmd_sync_queue(hdev, hci_connect_le_sync, - UINT_PTR(conn->handle), - create_le_conn_complete); + err = hci_connect_le_sync(hdev, conn); if (err) { hci_conn_del(conn); return ERR_PTR(err); @@ -2909,12 +2863,10 @@ u32 hci_conn_get_phy(struct hci_conn *conn) static int abort_conn_sync(struct hci_dev *hdev, void *data) { - struct hci_conn *conn; - u16 handle = PTR_UINT(data); + struct hci_conn *conn = data; - conn = hci_conn_hash_lookup_handle(hdev, handle); - if (!conn) - return 0; + if (!hci_conn_valid(hdev, conn)) + return -ECANCELED; return hci_abort_conn_sync(hdev, conn, conn->abort_reason); } @@ -2949,8 +2901,10 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) hci_cmd_sync_cancel(hdev, -ECANCELED); break; } + /* Cancel connect attempt if still queued/pending */ + } else if (!hci_cancel_connect_sync(hdev, conn)) { + return 0; } - return hci_cmd_sync_queue(hdev, abort_conn_sync, UINT_PTR(conn->handle), - NULL); + return hci_cmd_sync_queue_once(hdev, abort_conn_sync, conn, NULL); } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 5b314bf844f84..b7d8e99e2a30e 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6285,12 +6285,21 @@ static int hci_le_ext_create_conn_sync(struct hci_dev *hdev, conn->conn_timeout, NULL); } -int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn) +static int hci_le_create_conn_sync(struct hci_dev *hdev, void *data) { struct hci_cp_le_create_conn cp; struct hci_conn_params *params; u8 own_addr_type; int err; + struct hci_conn *conn = data; + + if (!hci_conn_valid(hdev, conn)) + return -ECANCELED; + + bt_dev_dbg(hdev, "conn %p", conn); + + clear_bit(HCI_CONN_SCANNING, &conn->flags); + conn->state = BT_CONNECT; /* If requested to connect as peripheral use directed advertising */ if (conn->role == HCI_ROLE_SLAVE) { @@ -6611,16 +6620,11 @@ int hci_update_adv_data(struct hci_dev *hdev, u8 instance) static int hci_acl_create_conn_sync(struct hci_dev *hdev, void *data) { - struct hci_conn *conn; - u16 handle = PTR_UINT(data); + struct hci_conn *conn = data; struct inquiry_entry *ie; struct hci_cp_create_conn cp; int err; - conn = hci_conn_hash_lookup_handle(hdev, handle); - if (!conn) - return 0; - /* Many controllers disallow HCI Create Connection while it is doing * HCI Inquiry. So we cancel the Inquiry first before issuing HCI Create * Connection. This may cause the MGMT discovering state to become false @@ -6679,6 +6683,58 @@ static int hci_acl_create_conn_sync(struct hci_dev *hdev, void *data) int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn) { - return hci_cmd_sync_queue(hdev, hci_acl_create_conn_sync, - UINT_PTR(conn->handle), NULL); + return hci_cmd_sync_queue_once(hdev, hci_acl_create_conn_sync, conn, + NULL); +} + +static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err) +{ + struct hci_conn *conn = data; + + bt_dev_dbg(hdev, "err %d", err); + + if (err == -ECANCELED) + return; + + hci_dev_lock(hdev); + + if (!err) { + hci_connect_le_scan_cleanup(conn, 0x00); + goto done; + } + + /* Check if connection is still pending */ + if (conn != hci_lookup_le_connect(hdev)) + goto done; + + /* Flush to make sure we send create conn cancel command if needed */ + flush_delayed_work(&conn->le_conn_timeout); + hci_conn_failed(conn, bt_status(err)); + +done: + hci_dev_unlock(hdev); +} + +int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn) +{ + return hci_cmd_sync_queue_once(hdev, hci_le_create_conn_sync, conn, + create_le_conn_complete); +} + +int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn) +{ + if (conn->state != BT_OPEN) + return -EINVAL; + + switch (conn->type) { + case ACL_LINK: + return !hci_cmd_sync_dequeue_once(hdev, + hci_acl_create_conn_sync, + conn, NULL); + case LE_LINK: + return !hci_cmd_sync_dequeue_once(hdev, hci_le_create_conn_sync, + conn, create_le_conn_complete); + } + + return -ENOENT; } -- cgit 1.2.3-korg From 412b894a183cf0546b03f871579a6eff4c3f5d49 Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marliere" Date: Mon, 19 Feb 2024 16:46:57 -0300 Subject: Bluetooth: constify the struct device_type usage Since commit aed65af1cc2f ("drivers: make device_type const"), the driver core can properly handle constant struct device_type. Move the bt_type and bnep_type variables to be constant structures as well, placing it into read-only memory which can not be modified at runtime. Cc: Greg Kroah-Hartman Signed-off-by: Ricardo B. Marliere Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/6lowpan.c | 2 +- net/bluetooth/bnep/core.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 715cbafbf6631..27520a8a486f3 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -572,7 +572,7 @@ static void netdev_setup(struct net_device *dev) dev->needs_free_netdev = true; } -static struct device_type bt_type = { +static const struct device_type bt_type = { .name = "bluetooth", }; diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 5a6a49885ab66..8c3f8d0c03588 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -549,7 +549,7 @@ static struct device *bnep_get_device(struct bnep_session *session) return &conn->hcon->dev; } -static struct device_type bnep_type = { +static const struct device_type bnep_type = { .name = "bluetooth", }; -- cgit 1.2.3-korg From 7453847fb22c7c45334c43cc6a02ea5df5b9961d Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 16 Feb 2024 15:29:55 -0500 Subject: Bluetooth: hci_sync: Fix UAF on hci_abort_conn_sync Fixes the following trace where hci_acl_create_conn_sync attempts to call hci_abort_conn_sync after timeout: BUG: KASAN: slab-use-after-free in hci_abort_conn_sync (net/bluetooth/hci_sync.c:5439) Read of size 2 at addr ffff88800322c032 by task kworker/u3:2/36 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014 Workqueue: hci0 hci_cmd_sync_work Call Trace: dump_stack_lvl (./arch/x86/include/asm/irqflags.h:26 ./arch/x86/include/asm/irqflags.h:67 ./arch/x86/include/asm/irqflags.h:127 lib/dump_stack.c:107) print_report (mm/kasan/report.c:378 mm/kasan/report.c:488) ? preempt_count_sub (kernel/sched/core.c:5889) ? __virt_addr_valid (./arch/x86/include/asm/preempt.h:103 (discriminator 1) ./include/linux/rcupdate.h:865 (discriminator 1) ./include/linux/mmzone.h:2026 (discriminator 1) arch/x86/mm/physaddr.c:65 (discriminator 1)) ? hci_abort_conn_sync (net/bluetooth/hci_sync.c:5439) kasan_report (mm/kasan/report.c:603) ? hci_abort_conn_sync (net/bluetooth/hci_sync.c:5439) hci_abort_conn_sync (net/bluetooth/hci_sync.c:5439) ? __pfx_hci_abort_conn_sync (net/bluetooth/hci_sync.c:5433) hci_acl_create_conn_sync (net/bluetooth/hci_sync.c:6681) Fixes: 45340097ce6e ("Bluetooth: hci_conn: Only do ACL connections sequentially") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index b7d8e99e2a30e..4a28aef2f01f4 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6670,15 +6670,10 @@ static int hci_acl_create_conn_sync(struct hci_dev *hdev, void *data) else cp.role_switch = 0x00; - err = __hci_cmd_sync_status_sk(hdev, HCI_OP_CREATE_CONN, - sizeof(cp), &cp, - HCI_EV_CONN_COMPLETE, - conn->conn_timeout, NULL); - - if (err == -ETIMEDOUT) - hci_abort_conn_sync(hdev, conn, HCI_ERROR_LOCAL_HOST_TERM); - - return err; + return __hci_cmd_sync_status_sk(hdev, HCI_OP_CREATE_CONN, + sizeof(cp), &cp, + HCI_EV_CONN_COMPLETE, + conn->conn_timeout, NULL); } int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn) -- cgit 1.2.3-korg From f7cbce60a38a6589f0dade720d4c2544959ecc0e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 20 Feb 2024 13:10:47 -0500 Subject: Bluetooth: hci_sync: Fix UAF on create_le_conn_complete While waiting for hci_dev_lock the hci_conn object may be cleanup causing the following trace: BUG: KASAN: slab-use-after-free in hci_connect_le_scan_cleanup+0x29/0x350 Read of size 8 at addr ffff888001a50a30 by task kworker/u3:1/111 CPU: 0 PID: 111 Comm: kworker/u3:1 Not tainted 6.8.0-rc2-00701-g8179b15ab3fd-dirty #6418 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014 Workqueue: hci0 hci_cmd_sync_work Call Trace: dump_stack_lvl+0x21/0x70 print_report+0xce/0x620 ? preempt_count_sub+0x13/0xc0 ? __virt_addr_valid+0x15f/0x310 ? hci_connect_le_scan_cleanup+0x29/0x350 kasan_report+0xdf/0x110 ? hci_connect_le_scan_cleanup+0x29/0x350 hci_connect_le_scan_cleanup+0x29/0x350 create_le_conn_complete+0x25c/0x2c0 Fixes: 881559af5f5c ("Bluetooth: hci_sync: Attempt to dequeue connection attempt") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 4a28aef2f01f4..c2d88811d326c 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6693,6 +6693,9 @@ static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err) hci_dev_lock(hdev); + if (!hci_conn_valid(hdev, conn)) + goto done; + if (!err) { hci_connect_le_scan_cleanup(conn, 0x00); goto done; -- cgit 1.2.3-korg From 02171da6e86a73e1b343b36722f5d9d5c04b3539 Mon Sep 17 00:00:00 2001 From: Iulia Tanasescu Date: Fri, 23 Feb 2024 15:14:41 +0200 Subject: Bluetooth: ISO: Add hcon for listening bis sk This creates a hcon instance at bis listen, before the PA sync procedure is started. Signed-off-by: Iulia Tanasescu Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 6 +++--- net/bluetooth/hci_conn.c | 32 +++++++++++++++++++++++++------ net/bluetooth/iso.c | 41 ++++++++++++++++++++++++++++++---------- 3 files changed, 60 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 317d495cfcf5e..199a9f81cf502 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1,7 +1,7 @@ /* BlueZ - Bluetooth protocol stack for Linux Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved. - Copyright 2023 NXP + Copyright 2023-2024 NXP Written 2000,2001 by Maxim Krasnyansky @@ -1528,8 +1528,8 @@ struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst, struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, struct bt_iso_qos *qos, __u8 data_len, __u8 *data); -int hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, - __u8 sid, struct bt_iso_qos *qos); +struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, + __u8 dst_type, __u8 sid, struct bt_iso_qos *qos); int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, struct bt_iso_qos *qos, __u16 sync_handle, __u8 num_bis, __u8 bis[]); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 21e0b4064d05d..54d4189c198ae 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1,7 +1,7 @@ /* BlueZ - Bluetooth protocol stack for Linux Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved. - Copyright 2023 NXP + Copyright 2023-2024 NXP Written 2000,2001 by Maxim Krasnyansky @@ -2057,18 +2057,31 @@ static int create_pa_sync(struct hci_dev *hdev, void *data) return hci_update_passive_scan_sync(hdev); } -int hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, - __u8 sid, struct bt_iso_qos *qos) +struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, + __u8 dst_type, __u8 sid, + struct bt_iso_qos *qos) { struct hci_cp_le_pa_create_sync *cp; + struct hci_conn *conn; + int err; if (hci_dev_test_and_set_flag(hdev, HCI_PA_SYNC)) - return -EBUSY; + return ERR_PTR(-EBUSY); + + conn = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_SLAVE); + if (!conn) + return ERR_PTR(-ENOMEM); + + conn->iso_qos = *qos; + conn->state = BT_LISTEN; + + hci_conn_hold(conn); cp = kzalloc(sizeof(*cp), GFP_KERNEL); if (!cp) { hci_dev_clear_flag(hdev, HCI_PA_SYNC); - return -ENOMEM; + hci_conn_drop(conn); + return ERR_PTR(-ENOMEM); } cp->options = qos->bcast.options; @@ -2080,7 +2093,14 @@ int hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, cp->sync_cte_type = qos->bcast.sync_cte_type; /* Queue start pa_create_sync and scan */ - return hci_cmd_sync_queue(hdev, create_pa_sync, cp, create_pa_complete); + err = hci_cmd_sync_queue(hdev, create_pa_sync, cp, create_pa_complete); + if (err < 0) { + hci_conn_drop(conn); + kfree(cp); + return ERR_PTR(err); + } + + return conn; } int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 04f6572d35f17..d9ee69fa1384b 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -3,7 +3,7 @@ * BlueZ - Bluetooth protocol stack for Linux * * Copyright (C) 2022 Intel Corporation - * Copyright 2023 NXP + * Copyright 2023-2024 NXP */ #include @@ -690,11 +690,8 @@ static void iso_sock_cleanup_listen(struct sock *parent) iso_sock_kill(sk); } - /* If listening socket stands for a PA sync connection, - * properly disconnect the hcon and socket. - */ - if (iso_pi(parent)->conn && iso_pi(parent)->conn->hcon && - test_bit(HCI_CONN_PA_SYNC, &iso_pi(parent)->conn->hcon->flags)) { + /* If listening socket has a hcon, properly disconnect it */ + if (iso_pi(parent)->conn && iso_pi(parent)->conn->hcon) { iso_sock_disconn(parent); return; } @@ -1076,6 +1073,8 @@ static int iso_listen_bis(struct sock *sk) { struct hci_dev *hdev; int err = 0; + struct iso_conn *conn; + struct hci_conn *hcon; BT_DBG("%pMR -> %pMR (SID 0x%2.2x)", &iso_pi(sk)->src, &iso_pi(sk)->dst, iso_pi(sk)->bc_sid); @@ -1096,18 +1095,40 @@ static int iso_listen_bis(struct sock *sk) if (!hdev) return -EHOSTUNREACH; + hci_dev_lock(hdev); + /* Fail if user set invalid QoS */ if (iso_pi(sk)->qos_user_set && !check_bcast_qos(&iso_pi(sk)->qos)) { iso_pi(sk)->qos = default_qos; - return -EINVAL; + err = -EINVAL; + goto unlock; } - err = hci_pa_create_sync(hdev, &iso_pi(sk)->dst, - le_addr_type(iso_pi(sk)->dst_type), - iso_pi(sk)->bc_sid, &iso_pi(sk)->qos); + hcon = hci_pa_create_sync(hdev, &iso_pi(sk)->dst, + le_addr_type(iso_pi(sk)->dst_type), + iso_pi(sk)->bc_sid, &iso_pi(sk)->qos); + if (IS_ERR(hcon)) { + err = PTR_ERR(hcon); + goto unlock; + } + + conn = iso_conn_add(hcon); + if (!conn) { + hci_conn_drop(hcon); + err = -ENOMEM; + goto unlock; + } + + err = iso_chan_add(conn, sk, NULL); + if (err) { + hci_conn_drop(hcon); + goto unlock; + } hci_dev_put(hdev); +unlock: + hci_dev_unlock(hdev); return err; } -- cgit 1.2.3-korg From 168d9bf9c7f01df71e6404cfff66d9c2a8e968fb Mon Sep 17 00:00:00 2001 From: Iulia Tanasescu Date: Fri, 23 Feb 2024 15:14:42 +0200 Subject: Bluetooth: ISO: Reassemble PA data for bcast sink This adds support to reassemble PA data for a Broadcast Sink listening socket. This is needed in case the BASE is received fragmented in multiple PA reports. PA data is first reassembled inside the hcon, before the BASE is extracted and stored inside the socket. The length of the le_per_adv_data hcon array has been raised to 1650, to accommodate the maximum PA data length that can come fragmented, according to spec. Signed-off-by: Iulia Tanasescu Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 5 ++++ include/net/bluetooth/hci_core.h | 5 ++-- net/bluetooth/iso.c | 50 ++++++++++++++++++++++++++++++++++++---- 3 files changed, 54 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 08cb5cb249a49..21099bd3c8bce 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -2037,6 +2037,7 @@ struct hci_cp_le_set_per_adv_params { } __packed; #define HCI_MAX_PER_AD_LENGTH 252 +#define HCI_MAX_PER_AD_TOT_LEN 1650 #define HCI_OP_LE_SET_PER_ADV_DATA 0x203f struct hci_cp_le_set_per_adv_data { @@ -2797,6 +2798,10 @@ struct hci_ev_le_per_adv_report { __u8 data[]; } __packed; +#define LE_PA_DATA_COMPLETE 0x00 +#define LE_PA_DATA_MORE_TO_COME 0x01 +#define LE_PA_DATA_TRUNCATED 0x02 + #define HCI_EV_LE_EXT_ADV_SET_TERM 0x12 struct hci_evt_le_ext_adv_set_term { __u8 status; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 199a9f81cf502..da6aa6549b81b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -734,8 +734,9 @@ struct hci_conn { __u16 le_supv_timeout; __u8 le_adv_data[HCI_MAX_EXT_AD_LENGTH]; __u8 le_adv_data_len; - __u8 le_per_adv_data[HCI_MAX_PER_AD_LENGTH]; - __u8 le_per_adv_data_len; + __u8 le_per_adv_data[HCI_MAX_PER_AD_TOT_LEN]; + __u16 le_per_adv_data_len; + __u16 le_per_adv_data_offset; __u8 le_tx_phy; __u8 le_rx_phy; __s8 rssi; diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index d9ee69fa1384b..30c777c469f96 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -1982,16 +1982,58 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) ev3 = hci_recv_event_data(hdev, HCI_EV_LE_PER_ADV_REPORT); if (ev3) { - size_t base_len = ev3->length; + size_t base_len = 0; u8 *base; + struct hci_conn *hcon; sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr, iso_match_sync_handle_pa_report, ev3); - base = eir_get_service_data(ev3->data, ev3->length, - EIR_BAA_SERVICE_UUID, &base_len); - if (base && sk && base_len <= sizeof(iso_pi(sk)->base)) { + if (!sk) + goto done; + + hcon = iso_pi(sk)->conn->hcon; + if (!hcon) + goto done; + + if (ev3->data_status == LE_PA_DATA_TRUNCATED) { + /* The controller was unable to retrieve PA data. */ + memset(hcon->le_per_adv_data, 0, + HCI_MAX_PER_AD_TOT_LEN); + hcon->le_per_adv_data_len = 0; + hcon->le_per_adv_data_offset = 0; + goto done; + } + + if (hcon->le_per_adv_data_offset + ev3->length > + HCI_MAX_PER_AD_TOT_LEN) + goto done; + + memcpy(hcon->le_per_adv_data + hcon->le_per_adv_data_offset, + ev3->data, ev3->length); + hcon->le_per_adv_data_offset += ev3->length; + + if (ev3->data_status == LE_PA_DATA_COMPLETE) { + /* All PA data has been received. */ + hcon->le_per_adv_data_len = + hcon->le_per_adv_data_offset; + hcon->le_per_adv_data_offset = 0; + + /* Extract BASE */ + base = eir_get_service_data(hcon->le_per_adv_data, + hcon->le_per_adv_data_len, + EIR_BAA_SERVICE_UUID, + &base_len); + + if (!base || base_len > BASE_MAX_LENGTH) + goto done; + memcpy(iso_pi(sk)->base, base, base_len); iso_pi(sk)->base_len = base_len; + } else { + /* This is a PA data fragment. Keep pa_data_len set to 0 + * until all data has been reassembled. + */ + hcon->le_per_adv_data_len = 0; } } else { sk = iso_get_sock_listen(&hdev->bdaddr, BDADDR_ANY, NULL, NULL); -- cgit 1.2.3-korg From bba71ef13b20ef3373f6fdcd66fac35cd60f0bbb Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 21 Feb 2024 09:38:09 -0500 Subject: Bluetooth: hci_sync: Use address filtering when HCI_PA_SYNC is set If HCI_PA_SYNC flag is set it means there is a Periodic Advertising Synchronization pending, so this attempts to locate the address passed to HCI_OP_LE_PA_CREATE_SYNC and program it in the accept list so only reports with that address are processed. Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 45 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index c2d88811d326c..d2928baaa4ee7 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -2562,6 +2562,16 @@ static struct conn_params *conn_params_copy(struct list_head *list, size_t *n) return p; } +/* Clear LE Accept List */ +static int hci_le_clear_accept_list_sync(struct hci_dev *hdev) +{ + if (!(hdev->commands[26] & 0x80)) + return 0; + + return __hci_cmd_sync_status(hdev, HCI_OP_LE_CLEAR_ACCEPT_LIST, 0, NULL, + HCI_CMD_TIMEOUT); +} + /* Device must not be scanning when updating the accept list. * * Update is done using the following sequence: @@ -2610,6 +2620,31 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev) goto done; } + /* Force address filtering if PA Sync is in progress */ + if (hci_dev_test_flag(hdev, HCI_PA_SYNC)) { + struct hci_cp_le_pa_create_sync *sent; + + sent = hci_sent_cmd_data(hdev, HCI_OP_LE_PA_CREATE_SYNC); + if (sent) { + struct conn_params pa; + + memset(&pa, 0, sizeof(pa)); + + bacpy(&pa.addr, &sent->addr); + pa.addr_type = sent->addr_type; + + /* Clear first since there could be addresses left + * behind. + */ + hci_le_clear_accept_list_sync(hdev); + + num_entries = 1; + err = hci_le_add_accept_list_sync(hdev, &pa, + &num_entries); + goto done; + } + } + /* Go through the current accept list programmed into the * controller one by one and check if that address is connected or is * still in the list of pending connections or list of devices to @@ -4216,16 +4251,6 @@ static int hci_le_read_accept_list_size_sync(struct hci_dev *hdev) 0, NULL, HCI_CMD_TIMEOUT); } -/* Clear LE Accept List */ -static int hci_le_clear_accept_list_sync(struct hci_dev *hdev) -{ - if (!(hdev->commands[26] & 0x80)) - return 0; - - return __hci_cmd_sync_status(hdev, HCI_OP_LE_CLEAR_ACCEPT_LIST, 0, NULL, - HCI_CMD_TIMEOUT); -} - /* Read LE Resolving List Size */ static int hci_le_read_resolv_list_size_sync(struct hci_dev *hdev) { -- cgit 1.2.3-korg From 22cbf4f84c00da64196eb15034feee868e63eef0 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 21 Feb 2024 09:38:10 -0500 Subject: Bluetooth: hci_sync: Use QoS to determine which PHY to scan This used the hci_conn QoS to determine which PHY to scan when creating a PA Sync. Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 66 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index d2928baaa4ee7..10aa59da735b8 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -2754,6 +2754,14 @@ done: return filter_policy; } +static void hci_le_scan_phy_params(struct hci_cp_le_scan_phy_params *cp, + u8 type, u16 interval, u16 window) +{ + cp->type = type; + cp->interval = cpu_to_le16(interval); + cp->window = cpu_to_le16(window); +} + static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type, u16 interval, u16 window, u8 own_addr_type, u8 filter_policy) @@ -2761,7 +2769,7 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type, struct hci_cp_le_set_ext_scan_params *cp; struct hci_cp_le_scan_phy_params *phy; u8 data[sizeof(*cp) + sizeof(*phy) * 2]; - u8 num_phy = 0; + u8 num_phy = 0x00; cp = (void *)data; phy = (void *)cp->data; @@ -2771,28 +2779,64 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type, cp->own_addr_type = own_addr_type; cp->filter_policy = filter_policy; - if (scan_1m(hdev) || scan_2m(hdev)) { - cp->scanning_phys |= LE_SCAN_PHY_1M; + /* Check if PA Sync is in progress then select the PHY based on the + * hci_conn.iso_qos. + */ + if (hci_dev_test_flag(hdev, HCI_PA_SYNC)) { + struct hci_cp_le_add_to_accept_list *sent; - phy->type = type; - phy->interval = cpu_to_le16(interval); - phy->window = cpu_to_le16(window); + sent = hci_sent_cmd_data(hdev, HCI_OP_LE_ADD_TO_ACCEPT_LIST); + if (sent) { + struct hci_conn *conn; + + conn = hci_conn_hash_lookup_ba(hdev, ISO_LINK, + &sent->bdaddr); + if (conn) { + struct bt_iso_qos *qos = &conn->iso_qos; + + if (qos->bcast.in.phy & BT_ISO_PHY_1M || + qos->bcast.in.phy & BT_ISO_PHY_2M) { + cp->scanning_phys |= LE_SCAN_PHY_1M; + hci_le_scan_phy_params(phy, type, + interval, + window); + num_phy++; + phy++; + } + + if (qos->bcast.in.phy & BT_ISO_PHY_CODED) { + cp->scanning_phys |= LE_SCAN_PHY_CODED; + hci_le_scan_phy_params(phy, type, + interval, + window); + num_phy++; + phy++; + } + + if (num_phy) + goto done; + } + } + } + if (scan_1m(hdev) || scan_2m(hdev)) { + cp->scanning_phys |= LE_SCAN_PHY_1M; + hci_le_scan_phy_params(phy, type, interval, window); num_phy++; phy++; } if (scan_coded(hdev)) { cp->scanning_phys |= LE_SCAN_PHY_CODED; - - phy->type = type; - phy->interval = cpu_to_le16(interval); - phy->window = cpu_to_le16(window); - + hci_le_scan_phy_params(phy, type, interval, window); num_phy++; phy++; } +done: + if (!num_phy) + return -EINVAL; + return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_SCAN_PARAMS, sizeof(*cp) + sizeof(*phy) * num_phy, data, HCI_CMD_TIMEOUT); -- cgit 1.2.3-korg From 2615fd9a7c2507eb3be3fbe49dcec88a2f56454a Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 16 Feb 2024 16:20:11 -0500 Subject: Bluetooth: hci_sync: Fix overwriting request callback In a few cases the stack may generate commands as responses to events which would happen to overwrite the sent_cmd, so this attempts to store the request in req_skb so even if sent_cmd is replaced with a new command the pending request will remain in stored in req_skb. Fixes: 6a98e3836fa2 ("Bluetooth: Add helper for serialized HCI command execution") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_conn.c | 2 +- net/bluetooth/hci_core.c | 46 ++++++++++++++++++++++++++++------------ net/bluetooth/hci_event.c | 18 ++++++++-------- net/bluetooth/hci_sync.c | 21 +++++++++++++++--- 5 files changed, 61 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index da6aa6549b81b..56fb42df44a33 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -552,6 +552,7 @@ struct hci_dev { __u32 req_status; __u32 req_result; struct sk_buff *req_skb; + struct sk_buff *req_rsp; void *smp_data; void *smp_bredr_data; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 54d4189c198ae..3ad74f76983b2 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -2918,7 +2918,7 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) case HCI_EV_LE_CONN_COMPLETE: case HCI_EV_LE_ENHANCED_CONN_COMPLETE: case HCI_EVT_LE_CIS_ESTABLISHED: - hci_cmd_sync_cancel(hdev, -ECANCELED); + hci_cmd_sync_cancel(hdev, ECANCELED); break; } /* Cancel connect attempt if still queued/pending */ diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 34c8dca2069f6..6ca4c0df9f9c4 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1522,8 +1522,8 @@ static void hci_cmd_timeout(struct work_struct *work) struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_timer.work); - if (hdev->sent_cmd) { - u16 opcode = hci_skb_opcode(hdev->sent_cmd); + if (hdev->req_skb) { + u16 opcode = hci_skb_opcode(hdev->req_skb); bt_dev_err(hdev, "command 0x%4.4x tx timeout", opcode); @@ -2828,6 +2828,7 @@ void hci_release_dev(struct hci_dev *hdev) ida_destroy(&hdev->unset_handle_ida); ida_free(&hci_index_ida, hdev->id); kfree_skb(hdev->sent_cmd); + kfree_skb(hdev->req_skb); kfree_skb(hdev->recv_event); kfree(hdev); } @@ -3157,21 +3158,33 @@ int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen, EXPORT_SYMBOL(__hci_cmd_send); /* Get data from the previously sent command */ -void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) +static void *hci_cmd_data(struct sk_buff *skb, __u16 opcode) { struct hci_command_hdr *hdr; - if (!hdev->sent_cmd) + if (!skb || skb->len < HCI_COMMAND_HDR_SIZE) return NULL; - hdr = (void *) hdev->sent_cmd->data; + hdr = (void *)skb->data; if (hdr->opcode != cpu_to_le16(opcode)) return NULL; - BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode); + return skb->data + HCI_COMMAND_HDR_SIZE; +} - return hdev->sent_cmd->data + HCI_COMMAND_HDR_SIZE; +/* Get data from the previously sent command */ +void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) +{ + void *data; + + /* Check if opcode matches last sent command */ + data = hci_cmd_data(hdev->sent_cmd, opcode); + if (!data) + /* Check if opcode matches last request */ + data = hci_cmd_data(hdev->req_skb, opcode); + + return data; } /* Get data from last received event */ @@ -4072,17 +4085,19 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, if (!status && !hci_req_is_complete(hdev)) return; + skb = hdev->req_skb; + /* If this was the last command in a request the complete - * callback would be found in hdev->sent_cmd instead of the + * callback would be found in hdev->req_skb instead of the * command queue (hdev->cmd_q). */ - if (bt_cb(hdev->sent_cmd)->hci.req_flags & HCI_REQ_SKB) { - *req_complete_skb = bt_cb(hdev->sent_cmd)->hci.req_complete_skb; + if (skb && bt_cb(skb)->hci.req_flags & HCI_REQ_SKB) { + *req_complete_skb = bt_cb(skb)->hci.req_complete_skb; return; } - if (bt_cb(hdev->sent_cmd)->hci.req_complete) { - *req_complete = bt_cb(hdev->sent_cmd)->hci.req_complete; + if (skb && bt_cb(skb)->hci.req_complete) { + *req_complete = bt_cb(skb)->hci.req_complete; return; } @@ -4199,8 +4214,11 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) return; } - if (hci_req_status_pend(hdev)) - hci_dev_set_flag(hdev, HCI_CMD_PENDING); + if (hci_req_status_pend(hdev) && + !hci_dev_test_and_set_flag(hdev, HCI_CMD_PENDING)) { + kfree_skb(hdev->req_skb); + hdev->req_skb = skb_clone(skb, GFP_KERNEL); + } atomic_dec(&hdev->cmd_cnt); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 6071a1226e1b4..bffd2c7ff6087 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4368,7 +4368,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, void *data, * (since for this kind of commands there will not be a command * complete event). */ - if (ev->status || (hdev->sent_cmd && !hci_skb_event(hdev->sent_cmd))) { + if (ev->status || (hdev->req_skb && !hci_skb_event(hdev->req_skb))) { hci_req_cmd_complete(hdev, *opcode, ev->status, req_complete, req_complete_skb); if (hci_dev_test_flag(hdev, HCI_CMD_PENDING)) { @@ -7170,10 +7170,10 @@ static void hci_le_meta_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "subevent 0x%2.2x", ev->subevent); /* Only match event if command OGF is for LE */ - if (hdev->sent_cmd && - hci_opcode_ogf(hci_skb_opcode(hdev->sent_cmd)) == 0x08 && - hci_skb_event(hdev->sent_cmd) == ev->subevent) { - *opcode = hci_skb_opcode(hdev->sent_cmd); + if (hdev->req_skb && + hci_opcode_ogf(hci_skb_opcode(hdev->req_skb)) == 0x08 && + hci_skb_event(hdev->req_skb) == ev->subevent) { + *opcode = hci_skb_opcode(hdev->req_skb); hci_req_cmd_complete(hdev, *opcode, 0x00, req_complete, req_complete_skb); } @@ -7541,10 +7541,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) } /* Only match event if command OGF is not for LE */ - if (hdev->sent_cmd && - hci_opcode_ogf(hci_skb_opcode(hdev->sent_cmd)) != 0x08 && - hci_skb_event(hdev->sent_cmd) == event) { - hci_req_cmd_complete(hdev, hci_skb_opcode(hdev->sent_cmd), + if (hdev->req_skb && + hci_opcode_ogf(hci_skb_opcode(hdev->req_skb)) != 0x08 && + hci_skb_event(hdev->req_skb) == event) { + hci_req_cmd_complete(hdev, hci_skb_opcode(hdev->req_skb), status, &req_complete, &req_complete_skb); req_evt = event; } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 10aa59da735b8..8a3d0d1f78717 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -32,6 +32,10 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, hdev->req_result = result; hdev->req_status = HCI_REQ_DONE; + /* Free the request command so it is not used as response */ + kfree_skb(hdev->req_skb); + hdev->req_skb = NULL; + if (skb) { struct sock *sk = hci_skb_sk(skb); @@ -39,7 +43,7 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, if (sk) sock_put(sk); - hdev->req_skb = skb_get(skb); + hdev->req_rsp = skb_get(skb); } wake_up_interruptible(&hdev->req_wait_q); @@ -187,8 +191,8 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, hdev->req_status = 0; hdev->req_result = 0; - skb = hdev->req_skb; - hdev->req_skb = NULL; + skb = hdev->req_rsp; + hdev->req_rsp = NULL; bt_dev_dbg(hdev, "end: err %d", err); @@ -5021,6 +5025,11 @@ int hci_dev_open_sync(struct hci_dev *hdev) hdev->sent_cmd = NULL; } + if (hdev->req_skb) { + kfree_skb(hdev->req_skb); + hdev->req_skb = NULL; + } + clear_bit(HCI_RUNNING, &hdev->flags); hci_sock_dev_event(hdev, HCI_DEV_CLOSE); @@ -5181,6 +5190,12 @@ int hci_dev_close_sync(struct hci_dev *hdev) hdev->sent_cmd = NULL; } + /* Drop last request */ + if (hdev->req_skb) { + kfree_skb(hdev->req_skb); + hdev->req_skb = NULL; + } + clear_bit(HCI_RUNNING, &hdev->flags); hci_sock_dev_event(hdev, HCI_DEV_CLOSE); -- cgit 1.2.3-korg From 81137162bfaa7278785b24c1fd2e9e74f082e8e4 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 28 Feb 2024 10:49:26 -0500 Subject: Bluetooth: hci_core: Fix possible buffer overflow struct hci_dev_info has a fixed size name[8] field so in the event that hdev->name is bigger than that strcpy would attempt to write past its size, so this fixes this problem by switching to use strscpy. Fixes: dcda165706b9 ("Bluetooth: hci_core: Fix build warnings") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 6ca4c0df9f9c4..230d2bbb933b2 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -908,7 +908,7 @@ int hci_get_dev_info(void __user *arg) else flags = hdev->flags; - strcpy(di.name, hdev->name); + strscpy(di.name, hdev->name, sizeof(di.name)); di.bdaddr = hdev->bdaddr; di.type = (hdev->bus & 0x0f) | ((hdev->dev_type & 0x03) << 4); di.flags = flags; -- cgit 1.2.3-korg From a6e06258f4c31eba0fcd503e19828b5f8fe7b08b Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 28 Feb 2024 10:56:49 -0500 Subject: Bluetooth: msft: Fix memory leak Fix leaking buffer allocated to send MSFT_OP_LE_MONITOR_ADVERTISEMENT. Fixes: 9e14606d8f38 ("Bluetooth: msft: Extended monitor tracking by address filter") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/msft.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c index 630e3023273b2..9612c5d1b13f6 100644 --- a/net/bluetooth/msft.c +++ b/net/bluetooth/msft.c @@ -875,6 +875,7 @@ static int msft_add_address_filter_sync(struct hci_dev *hdev, void *data) remove = true; goto done; } + cp->sub_opcode = MSFT_OP_LE_MONITOR_ADVERTISEMENT; cp->rssi_high = address_filter->rssi_high; cp->rssi_low = address_filter->rssi_low; @@ -887,6 +888,8 @@ static int msft_add_address_filter_sync(struct hci_dev *hdev, void *data) skb = __hci_cmd_sync(hdev, hdev->msft_opcode, size, cp, HCI_CMD_TIMEOUT); + kfree(cp); + if (IS_ERR(skb)) { bt_dev_err(hdev, "Failed to enable address %pMR filter", &address_filter->bdaddr); -- cgit 1.2.3-korg From 0f0639b4d6f649338ce29c62da3ec0787fa08cd1 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 28 Feb 2024 12:11:08 -0500 Subject: Bluetooth: bnep: Fix out-of-bound access This fixes attempting to access past ethhdr.h_source, although it seems intentional to copy also the contents of h_proto this triggers out-of-bound access problems with the likes of static analyzer, so this instead just copy ETH_ALEN and then proceed to use put_unaligned to copy h_proto separetely. Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/bnep/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 8c3f8d0c03588..ec45f77fce218 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -385,7 +385,8 @@ static int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb) case BNEP_COMPRESSED_DST_ONLY: __skb_put_data(nskb, skb_mac_header(skb), ETH_ALEN); - __skb_put_data(nskb, s->eh.h_source, ETH_ALEN + 2); + __skb_put_data(nskb, s->eh.h_source, ETH_ALEN); + put_unaligned(s->eh.h_proto, (__be16 *)__skb_put(nskb, 2)); break; case BNEP_GENERAL: -- cgit 1.2.3-korg From f7b94bdc1ec107c92262716b073b3e816d4784fb Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 1 Mar 2024 12:58:11 -0500 Subject: Bluetooth: af_bluetooth: Fix deadlock Attemting to do sock_lock on .recvmsg may cause a deadlock as shown bellow, so instead of using sock_sock this uses sk_receive_queue.lock on bt_sock_ioctl to avoid the UAF: INFO: task kworker/u9:1:121 blocked for more than 30 seconds. Not tainted 6.7.6-lemon #183 Workqueue: hci0 hci_rx_work Call Trace: __schedule+0x37d/0xa00 schedule+0x32/0xe0 __lock_sock+0x68/0xa0 ? __pfx_autoremove_wake_function+0x10/0x10 lock_sock_nested+0x43/0x50 l2cap_sock_recv_cb+0x21/0xa0 l2cap_recv_frame+0x55b/0x30a0 ? psi_task_switch+0xeb/0x270 ? finish_task_switch.isra.0+0x93/0x2a0 hci_rx_work+0x33a/0x3f0 process_one_work+0x13a/0x2f0 worker_thread+0x2f0/0x410 ? __pfx_worker_thread+0x10/0x10 kthread+0xe0/0x110 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2c/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 Fixes: 2e07e8348ea4 ("Bluetooth: af_bluetooth: Fix Use-After-Free in bt_sock_recvmsg") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/af_bluetooth.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index b93464ac3517f..67604ccec2f42 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -309,14 +309,11 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (flags & MSG_OOB) return -EOPNOTSUPP; - lock_sock(sk); - skb = skb_recv_datagram(sk, flags, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) err = 0; - release_sock(sk); return err; } @@ -346,8 +343,6 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, skb_free_datagram(sk, skb); - release_sock(sk); - if (flags & MSG_TRUNC) copied = skblen; @@ -570,10 +565,11 @@ int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) if (sk->sk_state == BT_LISTEN) return -EINVAL; - lock_sock(sk); + spin_lock(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); amount = skb ? skb->len : 0; - release_sock(sk); + spin_unlock(&sk->sk_receive_queue.lock); + err = put_user(amount, (int __user *)arg); break; -- cgit 1.2.3-korg From 947ec0d002dce8577b655793dcc6fc78d67b7cb6 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sat, 2 Mar 2024 19:06:23 +0200 Subject: Bluetooth: fix use-after-free in accessing skb after sending it hci_send_cmd_sync first sends skb and then tries to clone it. However, the driver may have already freed the skb at that point. Fix by cloning the sent_cmd cloned just above, instead of the original. Log: ================================================================ BUG: KASAN: slab-use-after-free in __copy_skb_header+0x1a/0x240 ... Call Trace: .. __skb_clone+0x59/0x2c0 hci_cmd_work+0x3b3/0x3d0 [bluetooth] process_one_work+0x459/0x900 ... Allocated by task 129: ... __alloc_skb+0x1ae/0x220 __hci_cmd_sync_sk+0x44c/0x7a0 [bluetooth] __hci_cmd_sync_status+0x24/0xb0 [bluetooth] set_cig_params_sync+0x778/0x7d0 [bluetooth] ... Freed by task 0: ... kmem_cache_free+0x157/0x3c0 __usb_hcd_giveback_urb+0x11e/0x1e0 usb_giveback_urb_bh+0x1ad/0x2a0 tasklet_action_common.isra.0+0x259/0x4a0 __do_softirq+0x15b/0x5a7 ================================================================ Fixes: 2615fd9a7c25 ("Bluetooth: hci_sync: Fix overwriting request callback") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 230d2bbb933b2..1690ae57a09db 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -4217,7 +4217,7 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) if (hci_req_status_pend(hdev) && !hci_dev_test_and_set_flag(hdev, HCI_CMD_PENDING)) { kfree_skb(hdev->req_skb); - hdev->req_skb = skb_clone(skb, GFP_KERNEL); + hdev->req_skb = skb_clone(hdev->sent_cmd, GFP_KERNEL); } atomic_dec(&hdev->cmd_cnt); -- cgit 1.2.3-korg From 18d88f0fd8c0fade7ae08c2778d6c6447b11f16a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 2 Mar 2024 11:30:43 +0300 Subject: Bluetooth: ISO: Clean up returns values in iso_connect_ind() This function either returns 0 or HCI_LM_ACCEPT. Make it clearer which returns are which and delete the "lm" variable because it is no longer required. Signed-off-by: Dan Carpenter Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 30c777c469f96..8af75d37b14c0 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -1910,7 +1910,6 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) struct hci_evt_le_big_info_adv_report *ev2; struct hci_ev_le_per_adv_report *ev3; struct sock *sk; - int lm = 0; bt_dev_dbg(hdev, "bdaddr %pMR", bdaddr); @@ -1954,7 +1953,7 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) if (sk && test_bit(BT_SK_PA_SYNC_TERM, &iso_pi(sk)->flags)) - return lm; + return 0; } if (sk) { @@ -2041,16 +2040,14 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) done: if (!sk) - return lm; - - lm |= HCI_LM_ACCEPT; + return 0; if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) *flags |= HCI_PROTO_DEFER; sock_put(sk); - return lm; + return HCI_LM_ACCEPT; } static void iso_connect_cfm(struct hci_conn *hcon, __u8 status) -- cgit 1.2.3-korg From 3237da12a388d972c15d3ed4ce07c015309709ad Mon Sep 17 00:00:00 2001 From: Roman Smirnov Date: Fri, 1 Mar 2024 13:39:15 +0000 Subject: Bluetooth: mgmt: remove NULL check in mgmt_set_connectable_complete() Remove the cmd pointer NULL check in mgmt_set_connectable_complete() because it occurs earlier in set_connectable(). This check is also unnecessary because the pointer is dereferenced just before it. Found by Linux Verification Center (linuxtesting.org) with Svace. Signed-off-by: Roman Smirnov Reviewed-by: Sergey Shtylyov Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 78ab562807d0f..8f16287930dc8 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1707,8 +1707,7 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data, new_settings(hdev, cmd->sk); done: - if (cmd) - mgmt_pending_remove(cmd); + mgmt_pending_remove(cmd); hci_dev_unlock(hdev); } -- cgit 1.2.3-korg From a310d74dce686a6ed77155b5a5e67f0e7b3619fd Mon Sep 17 00:00:00 2001 From: Roman Smirnov Date: Fri, 1 Mar 2024 13:39:16 +0000 Subject: Bluetooth: mgmt: remove NULL check in add_ext_adv_params_complete() Remove the cmd pointer NULL check in add_ext_adv_params_complete() because it occurs earlier in add_ext_adv_params(). This check is also unnecessary because the pointer is dereferenced just before it. Found by Linux Verification Center (linuxtesting.org) with Svace. Signed-off-by: Roman Smirnov Reviewed-by: Sergey Shtylyov Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 8f16287930dc8..9613cc8a60f8b 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -8767,8 +8767,7 @@ static void add_ext_adv_params_complete(struct hci_dev *hdev, void *data, } unlock: - if (cmd) - mgmt_pending_free(cmd); + mgmt_pending_free(cmd); hci_dev_unlock(hdev); } -- cgit 1.2.3-korg From 48201a3b3f398be6a01f78a14b18bd5d31c47458 Mon Sep 17 00:00:00 2001 From: Vinicius Peixoto Date: Mon, 26 Feb 2024 22:43:26 -0300 Subject: Bluetooth: Add new quirk for broken read key length on ATS2851 The ATS2851 controller erroneously reports support for the "Read Encryption Key Length" HCI command. This makes it unable to connect to any devices, since this command is issued by the kernel during the connection process in response to an "Encryption Change" HCI event. Add a new quirk (HCI_QUIRK_BROKEN_ENC_KEY_SIZE) to hint that the command is unsupported, preventing it from interrupting the connection process. This is the error log from btmon before this patch: > HCI Event: Encryption Change (0x08) plen 4 Status: Success (0x00) Handle: 2048 Address: ... Encryption: Enabled with E0 (0x01) < HCI Command: Read Encryption Key Size (0x05|0x0008) plen 2 Handle: 2048 Address: ... > HCI Event: Command Status (0x0f) plen 4 Read Encryption Key Size (0x05|0x0008) ncmd 1 Status: Unknown HCI Command (0x01) Signed-off-by: Vinicius Peixoto Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 1 + include/net/bluetooth/hci.h | 8 ++++++++ net/bluetooth/hci_event.c | 3 ++- 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index b09fe480e708f..06e915b57283f 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -4500,6 +4500,7 @@ static int btusb_probe(struct usb_interface *intf, set_bit(HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, &hdev->quirks); set_bit(HCI_QUIRK_BROKEN_SET_RPA_TIMEOUT, &hdev->quirks); set_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &hdev->quirks); + set_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks); } if (!reset) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 21099bd3c8bce..8701ca5f31eec 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -330,6 +330,14 @@ enum { * during the hdev->setup vendor callback. */ HCI_QUIRK_BROKEN_LE_CODED, + + /* + * When this quirk is set, the HCI_OP_READ_ENC_KEY_SIZE command is + * skipped during an HCI_EV_ENCRYPT_CHANGE event. This is required + * for Actions Semiconductor ATS2851 based controllers, which erroneously + * claim to support it. + */ + HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, }; /* HCI device flags */ diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index bffd2c7ff6087..4ae2248240121 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3641,7 +3641,8 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data, * controller really supports it. If it doesn't, assume * the default size (16). */ - if (!(hdev->commands[20] & 0x10)) { + if (!(hdev->commands[20] & 0x10) || + test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks)) { conn->enc_key_size = HCI_LINK_KEY_SIZE; goto notify; } -- cgit 1.2.3-korg From d5dfbfa2f88eead230d411d1a58f38d6241e2882 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 5 Mar 2024 12:04:30 +0100 Subject: mptcp: drop duplicate header inclusions The headers net/tcp.h, net/genetlink.h and uapi/linux/mptcp.h are included in protocol.h already, no need to include them again directly. This patch removes these duplicate header inclusions. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240305-upstream-net-next-20240304-mptcp-misc-cleanup-v1-1-c436ba5e569b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/diag.c | 1 - net/mptcp/mptcp_diag.c | 1 - net/mptcp/pm.c | 1 - net/mptcp/pm_netlink.c | 3 --- net/mptcp/protocol.c | 1 - net/mptcp/subflow.c | 2 -- 6 files changed, 9 deletions(-) (limited to 'net') diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index 7017dd60659dc..3ae46b545d2c2 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -10,7 +10,6 @@ #include #include #include -#include #include "protocol.h" static int subflow_get_info(struct sock *sk, struct sk_buff *skb) diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c index bd8ff5950c8d3..0566dd793810a 100644 --- a/net/mptcp/mptcp_diag.c +++ b/net/mptcp/mptcp_diag.c @@ -10,7 +10,6 @@ #include #include #include -#include #include "protocol.h" static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index b4bdd92a56482..28e5d514bf208 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -6,7 +6,6 @@ #define pr_fmt(fmt) "MPTCP: " fmt #include -#include #include #include "protocol.h" diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 16f8bd47f4b8f..a900df9f173dd 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -8,12 +8,9 @@ #include #include -#include #include #include #include -#include -#include #include "protocol.h" #include "mib.h" diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 99367c40de0d1..3a1967bc7bad6 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #if IS_ENABLED(CONFIG_MPTCP_IPV6) #include diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 6403c56f2902e..1626dd20c68f1 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -15,13 +15,11 @@ #include #include #include -#include #if IS_ENABLED(CONFIG_MPTCP_IPV6) #include #include #endif #include -#include #include "protocol.h" #include "mib.h" -- cgit 1.2.3-korg From 6a42477fe4491e454aaeabfed5708d1eff5ff2ad Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 5 Mar 2024 12:04:31 +0100 Subject: mptcp: update set_flags interfaces This patch updates set_flags interfaces, make it more similar to the interfaces of dump_addr and get_addr: mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info) mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info) mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info) Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240305-upstream-net-next-20240304-mptcp-misc-cleanup-v1-2-c436ba5e569b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 10 ++++----- net/mptcp/pm_netlink.c | 58 +++++++++++++++++++++--------------------------- net/mptcp/pm_userspace.c | 32 +++++++++++++++++++++----- net/mptcp/protocol.h | 10 +++------ 4 files changed, 58 insertions(+), 52 deletions(-) (limited to 'net') diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 28e5d514bf208..55406720c6071 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -456,13 +456,11 @@ int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb) return mptcp_pm_nl_dump_addr(msg, cb); } -int mptcp_pm_set_flags(struct net *net, struct nlattr *token, - struct mptcp_pm_addr_entry *loc, - struct mptcp_pm_addr_entry *rem, u8 bkup) +int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info) { - if (token) - return mptcp_userspace_pm_set_flags(net, token, loc, rem, bkup); - return mptcp_pm_nl_set_flags(net, loc, bkup); + if (info->attrs[MPTCP_PM_ATTR_TOKEN]) + return mptcp_userspace_pm_set_flags(skb, info); + return mptcp_pm_nl_set_flags(skb, info); } void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index a900df9f173dd..c799fe84dfd36 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1887,66 +1887,58 @@ next: return ret; } -int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 bkup) +int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info) { - struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); + struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }; + struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | MPTCP_PM_ADDR_FLAG_FULLMESH; + struct net *net = sock_net(skb->sk); struct mptcp_pm_addr_entry *entry; + struct pm_nl_pernet *pernet; u8 lookup_by_id = 0; + u8 bkup = 0; + int ret; + + pernet = pm_nl_get_pernet(net); + + ret = mptcp_pm_parse_entry(attr, info, false, &addr); + if (ret < 0) + return ret; - if (addr->addr.family == AF_UNSPEC) { + if (addr.addr.family == AF_UNSPEC) { lookup_by_id = 1; - if (!addr->addr.id) + if (!addr.addr.id) return -EOPNOTSUPP; } + if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP) + bkup = 1; + spin_lock_bh(&pernet->lock); - entry = __lookup_addr(pernet, &addr->addr, lookup_by_id); + entry = __lookup_addr(pernet, &addr.addr, lookup_by_id); if (!entry) { spin_unlock_bh(&pernet->lock); return -EINVAL; } - if ((addr->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && + if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { spin_unlock_bh(&pernet->lock); return -EINVAL; } - changed = (addr->flags ^ entry->flags) & mask; - entry->flags = (entry->flags & ~mask) | (addr->flags & mask); - *addr = *entry; + changed = (addr.flags ^ entry->flags) & mask; + entry->flags = (entry->flags & ~mask) | (addr.flags & mask); + addr = *entry; spin_unlock_bh(&pernet->lock); - mptcp_nl_set_flags(net, &addr->addr, bkup, changed); + mptcp_nl_set_flags(net, &addr.addr, bkup, changed); return 0; } int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info) { - struct mptcp_pm_addr_entry remote = { .addr = { .family = AF_UNSPEC }, }; - struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }; - struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; - struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; - struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; - struct net *net = sock_net(skb->sk); - u8 bkup = 0; - int ret; - - ret = mptcp_pm_parse_entry(attr, info, false, &addr); - if (ret < 0) - return ret; - - if (attr_rem) { - ret = mptcp_pm_parse_entry(attr_rem, info, false, &remote); - if (ret < 0) - return ret; - } - - if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP) - bkup = 1; - - return mptcp_pm_set_flags(net, token, &addr, &remote, bkup); + return mptcp_pm_set_flags(skb, info); } static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index b9809d988693b..7ef3b69852f00 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -546,14 +546,19 @@ destroy_err: return err; } -int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token, - struct mptcp_pm_addr_entry *loc, - struct mptcp_pm_addr_entry *rem, u8 bkup) +int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info) { + struct mptcp_pm_addr_entry loc = { .addr = { .family = AF_UNSPEC }, }; + struct mptcp_pm_addr_entry rem = { .addr = { .family = AF_UNSPEC }, }; + struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; + struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; + struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct net *net = sock_net(skb->sk); struct mptcp_sock *msk; int ret = -EINVAL; struct sock *sk; u32 token_val; + u8 bkup = 0; token_val = nla_get_u32(token); @@ -566,12 +571,27 @@ int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token, if (!mptcp_pm_is_userspace(msk)) goto set_flags_err; - if (loc->addr.family == AF_UNSPEC || - rem->addr.family == AF_UNSPEC) + ret = mptcp_pm_parse_entry(attr, info, false, &loc); + if (ret < 0) + goto set_flags_err; + + if (attr_rem) { + ret = mptcp_pm_parse_entry(attr_rem, info, false, &rem); + if (ret < 0) + goto set_flags_err; + } + + if (loc.addr.family == AF_UNSPEC || + rem.addr.family == AF_UNSPEC) { + ret = -EINVAL; goto set_flags_err; + } + + if (loc.flags & MPTCP_PM_ADDR_FLAG_BACKUP) + bkup = 1; lock_sock(sk); - ret = mptcp_pm_nl_mp_prio_send_ack(msk, &loc->addr, &rem->addr, bkup); + ret = mptcp_pm_nl_mp_prio_send_ack(msk, &loc.addr, &rem.addr, bkup); release_sock(sk); set_flags_err: diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index de9f0ff6dd309..f16edef6026a3 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -970,13 +970,9 @@ int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, u8 *flags, int *ifindex); -int mptcp_pm_set_flags(struct net *net, struct nlattr *token, - struct mptcp_pm_addr_entry *loc, - struct mptcp_pm_addr_entry *rem, u8 bkup); -int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 bkup); -int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token, - struct mptcp_pm_addr_entry *loc, - struct mptcp_pm_addr_entry *rem, u8 bkup); +int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info); +int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info); int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool echo); -- cgit 1.2.3-korg From a4d68b160240815d7ca2f935ba690df33a525dd9 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 5 Mar 2024 12:04:32 +0100 Subject: mptcp: set error messages for set_flags In addition to returning the error value, this patch also sets an error messages with GENL_SET_ERR_MSG or NL_SET_ERR_MSG_ATTR both for pm_netlink.c and pm_userspace.c. It will help the userspace to identify the issue. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240305-upstream-net-next-20240304-mptcp-misc-cleanup-v1-3-c436ba5e569b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 6 +++++- net/mptcp/pm_userspace.c | 9 +++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index c799fe84dfd36..354083b8386f0 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1908,8 +1908,10 @@ int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info) if (addr.addr.family == AF_UNSPEC) { lookup_by_id = 1; - if (!addr.addr.id) + if (!addr.addr.id) { + GENL_SET_ERR_MSG(info, "missing required inputs"); return -EOPNOTSUPP; + } } if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP) @@ -1919,11 +1921,13 @@ int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info) entry = __lookup_addr(pernet, &addr.addr, lookup_by_id); if (!entry) { spin_unlock_bh(&pernet->lock); + GENL_SET_ERR_MSG(info, "address not found"); return -EINVAL; } if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { spin_unlock_bh(&pernet->lock); + GENL_SET_ERR_MSG(info, "invalid addr flags"); return -EINVAL; } diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 7ef3b69852f00..9f5d422d5ef6c 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -563,13 +563,17 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info) token_val = nla_get_u32(token); msk = mptcp_token_get_sock(net, token_val); - if (!msk) + if (!msk) { + NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); return ret; + } sk = (struct sock *)msk; - if (!mptcp_pm_is_userspace(msk)) + if (!mptcp_pm_is_userspace(msk)) { + GENL_SET_ERR_MSG(info, "userspace PM not selected"); goto set_flags_err; + } ret = mptcp_pm_parse_entry(attr, info, false, &loc); if (ret < 0) @@ -583,6 +587,7 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info) if (loc.addr.family == AF_UNSPEC || rem.addr.family == AF_UNSPEC) { + GENL_SET_ERR_MSG(info, "invalid address families"); ret = -EINVAL; goto set_flags_err; } -- cgit 1.2.3-korg From af250c27ea1c404e210fc3a308b20f772df584d6 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 5 Mar 2024 12:04:33 +0100 Subject: mptcp: drop lookup_by_id in lookup_addr When the lookup_by_id parameter of __lookup_addr() is true, it's the same as __lookup_addr_by_id(), it can be replaced by __lookup_addr_by_id() directly. So drop this parameter, let __lookup_addr() only looks up address on the local address list by comparing addresses in it, not address ids. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240305-upstream-net-next-20240304-mptcp-misc-cleanup-v1-4-c436ba5e569b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 354083b8386f0..5c17d39146ea2 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -499,15 +499,12 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) } static struct mptcp_pm_addr_entry * -__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info, - bool lookup_by_id) +__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info) { struct mptcp_pm_addr_entry *entry; list_for_each_entry(entry, &pernet->local_addr_list, list) { - if ((!lookup_by_id && - mptcp_addresses_equal(&entry->addr, info, entry->addr.port)) || - (lookup_by_id && entry->addr.id == info->id)) + if (mptcp_addresses_equal(&entry->addr, info, entry->addr.port)) return entry; } return NULL; @@ -537,7 +534,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) mptcp_local_address((struct sock_common *)msk->first, &mpc_addr); rcu_read_lock(); - entry = __lookup_addr(pernet, &mpc_addr, false); + entry = __lookup_addr(pernet, &mpc_addr); if (entry) { __clear_bit(entry->addr.id, msk->pm.id_avail_bitmap); msk->mpc_endpoint_id = entry->addr.id; @@ -1918,7 +1915,8 @@ int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info) bkup = 1; spin_lock_bh(&pernet->lock); - entry = __lookup_addr(pernet, &addr.addr, lookup_by_id); + entry = lookup_by_id ? __lookup_addr_by_id(pernet, addr.addr.id) : + __lookup_addr(pernet, &addr.addr); if (!entry) { spin_unlock_bh(&pernet->lock); GENL_SET_ERR_MSG(info, "address not found"); -- cgit 1.2.3-korg From d7933a2c7f87667a87a2c0d0c5a1617c414c6024 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 5 Mar 2024 22:26:10 +0100 Subject: ethtool: remove ethtool_eee_use_linkmodes After 292fac464b01 ("net: ethtool: eee: Remove legacy _u32 from keee") this function has no user any longer. Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/b4ff9b51-092b-4d44-bfce-c95342a05b51@gmail.com Signed-off-by: Jakub Kicinski --- net/ethtool/common.c | 5 ----- net/ethtool/common.h | 1 - 2 files changed, 6 deletions(-) (limited to 'net') diff --git a/net/ethtool/common.c b/net/ethtool/common.c index ce486cec346c8..6b2a360dcdf06 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -712,8 +712,3 @@ ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size) } } EXPORT_SYMBOL_GPL(ethtool_forced_speed_maps_init); - -bool ethtool_eee_use_linkmodes(const struct ethtool_keee *eee) -{ - return !linkmode_empty(eee->supported); -} diff --git a/net/ethtool/common.h b/net/ethtool/common.h index 0f2b5f7eacee7..28b8aaaf9bcb3 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -55,6 +55,5 @@ int ethtool_get_module_eeprom_call(struct net_device *dev, struct ethtool_eeprom *ee, u8 *data); bool __ethtool_dev_mm_supported(struct net_device *dev); -bool ethtool_eee_use_linkmodes(const struct ethtool_keee *eee); #endif /* _ETHTOOL_COMMON_H */ -- cgit 1.2.3-korg From 42ed95de82c01184a88945d3ca274be6a7ea607d Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 7 Mar 2024 11:58:17 -0500 Subject: Bluetooth: ISO: Align broadcast sync_timeout with connection timeout This aligns broadcast sync_timeout with existing connection timeouts which are 20 seconds long. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/bluetooth.h | 2 ++ net/bluetooth/iso.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 7ffa8c192c3f2..9fe95a22abeb7 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -164,6 +164,8 @@ struct bt_voice { #define BT_ISO_QOS_BIG_UNSET 0xff #define BT_ISO_QOS_BIS_UNSET 0xff +#define BT_ISO_SYNC_TIMEOUT 0x07d0 /* 20 secs */ + struct bt_iso_io_qos { __u32 interval; __u16 latency; diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 8af75d37b14c0..c8793e57f4b54 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -834,10 +834,10 @@ static struct bt_iso_qos default_qos = { .bcode = {0x00}, .options = 0x00, .skip = 0x0000, - .sync_timeout = 0x4000, + .sync_timeout = BT_ISO_SYNC_TIMEOUT, .sync_cte_type = 0x00, .mse = 0x00, - .timeout = 0x4000, + .timeout = BT_ISO_SYNC_TIMEOUT, }, }; -- cgit 1.2.3-korg From 7a04ff127786b3950a28030cb1c75d2e6c0a7d97 Mon Sep 17 00:00:00 2001 From: Justin Swartz Date: Wed, 6 Mar 2024 13:26:59 +0200 Subject: net: x25: remove dead links from Kconfig Remove the "You can read more about X.25 at" links provided in Kconfig as they have not pointed at any relevant pages for quite a while. An old copy of https://www.sangoma.com/tutorials/x25/ can be retrieved via https://archive.org/web/ but nothing useful seems to have been preserved for http://docwiki.cisco.com/wiki/X.25 For the sake of necromancy and those who really did want to read more about X.25, a previous incarnation of Kconfig included a link to: http://www.cisco.com/univercd/cc/td/doc/product/software/ios11/cbook/cx25.htm Which can still be read at: https://web.archive.org/web/20071013101232/http://cisco.com/en/US/docs/ios/11_0/router/configuration/guide/cx25.html Signed-off-by: Justin Swartz Acked-by: Martin Schiller Link: https://lore.kernel.org/r/20240306112659.25375-1-justin.swartz@risingedge.co.za Signed-off-by: Jakub Kicinski --- net/x25/Kconfig | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/x25/Kconfig b/net/x25/Kconfig index 68729aa3a5d53..dc72302cbd079 100644 --- a/net/x25/Kconfig +++ b/net/x25/Kconfig @@ -17,8 +17,6 @@ config X25 if you want that) and the lower level data link layer protocol LAPB (say Y to "LAPB Data Link Driver" below if you want that). - You can read more about X.25 at and - . Information about X.25 for Linux is contained in the files and . -- cgit 1.2.3-korg From 4f6473ad60947c88c9eac21fe16233e795d3a2f3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 6 Mar 2024 12:04:09 -0800 Subject: net: dsa: Leverage core stats allocator With commit 34d21de99cea9 ("net: Move {l,t,d}stats allocation to core and convert veth & vrf"), stats allocation could be done on net core instead of in this driver. With this new approach, the driver doesn't have to bother with error handling (allocation failure checking, making sure free happens in the right spot, etc). This is core responsibility now. Remove the allocation in the DSA user network device code and leverage the network core allocation instead. Signed-off-by: Florian Fainelli Reviewed-by: Breno Leitao Link: https://lore.kernel.org/r/20240306200416.2973179-1-florian.fainelli@broadcom.com Signed-off-by: Jakub Kicinski --- net/dsa/user.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'net') diff --git a/net/dsa/user.c b/net/dsa/user.c index 9c42a6edcdc8a..16d395bb1a1fe 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -2625,11 +2625,7 @@ int dsa_user_create(struct dsa_port *port) user_dev->vlan_features = conduit->vlan_features; p = netdev_priv(user_dev); - user_dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!user_dev->tstats) { - free_netdev(user_dev); - return -ENOMEM; - } + user_dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; ret = gro_cells_init(&p->gcells, user_dev); if (ret) @@ -2695,7 +2691,6 @@ out_phy: out_gcells: gro_cells_destroy(&p->gcells); out_free: - free_percpu(user_dev->tstats); free_netdev(user_dev); port->user = NULL; return ret; @@ -2716,7 +2711,6 @@ void dsa_user_destroy(struct net_device *user_dev) dsa_port_phylink_destroy(dp); gro_cells_destroy(&p->gcells); - free_percpu(user_dev->tstats); free_netdev(user_dev); } -- cgit 1.2.3-korg From 8edbd3960150ee7dd7a2857bd9a5cf14906facc2 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Wed, 6 Mar 2024 10:11:17 -0800 Subject: mpls: Do not orphan the skb We observed that TCP-pacing was falling back to the TCP-layer pacing instead of utilizing sch_fq for the pacing. This causes significant CPU-usage due to the hrtimer running on a per-TCP-connection basis. The issue is that mpls_xmit() calls skb_orphan() and thus sets skb->sk to NULL. Which implies that many of the goodies of TCP won't work. Pacing falls back to TCP-layer pacing. TCP Small Queues does not work, ... It is safe to remove this call to skb_orphan() in mpls_xmit() as there really is not reason for it to be there. It appears that this call to skb_orphan comes from the very initial implementation of MPLS. Cc: Roopa Prabhu Reported-by: Craig Taylor Signed-off-by: Christoph Paasch Link: https://lore.kernel.org/r/20240306181117.77419-1-cpaasch@apple.com Signed-off-by: Jakub Kicinski --- net/mpls/mpls_iptunnel.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index ef59e25dc4827..8fc790f2a01bb 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -55,8 +55,6 @@ static int mpls_xmit(struct sk_buff *skb) out_dev = dst->dev; net = dev_net(out_dev); - skb_orphan(skb); - if (!mpls_output_possible(out_dev) || !dst->lwtstate || skb_warn_if_lro(skb)) goto drop; -- cgit 1.2.3-korg From 02e24903e5a46b7a7fca44bcfe0cd6fa5b240c34 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 10:24:26 +0000 Subject: netlink: let core handle error cases in dump operations After commit b5a899154aa9 ("netlink: handle EMSGSIZE errors in the core"), we can remove some code that was not 100 % correct anyway. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306102426.245689-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 5 +---- net/ipv4/devinet.c | 4 ---- net/ipv4/fib_frontend.c | 7 +------ net/ipv6/addrconf.c | 7 +------ 4 files changed, 3 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e0353487c57ea..a3d7847ce69d3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2267,11 +2267,8 @@ walk_entries: nlh->nlmsg_seq, 0, flags, ext_filter_mask, 0, NULL, 0, netnsid, GFP_KERNEL); - if (err < 0) { - if (likely(skb->len)) - err = skb->len; + if (err < 0) break; - } } cb->seq = tgt_net->dev_base_seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 4daa8124f247c..7a437f0d41905 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1900,8 +1900,6 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) goto done; } done: - if (err < 0 && likely(skb->len)) - err = skb->len; if (fillargs.netnsid >= 0) put_net(tgt_net); rcu_read_unlock(); @@ -2312,8 +2310,6 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb, ctx->all_default++; } done: - if (err < 0 && likely(skb->len)) - err = skb->len; rcu_read_unlock(); return err; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index bf3a2214fe29b..48741352a88a7 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1026,8 +1026,6 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) goto unlock; } err = fib_table_dump(tb, skb, cb, &filter); - if (err < 0 && skb->len) - err = skb->len; goto unlock; } @@ -1045,11 +1043,8 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) memset(&cb->args[2], 0, sizeof(cb->args) - 2 * sizeof(cb->args[0])); err = fib_table_dump(tb, skb, cb, &filter); - if (err < 0) { - if (likely(skb->len)) - err = skb->len; + if (err < 0) goto out; - } dumped = 1; next: e++; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2f84e6ecf19f4..f786b65d12e43 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -793,8 +793,6 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, ctx->all_default++; } done: - if (err < 0 && likely(skb->len)) - err = skb->len; rcu_read_unlock(); return err; } @@ -6158,11 +6156,8 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI); - if (err < 0) { - if (likely(skb->len)) - err = skb->len; + if (err < 0) break; - } } rcu_read_unlock(); -- cgit 1.2.3-korg From 2658b5a8a4eee5fad378d0bde2f221deacbc58f1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:14 +0000 Subject: net: introduce struct net_hotdata Instead of spreading networking critical fields all over the places, add a custom net_hotdata structure so that we can precisely control its layout. In this first patch, move : - gro_normal_batch used in rx (GRO stack) - offload_base used in rx and tx (GRO and TSO stacks) Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 - include/net/gro.h | 5 ++--- include/net/hotdata.h | 15 +++++++++++++++ net/core/Makefile | 1 + net/core/gro.c | 15 ++++++--------- net/core/gso.c | 4 ++-- net/core/hotdata.c | 9 +++++++++ net/core/sysctl_net_core.c | 3 ++- 8 files changed, 37 insertions(+), 16 deletions(-) create mode 100644 include/net/hotdata.h create mode 100644 net/core/hotdata.c (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2767467138a02..6643452af5436 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4796,7 +4796,6 @@ void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s); extern int netdev_max_backlog; extern int dev_rx_weight; extern int dev_tx_weight; -extern int gro_normal_batch; enum { NESTED_SYNC_IMM_BIT, diff --git a/include/net/gro.h b/include/net/gro.h index 2b58671a65492..d6fc8fbd37302 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -9,6 +9,7 @@ #include #include #include +#include struct napi_gro_cb { union { @@ -446,7 +447,7 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, { list_add_tail(&skb->list, &napi->rx_list); napi->rx_count += segs; - if (napi->rx_count >= READ_ONCE(gro_normal_batch)) + if (napi->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch)) gro_normal_list(napi); } @@ -493,6 +494,4 @@ static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int * #endif } -extern struct list_head offload_base; - #endif /* _NET_IPV6_GRO_H */ diff --git a/include/net/hotdata.h b/include/net/hotdata.h new file mode 100644 index 0000000000000..6ed32e4e34aa3 --- /dev/null +++ b/include/net/hotdata.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _NET_HOTDATA_H +#define _NET_HOTDATA_H + +#include + +/* Read mostly data used in network fast paths. */ +struct net_hotdata { + struct list_head offload_base; + int gro_normal_batch; +}; + +extern struct net_hotdata net_hotdata; + +#endif /* _NET_HOTDATA_H */ diff --git a/net/core/Makefile b/net/core/Makefile index 821aec06abf14..6e6548011fae5 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -18,6 +18,7 @@ obj-y += dev.o dev_addr_lists.o dst.o netevent.o \ obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o obj-y += net-sysfs.o +obj-y += hotdata.o obj-$(CONFIG_PAGE_POOL) += page_pool.o page_pool_user.o obj-$(CONFIG_PROC_FS) += net-procfs.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o diff --git a/net/core/gro.c b/net/core/gro.c index 6a0edbd826a17..ee30d4f0c0387 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -10,9 +10,6 @@ #define GRO_MAX_HEAD (MAX_HEADER + 128) static DEFINE_SPINLOCK(offload_lock); -struct list_head offload_base __read_mostly = LIST_HEAD_INIT(offload_base); -/* Maximum number of GRO_NORMAL skbs to batch up for list-RX */ -int gro_normal_batch __read_mostly = 8; /** * dev_add_offload - register offload handlers @@ -31,7 +28,7 @@ void dev_add_offload(struct packet_offload *po) struct packet_offload *elem; spin_lock(&offload_lock); - list_for_each_entry(elem, &offload_base, list) { + list_for_each_entry(elem, &net_hotdata.offload_base, list) { if (po->priority < elem->priority) break; } @@ -55,7 +52,7 @@ EXPORT_SYMBOL(dev_add_offload); */ static void __dev_remove_offload(struct packet_offload *po) { - struct list_head *head = &offload_base; + struct list_head *head = &net_hotdata.offload_base; struct packet_offload *po1; spin_lock(&offload_lock); @@ -235,9 +232,9 @@ done: static void napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb) { + struct list_head *head = &net_hotdata.offload_base; struct packet_offload *ptype; __be16 type = skb->protocol; - struct list_head *head = &offload_base; int err = -ENOENT; BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb)); @@ -444,7 +441,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff { u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1); struct gro_list *gro_list = &napi->gro_hash[bucket]; - struct list_head *head = &offload_base; + struct list_head *head = &net_hotdata.offload_base; struct packet_offload *ptype; __be16 type = skb->protocol; struct sk_buff *pp = NULL; @@ -550,7 +547,7 @@ normal: struct packet_offload *gro_find_receive_by_type(__be16 type) { - struct list_head *offload_head = &offload_base; + struct list_head *offload_head = &net_hotdata.offload_base; struct packet_offload *ptype; list_for_each_entry_rcu(ptype, offload_head, list) { @@ -564,7 +561,7 @@ EXPORT_SYMBOL(gro_find_receive_by_type); struct packet_offload *gro_find_complete_by_type(__be16 type) { - struct list_head *offload_head = &offload_base; + struct list_head *offload_head = &net_hotdata.offload_base; struct packet_offload *ptype; list_for_each_entry_rcu(ptype, offload_head, list) { diff --git a/net/core/gso.c b/net/core/gso.c index 9e1803bfc9c6c..bcd156372f4df 100644 --- a/net/core/gso.c +++ b/net/core/gso.c @@ -17,7 +17,7 @@ struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb, struct packet_offload *ptype; rcu_read_lock(); - list_for_each_entry_rcu(ptype, &offload_base, list) { + list_for_each_entry_rcu(ptype, &net_hotdata.offload_base, list) { if (ptype->type == type && ptype->callbacks.gso_segment) { segs = ptype->callbacks.gso_segment(skb, features); break; @@ -48,7 +48,7 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, __skb_pull(skb, vlan_depth); rcu_read_lock(); - list_for_each_entry_rcu(ptype, &offload_base, list) { + list_for_each_entry_rcu(ptype, &net_hotdata.offload_base, list) { if (ptype->type == type && ptype->callbacks.gso_segment) { segs = ptype->callbacks.gso_segment(skb, features); break; diff --git a/net/core/hotdata.c b/net/core/hotdata.c new file mode 100644 index 0000000000000..abb8ad19d59ac --- /dev/null +++ b/net/core/hotdata.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include +#include +#include + +struct net_hotdata net_hotdata __cacheline_aligned = { + .offload_base = LIST_HEAD_INIT(net_hotdata.offload_base), + .gro_normal_batch = 8, +}; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 986f15e5d6c41..0eb1242eabbe0 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "dev.h" @@ -632,7 +633,7 @@ static struct ctl_table net_core_table[] = { }, { .procname = "gro_normal_batch", - .data = &gro_normal_batch, + .data = &net_hotdata.gro_normal_batch, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, -- cgit 1.2.3-korg From ae6e22f7b7f0702015d86cfa036492b94be92f04 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:15 +0000 Subject: net: move netdev_budget and netdev_budget to net_hotdata netdev_budget and netdev_budget are used in rx path (net_rx_action()) Move them into net_hotdata for better cache locality. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 2 ++ net/core/dev.c | 7 ++----- net/core/dev.h | 2 -- net/core/hotdata.c | 6 ++++++ net/core/sysctl_net_core.c | 4 ++-- 5 files changed, 12 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index 6ed32e4e34aa3..72170223385eb 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -8,6 +8,8 @@ struct net_hotdata { struct list_head offload_base; int gro_normal_batch; + int netdev_budget; + int netdev_budget_usecs; }; extern struct net_hotdata net_hotdata; diff --git a/net/core/dev.c b/net/core/dev.c index bf933eeaa688d..9ccb48618dba8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4410,9 +4410,6 @@ EXPORT_SYMBOL(netdev_max_backlog); int netdev_tstamp_prequeue __read_mostly = 1; unsigned int sysctl_skb_defer_max __read_mostly = 64; -int netdev_budget __read_mostly = 300; -/* Must be at least 2 jiffes to guarantee 1 jiffy timeout */ -unsigned int __read_mostly netdev_budget_usecs = 2 * USEC_PER_SEC / HZ; int weight_p __read_mostly = 64; /* old backlog weight */ int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */ int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */ @@ -6790,8 +6787,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) { struct softnet_data *sd = this_cpu_ptr(&softnet_data); unsigned long time_limit = jiffies + - usecs_to_jiffies(READ_ONCE(netdev_budget_usecs)); - int budget = READ_ONCE(netdev_budget); + usecs_to_jiffies(READ_ONCE(net_hotdata.netdev_budget_usecs)); + int budget = READ_ONCE(net_hotdata.netdev_budget); LIST_HEAD(list); LIST_HEAD(repoll); diff --git a/net/core/dev.h b/net/core/dev.h index 45892267848d7..9a6170530850c 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -38,8 +38,6 @@ int dev_addr_init(struct net_device *dev); void dev_addr_check(struct net_device *dev); /* sysctls not referred to from outside net/core/ */ -extern int netdev_budget; -extern unsigned int netdev_budget_usecs; extern unsigned int sysctl_skb_defer_max; extern int netdev_tstamp_prequeue; extern int netdev_unregister_timeout_secs; diff --git a/net/core/hotdata.c b/net/core/hotdata.c index abb8ad19d59ac..907d69120397d 100644 --- a/net/core/hotdata.c +++ b/net/core/hotdata.c @@ -1,9 +1,15 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include #include +#include #include + struct net_hotdata net_hotdata __cacheline_aligned = { .offload_base = LIST_HEAD_INIT(net_hotdata.offload_base), .gro_normal_batch = 8, + + .netdev_budget = 300, + /* Must be at least 2 jiffes to guarantee 1 jiffy timeout */ + .netdev_budget_usecs = 2 * USEC_PER_SEC / HZ, }; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 0eb1242eabbe0..a9c2d798b2195 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -577,7 +577,7 @@ static struct ctl_table net_core_table[] = { #endif { .procname = "netdev_budget", - .data = &netdev_budget, + .data = &net_hotdata.netdev_budget, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec @@ -600,7 +600,7 @@ static struct ctl_table net_core_table[] = { }, { .procname = "netdev_budget_usecs", - .data = &netdev_budget_usecs, + .data = &net_hotdata.netdev_budget_usecs, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, -- cgit 1.2.3-korg From f59b5416c396ac4910dd7a0cdf26cbb0e1faf529 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:16 +0000 Subject: net: move netdev_tstamp_prequeue into net_hotdata netdev_tstamp_prequeue is used in rx path. Move it to net_hotdata for better cache locality. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 1 + net/core/dev.c | 10 +++++----- net/core/dev.h | 1 - net/core/hotdata.c | 2 ++ net/core/sysctl_net_core.c | 2 +- 5 files changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index 72170223385eb..149e56528537d 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -10,6 +10,7 @@ struct net_hotdata { int gro_normal_batch; int netdev_budget; int netdev_budget_usecs; + int tstamp_prequeue; }; extern struct net_hotdata net_hotdata; diff --git a/net/core/dev.c b/net/core/dev.c index 9ccb48618dba8..f2f44303c0351 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4408,7 +4408,6 @@ EXPORT_SYMBOL(__dev_direct_xmit); int netdev_max_backlog __read_mostly = 1000; EXPORT_SYMBOL(netdev_max_backlog); -int netdev_tstamp_prequeue __read_mostly = 1; unsigned int sysctl_skb_defer_max __read_mostly = 64; int weight_p __read_mostly = 64; /* old backlog weight */ int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */ @@ -5052,7 +5051,7 @@ static int netif_rx_internal(struct sk_buff *skb) { int ret; - net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); + net_timestamp_check(READ_ONCE(net_hotdata.tstamp_prequeue), skb); trace_netif_rx(skb); @@ -5344,7 +5343,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, int ret = NET_RX_DROP; __be16 type; - net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb); + net_timestamp_check(!READ_ONCE(net_hotdata.tstamp_prequeue), skb); trace_netif_receive_skb(skb); @@ -5728,7 +5727,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb) { int ret; - net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); + net_timestamp_check(READ_ONCE(net_hotdata.tstamp_prequeue), skb); if (skb_defer_rx_timestamp(skb)) return NET_RX_SUCCESS; @@ -5758,7 +5757,8 @@ void netif_receive_skb_list_internal(struct list_head *head) INIT_LIST_HEAD(&sublist); list_for_each_entry_safe(skb, next, head, list) { - net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); + net_timestamp_check(READ_ONCE(net_hotdata.tstamp_prequeue), + skb); skb_list_del_init(skb); if (!skb_defer_rx_timestamp(skb)) list_add_tail(&skb->list, &sublist); diff --git a/net/core/dev.h b/net/core/dev.h index 9a6170530850c..2bcaf8eee50c1 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -39,7 +39,6 @@ void dev_addr_check(struct net_device *dev); /* sysctls not referred to from outside net/core/ */ extern unsigned int sysctl_skb_defer_max; -extern int netdev_tstamp_prequeue; extern int netdev_unregister_timeout_secs; extern int weight_p; extern int dev_weight_rx_bias; diff --git a/net/core/hotdata.c b/net/core/hotdata.c index 907d69120397d..087c4c84987df 100644 --- a/net/core/hotdata.c +++ b/net/core/hotdata.c @@ -12,4 +12,6 @@ struct net_hotdata net_hotdata __cacheline_aligned = { .netdev_budget = 300, /* Must be at least 2 jiffes to guarantee 1 jiffy timeout */ .netdev_budget_usecs = 2 * USEC_PER_SEC / HZ, + + .tstamp_prequeue = 1, }; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index a9c2d798b2195..bddd07da09988 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -499,7 +499,7 @@ static struct ctl_table net_core_table[] = { #endif { .procname = "netdev_tstamp_prequeue", - .data = &netdev_tstamp_prequeue, + .data = &net_hotdata.tstamp_prequeue, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec -- cgit 1.2.3-korg From 0b91fa4bfb1caedd01cb6eb3b733cbc77c9edb0e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:17 +0000 Subject: net: move ptype_all into net_hotdata ptype_all is used in rx/tx fast paths. Move it to net_hotdata for better cache locality. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 - include/net/hotdata.h | 1 + net/core/dev.c | 16 +++++++--------- net/core/hotdata.c | 1 + net/core/net-procfs.c | 7 ++++--- 5 files changed, 13 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6643452af5436..b18ac8072f187 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -5306,7 +5306,6 @@ static inline const char *netdev_reg_state(const struct net_device *dev) #define PTYPE_HASH_SIZE (16) #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) -extern struct list_head ptype_all __read_mostly; extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; extern struct net_device *blackhole_netdev; diff --git a/include/net/hotdata.h b/include/net/hotdata.h index 149e56528537d..d462cb8f16bad 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -7,6 +7,7 @@ /* Read mostly data used in network fast paths. */ struct net_hotdata { struct list_head offload_base; + struct list_head ptype_all; int gro_normal_batch; int netdev_budget; int netdev_budget_usecs; diff --git a/net/core/dev.c b/net/core/dev.c index f2f44303c0351..e1493e5584070 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -161,7 +161,6 @@ static DEFINE_SPINLOCK(ptype_lock); struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; -struct list_head ptype_all __read_mostly; /* Taps */ static int netif_rx_internal(struct sk_buff *skb); static int call_netdevice_notifiers_extack(unsigned long val, @@ -540,7 +539,7 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev) static inline struct list_head *ptype_head(const struct packet_type *pt) { if (pt->type == htons(ETH_P_ALL)) - return pt->dev ? &pt->dev->ptype_all : &ptype_all; + return pt->dev ? &pt->dev->ptype_all : &net_hotdata.ptype_all; else return pt->dev ? &pt->dev->ptype_specific : &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; @@ -2226,7 +2225,8 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) */ bool dev_nit_active(struct net_device *dev) { - return !list_empty(&ptype_all) || !list_empty(&dev->ptype_all); + return !list_empty(&net_hotdata.ptype_all) || + !list_empty(&dev->ptype_all); } EXPORT_SYMBOL_GPL(dev_nit_active); @@ -2237,10 +2237,9 @@ EXPORT_SYMBOL_GPL(dev_nit_active); void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) { - struct packet_type *ptype; + struct list_head *ptype_list = &net_hotdata.ptype_all; + struct packet_type *ptype, *pt_prev = NULL; struct sk_buff *skb2 = NULL; - struct packet_type *pt_prev = NULL; - struct list_head *ptype_list = &ptype_all; rcu_read_lock(); again: @@ -2286,7 +2285,7 @@ again: pt_prev = ptype; } - if (ptype_list == &ptype_all) { + if (ptype_list == &net_hotdata.ptype_all) { ptype_list = &dev->ptype_all; goto again; } @@ -5387,7 +5386,7 @@ another_round: if (pfmemalloc) goto skip_taps; - list_for_each_entry_rcu(ptype, &ptype_all, list) { + list_for_each_entry_rcu(ptype, &net_hotdata.ptype_all, list) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; @@ -11749,7 +11748,6 @@ static int __init net_dev_init(void) if (netdev_kobject_init()) goto out; - INIT_LIST_HEAD(&ptype_all); for (i = 0; i < PTYPE_HASH_SIZE; i++) INIT_LIST_HEAD(&ptype_base[i]); diff --git a/net/core/hotdata.c b/net/core/hotdata.c index 087c4c84987df..29fcfe89fd9a6 100644 --- a/net/core/hotdata.c +++ b/net/core/hotdata.c @@ -7,6 +7,7 @@ struct net_hotdata net_hotdata __cacheline_aligned = { .offload_base = LIST_HEAD_INIT(net_hotdata.offload_base), + .ptype_all = LIST_HEAD_INIT(net_hotdata.ptype_all), .gro_normal_batch = 8, .netdev_budget = 300, diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 2e4e96d30ee1a..a97eceb84e61e 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "dev.h" @@ -183,7 +184,7 @@ static void *ptype_get_idx(struct seq_file *seq, loff_t pos) } } - list_for_each_entry_rcu(pt, &ptype_all, list) { + list_for_each_entry_rcu(pt, &net_hotdata.ptype_all, list) { if (i == pos) return pt; ++i; @@ -231,13 +232,13 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) } } - nxt = ptype_all.next; + nxt = net_hotdata.ptype_all.next; goto ptype_all; } if (pt->type == htons(ETH_P_ALL)) { ptype_all: - if (nxt != &ptype_all) + if (nxt != &net_hotdata.ptype_all) goto found; hash = 0; nxt = ptype_base[0].next; -- cgit 1.2.3-korg From edbc666cdcbf4a80ada4311c272a2078af87b880 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:18 +0000 Subject: net: move netdev_max_backlog to net_hotdata netdev_max_backlog is used in rx fat path. Move it to net_hodata for better cache locality. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 - include/net/hotdata.h | 1 + net/core/dev.c | 8 +++----- net/core/gro_cells.c | 3 ++- net/core/hotdata.c | 2 ++ net/core/sysctl_net_core.c | 2 +- net/xfrm/espintcp.c | 4 +++- net/xfrm/xfrm_input.c | 3 ++- 8 files changed, 14 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b18ac8072f187..c9a671b7bb373 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4793,7 +4793,6 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, const struct pcpu_sw_netstats __percpu *netstats); void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s); -extern int netdev_max_backlog; extern int dev_rx_weight; extern int dev_tx_weight; diff --git a/include/net/hotdata.h b/include/net/hotdata.h index d462cb8f16bad..dc50b200a94b6 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -12,6 +12,7 @@ struct net_hotdata { int netdev_budget; int netdev_budget_usecs; int tstamp_prequeue; + int max_backlog; }; extern struct net_hotdata net_hotdata; diff --git a/net/core/dev.c b/net/core/dev.c index e1493e5584070..e23edbaff392f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4404,9 +4404,6 @@ EXPORT_SYMBOL(__dev_direct_xmit); * Receiver routines *************************************************************************/ -int netdev_max_backlog __read_mostly = 1000; -EXPORT_SYMBOL(netdev_max_backlog); - unsigned int sysctl_skb_defer_max __read_mostly = 64; int weight_p __read_mostly = 64; /* old backlog weight */ int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */ @@ -4713,7 +4710,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) struct softnet_data *sd; unsigned int old_flow, new_flow; - if (qlen < (READ_ONCE(netdev_max_backlog) >> 1)) + if (qlen < (READ_ONCE(net_hotdata.max_backlog) >> 1)) return false; sd = this_cpu_ptr(&softnet_data); @@ -4761,7 +4758,8 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, if (!netif_running(skb->dev)) goto drop; qlen = skb_queue_len(&sd->input_pkt_queue); - if (qlen <= READ_ONCE(netdev_max_backlog) && !skb_flow_limit(skb, qlen)) { + if (qlen <= READ_ONCE(net_hotdata.max_backlog) && + !skb_flow_limit(skb, qlen)) { if (qlen) { enqueue: __skb_queue_tail(&sd->input_pkt_queue, skb); diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index ed5ec5de47f67..ff8e5b64bf6b7 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -3,6 +3,7 @@ #include #include #include +#include struct gro_cell { struct sk_buff_head napi_skbs; @@ -26,7 +27,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) cell = this_cpu_ptr(gcells->cells); - if (skb_queue_len(&cell->napi_skbs) > READ_ONCE(netdev_max_backlog)) { + if (skb_queue_len(&cell->napi_skbs) > READ_ONCE(net_hotdata.max_backlog)) { drop: dev_core_stats_rx_dropped_inc(dev); kfree_skb(skb); diff --git a/net/core/hotdata.c b/net/core/hotdata.c index 29fcfe89fd9a6..35ed5a83ecc7e 100644 --- a/net/core/hotdata.c +++ b/net/core/hotdata.c @@ -15,4 +15,6 @@ struct net_hotdata net_hotdata __cacheline_aligned = { .netdev_budget_usecs = 2 * USEC_PER_SEC / HZ, .tstamp_prequeue = 1, + .max_backlog = 1000, }; +EXPORT_SYMBOL(net_hotdata); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index bddd07da09988..8eaeeb2899142 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -440,7 +440,7 @@ static struct ctl_table net_core_table[] = { }, { .procname = "netdev_max_backlog", - .data = &netdev_max_backlog, + .data = &net_hotdata.max_backlog, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index d3b3f9e720b3b..fe82e2d073006 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -10,6 +10,7 @@ #if IS_ENABLED(CONFIG_IPV6) #include #endif +#include static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb, struct sock *sk) @@ -169,7 +170,8 @@ int espintcp_queue_out(struct sock *sk, struct sk_buff *skb) { struct espintcp_ctx *ctx = espintcp_getctx(sk); - if (skb_queue_len(&ctx->out_queue) >= READ_ONCE(netdev_max_backlog)) + if (skb_queue_len(&ctx->out_queue) >= + READ_ONCE(net_hotdata.max_backlog)) return -ENOBUFS; __skb_queue_tail(&ctx->out_queue, skb); diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index bd4ce21d76d75..161f535c8b949 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "xfrm_inout.h" @@ -764,7 +765,7 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb, trans = this_cpu_ptr(&xfrm_trans_tasklet); - if (skb_queue_len(&trans->queue) >= READ_ONCE(netdev_max_backlog)) + if (skb_queue_len(&trans->queue) >= READ_ONCE(net_hotdata.max_backlog)) return -ENOBUFS; BUILD_BUG_ON(sizeof(struct xfrm_trans_cb) > sizeof(skb->cb)); -- cgit 1.2.3-korg From 61a0be1a5342045059ce53eabfe6500d499d2f89 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:19 +0000 Subject: net: move ip_packet_offload and ipv6_packet_offload to net_hotdata These structures are used in GRO and GSO paths. v2: ipv6_packet_offload definition depends on CONFIG_INET Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-7-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 5 +++++ net/ipv4/af_inet.c | 18 +++++++++--------- net/ipv6/ip6_offload.c | 18 +++++++++--------- 3 files changed, 23 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index dc50b200a94b6..d8ce20d3215d3 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -3,9 +3,14 @@ #define _NET_HOTDATA_H #include +#include /* Read mostly data used in network fast paths. */ struct net_hotdata { +#if IS_ENABLED(CONFIG_INET) + struct packet_offload ip_packet_offload; + struct packet_offload ipv6_packet_offload; +#endif struct list_head offload_base; struct list_head ptype_all; int gro_normal_batch; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 5daebdcbca326..08dda6955562e 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1904,14 +1904,6 @@ static int ipv4_proc_init(void); * IP protocol layer initialiser */ -static struct packet_offload ip_packet_offload __read_mostly = { - .type = cpu_to_be16(ETH_P_IP), - .callbacks = { - .gso_segment = inet_gso_segment, - .gro_receive = inet_gro_receive, - .gro_complete = inet_gro_complete, - }, -}; static const struct net_offload ipip_offload = { .callbacks = { @@ -1938,7 +1930,15 @@ static int __init ipv4_offload_init(void) if (ipip_offload_init() < 0) pr_crit("%s: Cannot add IPIP protocol offload\n", __func__); - dev_add_offload(&ip_packet_offload); + net_hotdata.ip_packet_offload = (struct packet_offload) { + .type = cpu_to_be16(ETH_P_IP), + .callbacks = { + .gso_segment = inet_gso_segment, + .gro_receive = inet_gro_receive, + .gro_complete = inet_gro_complete, + }, + }; + dev_add_offload(&net_hotdata.ip_packet_offload); return 0; } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index cca64c7809bee..b41e35af69ea2 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -419,14 +419,6 @@ static int ip4ip6_gro_complete(struct sk_buff *skb, int nhoff) return inet_gro_complete(skb, nhoff); } -static struct packet_offload ipv6_packet_offload __read_mostly = { - .type = cpu_to_be16(ETH_P_IPV6), - .callbacks = { - .gso_segment = ipv6_gso_segment, - .gro_receive = ipv6_gro_receive, - .gro_complete = ipv6_gro_complete, - }, -}; static struct sk_buff *sit_gso_segment(struct sk_buff *skb, netdev_features_t features) @@ -486,7 +478,15 @@ static int __init ipv6_offload_init(void) if (ipv6_exthdrs_offload_init() < 0) pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__); - dev_add_offload(&ipv6_packet_offload); + net_hotdata.ipv6_packet_offload = (struct packet_offload) { + .type = cpu_to_be16(ETH_P_IPV6), + .callbacks = { + .gso_segment = ipv6_gso_segment, + .gro_receive = ipv6_gro_receive, + .gro_complete = ipv6_gro_complete, + }, + }; + dev_add_offload(&net_hotdata.ipv6_packet_offload); inet_add_offload(&sit_offload, IPPROTO_IPV6); inet6_add_offload(&ip6ip6_offload, IPPROTO_IPV6); -- cgit 1.2.3-korg From 0139806eebd6bcf6a3df98950cd0691aff216304 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:20 +0000 Subject: net: move tcpv4_offload and tcpv6_offload to net_hotdata These are used in TCP fast paths. Move them into net_hotdata for better cache locality. v2: tcpv6_offload definition depends on CONFIG_INET Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-8-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 3 +++ net/ipv4/tcp_offload.c | 17 ++++++++--------- net/ipv6/tcpv6_offload.c | 16 ++++++++-------- 3 files changed, 19 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index d8ce20d3215d3..d86d02f156fc3 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -4,12 +4,15 @@ #include #include +#include /* Read mostly data used in network fast paths. */ struct net_hotdata { #if IS_ENABLED(CONFIG_INET) struct packet_offload ip_packet_offload; + struct net_offload tcpv4_offload; struct packet_offload ipv6_packet_offload; + struct net_offload tcpv6_offload; #endif struct list_head offload_base; struct list_head ptype_all; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index b955ab3b236d9..ebe4722bb0204 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -345,15 +345,14 @@ INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff) return 0; } -static const struct net_offload tcpv4_offload = { - .callbacks = { - .gso_segment = tcp4_gso_segment, - .gro_receive = tcp4_gro_receive, - .gro_complete = tcp4_gro_complete, - }, -}; - int __init tcpv4_offload_init(void) { - return inet_add_offload(&tcpv4_offload, IPPROTO_TCP); + net_hotdata.tcpv4_offload = (struct net_offload) { + .callbacks = { + .gso_segment = tcp4_gso_segment, + .gro_receive = tcp4_gro_receive, + .gro_complete = tcp4_gro_complete, + }, + }; + return inet_add_offload(&net_hotdata.tcpv4_offload, IPPROTO_TCP); } diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index bf0c957e4b5ea..4b07d1e6c9529 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -66,15 +66,15 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, return tcp_gso_segment(skb, features); } -static const struct net_offload tcpv6_offload = { - .callbacks = { - .gso_segment = tcp6_gso_segment, - .gro_receive = tcp6_gro_receive, - .gro_complete = tcp6_gro_complete, - }, -}; int __init tcpv6_offload_init(void) { - return inet6_add_offload(&tcpv6_offload, IPPROTO_TCP); + net_hotdata.tcpv6_offload = (struct net_offload) { + .callbacks = { + .gso_segment = tcp6_gso_segment, + .gro_receive = tcp6_gro_receive, + .gro_complete = tcp6_gro_complete, + }, + }; + return inet6_add_offload(&net_hotdata.tcpv6_offload, IPPROTO_TCP); } -- cgit 1.2.3-korg From 26722dc74bf08fd79564cbcad1e5f3e2aa3bf9cc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:21 +0000 Subject: net: move dev_tx_weight to net_hotdata dev_tx_weight is used in tx fast path. Move it to net_hotdata for better cache locality. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-9-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 - include/net/hotdata.h | 1 + net/core/dev.c | 1 - net/core/hotdata.c | 1 + net/core/sysctl_net_core.c | 2 +- net/sched/sch_generic.c | 3 ++- 6 files changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c9a671b7bb373..ad4b031098ff5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4794,7 +4794,6 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s); extern int dev_rx_weight; -extern int dev_tx_weight; enum { NESTED_SYNC_IMM_BIT, diff --git a/include/net/hotdata.h b/include/net/hotdata.h index d86d02f156fc3..ffea9cc263e50 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -21,6 +21,7 @@ struct net_hotdata { int netdev_budget_usecs; int tstamp_prequeue; int max_backlog; + int dev_tx_weight; }; extern struct net_hotdata net_hotdata; diff --git a/net/core/dev.c b/net/core/dev.c index e23edbaff392f..5d6bd481103f4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4409,7 +4409,6 @@ int weight_p __read_mostly = 64; /* old backlog weight */ int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */ int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */ int dev_rx_weight __read_mostly = 64; -int dev_tx_weight __read_mostly = 64; /* Called with irq disabled */ static inline void ____napi_schedule(struct softnet_data *sd, diff --git a/net/core/hotdata.c b/net/core/hotdata.c index 35ed5a83ecc7e..ec8c3b48e8fea 100644 --- a/net/core/hotdata.c +++ b/net/core/hotdata.c @@ -16,5 +16,6 @@ struct net_hotdata net_hotdata __cacheline_aligned = { .tstamp_prequeue = 1, .max_backlog = 1000, + .dev_tx_weight = 64, }; EXPORT_SYMBOL(net_hotdata); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 8eaeeb2899142..a30016a8660e0 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -302,7 +302,7 @@ static int proc_do_dev_weight(struct ctl_table *table, int write, if (!ret && write) { weight = READ_ONCE(weight_p); WRITE_ONCE(dev_rx_weight, weight * dev_weight_rx_bias); - WRITE_ONCE(dev_tx_weight, weight * dev_weight_tx_bias); + WRITE_ONCE(net_hotdata.dev_tx_weight, weight * dev_weight_tx_bias); } mutex_unlock(&dev_weight_mutex); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 9b3e9262040b6..ff53364937775 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -409,7 +410,7 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets) void __qdisc_run(struct Qdisc *q) { - int quota = READ_ONCE(dev_tx_weight); + int quota = READ_ONCE(net_hotdata.dev_tx_weight); int packets; while (qdisc_restart(q, &packets)) { -- cgit 1.2.3-korg From 71c0de9bac9c1dda503322c86be4924f055dc6c9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:22 +0000 Subject: net: move dev_rx_weight to net_hotdata dev_rx_weight is read from process_backlog(). Move it to net_hotdata for better cache locality. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-10-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 -- include/net/hotdata.h | 1 + net/core/dev.c | 3 +-- net/core/hotdata.c | 1 + net/core/sysctl_net_core.c | 2 +- 5 files changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ad4b031098ff5..dd641297e807c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4793,8 +4793,6 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, const struct pcpu_sw_netstats __percpu *netstats); void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s); -extern int dev_rx_weight; - enum { NESTED_SYNC_IMM_BIT, NESTED_SYNC_TODO_BIT, diff --git a/include/net/hotdata.h b/include/net/hotdata.h index ffea9cc263e50..e6595ed2c3be5 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -22,6 +22,7 @@ struct net_hotdata { int tstamp_prequeue; int max_backlog; int dev_tx_weight; + int dev_rx_weight; }; extern struct net_hotdata net_hotdata; diff --git a/net/core/dev.c b/net/core/dev.c index 5d6bd481103f4..40ba02e04bcb5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4408,7 +4408,6 @@ unsigned int sysctl_skb_defer_max __read_mostly = 64; int weight_p __read_mostly = 64; /* old backlog weight */ int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */ int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */ -int dev_rx_weight __read_mostly = 64; /* Called with irq disabled */ static inline void ____napi_schedule(struct softnet_data *sd, @@ -5978,7 +5977,7 @@ static int process_backlog(struct napi_struct *napi, int quota) net_rps_action_and_irq_enable(sd); } - napi->weight = READ_ONCE(dev_rx_weight); + napi->weight = READ_ONCE(net_hotdata.dev_rx_weight); while (again) { struct sk_buff *skb; diff --git a/net/core/hotdata.c b/net/core/hotdata.c index ec8c3b48e8fea..c8a7a451c18a3 100644 --- a/net/core/hotdata.c +++ b/net/core/hotdata.c @@ -17,5 +17,6 @@ struct net_hotdata net_hotdata __cacheline_aligned = { .tstamp_prequeue = 1, .max_backlog = 1000, .dev_tx_weight = 64, + .dev_rx_weight = 64, }; EXPORT_SYMBOL(net_hotdata); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index a30016a8660e0..8a4c698dad9c9 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -301,7 +301,7 @@ static int proc_do_dev_weight(struct ctl_table *table, int write, ret = proc_dointvec(table, write, buffer, lenp, ppos); if (!ret && write) { weight = READ_ONCE(weight_p); - WRITE_ONCE(dev_rx_weight, weight * dev_weight_rx_bias); + WRITE_ONCE(net_hotdata.dev_rx_weight, weight * dev_weight_rx_bias); WRITE_ONCE(net_hotdata.dev_tx_weight, weight * dev_weight_tx_bias); } mutex_unlock(&dev_weight_mutex); -- cgit 1.2.3-korg From aa70d2d16f280efe8aa52afc25a33b2ec8d346b6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:23 +0000 Subject: net: move skbuff_cache(s) to net_hotdata skbuff_cache, skbuff_fclone_cache and skb_small_head_cache are used in rx/tx fast paths. Move them to net_hotdata for better cache locality. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-11-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 1 - include/net/hotdata.h | 3 +++ kernel/bpf/cpumap.c | 4 +++- net/bpf/test_run.c | 4 +++- net/core/skbuff.c | 44 ++++++++++++++++++++------------------------ net/core/xdp.c | 5 +++-- 6 files changed, 32 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3013355b63f5f..d0508f90bed50 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1271,7 +1271,6 @@ static inline void consume_skb(struct sk_buff *skb) void __consume_stateless_skb(struct sk_buff *skb); void __kfree_skb(struct sk_buff *skb); -extern struct kmem_cache *skbuff_cache; void kfree_skb_partial(struct sk_buff *skb, bool head_stolen); bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, diff --git a/include/net/hotdata.h b/include/net/hotdata.h index e6595ed2c3be5..a8f7e5e826fb7 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -16,6 +16,9 @@ struct net_hotdata { #endif struct list_head offload_base; struct list_head ptype_all; + struct kmem_cache *skbuff_cache; + struct kmem_cache *skbuff_fclone_cache; + struct kmem_cache *skb_small_head_cache; int gro_normal_batch; int netdev_budget; int netdev_budget_usecs; diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index ef82ffc90cbe9..9ee8da4774656 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -326,7 +327,8 @@ static int cpu_map_kthread_run(void *data) /* Support running another XDP prog on this CPU */ nframes = cpu_map_bpf_prog_run(rcpu, frames, xdp_n, &stats, &list); if (nframes) { - m = kmem_cache_alloc_bulk(skbuff_cache, gfp, nframes, skbs); + m = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, + gfp, nframes, skbs); if (unlikely(m == 0)) { for (i = 0; i < nframes; i++) skbs[i] = NULL; /* effect: xdp_return_frame */ diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 5535f9adc6589..61efeadaff8db 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -254,7 +255,8 @@ static int xdp_recv_frames(struct xdp_frame **frames, int nframes, int i, n; LIST_HEAD(list); - n = kmem_cache_alloc_bulk(skbuff_cache, gfp, nframes, (void **)skbs); + n = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, gfp, nframes, + (void **)skbs); if (unlikely(n == 0)) { for (i = 0; i < nframes; i++) xdp_return_frame(frames[i]); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 43d7fc150acc9..766219011aeaf 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -69,6 +69,7 @@ #include #include #include +#include #include #include #include @@ -88,15 +89,10 @@ #include "dev.h" #include "sock_destructor.h" -struct kmem_cache *skbuff_cache __ro_after_init; -static struct kmem_cache *skbuff_fclone_cache __ro_after_init; #ifdef CONFIG_SKB_EXTENSIONS static struct kmem_cache *skbuff_ext_cache __ro_after_init; #endif - -static struct kmem_cache *skb_small_head_cache __ro_after_init; - #define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER) /* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two. @@ -349,7 +345,7 @@ static struct sk_buff *napi_skb_cache_get(void) struct sk_buff *skb; if (unlikely(!nc->skb_count)) { - nc->skb_count = kmem_cache_alloc_bulk(skbuff_cache, + nc->skb_count = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, GFP_ATOMIC, NAPI_SKB_CACHE_BULK, nc->skb_cache); @@ -358,7 +354,7 @@ static struct sk_buff *napi_skb_cache_get(void) } skb = nc->skb_cache[--nc->skb_count]; - kasan_mempool_unpoison_object(skb, kmem_cache_size(skbuff_cache)); + kasan_mempool_unpoison_object(skb, kmem_cache_size(net_hotdata.skbuff_cache)); return skb; } @@ -416,7 +412,7 @@ struct sk_buff *slab_build_skb(void *data) struct sk_buff *skb; unsigned int size; - skb = kmem_cache_alloc(skbuff_cache, GFP_ATOMIC); + skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC); if (unlikely(!skb)) return NULL; @@ -467,7 +463,7 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size) { struct sk_buff *skb; - skb = kmem_cache_alloc(skbuff_cache, GFP_ATOMIC); + skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC); if (unlikely(!skb)) return NULL; @@ -578,7 +574,7 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node, obj_size = SKB_HEAD_ALIGN(*size); if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE && !(flags & KMALLOC_NOT_NORMAL_BITS)) { - obj = kmem_cache_alloc_node(skb_small_head_cache, + obj = kmem_cache_alloc_node(net_hotdata.skb_small_head_cache, flags | __GFP_NOMEMALLOC | __GFP_NOWARN, node); *size = SKB_SMALL_HEAD_CACHE_SIZE; @@ -586,7 +582,7 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node, goto out; /* Try again but now we are using pfmemalloc reserves */ ret_pfmemalloc = true; - obj = kmem_cache_alloc_node(skb_small_head_cache, flags, node); + obj = kmem_cache_alloc_node(net_hotdata.skb_small_head_cache, flags, node); goto out; } @@ -649,7 +645,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, u8 *data; cache = (flags & SKB_ALLOC_FCLONE) - ? skbuff_fclone_cache : skbuff_cache; + ? net_hotdata.skbuff_fclone_cache : net_hotdata.skbuff_cache; if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX)) gfp_mask |= __GFP_MEMALLOC; @@ -1095,7 +1091,7 @@ static int skb_pp_frag_ref(struct sk_buff *skb) static void skb_kfree_head(void *head, unsigned int end_offset) { if (end_offset == SKB_SMALL_HEAD_HEADROOM) - kmem_cache_free(skb_small_head_cache, head); + kmem_cache_free(net_hotdata.skb_small_head_cache, head); else kfree(head); } @@ -1162,7 +1158,7 @@ static void kfree_skbmem(struct sk_buff *skb) switch (skb->fclone) { case SKB_FCLONE_UNAVAILABLE: - kmem_cache_free(skbuff_cache, skb); + kmem_cache_free(net_hotdata.skbuff_cache, skb); return; case SKB_FCLONE_ORIG: @@ -1183,7 +1179,7 @@ static void kfree_skbmem(struct sk_buff *skb) if (!refcount_dec_and_test(&fclones->fclone_ref)) return; fastpath: - kmem_cache_free(skbuff_fclone_cache, fclones); + kmem_cache_free(net_hotdata.skbuff_fclone_cache, fclones); } void skb_release_head_state(struct sk_buff *skb) @@ -1280,7 +1276,7 @@ static void kfree_skb_add_bulk(struct sk_buff *skb, sa->skb_array[sa->skb_count++] = skb; if (unlikely(sa->skb_count == KFREE_SKB_BULK_SIZE)) { - kmem_cache_free_bulk(skbuff_cache, KFREE_SKB_BULK_SIZE, + kmem_cache_free_bulk(net_hotdata.skbuff_cache, KFREE_SKB_BULK_SIZE, sa->skb_array); sa->skb_count = 0; } @@ -1305,7 +1301,7 @@ kfree_skb_list_reason(struct sk_buff *segs, enum skb_drop_reason reason) } if (sa.skb_count) - kmem_cache_free_bulk(skbuff_cache, sa.skb_count, sa.skb_array); + kmem_cache_free_bulk(net_hotdata.skbuff_cache, sa.skb_count, sa.skb_array); } EXPORT_SYMBOL(kfree_skb_list_reason); @@ -1467,9 +1463,9 @@ static void napi_skb_cache_put(struct sk_buff *skb) if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) { for (i = NAPI_SKB_CACHE_HALF; i < NAPI_SKB_CACHE_SIZE; i++) kasan_mempool_unpoison_object(nc->skb_cache[i], - kmem_cache_size(skbuff_cache)); + kmem_cache_size(net_hotdata.skbuff_cache)); - kmem_cache_free_bulk(skbuff_cache, NAPI_SKB_CACHE_HALF, + kmem_cache_free_bulk(net_hotdata.skbuff_cache, NAPI_SKB_CACHE_HALF, nc->skb_cache + NAPI_SKB_CACHE_HALF); nc->skb_count = NAPI_SKB_CACHE_HALF; } @@ -2066,7 +2062,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; - n = kmem_cache_alloc(skbuff_cache, gfp_mask); + n = kmem_cache_alloc(net_hotdata.skbuff_cache, gfp_mask); if (!n) return NULL; @@ -5005,7 +5001,7 @@ static void skb_extensions_init(void) {} void __init skb_init(void) { - skbuff_cache = kmem_cache_create_usercopy("skbuff_head_cache", + net_hotdata.skbuff_cache = kmem_cache_create_usercopy("skbuff_head_cache", sizeof(struct sk_buff), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC| @@ -5013,7 +5009,7 @@ void __init skb_init(void) offsetof(struct sk_buff, cb), sizeof_field(struct sk_buff, cb), NULL); - skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", + net_hotdata.skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", sizeof(struct sk_buff_fclones), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, @@ -5022,7 +5018,7 @@ void __init skb_init(void) * struct skb_shared_info is located at the end of skb->head, * and should not be copied to/from user. */ - skb_small_head_cache = kmem_cache_create_usercopy("skbuff_small_head", + net_hotdata.skb_small_head_cache = kmem_cache_create_usercopy("skbuff_small_head", SKB_SMALL_HEAD_CACHE_SIZE, 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, @@ -5895,7 +5891,7 @@ void kfree_skb_partial(struct sk_buff *skb, bool head_stolen) { if (head_stolen) { skb_release_head_state(skb); - kmem_cache_free(skbuff_cache, skb); + kmem_cache_free(net_hotdata.skbuff_cache, skb); } else { __kfree_skb(skb); } diff --git a/net/core/xdp.c b/net/core/xdp.c index 0e3709a29175b..41693154e426f 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -16,6 +16,7 @@ #include #include +#include #include #include /* struct xdp_mem_allocator */ #include @@ -589,7 +590,7 @@ EXPORT_SYMBOL_GPL(xdp_warn); int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp) { - n_skb = kmem_cache_alloc_bulk(skbuff_cache, gfp, n_skb, skbs); + n_skb = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, gfp, n_skb, skbs); if (unlikely(!n_skb)) return -ENOMEM; @@ -658,7 +659,7 @@ struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf, { struct sk_buff *skb; - skb = kmem_cache_alloc(skbuff_cache, GFP_ATOMIC); + skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC); if (unlikely(!skb)) return NULL; -- cgit 1.2.3-korg From 6a55ca6b0122d4678e3ab54a8553361aae5082f1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:24 +0000 Subject: udp: move udpv4_offload and udpv6_offload to net_hotdata These structures are used in GRO and GSO paths. Move them to net_hodata for better cache locality. v2: udpv6_offload definition depends on CONFIG_INET=y Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-12-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 2 ++ net/ipv4/udp_offload.c | 17 ++++++++--------- net/ipv6/udp_offload.c | 21 ++++++++++----------- 3 files changed, 20 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index a8f7e5e826fb7..daeee8ce8084e 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -11,8 +11,10 @@ struct net_hotdata { #if IS_ENABLED(CONFIG_INET) struct packet_offload ip_packet_offload; struct net_offload tcpv4_offload; + struct net_offload udpv4_offload; struct packet_offload ipv6_packet_offload; struct net_offload tcpv6_offload; + struct net_offload udpv6_offload; #endif struct list_head offload_base; struct list_head ptype_all; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 6c95d28d0c4a7..b9880743765c6 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -737,15 +737,14 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff) return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb); } -static const struct net_offload udpv4_offload = { - .callbacks = { - .gso_segment = udp4_ufo_fragment, - .gro_receive = udp4_gro_receive, - .gro_complete = udp4_gro_complete, - }, -}; - int __init udpv4_offload_init(void) { - return inet_add_offload(&udpv4_offload, IPPROTO_UDP); + net_hotdata.udpv4_offload = (struct net_offload) { + .callbacks = { + .gso_segment = udp4_ufo_fragment, + .gro_receive = udp4_gro_receive, + .gro_complete = udp4_gro_complete, + }, + }; + return inet_add_offload(&net_hotdata.udpv4_offload, IPPROTO_UDP); } diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 6b95ba241ebe2..312bcaeea96fb 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -192,20 +192,19 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff) return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb); } -static const struct net_offload udpv6_offload = { - .callbacks = { - .gso_segment = udp6_ufo_fragment, - .gro_receive = udp6_gro_receive, - .gro_complete = udp6_gro_complete, - }, -}; - -int udpv6_offload_init(void) +int __init udpv6_offload_init(void) { - return inet6_add_offload(&udpv6_offload, IPPROTO_UDP); + net_hotdata.udpv6_offload = (struct net_offload) { + .callbacks = { + .gso_segment = udp6_ufo_fragment, + .gro_receive = udp6_gro_receive, + .gro_complete = udp6_gro_complete, + }, + }; + return inet6_add_offload(&net_hotdata.udpv6_offload, IPPROTO_UDP); } int udpv6_offload_exit(void) { - return inet6_del_offload(&udpv6_offload, IPPROTO_UDP); + return inet6_del_offload(&net_hotdata.udpv6_offload, IPPROTO_UDP); } -- cgit 1.2.3-korg From 4ea0875b9d897e3c64cdb486788509f1f062285b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:25 +0000 Subject: ipv6: move tcpv6_protocol and udpv6_protocol to net_hotdata These structures are read in rx path, move them to net_hotdata for better cache locality. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-13-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 4 ++++ net/ipv6/tcp_ipv6.c | 17 +++++++++-------- net/ipv6/udp.c | 16 ++++++++-------- 3 files changed, 21 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index daeee8ce8084e..03d758d25c028 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -14,6 +14,10 @@ struct net_hotdata { struct net_offload udpv4_offload; struct packet_offload ipv6_packet_offload; struct net_offload tcpv6_offload; +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_protocol tcpv6_protocol; + struct inet6_protocol udpv6_protocol; +#endif struct net_offload udpv6_offload; #endif struct list_head offload_base; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f677f0fa51968..3f4cba49e9ee6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include @@ -2367,11 +2368,6 @@ struct proto tcpv6_prot = { }; EXPORT_SYMBOL_GPL(tcpv6_prot); -static const struct inet6_protocol tcpv6_protocol = { - .handler = tcp_v6_rcv, - .err_handler = tcp_v6_err, - .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, -}; static struct inet_protosw tcpv6_protosw = { .type = SOCK_STREAM, @@ -2408,7 +2404,12 @@ int __init tcpv6_init(void) { int ret; - ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP); + net_hotdata.tcpv6_protocol = (struct inet6_protocol) { + .handler = tcp_v6_rcv, + .err_handler = tcp_v6_err, + .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL, + }; + ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); if (ret) goto out; @@ -2433,7 +2434,7 @@ out_tcpv6_pernet_subsys: out_tcpv6_protosw: inet6_unregister_protosw(&tcpv6_protosw); out_tcpv6_protocol: - inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); + inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); goto out; } @@ -2441,5 +2442,5 @@ void tcpv6_exit(void) { unregister_pernet_subsys(&tcpv6_net_ops); inet6_unregister_protosw(&tcpv6_protosw); - inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); + inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3f2249b4cd5f6..97d86909aabb6 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1702,11 +1702,6 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname, return ipv6_getsockopt(sk, level, optname, optval, optlen); } -static const struct inet6_protocol udpv6_protocol = { - .handler = udpv6_rcv, - .err_handler = udpv6_err, - .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, -}; /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS @@ -1803,7 +1798,12 @@ int __init udpv6_init(void) { int ret; - ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP); + net_hotdata.udpv6_protocol = (struct inet6_protocol) { + .handler = udpv6_rcv, + .err_handler = udpv6_err, + .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL, + }; + ret = inet6_add_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP); if (ret) goto out; @@ -1814,12 +1814,12 @@ out: return ret; out_udpv6_protocol: - inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP); + inet6_del_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP); goto out; } void udpv6_exit(void) { inet6_unregister_protosw(&udpv6_protosw); - inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP); + inet6_del_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP); } -- cgit 1.2.3-korg From 571bf020be9c3b135e8b6dd87421919953268c1f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:26 +0000 Subject: inet: move tcp_protocol and udp_protocol to net_hotdata These structures are read in rx path, move them to net_hotdata for better cache locality. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-14-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 2 ++ net/ipv4/af_inet.c | 30 +++++++++++++++--------------- 2 files changed, 17 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index 03d758d25c028..87215f7ac200f 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -11,7 +11,9 @@ struct net_hotdata { #if IS_ENABLED(CONFIG_INET) struct packet_offload ip_packet_offload; struct net_offload tcpv4_offload; + struct net_protocol tcp_protocol; struct net_offload udpv4_offload; + struct net_protocol udp_protocol; struct packet_offload ipv6_packet_offload; struct net_offload tcpv6_offload; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 08dda6955562e..6f1cfd176e7b8 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1751,19 +1751,6 @@ static const struct net_protocol igmp_protocol = { }; #endif -static const struct net_protocol tcp_protocol = { - .handler = tcp_v4_rcv, - .err_handler = tcp_v4_err, - .no_policy = 1, - .icmp_strict_tag_validation = 1, -}; - -static const struct net_protocol udp_protocol = { - .handler = udp_rcv, - .err_handler = udp_err, - .no_policy = 1, -}; - static const struct net_protocol icmp_protocol = { .handler = icmp_rcv, .err_handler = icmp_err, @@ -1992,9 +1979,22 @@ static int __init inet_init(void) if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0) pr_crit("%s: Cannot add ICMP protocol\n", __func__); - if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0) + + net_hotdata.udp_protocol = (struct net_protocol) { + .handler = udp_rcv, + .err_handler = udp_err, + .no_policy = 1, + }; + if (inet_add_protocol(&net_hotdata.udp_protocol, IPPROTO_UDP) < 0) pr_crit("%s: Cannot add UDP protocol\n", __func__); - if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0) + + net_hotdata.tcp_protocol = (struct net_protocol) { + .handler = tcp_v4_rcv, + .err_handler = tcp_v4_err, + .no_policy = 1, + .icmp_strict_tag_validation = 1, + }; + if (inet_add_protocol(&net_hotdata.tcp_protocol, IPPROTO_TCP) < 0) pr_crit("%s: Cannot add TCP protocol\n", __func__); #ifdef CONFIG_IP_MULTICAST if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0) -- cgit 1.2.3-korg From 6e0735723ab437793cfab02d50b3ae3539aeb520 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:27 +0000 Subject: inet: move inet_ehash_secret and udp_ehash_secret into net_hotdata "struct net_protocol" has a 32bit hole in 32bit arches. Use it to store the 32bit secret used by UDP and TCP, to increase cache locality in rx path. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-15-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 3 +++ include/net/protocol.h | 1 + net/ipv4/inet_hashtables.c | 3 +-- net/ipv4/udp.c | 2 -- 4 files changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index 87215f7ac200f..7a210ea6899c6 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -36,6 +36,9 @@ struct net_hotdata { int dev_rx_weight; }; +#define inet_ehash_secret net_hotdata.tcp_protocol.secret +#define udp_ehash_secret net_hotdata.udp_protocol.secret + extern struct net_hotdata net_hotdata; #endif /* _NET_HOTDATA_H */ diff --git a/include/net/protocol.h b/include/net/protocol.h index 6aef8cb11cc8c..3ff26e66735ce 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -46,6 +46,7 @@ struct net_protocol { * socket lookup? */ icmp_strict_tag_validation:1; + u32 secret; }; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 308ff34002ea6..7498af3201647 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -24,6 +24,7 @@ #include #endif #include +#include #include #include #include @@ -32,8 +33,6 @@ u32 inet_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport) { - static u32 inet_ehash_secret __read_mostly; - net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret)); return __inet_ehashfn(laddr, lport, faddr, fport, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a8acea17b4e53..2beabf5b2d862 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -411,8 +411,6 @@ INDIRECT_CALLABLE_SCOPE u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport) { - static u32 udp_ehash_secret __read_mostly; - net_get_random_once(&udp_ehash_secret, sizeof(udp_ehash_secret)); return __inet_ehashfn(laddr, lport, faddr, fport, -- cgit 1.2.3-korg From 5af674bb90a030317a02419e04b66ec0dc892dcd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:28 +0000 Subject: ipv6: move inet6_ehash_secret and udp6_ehash_secret into net_hotdata "struct inet6_protocol" has a 32bit hole in 32bit arches. Use it to store the 32bit secret used by UDP and TCP, to increase cache locality in rx path. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-16-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 2 ++ include/net/protocol.h | 1 + net/ipv6/inet6_hashtables.c | 2 +- net/ipv6/udp.c | 1 - 4 files changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index 7a210ea6899c6..6d5cd967183a5 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -38,6 +38,8 @@ struct net_hotdata { #define inet_ehash_secret net_hotdata.tcp_protocol.secret #define udp_ehash_secret net_hotdata.udp_protocol.secret +#define inet6_ehash_secret net_hotdata.tcpv6_protocol.secret +#define udp6_ehash_secret net_hotdata.udpv6_protocol.secret extern struct net_hotdata net_hotdata; diff --git a/include/net/protocol.h b/include/net/protocol.h index 3ff26e66735ce..213649d2ab098 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -60,6 +60,7 @@ struct inet6_protocol { __be32 info); unsigned int flags; /* INET6_PROTO_xxx */ + u32 secret; }; #define INET6_PROTO_NOPOLICY 0x1 diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index b0e8d278e8a9b..0fee97f3166cf 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -25,7 +26,6 @@ u32 inet6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, const struct in6_addr *faddr, const __be16 fport) { - static u32 inet6_ehash_secret __read_mostly; static u32 ipv6_hash_secret __read_mostly; u32 lhash, fhash; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 97d86909aabb6..1e1c67a51675e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -79,7 +79,6 @@ u32 udp6_ehashfn(const struct net *net, const struct in6_addr *faddr, const __be16 fport) { - static u32 udp6_ehash_secret __read_mostly; static u32 udp_ipv6_hash_secret __read_mostly; u32 lhash, fhash; -- cgit 1.2.3-korg From df51b84564159cdd91a67ee0f9e30b42b3a73cef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:29 +0000 Subject: ipv6: move tcp_ipv6_hash_secret and udp_ipv6_hash_secret to net_hotdata Use a 32bit hole in "struct net_offload" to store the remaining 32bit secrets used by TCPv6 and UDPv6. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-17-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 2 ++ include/net/protocol.h | 1 + net/ipv6/inet6_hashtables.c | 6 ++---- net/ipv6/udp.c | 2 -- 4 files changed, 5 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index 6d5cd967183a5..b0b847585f7e6 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -39,7 +39,9 @@ struct net_hotdata { #define inet_ehash_secret net_hotdata.tcp_protocol.secret #define udp_ehash_secret net_hotdata.udp_protocol.secret #define inet6_ehash_secret net_hotdata.tcpv6_protocol.secret +#define tcp_ipv6_hash_secret net_hotdata.tcpv6_offload.secret #define udp6_ehash_secret net_hotdata.udpv6_protocol.secret +#define udp_ipv6_hash_secret net_hotdata.udpv6_offload.secret extern struct net_hotdata net_hotdata; diff --git a/include/net/protocol.h b/include/net/protocol.h index 213649d2ab098..b2499f88f8f81 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -70,6 +70,7 @@ struct inet6_protocol { struct net_offload { struct offload_callbacks callbacks; unsigned int flags; /* Flags used by IPv6 for now */ + u32 secret; }; /* This should be set for any extension header which is compatible with GSO. */ #define INET6_PROTO_GSO_EXTHDR 0x1 diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 0fee97f3166cf..2e81383b663b7 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -26,15 +26,13 @@ u32 inet6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, const struct in6_addr *faddr, const __be16 fport) { - static u32 ipv6_hash_secret __read_mostly; - u32 lhash, fhash; net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret)); - net_get_random_once(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); + net_get_random_once(&tcp_ipv6_hash_secret, sizeof(tcp_ipv6_hash_secret)); lhash = (__force u32)laddr->s6_addr32[3]; - fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret); + fhash = __ipv6_addr_jhash(faddr, tcp_ipv6_hash_secret); return __inet6_ehashfn(lhash, lport, fhash, fport, inet6_ehash_secret + net_hash_mix(net)); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1e1c67a51675e..80ad8f436b179 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -79,8 +79,6 @@ u32 udp6_ehashfn(const struct net *net, const struct in6_addr *faddr, const __be16 fport) { - static u32 udp_ipv6_hash_secret __read_mostly; - u32 lhash, fhash; net_get_random_once(&udp6_ehash_secret, -- cgit 1.2.3-korg From 490a79faf95e705ba0ffd9ebf04a624b379e53c9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:30 +0000 Subject: net: introduce include/net/rps.h Move RPS related structures and helpers from include/linux/netdevice.h and include/net/sock.h to a new include file. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-18-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_arfs.c | 1 + drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c | 1 + drivers/net/ethernet/sfc/rx_common.c | 1 + drivers/net/ethernet/sfc/siena/rx_common.c | 1 + drivers/net/tun.c | 1 + include/linux/netdevice.h | 82 -------------- include/net/rps.h | 127 ++++++++++++++++++++++ include/net/sock.h | 35 ------ net/core/dev.c | 1 + net/core/net-sysfs.c | 1 + net/core/sysctl_net_core.c | 1 + net/ipv4/af_inet.c | 1 + net/ipv4/tcp.c | 1 + net/ipv6/af_inet6.c | 1 + net/sctp/socket.c | 1 + 16 files changed, 140 insertions(+), 117 deletions(-) create mode 100644 include/net/rps.h (limited to 'net') diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c index cca0e753f38ff..7cee365cc7d16 100644 --- a/drivers/net/ethernet/intel/ice/ice_arfs.c +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c @@ -2,6 +2,7 @@ /* Copyright (C) 2018-2020, Intel Corporation. */ #include "ice.h" +#include /** * ice_is_arfs_active - helper to check is aRFS is active diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index d7da62cda821f..5d3fde63b2739 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index e66f486faafe1..c7f542d0b8f08 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "en.h" #define ARFS_HASH_SHIFT BITS_PER_BYTE diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c index fac227d372db4..dcd901eccfc8f 100644 --- a/drivers/net/ethernet/sfc/rx_common.c +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -11,6 +11,7 @@ #include "net_driver.h" #include #include +#include #include "efx.h" #include "nic.h" #include "rx_common.h" diff --git a/drivers/net/ethernet/sfc/siena/rx_common.c b/drivers/net/ethernet/sfc/siena/rx_common.c index 4579f43484c36..219fb358a646d 100644 --- a/drivers/net/ethernet/sfc/siena/rx_common.c +++ b/drivers/net/ethernet/sfc/siena/rx_common.c @@ -11,6 +11,7 @@ #include "net_driver.h" #include #include +#include #include "efx.h" #include "nic.h" #include "rx_common.h" diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 8d258e263f548..0b3f21cba552f 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -78,6 +78,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dd641297e807c..416a800d72ba9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -225,12 +225,6 @@ struct net_device_core_stats { #include #include -#ifdef CONFIG_RPS -#include -extern struct static_key_false rps_needed; -extern struct static_key_false rfs_needed; -#endif - struct neighbour; struct neigh_parms; struct sk_buff; @@ -730,86 +724,10 @@ static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node #endif } -#ifdef CONFIG_RPS -/* - * This structure holds an RPS map which can be of variable length. The - * map is an array of CPUs. - */ -struct rps_map { - unsigned int len; - struct rcu_head rcu; - u16 cpus[]; -}; -#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16))) - -/* - * The rps_dev_flow structure contains the mapping of a flow to a CPU, the - * tail pointer for that CPU's input queue at the time of last enqueue, and - * a hardware filter index. - */ -struct rps_dev_flow { - u16 cpu; - u16 filter; - unsigned int last_qtail; -}; -#define RPS_NO_FILTER 0xffff - -/* - * The rps_dev_flow_table structure contains a table of flow mappings. - */ -struct rps_dev_flow_table { - unsigned int mask; - struct rcu_head rcu; - struct rps_dev_flow flows[]; -}; -#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ - ((_num) * sizeof(struct rps_dev_flow))) - -/* - * The rps_sock_flow_table contains mappings of flows to the last CPU - * on which they were processed by the application (set in recvmsg). - * Each entry is a 32bit value. Upper part is the high-order bits - * of flow hash, lower part is CPU number. - * rps_cpu_mask is used to partition the space, depending on number of - * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 - * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f, - * meaning we use 32-6=26 bits for the hash. - */ -struct rps_sock_flow_table { - u32 mask; - - u32 ents[] ____cacheline_aligned_in_smp; -}; -#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) - -#define RPS_NO_CPU 0xffff - -extern u32 rps_cpu_mask; -extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; - -static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, - u32 hash) -{ - if (table && hash) { - unsigned int index = hash & table->mask; - u32 val = hash & ~rps_cpu_mask; - - /* We only give a hint, preemption can change CPU under us */ - val |= raw_smp_processor_id(); - - /* The following WRITE_ONCE() is paired with the READ_ONCE() - * here, and another one in get_rps_cpu(). - */ - if (READ_ONCE(table->ents[index]) != val) - WRITE_ONCE(table->ents[index], val); - } -} - #ifdef CONFIG_RFS_ACCEL bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id, u16 filter_id); #endif -#endif /* CONFIG_RPS */ /* XPS map type and offset of the xps map within net_device->xps_maps[]. */ enum xps_map_type { diff --git a/include/net/rps.h b/include/net/rps.h new file mode 100644 index 0000000000000..6081d817d2458 --- /dev/null +++ b/include/net/rps.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _NET_RPS_H +#define _NET_RPS_H + +#include +#include +#include + +#ifdef CONFIG_RPS + +extern struct static_key_false rps_needed; +extern struct static_key_false rfs_needed; + +/* + * This structure holds an RPS map which can be of variable length. The + * map is an array of CPUs. + */ +struct rps_map { + unsigned int len; + struct rcu_head rcu; + u16 cpus[]; +}; +#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16))) + +/* + * The rps_dev_flow structure contains the mapping of a flow to a CPU, the + * tail pointer for that CPU's input queue at the time of last enqueue, and + * a hardware filter index. + */ +struct rps_dev_flow { + u16 cpu; + u16 filter; + unsigned int last_qtail; +}; +#define RPS_NO_FILTER 0xffff + +/* + * The rps_dev_flow_table structure contains a table of flow mappings. + */ +struct rps_dev_flow_table { + unsigned int mask; + struct rcu_head rcu; + struct rps_dev_flow flows[]; +}; +#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ + ((_num) * sizeof(struct rps_dev_flow))) + +/* + * The rps_sock_flow_table contains mappings of flows to the last CPU + * on which they were processed by the application (set in recvmsg). + * Each entry is a 32bit value. Upper part is the high-order bits + * of flow hash, lower part is CPU number. + * rps_cpu_mask is used to partition the space, depending on number of + * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 + * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f, + * meaning we use 32-6=26 bits for the hash. + */ +struct rps_sock_flow_table { + u32 mask; + + u32 ents[] ____cacheline_aligned_in_smp; +}; +#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) + +#define RPS_NO_CPU 0xffff + +extern u32 rps_cpu_mask; +extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; + +static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, + u32 hash) +{ + unsigned int index = hash & table->mask; + u32 val = hash & ~rps_cpu_mask; + + /* We only give a hint, preemption can change CPU under us */ + val |= raw_smp_processor_id(); + + /* The following WRITE_ONCE() is paired with the READ_ONCE() + * here, and another one in get_rps_cpu(). + */ + if (READ_ONCE(table->ents[index]) != val) + WRITE_ONCE(table->ents[index], val); +} + +#endif /* CONFIG_RPS */ + +static inline void sock_rps_record_flow_hash(__u32 hash) +{ +#ifdef CONFIG_RPS + struct rps_sock_flow_table *sock_flow_table; + + if (!hash) + return; + rcu_read_lock(); + sock_flow_table = rcu_dereference(rps_sock_flow_table); + if (sock_flow_table) + rps_record_sock_flow(sock_flow_table, hash); + rcu_read_unlock(); +#endif +} + +static inline void sock_rps_record_flow(const struct sock *sk) +{ +#ifdef CONFIG_RPS + if (static_branch_unlikely(&rfs_needed)) { + /* Reading sk->sk_rxhash might incur an expensive cache line + * miss. + * + * TCP_ESTABLISHED does cover almost all states where RFS + * might be useful, and is cheaper [1] than testing : + * IPv4: inet_sk(sk)->inet_daddr + * IPv6: ipv6_addr_any(&sk->sk_v6_daddr) + * OR an additional socket flag + * [1] : sk_state and sk_prot are in the same cache line. + */ + if (sk->sk_state == TCP_ESTABLISHED) { + /* This READ_ONCE() is paired with the WRITE_ONCE() + * from sock_rps_save_rxhash() and sock_rps_reset_rxhash(). + */ + sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash)); + } + } +#endif +} + +#endif /* _NET_RPS_H */ diff --git a/include/net/sock.h b/include/net/sock.h index 09a0cde8bf522..b5e00702acc1f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1117,41 +1117,6 @@ static inline void sk_incoming_cpu_update(struct sock *sk) WRITE_ONCE(sk->sk_incoming_cpu, cpu); } -static inline void sock_rps_record_flow_hash(__u32 hash) -{ -#ifdef CONFIG_RPS - struct rps_sock_flow_table *sock_flow_table; - - rcu_read_lock(); - sock_flow_table = rcu_dereference(rps_sock_flow_table); - rps_record_sock_flow(sock_flow_table, hash); - rcu_read_unlock(); -#endif -} - -static inline void sock_rps_record_flow(const struct sock *sk) -{ -#ifdef CONFIG_RPS - if (static_branch_unlikely(&rfs_needed)) { - /* Reading sk->sk_rxhash might incur an expensive cache line - * miss. - * - * TCP_ESTABLISHED does cover almost all states where RFS - * might be useful, and is cheaper [1] than testing : - * IPv4: inet_sk(sk)->inet_daddr - * IPv6: ipv6_addr_any(&sk->sk_v6_daddr) - * OR an additional socket flag - * [1] : sk_state and sk_prot are in the same cache line. - */ - if (sk->sk_state == TCP_ESTABLISHED) { - /* This READ_ONCE() is paired with the WRITE_ONCE() - * from sock_rps_save_rxhash() and sock_rps_reset_rxhash(). - */ - sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash)); - } - } -#endif -} static inline void sock_rps_save_rxhash(struct sock *sk, const struct sk_buff *skb) diff --git a/net/core/dev.c b/net/core/dev.c index 40ba02e04bcb5..bcf49b0393d2f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -155,6 +155,7 @@ #include #include #include +#include #include "dev.h" #include "net-sysfs.h" diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index af238026ac3c6..5560083774b1a 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "dev.h" #include "net-sysfs.h" diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 8a4c698dad9c9..4b93e27404e83 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "dev.h" diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 6f1cfd176e7b8..55bd72997b310 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -119,6 +119,7 @@ #endif #include #include +#include #include diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7e1b848398d04..c5b83875411ae 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -279,6 +279,7 @@ #include #include #include +#include /* Track pending CMSGs. */ enum { diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b90d46533cdcc..8041dc181bd42 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -64,6 +64,7 @@ #include #include #include +#include #include #include diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6b9fcdb0952a0..c67679a41044f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -67,6 +67,7 @@ #include #include #include +#include /* Forward declarations for internal helper functions. */ static bool sctp_writeable(const struct sock *sk); -- cgit 1.2.3-korg From ce7f49ab741591d83e33e56948bac2f12de6e14e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:31 +0000 Subject: net: move rps_sock_flow_table to net_hotdata rps_sock_flow_table and rps_cpu_mask are used in fast path. Move them to net_hotdata for better cache locality. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-19-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 4 ++++ include/net/rps.h | 8 +++----- net/core/dev.c | 12 +++--------- net/core/sysctl_net_core.c | 9 ++++++--- 4 files changed, 16 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index b0b847585f7e6..003667a1efd6b 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -27,6 +27,10 @@ struct net_hotdata { struct kmem_cache *skbuff_cache; struct kmem_cache *skbuff_fclone_cache; struct kmem_cache *skb_small_head_cache; +#ifdef CONFIG_RPS + struct rps_sock_flow_table __rcu *rps_sock_flow_table; + u32 rps_cpu_mask; +#endif int gro_normal_batch; int netdev_budget; int netdev_budget_usecs; diff --git a/include/net/rps.h b/include/net/rps.h index 6081d817d2458..7660243e905b9 100644 --- a/include/net/rps.h +++ b/include/net/rps.h @@ -5,6 +5,7 @@ #include #include #include +#include #ifdef CONFIG_RPS @@ -64,14 +65,11 @@ struct rps_sock_flow_table { #define RPS_NO_CPU 0xffff -extern u32 rps_cpu_mask; -extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; - static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, u32 hash) { unsigned int index = hash & table->mask; - u32 val = hash & ~rps_cpu_mask; + u32 val = hash & ~net_hotdata.rps_cpu_mask; /* We only give a hint, preemption can change CPU under us */ val |= raw_smp_processor_id(); @@ -93,7 +91,7 @@ static inline void sock_rps_record_flow_hash(__u32 hash) if (!hash) return; rcu_read_lock(); - sock_flow_table = rcu_dereference(rps_sock_flow_table); + sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table); if (sock_flow_table) rps_record_sock_flow(sock_flow_table, hash); rcu_read_unlock(); diff --git a/net/core/dev.c b/net/core/dev.c index bcf49b0393d2f..0766a245816bd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4450,12 +4450,6 @@ static inline void ____napi_schedule(struct softnet_data *sd, #ifdef CONFIG_RPS -/* One global table that all flow-based protocols share. */ -struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; -EXPORT_SYMBOL(rps_sock_flow_table); -u32 rps_cpu_mask __read_mostly; -EXPORT_SYMBOL(rps_cpu_mask); - struct static_key_false rps_needed __read_mostly; EXPORT_SYMBOL(rps_needed); struct static_key_false rfs_needed __read_mostly; @@ -4547,7 +4541,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, if (!hash) goto done; - sock_flow_table = rcu_dereference(rps_sock_flow_table); + sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table); if (flow_table && sock_flow_table) { struct rps_dev_flow *rflow; u32 next_cpu; @@ -4557,10 +4551,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, * This READ_ONCE() pairs with WRITE_ONCE() from rps_record_sock_flow(). */ ident = READ_ONCE(sock_flow_table->ents[hash & sock_flow_table->mask]); - if ((ident ^ hash) & ~rps_cpu_mask) + if ((ident ^ hash) & ~net_hotdata.rps_cpu_mask) goto try_rps; - next_cpu = ident & rps_cpu_mask; + next_cpu = ident & net_hotdata.rps_cpu_mask; /* OK, now we know there is a match, * we can look at the local (per receive queue) flow table diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 4b93e27404e83..6973dda3abda6 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -140,7 +140,8 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write, mutex_lock(&sock_flow_mutex); - orig_sock_table = rcu_dereference_protected(rps_sock_flow_table, + orig_sock_table = rcu_dereference_protected( + net_hotdata.rps_sock_flow_table, lockdep_is_held(&sock_flow_mutex)); size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0; @@ -161,7 +162,8 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write, mutex_unlock(&sock_flow_mutex); return -ENOMEM; } - rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1; + net_hotdata.rps_cpu_mask = + roundup_pow_of_two(nr_cpu_ids) - 1; sock_table->mask = size - 1; } else sock_table = orig_sock_table; @@ -172,7 +174,8 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write, sock_table = NULL; if (sock_table != orig_sock_table) { - rcu_assign_pointer(rps_sock_flow_table, sock_table); + rcu_assign_pointer(net_hotdata.rps_sock_flow_table, + sock_table); if (sock_table) { static_branch_inc(&rps_needed); static_branch_inc(&rfs_needed); -- cgit 1.2.3-korg From ab63a2387cb906d43b72a8effb611bbaecb2d0cd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 Mar 2024 11:55:07 -0800 Subject: netdev: add per-queue statistics The ethtool-nl family does a good job exposing various protocol related and IEEE/IETF statistics which used to get dumped under ethtool -S, with creative names. Queue stats don't have a netlink API, yet, and remain a lion's share of ethtool -S output for new drivers. Not only is that bad because the names differ driver to driver but it's also bug-prone. Intuitively drivers try to report only the stats for active queues, but querying ethtool stats involves multiple system calls, and the number of stats is read separately from the stats themselves. Worse still when user space asks for values of the stats, it doesn't inform the kernel how big the buffer is. If number of stats increases in the meantime kernel will overflow user buffer. Add a netlink API for dumping queue stats. Queue information is exposed via the netdev-genl family, so add the stats there. Support per-queue and sum-for-device dumps. Latter will be useful when subsequent patches add more interesting common stats than just bytes and packets. The API does not currently distinguish between HW and SW stats. The expectation is that the source of the stats will either not matter much (good packets) or be obvious (skb alloc errors). Acked-by: Stanislav Fomichev Reviewed-by: Amritha Nambiar Reviewed-by: Xuan Zhuo Link: https://lore.kernel.org/r/20240306195509.1502746-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 84 +++++++++++++ Documentation/networking/statistics.rst | 15 +++ include/linux/netdevice.h | 3 + include/net/netdev_queues.h | 54 ++++++++ include/uapi/linux/netdev.h | 19 +++ net/core/netdev-genl-gen.c | 12 ++ net/core/netdev-genl-gen.h | 2 + net/core/netdev-genl.c | 213 ++++++++++++++++++++++++++++++++ tools/include/uapi/linux/netdev.h | 19 +++ 9 files changed, 421 insertions(+) (limited to 'net') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 3addac9706804..a1e48c3c84c96 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -74,6 +74,10 @@ definitions: name: queue-type type: enum entries: [ rx, tx ] + - + name: qstats-scope + type: flags + entries: [ queue ] attribute-sets: - @@ -265,6 +269,66 @@ attribute-sets: doc: ID of the NAPI instance which services this queue. type: u32 + - + name: qstats + doc: | + Get device statistics, scoped to a device or a queue. + These statistics extend (and partially duplicate) statistics available + in struct rtnl_link_stats64. + Value of the `scope` attribute determines how statistics are + aggregated. When aggregated for the entire device the statistics + represent the total number of events since last explicit reset of + the device (i.e. not a reconfiguration like changing queue count). + When reported per-queue, however, the statistics may not add + up to the total number of events, will only be reported for currently + active objects, and will likely report the number of events since last + reconfiguration. + attributes: + - + name: ifindex + doc: ifindex of the netdevice to which stats belong. + type: u32 + checks: + min: 1 + - + name: queue-type + doc: Queue type as rx, tx, for queue-id. + type: u32 + enum: queue-type + - + name: queue-id + doc: Queue ID, if stats are scoped to a single queue instance. + type: u32 + - + name: scope + doc: | + What object type should be used to iterate over the stats. + type: uint + enum: qstats-scope + - + name: rx-packets + doc: | + Number of wire packets successfully received and passed to the stack. + For drivers supporting XDP, XDP is considered the first layer + of the stack, so packets consumed by XDP are still counted here. + type: uint + value: 8 # reserve some attr ids in case we need more metadata later + - + name: rx-bytes + doc: Successfully received bytes, see `rx-packets`. + type: uint + - + name: tx-packets + doc: | + Number of wire packets successfully sent. Packet is considered to be + successfully sent once it is in device memory (usually this means + the device has issued a DMA completion for the packet). + type: uint + - + name: tx-bytes + doc: Successfully sent bytes, see `tx-packets`. + type: uint + operations: list: - @@ -405,6 +469,26 @@ operations: attributes: - ifindex reply: *napi-get-op + - + name: qstats-get + doc: | + Get / dump fine grained statistics. Which statistics are reported + depends on the device and the driver, and whether the driver stores + software counters per-queue. + attribute-set: qstats + dump: + request: + attributes: + - scope + reply: + attributes: + - ifindex + - queue-type + - queue-id + - rx-packets + - rx-bytes + - tx-packets + - tx-bytes mcast-groups: list: diff --git a/Documentation/networking/statistics.rst b/Documentation/networking/statistics.rst index 551b3cc29a413..75e017dfa8251 100644 --- a/Documentation/networking/statistics.rst +++ b/Documentation/networking/statistics.rst @@ -41,6 +41,15 @@ If `-s` is specified once the detailed errors won't be shown. `ip` supports JSON formatting via the `-j` option. +Queue statistics +~~~~~~~~~~~~~~~~ + +Queue statistics are accessible via the netdev netlink family. + +Currently no widely distributed CLI exists to access those statistics. +Kernel development tools (ynl) can be used to experiment with them, +see `Documentation/userspace-api/netlink/intro-specs.rst`. + Protocol-specific statistics ---------------------------- @@ -147,6 +156,12 @@ Statistics are reported both in the responses to link information requests (`RTM_GETLINK`) and statistic requests (`RTM_GETSTATS`, when `IFLA_STATS_LINK_64` bit is set in the `.filter_mask` of the request). +netdev (netlink) +~~~~~~~~~~~~~~~~ + +`netdev` generic netlink family allows accessing page pool and per queue +statistics. + ethtool ------- diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 416a800d72ba9..4230c7f3b9596 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1955,6 +1955,7 @@ enum netdev_reg_state { * * @sysfs_rx_queue_group: Space for optional per-rx queue attributes * @rtnl_link_ops: Rtnl_link_ops + * @stat_ops: Optional ops for queue-aware statistics * * @gso_max_size: Maximum size of generic segmentation offload * @tso_max_size: Device (as in HW) limit on the max TSO request size @@ -2335,6 +2336,8 @@ struct net_device { const struct rtnl_link_ops *rtnl_link_ops; + const struct netdev_stat_ops *stat_ops; + /* for setting kernel sock attribute on TCP connection setup */ #define GSO_MAX_SEGS 65535u #define GSO_LEGACY_MAX_SIZE 65536u diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 8b8ed4e13d74d..d633347eeda59 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -4,6 +4,60 @@ #include +struct netdev_queue_stats_rx { + u64 bytes; + u64 packets; +}; + +struct netdev_queue_stats_tx { + u64 bytes; + u64 packets; +}; + +/** + * struct netdev_stat_ops - netdev ops for fine grained stats + * @get_queue_stats_rx: get stats for a given Rx queue + * @get_queue_stats_tx: get stats for a given Tx queue + * @get_base_stats: get base stats (not belonging to any live instance) + * + * Query stats for a given object. The values of the statistics are undefined + * on entry (specifically they are *not* zero-initialized). Drivers should + * assign values only to the statistics they collect. Statistics which are not + * collected must be left undefined. + * + * Queue objects are not necessarily persistent, and only currently active + * queues are queried by the per-queue callbacks. This means that per-queue + * statistics will not generally add up to the total number of events for + * the device. The @get_base_stats callback allows filling in the delta + * between events for currently live queues and overall device history. + * When the statistics for the entire device are queried, first @get_base_stats + * is issued to collect the delta, and then a series of per-queue callbacks. + * Only statistics which are set in @get_base_stats will be reported + * at the device level, meaning that unlike in queue callbacks, setting + * a statistic to zero in @get_base_stats is a legitimate thing to do. + * This is because @get_base_stats has a second function of designating which + * statistics are in fact correct for the entire device (e.g. when history + * for some of the events is not maintained, and reliable "total" cannot + * be provided). + * + * Device drivers can assume that when collecting total device stats, + * the @get_base_stats and subsequent per-queue calls are performed + * "atomically" (without releasing the rtnl_lock). + * + * Device drivers are encouraged to reset the per-queue statistics when + * number of queues change. This is because the primary use case for + * per-queue statistics is currently to detect traffic imbalance. + */ +struct netdev_stat_ops { + void (*get_queue_stats_rx)(struct net_device *dev, int idx, + struct netdev_queue_stats_rx *stats); + void (*get_queue_stats_tx)(struct net_device *dev, int idx, + struct netdev_queue_stats_tx *stats); + void (*get_base_stats)(struct net_device *dev, + struct netdev_queue_stats_rx *rx, + struct netdev_queue_stats_tx *tx); +}; + /** * DOC: Lockless queue stopping / waking helpers. * diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 93cb411adf72e..639ffa04c172c 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -70,6 +70,10 @@ enum netdev_queue_type { NETDEV_QUEUE_TYPE_TX, }; +enum netdev_qstats_scope { + NETDEV_QSTATS_SCOPE_QUEUE = 1, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, @@ -132,6 +136,20 @@ enum { NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) }; +enum { + NETDEV_A_QSTATS_IFINDEX = 1, + NETDEV_A_QSTATS_QUEUE_TYPE, + NETDEV_A_QSTATS_QUEUE_ID, + NETDEV_A_QSTATS_SCOPE, + NETDEV_A_QSTATS_RX_PACKETS = 8, + NETDEV_A_QSTATS_RX_BYTES, + NETDEV_A_QSTATS_TX_PACKETS, + NETDEV_A_QSTATS_TX_BYTES, + + __NETDEV_A_QSTATS_MAX, + NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) +}; + enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, @@ -144,6 +162,7 @@ enum { NETDEV_CMD_PAGE_POOL_STATS_GET, NETDEV_CMD_QUEUE_GET, NETDEV_CMD_NAPI_GET, + NETDEV_CMD_QSTATS_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index be7f2ebd61b29..8d8ace9ef87f6 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -68,6 +68,11 @@ static const struct nla_policy netdev_napi_get_dump_nl_policy[NETDEV_A_NAPI_IFIN [NETDEV_A_NAPI_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), }; +/* NETDEV_CMD_QSTATS_GET - dump */ +static const struct nla_policy netdev_qstats_get_nl_policy[NETDEV_A_QSTATS_SCOPE + 1] = { + [NETDEV_A_QSTATS_SCOPE] = NLA_POLICY_MASK(NLA_UINT, 0x1), +}; + /* Ops table for netdev */ static const struct genl_split_ops netdev_nl_ops[] = { { @@ -138,6 +143,13 @@ static const struct genl_split_ops netdev_nl_ops[] = { .maxattr = NETDEV_A_NAPI_IFINDEX, .flags = GENL_CMD_CAP_DUMP, }, + { + .cmd = NETDEV_CMD_QSTATS_GET, + .dumpit = netdev_nl_qstats_get_dumpit, + .policy = netdev_qstats_get_nl_policy, + .maxattr = NETDEV_A_QSTATS_SCOPE, + .flags = GENL_CMD_CAP_DUMP, + }, }; static const struct genl_multicast_group netdev_nl_mcgrps[] = { diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index a47f2bcbe4fa5..4db40fd5b4a9e 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -28,6 +28,8 @@ int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int netdev_nl_qstats_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); enum { NETDEV_NLGRP_MGMT, diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 918b109e0cf40..7fa75e13dc6da 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "netdev-genl-gen.h" @@ -460,6 +461,218 @@ int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) return err; } +#define NETDEV_STAT_NOT_SET (~0ULL) + +static void netdev_nl_stats_add(void *_sum, const void *_add, size_t size) +{ + const u64 *add = _add; + u64 *sum = _sum; + + while (size) { + if (*add != NETDEV_STAT_NOT_SET && *sum != NETDEV_STAT_NOT_SET) + *sum += *add; + sum++; + add++; + size -= 8; + } +} + +static int netdev_stat_put(struct sk_buff *rsp, unsigned int attr_id, u64 value) +{ + if (value == NETDEV_STAT_NOT_SET) + return 0; + return nla_put_uint(rsp, attr_id, value); +} + +static int +netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx) +{ + if (netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_PACKETS, rx->packets) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_BYTES, rx->bytes)) + return -EMSGSIZE; + return 0; +} + +static int +netdev_nl_stats_write_tx(struct sk_buff *rsp, struct netdev_queue_stats_tx *tx) +{ + if (netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_PACKETS, tx->packets) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_BYTES, tx->bytes)) + return -EMSGSIZE; + return 0; +} + +static int +netdev_nl_stats_queue(struct net_device *netdev, struct sk_buff *rsp, + u32 q_type, int i, const struct genl_info *info) +{ + const struct netdev_stat_ops *ops = netdev->stat_ops; + struct netdev_queue_stats_rx rx; + struct netdev_queue_stats_tx tx; + void *hdr; + + hdr = genlmsg_iput(rsp, info); + if (!hdr) + return -EMSGSIZE; + if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex) || + nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_TYPE, q_type) || + nla_put_u32(rsp, NETDEV_A_QSTATS_QUEUE_ID, i)) + goto nla_put_failure; + + switch (q_type) { + case NETDEV_QUEUE_TYPE_RX: + memset(&rx, 0xff, sizeof(rx)); + ops->get_queue_stats_rx(netdev, i, &rx); + if (!memchr_inv(&rx, 0xff, sizeof(rx))) + goto nla_cancel; + if (netdev_nl_stats_write_rx(rsp, &rx)) + goto nla_put_failure; + break; + case NETDEV_QUEUE_TYPE_TX: + memset(&tx, 0xff, sizeof(tx)); + ops->get_queue_stats_tx(netdev, i, &tx); + if (!memchr_inv(&tx, 0xff, sizeof(tx))) + goto nla_cancel; + if (netdev_nl_stats_write_tx(rsp, &tx)) + goto nla_put_failure; + break; + } + + genlmsg_end(rsp, hdr); + return 0; + +nla_cancel: + genlmsg_cancel(rsp, hdr); + return 0; +nla_put_failure: + genlmsg_cancel(rsp, hdr); + return -EMSGSIZE; +} + +static int +netdev_nl_stats_by_queue(struct net_device *netdev, struct sk_buff *rsp, + const struct genl_info *info, + struct netdev_nl_dump_ctx *ctx) +{ + const struct netdev_stat_ops *ops = netdev->stat_ops; + int i, err; + + if (!(netdev->flags & IFF_UP)) + return 0; + + i = ctx->rxq_idx; + while (ops->get_queue_stats_rx && i < netdev->real_num_rx_queues) { + err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_RX, + i, info); + if (err) + return err; + ctx->rxq_idx = i++; + } + i = ctx->txq_idx; + while (ops->get_queue_stats_tx && i < netdev->real_num_tx_queues) { + err = netdev_nl_stats_queue(netdev, rsp, NETDEV_QUEUE_TYPE_TX, + i, info); + if (err) + return err; + ctx->txq_idx = i++; + } + + ctx->rxq_idx = 0; + ctx->txq_idx = 0; + return 0; +} + +static int +netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp, + const struct genl_info *info) +{ + struct netdev_queue_stats_rx rx_sum, rx; + struct netdev_queue_stats_tx tx_sum, tx; + const struct netdev_stat_ops *ops; + void *hdr; + int i; + + ops = netdev->stat_ops; + /* Netdev can't guarantee any complete counters */ + if (!ops->get_base_stats) + return 0; + + memset(&rx_sum, 0xff, sizeof(rx_sum)); + memset(&tx_sum, 0xff, sizeof(tx_sum)); + + ops->get_base_stats(netdev, &rx_sum, &tx_sum); + + /* The op was there, but nothing reported, don't bother */ + if (!memchr_inv(&rx_sum, 0xff, sizeof(rx_sum)) && + !memchr_inv(&tx_sum, 0xff, sizeof(tx_sum))) + return 0; + + hdr = genlmsg_iput(rsp, info); + if (!hdr) + return -EMSGSIZE; + if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex)) + goto nla_put_failure; + + for (i = 0; i < netdev->real_num_rx_queues; i++) { + memset(&rx, 0xff, sizeof(rx)); + if (ops->get_queue_stats_rx) + ops->get_queue_stats_rx(netdev, i, &rx); + netdev_nl_stats_add(&rx_sum, &rx, sizeof(rx)); + } + for (i = 0; i < netdev->real_num_tx_queues; i++) { + memset(&tx, 0xff, sizeof(tx)); + if (ops->get_queue_stats_tx) + ops->get_queue_stats_tx(netdev, i, &tx); + netdev_nl_stats_add(&tx_sum, &tx, sizeof(tx)); + } + + if (netdev_nl_stats_write_rx(rsp, &rx_sum) || + netdev_nl_stats_write_tx(rsp, &tx_sum)) + goto nla_put_failure; + + genlmsg_end(rsp, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(rsp, hdr); + return -EMSGSIZE; +} + +int netdev_nl_qstats_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); + const struct genl_info *info = genl_info_dump(cb); + struct net *net = sock_net(skb->sk); + struct net_device *netdev; + unsigned int scope; + int err = 0; + + scope = 0; + if (info->attrs[NETDEV_A_QSTATS_SCOPE]) + scope = nla_get_uint(info->attrs[NETDEV_A_QSTATS_SCOPE]); + + rtnl_lock(); + for_each_netdev_dump(net, netdev, ctx->ifindex) { + if (!netdev->stat_ops) + continue; + + switch (scope) { + case 0: + err = netdev_nl_stats_by_netdev(netdev, skb, info); + break; + case NETDEV_QSTATS_SCOPE_QUEUE: + err = netdev_nl_stats_by_queue(netdev, skb, info, ctx); + break; + } + if (err < 0) + break; + } + rtnl_unlock(); + + return err; +} + static int netdev_genl_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 93cb411adf72e..639ffa04c172c 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -70,6 +70,10 @@ enum netdev_queue_type { NETDEV_QUEUE_TYPE_TX, }; +enum netdev_qstats_scope { + NETDEV_QSTATS_SCOPE_QUEUE = 1, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, @@ -132,6 +136,20 @@ enum { NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) }; +enum { + NETDEV_A_QSTATS_IFINDEX = 1, + NETDEV_A_QSTATS_QUEUE_TYPE, + NETDEV_A_QSTATS_QUEUE_ID, + NETDEV_A_QSTATS_SCOPE, + NETDEV_A_QSTATS_RX_PACKETS = 8, + NETDEV_A_QSTATS_RX_BYTES, + NETDEV_A_QSTATS_TX_PACKETS, + NETDEV_A_QSTATS_TX_BYTES, + + __NETDEV_A_QSTATS_MAX, + NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) +}; + enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, @@ -144,6 +162,7 @@ enum { NETDEV_CMD_PAGE_POOL_STATS_GET, NETDEV_CMD_QUEUE_GET, NETDEV_CMD_NAPI_GET, + NETDEV_CMD_QSTATS_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) -- cgit 1.2.3-korg From 92f8b1f5ca0f157f564e75cef4c63641c172e0f1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 Mar 2024 11:55:08 -0800 Subject: netdev: add queue stat for alloc failures Rx alloc failures are commonly counted by drivers. Support reporting those via netdev-genl queue stats. Acked-by: Stanislav Fomichev Reviewed-by: Amritha Nambiar Reviewed-by: Xuan Zhuo Link: https://lore.kernel.org/r/20240306195509.1502746-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 7 +++++++ include/net/netdev_queues.h | 2 ++ include/uapi/linux/netdev.h | 1 + net/core/netdev-genl.c | 3 ++- tools/include/uapi/linux/netdev.h | 1 + 5 files changed, 13 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index a1e48c3c84c96..76352dbd2be4e 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -328,6 +328,13 @@ attribute-sets: name: tx-bytes doc: Successfully sent bytes, see `tx-packets`. type: uint + - + name: rx-alloc-fail + doc: | + Number of times skb or buffer allocation failed on the Rx datapath. + Allocation failure may, or may not result in a packet drop, depending + on driver implementation and whether system recovers quickly. + type: uint operations: list: diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index d633347eeda59..1ec4085853738 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -4,9 +4,11 @@ #include +/* See the netdev.yaml spec for definition of each statistic */ struct netdev_queue_stats_rx { u64 bytes; u64 packets; + u64 alloc_fail; }; struct netdev_queue_stats_tx { diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 639ffa04c172c..bb65ee840cdac 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -145,6 +145,7 @@ enum { NETDEV_A_QSTATS_RX_BYTES, NETDEV_A_QSTATS_TX_PACKETS, NETDEV_A_QSTATS_TX_BYTES, + NETDEV_A_QSTATS_RX_ALLOC_FAIL, __NETDEV_A_QSTATS_MAX, NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 7fa75e13dc6da..7004b3399c2b0 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -488,7 +488,8 @@ static int netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx) { if (netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_PACKETS, rx->packets) || - netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_BYTES, rx->bytes)) + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_BYTES, rx->bytes) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail)) return -EMSGSIZE; return 0; } diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 639ffa04c172c..bb65ee840cdac 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -145,6 +145,7 @@ enum { NETDEV_A_QSTATS_RX_BYTES, NETDEV_A_QSTATS_TX_PACKETS, NETDEV_A_QSTATS_TX_BYTES, + NETDEV_A_QSTATS_RX_ALLOC_FAIL, __NETDEV_A_QSTATS_MAX, NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) -- cgit 1.2.3-korg From c4386ab4f6c600f75fdfd21143f89bac3e625d0d Mon Sep 17 00:00:00 2001 From: Shiming Cheng Date: Thu, 7 Mar 2024 18:01:57 +0800 Subject: ipv6: fib6_rules: flush route cache when rule is changed When rule policy is changed, ipv6 socket cache is not refreshed. The sock's skb still uses a outdated route cache and was sent to a wrong interface. To avoid this error we should update fib node's version when rule is changed. Then skb's route will be reroute checked as route cache version is already different with fib node version. The route cache is refreshed to match the latest rule. Fixes: 101367c2f8c4 ("[IPV6]: Policy Routing Rules") Signed-off-by: Shiming Cheng Signed-off-by: Lena Wang Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/fib6_rules.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 7523c4baef35e..52c04f0ac4981 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -449,6 +449,11 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule) + nla_total_size(16); /* src */ } +static void fib6_rule_flush_cache(struct fib_rules_ops *ops) +{ + rt_genid_bump_ipv6(ops->fro_net); +} + static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = { .family = AF_INET6, .rule_size = sizeof(struct fib6_rule), @@ -461,6 +466,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = { .compare = fib6_rule_compare, .fill = fib6_rule_fill, .nlmsg_payload = fib6_rule_nlmsg_payload, + .flush_cache = fib6_rule_flush_cache, .nlgroup = RTNLGRP_IPV6_RULE, .owner = THIS_MODULE, .fro_net = &init_net, -- cgit 1.2.3-korg From b0ec2abf98267f14d032102551581c833b0659d3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Mar 2024 10:07:16 +0000 Subject: net: ip_tunnel: make sure to pull inner header in ip_tunnel_rcv() Apply the same fix than ones found in : 8d975c15c0cd ("ip6_tunnel: make sure to pull inner header in __ip6_tnl_rcv()") 1ca1ba465e55 ("geneve: make sure to pull inner header in geneve_rx()") We have to save skb->network_header in a temporary variable in order to be able to recompute the network_header pointer after a pskb_inet_may_pull() call. pskb_inet_may_pull() makes sure the needed headers are in skb->head. syzbot reported: BUG: KMSAN: uninit-value in __INET_ECN_decapsulate include/net/inet_ecn.h:253 [inline] BUG: KMSAN: uninit-value in INET_ECN_decapsulate include/net/inet_ecn.h:275 [inline] BUG: KMSAN: uninit-value in IP_ECN_decapsulate include/net/inet_ecn.h:302 [inline] BUG: KMSAN: uninit-value in ip_tunnel_rcv+0xed9/0x2ed0 net/ipv4/ip_tunnel.c:409 __INET_ECN_decapsulate include/net/inet_ecn.h:253 [inline] INET_ECN_decapsulate include/net/inet_ecn.h:275 [inline] IP_ECN_decapsulate include/net/inet_ecn.h:302 [inline] ip_tunnel_rcv+0xed9/0x2ed0 net/ipv4/ip_tunnel.c:409 __ipgre_rcv+0x9bc/0xbc0 net/ipv4/ip_gre.c:389 ipgre_rcv net/ipv4/ip_gre.c:411 [inline] gre_rcv+0x423/0x19f0 net/ipv4/ip_gre.c:447 gre_rcv+0x2a4/0x390 net/ipv4/gre_demux.c:163 ip_protocol_deliver_rcu+0x264/0x1300 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x2b8/0x440 net/ipv4/ip_input.c:233 NF_HOOK include/linux/netfilter.h:314 [inline] ip_local_deliver+0x21f/0x490 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:461 [inline] ip_rcv_finish net/ipv4/ip_input.c:449 [inline] NF_HOOK include/linux/netfilter.h:314 [inline] ip_rcv+0x46f/0x760 net/ipv4/ip_input.c:569 __netif_receive_skb_one_core net/core/dev.c:5534 [inline] __netif_receive_skb+0x1a6/0x5a0 net/core/dev.c:5648 netif_receive_skb_internal net/core/dev.c:5734 [inline] netif_receive_skb+0x58/0x660 net/core/dev.c:5793 tun_rx_batched+0x3ee/0x980 drivers/net/tun.c:1556 tun_get_user+0x53b9/0x66e0 drivers/net/tun.c:2009 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2055 call_write_iter include/linux/fs.h:2087 [inline] new_sync_write fs/read_write.c:497 [inline] vfs_write+0xb6b/0x1520 fs/read_write.c:590 ksys_write+0x20f/0x4c0 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:652 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: __alloc_pages+0x9a6/0xe00 mm/page_alloc.c:4590 alloc_pages_mpol+0x62b/0x9d0 mm/mempolicy.c:2133 alloc_pages+0x1be/0x1e0 mm/mempolicy.c:2204 skb_page_frag_refill+0x2bf/0x7c0 net/core/sock.c:2909 tun_build_skb drivers/net/tun.c:1686 [inline] tun_get_user+0xe0a/0x66e0 drivers/net/tun.c:1826 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2055 call_write_iter include/linux/fs.h:2087 [inline] new_sync_write fs/read_write.c:497 [inline] vfs_write+0xb6b/0x1520 fs/read_write.c:590 ksys_write+0x20f/0x4c0 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:652 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 1b6981de3f295..7af36e4f1647d 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -378,7 +378,7 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, bool log_ecn_error) { const struct iphdr *iph = ip_hdr(skb); - int err; + int nh, err; #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { @@ -404,8 +404,21 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tunnel->i_seqno = ntohl(tpi->seq) + 1; } + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0); + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(tunnel->dev, rx_length_errors); + DEV_STATS_INC(tunnel->dev, rx_errors); + goto drop; + } + iph = (struct iphdr *)(skb->head + nh); + err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) -- cgit 1.2.3-korg From 6025b9135f7a8b46826a5fcf947259da43bac281 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 4 Mar 2024 06:08:47 -0800 Subject: net: dqs: add NIC stall detector based on BQL softnet_data->time_squeeze is sometimes used as a proxy for host overload or indication of scheduling problems. In practice this statistic is very noisy and has hard to grasp units - e.g. is 10 squeezes a second to be expected, or high? Delaying network (NAPI) processing leads to drops on NIC queues but also RTT bloat, impacting pacing and CA decisions. Stalls are a little hard to detect on the Rx side, because there may simply have not been any packets received in given period of time. Packet timestamps help a little bit, but again we don't know if packets are stale because we're not keeping up or because someone (*cough* cgroups) disabled IRQs for a long time. We can, however, use Tx as a proxy for Rx stalls. Most drivers use combined Rx+Tx NAPIs so if Tx gets starved so will Rx. On the Tx side we know exactly when packets get queued, and completed, so there is no uncertainty. This patch adds stall checks to BQL. Why BQL? Because it's a convenient place to add such checks, already called by most drivers, and it has copious free space in its structures (this patch adds no extra cache references or dirtying to the fast path). The algorithm takes one parameter - max delay AKA stall threshold and increments a counter whenever NAPI got delayed for at least that amount of time. It also records the length of the longest stall. To be precise every time NAPI has not polled for at least stall thrs we check if there were any Tx packets queued between last NAPI run and now - stall_thrs/2. Unlike the classic Tx watchdog this mechanism does not ignore stalls caused by Tx being disabled, or loss of link. I don't think the check is worth the complexity, and stall is a stall, whether due to host overload, flow control, link down... doesn't matter much to the application. We have been running this detector in production at Meta for 2 years, with the threshold of 8ms. It's the lowest value where false positives become rare. There's still a constant stream of reported stalls (especially without the ksoftirqd deferral patches reverted), those who like their stall metrics to be 0 may prefer higher value. Signed-off-by: Jakub Kicinski Signed-off-by: Breno Leitao Signed-off-by: David S. Miller --- Documentation/ABI/testing/sysfs-class-net-queues | 23 ++++++++ include/linux/dynamic_queue_limits.h | 45 ++++++++++++++ include/trace/events/napi.h | 33 +++++++++++ lib/dynamic_queue_limits.c | 74 ++++++++++++++++++++++++ net/core/net-sysfs.c | 62 ++++++++++++++++++++ 5 files changed, 237 insertions(+) (limited to 'net') diff --git a/Documentation/ABI/testing/sysfs-class-net-queues b/Documentation/ABI/testing/sysfs-class-net-queues index 5bff64d256c20..84aa25e0d14d1 100644 --- a/Documentation/ABI/testing/sysfs-class-net-queues +++ b/Documentation/ABI/testing/sysfs-class-net-queues @@ -96,3 +96,26 @@ Description: Indicates the absolute minimum limit of bytes allowed to be queued on this network device transmit queue. Default value is 0. + +What: /sys/class/net//queues/tx-/byte_queue_limits/stall_thrs +Date: Jan 2024 +KernelVersion: 6.9 +Contact: netdev@vger.kernel.org +Description: + Tx completion stall detection threshold in ms. Kernel will + guarantee to detect all stalls longer than this threshold but + may also detect stalls longer than half of the threshold. + +What: /sys/class/net//queues/tx-/byte_queue_limits/stall_cnt +Date: Jan 2024 +KernelVersion: 6.9 +Contact: netdev@vger.kernel.org +Description: + Number of detected Tx completion stalls. + +What: /sys/class/net//queues/tx-/byte_queue_limits/stall_max +Date: Jan 2024 +KernelVersion: 6.9 +Contact: netdev@vger.kernel.org +Description: + Longest detected Tx completion stall. Write 0 to clear. diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h index 407c2f281b644..5693a4be0d9a9 100644 --- a/include/linux/dynamic_queue_limits.h +++ b/include/linux/dynamic_queue_limits.h @@ -38,14 +38,22 @@ #ifdef __KERNEL__ +#include #include +#define DQL_HIST_LEN 4 +#define DQL_HIST_ENT(dql, idx) ((dql)->history[(idx) % DQL_HIST_LEN]) + struct dql { /* Fields accessed in enqueue path (dql_queued) */ unsigned int num_queued; /* Total ever queued */ unsigned int adj_limit; /* limit + num_completed */ unsigned int last_obj_cnt; /* Count at last queuing */ + unsigned long history_head; /* top 58 bits of jiffies */ + /* stall entries, a bit per entry */ + unsigned long history[DQL_HIST_LEN]; + /* Fields accessed only by completion path (dql_completed) */ unsigned int limit ____cacheline_aligned_in_smp; /* Current limit */ @@ -62,6 +70,13 @@ struct dql { unsigned int max_limit; /* Max limit */ unsigned int min_limit; /* Minimum limit */ unsigned int slack_hold_time; /* Time to measure slack */ + + /* Stall threshold (in jiffies), defined by user */ + unsigned short stall_thrs; + /* Longest stall detected, reported to user */ + unsigned short stall_max; + unsigned long last_reap; /* Last reap (in jiffies) */ + unsigned long stall_cnt; /* Number of stalls */ }; /* Set some static maximums */ @@ -74,6 +89,8 @@ struct dql { */ static inline void dql_queued(struct dql *dql, unsigned int count) { + unsigned long map, now, now_hi, i; + BUG_ON(count > DQL_MAX_OBJECT); dql->last_obj_cnt = count; @@ -86,6 +103,34 @@ static inline void dql_queued(struct dql *dql, unsigned int count) barrier(); dql->num_queued += count; + + now = jiffies; + now_hi = now / BITS_PER_LONG; + + /* The following code set a bit in the ring buffer, where each + * bit trackes time the packet was queued. The dql->history buffer + * tracks DQL_HIST_LEN * BITS_PER_LONG time (jiffies) slot + */ + if (unlikely(now_hi != dql->history_head)) { + /* About to reuse slots, clear them */ + for (i = 0; i < DQL_HIST_LEN; i++) { + /* Multiplication masks high bits */ + if (now_hi * BITS_PER_LONG == + (dql->history_head + i) * BITS_PER_LONG) + break; + DQL_HIST_ENT(dql, dql->history_head + i + 1) = 0; + } + /* pairs with smp_rmb() in dql_check_stall() */ + smp_wmb(); + WRITE_ONCE(dql->history_head, now_hi); + } + + /* __set_bit() does not guarantee WRITE_ONCE() semantics */ + map = DQL_HIST_ENT(dql, now_hi); + + /* Populate the history with an entry (bit) per queued */ + if (!(map & BIT_MASK(now))) + WRITE_ONCE(DQL_HIST_ENT(dql, now_hi), map | BIT_MASK(now)); } /* Returns how many objects can be queued, < 0 indicates over limit. */ diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h index 6678cf8b235b8..dc03cf8e03691 100644 --- a/include/trace/events/napi.h +++ b/include/trace/events/napi.h @@ -36,6 +36,39 @@ TRACE_EVENT(napi_poll, __entry->work, __entry->budget) ); +TRACE_EVENT(dql_stall_detected, + + TP_PROTO(unsigned short thrs, unsigned int len, + unsigned long last_reap, unsigned long hist_head, + unsigned long now, unsigned long *hist), + + TP_ARGS(thrs, len, last_reap, hist_head, now, hist), + + TP_STRUCT__entry( + __field( unsigned short, thrs) + __field( unsigned int, len) + __field( unsigned long, last_reap) + __field( unsigned long, hist_head) + __field( unsigned long, now) + __array( unsigned long, hist, 4) + ), + + TP_fast_assign( + __entry->thrs = thrs; + __entry->len = len; + __entry->last_reap = last_reap; + __entry->hist_head = hist_head * BITS_PER_LONG; + __entry->now = now; + memcpy(__entry->hist, hist, sizeof(entry->hist)); + ), + + TP_printk("thrs %u len %u last_reap %lu hist_head %lu now %lu hist %016lx %016lx %016lx %016lx", + __entry->thrs, __entry->len, + __entry->last_reap, __entry->hist_head, __entry->now, + __entry->hist[0], __entry->hist[1], + __entry->hist[2], __entry->hist[3]) +); + #undef NO_DEV #endif /* _TRACE_NAPI_H */ diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c index fde0aa2441480..a1389db1c30a3 100644 --- a/lib/dynamic_queue_limits.c +++ b/lib/dynamic_queue_limits.c @@ -10,10 +10,77 @@ #include #include #include +#include #define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0) #define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0) +static void dql_check_stall(struct dql *dql) +{ + unsigned short stall_thrs; + unsigned long now; + + stall_thrs = READ_ONCE(dql->stall_thrs); + if (!stall_thrs) + return; + + now = jiffies; + /* Check for a potential stall */ + if (time_after_eq(now, dql->last_reap + stall_thrs)) { + unsigned long hist_head, t, start, end; + + /* We are trying to detect a period of at least @stall_thrs + * jiffies without any Tx completions, but during first half + * of which some Tx was posted. + */ +dqs_again: + hist_head = READ_ONCE(dql->history_head); + /* pairs with smp_wmb() in dql_queued() */ + smp_rmb(); + + /* Get the previous entry in the ring buffer, which is the + * oldest sample. + */ + start = (hist_head - DQL_HIST_LEN + 1) * BITS_PER_LONG; + + /* Advance start to continue from the last reap time */ + if (time_before(start, dql->last_reap + 1)) + start = dql->last_reap + 1; + + /* Newest sample we should have already seen a completion for */ + end = hist_head * BITS_PER_LONG + (BITS_PER_LONG - 1); + + /* Shrink the search space to [start, (now - start_thrs/2)] if + * `end` is beyond the stall zone + */ + if (time_before(now, end + stall_thrs / 2)) + end = now - stall_thrs / 2; + + /* Search for the queued time in [t, end] */ + for (t = start; time_before_eq(t, end); t++) + if (test_bit(t % (DQL_HIST_LEN * BITS_PER_LONG), + dql->history)) + break; + + /* Variable t contains the time of the queue */ + if (!time_before_eq(t, end)) + goto no_stall; + + /* The ring buffer was modified in the meantime, retry */ + if (hist_head != READ_ONCE(dql->history_head)) + goto dqs_again; + + dql->stall_cnt++; + dql->stall_max = max_t(unsigned short, dql->stall_max, now - t); + + trace_dql_stall_detected(dql->stall_thrs, now - t, + dql->last_reap, dql->history_head, + now, dql->history); + } +no_stall: + dql->last_reap = now; +} + /* Records completed count and recalculates the queue limit */ void dql_completed(struct dql *dql, unsigned int count) { @@ -110,6 +177,8 @@ void dql_completed(struct dql *dql, unsigned int count) dql->prev_last_obj_cnt = dql->last_obj_cnt; dql->num_completed = completed; dql->prev_num_queued = num_queued; + + dql_check_stall(dql); } EXPORT_SYMBOL(dql_completed); @@ -125,6 +194,10 @@ void dql_reset(struct dql *dql) dql->prev_ovlimit = 0; dql->lowest_slack = UINT_MAX; dql->slack_start_time = jiffies; + + dql->last_reap = jiffies; + dql->history_head = jiffies / BITS_PER_LONG; + memset(dql->history, 0, sizeof(dql->history)); } EXPORT_SYMBOL(dql_reset); @@ -133,6 +206,7 @@ void dql_init(struct dql *dql, unsigned int hold_time) dql->max_limit = DQL_MAX_LIMIT; dql->min_limit = 0; dql->slack_hold_time = hold_time; + dql->stall_thrs = 0; dql_reset(dql); } EXPORT_SYMBOL(dql_init); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 5560083774b1a..e3d7a8cfa20b7 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1415,6 +1415,65 @@ static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init = __ATTR(hold_time, 0644, bql_show_hold_time, bql_set_hold_time); +static ssize_t bql_show_stall_thrs(struct netdev_queue *queue, char *buf) +{ + struct dql *dql = &queue->dql; + + return sprintf(buf, "%u\n", jiffies_to_msecs(dql->stall_thrs)); +} + +static ssize_t bql_set_stall_thrs(struct netdev_queue *queue, + const char *buf, size_t len) +{ + struct dql *dql = &queue->dql; + unsigned int value; + int err; + + err = kstrtouint(buf, 10, &value); + if (err < 0) + return err; + + value = msecs_to_jiffies(value); + if (value && (value < 4 || value > 4 / 2 * BITS_PER_LONG)) + return -ERANGE; + + if (!dql->stall_thrs && value) + dql->last_reap = jiffies; + /* Force last_reap to be live */ + smp_wmb(); + dql->stall_thrs = value; + + return len; +} + +static struct netdev_queue_attribute bql_stall_thrs_attribute __ro_after_init = + __ATTR(stall_thrs, 0644, bql_show_stall_thrs, bql_set_stall_thrs); + +static ssize_t bql_show_stall_max(struct netdev_queue *queue, char *buf) +{ + return sprintf(buf, "%u\n", READ_ONCE(queue->dql.stall_max)); +} + +static ssize_t bql_set_stall_max(struct netdev_queue *queue, + const char *buf, size_t len) +{ + WRITE_ONCE(queue->dql.stall_max, 0); + return len; +} + +static struct netdev_queue_attribute bql_stall_max_attribute __ro_after_init = + __ATTR(stall_max, 0644, bql_show_stall_max, bql_set_stall_max); + +static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf) +{ + struct dql *dql = &queue->dql; + + return sprintf(buf, "%lu\n", dql->stall_cnt); +} + +static struct netdev_queue_attribute bql_stall_cnt_attribute __ro_after_init = + __ATTR(stall_cnt, 0444, bql_show_stall_cnt, NULL); + static ssize_t bql_show_inflight(struct netdev_queue *queue, char *buf) { @@ -1453,6 +1512,9 @@ static struct attribute *dql_attrs[] __ro_after_init = { &bql_limit_min_attribute.attr, &bql_hold_time_attribute.attr, &bql_inflight_attribute.attr, + &bql_stall_thrs_attribute.attr, + &bql_stall_cnt_attribute.attr, + &bql_stall_max_attribute.attr, NULL }; -- cgit 1.2.3-korg From 2118f9390d83cf942de8b34faf3d35b54f9f4eee Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 6 Mar 2024 13:49:15 +0100 Subject: net: nexthop: Adjust netlink policy parsing for a new attribute A following patch will introduce a new attribute, op-specific flags to adjust the behavior of an operation. Different operations will recognize different flags. - To make the differentiation possible, stop sharing the policies for get and del operations. - To allow querying for presence of the attribute, have all the attribute arrays sized to NHA_MAX, regardless of what is permitted by policy, and pass the corresponding value to nlmsg_parse() as well. Signed-off-by: Petr Machata Reviewed-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- net/ipv4/nexthop.c | 58 ++++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 70509da4f0806..bcd4df2f1cad4 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -43,6 +43,10 @@ static const struct nla_policy rtm_nh_policy_get[] = { [NHA_ID] = { .type = NLA_U32 }, }; +static const struct nla_policy rtm_nh_policy_del[] = { + [NHA_ID] = { .type = NLA_U32 }, +}; + static const struct nla_policy rtm_nh_policy_dump[] = { [NHA_OIF] = { .type = NLA_U32 }, [NHA_GROUPS] = { .type = NLA_FLAG }, @@ -2966,9 +2970,9 @@ static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, return err; } -static int __nh_valid_get_del_req(const struct nlmsghdr *nlh, - struct nlattr **tb, u32 *id, - struct netlink_ext_ack *extack) +static int nh_valid_get_del_req(const struct nlmsghdr *nlh, + struct nlattr **tb, u32 *id, + struct netlink_ext_ack *extack) { struct nhmsg *nhm = nlmsg_data(nlh); @@ -2991,26 +2995,12 @@ static int __nh_valid_get_del_req(const struct nlmsghdr *nlh, return 0; } -static int nh_valid_get_del_req(const struct nlmsghdr *nlh, u32 *id, - struct netlink_ext_ack *extack) -{ - struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)]; - int err; - - err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, - ARRAY_SIZE(rtm_nh_policy_get) - 1, - rtm_nh_policy_get, extack); - if (err < 0) - return err; - - return __nh_valid_get_del_req(nlh, tb, id, extack); -} - /* rtnl */ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); + struct nlattr *tb[NHA_MAX + 1]; struct nl_info nlinfo = { .nlh = nlh, .nl_net = net, @@ -3020,7 +3010,12 @@ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, int err; u32 id; - err = nh_valid_get_del_req(nlh, &id, extack); + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX, + rtm_nh_policy_del, extack); + if (err < 0) + return err; + + err = nh_valid_get_del_req(nlh, tb, &id, extack); if (err) return err; @@ -3038,12 +3033,18 @@ static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); + struct nlattr *tb[NHA_MAX + 1]; struct sk_buff *skb = NULL; struct nexthop *nh; int err; u32 id; - err = nh_valid_get_del_req(nlh, &id, extack); + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX, + rtm_nh_policy_get, extack); + if (err < 0) + return err; + + err = nh_valid_get_del_req(nlh, tb, &id, extack); if (err) return err; @@ -3157,11 +3158,10 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, struct nh_dump_filter *filter, struct netlink_callback *cb) { - struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump)]; + struct nlattr *tb[NHA_MAX + 1]; int err; - err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, - ARRAY_SIZE(rtm_nh_policy_dump) - 1, + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX, rtm_nh_policy_dump, cb->extack); if (err < 0) return err; @@ -3300,11 +3300,10 @@ static int nh_valid_dump_bucket_req(const struct nlmsghdr *nlh, struct netlink_callback *cb) { struct nlattr *res_tb[ARRAY_SIZE(rtm_nh_res_bucket_policy_dump)]; - struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump_bucket)]; + struct nlattr *tb[NHA_MAX + 1]; int err; - err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, - ARRAY_SIZE(rtm_nh_policy_dump_bucket) - 1, + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX, rtm_nh_policy_dump_bucket, NULL); if (err < 0) return err; @@ -3474,16 +3473,15 @@ static int nh_valid_get_bucket_req(const struct nlmsghdr *nlh, u32 *id, u16 *bucket_index, struct netlink_ext_ack *extack) { - struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get_bucket)]; + struct nlattr *tb[NHA_MAX + 1]; int err; - err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, - ARRAY_SIZE(rtm_nh_policy_get_bucket) - 1, + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX, rtm_nh_policy_get_bucket, extack); if (err < 0) return err; - err = __nh_valid_get_del_req(nlh, tb, id, extack); + err = nh_valid_get_del_req(nlh, tb, id, extack); if (err) return err; -- cgit 1.2.3-korg From a207eab1039b501daddc8e729c9cc5d99fe93d18 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 6 Mar 2024 13:49:16 +0100 Subject: net: nexthop: Add NHA_OP_FLAGS In order to add per-nexthop statistics, but still not increase netlink message size for consumers that do not care about them, there needs to be a toggle through which the user indicates their desire to get the statistics. To that end, add a new attribute, NHA_OP_FLAGS. The idea is to be able to use the attribute for carrying of arbitrary operation-specific flags, i.e. not make it specific for get / dump. Add the new attribute to get and dump policies, but do not actually allow any flags yet -- those will come later as the flags themselves are defined. Add the necessary parsing code. Signed-off-by: Petr Machata Reviewed-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/nexthop.h | 3 +++ net/ipv4/nexthop.c | 24 ++++++++++++++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h index d8ffa8c9ca784..086444e2946c8 100644 --- a/include/uapi/linux/nexthop.h +++ b/include/uapi/linux/nexthop.h @@ -60,6 +60,9 @@ enum { /* nested; nexthop bucket attributes */ NHA_RES_BUCKET, + /* u32; operation-specific flags */ + NHA_OP_FLAGS, + __NHA_MAX, }; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index bcd4df2f1cad4..576981f4ca498 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -41,6 +41,7 @@ static const struct nla_policy rtm_nh_policy_new[] = { static const struct nla_policy rtm_nh_policy_get[] = { [NHA_ID] = { .type = NLA_U32 }, + [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32, 0), }; static const struct nla_policy rtm_nh_policy_del[] = { @@ -52,6 +53,7 @@ static const struct nla_policy rtm_nh_policy_dump[] = { [NHA_GROUPS] = { .type = NLA_FLAG }, [NHA_MASTER] = { .type = NLA_U32 }, [NHA_FDB] = { .type = NLA_FLAG }, + [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32, 0), }; static const struct nla_policy rtm_nh_res_policy_new[] = { @@ -2971,7 +2973,7 @@ static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, } static int nh_valid_get_del_req(const struct nlmsghdr *nlh, - struct nlattr **tb, u32 *id, + struct nlattr **tb, u32 *id, u32 *op_flags, struct netlink_ext_ack *extack) { struct nhmsg *nhm = nlmsg_data(nlh); @@ -2992,6 +2994,11 @@ static int nh_valid_get_del_req(const struct nlmsghdr *nlh, return -EINVAL; } + if (tb[NHA_OP_FLAGS]) + *op_flags = nla_get_u32(tb[NHA_OP_FLAGS]); + else + *op_flags = 0; + return 0; } @@ -3007,6 +3014,7 @@ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, .portid = NETLINK_CB(skb).portid, }; struct nexthop *nh; + u32 op_flags; int err; u32 id; @@ -3015,7 +3023,7 @@ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) return err; - err = nh_valid_get_del_req(nlh, tb, &id, extack); + err = nh_valid_get_del_req(nlh, tb, &id, &op_flags, extack); if (err) return err; @@ -3036,6 +3044,7 @@ static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct nlattr *tb[NHA_MAX + 1]; struct sk_buff *skb = NULL; struct nexthop *nh; + u32 op_flags; int err; u32 id; @@ -3044,7 +3053,7 @@ static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (err < 0) return err; - err = nh_valid_get_del_req(nlh, tb, &id, extack); + err = nh_valid_get_del_req(nlh, tb, &id, &op_flags, extack); if (err) return err; @@ -3080,6 +3089,7 @@ struct nh_dump_filter { bool group_filter; bool fdb_filter; u32 res_bucket_nh_id; + u32 op_flags; }; static bool nh_dump_filtered(struct nexthop *nh, @@ -3151,6 +3161,11 @@ static int __nh_valid_dump_req(const struct nlmsghdr *nlh, struct nlattr **tb, return -EINVAL; } + if (tb[NHA_OP_FLAGS]) + filter->op_flags = nla_get_u32(tb[NHA_OP_FLAGS]); + else + filter->op_flags = 0; + return 0; } @@ -3474,6 +3489,7 @@ static int nh_valid_get_bucket_req(const struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[NHA_MAX + 1]; + u32 op_flags; int err; err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX, @@ -3481,7 +3497,7 @@ static int nh_valid_get_bucket_req(const struct nlmsghdr *nlh, if (err < 0) return err; - err = nh_valid_get_del_req(nlh, tb, id, extack); + err = nh_valid_get_del_req(nlh, tb, id, &op_flags, extack); if (err) return err; -- cgit 1.2.3-korg From f4676ea74b8549cd88dbfe2a592ce4530039e61f Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 6 Mar 2024 13:49:17 +0100 Subject: net: nexthop: Add nexthop group entry stats Add nexthop group entry stats to count the number of packets forwarded via each nexthop in the group. The stats will be exposed to user space for better data path observability in the next patch. The per-CPU stats pointer is placed at the beginning of 'struct nh_grp_entry', so that all the fields accessed for the data path reside on the same cache line: struct nh_grp_entry { struct nexthop * nh; /* 0 8 */ struct nh_grp_entry_stats * stats; /* 8 8 */ u8 weight; /* 16 1 */ /* XXX 7 bytes hole, try to pack */ union { struct { atomic_t upper_bound; /* 24 4 */ } hthr; /* 24 4 */ struct { struct list_head uw_nh_entry; /* 24 16 */ u16 count_buckets; /* 40 2 */ u16 wants_buckets; /* 42 2 */ } res; /* 24 24 */ }; /* 24 24 */ struct list_head nh_list; /* 48 16 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct nexthop * nh_parent; /* 64 8 */ /* size: 72, cachelines: 2, members: 6 */ /* sum members: 65, holes: 1, sum holes: 7 */ /* last cacheline: 8 bytes */ }; Co-developed-by: Petr Machata Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/nexthop.h | 6 ++++++ net/ipv4/nexthop.c | 35 +++++++++++++++++++++++++++++++---- 2 files changed, 37 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 77e99cba60ade..6e6a36fee51e9 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -95,8 +95,14 @@ struct nh_res_table { struct nh_res_bucket nh_buckets[] __counted_by(num_nh_buckets); }; +struct nh_grp_entry_stats { + u64_stats_t packets; + struct u64_stats_sync syncp; +}; + struct nh_grp_entry { struct nexthop *nh; + struct nh_grp_entry_stats __percpu *stats; u8 weight; union { diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 576981f4ca498..92dc21a231f85 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -480,6 +480,7 @@ static void nexthop_free_group(struct nexthop *nh) struct nh_grp_entry *nhge = &nhg->nh_entries[i]; WARN_ON(!list_empty(&nhge->nh_list)); + free_percpu(nhge->stats); nexthop_put(nhge->nh); } @@ -660,6 +661,16 @@ nla_put_failure: return -EMSGSIZE; } +static void nh_grp_entry_stats_inc(struct nh_grp_entry *nhge) +{ + struct nh_grp_entry_stats *cpu_stats; + + cpu_stats = this_cpu_ptr(nhge->stats); + u64_stats_update_begin(&cpu_stats->syncp); + u64_stats_inc(&cpu_stats->packets); + u64_stats_update_end(&cpu_stats->syncp); +} + static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg) { struct nexthop_grp *p; @@ -1182,6 +1193,7 @@ static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash) if (hash > atomic_read(&nhge->hthr.upper_bound)) continue; + nh_grp_entry_stats_inc(nhge); return nhge->nh; } @@ -1191,7 +1203,7 @@ static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash) static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash) { - struct nexthop *rc = NULL; + struct nh_grp_entry *nhge0 = NULL; int i; if (nhg->fdb_nh) @@ -1206,16 +1218,20 @@ static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash) if (!nexthop_is_good_nh(nhge->nh)) continue; - if (!rc) - rc = nhge->nh; + if (!nhge0) + nhge0 = nhge; if (hash > atomic_read(&nhge->hthr.upper_bound)) continue; + nh_grp_entry_stats_inc(nhge); return nhge->nh; } - return rc ? : nhg->nh_entries[0].nh; + if (!nhge0) + nhge0 = &nhg->nh_entries[0]; + nh_grp_entry_stats_inc(nhge0); + return nhge0->nh; } static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash) @@ -1231,6 +1247,7 @@ static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash) bucket = &res_table->nh_buckets[bucket_index]; nh_res_bucket_set_busy(bucket); nhge = rcu_dereference(bucket->nh_entry); + nh_grp_entry_stats_inc(nhge); return nhge->nh; } @@ -1804,6 +1821,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, newg->has_v4 = true; list_del(&nhges[i].nh_list); + new_nhges[j].stats = nhges[i].stats; new_nhges[j].nh_parent = nhges[i].nh_parent; new_nhges[j].nh = nhges[i].nh; new_nhges[j].weight = nhges[i].weight; @@ -1819,6 +1837,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, rcu_assign_pointer(nhp->nh_grp, newg); list_del(&nhge->nh_list); + free_percpu(nhge->stats); nexthop_put(nhge->nh); /* Removal of a NH from a resilient group is notified through @@ -2483,6 +2502,13 @@ static struct nexthop *nexthop_create_group(struct net *net, if (nhi->family == AF_INET) nhg->has_v4 = true; + nhg->nh_entries[i].stats = + netdev_alloc_pcpu_stats(struct nh_grp_entry_stats); + if (!nhg->nh_entries[i].stats) { + err = -ENOMEM; + nexthop_put(nhe); + goto out_no_nh; + } nhg->nh_entries[i].nh = nhe; nhg->nh_entries[i].weight = entry[i].weight + 1; list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list); @@ -2522,6 +2548,7 @@ static struct nexthop *nexthop_create_group(struct net *net, out_no_nh: for (i--; i >= 0; --i) { list_del(&nhg->nh_entries[i].nh_list); + free_percpu(nhg->nh_entries[i].stats); nexthop_put(nhg->nh_entries[i].nh); } -- cgit 1.2.3-korg From 95fedd7685912f96304615efe50485588b1c3567 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 6 Mar 2024 13:49:18 +0100 Subject: net: nexthop: Expose nexthop group stats to user space Add netlink support for reading NH group stats. This data is only for statistics of the traffic in the SW datapath. HW nexthop group statistics will be added in the following patches. Emission of the stats is keyed to a new op_stats flag to avoid cluttering the netlink message with stats if the user doesn't need them: NHA_OP_FLAG_DUMP_STATS. Co-developed-by: Petr Machata Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/nexthop.h | 30 ++++++++++++++ net/ipv4/nexthop.c | 95 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 117 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h index 086444e2946c8..f4db63c170853 100644 --- a/include/uapi/linux/nexthop.h +++ b/include/uapi/linux/nexthop.h @@ -30,6 +30,8 @@ enum { #define NEXTHOP_GRP_TYPE_MAX (__NEXTHOP_GRP_TYPE_MAX - 1) +#define NHA_OP_FLAG_DUMP_STATS BIT(0) + enum { NHA_UNSPEC, NHA_ID, /* u32; id for nexthop. id == 0 means auto-assign */ @@ -63,6 +65,9 @@ enum { /* u32; operation-specific flags */ NHA_OP_FLAGS, + /* nested; nexthop group stats */ + NHA_GROUP_STATS, + __NHA_MAX, }; @@ -104,4 +109,29 @@ enum { #define NHA_RES_BUCKET_MAX (__NHA_RES_BUCKET_MAX - 1) +enum { + NHA_GROUP_STATS_UNSPEC, + + /* nested; nexthop group entry stats */ + NHA_GROUP_STATS_ENTRY, + + __NHA_GROUP_STATS_MAX, +}; + +#define NHA_GROUP_STATS_MAX (__NHA_GROUP_STATS_MAX - 1) + +enum { + NHA_GROUP_STATS_ENTRY_UNSPEC, + + /* u32; nexthop id of the nexthop group entry */ + NHA_GROUP_STATS_ENTRY_ID, + + /* uint; number of packets forwarded via the nexthop group entry */ + NHA_GROUP_STATS_ENTRY_PACKETS, + + __NHA_GROUP_STATS_ENTRY_MAX, +}; + +#define NHA_GROUP_STATS_ENTRY_MAX (__NHA_GROUP_STATS_ENTRY_MAX - 1) + #endif diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 92dc21a231f85..3365c41eee9a2 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -26,6 +26,8 @@ static void remove_nexthop(struct net *net, struct nexthop *nh, #define NH_DEV_HASHBITS 8 #define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS) +#define NHA_OP_FLAGS_DUMP_ALL (NHA_OP_FLAG_DUMP_STATS) + static const struct nla_policy rtm_nh_policy_new[] = { [NHA_ID] = { .type = NLA_U32 }, [NHA_GROUP] = { .type = NLA_BINARY }, @@ -41,7 +43,8 @@ static const struct nla_policy rtm_nh_policy_new[] = { static const struct nla_policy rtm_nh_policy_get[] = { [NHA_ID] = { .type = NLA_U32 }, - [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32, 0), + [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32, + NHA_OP_FLAGS_DUMP_ALL), }; static const struct nla_policy rtm_nh_policy_del[] = { @@ -53,7 +56,8 @@ static const struct nla_policy rtm_nh_policy_dump[] = { [NHA_GROUPS] = { .type = NLA_FLAG }, [NHA_MASTER] = { .type = NLA_U32 }, [NHA_FDB] = { .type = NLA_FLAG }, - [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32, 0), + [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32, + NHA_OP_FLAGS_DUMP_ALL), }; static const struct nla_policy rtm_nh_res_policy_new[] = { @@ -671,8 +675,78 @@ static void nh_grp_entry_stats_inc(struct nh_grp_entry *nhge) u64_stats_update_end(&cpu_stats->syncp); } -static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg) +static void nh_grp_entry_stats_read(struct nh_grp_entry *nhge, + u64 *ret_packets) +{ + int i; + + *ret_packets = 0; + + for_each_possible_cpu(i) { + struct nh_grp_entry_stats *cpu_stats; + unsigned int start; + u64 packets; + + cpu_stats = per_cpu_ptr(nhge->stats, i); + do { + start = u64_stats_fetch_begin(&cpu_stats->syncp); + packets = u64_stats_read(&cpu_stats->packets); + } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); + + *ret_packets += packets; + } +} + +static int nla_put_nh_group_stats_entry(struct sk_buff *skb, + struct nh_grp_entry *nhge) +{ + struct nlattr *nest; + u64 packets; + + nh_grp_entry_stats_read(nhge, &packets); + + nest = nla_nest_start(skb, NHA_GROUP_STATS_ENTRY); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u32(skb, NHA_GROUP_STATS_ENTRY_ID, nhge->nh->id) || + nla_put_uint(skb, NHA_GROUP_STATS_ENTRY_PACKETS, packets)) + goto nla_put_failure; + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int nla_put_nh_group_stats(struct sk_buff *skb, struct nexthop *nh) { + struct nh_group *nhg = rtnl_dereference(nh->nh_grp); + struct nlattr *nest; + int i; + + nest = nla_nest_start(skb, NHA_GROUP_STATS); + if (!nest) + return -EMSGSIZE; + + for (i = 0; i < nhg->num_nh; i++) + if (nla_put_nh_group_stats_entry(skb, &nhg->nh_entries[i])) + goto cancel_out; + + nla_nest_end(skb, nest); + return 0; + +cancel_out: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int nla_put_nh_group(struct sk_buff *skb, struct nexthop *nh, + u32 op_flags) +{ + struct nh_group *nhg = rtnl_dereference(nh->nh_grp); struct nexthop_grp *p; size_t len = nhg->num_nh * sizeof(*p); struct nlattr *nla; @@ -701,6 +775,10 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg) if (nhg->resilient && nla_put_nh_group_res(skb, nhg)) goto nla_put_failure; + if (op_flags & NHA_OP_FLAG_DUMP_STATS && + nla_put_nh_group_stats(skb, nh)) + goto nla_put_failure; + return 0; nla_put_failure: @@ -708,7 +786,8 @@ nla_put_failure: } static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, - int event, u32 portid, u32 seq, unsigned int nlflags) + int event, u32 portid, u32 seq, unsigned int nlflags, + u32 op_flags) { struct fib6_nh *fib6_nh; struct fib_nh *fib_nh; @@ -735,7 +814,7 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB)) goto nla_put_failure; - if (nla_put_nh_group(skb, nhg)) + if (nla_put_nh_group(skb, nh, op_flags)) goto nla_put_failure; goto out; } @@ -866,7 +945,7 @@ static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info) if (!skb) goto errout; - err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags); + err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags, 0); if (err < 0) { /* -EMSGSIZE implies BUG in nh_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -3095,7 +3174,7 @@ static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh, goto errout_free; err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0); + nlh->nlmsg_seq, 0, op_flags); if (err < 0) { WARN_ON(err == -EMSGSIZE); goto errout_free; @@ -3265,7 +3344,7 @@ static int rtm_dump_nexthop_cb(struct sk_buff *skb, struct netlink_callback *cb, return nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI); + cb->nlh->nlmsg_seq, NLM_F_MULTI, filter->op_flags); } /* rtnl */ -- cgit 1.2.3-korg From 5877786fcf52d1b255afcd61832753d1619f0738 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 6 Mar 2024 13:49:19 +0100 Subject: net: nexthop: Add hardware statistics notifications Add hw_stats field to several notifier structures to communicate to the drivers that HW statistics should be configured for nexthops within a given group. Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- include/net/nexthop.h | 3 +++ net/ipv4/nexthop.c | 2 ++ 2 files changed, 5 insertions(+) (limited to 'net') diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 6e6a36fee51e9..584c37120c204 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -130,6 +130,7 @@ struct nh_group { bool resilient; bool fdb_nh; bool has_v4; + bool hw_stats; struct nh_res_table __rcu *res_table; struct nh_grp_entry nh_entries[] __counted_by(num_nh); @@ -193,6 +194,7 @@ struct nh_notifier_grp_entry_info { struct nh_notifier_grp_info { u16 num_nh; bool is_fdb; + bool hw_stats; struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh); }; @@ -206,6 +208,7 @@ struct nh_notifier_res_bucket_info { struct nh_notifier_res_table_info { u16 num_nh_buckets; + bool hw_stats; struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets); }; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 3365c41eee9a2..c0d14e16e9c8f 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -141,6 +141,7 @@ static int nh_notifier_mpath_info_init(struct nh_notifier_info *info, info->nh_grp->num_nh = num_nh; info->nh_grp->is_fdb = nhg->fdb_nh; + info->nh_grp->hw_stats = nhg->hw_stats; for (i = 0; i < num_nh; i++) { struct nh_grp_entry *nhge = &nhg->nh_entries[i]; @@ -172,6 +173,7 @@ static int nh_notifier_res_table_info_init(struct nh_notifier_info *info, return -ENOMEM; info->nh_res_table->num_nh_buckets = num_nh_buckets; + info->nh_res_table->hw_stats = nhg->hw_stats; for (i = 0; i < num_nh_buckets; i++) { struct nh_res_bucket *bucket = &res_table->nh_buckets[i]; -- cgit 1.2.3-korg From 746c19a52ec50b81422fd4772254d55e588d7df6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 6 Mar 2024 13:49:20 +0100 Subject: net: nexthop: Add ability to enable / disable hardware statistics Add netlink support for enabling collection of HW statistics on nexthop groups. Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- include/net/nexthop.h | 2 ++ include/uapi/linux/nexthop.h | 3 +++ net/ipv4/nexthop.c | 15 ++++++++++++++- 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 584c37120c204..1ba6d36685187 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -47,6 +47,8 @@ struct nh_config { bool nh_grp_res_has_idle_timer; bool nh_grp_res_has_unbalanced_timer; + bool nh_hw_stats; + struct nlattr *nh_encap; u16 nh_encap_type; diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h index f4db63c170853..b86af80d4e094 100644 --- a/include/uapi/linux/nexthop.h +++ b/include/uapi/linux/nexthop.h @@ -68,6 +68,9 @@ enum { /* nested; nexthop group stats */ NHA_GROUP_STATS, + /* u32; nexthop hardware stats enable */ + NHA_HW_STATS_ENABLE, + __NHA_MAX, }; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index c0d14e16e9c8f..e75b1aae5a813 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -39,6 +39,7 @@ static const struct nla_policy rtm_nh_policy_new[] = { [NHA_ENCAP] = { .type = NLA_NESTED }, [NHA_FDB] = { .type = NLA_FLAG }, [NHA_RES_GROUP] = { .type = NLA_NESTED }, + [NHA_HW_STATS_ENABLE] = NLA_POLICY_MAX(NLA_U32, true), }; static const struct nla_policy rtm_nh_policy_get[] = { @@ -778,7 +779,8 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nexthop *nh, goto nla_put_failure; if (op_flags & NHA_OP_FLAG_DUMP_STATS && - nla_put_nh_group_stats(skb, nh)) + (nla_put_u32(skb, NHA_HW_STATS_ENABLE, nhg->hw_stats) || + nla_put_nh_group_stats(skb, nh))) goto nla_put_failure; return 0; @@ -1202,6 +1204,7 @@ static int nh_check_attr_group(struct net *net, if (!tb[i]) continue; switch (i) { + case NHA_HW_STATS_ENABLE: case NHA_FDB: continue; case NHA_RES_GROUP: @@ -2622,6 +2625,9 @@ static struct nexthop *nexthop_create_group(struct net *net, if (cfg->nh_fdb) nhg->fdb_nh = 1; + if (cfg->nh_hw_stats) + nhg->hw_stats = true; + rcu_assign_pointer(nh->nh_grp, nhg); return nh; @@ -2964,6 +2970,9 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, err = rtm_to_nh_config_grp_res(tb[NHA_RES_GROUP], cfg, extack); + if (tb[NHA_HW_STATS_ENABLE]) + cfg->nh_hw_stats = nla_get_u32(tb[NHA_HW_STATS_ENABLE]); + /* no other attributes should be set */ goto out; } @@ -3055,6 +3064,10 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, goto out; } + if (tb[NHA_HW_STATS_ENABLE]) { + NL_SET_ERR_MSG(extack, "Cannot enable nexthop hardware statistics for non-group nexthops"); + goto out; + } err = 0; out: -- cgit 1.2.3-korg From 5072ae00aea434d922cabd1c3e6236350a77c4d7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 6 Mar 2024 13:49:21 +0100 Subject: net: nexthop: Expose nexthop group HW stats to user space Add netlink support for reading NH group hardware stats. Stats collection is done through a new notifier, NEXTHOP_EVENT_HW_STATS_REPORT_DELTA. Drivers that implement HW counters for a given NH group are thereby asked to collect the stats and report back to core by calling nh_grp_hw_stats_report_delta(). This is similar to what netdevice L3 stats do. Besides exposing number of packets that passed in the HW datapath, also include information on whether any driver actually realizes the counters. The core can tell based on whether it got any _report_delta() reports from the drivers. This allows enabling the statistics at the group at any time, with drivers opting into supporting them. This is also in line with what netdevice L3 stats are doing. So as not to waste time and space, tie the collection and reporting of HW stats with a new op flag, NHA_OP_FLAG_DUMP_HW_STATS. Co-developed-by: Petr Machata Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: Kees Cook # For the __counted_by bits Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/nexthop.h | 18 ++++++ include/uapi/linux/nexthop.h | 9 +++ net/ipv4/nexthop.c | 130 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 149 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 1ba6d36685187..7ec9cc80f11c7 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -122,6 +122,7 @@ struct nh_grp_entry { struct list_head nh_list; struct nexthop *nh_parent; /* nexthop of group with this entry */ + u64 packets_hw; }; struct nh_group { @@ -166,6 +167,7 @@ enum nexthop_event_type { NEXTHOP_EVENT_REPLACE, NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE, NEXTHOP_EVENT_BUCKET_REPLACE, + NEXTHOP_EVENT_HW_STATS_REPORT_DELTA, }; enum nh_notifier_info_type { @@ -173,6 +175,7 @@ enum nh_notifier_info_type { NH_NOTIFIER_INFO_TYPE_GRP, NH_NOTIFIER_INFO_TYPE_RES_TABLE, NH_NOTIFIER_INFO_TYPE_RES_BUCKET, + NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS, }; struct nh_notifier_single_info { @@ -214,6 +217,17 @@ struct nh_notifier_res_table_info { struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets); }; +struct nh_notifier_grp_hw_stats_entry_info { + u32 id; + u64 packets; +}; + +struct nh_notifier_grp_hw_stats_info { + u16 num_nh; + bool hw_stats_used; + struct nh_notifier_grp_hw_stats_entry_info stats[] __counted_by(num_nh); +}; + struct nh_notifier_info { struct net *net; struct netlink_ext_ack *extack; @@ -224,6 +238,7 @@ struct nh_notifier_info { struct nh_notifier_grp_info *nh_grp; struct nh_notifier_res_table_info *nh_res_table; struct nh_notifier_res_bucket_info *nh_res_bucket; + struct nh_notifier_grp_hw_stats_info *nh_grp_hw_stats; }; }; @@ -236,6 +251,9 @@ void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index, bool offload, bool trap); void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets, unsigned long *activity); +void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info, + unsigned int nh_idx, + u64 delta_packets); /* caller is holding rcu or rtnl; no reference taken to nexthop */ struct nexthop *nexthop_find_by_id(struct net *net, u32 id); diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h index b86af80d4e094..dd8787f9cf39a 100644 --- a/include/uapi/linux/nexthop.h +++ b/include/uapi/linux/nexthop.h @@ -31,6 +31,7 @@ enum { #define NEXTHOP_GRP_TYPE_MAX (__NEXTHOP_GRP_TYPE_MAX - 1) #define NHA_OP_FLAG_DUMP_STATS BIT(0) +#define NHA_OP_FLAG_DUMP_HW_STATS BIT(1) enum { NHA_UNSPEC, @@ -71,6 +72,9 @@ enum { /* u32; nexthop hardware stats enable */ NHA_HW_STATS_ENABLE, + /* u32; read-only; whether any driver collects HW stats */ + NHA_HW_STATS_USED, + __NHA_MAX, }; @@ -132,6 +136,11 @@ enum { /* uint; number of packets forwarded via the nexthop group entry */ NHA_GROUP_STATS_ENTRY_PACKETS, + /* uint; number of packets forwarded via the nexthop group entry in + * hardware + */ + NHA_GROUP_STATS_ENTRY_PACKETS_HW, + __NHA_GROUP_STATS_ENTRY_MAX, }; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index e75b1aae5a813..e34466751d7be 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -26,7 +26,8 @@ static void remove_nexthop(struct net *net, struct nexthop *nh, #define NH_DEV_HASHBITS 8 #define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS) -#define NHA_OP_FLAGS_DUMP_ALL (NHA_OP_FLAG_DUMP_STATS) +#define NHA_OP_FLAGS_DUMP_ALL (NHA_OP_FLAG_DUMP_STATS | \ + NHA_OP_FLAG_DUMP_HW_STATS) static const struct nla_policy rtm_nh_policy_new[] = { [NHA_ID] = { .type = NLA_U32 }, @@ -700,8 +701,95 @@ static void nh_grp_entry_stats_read(struct nh_grp_entry *nhge, } } +static int nh_notifier_grp_hw_stats_init(struct nh_notifier_info *info, + const struct nexthop *nh) +{ + struct nh_group *nhg; + int i; + + ASSERT_RTNL(); + nhg = rtnl_dereference(nh->nh_grp); + + info->id = nh->id; + info->type = NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS; + info->nh_grp_hw_stats = kzalloc(struct_size(info->nh_grp_hw_stats, + stats, nhg->num_nh), + GFP_KERNEL); + if (!info->nh_grp_hw_stats) + return -ENOMEM; + + info->nh_grp_hw_stats->num_nh = nhg->num_nh; + for (i = 0; i < nhg->num_nh; i++) { + struct nh_grp_entry *nhge = &nhg->nh_entries[i]; + + info->nh_grp_hw_stats->stats[i].id = nhge->nh->id; + } + + return 0; +} + +static void nh_notifier_grp_hw_stats_fini(struct nh_notifier_info *info) +{ + kfree(info->nh_grp_hw_stats); +} + +void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info, + unsigned int nh_idx, + u64 delta_packets) +{ + info->hw_stats_used = true; + info->stats[nh_idx].packets += delta_packets; +} +EXPORT_SYMBOL(nh_grp_hw_stats_report_delta); + +static void nh_grp_hw_stats_apply_update(struct nexthop *nh, + struct nh_notifier_info *info) +{ + struct nh_group *nhg; + int i; + + ASSERT_RTNL(); + nhg = rtnl_dereference(nh->nh_grp); + + for (i = 0; i < nhg->num_nh; i++) { + struct nh_grp_entry *nhge = &nhg->nh_entries[i]; + + nhge->packets_hw += info->nh_grp_hw_stats->stats[i].packets; + } +} + +static int nh_grp_hw_stats_update(struct nexthop *nh, bool *hw_stats_used) +{ + struct nh_notifier_info info = { + .net = nh->net, + }; + struct net *net = nh->net; + int err; + + if (nexthop_notifiers_is_empty(net)) + return 0; + + err = nh_notifier_grp_hw_stats_init(&info, nh); + if (err) + return err; + + err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, + NEXTHOP_EVENT_HW_STATS_REPORT_DELTA, + &info); + + /* Cache whatever we got, even if there was an error, otherwise the + * successful stats retrievals would get lost. + */ + nh_grp_hw_stats_apply_update(nh, &info); + *hw_stats_used = info.nh_grp_hw_stats->hw_stats_used; + + nh_notifier_grp_hw_stats_fini(&info); + return notifier_to_errno(err); +} + static int nla_put_nh_group_stats_entry(struct sk_buff *skb, - struct nh_grp_entry *nhge) + struct nh_grp_entry *nhge, + u32 op_flags) { struct nlattr *nest; u64 packets; @@ -713,7 +801,13 @@ static int nla_put_nh_group_stats_entry(struct sk_buff *skb, return -EMSGSIZE; if (nla_put_u32(skb, NHA_GROUP_STATS_ENTRY_ID, nhge->nh->id) || - nla_put_uint(skb, NHA_GROUP_STATS_ENTRY_PACKETS, packets)) + nla_put_uint(skb, NHA_GROUP_STATS_ENTRY_PACKETS, + packets + nhge->packets_hw)) + goto nla_put_failure; + + if (op_flags & NHA_OP_FLAG_DUMP_HW_STATS && + nla_put_uint(skb, NHA_GROUP_STATS_ENTRY_PACKETS_HW, + nhge->packets_hw)) goto nla_put_failure; nla_nest_end(skb, nest); @@ -724,18 +818,35 @@ nla_put_failure: return -EMSGSIZE; } -static int nla_put_nh_group_stats(struct sk_buff *skb, struct nexthop *nh) +static int nla_put_nh_group_stats(struct sk_buff *skb, struct nexthop *nh, + u32 op_flags) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); struct nlattr *nest; + bool hw_stats_used; + int err; int i; + if (nla_put_u32(skb, NHA_HW_STATS_ENABLE, nhg->hw_stats)) + goto err_out; + + if (op_flags & NHA_OP_FLAG_DUMP_HW_STATS && + nhg->hw_stats) { + err = nh_grp_hw_stats_update(nh, &hw_stats_used); + if (err) + goto out; + + if (nla_put_u32(skb, NHA_HW_STATS_USED, hw_stats_used)) + goto err_out; + } + nest = nla_nest_start(skb, NHA_GROUP_STATS); if (!nest) - return -EMSGSIZE; + goto err_out; for (i = 0; i < nhg->num_nh; i++) - if (nla_put_nh_group_stats_entry(skb, &nhg->nh_entries[i])) + if (nla_put_nh_group_stats_entry(skb, &nhg->nh_entries[i], + op_flags)) goto cancel_out; nla_nest_end(skb, nest); @@ -743,7 +854,10 @@ static int nla_put_nh_group_stats(struct sk_buff *skb, struct nexthop *nh) cancel_out: nla_nest_cancel(skb, nest); - return -EMSGSIZE; +err_out: + err = -EMSGSIZE; +out: + return err; } static int nla_put_nh_group(struct sk_buff *skb, struct nexthop *nh, @@ -780,7 +894,7 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nexthop *nh, if (op_flags & NHA_OP_FLAG_DUMP_STATS && (nla_put_u32(skb, NHA_HW_STATS_ENABLE, nhg->hw_stats) || - nla_put_nh_group_stats(skb, nh))) + nla_put_nh_group_stats(skb, nh, op_flags))) goto nla_put_failure; return 0; -- cgit 1.2.3-korg From f0a7da702093d5ff2c54ea18d6b47658be1b4522 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 15:51:41 +0000 Subject: ipv6: make inet6_fill_ifaddr() lockless Make inet6_fill_ifaddr() lockless, and add approriate annotations on ifa->tstamp, ifa->valid_lft, ifa->preferred_lft, ifa->ifa_proto and ifa->rt_priority. Also constify 2nd argument of inet6_fill_ifaddr(), inet6_fill_ifmcaddr() and inet6_fill_ifacaddr(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 66 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f786b65d12e43..5864a1fc6850a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2728,7 +2728,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, if (update_lft) { ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; - ifp->tstamp = now; + WRITE_ONCE(ifp->tstamp, now); flags = ifp->flags; ifp->flags &= ~IFA_F_DEPRECATED; spin_unlock_bh(&ifp->lock); @@ -4896,13 +4896,13 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp, IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR | IFA_F_NOPREFIXROUTE); ifp->flags |= cfg->ifa_flags; - ifp->tstamp = jiffies; - ifp->valid_lft = cfg->valid_lft; - ifp->prefered_lft = cfg->preferred_lft; - ifp->ifa_proto = cfg->ifa_proto; + WRITE_ONCE(ifp->tstamp, jiffies); + WRITE_ONCE(ifp->valid_lft, cfg->valid_lft); + WRITE_ONCE(ifp->prefered_lft, cfg->preferred_lft); + WRITE_ONCE(ifp->ifa_proto, cfg->ifa_proto); if (cfg->rt_priority && cfg->rt_priority != ifp->rt_priority) - ifp->rt_priority = cfg->rt_priority; + WRITE_ONCE(ifp->rt_priority, cfg->rt_priority); if (new_peer) ifp->peer_addr = *cfg->peer_pfx; @@ -5123,17 +5123,21 @@ struct inet6_fill_args { enum addr_type_t type; }; -static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, +static int inet6_fill_ifaddr(struct sk_buff *skb, + const struct inet6_ifaddr *ifa, struct inet6_fill_args *args) { - struct nlmsghdr *nlh; + struct nlmsghdr *nlh; u32 preferred, valid; + u32 flags, priority; + u8 proto; nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(struct ifaddrmsg), args->flags); if (!nlh) return -EMSGSIZE; + flags = READ_ONCE(ifa->flags); put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope), ifa->idev->dev->ifindex); @@ -5141,13 +5145,14 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) goto error; - spin_lock_bh(&ifa->lock); - if (!((ifa->flags&IFA_F_PERMANENT) && - (ifa->prefered_lft == INFINITY_LIFE_TIME))) { - preferred = ifa->prefered_lft; - valid = ifa->valid_lft; + preferred = READ_ONCE(ifa->prefered_lft); + valid = READ_ONCE(ifa->valid_lft); + + if (!((flags & IFA_F_PERMANENT) && + (preferred == INFINITY_LIFE_TIME))) { if (preferred != INFINITY_LIFE_TIME) { - long tval = (jiffies - ifa->tstamp)/HZ; + long tval = (jiffies - READ_ONCE(ifa->tstamp)) / HZ; + if (preferred > tval) preferred -= tval; else @@ -5163,28 +5168,29 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, preferred = INFINITY_LIFE_TIME; valid = INFINITY_LIFE_TIME; } - spin_unlock_bh(&ifa->lock); if (!ipv6_addr_any(&ifa->peer_addr)) { if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 || nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->peer_addr) < 0) goto error; - } else + } else { if (nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->addr) < 0) goto error; + } - if (ifa->rt_priority && - nla_put_u32(skb, IFA_RT_PRIORITY, ifa->rt_priority)) + priority = READ_ONCE(ifa->rt_priority); + if (priority && nla_put_u32(skb, IFA_RT_PRIORITY, priority)) goto error; - if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) + if (put_cacheinfo(skb, ifa->cstamp, READ_ONCE(ifa->tstamp), + preferred, valid) < 0) goto error; - if (nla_put_u32(skb, IFA_FLAGS, ifa->flags) < 0) + if (nla_put_u32(skb, IFA_FLAGS, flags) < 0) goto error; - if (ifa->ifa_proto && - nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) + proto = READ_ONCE(ifa->ifa_proto); + if (proto && nla_put_u8(skb, IFA_PROTO, proto)) goto error; nlmsg_end(skb, nlh); @@ -5195,12 +5201,13 @@ error: return -EMSGSIZE; } -static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, +static int inet6_fill_ifmcaddr(struct sk_buff *skb, + const struct ifmcaddr6 *ifmca, struct inet6_fill_args *args) { - struct nlmsghdr *nlh; - u8 scope = RT_SCOPE_UNIVERSE; int ifindex = ifmca->idev->dev->ifindex; + u8 scope = RT_SCOPE_UNIVERSE; + struct nlmsghdr *nlh; if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; @@ -5218,7 +5225,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); if (nla_put_in6_addr(skb, IFA_MULTICAST, &ifmca->mca_addr) < 0 || - put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp, + put_cacheinfo(skb, ifmca->mca_cstamp, READ_ONCE(ifmca->mca_tstamp), INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { nlmsg_cancel(skb, nlh); return -EMSGSIZE; @@ -5228,13 +5235,14 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, return 0; } -static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, +static int inet6_fill_ifacaddr(struct sk_buff *skb, + const struct ifacaddr6 *ifaca, struct inet6_fill_args *args) { struct net_device *dev = fib6_info_nh_dev(ifaca->aca_rt); int ifindex = dev ? dev->ifindex : 1; - struct nlmsghdr *nlh; u8 scope = RT_SCOPE_UNIVERSE; + struct nlmsghdr *nlh; if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; @@ -5252,7 +5260,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); if (nla_put_in6_addr(skb, IFA_ANYCAST, &ifaca->aca_addr) < 0 || - put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp, + put_cacheinfo(skb, ifaca->aca_cstamp, READ_ONCE(ifaca->aca_tstamp), INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { nlmsg_cancel(skb, nlh); return -EMSGSIZE; -- cgit 1.2.3-korg From 46f5182dd792c55940ca520576d1544744732b81 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 15:51:42 +0000 Subject: ipv6: make in6_dump_addrs() lockless in6_dump_addrs() is called with RCU protection. There is no need holding idev->lock to iterate through unicast addresses. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5864a1fc6850a..c662972d92063 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5271,23 +5271,22 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, } /* called with rcu_read_lock() */ -static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, +static int in6_dump_addrs(const struct inet6_dev *idev, struct sk_buff *skb, struct netlink_callback *cb, int s_ip_idx, struct inet6_fill_args *fillargs) { - struct ifmcaddr6 *ifmca; - struct ifacaddr6 *ifaca; + const struct ifmcaddr6 *ifmca; + const struct ifacaddr6 *ifaca; int ip_idx = 0; int err = 1; - read_lock_bh(&idev->lock); switch (fillargs->type) { case UNICAST_ADDR: { - struct inet6_ifaddr *ifa; + const struct inet6_ifaddr *ifa; fillargs->event = RTM_NEWADDR; /* unicast address incl. temp addr */ - list_for_each_entry(ifa, &idev->addr_list, if_list) { + list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) { if (ip_idx < s_ip_idx) goto next; err = inet6_fill_ifaddr(skb, ifa, fillargs); @@ -5300,7 +5299,6 @@ next: break; } case MULTICAST_ADDR: - read_unlock_bh(&idev->lock); fillargs->event = RTM_GETMULTICAST; /* multicast address */ @@ -5313,7 +5311,6 @@ next: if (err < 0) break; } - read_lock_bh(&idev->lock); break; case ANYCAST_ADDR: fillargs->event = RTM_GETANYCAST; @@ -5330,7 +5327,6 @@ next: default: break; } - read_unlock_bh(&idev->lock); cb->args[2] = ip_idx; return err; } -- cgit 1.2.3-korg From 9cc4cc329d30dbb353fbb23ef8cd633e8f5b8ccf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 15:51:43 +0000 Subject: ipv6: use xa_array iterator to implement inet6_dump_addr() inet6_dump_addr() can use the new xa_array iterator for better scalability. Make it ready for RCU-only protection. RTNL use is removed in the following patch. Also properly return 0 at the end of a dump to avoid and extra recvmsg() to get NLMSG_DONE. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 79 ++++++++++++++++++++--------------------------------- 1 file changed, 30 insertions(+), 49 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c662972d92063..4359e49102490 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -717,7 +717,7 @@ errout: static u32 inet6_base_seq(const struct net *net) { u32 res = atomic_read(&net->ipv6.dev_addr_genid) + - net->dev_base_seq; + READ_ONCE(net->dev_base_seq); /* Must not return 0 (see nl_dump_check_consistent()). * Chose a value far away from 0. @@ -5272,13 +5272,13 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, /* called with rcu_read_lock() */ static int in6_dump_addrs(const struct inet6_dev *idev, struct sk_buff *skb, - struct netlink_callback *cb, int s_ip_idx, + struct netlink_callback *cb, int *s_ip_idx, struct inet6_fill_args *fillargs) { const struct ifmcaddr6 *ifmca; const struct ifacaddr6 *ifaca; int ip_idx = 0; - int err = 1; + int err = 0; switch (fillargs->type) { case UNICAST_ADDR: { @@ -5287,7 +5287,7 @@ static int in6_dump_addrs(const struct inet6_dev *idev, struct sk_buff *skb, /* unicast address incl. temp addr */ list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) { - if (ip_idx < s_ip_idx) + if (ip_idx < *s_ip_idx) goto next; err = inet6_fill_ifaddr(skb, ifa, fillargs); if (err < 0) @@ -5305,7 +5305,7 @@ next: for (ifmca = rcu_dereference(idev->mc_list); ifmca; ifmca = rcu_dereference(ifmca->next), ip_idx++) { - if (ip_idx < s_ip_idx) + if (ip_idx < *s_ip_idx) continue; err = inet6_fill_ifmcaddr(skb, ifmca, fillargs); if (err < 0) @@ -5317,7 +5317,7 @@ next: /* anycast address */ for (ifaca = rcu_dereference(idev->ac_list); ifaca; ifaca = rcu_dereference(ifaca->aca_next), ip_idx++) { - if (ip_idx < s_ip_idx) + if (ip_idx < *s_ip_idx) continue; err = inet6_fill_ifacaddr(skb, ifaca, fillargs); if (err < 0) @@ -5327,7 +5327,7 @@ next: default: break; } - cb->args[2] = ip_idx; + *s_ip_idx = err ? ip_idx : 0; return err; } @@ -5390,6 +5390,7 @@ static int inet6_valid_dump_ifaddr_req(const struct nlmsghdr *nlh, static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, enum addr_type_t type) { + struct net *tgt_net = sock_net(skb->sk); const struct nlmsghdr *nlh = cb->nlh; struct inet6_fill_args fillargs = { .portid = NETLINK_CB(cb->skb).portid, @@ -5398,72 +5399,52 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, .netnsid = -1, .type = type, }; - struct net *tgt_net = sock_net(skb->sk); - int idx, s_idx, s_ip_idx; - int h, s_h; + struct { + unsigned long ifindex; + int ip_idx; + } *ctx = (void *)cb->ctx; struct net_device *dev; struct inet6_dev *idev; - struct hlist_head *head; int err = 0; - s_h = cb->args[0]; - s_idx = idx = cb->args[1]; - s_ip_idx = cb->args[2]; - rcu_read_lock(); if (cb->strict_check) { err = inet6_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net, skb->sk, cb); if (err < 0) - goto put_tgt_net; + goto done; err = 0; if (fillargs.ifindex) { - dev = __dev_get_by_index(tgt_net, fillargs.ifindex); - if (!dev) { - err = -ENODEV; - goto put_tgt_net; - } + err = -ENODEV; + dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex); + if (!dev) + goto done; idev = __in6_dev_get(dev); - if (idev) { - err = in6_dump_addrs(idev, skb, cb, s_ip_idx, + if (idev) + err = in6_dump_addrs(idev, skb, cb, + &ctx->ip_idx, &fillargs); - if (err > 0) - err = 0; - } - goto put_tgt_net; + goto done; } } cb->seq = inet6_base_seq(tgt_net); - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - idx = 0; - head = &tgt_net->dev_index_head[h]; - hlist_for_each_entry_rcu(dev, head, index_hlist) { - if (idx < s_idx) - goto cont; - if (h > s_h || idx > s_idx) - s_ip_idx = 0; - idev = __in6_dev_get(dev); - if (!idev) - goto cont; - - if (in6_dump_addrs(idev, skb, cb, s_ip_idx, - &fillargs) < 0) - goto done; -cont: - idx++; - } + for_each_netdev_dump(tgt_net, dev, ctx->ifindex) { + idev = __in6_dev_get(dev); + if (!idev) + continue; + err = in6_dump_addrs(idev, skb, cb, &ctx->ip_idx, + &fillargs); + if (err < 0) + goto done; } done: - cb->args[0] = h; - cb->args[1] = idx; -put_tgt_net: rcu_read_unlock(); if (fillargs.netnsid >= 0) put_net(tgt_net); - return skb->len ? : err; + return err; } static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) -- cgit 1.2.3-korg From 155549a6683b1ee37cecec1bd4a439083b706656 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 15:51:44 +0000 Subject: ipv6: remove RTNL protection from inet6_dump_addr() We can now remove RTNL acquisition while running inet6_dump_addr(), inet6_dump_ifmcaddr() and inet6_dump_ifacaddr(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4359e49102490..247bd4d8ee45a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -7454,15 +7454,18 @@ int __init addrconf_init(void) goto errout; err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, inet6_dump_ifaddr, - RTNL_FLAG_DOIT_UNLOCKED); + RTNL_FLAG_DOIT_UNLOCKED | + RTNL_FLAG_DUMP_UNLOCKED); if (err < 0) goto errout; err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETMULTICAST, - NULL, inet6_dump_ifmcaddr, 0); + NULL, inet6_dump_ifmcaddr, + RTNL_FLAG_DUMP_UNLOCKED); if (err < 0) goto errout; err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETANYCAST, - NULL, inet6_dump_ifacaddr, 0); + NULL, inet6_dump_ifacaddr, + RTNL_FLAG_DUMP_UNLOCKED); if (err < 0) goto errout; err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF, -- cgit 1.2.3-korg From 2ab3e8d67fc1d4a7638b769cf83023ec209fc0a9 Mon Sep 17 00:00:00 2001 From: Frédéric Danis Date: Thu, 7 Mar 2024 17:42:05 +0100 Subject: Bluetooth: Fix eir name length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to Section 1.2 of Core Specification Supplement Part A the complete or short name strings are defined as utf8s, which should not include the trailing NULL for variable length array as defined in Core Specification Vol1 Part E Section 2.9.3. Removing the trailing NULL allows PTS to retrieve the random address based on device name, e.g. for SM/PER/KDU/BV-02-C, SM/PER/KDU/BV-08-C or GAP/BROB/BCST/BV-03-C. Fixes: f61851f64b17 ("Bluetooth: Fix append max 11 bytes of name to scan rsp data") Signed-off-by: Frédéric Danis Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/eir.c | 29 +++++++---------------------- net/bluetooth/mgmt.c | 2 +- 2 files changed, 8 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c index 9214189279e80..1bc51e2b05a34 100644 --- a/net/bluetooth/eir.c +++ b/net/bluetooth/eir.c @@ -13,48 +13,33 @@ #define PNP_INFO_SVCLASS_ID 0x1200 -static u8 eir_append_name(u8 *eir, u16 eir_len, u8 type, u8 *data, u8 data_len) -{ - u8 name[HCI_MAX_SHORT_NAME_LENGTH + 1]; - - /* If data is already NULL terminated just pass it directly */ - if (data[data_len - 1] == '\0') - return eir_append_data(eir, eir_len, type, data, data_len); - - memcpy(name, data, HCI_MAX_SHORT_NAME_LENGTH); - name[HCI_MAX_SHORT_NAME_LENGTH] = '\0'; - - return eir_append_data(eir, eir_len, type, name, sizeof(name)); -} - u8 eir_append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) { size_t short_len; size_t complete_len; - /* no space left for name (+ NULL + type + len) */ - if ((max_adv_len(hdev) - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3) + /* no space left for name (+ type + len) */ + if ((max_adv_len(hdev) - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 2) return ad_len; /* use complete name if present and fits */ complete_len = strnlen(hdev->dev_name, sizeof(hdev->dev_name)); if (complete_len && complete_len <= HCI_MAX_SHORT_NAME_LENGTH) - return eir_append_name(ptr, ad_len, EIR_NAME_COMPLETE, - hdev->dev_name, complete_len + 1); + return eir_append_data(ptr, ad_len, EIR_NAME_COMPLETE, + hdev->dev_name, complete_len); /* use short name if present */ short_len = strnlen(hdev->short_name, sizeof(hdev->short_name)); if (short_len) - return eir_append_name(ptr, ad_len, EIR_NAME_SHORT, + return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, hdev->short_name, - short_len == HCI_MAX_SHORT_NAME_LENGTH ? - short_len : short_len + 1); + short_len); /* use shortened full name if present, we already know that name * is longer then HCI_MAX_SHORT_NAME_LENGTH */ if (complete_len) - return eir_append_name(ptr, ad_len, EIR_NAME_SHORT, + return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, hdev->dev_name, HCI_MAX_SHORT_NAME_LENGTH); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 9613cc8a60f8b..32ed6e9245a30 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -8408,7 +8408,7 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, static u8 calculate_name_len(struct hci_dev *hdev) { - u8 buf[HCI_MAX_SHORT_NAME_LENGTH + 3]; + u8 buf[HCI_MAX_SHORT_NAME_LENGTH + 2]; /* len + type + name */ return eir_append_local_name(hdev, buf, 0); } -- cgit 1.2.3-korg From 3d1c16e920c88eb5e583e1b4a10b95a5dc97ec22 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 8 Mar 2024 11:02:48 -0500 Subject: Bluetooth: hci_sync: Fix UAF in hci_acl_create_conn_sync This fixes the following error caused by hci_conn being freed while hcy_acl_create_conn_sync is pending: ================================================================== BUG: KASAN: slab-use-after-free in hci_acl_create_conn_sync+0xa7/0x2e0 Write of size 2 at addr ffff888002ae0036 by task kworker/u3:0/848 CPU: 0 PID: 848 Comm: kworker/u3:0 Not tainted 6.8.0-rc6-g2ab3e8d67fc1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014 Workqueue: hci0 hci_cmd_sync_work Call Trace: dump_stack_lvl+0x21/0x70 print_report+0xce/0x620 ? preempt_count_sub+0x13/0xc0 ? __virt_addr_valid+0x15f/0x310 ? hci_acl_create_conn_sync+0xa7/0x2e0 kasan_report+0xdf/0x110 ? hci_acl_create_conn_sync+0xa7/0x2e0 hci_acl_create_conn_sync+0xa7/0x2e0 ? __pfx_hci_acl_create_conn_sync+0x10/0x10 ? __pfx_lock_release+0x10/0x10 ? __pfx_hci_acl_create_conn_sync+0x10/0x10 hci_cmd_sync_work+0x138/0x1c0 process_one_work+0x405/0x800 ? __pfx_lock_acquire+0x10/0x10 ? __pfx_process_one_work+0x10/0x10 worker_thread+0x37b/0x670 ? __pfx_worker_thread+0x10/0x10 kthread+0x19b/0x1e0 ? kthread+0xfe/0x1e0 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2f/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Allocated by task 847: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 __kasan_kmalloc+0x8f/0xa0 hci_conn_add+0xc6/0x970 hci_connect_acl+0x309/0x410 pair_device+0x4fb/0x710 hci_sock_sendmsg+0x933/0xef0 sock_write_iter+0x2c3/0x2d0 do_iter_readv_writev+0x21a/0x2e0 vfs_writev+0x21c/0x7b0 do_writev+0x14a/0x180 do_syscall_64+0x77/0x150 entry_SYSCALL_64_after_hwframe+0x6c/0x74 Freed by task 847: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 kasan_save_free_info+0x3b/0x60 __kasan_slab_free+0xfa/0x150 kfree+0xcb/0x250 device_release+0x58/0xf0 kobject_put+0xbb/0x160 hci_conn_del+0x281/0x570 hci_conn_hash_flush+0xfc/0x130 hci_dev_close_sync+0x336/0x960 hci_dev_close+0x10e/0x140 hci_sock_ioctl+0x14a/0x5c0 sock_ioctl+0x58a/0x5d0 __x64_sys_ioctl+0x480/0xf60 do_syscall_64+0x77/0x150 entry_SYSCALL_64_after_hwframe+0x6c/0x74 Fixes: 45340097ce6e ("Bluetooth: hci_conn: Only do ACL connections sequentially") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 8a3d0d1f78717..f6b662369322b 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6709,6 +6709,9 @@ static int hci_acl_create_conn_sync(struct hci_dev *hdev, void *data) struct hci_cp_create_conn cp; int err; + if (!hci_conn_valid(hdev, conn)) + return -ECANCELED; + /* Many controllers disallow HCI Create Connection while it is doing * HCI Inquiry. So we cancel the Inquiry first before issuing HCI Create * Connection. This may cause the MGMT discovering state to become false -- cgit 1.2.3-korg From 1cface552a5b5f6e53a855de1a503ff958e2e253 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Mar 2024 12:34:46 +0000 Subject: net: add skb_data_unref() helper Similar to skb_unref(), add skb_data_unref() to save an expensive atomic operation (and cache line dirtying) when last reference on shinfo->dataref is released. I saw this opportunity on hosts with RAW sockets accidentally bound to UDP protocol, forcing an skb_clone() on all received packets. These RAW sockets had their receive queue full, so all clone packets were immediately dropped. When UDP recvmsg() consumes later the original skb, skb_release_data() is hitting atomic_sub_return() quite badly, because skb->clone has been set permanently. Note that this patch helps TCP TX performance, because TCP stack also use (fast) clones. This means that at least one of the two packets (the main skb or its clone) will no longer have to perform this atomic operation in skb_release_data(). Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20240307123446.2302230-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 18 ++++++++++++++++++ net/core/skbuff.c | 4 +--- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d0508f90bed50..3023bc2be6a1c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1237,6 +1237,24 @@ static inline bool skb_unref(struct sk_buff *skb) return true; } +static inline bool skb_data_unref(const struct sk_buff *skb, + struct skb_shared_info *shinfo) +{ + int bias; + + if (!skb->cloned) + return true; + + bias = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1; + + if (atomic_read(&shinfo->dataref) == bias) + smp_rmb(); + else if (atomic_sub_return(bias, &shinfo->dataref)) + return false; + + return true; +} + void __fix_address kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 766219011aeaf..b99127712e670 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1115,9 +1115,7 @@ static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason, struct skb_shared_info *shinfo = skb_shinfo(skb); int i; - if (skb->cloned && - atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1, - &shinfo->dataref)) + if (!skb_data_unref(skb, shinfo)) goto exit; if (skb_zcopy(skb)) { -- cgit 1.2.3-korg From 5d9b7cb383bb3228d72066c15206299858214f92 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 7 Mar 2024 17:47:27 +0200 Subject: nexthop: Simplify dump error handling The only error that can happen during a nexthop dump is insufficient space in the skb caring the netlink messages (EMSGSIZE). If this happens and some messages were already filled in, the nexthop code returns the skb length to signal the netlink core that more objects need to be dumped. After commit b5a899154aa9 ("netlink: handle EMSGSIZE errors in the core") there is no need to handle this error in the nexthop code as it is now handled in the core. Simplify the code and simply return the error to the core. No regressions in nexthop tests: # ./fib_nexthops.sh Tests passed: 234 Tests failed: 0 Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240307154727.3555462-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index e34466751d7be..5eb3ba568f4e7 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -3491,10 +3491,6 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) err = rtm_dump_walk_nexthops(skb, cb, root, ctx, &rtm_dump_nexthop_cb, &filter); - if (err < 0) { - if (likely(skb->len)) - err = skb->len; - } cb->seq = net->nexthop.seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -3688,11 +3684,6 @@ static int rtm_dump_nexthop_bucket(struct sk_buff *skb, &rtm_dump_nexthop_bucket_cb, &dd); } - if (err < 0) { - if (likely(skb->len)) - err = skb->len; - } - cb->seq = net->nexthop.seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); return err; -- cgit 1.2.3-korg From 026763ece881b4c636173c25d012cde085689027 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Mar 2024 16:29:43 +0000 Subject: ipv6: raw: check sk->sk_rcvbuf earlier There is no point cloning an skb and having to free the clone if the receive queue of the raw socket is full. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20240307162943.2523817-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/raw.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 779274055abf9..ca49e6617afaf 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -160,6 +160,13 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) if (!raw_v6_match(net, sk, nexthdr, daddr, saddr, inet6_iif(skb), inet6_sdif(skb))) continue; + + if (atomic_read(&sk->sk_rmem_alloc) >= + READ_ONCE(sk->sk_rcvbuf)) { + atomic_inc(&sk->sk_drops); + continue; + } + delivered = true; switch (nexthdr) { case IPPROTO_ICMPV6: -- cgit 1.2.3-korg From d721812aa875a898eb38b2729abf6e96abdb357b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Mar 2024 16:30:20 +0000 Subject: ipv4: raw: check sk->sk_rcvbuf earlier There is no point cloning an skb and having to free the clone if the receive queue of the raw socket is full. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20240307163020.2524409-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/raw.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 494a6284bd7ee..42ac434cfcfa6 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -175,6 +175,13 @@ static int raw_v4_input(struct net *net, struct sk_buff *skb, if (!raw_v4_match(net, sk, iph->protocol, iph->saddr, iph->daddr, dif, sdif)) continue; + + if (atomic_read(&sk->sk_rmem_alloc) >= + READ_ONCE(sk->sk_rcvbuf)) { + atomic_inc(&sk->sk_drops); + continue; + } + delivered = 1; if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) && ip_mc_sf_allow(sk, iph->daddr, iph->saddr, -- cgit 1.2.3-korg From 716edc9706deb3bb2ff56e2eeb83559cea8f22db Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Thu, 7 Mar 2024 14:23:49 +0000 Subject: tcp: fix incorrect parameter validation in the do_tcp_getsockopt() function The 'len' variable can't be negative when assigned the result of 'min_t' because all 'min_t' parameters are cast to unsigned int, and then the minimum one is chosen. To fix the logic, check 'len' as read from 'optlen', where the types of relevant variables are (signed) int. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Gavrilov Ilia Reviewed-by: Jason Xing Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c5b83875411ae..b091149742b1d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4011,11 +4011,11 @@ int do_tcp_getsockopt(struct sock *sk, int level, if (copy_from_sockptr(&len, optlen, sizeof(int))) return -EFAULT; - len = min_t(unsigned int, len, sizeof(int)); - if (len < 0) return -EINVAL; + len = min_t(unsigned int, len, sizeof(int)); + switch (optname) { case TCP_MAXSEG: val = tp->mss_cache; -- cgit 1.2.3-korg From 5c3be3e0eb44b7f978bb6cbb20ad956adb93f736 Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Thu, 7 Mar 2024 14:23:50 +0000 Subject: ipmr: fix incorrect parameter validation in the ip_mroute_getsockopt() function The 'olr' variable can't be negative when assigned the result of 'min_t' because all 'min_t' parameters are cast to unsigned int, and then the minimum one is chosen. To fix the logic, check 'olr' as read from 'optlen', where the types of relevant variables are (signed) int. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Gavrilov Ilia Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 0708ac6f6c582..fd5c01c8489f0 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1603,9 +1603,11 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval, if (copy_from_sockptr(&olr, optlen, sizeof(int))) return -EFAULT; - olr = min_t(unsigned int, olr, sizeof(int)); if (olr < 0) return -EINVAL; + + olr = min_t(unsigned int, olr, sizeof(int)); + if (copy_to_sockptr(optlen, &olr, sizeof(int))) return -EFAULT; if (copy_to_sockptr(optval, &val, olr)) -- cgit 1.2.3-korg From 955e9876ba4ee26eeaab1b13517f5b2c88e73d55 Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Thu, 7 Mar 2024 14:23:50 +0000 Subject: l2tp: fix incorrect parameter validation in the pppol2tp_getsockopt() function The 'len' variable can't be negative when assigned the result of 'min_t' because all 'min_t' parameters are cast to unsigned int, and then the minimum one is chosen. To fix the logic, check 'len' as read from 'optlen', where the types of relevant variables are (signed) int. Fixes: 3557baabf280 ("[L2TP]: PPP over L2TP driver core") Reviewed-by: Tom Parkin Signed-off-by: Gavrilov Ilia Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index f011af6601c9c..6146e4e67bbb5 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1356,11 +1356,11 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - len = min_t(unsigned int, len, sizeof(int)); - if (len < 0) return -EINVAL; + len = min_t(unsigned int, len, sizeof(int)); + err = -ENOTCONN; if (!sk->sk_user_data) goto end; -- cgit 1.2.3-korg From 4bb3ba7b74fceec6f558745b25a43c6521cf5506 Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Thu, 7 Mar 2024 14:23:50 +0000 Subject: udp: fix incorrect parameter validation in the udp_lib_getsockopt() function The 'len' variable can't be negative when assigned the result of 'min_t' because all 'min_t' parameters are cast to unsigned int, and then the minimum one is chosen. To fix the logic, check 'len' as read from 'optlen', where the types of relevant variables are (signed) int. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reviewed-by: Willem de Bruijn Signed-off-by: Gavrilov Ilia Signed-off-by: David S. Miller --- net/ipv4/udp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2beabf5b2d862..43ec4812b60ea 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2791,11 +2791,11 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - len = min_t(unsigned int, len, sizeof(int)); - if (len < 0) return -EINVAL; + len = min_t(unsigned int, len, sizeof(int)); + switch (optname) { case UDP_CORK: val = udp_test_bit(CORK, sk); -- cgit 1.2.3-korg From 3ed5f415133f9b7518fbe55ba9ae9a3f5e700929 Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Thu, 7 Mar 2024 14:23:50 +0000 Subject: net: kcm: fix incorrect parameter validation in the kcm_getsockopt) function The 'len' variable can't be negative when assigned the result of 'min_t' because all 'min_t' parameters are cast to unsigned int, and then the minimum one is chosen. To fix the logic, check 'len' as read from 'optlen', where the types of relevant variables are (signed) int. Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Signed-off-by: Gavrilov Ilia Signed-off-by: David S. Miller --- net/kcm/kcmsock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index dc46f17a3187e..2f191e50d4fc9 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1153,10 +1153,11 @@ static int kcm_getsockopt(struct socket *sock, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - len = min_t(unsigned int, len, sizeof(int)); if (len < 0) return -EINVAL; + len = min_t(unsigned int, len, sizeof(int)); + switch (optname) { case KCM_RECV_DISABLE: val = kcm->rx_disabled; -- cgit 1.2.3-korg From d6eb8de2015f0c24822e47356f839167ebde2945 Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Thu, 7 Mar 2024 14:23:50 +0000 Subject: net/x25: fix incorrect parameter validation in the x25_getsockopt() function The 'len' variable can't be negative when assigned the result of 'min_t' because all 'min_t' parameters are cast to unsigned int, and then the minimum one is chosen. To fix the logic, check 'len' as read from 'optlen', where the types of relevant variables are (signed) int. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Gavrilov Ilia Signed-off-by: David S. Miller --- net/x25/af_x25.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index f7a7c7798c3b2..d18d51412cc00 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -460,12 +460,12 @@ static int x25_getsockopt(struct socket *sock, int level, int optname, if (get_user(len, optlen)) goto out; - len = min_t(unsigned int, len, sizeof(int)); - rc = -EINVAL; if (len < 0) goto out; + len = min_t(unsigned int, len, sizeof(int)); + rc = -EFAULT; if (put_user(len, optlen)) goto out; -- cgit 1.2.3-korg From 08842c43d0165b0ed78907fd8cc92ce17d857913 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Mar 2024 22:00:16 +0000 Subject: udp: no longer touch sk->sk_refcnt in early demux After commits ca065d0cf80f ("udp: no longer use SLAB_DESTROY_BY_RCU") and 7ae215d23c12 ("bpf: Don't refcount LISTEN sockets in sk_assign()") UDP early demux no longer need to grab a refcount on the UDP socket. This save two atomic operations per incoming packet for connected sockets. Signed-off-by: Eric Dumazet Cc: Martin KaFai Lau Cc: Joe Stringer Cc: Alexei Starovoitov Cc: Willem de Bruijn Cc: Kuniyuki Iwashima Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv4/udp.c | 5 +++-- net/ipv6/udp.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 43ec4812b60ea..661d0e0d273f6 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2568,11 +2568,12 @@ int udp_v4_early_demux(struct sk_buff *skb) uh->source, iph->saddr, dif, sdif); } - if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt)) + if (!sk) return 0; skb->sk = sk; - skb->destructor = sock_efree; + DEBUG_NET_WARN_ON_ONCE(sk_is_refcounted(sk)); + skb->destructor = sock_pfree; dst = rcu_dereference(sk->sk_rx_dst); if (dst) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 80ad8f436b179..7c1e6469d091d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1098,11 +1098,12 @@ void udp_v6_early_demux(struct sk_buff *skb) else return; - if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt)) + if (!sk) return; skb->sk = sk; - skb->destructor = sock_efree; + DEBUG_NET_WARN_ON_ONCE(sk_is_refcounted(sk)); + skb->destructor = sock_pfree; dst = rcu_dereference(sk->sk_rx_dst); if (dst) -- cgit 1.2.3-korg From 9eb430d40e449640122b0b33ace1f33e793ecc5f Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 8 Mar 2024 19:25:03 +0800 Subject: mptcp: annotate a data-race around sysctl_tcp_wmem[0] It's possible that writer and the reader can manipulate the same sysctl knob concurrently. Using READ_ONCE() to prevent reading an old value. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index f16edef6026a3..a10ebf3ee10a1 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -850,7 +850,7 @@ static inline void __mptcp_sync_sndbuf(struct sock *sk) if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) return; - new_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[0]; + new_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[0]); mptcp_for_each_subflow(mptcp_sk(sk), subflow) { ssk_sndbuf = READ_ONCE(mptcp_subflow_tcp_sock(subflow)->sk_sndbuf); -- cgit 1.2.3-korg From 683a67da95616c91a85b98e41dc8eefe9f2b29e7 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 8 Mar 2024 19:25:04 +0800 Subject: tcp: annotate a data-race around sysctl_tcp_wmem[0] When reading wmem[0], it could be changed concurrently without READ_ONCE() protection. So add one annotation here. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b091149742b1d..d20b62d521712 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -975,7 +975,7 @@ int tcp_wmem_schedule(struct sock *sk, int copy) * Use whatever is left in sk->sk_forward_alloc and tcp_wmem[0] * to guarantee some progress. */ - left = sock_net(sk)->ipv4.sysctl_tcp_wmem[0] - sk->sk_wmem_queued; + left = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[0]) - sk->sk_wmem_queued; if (left > 0) sk_forced_mem_schedule(sk, min(left, copy)); return min(copy, sk->sk_forward_alloc); -- cgit 1.2.3-korg From 46f40172b68154106cae660c90c7801b61080892 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Fri, 8 Mar 2024 12:44:58 -0800 Subject: net: page_pool: factor out page_pool recycle check The check is duplicated in 2 places, factor it out into a common helper. Signed-off-by: Mina Almasry Reviewed-by: Yunsheng Lin Reviewed-by: Ilias Apalodimas Link: https://lore.kernel.org/r/20240308204500.1112858-1-almasrymina@google.com Signed-off-by: Jakub Kicinski --- net/core/page_pool.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/page_pool.c b/net/core/page_pool.c index d706fe5548dfe..dd364d738c006 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -657,6 +657,11 @@ static bool page_pool_recycle_in_cache(struct page *page, return true; } +static bool __page_pool_page_can_be_recycled(const struct page *page) +{ + return page_ref_count(page) == 1 && !page_is_pfmemalloc(page); +} + /* If the page refcnt == 1, this will try to recycle the page. * if PP_FLAG_DMA_SYNC_DEV is set, we'll try to sync the DMA area for * the configured size min(dma_sync_size, pool->max_len). @@ -678,7 +683,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page, * page is NOT reusable when allocated when system is under * some pressure. (page_is_pfmemalloc) */ - if (likely(page_ref_count(page) == 1 && !page_is_pfmemalloc(page))) { + if (likely(__page_pool_page_can_be_recycled(page))) { /* Read barrier done in page_ref_count / READ_ONCE */ if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) @@ -793,7 +798,7 @@ static struct page *page_pool_drain_frag(struct page_pool *pool, if (likely(page_pool_unref_page(page, drain_count))) return NULL; - if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) { + if (__page_pool_page_can_be_recycled(page)) { if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) page_pool_dma_sync_for_device(pool, page, -1); -- cgit 1.2.3-korg From 8f4cd89bf10607de08231d6d91a73dd63336808e Mon Sep 17 00:00:00 2001 From: William Tu Date: Sun, 10 Mar 2024 18:45:47 +0200 Subject: devlink: Fix length of eswitch inline-mode Set eswitch inline-mode to be u8, not u16. Otherwise, errors below $ devlink dev eswitch set pci/0000:08:00.0 mode switchdev \ inline-mode network Error: Attribute failed policy validation. kernel answers: Numerical result out of rang netlink: 'devlink': attribute type 26 has an invalid length. Fixes: f2f9dd164db0 ("netlink: specs: devlink: add the remaining command to generate complete split_ops") Signed-off-by: William Tu Reviewed-by: Jiri Pirko Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240310164547.35219-1-witu@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/devlink.yaml | 2 +- net/devlink/netlink_gen.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index cf6eaa0da821b..09fbb4c03fc8d 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -290,7 +290,7 @@ attribute-sets: enum: eswitch-mode - name: eswitch-inline-mode - type: u16 + type: u8 enum: eswitch-inline-mode - name: dpipe-tables diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index c81cf2dd154f3..f9786d51f68f9 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -198,7 +198,7 @@ static const struct nla_policy devlink_eswitch_set_nl_policy[DEVLINK_ATTR_ESWITC [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_ESWITCH_MODE] = NLA_POLICY_MAX(NLA_U16, 1), - [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = NLA_POLICY_MAX(NLA_U16, 3), + [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = NLA_POLICY_MAX(NLA_U8, 3), [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = NLA_POLICY_MAX(NLA_U8, 1), }; -- cgit 1.2.3-korg From 2d32c49386cbb748ef775aab60aa687d4a320d3b Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 8 Mar 2024 13:59:45 +0100 Subject: net: nexthop: Initialize NH group ID in resilient NH group notifiers The NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE notifier currently keeps the group ID unset. That makes it impossible to look up the group for which the notifier is intended. This is not an issue at the moment, because the only client is netdevsim, and that just so that it veto replacements, which is a static property not tied to a particular group. But for any practical use, the ID is necessary. Set it. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/025fef095dcfb408042568bb5439da014d47239e.1709901020.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 5eb3ba568f4e7..aa1b366502828 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -407,6 +407,7 @@ static int call_nexthop_res_table_notifiers(struct net *net, struct nexthop *nh, struct nh_notifier_info info = { .net = net, .extack = extack, + .id = nh->id, }; struct nh_group *nhg; int err; -- cgit 1.2.3-korg From e99eb57e9b14d830a571c5255248d4d7eb08b27e Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 8 Mar 2024 13:59:46 +0100 Subject: net: nexthop: Have all NH notifiers carry NH ID When sending the notifications to collect NH statistics for resilient groups, the driver will need to know the nexthop IDs in individual buckets to look up the right counter. To that end, move the nexthop ID from struct nh_notifier_grp_entry_info to nh_notifier_single_info. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/8f964cd50b1a56d3606ce7ab4c50354ae019c43b.1709901020.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/nexthop.h | 2 +- net/ipv4/nexthop.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 7ec9cc80f11c7..7ca315ad500e7 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -185,6 +185,7 @@ struct nh_notifier_single_info { __be32 ipv4; struct in6_addr ipv6; }; + u32 id; u8 is_reject:1, is_fdb:1, has_encap:1; @@ -192,7 +193,6 @@ struct nh_notifier_single_info { struct nh_notifier_grp_entry_info { u8 weight; - u32 id; struct nh_notifier_single_info nh; }; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index aa1b366502828..73b849be65042 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -104,6 +104,7 @@ __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info, else if (nh_info->gw_family == AF_INET6) nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6; + nh_info->id = nhi->nh_parent->id; nh_info->is_reject = nhi->reject_nh; nh_info->is_fdb = nhi->fdb_nh; nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate; @@ -150,7 +151,6 @@ static int nh_notifier_mpath_info_init(struct nh_notifier_info *info, struct nh_info *nhi; nhi = rtnl_dereference(nhge->nh->nh_info); - info->nh_grp->nh_entries[i].id = nhge->nh->id; info->nh_grp->nh_entries[i].weight = nhge->weight; __nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh, nhi); -- cgit 1.2.3-korg From fcac05daa7efcf96f8042bd06abd4da384ac3cd3 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Fri, 8 Mar 2024 19:53:43 +0100 Subject: net: ipv6: exthdrs: get rid of ipv6_skb_net() Get rid of ipv6_skb_net() which is only used in ipv6_hop_ioam(). Signed-off-by: Justin Iurman Link: https://lore.kernel.org/r/20240308185343.39272-1-justin.iurman@uliege.be Signed-off-by: Jakub Kicinski --- net/ipv6/exthdrs.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 25a5b394481b7..6789623b2b0d1 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -883,14 +883,6 @@ void ipv6_exthdrs_exit(void) Hop-by-hop options. **********************************/ -/* - * Note: we cannot rely on skb_dst(skb) before we assign it in ip6_route_input(). - */ -static inline struct net *ipv6_skb_net(struct sk_buff *skb) -{ - return skb_dst(skb) ? dev_net(skb_dst(skb)->dev) : dev_net(skb->dev); -} - /* Router Alert as of RFC 2711 */ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) @@ -941,7 +933,7 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) goto drop; /* Ignore if the IOAM namespace is unknown */ - ns = ioam6_namespace(ipv6_skb_net(skb), trace->namespace_id); + ns = ioam6_namespace(dev_net(skb->dev), trace->namespace_id); if (!ns) goto ignore; -- cgit 1.2.3-korg From 8b6d307f4391c20cfd76bbb15f8b3784d36e0755 Mon Sep 17 00:00:00 2001 From: Juntong Deng Date: Fri, 8 Mar 2024 11:33:04 +0000 Subject: net/netlink: Add getsockopt support for NETLINK_LISTEN_ALL_NSID Currently getsockopt does not support NETLINK_LISTEN_ALL_NSID, and we are unable to get the value of NETLINK_LISTEN_ALL_NSID socket option through getsockopt. This patch adds getsockopt support for NETLINK_LISTEN_ALL_NSID. Signed-off-by: Juntong Deng Link: https://lore.kernel.org/r/AM6PR03MB58482322B7B335308DA56FE599272@AM6PR03MB5848.eurprd03.prod.outlook.com Signed-off-by: Jakub Kicinski --- net/netlink/af_netlink.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index da846212fb9b2..7554803218a25 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1773,6 +1773,9 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, netlink_unlock_table(); return err; } + case NETLINK_LISTEN_ALL_NSID: + flag = NETLINK_F_LISTEN_ALL_NSID; + break; case NETLINK_CAP_ACK: flag = NETLINK_F_CAP_ACK; break; -- cgit 1.2.3-korg From 76839e2f1fded42382ac7bad215cd9e9c293e97d Mon Sep 17 00:00:00 2001 From: Juntong Deng Date: Fri, 8 Mar 2024 13:06:36 +0000 Subject: net/packet: Add getsockopt support for PACKET_COPY_THRESH Currently getsockopt does not support PACKET_COPY_THRESH, and we are unable to get the value of PACKET_COPY_THRESH socket option through getsockopt. This patch adds getsockopt support for PACKET_COPY_THRESH. In addition, this patch converts access to copy_thresh to READ_ONCE/WRITE_ONCE. Signed-off-by: Juntong Deng Reviewed-by: Willem de Bruijn Reviewed-by: Jason Xing Link: https://lore.kernel.org/r/AM6PR03MB58487A9704FD150CF76F542899272@AM6PR03MB5848.eurprd03.prod.outlook.com Signed-off-by: Jakub Kicinski --- net/packet/af_packet.c | 7 +++++-- net/packet/diag.c | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 0db31ca4982da..61270826b9ac7 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2318,7 +2318,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, } if (po->tp_version <= TPACKET_V2) { if (macoff + snaplen > po->rx_ring.frame_size) { - if (po->copy_thresh && + if (READ_ONCE(po->copy_thresh) && atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { if (skb_shared(skb)) { copy_skb = skb_clone(skb, GFP_ATOMIC); @@ -3836,7 +3836,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; - pkt_sk(sk)->copy_thresh = val; + WRITE_ONCE(pkt_sk(sk)->copy_thresh, val); return 0; } case PACKET_VERSION: @@ -4090,6 +4090,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, case PACKET_VNET_HDR_SZ: val = READ_ONCE(po->vnet_hdr_sz); break; + case PACKET_COPY_THRESH: + val = READ_ONCE(pkt_sk(sk)->copy_thresh); + break; case PACKET_VERSION: val = po->tp_version; break; diff --git a/net/packet/diag.c b/net/packet/diag.c index b3bd2f6c2bf7b..47f69f3dbf73e 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -17,7 +17,7 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb) pinfo.pdi_index = po->ifindex; pinfo.pdi_version = po->tp_version; pinfo.pdi_reserve = po->tp_reserve; - pinfo.pdi_copy_thresh = po->copy_thresh; + pinfo.pdi_copy_thresh = READ_ONCE(po->copy_thresh); pinfo.pdi_tstamp = READ_ONCE(po->tp_tstamp); pinfo.pdi_flags = 0; -- cgit 1.2.3-korg From 66c8473135c62f478301a0e5b3012f203562dfa6 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 8 Mar 2024 16:47:39 -0800 Subject: bpf: move sleepable flag from bpf_prog_aux to bpf_prog prog->aux->sleepable is checked very frequently as part of (some) BPF program run hot paths. So this extra aux indirection seems wasteful and on busy systems might cause unnecessary memory cache misses. Let's move sleepable flag into prog itself to eliminate unnecessary pointer dereference. Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Message-ID: <20240309004739.2961431-1-andrii@kernel.org> Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 8 ++++---- kernel/bpf/bpf_iter.c | 4 ++-- kernel/bpf/core.c | 2 +- kernel/bpf/syscall.c | 6 +++--- kernel/bpf/trampoline.c | 4 ++-- kernel/bpf/verifier.c | 12 ++++++------ kernel/events/core.c | 2 +- kernel/trace/bpf_trace.c | 2 +- net/bpf/bpf_dummy_struct_ops.c | 2 +- 9 files changed, 21 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 08ad265cb1959..4f20f62f9d63d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1455,7 +1455,6 @@ struct bpf_prog_aux { bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ bool attach_tracing_prog; /* true if tracing another tracing program */ bool func_proto_unreliable; - bool sleepable; bool tail_call_reachable; bool xdp_has_frags; bool exception_cb; @@ -1541,7 +1540,8 @@ struct bpf_prog { enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */ call_get_func_ip:1, /* Do we call get_func_ip() */ - tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */ + tstamp_type_access:1, /* Accessed __sk_buff->tstamp_type */ + sleepable:1; /* BPF program is sleepable */ enum bpf_prog_type type; /* Type of BPF program */ enum bpf_attach_type expected_attach_type; /* For some prog types */ u32 len; /* Number of filter blocks */ @@ -2112,14 +2112,14 @@ bpf_prog_run_array_uprobe(const struct bpf_prog_array __rcu *array_rcu, old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); item = &array->items[0]; while ((prog = READ_ONCE(item->prog))) { - if (!prog->aux->sleepable) + if (!prog->sleepable) rcu_read_lock(); run_ctx.bpf_cookie = item->bpf_cookie; ret &= run_prog(prog, ctx); item++; - if (!prog->aux->sleepable) + if (!prog->sleepable) rcu_read_unlock(); } bpf_reset_run_ctx(old_run_ctx); diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 0fae791641870..112581cf97e7f 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -548,7 +548,7 @@ int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, return -ENOENT; /* Only allow sleepable program for resched-able iterator */ - if (prog->aux->sleepable && !bpf_iter_target_support_resched(tinfo)) + if (prog->sleepable && !bpf_iter_target_support_resched(tinfo)) return -EINVAL; link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN); @@ -697,7 +697,7 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx) struct bpf_run_ctx run_ctx, *old_run_ctx; int ret; - if (prog->aux->sleepable) { + if (prog->sleepable) { rcu_read_lock_trace(); migrate_disable(); might_fault(); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 297184baeb2d4..696bc55de8e82 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2706,7 +2706,7 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux, bool sleepable; u32 i; - sleepable = aux->sleepable; + sleepable = aux->prog->sleepable; for (i = 0; i < len; i++) { map = used_maps[i]; if (map->ops->map_poke_untrack) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 07f2a4db4511c..ae2ff73bde7e7 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2248,7 +2248,7 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) btf_put(prog->aux->attach_btf); if (deferred) { - if (prog->aux->sleepable) + if (prog->sleepable) call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu); else call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); @@ -2813,11 +2813,11 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) } prog->expected_attach_type = attr->expected_attach_type; + prog->sleepable = !!(attr->prog_flags & BPF_F_SLEEPABLE); prog->aux->attach_btf = attach_btf; prog->aux->attach_btf_id = attr->attach_btf_id; prog->aux->dst_prog = dst_prog; prog->aux->dev_bound = !!attr->prog_ifindex; - prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE; prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS; /* move token into prog->aux, reuse taken refcnt */ @@ -5554,7 +5554,7 @@ static int bpf_prog_bind_map(union bpf_attr *attr) /* The bpf program will not access the bpf map, but for the sake of * simplicity, increase sleepable_refcnt for sleepable program as well. */ - if (prog->aux->sleepable) + if (prog->sleepable) atomic64_inc(&map->sleepable_refcnt); memcpy(used_maps_new, used_maps_old, sizeof(used_maps_old[0]) * prog->aux->used_map_cnt); diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index d382f5ebe06c8..db7599c59c78a 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -1014,7 +1014,7 @@ void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr) bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog) { - bool sleepable = prog->aux->sleepable; + bool sleepable = prog->sleepable; if (bpf_prog_check_recur(prog)) return sleepable ? __bpf_prog_enter_sleepable_recur : @@ -1029,7 +1029,7 @@ bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog) bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog) { - bool sleepable = prog->aux->sleepable; + bool sleepable = prog->sleepable; if (bpf_prog_check_recur(prog)) return sleepable ? __bpf_prog_exit_sleepable_recur : diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d64f7a9b60e81..d501c90f8ab51 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5274,7 +5274,7 @@ bad_type: static bool in_sleepable(struct bpf_verifier_env *env) { - return env->prog->aux->sleepable; + return env->prog->sleepable; } /* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock() @@ -18137,7 +18137,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, return -EINVAL; } - if (prog->aux->sleepable) + if (prog->sleepable) switch (map->map_type) { case BPF_MAP_TYPE_HASH: case BPF_MAP_TYPE_LRU_HASH: @@ -18325,7 +18325,7 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) return -E2BIG; } - if (env->prog->aux->sleepable) + if (env->prog->sleepable) atomic64_inc(&map->sleepable_refcnt); /* hold the map. If the program is rejected by verifier, * the map will be released by release_maps() or it @@ -20938,7 +20938,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, } } - if (prog->aux->sleepable) { + if (prog->sleepable) { ret = -EINVAL; switch (prog->type) { case BPF_PROG_TYPE_TRACING: @@ -21049,14 +21049,14 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) u64 key; if (prog->type == BPF_PROG_TYPE_SYSCALL) { - if (prog->aux->sleepable) + if (prog->sleepable) /* attach_btf_id checked to be zero already */ return 0; verbose(env, "Syscall programs can only be sleepable\n"); return -EINVAL; } - if (prog->aux->sleepable && !can_be_sleepable(prog)) { + if (prog->sleepable && !can_be_sleepable(prog)) { verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n"); return -EINVAL; } diff --git a/kernel/events/core.c b/kernel/events/core.c index 5ecfa57e3b97f..724e6d7e128f3 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10553,7 +10553,7 @@ int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog, (is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT)) return -EINVAL; - if (prog->type == BPF_PROG_TYPE_KPROBE && prog->aux->sleepable && !is_uprobe) + if (prog->type == BPF_PROG_TYPE_KPROBE && prog->sleepable && !is_uprobe) /* only uprobe programs are allowed to be sleepable */ return -EINVAL; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 241ddf5e38953..0a5c4efc73c36 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -3256,7 +3256,7 @@ static int uprobe_prog_run(struct bpf_uprobe *uprobe, .uprobe = uprobe, }; struct bpf_prog *prog = link->link.prog; - bool sleepable = prog->aux->sleepable; + bool sleepable = prog->sleepable; struct bpf_run_ctx *old_run_ctx; int err = 0; diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index 1b5f812e6972c..de33dc1b0daad 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -174,7 +174,7 @@ static int bpf_dummy_ops_check_member(const struct btf_type *t, case offsetof(struct bpf_dummy_ops, test_sleepable): break; default: - if (prog->aux->sleepable) + if (prog->sleepable) return -EINVAL; } -- cgit 1.2.3-korg From dc5e0141ff19439bfe8d54fce6d6b834bdf1efe0 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 11 Mar 2024 18:23:04 +0200 Subject: nexthop: Only parse NHA_OP_FLAGS for get messages that require it The attribute is parsed into 'op_flags' in nh_valid_get_del_req() which is called from the handlers of three message types: RTM_DELNEXTHOP, RTM_GETNEXTHOPBUCKET and RTM_GETNEXTHOP. The attribute is only used by the latter and rejected by the policies of the other two. Pass 'op_flags' as NULL from the handlers of the other two and only parse the attribute when the argument is not NULL. This is a preparation for a subsequent patch. Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240311162307.545385-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 73b849be65042..6901fa3906511 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -3230,10 +3230,12 @@ static int nh_valid_get_del_req(const struct nlmsghdr *nlh, return -EINVAL; } - if (tb[NHA_OP_FLAGS]) - *op_flags = nla_get_u32(tb[NHA_OP_FLAGS]); - else - *op_flags = 0; + if (op_flags) { + if (tb[NHA_OP_FLAGS]) + *op_flags = nla_get_u32(tb[NHA_OP_FLAGS]); + else + *op_flags = 0; + } return 0; } @@ -3250,7 +3252,6 @@ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, .portid = NETLINK_CB(skb).portid, }; struct nexthop *nh; - u32 op_flags; int err; u32 id; @@ -3259,7 +3260,7 @@ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) return err; - err = nh_valid_get_del_req(nlh, tb, &id, &op_flags, extack); + err = nh_valid_get_del_req(nlh, tb, &id, NULL, extack); if (err) return err; @@ -3716,7 +3717,6 @@ static int nh_valid_get_bucket_req(const struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[NHA_MAX + 1]; - u32 op_flags; int err; err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX, @@ -3724,7 +3724,7 @@ static int nh_valid_get_bucket_req(const struct nlmsghdr *nlh, if (err < 0) return err; - err = nh_valid_get_del_req(nlh, tb, id, &op_flags, extack); + err = nh_valid_get_del_req(nlh, tb, id, NULL, extack); if (err) return err; -- cgit 1.2.3-korg From 262a68aa46f88d9b42f1569f893d53f71ddf7e52 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 11 Mar 2024 18:23:05 +0200 Subject: nexthop: Only parse NHA_OP_FLAGS for dump messages that require it The attribute is parsed in __nh_valid_dump_req() which is called by the dump handlers of RTM_GETNEXTHOP and RTM_GETNEXTHOPBUCKET although it is only used by the former and rejected by the policy of the latter. Move the parsing to nh_valid_dump_req() which is only called by the dump handler of RTM_GETNEXTHOP. This is a preparation for a subsequent patch. Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240311162307.545385-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 6901fa3906511..d1669000ac1cf 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -3398,11 +3398,6 @@ static int __nh_valid_dump_req(const struct nlmsghdr *nlh, struct nlattr **tb, return -EINVAL; } - if (tb[NHA_OP_FLAGS]) - filter->op_flags = nla_get_u32(tb[NHA_OP_FLAGS]); - else - filter->op_flags = 0; - return 0; } @@ -3418,6 +3413,11 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, if (err < 0) return err; + if (tb[NHA_OP_FLAGS]) + filter->op_flags = nla_get_u32(tb[NHA_OP_FLAGS]); + else + filter->op_flags = 0; + return __nh_valid_dump_req(nlh, tb, filter, cb->extack); } -- cgit 1.2.3-korg From d8a21070b6e168d3800c2962a574f16020dd2951 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 11 Mar 2024 18:23:06 +0200 Subject: nexthop: Fix out-of-bounds access during attribute validation Passing a maximum attribute type to nlmsg_parse() that is larger than the size of the passed policy will result in an out-of-bounds access [1] when the attribute type is used as an index into the policy array. Fix by setting the maximum attribute type according to the policy size, as is already done for RTM_NEWNEXTHOP messages. Add a test case that triggers the bug. No regressions in fib nexthops tests: # ./fib_nexthops.sh [...] Tests passed: 236 Tests failed: 0 [1] BUG: KASAN: global-out-of-bounds in __nla_validate_parse+0x1e53/0x2940 Read of size 1 at addr ffffffff99ab4d20 by task ip/610 CPU: 3 PID: 610 Comm: ip Not tainted 6.8.0-rc7-custom-gd435d6e3e161 #9 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc38 04/01/2014 Call Trace: dump_stack_lvl+0x8f/0xe0 print_report+0xcf/0x670 kasan_report+0xd8/0x110 __nla_validate_parse+0x1e53/0x2940 __nla_parse+0x40/0x50 rtm_del_nexthop+0x1bd/0x400 rtnetlink_rcv_msg+0x3cc/0xf20 netlink_rcv_skb+0x170/0x440 netlink_unicast+0x540/0x820 netlink_sendmsg+0x8d3/0xdb0 ____sys_sendmsg+0x31f/0xa60 ___sys_sendmsg+0x13a/0x1e0 __sys_sendmsg+0x11c/0x1f0 do_syscall_64+0xc5/0x1d0 entry_SYSCALL_64_after_hwframe+0x63/0x6b [...] The buggy address belongs to the variable: rtm_nh_policy_del+0x20/0x40 Fixes: 2118f9390d83 ("net: nexthop: Adjust netlink policy parsing for a new attribute") Reported-by: Eric Dumazet Closes: https://lore.kernel.org/netdev/CANn89i+UNcG0PJMW5X7gOMunF38ryMh=L1aeZUKH3kL4UdUqag@mail.gmail.com/ Reported-by: syzbot+65bb09a7208ce3d4a633@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/00000000000088981b06133bc07b@google.com/ Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240311162307.545385-4-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 29 +++++++++++++++++------------ tools/testing/selftests/net/fib_nexthops.sh | 6 ++++++ 2 files changed, 23 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index d1669000ac1cf..4c6915bff31ef 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -3244,8 +3244,8 @@ static int nh_valid_get_del_req(const struct nlmsghdr *nlh, static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_del)]; struct net *net = sock_net(skb->sk); - struct nlattr *tb[NHA_MAX + 1]; struct nl_info nlinfo = { .nlh = nlh, .nl_net = net, @@ -3255,8 +3255,9 @@ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, int err; u32 id; - err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX, - rtm_nh_policy_del, extack); + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, + ARRAY_SIZE(rtm_nh_policy_del) - 1, rtm_nh_policy_del, + extack); if (err < 0) return err; @@ -3277,16 +3278,17 @@ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)]; struct net *net = sock_net(in_skb->sk); - struct nlattr *tb[NHA_MAX + 1]; struct sk_buff *skb = NULL; struct nexthop *nh; u32 op_flags; int err; u32 id; - err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX, - rtm_nh_policy_get, extack); + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, + ARRAY_SIZE(rtm_nh_policy_get) - 1, rtm_nh_policy_get, + extack); if (err < 0) return err; @@ -3405,10 +3407,11 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, struct nh_dump_filter *filter, struct netlink_callback *cb) { - struct nlattr *tb[NHA_MAX + 1]; + struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump)]; int err; - err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX, + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, + ARRAY_SIZE(rtm_nh_policy_dump) - 1, rtm_nh_policy_dump, cb->extack); if (err < 0) return err; @@ -3548,10 +3551,11 @@ static int nh_valid_dump_bucket_req(const struct nlmsghdr *nlh, struct netlink_callback *cb) { struct nlattr *res_tb[ARRAY_SIZE(rtm_nh_res_bucket_policy_dump)]; - struct nlattr *tb[NHA_MAX + 1]; + struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump_bucket)]; int err; - err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX, + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, + ARRAY_SIZE(rtm_nh_policy_dump_bucket) - 1, rtm_nh_policy_dump_bucket, NULL); if (err < 0) return err; @@ -3716,10 +3720,11 @@ static int nh_valid_get_bucket_req(const struct nlmsghdr *nlh, u32 *id, u16 *bucket_index, struct netlink_ext_ack *extack) { - struct nlattr *tb[NHA_MAX + 1]; + struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get_bucket)]; int err; - err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, NHA_MAX, + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, + ARRAY_SIZE(rtm_nh_policy_get_bucket) - 1, rtm_nh_policy_get_bucket, extack); if (err < 0) return err; diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index d5a281aadbac4..ac0b2c6a5761b 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -2066,6 +2066,12 @@ basic() run_cmd "$IP nexthop get id 1" log_test $? 2 "Nexthop get on non-existent id" + run_cmd "$IP nexthop del id 1" + log_test $? 2 "Nexthop del with non-existent id" + + run_cmd "$IP nexthop del id 1 group 1/2/3/4/5/6/7/8" + log_test $? 2 "Nexthop del with non-existent id and extra attributes" + # attempt to create nh without a device or gw - fails run_cmd "$IP nexthop add id 1" log_test $? 2 "Nexthop with no device or gateway" -- cgit 1.2.3-korg From e006858f1a1c6e8ba8aeff67e9b15700f70174da Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 11 Mar 2024 18:23:07 +0200 Subject: nexthop: Fix splat with CONFIG_DEBUG_PREEMPT=y Locally generated packets can increment the new nexthop statistics from process context, resulting in the following splat [1] due to preemption being enabled. Fix by using get_cpu_ptr() / put_cpu_ptr() which will which take care of disabling / enabling preemption. BUG: using smp_processor_id() in preemptible [00000000] code: ping/949 caller is nexthop_select_path+0xcf8/0x1e30 CPU: 12 PID: 949 Comm: ping Not tainted 6.8.0-rc7-custom-gcb450f605fae #11 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc38 04/01/2014 Call Trace: dump_stack_lvl+0xbd/0xe0 check_preemption_disabled+0xce/0xe0 nexthop_select_path+0xcf8/0x1e30 fib_select_multipath+0x865/0x18b0 fib_select_path+0x311/0x1160 ip_route_output_key_hash_rcu+0xe54/0x2720 ip_route_output_key_hash+0x193/0x380 ip_route_output_flow+0x25/0x130 raw_sendmsg+0xbab/0x34a0 inet_sendmsg+0xa2/0xe0 __sys_sendto+0x2ad/0x430 __x64_sys_sendto+0xe5/0x1c0 do_syscall_64+0xc5/0x1d0 entry_SYSCALL_64_after_hwframe+0x63/0x6b [...] Fixes: f4676ea74b85 ("net: nexthop: Add nexthop group entry stats") Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240311162307.545385-5-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 4c6915bff31ef..74928a9d1aa48 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -674,10 +674,11 @@ static void nh_grp_entry_stats_inc(struct nh_grp_entry *nhge) { struct nh_grp_entry_stats *cpu_stats; - cpu_stats = this_cpu_ptr(nhge->stats); + cpu_stats = get_cpu_ptr(nhge->stats); u64_stats_update_begin(&cpu_stats->syncp); u64_stats_inc(&cpu_stats->packets); u64_stats_update_end(&cpu_stats->syncp); + put_cpu_ptr(cpu_stats); } static void nh_grp_entry_stats_read(struct nh_grp_entry *nhge, -- cgit 1.2.3-korg