aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2024-05-01 18:37:17 -0700
committerJakub Kicinski <kuba@kernel.org>2024-05-01 18:37:17 -0700
commit5165c48ef1afbb3e0126b36be84ffaa6f05cb6f0 (patch)
tree54e09e532fed923864889afc954475987af31738
parent496bc5861c73f34e0486612f93634a9289de0dfb (diff)
parentbf4ea58874df3d43f7264709cec7fe320616552c (diff)
downloadnet-next-5165c48ef1afbb3e0126b36be84ffaa6f05cb6f0.tar.gz
Merge branch 'arp-random-clean-up-and-rcu-conversion-for-ioctl-siocgarp'
Kuniyuki Iwashima says: ==================== arp: Random clean up and RCU conversion for ioctl(SIOCGARP). arp_ioctl() holds rtnl_lock() regardless of cmd (SIOCDARP, SIOCSARP, and SIOCGARP) to get net_device by __dev_get_by_name() and copy dev->name safely. In the SIOCGARP path, arp_req_get() calls neigh_lookup(), which looks up a neighbour entry under RCU. This series cleans up ioctl() code a bit and extends the RCU section not to take rtnl_lock() and instead use dev_get_by_name_rcu() and netdev_copy_name() for SIOCGARP. v2: https://lore.kernel.org/netdev/20240425170002.68160-1-kuniyu@amazon.com/ v1: https://lore.kernel.org/netdev/20240422194755.4221-1-kuniyu@amazon.com/ ==================== Link: https://lore.kernel.org/r/20240430015813.71143-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--include/linux/netdevice.h1
-rw-r--r--net/core/dev.c27
-rw-r--r--net/ipv4/arp.c203
3 files changed, 147 insertions, 84 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f849e7d110ed96..41853424b41d7a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3136,6 +3136,7 @@ struct net_device *netdev_get_by_name(struct net *net, const char *name,
netdevice_tracker *tracker, gfp_t gfp);
struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
struct net_device *dev_get_by_napi_id(unsigned int napi_id);
+void netdev_copy_name(struct net_device *dev, char *name);
static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
unsigned short type,
diff --git a/net/core/dev.c b/net/core/dev.c
index cd7ba50eac1534..e02d2363347e2e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -940,6 +940,18 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id)
}
EXPORT_SYMBOL(dev_get_by_napi_id);
+static DEFINE_SEQLOCK(netdev_rename_lock);
+
+void netdev_copy_name(struct net_device *dev, char *name)
+{
+ unsigned int seq;
+
+ do {
+ seq = read_seqbegin(&netdev_rename_lock);
+ strscpy(name, dev->name, IFNAMSIZ);
+ } while (read_seqretry(&netdev_rename_lock, seq));
+}
+
/**
* netdev_get_name - get a netdevice name, knowing its ifindex.
* @net: network namespace
@@ -951,7 +963,6 @@ int netdev_get_name(struct net *net, char *name, int ifindex)
struct net_device *dev;
int ret;
- down_read(&devnet_rename_sem);
rcu_read_lock();
dev = dev_get_by_index_rcu(net, ifindex);
@@ -960,12 +971,11 @@ int netdev_get_name(struct net *net, char *name, int ifindex)
goto out;
}
- strcpy(name, dev->name);
+ netdev_copy_name(dev, name);
ret = 0;
out:
rcu_read_unlock();
- up_read(&devnet_rename_sem);
return ret;
}
@@ -1217,7 +1227,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
memcpy(oldname, dev->name, IFNAMSIZ);
+ write_seqlock(&netdev_rename_lock);
err = dev_get_valid_name(net, dev, newname);
+ write_sequnlock(&netdev_rename_lock);
+
if (err < 0) {
up_write(&devnet_rename_sem);
return err;
@@ -1257,7 +1270,9 @@ rollback:
if (err >= 0) {
err = ret;
down_write(&devnet_rename_sem);
+ write_seqlock(&netdev_rename_lock);
memcpy(dev->name, oldname, IFNAMSIZ);
+ write_sequnlock(&netdev_rename_lock);
memcpy(oldname, newname, IFNAMSIZ);
WRITE_ONCE(dev->name_assign_type, old_assign_type);
old_assign_type = NET_NAME_RENAMED;
@@ -11403,8 +11418,12 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
dev_net_set(dev, net);
dev->ifindex = new_ifindex;
- if (new_name[0]) /* Rename the netdev to prepared name */
+ if (new_name[0]) {
+ /* Rename the netdev to prepared name */
+ write_seqlock(&netdev_rename_lock);
strscpy(dev->name, new_name, IFNAMSIZ);
+ write_sequnlock(&netdev_rename_lock);
+ }
/* Fixup kobjects */
dev_set_uevent_suppress(&dev->dev, 1);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index ab82ca1044963a..11c1519b36993d 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1003,6 +1003,55 @@ out_of_mem:
* User level interface (ioctl)
*/
+static struct net_device *arp_req_dev_by_name(struct net *net, struct arpreq *r,
+ bool getarp)
+{
+ struct net_device *dev;
+
+ if (getarp)
+ dev = dev_get_by_name_rcu(net, r->arp_dev);
+ else
+ dev = __dev_get_by_name(net, r->arp_dev);
+ if (!dev)
+ return ERR_PTR(-ENODEV);
+
+ /* Mmmm... It is wrong... ARPHRD_NETROM == 0 */
+ if (!r->arp_ha.sa_family)
+ r->arp_ha.sa_family = dev->type;
+
+ if ((r->arp_flags & ATF_COM) && r->arp_ha.sa_family != dev->type)
+ return ERR_PTR(-EINVAL);
+
+ return dev;
+}
+
+static struct net_device *arp_req_dev(struct net *net, struct arpreq *r)
+{
+ struct net_device *dev;
+ struct rtable *rt;
+ __be32 ip;
+
+ if (r->arp_dev[0])
+ return arp_req_dev_by_name(net, r, false);
+
+ if (r->arp_flags & ATF_PUBL)
+ return NULL;
+
+ ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+
+ rt = ip_route_output(net, ip, 0, 0, 0, RT_SCOPE_LINK);
+ if (IS_ERR(rt))
+ return ERR_CAST(rt);
+
+ dev = rt->dst.dev;
+ ip_rt_put(rt);
+
+ if (!dev)
+ return ERR_PTR(-EINVAL);
+
+ return dev;
+}
+
/*
* Set (create) an ARP cache entry.
*/
@@ -1023,11 +1072,8 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on)
static int arp_req_set_public(struct net *net, struct arpreq *r,
struct net_device *dev)
{
- __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
__be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
- if (mask && mask != htonl(0xFFFFFFFF))
- return -EINVAL;
if (!dev && (r->arp_flags & ATF_COM)) {
dev = dev_getbyhwaddr_rcu(net, r->arp_ha.sa_family,
r->arp_ha.sa_data);
@@ -1035,6 +1081,8 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
return -ENODEV;
}
if (mask) {
+ __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+
if (!pneigh_lookup(&arp_tbl, net, &ip, dev, 1))
return -ENOBUFS;
return 0;
@@ -1043,30 +1091,20 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
return arp_req_set_proxy(net, dev, 1);
}
-static int arp_req_set(struct net *net, struct arpreq *r,
- struct net_device *dev)
+static int arp_req_set(struct net *net, struct arpreq *r)
{
- __be32 ip;
struct neighbour *neigh;
+ struct net_device *dev;
+ __be32 ip;
int err;
+ dev = arp_req_dev(net, r);
+ if (IS_ERR(dev))
+ return PTR_ERR(dev);
+
if (r->arp_flags & ATF_PUBL)
return arp_req_set_public(net, r, dev);
- ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
- if (r->arp_flags & ATF_PERM)
- r->arp_flags |= ATF_COM;
- if (!dev) {
- struct rtable *rt = ip_route_output(net, ip, 0, 0, 0,
- RT_SCOPE_LINK);
-
- if (IS_ERR(rt))
- return PTR_ERR(rt);
- dev = rt->dst.dev;
- ip_rt_put(rt);
- if (!dev)
- return -EINVAL;
- }
switch (dev->type) {
#if IS_ENABLED(CONFIG_FDDI)
case ARPHRD_FDDI:
@@ -1088,12 +1126,18 @@ static int arp_req_set(struct net *net, struct arpreq *r,
break;
}
+ ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+
neigh = __neigh_lookup_errno(&arp_tbl, &ip, dev);
err = PTR_ERR(neigh);
if (!IS_ERR(neigh)) {
unsigned int state = NUD_STALE;
- if (r->arp_flags & ATF_PERM)
+
+ if (r->arp_flags & ATF_PERM) {
+ r->arp_flags |= ATF_COM;
state = NUD_PERMANENT;
+ }
+
err = neigh_update(neigh, (r->arp_flags & ATF_COM) ?
r->arp_ha.sa_data : NULL, state,
NEIGH_UPDATE_F_OVERRIDE |
@@ -1117,27 +1161,40 @@ static unsigned int arp_state_to_flags(struct neighbour *neigh)
* Get an ARP cache entry.
*/
-static int arp_req_get(struct arpreq *r, struct net_device *dev)
+static int arp_req_get(struct net *net, struct arpreq *r)
{
__be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
struct neighbour *neigh;
- int err = -ENXIO;
+ struct net_device *dev;
+
+ if (!r->arp_dev[0])
+ return -ENODEV;
+
+ dev = arp_req_dev_by_name(net, r, true);
+ if (IS_ERR(dev))
+ return PTR_ERR(dev);
neigh = neigh_lookup(&arp_tbl, &ip, dev);
- if (neigh) {
- if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) {
- read_lock_bh(&neigh->lock);
- memcpy(r->arp_ha.sa_data, neigh->ha,
- min(dev->addr_len, sizeof(r->arp_ha.sa_data_min)));
- r->arp_flags = arp_state_to_flags(neigh);
- read_unlock_bh(&neigh->lock);
- r->arp_ha.sa_family = dev->type;
- strscpy(r->arp_dev, dev->name, sizeof(r->arp_dev));
- err = 0;
- }
+ if (!neigh)
+ return -ENXIO;
+
+ if (READ_ONCE(neigh->nud_state) & NUD_NOARP) {
neigh_release(neigh);
+ return -ENXIO;
}
- return err;
+
+ read_lock_bh(&neigh->lock);
+ memcpy(r->arp_ha.sa_data, neigh->ha,
+ min(dev->addr_len, sizeof(r->arp_ha.sa_data_min)));
+ r->arp_flags = arp_state_to_flags(neigh);
+ read_unlock_bh(&neigh->lock);
+
+ neigh_release(neigh);
+
+ r->arp_ha.sa_family = dev->type;
+ netdev_copy_name(dev, r->arp_dev);
+
+ return 0;
}
int arp_invalidate(struct net_device *dev, __be32 ip, bool force)
@@ -1168,37 +1225,31 @@ int arp_invalidate(struct net_device *dev, __be32 ip, bool force)
static int arp_req_delete_public(struct net *net, struct arpreq *r,
struct net_device *dev)
{
- __be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
__be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
- if (mask == htonl(0xFFFFFFFF))
- return pneigh_delete(&arp_tbl, net, &ip, dev);
+ if (mask) {
+ __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
- if (mask)
- return -EINVAL;
+ return pneigh_delete(&arp_tbl, net, &ip, dev);
+ }
return arp_req_set_proxy(net, dev, 0);
}
-static int arp_req_delete(struct net *net, struct arpreq *r,
- struct net_device *dev)
+static int arp_req_delete(struct net *net, struct arpreq *r)
{
+ struct net_device *dev;
__be32 ip;
+ dev = arp_req_dev(net, r);
+ if (IS_ERR(dev))
+ return PTR_ERR(dev);
+
if (r->arp_flags & ATF_PUBL)
return arp_req_delete_public(net, r, dev);
ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
- if (!dev) {
- struct rtable *rt = ip_route_output(net, ip, 0, 0, 0,
- RT_SCOPE_LINK);
- if (IS_ERR(rt))
- return PTR_ERR(rt);
- dev = rt->dst.dev;
- ip_rt_put(rt);
- if (!dev)
- return -EINVAL;
- }
+
return arp_invalidate(dev, ip, true);
}
@@ -1208,9 +1259,9 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
- int err;
struct arpreq r;
- struct net_device *dev = NULL;
+ __be32 *netmask;
+ int err;
switch (cmd) {
case SIOCDARP:
@@ -1233,42 +1284,34 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
if (!(r.arp_flags & ATF_PUBL) &&
(r.arp_flags & (ATF_NETMASK | ATF_DONTPUB)))
return -EINVAL;
+
+ netmask = &((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr;
if (!(r.arp_flags & ATF_NETMASK))
- ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr =
- htonl(0xFFFFFFFFUL);
- rtnl_lock();
- if (r.arp_dev[0]) {
- err = -ENODEV;
- dev = __dev_get_by_name(net, r.arp_dev);
- if (!dev)
- goto out;
-
- /* Mmmm... It is wrong... ARPHRD_NETROM==0 */
- if (!r.arp_ha.sa_family)
- r.arp_ha.sa_family = dev->type;
- err = -EINVAL;
- if ((r.arp_flags & ATF_COM) && r.arp_ha.sa_family != dev->type)
- goto out;
- } else if (cmd == SIOCGARP) {
- err = -ENODEV;
- goto out;
- }
+ *netmask = htonl(0xFFFFFFFFUL);
+ else if (*netmask && *netmask != htonl(0xFFFFFFFFUL))
+ return -EINVAL;
switch (cmd) {
case SIOCDARP:
- err = arp_req_delete(net, &r, dev);
+ rtnl_lock();
+ err = arp_req_delete(net, &r);
+ rtnl_unlock();
break;
case SIOCSARP:
- err = arp_req_set(net, &r, dev);
+ rtnl_lock();
+ err = arp_req_set(net, &r);
+ rtnl_unlock();
break;
case SIOCGARP:
- err = arp_req_get(&r, dev);
+ rcu_read_lock();
+ err = arp_req_get(net, &r);
+ rcu_read_unlock();
+
+ if (!err && copy_to_user(arg, &r, sizeof(r)))
+ err = -EFAULT;
break;
}
-out:
- rtnl_unlock();
- if (cmd == SIOCGARP && !err && copy_to_user(arg, &r, sizeof(r)))
- err = -EFAULT;
+
return err;
}