aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-07-22 10:52:37 -0700
committerDavid S. Miller <davem@davemloft.net>2018-07-22 10:52:37 -0700
commit0fb8d5a030db52a184eca49320e5f4ff8dacd61d (patch)
tree98b448481cc3ad9443a3e257e05f071b1233a24d
parent6e56830776828d8ca9897fc4429eeab47c3bb432 (diff)
parente99465b952861533d9ba748fdbecc96d9a36da3e (diff)
downloadlinux-0fb8d5a030db52a184eca49320e5f4ff8dacd61d.tar.gz
Merge branch 'vxlan-fix-default-fdb-entry-user-space-notify-ordering-race'
Roopa Prabhu says: ==================== vxlan: fix default fdb entry user-space notify ordering/race Problem: In vxlan_newlink, a default fdb entry is added before register_netdev. The default fdb creation function notifies user-space of the fdb entry on the vxlan device which user-space does not know about yet. (RTM_NEWNEIGH goes before RTM_NEWLINK for the same ifindex). This series fixes the user-space netlink notification ordering issue with the following changes: - decouple fdb notify from fdb create. - Move fdb notify after register_netdev. - modify rtnl_configure_link to allow configuring a link early. - Call rtnl_configure_link in vxlan newlink handler to notify userspace about the newlink before fdb notify and hence avoiding the user-space race. ==================== Fixes: afbd8bae9c79 ("vxlan: add implicit fdb entry for default destination") Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
-rw-r--r--drivers/net/vxlan.c126
-rw-r--r--net/core/rtnetlink.c9
2 files changed, 93 insertions, 42 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index f6bb1d54d4bdec..e857cb3335f6bd 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -636,9 +636,62 @@ static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
}
-/* Add new entry to forwarding table -- assumes lock held */
+static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan,
+ const u8 *mac, __u16 state,
+ __be32 src_vni, __u8 ndm_flags)
+{
+ struct vxlan_fdb *f;
+
+ f = kmalloc(sizeof(*f), GFP_ATOMIC);
+ if (!f)
+ return NULL;
+ f->state = state;
+ f->flags = ndm_flags;
+ f->updated = f->used = jiffies;
+ f->vni = src_vni;
+ INIT_LIST_HEAD(&f->remotes);
+ memcpy(f->eth_addr, mac, ETH_ALEN);
+
+ return f;
+}
+
static int vxlan_fdb_create(struct vxlan_dev *vxlan,
const u8 *mac, union vxlan_addr *ip,
+ __u16 state, __be16 port, __be32 src_vni,
+ __be32 vni, __u32 ifindex, __u8 ndm_flags,
+ struct vxlan_fdb **fdb)
+{
+ struct vxlan_rdst *rd = NULL;
+ struct vxlan_fdb *f;
+ int rc;
+
+ if (vxlan->cfg.addrmax &&
+ vxlan->addrcnt >= vxlan->cfg.addrmax)
+ return -ENOSPC;
+
+ netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
+ f = vxlan_fdb_alloc(vxlan, mac, state, src_vni, ndm_flags);
+ if (!f)
+ return -ENOMEM;
+
+ rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
+ if (rc < 0) {
+ kfree(f);
+ return rc;
+ }
+
+ ++vxlan->addrcnt;
+ hlist_add_head_rcu(&f->hlist,
+ vxlan_fdb_head(vxlan, mac, src_vni));
+
+ *fdb = f;
+
+ return 0;
+}
+
+/* Add new entry to forwarding table -- assumes lock held */
+static int vxlan_fdb_update(struct vxlan_dev *vxlan,
+ const u8 *mac, union vxlan_addr *ip,
__u16 state, __u16 flags,
__be16 port, __be32 src_vni, __be32 vni,
__u32 ifindex, __u8 ndm_flags)
@@ -687,37 +740,17 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
if (!(flags & NLM_F_CREATE))
return -ENOENT;
- if (vxlan->cfg.addrmax &&
- vxlan->addrcnt >= vxlan->cfg.addrmax)
- return -ENOSPC;
-
/* Disallow replace to add a multicast entry */
if ((flags & NLM_F_REPLACE) &&
(is_multicast_ether_addr(mac) || is_zero_ether_addr(mac)))
return -EOPNOTSUPP;
netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
- f = kmalloc(sizeof(*f), GFP_ATOMIC);
- if (!f)
- return -ENOMEM;
-
- notify = 1;
- f->state = state;
- f->flags = ndm_flags;
- f->updated = f->used = jiffies;
- f->vni = src_vni;
- INIT_LIST_HEAD(&f->remotes);
- memcpy(f->eth_addr, mac, ETH_ALEN);
-
- rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
- if (rc < 0) {
- kfree(f);
+ rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni,
+ vni, ifindex, ndm_flags, &f);
+ if (rc < 0)
return rc;
- }
-
- ++vxlan->addrcnt;
- hlist_add_head_rcu(&f->hlist,
- vxlan_fdb_head(vxlan, mac, src_vni));
+ notify = 1;
}
if (notify) {
@@ -741,13 +774,15 @@ static void vxlan_fdb_free(struct rcu_head *head)
kfree(f);
}
-static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
+static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
+ bool do_notify)
{
netdev_dbg(vxlan->dev,
"delete %pM\n", f->eth_addr);
--vxlan->addrcnt;
- vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH);
+ if (do_notify)
+ vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH);
hlist_del_rcu(&f->hlist);
call_rcu(&f->rcu, vxlan_fdb_free);
@@ -863,7 +898,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
return -EAFNOSUPPORT;
spin_lock_bh(&vxlan->hash_lock);
- err = vxlan_fdb_create(vxlan, addr, &ip, ndm->ndm_state, flags,
+ err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags,
port, src_vni, vni, ifindex, ndm->ndm_flags);
spin_unlock_bh(&vxlan->hash_lock);
@@ -897,7 +932,7 @@ static int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
goto out;
}
- vxlan_fdb_destroy(vxlan, f);
+ vxlan_fdb_destroy(vxlan, f, true);
out:
return 0;
@@ -1006,7 +1041,7 @@ static bool vxlan_snoop(struct net_device *dev,
/* close off race between vxlan_flush and incoming packets */
if (netif_running(dev))
- vxlan_fdb_create(vxlan, src_mac, src_ip,
+ vxlan_fdb_update(vxlan, src_mac, src_ip,
NUD_REACHABLE,
NLM_F_EXCL|NLM_F_CREATE,
vxlan->cfg.dst_port,
@@ -2364,7 +2399,7 @@ static void vxlan_cleanup(struct timer_list *t)
"garbage collect %pM\n",
f->eth_addr);
f->state = NUD_STALE;
- vxlan_fdb_destroy(vxlan, f);
+ vxlan_fdb_destroy(vxlan, f, true);
} else if (time_before(timeout, next_timer))
next_timer = timeout;
}
@@ -2415,7 +2450,7 @@ static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni)
spin_lock_bh(&vxlan->hash_lock);
f = __vxlan_find_mac(vxlan, all_zeros_mac, vni);
if (f)
- vxlan_fdb_destroy(vxlan, f);
+ vxlan_fdb_destroy(vxlan, f, true);
spin_unlock_bh(&vxlan->hash_lock);
}
@@ -2469,7 +2504,7 @@ static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all)
continue;
/* the all_zeros_mac entry is deleted at vxlan_uninit */
if (!is_zero_ether_addr(f->eth_addr))
- vxlan_fdb_destroy(vxlan, f);
+ vxlan_fdb_destroy(vxlan, f, true);
}
}
spin_unlock_bh(&vxlan->hash_lock);
@@ -3160,6 +3195,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_fdb *f = NULL;
int err;
err = vxlan_dev_configure(net, dev, conf, false, extack);
@@ -3173,24 +3209,35 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
err = vxlan_fdb_create(vxlan, all_zeros_mac,
&vxlan->default_dst.remote_ip,
NUD_REACHABLE | NUD_PERMANENT,
- NLM_F_EXCL | NLM_F_CREATE,
vxlan->cfg.dst_port,
vxlan->default_dst.remote_vni,
vxlan->default_dst.remote_vni,
vxlan->default_dst.remote_ifindex,
- NTF_SELF);
+ NTF_SELF, &f);
if (err)
return err;
}
err = register_netdevice(dev);
+ if (err)
+ goto errout;
+
+ err = rtnl_configure_link(dev, NULL);
if (err) {
- vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni);
- return err;
+ unregister_netdevice(dev);
+ goto errout;
}
+ /* notify default fdb entry */
+ if (f)
+ vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH);
+
list_add(&vxlan->next, &vn->vxlan_list);
return 0;
+errout:
+ if (f)
+ vxlan_fdb_destroy(vxlan, f, false);
+ return err;
}
static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
@@ -3425,6 +3472,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
struct vxlan_rdst *dst = &vxlan->default_dst;
struct vxlan_rdst old_dst;
struct vxlan_config conf;
+ struct vxlan_fdb *f = NULL;
int err;
err = vxlan_nl2conf(tb, data,
@@ -3453,16 +3501,16 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
err = vxlan_fdb_create(vxlan, all_zeros_mac,
&dst->remote_ip,
NUD_REACHABLE | NUD_PERMANENT,
- NLM_F_CREATE | NLM_F_APPEND,
vxlan->cfg.dst_port,
dst->remote_vni,
dst->remote_vni,
dst->remote_ifindex,
- NTF_SELF);
+ NTF_SELF, &f);
if (err) {
spin_unlock_bh(&vxlan->hash_lock);
return err;
}
+ vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH);
}
spin_unlock_bh(&vxlan->hash_lock);
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5ef61222fdef1f..e3f743c141b3f7 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2759,9 +2759,12 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
return err;
}
- dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
-
- __dev_notify_flags(dev, old_flags, ~0U);
+ if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) {
+ __dev_notify_flags(dev, old_flags, 0U);
+ } else {
+ dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
+ __dev_notify_flags(dev, old_flags, ~0U);
+ }
return 0;
}
EXPORT_SYMBOL(rtnl_configure_link);