From ad26d40d82b537dd00626e036f02bec1f0a25d47 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Fri, 3 Jul 2009 08:30:08 -0500
Subject: [PATCH] net: preempt-rt support

commit 0f2c3c2b4cbac06fa3080bc350b0defb9d0f525e in tip.

Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
Signed-off-by: Paul Gortmaker
---
 include/linux/netdevice.h       |    8 ++--
 net/core/dev.c                  |   41 ++++++++++++++++++++++----
 net/core/netpoll.c              |   60 +++++++++++++++++++++++++-------------
 net/ipv4/icmp.c                 |    5 ++-
 net/ipv4/route.c                |    4 +-
 net/ipv6/netfilter/ip6_tables.c |    2 +-
 net/sched/sch_generic.c         |   13 ++++++--
 7 files changed, 94 insertions(+), 39 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fa8b476..6be4dde 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1790,14 +1790,14 @@ static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
 static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
 {
 	spin_lock_bh(&txq->_xmit_lock);
-	txq->xmit_lock_owner = smp_processor_id();
+	txq->xmit_lock_owner = raw_smp_processor_id();
 }
 
 static inline int __netif_tx_trylock(struct netdev_queue *txq)
 {
 	int ok = spin_trylock(&txq->_xmit_lock);
 	if (likely(ok))
-		txq->xmit_lock_owner = smp_processor_id();
+		txq->xmit_lock_owner = raw_smp_processor_id();
 	return ok;
 }
 
@@ -1831,7 +1831,7 @@ static inline void netif_tx_lock(struct net_device *dev)
 	int cpu;
 
 	spin_lock(&dev->tx_global_lock);
-	cpu = smp_processor_id();
+	cpu = raw_smp_processor_id();
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
 
@@ -1894,7 +1894,7 @@ static inline void netif_tx_disable(struct net_device *dev)
 	int cpu;
 
 	local_bh_disable();
-	cpu = smp_processor_id();
+	cpu = raw_smp_processor_id();
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 6b1cc19..07b9c01 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2130,9 +2130,14 @@ gso:
 	   Either shot noqueue qdisc, it is even simpler 8)
 	 */
 	if (dev->flags & IFF_UP) {
-		int cpu = smp_processor_id(); /* ok because BHs are off */
-
+		/*
+		 * No need to check for recursion with threaded interrupts:
+		 */
+#ifdef CONFIG_PREEMPT_RT
+		if (1) {
+#else
 		if (txq->xmit_lock_owner != cpu) {
+#endif
 
 			HARD_TX_LOCK(dev, txq, cpu);
 
@@ -2251,7 +2256,8 @@ EXPORT_SYMBOL(netif_rx_ni);
 
 static void net_tx_action(struct softirq_action *h)
 {
-	struct softnet_data *sd = &__get_cpu_var(softnet_data);
+	struct softnet_data *sd = &per_cpu(softnet_data,
+					   raw_smp_processor_id());
 
 	if (sd->completion_queue) {
 		struct sk_buff *clist;
@@ -2267,6 +2273,11 @@ static void net_tx_action(struct softirq_action *h)
 
 			WARN_ON(atomic_read(&skb->users));
 			__kfree_skb(skb);
+			/*
+			 * Safe to reschedule - the list is private
+			 * at this point.
+			 */
+			cond_resched_softirq_context();
 		}
 	}
 
@@ -2285,6 +2296,22 @@ static void net_tx_action(struct softirq_action *h)
 			head = head->next_sched;
 
 			root_lock = qdisc_lock(q);
+			/*
+			 * We are executing in softirq context here, and
+			 * if softirqs are preemptible, we must avoid
+			 * infinite reactivation of the softirq by
+			 * either the tx handler, or by netif_schedule().
+			 * (it would result in an infinitely looping
+			 *  softirq context)
+			 * So we take the spinlock unconditionally.
+			 */
+#ifdef CONFIG_PREEMPT_SOFTIRQS
+			spin_lock(root_lock);
+			smp_mb__before_clear_bit();
+			clear_bit(__QDISC_STATE_SCHED, &q->state);
+			qdisc_run(q);
+			spin_unlock(root_lock);
+#else
 			if (spin_trylock(root_lock)) {
 				smp_mb__before_clear_bit();
 				clear_bit(__QDISC_STATE_SCHED,
@@ -2301,6 +2328,7 @@ static void net_tx_action(struct softirq_action *h)
 						      &q->state);
 				}
 			}
+#endif
 		}
 	}
 }
@@ -2516,7 +2544,7 @@ int netif_receive_skb(struct sk_buff *skb)
 			skb->dev = master;
 	}
 
-	__get_cpu_var(netdev_rx_stat).total++;
+	per_cpu(netdev_rx_stat, raw_smp_processor_id()).total++;
 
 	skb_reset_network_header(skb);
 	skb_reset_transport_header(skb);
@@ -2913,9 +2941,10 @@ EXPORT_SYMBOL(napi_gro_frags);
 static int process_backlog(struct napi_struct *napi, int quota)
 {
 	int work = 0;
-	struct softnet_data *queue = &__get_cpu_var(softnet_data);
+	struct softnet_data *queue;
 	unsigned long start_time = jiffies;
 
+	queue = &per_cpu(softnet_data, raw_smp_processor_id());
 	napi->weight = weight_p;
 	do {
 		struct sk_buff *skb;
@@ -2947,7 +2976,7 @@ void __napi_schedule(struct napi_struct *n)
 
 	local_irq_save(flags);
 	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
-	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+	raise_softirq_irqoff(NET_RX_SOFTIRQ);
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(__napi_schedule);
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index a58f59b..2fbd53b 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -74,20 +74,20 @@ static void queue_process(struct work_struct *work)
 		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
 
-		local_irq_save(flags);
+		local_irq_save_nort(flags);
 		__netif_tx_lock(txq, smp_processor_id());
 		if (netif_tx_queue_stopped(txq) ||
 		    netif_tx_queue_frozen(txq) ||
 		    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
 			skb_queue_head(&npinfo->txq, skb);
 			__netif_tx_unlock(txq);
-			local_irq_restore(flags);
+			local_irq_restore_nort(flags);
 
 			schedule_delayed_work(&npinfo->tx_work, HZ/10);
 			return;
 		}
 		__netif_tx_unlock(txq);
-		local_irq_restore(flags);
+		local_irq_restore_nort(flags);
 	}
 }
 
@@ -158,7 +158,7 @@ static void poll_napi(struct net_device *dev)
 	int budget = 16;
 
 	list_for_each_entry(napi, &dev->napi_list, dev_list) {
-		if (napi->poll_owner != smp_processor_id() &&
+		if (napi->poll_owner != raw_smp_processor_id() &&
 		    spin_trylock(&napi->poll_lock)) {
 			budget = poll_one_napi(dev->npinfo, napi, budget);
 			spin_unlock(&napi->poll_lock);
@@ -219,30 +219,35 @@ static void refill_skbs(void)
 
 static void zap_completion_queue(void)
 {
-	unsigned long flags;
 	struct softnet_data *sd = &get_cpu_var(softnet_data);
+	struct sk_buff *clist = NULL;
+	unsigned long flags;
 
 	if (sd->completion_queue) {
-		struct sk_buff *clist;
-
 		local_irq_save(flags);
 		clist = sd->completion_queue;
 		sd->completion_queue = NULL;
 		local_irq_restore(flags);
-
-		while (clist != NULL) {
-			struct sk_buff *skb = clist;
-			clist = clist->next;
-			if (skb->destructor) {
-				atomic_inc(&skb->users);
-				dev_kfree_skb_any(skb); /* put this one back */
-			} else {
-				__kfree_skb(skb);
-			}
-		}
 	}
+
+	/*
+	 * Took the list private, can drop our softnet
+	 * reference:
+	 */
 	put_cpu_var(softnet_data);
+
+	while (clist != NULL) {
+		struct sk_buff *skb = clist;
+		clist = clist->next;
+		if (skb->destructor) {
+			atomic_inc(&skb->users);
+			dev_kfree_skb_any(skb); /* put this one back */
+		} else {
+			__kfree_skb(skb);
+		}
+	}
 }
 
 static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
@@ -250,13 +255,26 @@ static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
 	int count = 0;
 	struct sk_buff *skb;
 
+#ifdef CONFIG_PREEMPT_RT
+	/*
+	 * On -rt skb_pool.lock is schedulable, so if we are
+	 * in an atomic context we just try to dequeue from the
+	 * pool and fail if we cannot get one.
+	 */
+	if (in_atomic() || irqs_disabled())
+		goto pick_atomic;
+#endif
 	zap_completion_queue();
 	refill_skbs();
 repeat:
 
 	skb = alloc_skb(len, GFP_ATOMIC);
-	if (!skb)
+	if (!skb) {
+#ifdef CONFIG_PREEMPT_RT
+pick_atomic:
+#endif
 		skb = skb_dequeue(&skb_pool);
+	}
 
 	if (!skb) {
 		if (++count < 10) {
@@ -276,7 +294,7 @@ static int netpoll_owner_active(struct net_device *dev)
 	struct napi_struct *napi;
 
 	list_for_each_entry(napi, &dev->napi_list, dev_list) {
-		if (napi->poll_owner == smp_processor_id())
+		if (napi->poll_owner == raw_smp_processor_id())
 			return 1;
 	}
 	return 0;
@@ -302,7 +320,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 
 		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
 
-		local_irq_save(flags);
+		local_irq_save_nort(flags);
 		/* try until next clock tick */
 		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
 		     tries > 0; --tries) {
@@ -329,7 +347,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 				    "netpoll_send_skb(): %s enabled interrupts in poll (%pF)\n",
 				    dev->name, ops->ndo_start_xmit);
 
-		local_irq_restore(flags);
+		local_irq_restore_nort(flags);
 	}
 
 	if (status != NETDEV_TX_OK) {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index ac4dec1..e4d0cc4 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -202,7 +202,10 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
  */
 static struct sock *icmp_sk(struct net *net)
 {
-	return net->ipv4.icmp_sk[smp_processor_id()];
+	/*
+	 * Should be safe on PREEMPT_SOFTIRQS/HARDIRQS to use raw-smp-processor-id:
+	 */
+	return net->ipv4.icmp_sk[raw_smp_processor_id()];
 }
 
 static inline struct sock *icmp_xmit_lock(struct net *net)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 71d4e5b..f322a91 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -204,13 +204,13 @@ struct rt_hash_bucket {
 };
 
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
-	defined(CONFIG_PROVE_LOCKING)
+	defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_PREEMPT_RT)
 /*
  * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks
  * The size of this table is a power of two and depends on the number of CPUS.
  * (on lockdep we have a quite big spinlock_t, so keep the size down there)
  */
-#ifdef CONFIG_LOCKDEP
+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT)
 # define RT_HASH_LOCK_SZ	256
 #else
 # if NR_CPUS >= 32
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 9210e31..053133a 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -384,7 +384,7 @@ ip6t_do_table(struct sk_buff *skb,
 
 	xt_info_rdlock_bh();
 	private = table->private;
-	table_base = private->entries[smp_processor_id()];
+	table_base = private->entries[raw_smp_processor_id()];
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index ff4dd53..6d6a2ac 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -12,6 +12,7 @@
  */
 
 #include
+#include
 #include
 #include
 #include
@@ -25,6 +26,7 @@
 #include
 #include
 #include
+#include
 #include
 
 /* Main transmission queue. */
@@ -77,7 +79,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
 {
 	int ret;
 
-	if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
+	if (unlikely(dev_queue->xmit_lock_owner == raw_smp_processor_id())) {
 		/*
 		 * Same CPU holding the lock. It may be a transient
 		 * configuration error, when hard_start_xmit() recurses. We
@@ -119,7 +121,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 	/* And release qdisc */
 	spin_unlock(root_lock);
 
-	HARD_TX_LOCK(dev, txq, smp_processor_id());
+	HARD_TX_LOCK(dev, txq, raw_smp_processor_id());
 	if (!netif_tx_queue_stopped(txq) &&
 	    !netif_tx_queue_frozen(txq))
 		ret = dev_hard_start_xmit(skb, dev, txq);
@@ -787,9 +789,12 @@ void dev_deactivate(struct net_device *dev)
 	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
 	synchronize_rcu();
 
-	/* Wait for outstanding qdisc_run calls. */
+	/*
+	 * Wait for outstanding qdisc_run calls.
+	 * TODO: shouldn't this be wakeup-based, instead of polling it?
+	 */
 	while (some_qdisc_is_busy(dev))
-		yield();
+		msleep(1);
 }
 
 static void dev_init_scheduler_queue(struct net_device *dev,
-- 
1.7.0.4
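
Note on the net_tx_action() change: the mainline trylock-or-reschedule pattern
assumes the qdisc lock holder runs on another CPU and cannot be preempted, so
retrying later is cheap and finite. Once softirqs run in preemptible thread
context, the holder can stay preempted indefinitely while the softirq keeps
reactivating itself. The sketch below is a userspace analogy of the two
patterns, not kernel code; all names (qdisc_lock, qdisc_scheduled, the two
handler functions) are hypothetical, and it builds with "gcc -pthread".

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t qdisc_lock = PTHREAD_MUTEX_INITIALIZER;
static bool qdisc_scheduled = true;

/* Mainline pattern: never block; if the lock is busy, re-arm and retry. */
static void tx_action_trylock(void)
{
	if (pthread_mutex_trylock(&qdisc_lock) == 0) {
		qdisc_scheduled = false;	/* clear_bit(__QDISC_STATE_SCHED) */
		/* ... run the queue ... */
		pthread_mutex_unlock(&qdisc_lock);
	} else {
		/*
		 * Re-arm and return. Fine when the holder is another CPU
		 * that cannot be preempted; a livelock once the holder is
		 * a preempted thread, because the handler keeps
		 * reactivating itself without the holder ever running.
		 */
		qdisc_scheduled = true;
	}
}

/* -rt pattern: take the (now sleeping) lock unconditionally. */
static void tx_action_lock(void)
{
	pthread_mutex_lock(&qdisc_lock);	/* sleeps until the holder releases it */
	qdisc_scheduled = false;
	/* ... run the queue ... */
	pthread_mutex_unlock(&qdisc_lock);
}

int main(void)
{
	tx_action_trylock();
	tx_action_lock();
	printf("scheduled=%d\n", (int)qdisc_scheduled);
	return 0;
}

The unconditional lock is viable on -rt because spinlocks become
priority-inheriting mutexes there: blocking on the lock boosts the preempted
holder instead of spinning against it.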
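
A second recurring idiom in this patch shows up in zap_completion_queue() and
in the cond_resched_softirq_context() comment in net_tx_action(): detach the
shared completion list while protected, then process it in private, where it
is safe to sleep or reschedule. A minimal plain-C sketch of that
detach-then-process pattern, with made-up names (node, shared_head,
process_all):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	struct node *next;
	int payload;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *shared_head;

static void process_all(void)
{
	struct node *clist;

	/* Critical section is O(1): steal the whole list at once. */
	pthread_mutex_lock(&list_lock);
	clist = shared_head;
	shared_head = NULL;
	pthread_mutex_unlock(&list_lock);

	/* The list is private now; walk and free it with no lock held. */
	while (clist != NULL) {
		struct node *n = clist;
		clist = clist->next;
		printf("freeing payload %d\n", n->payload);
		free(n);
	}
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct node *n = malloc(sizeof(*n));
		if (!n)
			return 1;
		n->payload = i;
		n->next = shared_head;
		shared_head = n;
	}
	process_all();
	return 0;
}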