Diffstat (limited to 'net')
-rw-r--r--   net/core/dev.c              78
-rw-r--r--   net/core/sock.c              3
-rw-r--r--   net/ipv4/icmp.c             30
-rw-r--r--   net/ipv4/route.c             2
-rw-r--r--   net/ipv4/sysctl_net_ipv4.c   7
-rw-r--r--   net/mac80211/rx.c            2
-rw-r--r--   net/netfilter/core.c         6
-rw-r--r--   net/packet/af_packet.c       5
-rw-r--r--   net/rds/ib_rdma.c            3
-rw-r--r--   net/sched/sch_generic.c      2
10 files changed, 116 insertions, 22 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index cebdc15ce327..96392dbb1951 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -220,14 +220,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
static inline void rps_lock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
- spin_lock(&sd->input_pkt_queue.lock);
+ raw_spin_lock(&sd->input_pkt_queue.raw_lock);
#endif
}
static inline void rps_unlock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
- spin_unlock(&sd->input_pkt_queue.lock);
+ raw_spin_unlock(&sd->input_pkt_queue.raw_lock);
#endif
}
@@ -1823,6 +1823,7 @@ static inline void __netif_reschedule(struct Qdisc *q)
sd->output_queue_tailp = &q->next_sched;
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_restore(flags);
+ preempt_check_resched_rt();
}
void __netif_schedule(struct Qdisc *q)
@@ -1844,6 +1845,7 @@ void dev_kfree_skb_irq(struct sk_buff *skb)
sd->completion_queue = skb;
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_restore(flags);
+ preempt_check_resched_rt();
}
}
EXPORT_SYMBOL(dev_kfree_skb_irq);
@@ -2902,6 +2904,7 @@ enqueue:
rps_unlock(sd);
local_irq_restore(flags);
+ preempt_check_resched_rt();
atomic_long_inc(&skb->dev->rx_dropped);
kfree_skb(skb);
@@ -2939,7 +2942,7 @@ int netif_rx(struct sk_buff *skb)
struct rps_dev_flow voidflow, *rflow = &voidflow;
int cpu;
- preempt_disable();
+ migrate_disable();
rcu_read_lock();
cpu = get_rps_cpu(skb->dev, skb, &rflow);
@@ -2949,13 +2952,13 @@ int netif_rx(struct sk_buff *skb)
ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
rcu_read_unlock();
- preempt_enable();
+ migrate_enable();
} else
#endif
{
unsigned int qtail;
- ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
- put_cpu();
+ ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail);
+ put_cpu_light();
}
return ret;
}
@@ -2965,16 +2968,46 @@ int netif_rx_ni(struct sk_buff *skb)
{
int err;
- preempt_disable();
+ migrate_disable();
err = netif_rx(skb);
if (local_softirq_pending())
- do_softirq();
- preempt_enable();
+ thread_do_softirq();
+ migrate_enable();
return err;
}
EXPORT_SYMBOL(netif_rx_ni);
+#ifdef CONFIG_PREEMPT_RT_FULL
+/*
+ * RT runs ksoftirqd as a real time thread and the root_lock is a
+ * "sleeping spinlock". If the trylock fails then we can go into an
+ * infinite loop when ksoftirqd preempted the task which actually
+ * holds the lock, because we requeue q and raise NET_TX softirq
+ * causing ksoftirqd to loop forever.
+ *
+ * It's safe to use spin_lock on RT here as softirqs run in thread
+ * context and cannot deadlock against the thread which is holding
+ * root_lock.
+ *
+ * On !RT the trylock might fail, but there we bail out from the
+ * softirq loop after 10 attempts which we can't do on RT. And the
+ * task holding root_lock cannot be preempted, so the only downside of
+ * that trylock is that we need 10 loops to decide that we should have
+ * given up in the first one :)
+ */
+static inline int take_root_lock(spinlock_t *lock)
+{
+ spin_lock(lock);
+ return 1;
+}
+#else
+static inline int take_root_lock(spinlock_t *lock)
+{
+ return spin_trylock(lock);
+}
+#endif
+
static void net_tx_action(struct softirq_action *h)
{
struct softnet_data *sd = &__get_cpu_var(softnet_data);
@@ -3013,7 +3046,7 @@ static void net_tx_action(struct softirq_action *h)
head = head->next_sched;
root_lock = qdisc_lock(q);
- if (spin_trylock(root_lock)) {
+ if (take_root_lock(root_lock)) {
smp_mb__before_clear_bit();
clear_bit(__QDISC_STATE_SCHED,
&q->state);
@@ -3342,7 +3375,7 @@ static void flush_backlog(void *arg)
skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
if (skb->dev == dev) {
__skb_unlink(skb, &sd->input_pkt_queue);
- kfree_skb(skb);
+ __skb_queue_tail(&sd->tofree_queue, skb);
input_queue_head_incr(sd);
}
}
@@ -3351,10 +3384,13 @@ static void flush_backlog(void *arg)
skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
if (skb->dev == dev) {
__skb_unlink(skb, &sd->process_queue);
- kfree_skb(skb);
+ __skb_queue_tail(&sd->tofree_queue, skb);
input_queue_head_incr(sd);
}
}
+
+ if (!skb_queue_empty(&sd->tofree_queue))
+ raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
static int napi_gro_complete(struct sk_buff *skb)
@@ -3692,6 +3728,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
} else
#endif
local_irq_enable();
+ preempt_check_resched_rt();
}
static int process_backlog(struct napi_struct *napi, int quota)
@@ -3764,6 +3801,7 @@ void __napi_schedule(struct napi_struct *n)
local_irq_save(flags);
____napi_schedule(&__get_cpu_var(softnet_data), n);
local_irq_restore(flags);
+ preempt_check_resched_rt();
}
EXPORT_SYMBOL(__napi_schedule);
@@ -3838,10 +3876,17 @@ static void net_rx_action(struct softirq_action *h)
struct softnet_data *sd = &__get_cpu_var(softnet_data);
unsigned long time_limit = jiffies + 2;
int budget = netdev_budget;
+ struct sk_buff *skb;
void *have;
local_irq_disable();
+ while ((skb = __skb_dequeue(&sd->tofree_queue))) {
+ local_irq_enable();
+ kfree_skb(skb);
+ local_irq_disable();
+ }
+
while (!list_empty(&sd->poll_list)) {
struct napi_struct *n;
int work, weight;
@@ -6261,6 +6306,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_enable();
+ preempt_check_resched_rt();
/* Process offline CPU's input_pkt_queue */
while ((skb = __skb_dequeue(&oldsd->process_queue))) {
@@ -6271,6 +6317,9 @@ static int dev_cpu_callback(struct notifier_block *nfb,
netif_rx(skb);
input_queue_head_incr(oldsd);
}
+ while ((skb = __skb_dequeue(&oldsd->tofree_queue))) {
+ kfree_skb(skb);
+ }
return NOTIFY_OK;
}
@@ -6535,8 +6584,9 @@ static int __init net_dev_init(void)
struct softnet_data *sd = &per_cpu(softnet_data, i);
memset(sd, 0, sizeof(*sd));
- skb_queue_head_init(&sd->input_pkt_queue);
- skb_queue_head_init(&sd->process_queue);
+ skb_queue_head_init_raw(&sd->input_pkt_queue);
+ skb_queue_head_init_raw(&sd->process_queue);
+ skb_queue_head_init_raw(&sd->tofree_queue);
sd->completion_queue = NULL;
INIT_LIST_HEAD(&sd->poll_list);
sd->output_queue = NULL;
diff --git a/net/core/sock.c b/net/core/sock.c
index 832cf043a8f7..6ab07d7867f1 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2138,12 +2138,11 @@ void lock_sock_nested(struct sock *sk, int subclass)
if (sk->sk_lock.owned)
__lock_sock(sk);
sk->sk_lock.owned = 1;
- spin_unlock(&sk->sk_lock.slock);
+ spin_unlock_bh(&sk->sk_lock.slock);
/*
* The sk_lock has mutex_lock() semantics here:
*/
mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
- local_bh_enable();
}
EXPORT_SYMBOL(lock_sock_nested);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 2cb2bf845641..9a37732e8f32 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -69,6 +69,7 @@
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
+#include <linux/sysrq.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
@@ -799,6 +800,30 @@ out_err:
}
/*
+ * 32bit and 64bit have different timestamp lengths, so we check for
+ * the cookie at offset 20 and verify it is repeated at offset 50
+ */
+#define CO_POS0 20
+#define CO_POS1 50
+#define CO_SIZE sizeof(int)
+#define ICMP_SYSRQ_SIZE 57
+
+/*
+ * We got an ICMP_SYSRQ_SIZE sized ping request. Check for the cookie
+ * pattern and if it matches send the next byte as a trigger to sysrq.
+ */
+static void icmp_check_sysrq(struct net *net, struct sk_buff *skb)
+{
+ int cookie = htonl(net->ipv4.sysctl_icmp_echo_sysrq);
+ char *p = skb->data;
+
+ if (!memcmp(&cookie, p + CO_POS0, CO_SIZE) &&
+ !memcmp(&cookie, p + CO_POS1, CO_SIZE) &&
+ p[CO_POS0 + CO_SIZE] == p[CO_POS1 + CO_SIZE])
+ handle_sysrq(p[CO_POS0 + CO_SIZE]);
+}
+
+/*
* Handle ICMP_ECHO ("ping") requests.
*
* RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo
@@ -825,6 +850,11 @@ static void icmp_echo(struct sk_buff *skb)
icmp_param.data_len = skb->len;
icmp_param.head_len = sizeof(struct icmphdr);
icmp_reply(&icmp_param, skb);
+
+ if (skb->len == ICMP_SYSRQ_SIZE &&
+ net->ipv4.sysctl_icmp_echo_sysrq) {
+ icmp_check_sysrq(net, skb);
+ }
}
}
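
[Note: the following userspace sketch is not part of the patch. It only illustrates how a 57-byte echo payload would have to be laid out to satisfy the checks in icmp_check_sysrq() above -- the cookie repeated at offsets 20 and 50, with the sysrq trigger byte directly behind each copy. The helper name build_sysrq_payload() is hypothetical.]

/* Hypothetical userspace helper mirroring the offsets defined above. */
#include <string.h>
#include <arpa/inet.h>

#define CO_POS0		20
#define CO_POS1		50
#define CO_SIZE		sizeof(int)
#define ICMP_SYSRQ_SIZE	57

static void build_sysrq_payload(char *buf, unsigned int cookie, char sysrq_cmd)
{
	int nc = htonl(cookie);		/* icmp_check_sysrq() compares in network order */

	memset(buf, 0, ICMP_SYSRQ_SIZE);
	memcpy(buf + CO_POS0, &nc, CO_SIZE);
	memcpy(buf + CO_POS1, &nc, CO_SIZE);
	buf[CO_POS0 + CO_SIZE] = sysrq_cmd;	/* e.g. 'h'; passed to handle_sysrq() */
	buf[CO_POS1 + CO_SIZE] = sysrq_cmd;	/* must match the byte at offset 24 */
}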
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 108c73d760df..cddcd26c2c82 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -250,7 +250,7 @@ struct rt_hash_bucket {
};
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
- defined(CONFIG_PROVE_LOCKING)
+ defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_PREEMPT_RT_FULL)
/*
* Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks
* The size of this table is a power of two and depends on the number of CPUS.
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 086c97327b9b..38d0c7e747a1 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -729,6 +729,13 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "icmp_echo_sysrq",
+ .data = &init_net.ipv4.sysctl_icmp_echo_sysrq,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
.procname = "icmp_ignore_bogus_error_responses",
.data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
.maxlen = sizeof(int),
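
[Note: illustrative sketch, not part of the patch. The new entry in ipv4_net_table appears under /proc/sys/net/ipv4/, so the cookie could be armed from userspace roughly as below; the value 0x01020304 is an arbitrary example and arm_icmp_sysrq() is a hypothetical name.]

/* Hypothetical userspace sketch: write a cookie value to the new sysctl. */
#include <stdio.h>

static int arm_icmp_sysrq(unsigned int cookie)
{
	FILE *f = fopen("/proc/sys/net/ipv4/icmp_echo_sysrq", "w");

	if (!f)
		return -1;
	fprintf(f, "%u\n", cookie);	/* proc_dointvec parses a plain integer */
	return fclose(f);
}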
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 6937a84bef3a..884ea193b382 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3063,7 +3063,7 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb)
struct ieee80211_supported_band *sband;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
- WARN_ON_ONCE(softirq_count() == 0);
+ WARN_ON_ONCE_NONRT(softirq_count() == 0);
if (WARN_ON(status->band < 0 ||
status->band >= IEEE80211_NUM_BANDS))
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index e1b7e051332e..151061b2e474 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -20,11 +20,17 @@
#include <linux/proc_fs.h>
#include <linux/mutex.h>
#include <linux/slab.h>
+#include <linux/locallock.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include "nf_internals.h"
+#ifdef CONFIG_PREEMPT_RT_BASE
+DEFINE_LOCAL_IRQ_LOCK(xt_write_lock);
+EXPORT_PER_CPU_SYMBOL(xt_write_lock);
+#endif
+
static DEFINE_MUTEX(afinfo_mutex);
const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index dbe1715c629f..af701f316bdc 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -88,6 +88,7 @@
#include <linux/virtio_net.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>
+#include <linux/delay.h>
#ifdef CONFIG_INET
#include <net/inet_common.h>
@@ -680,7 +681,7 @@ static void prb_retire_rx_blk_timer_expired(unsigned long data)
if (BLOCK_NUM_PKTS(pbd)) {
while (atomic_read(&pkc->blk_fill_in_prog)) {
/* Waiting for skb_copy_bits to finish... */
- cpu_relax();
+ cpu_chill();
}
}
@@ -925,7 +926,7 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
if (!(status & TP_STATUS_BLK_TMO)) {
while (atomic_read(&pkc->blk_fill_in_prog)) {
/* Waiting for skb_copy_bits to finish... */
- cpu_relax();
+ cpu_chill();
}
}
prb_close_block(pkc, pbd, po, status);
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index e8fdb172adbb..5a44c6e77cd8 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -34,6 +34,7 @@
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/llist.h>
+#include <linux/delay.h>
#include "rds.h"
#include "ib.h"
@@ -286,7 +287,7 @@ static inline void wait_clean_list_grace(void)
for_each_online_cpu(cpu) {
flag = &per_cpu(clean_list_grace, cpu);
while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
- cpu_relax();
+ cpu_chill();
}
}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 67fc573e013a..455d21a8b0e0 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -848,7 +848,7 @@ void dev_deactivate_many(struct list_head *head)
/* Wait for outstanding qdisc_run calls. */
list_for_each_entry(dev, head, unreg_list)
while (some_qdisc_is_busy(dev))
- yield();
+ msleep(1);
}
void dev_deactivate(struct net_device *dev)