From a13861a28b90541aa207532d237e7a940f1b1c7b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 21 Dec 2011 20:00:32 +0000 Subject: bridge: provide a mtu() method for fake_dst_ops Commit 618f9bc74a039da76 (net: Move mtu handling down to the protocol depended handlers) forgot the bridge netfilter case, adding a NULL dereference in ip_fragment(). Reported-by: Chris Boot CC: Steffen Klassert Signed-off-by: Eric Dumazet Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/bridge/br_netfilter.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index d6ec3720c77..08757dc670a 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -114,12 +114,18 @@ static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst, const vo return NULL; } +static unsigned int fake_mtu(const struct dst_entry *dst) +{ + return dst->dev->mtu; +} + static struct dst_ops fake_dst_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), .update_pmtu = fake_update_pmtu, .cow_metrics = fake_cow_metrics, .neigh_lookup = fake_neigh_lookup, + .mtu = fake_mtu, }; /* -- cgit v1.2.3 From 7838f2ce36b6ab5c13ef20b1857e3bbd567f1759 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 22 Dec 2011 02:05:07 +0000 Subject: mqprio: Avoid panic if no options are provided Userspace may not provide TCA_OPTIONS, in fact tc currently does so not do so if no arguments are specified on the command line. Return EINVAL instead of panicing. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/sched/sch_mqprio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index f88256cbacb..28de4309233 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -107,7 +107,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) if (!netif_is_multiqueue(dev)) return -EOPNOTSUPP; - if (nla_len(opt) < sizeof(*qopt)) + if (!opt || nla_len(opt) < sizeof(*qopt)) return -EINVAL; qopt = nla_data(opt); -- cgit v1.2.3 From e688a604807647c9450f9c12a7cb6d027150a895 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Dec 2011 04:15:53 +0000 Subject: net: introduce DST_NOPEER dst flag Chris Boot reported crashes occurring in ipv6_select_ident(). [ 461.457562] RIP: 0010:[] [] ipv6_select_ident+0x31/0xa7 [ 461.578229] Call Trace: [ 461.580742] [ 461.582870] [] ? udp6_ufo_fragment+0x124/0x1a2 [ 461.589054] [] ? ipv6_gso_segment+0xc0/0x155 [ 461.595140] [] ? skb_gso_segment+0x208/0x28b [ 461.601198] [] ? ipv6_confirm+0x146/0x15e [nf_conntrack_ipv6] [ 461.608786] [] ? nf_iterate+0x41/0x77 [ 461.614227] [] ? dev_hard_start_xmit+0x357/0x543 [ 461.620659] [] ? nf_hook_slow+0x73/0x111 [ 461.626440] [] ? br_parse_ip_options+0x19a/0x19a [bridge] [ 461.633581] [] ? dev_queue_xmit+0x3af/0x459 [ 461.639577] [] ? br_dev_queue_push_xmit+0x72/0x76 [bridge] [ 461.646887] [] ? br_nf_post_routing+0x17d/0x18f [bridge] [ 461.653997] [] ? nf_iterate+0x41/0x77 [ 461.659473] [] ? br_flood+0xfa/0xfa [bridge] [ 461.665485] [] ? nf_hook_slow+0x73/0x111 [ 461.671234] [] ? br_flood+0xfa/0xfa [bridge] [ 461.677299] [] ? nf_bridge_update_protocol+0x20/0x20 [bridge] [ 461.684891] [] ? nf_ct_zone+0xa/0x17 [nf_conntrack] [ 461.691520] [] ? br_flood+0xfa/0xfa [bridge] [ 461.697572] [] ? NF_HOOK.constprop.8+0x3c/0x56 [bridge] [ 461.704616] [] ? nf_bridge_push_encap_header+0x1c/0x26 [bridge] [ 461.712329] [] ? br_nf_forward_finish+0x8a/0x95 [bridge] [ 461.719490] [] ? nf_bridge_pull_encap_header+0x1c/0x27 [bridge] [ 461.727223] [] ? br_nf_forward_ip+0x1c0/0x1d4 [bridge] [ 461.734292] [] ? nf_iterate+0x41/0x77 [ 461.739758] [] ? __br_deliver+0xa0/0xa0 [bridge] [ 461.746203] [] ? nf_hook_slow+0x73/0x111 [ 461.751950] [] ? __br_deliver+0xa0/0xa0 [bridge] [ 461.758378] [] ? NF_HOOK.constprop.4+0x56/0x56 [bridge] This is caused by bridge netfilter special dst_entry (fake_rtable), a special shared entry, where attaching an inetpeer makes no sense. Problem is present since commit 87c48fa3b46 (ipv6: make fragment identifications less predictable) Introduce DST_NOPEER dst flag and make sure ipv6_select_ident() and __ip_select_ident() fallback to the 'no peer attached' handling. Reported-by: Chris Boot Tested-by: Chris Boot Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst.h | 1 + net/bridge/br_netfilter.c | 2 +- net/ipv4/route.c | 4 ++-- net/ipv6/ip6_output.c | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index 6faec1a6021..75766b42660 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -53,6 +53,7 @@ struct dst_entry { #define DST_NOHASH 0x0008 #define DST_NOCACHE 0x0010 #define DST_NOCOUNT 0x0020 +#define DST_NOPEER 0x0040 short error; short obsolete; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 08757dc670a..fa8b8f76358 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -147,7 +147,7 @@ void br_netfilter_rtable_init(struct net_bridge *br) rt->dst.dev = br->dev; rt->dst.path = &rt->dst; dst_init_metrics(&rt->dst, br_dst_default_metrics, true); - rt->dst.flags = DST_NOXFRM; + rt->dst.flags = DST_NOXFRM | DST_NOPEER; rt->dst.ops = &fake_dst_ops; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 85cc053d9d6..94cdbc55ca7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1367,7 +1367,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) { struct rtable *rt = (struct rtable *) dst; - if (rt) { + if (rt && !(rt->dst.flags & DST_NOPEER)) { if (rt->peer == NULL) rt_bind_peer(rt, rt->rt_dst, 1); @@ -1378,7 +1378,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) iph->id = htons(inet_getid(rt->peer, more)); return; } - } else + } else if (!rt) printk(KERN_DEBUG "rt_bind_peer(0) @%p\n", __builtin_return_address(0)); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 84d0bd5cac9..ec562713db9 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -603,7 +603,7 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) static atomic_t ipv6_fragmentation_id; int old, new; - if (rt) { + if (rt && !(rt->dst.flags & DST_NOPEER)) { struct inet_peer *peer; if (!rt->rt6i_peer) -- cgit v1.2.3 From a0a129f8b6cff54ab479324a54aefdab5db4f240 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Thu, 22 Dec 2011 13:35:22 +0000 Subject: rps: fix insufficient bounds checking in store_rps_dev_flow_table_cnt() Setting a large rps_flow_cnt like (1 << 30) on 32-bit platform will cause a kernel oops due to insufficient bounds checking. if (count > 1<<30) { /* Enforce a limit to prevent overflow */ return -EINVAL; } count = roundup_pow_of_two(count); table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count)); Note that the macro RPS_DEV_FLOW_TABLE_SIZE(count) is defined as: ... + (count * sizeof(struct rps_dev_flow)) where sizeof(struct rps_dev_flow) is 8. (1 << 30) * 8 will overflow 32 bits. This patch replaces the magic number (1 << 30) with a symbolic bound. Suggested-by: Eric Dumazet Signed-off-by: Xi Wang Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index c71c434a4c0..385aefe5364 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -665,11 +665,14 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, if (count) { int i; - if (count > 1<<30) { + if (count > INT_MAX) + return -EINVAL; + count = roundup_pow_of_two(count); + if (count > (ULONG_MAX - sizeof(struct rps_dev_flow_table)) + / sizeof(struct rps_dev_flow)) { /* Enforce a limit to prevent overflow */ return -EINVAL; } - count = roundup_pow_of_two(count); table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count)); if (!table) return -ENOMEM; -- cgit v1.2.3 From 0fd7bac6b6157eed6cf0cb86a1e88ba29e57c033 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 21 Dec 2011 07:11:44 +0000 Subject: net: relax rcvbuf limits skb->truesize might be big even for a small packet. Its even bigger after commit 87fb4b7b533 (net: more accurate skb truesize) and big MTU. We should allow queueing at least one packet per receiver, even with a low RCVBUF setting. Reported-by: Michal Simek Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 4 +++- net/core/sock.c | 6 +----- net/packet/af_packet.c | 6 ++---- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index abb6e0f0c3c..32e39371fba 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -637,12 +637,14 @@ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) /* * Take into account size of receive queue and backlog queue + * Do not take into account this skb truesize, + * to allow even a single big packet to come. */ static inline bool sk_rcvqueues_full(const struct sock *sk, const struct sk_buff *skb) { unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc); - return qsize + skb->truesize > sk->sk_rcvbuf; + return qsize > sk->sk_rcvbuf; } /* The per-socket spinlock must be held here. */ diff --git a/net/core/sock.c b/net/core/sock.c index 4ed7b1d12f5..b23f174ab84 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -288,11 +288,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) unsigned long flags; struct sk_buff_head *list = &sk->sk_receive_queue; - /* Cast sk->rcvbuf to unsigned... It's pointless, but reduces - number of warnings when compiling with -W --ANK - */ - if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= - (unsigned)sk->sk_rcvbuf) { + if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) { atomic_inc(&sk->sk_drops); trace_sock_rcvqueue_full(sk, skb); return -ENOMEM; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 82a6f34d39d..3891702b81d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1630,8 +1630,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, if (snaplen > res) snaplen = res; - if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= - (unsigned)sk->sk_rcvbuf) + if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) goto drop_n_acct; if (skb_shared(skb)) { @@ -1762,8 +1761,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (po->tp_version <= TPACKET_V2) { if (macoff + snaplen > po->rx_ring.frame_size) { if (po->copy_thresh && - atomic_read(&sk->sk_rmem_alloc) + skb->truesize - < (unsigned)sk->sk_rcvbuf) { + atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { if (skb_shared(skb)) { copy_skb = skb_clone(skb, GFP_ATOMIC); } else { -- cgit v1.2.3 From 0354b48f633ae435acbc01b470a1ce8cfeff3e9f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 16 Dec 2011 18:35:15 +0100 Subject: netfilter: xt_connbytes: handle negation correctly "! --connbytes 23:42" should match if the packet/byte count is not in range. As there is no explict "invert match" toggle in the match structure, userspace swaps the from and to arguments (i.e., as if "--connbytes 42:23" were given). However, "what <= 23 && what >= 42" will always be false. Change things so we use "||" in case "from" is larger than "to". This change may look like it breaks backwards compatibility when "to" is 0. However, older iptables binaries will refuse "connbytes 42:0", and current releases treat it to mean "! --connbytes 0:42", so we should be fine. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_connbytes.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 5b138506690..9ddf1c3bfb3 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -87,10 +87,10 @@ connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par) break; } - if (sinfo->count.to) + if (sinfo->count.to >= sinfo->count.from) return what <= sinfo->count.to && what >= sinfo->count.from; - else - return what >= sinfo->count.from; + else /* inverted */ + return what < sinfo->count.to || what > sinfo->count.from; } static int connbytes_mt_check(const struct xt_mtchk_param *par) -- cgit v1.2.3