From a13861a28b90541aa207532d237e7a940f1b1c7b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 21 Dec 2011 20:00:32 +0000
Subject: bridge: provide a mtu() method for fake_dst_ops

Commit 618f9bc74a039da76 (net: Move mtu handling down to the protocol
depended handlers) forgot the bridge netfilter case, adding a NULL
dereference in ip_fragment().

Reported-by: Chris Boot <bootc@bootc.net>
CC: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netfilter.c | 6 ++++++
 1 file changed, 6 insertions(+)
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index d6ec3720c77..08757dc670a 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -114,12 +114,18 @@ static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst, const vo
 	return NULL;
 }
 
+static unsigned int fake_mtu(const struct dst_entry *dst)
+{
+	return dst->dev->mtu;
+}
+
 static struct dst_ops fake_dst_ops = {
 	.family =		AF_INET,
 	.protocol =		cpu_to_be16(ETH_P_IP),
 	.update_pmtu =		fake_update_pmtu,
 	.cow_metrics =		fake_cow_metrics,
 	.neigh_lookup =		fake_neigh_lookup,
+	.mtu =			fake_mtu,
 };
 
 /*
-- 
cgit v1.2.3


From 7838f2ce36b6ab5c13ef20b1857e3bbd567f1759 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@redhat.com>
Date: Thu, 22 Dec 2011 02:05:07 +0000
Subject: mqprio: Avoid panic if no options are provided

Userspace may not provide TCA_OPTIONS; in fact tc currently does
not do so if no arguments are specified on the command line.
Return EINVAL instead of panicking.

Signed-off-by: Thomas Graf <tgraf@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_mqprio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index f88256cbacb..28de4309233 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -107,7 +107,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
 	if (!netif_is_multiqueue(dev))
 		return -EOPNOTSUPP;
 
-	if (nla_len(opt) < sizeof(*qopt))
+	if (!opt || nla_len(opt) < sizeof(*qopt))
 		return -EINVAL;
 
 	qopt = nla_data(opt);
-- 
cgit v1.2.3


From e688a604807647c9450f9c12a7cb6d027150a895 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 22 Dec 2011 04:15:53 +0000
Subject: net: introduce DST_NOPEER dst flag

Chris Boot reported crashes occurring in ipv6_select_ident().

[  461.457562] RIP: 0010:[<ffffffff812dde61>]  [<ffffffff812dde61>]
ipv6_select_ident+0x31/0xa7

[  461.578229] Call Trace:
[  461.580742] <IRQ>
[  461.582870]  [<ffffffff812efa7f>] ? udp6_ufo_fragment+0x124/0x1a2
[  461.589054]  [<ffffffff812dbfe0>] ? ipv6_gso_segment+0xc0/0x155
[  461.595140]  [<ffffffff812700c6>] ? skb_gso_segment+0x208/0x28b
[  461.601198]  [<ffffffffa03f236b>] ? ipv6_confirm+0x146/0x15e
[nf_conntrack_ipv6]
[  461.608786]  [<ffffffff81291c4d>] ? nf_iterate+0x41/0x77
[  461.614227]  [<ffffffff81271d64>] ? dev_hard_start_xmit+0x357/0x543
[  461.620659]  [<ffffffff81291cf6>] ? nf_hook_slow+0x73/0x111
[  461.626440]  [<ffffffffa0379745>] ? br_parse_ip_options+0x19a/0x19a
[bridge]
[  461.633581]  [<ffffffff812722ff>] ? dev_queue_xmit+0x3af/0x459
[  461.639577]  [<ffffffffa03747d2>] ? br_dev_queue_push_xmit+0x72/0x76
[bridge]
[  461.646887]  [<ffffffffa03791e3>] ? br_nf_post_routing+0x17d/0x18f
[bridge]
[  461.653997]  [<ffffffff81291c4d>] ? nf_iterate+0x41/0x77
[  461.659473]  [<ffffffffa0374760>] ? br_flood+0xfa/0xfa [bridge]
[  461.665485]  [<ffffffff81291cf6>] ? nf_hook_slow+0x73/0x111
[  461.671234]  [<ffffffffa0374760>] ? br_flood+0xfa/0xfa [bridge]
[  461.677299]  [<ffffffffa0379215>] ?
nf_bridge_update_protocol+0x20/0x20 [bridge]
[  461.684891]  [<ffffffffa03bb0e5>] ? nf_ct_zone+0xa/0x17 [nf_conntrack]
[  461.691520]  [<ffffffffa0374760>] ? br_flood+0xfa/0xfa [bridge]
[  461.697572]  [<ffffffffa0374812>] ? NF_HOOK.constprop.8+0x3c/0x56
[bridge]
[  461.704616]  [<ffffffffa0379031>] ?
nf_bridge_push_encap_header+0x1c/0x26 [bridge]
[  461.712329]  [<ffffffffa037929f>] ? br_nf_forward_finish+0x8a/0x95
[bridge]
[  461.719490]  [<ffffffffa037900a>] ?
nf_bridge_pull_encap_header+0x1c/0x27 [bridge]
[  461.727223]  [<ffffffffa0379974>] ? br_nf_forward_ip+0x1c0/0x1d4 [bridge]
[  461.734292]  [<ffffffff81291c4d>] ? nf_iterate+0x41/0x77
[  461.739758]  [<ffffffffa03748cc>] ? __br_deliver+0xa0/0xa0 [bridge]
[  461.746203]  [<ffffffff81291cf6>] ? nf_hook_slow+0x73/0x111
[  461.751950]  [<ffffffffa03748cc>] ? __br_deliver+0xa0/0xa0 [bridge]
[  461.758378]  [<ffffffffa037533a>] ? NF_HOOK.constprop.4+0x56/0x56
[bridge]

This is caused by bridge netfilter special dst_entry (fake_rtable), a
special shared entry, where attaching an inetpeer makes no sense.

Problem is present since commit 87c48fa3b46 (ipv6: make fragment
identifications less predictable)

Introduce DST_NOPEER dst flag and make sure ipv6_select_ident() and
__ip_select_ident() fall back to the 'no peer attached' handling.

Reported-by: Chris Boot <bootc@bootc.net>
Tested-by: Chris Boot <bootc@bootc.net>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h         | 1 +
 net/bridge/br_netfilter.c | 2 +-
 net/ipv4/route.c          | 4 ++--
 net/ipv6/ip6_output.c     | 2 +-
 4 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/net/dst.h b/include/net/dst.h
index 6faec1a6021..75766b42660 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -53,6 +53,7 @@ struct dst_entry {
 #define DST_NOHASH		0x0008
 #define DST_NOCACHE		0x0010
 #define DST_NOCOUNT		0x0020
+#define DST_NOPEER		0x0040
 
 	short			error;
 	short			obsolete;
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 08757dc670a..fa8b8f76358 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -147,7 +147,7 @@ void br_netfilter_rtable_init(struct net_bridge *br)
 	rt->dst.dev = br->dev;
 	rt->dst.path = &rt->dst;
 	dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
-	rt->dst.flags	= DST_NOXFRM;
+	rt->dst.flags	= DST_NOXFRM | DST_NOPEER;
 	rt->dst.ops = &fake_dst_ops;
 }
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 85cc053d9d6..94cdbc55ca7 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1367,7 +1367,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
 {
 	struct rtable *rt = (struct rtable *) dst;
 
-	if (rt) {
+	if (rt && !(rt->dst.flags & DST_NOPEER)) {
 		if (rt->peer == NULL)
 			rt_bind_peer(rt, rt->rt_dst, 1);
 
@@ -1378,7 +1378,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
 			iph->id = htons(inet_getid(rt->peer, more));
 			return;
 		}
-	} else
+	} else if (!rt)
 		printk(KERN_DEBUG "rt_bind_peer(0) @%p\n",
 		       __builtin_return_address(0));
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 84d0bd5cac9..ec562713db9 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -603,7 +603,7 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
 	static atomic_t ipv6_fragmentation_id;
 	int old, new;
 
-	if (rt) {
+	if (rt && !(rt->dst.flags & DST_NOPEER)) {
 		struct inet_peer *peer;
 
 		if (!rt->rt6i_peer)
-- 
cgit v1.2.3


From a0a129f8b6cff54ab479324a54aefdab5db4f240 Mon Sep 17 00:00:00 2001
From: Xi Wang <xi.wang@gmail.com>
Date: Thu, 22 Dec 2011 13:35:22 +0000
Subject: rps: fix insufficient bounds checking in
 store_rps_dev_flow_table_cnt()

Setting a large rps_flow_cnt like (1 << 30) on a 32-bit platform will
cause a kernel oops due to insufficient bounds checking.

	if (count > 1<<30) {
		/* Enforce a limit to prevent overflow */
		return -EINVAL;
	}
	count = roundup_pow_of_two(count);
	table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count));

Note that the macro RPS_DEV_FLOW_TABLE_SIZE(count) is defined as:

	... + (count * sizeof(struct rps_dev_flow))

where sizeof(struct rps_dev_flow) is 8.  (1 << 30) * 8 will overflow
32 bits.

This patch replaces the magic number (1 << 30) with a symbolic bound.

Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net-sysfs.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index c71c434a4c0..385aefe5364 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -665,11 +665,14 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
 	if (count) {
 		int i;
 
-		if (count > 1<<30) {
+		if (count > INT_MAX)
+			return -EINVAL;
+		count = roundup_pow_of_two(count);
+		if (count > (ULONG_MAX - sizeof(struct rps_dev_flow_table))
+				/ sizeof(struct rps_dev_flow)) {
 			/* Enforce a limit to prevent overflow */
 			return -EINVAL;
 		}
-		count = roundup_pow_of_two(count);
 		table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count));
 		if (!table)
 			return -ENOMEM;
-- 
cgit v1.2.3


From 0fd7bac6b6157eed6cf0cb86a1e88ba29e57c033 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 21 Dec 2011 07:11:44 +0000
Subject: net: relax rcvbuf limits

skb->truesize might be big even for a small packet.

It's even bigger after commit 87fb4b7b533 (net: more accurate skb
truesize) and big MTU.

We should allow queueing at least one packet per receiver, even with a
low RCVBUF setting.

Reported-by: Michal Simek <monstr@monstr.eu>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h     | 4 +++-
 net/core/sock.c        | 6 +-----
 net/packet/af_packet.c | 6 ++----
 3 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index abb6e0f0c3c..32e39371fba 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -637,12 +637,14 @@ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 
 /*
  * Take into account size of receive queue and backlog queue
+ * Do not take into account this skb truesize,
+ * to allow even a single big packet to come.
  */
 static inline bool sk_rcvqueues_full(const struct sock *sk, const struct sk_buff *skb)
 {
 	unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc);
 
-	return qsize + skb->truesize > sk->sk_rcvbuf;
+	return qsize > sk->sk_rcvbuf;
 }
 
 /* The per-socket spinlock must be held here. */
diff --git a/net/core/sock.c b/net/core/sock.c
index 4ed7b1d12f5..b23f174ab84 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -288,11 +288,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	unsigned long flags;
 	struct sk_buff_head *list = &sk->sk_receive_queue;
 
-	/* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
-	   number of warnings when compiling with -W --ANK
-	 */
-	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
-	    (unsigned)sk->sk_rcvbuf) {
+	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
 		atomic_inc(&sk->sk_drops);
 		trace_sock_rcvqueue_full(sk, skb);
 		return -ENOMEM;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 82a6f34d39d..3891702b81d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1630,8 +1630,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (snaplen > res)
 		snaplen = res;
 
-	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
-	    (unsigned)sk->sk_rcvbuf)
+	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
 		goto drop_n_acct;
 
 	if (skb_shared(skb)) {
@@ -1762,8 +1761,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (po->tp_version <= TPACKET_V2) {
 		if (macoff + snaplen > po->rx_ring.frame_size) {
 			if (po->copy_thresh &&
-				atomic_read(&sk->sk_rmem_alloc) + skb->truesize
-				< (unsigned)sk->sk_rcvbuf) {
+			    atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
 				if (skb_shared(skb)) {
 					copy_skb = skb_clone(skb, GFP_ATOMIC);
 				} else {
-- 
cgit v1.2.3


From 0354b48f633ae435acbc01b470a1ce8cfeff3e9f Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 16 Dec 2011 18:35:15 +0100
Subject: netfilter: xt_connbytes: handle negation correctly

"! --connbytes 23:42" should match if the packet/byte count is not in range.

As there is no explicit "invert match" toggle in the match structure,
userspace swaps the from and to arguments
(i.e., as if "--connbytes 42:23" were given).

However, "what <= 23 && what >= 42" will always be false.

Change things so we use "||" in case "from" is larger than "to".

This change may look like it breaks backwards compatibility when "to" is 0.
However, older iptables binaries will refuse "connbytes 42:0",
and current releases treat it to mean "! --connbytes 0:42",
so we should be fine.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_connbytes.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 5b138506690..9ddf1c3bfb3 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -87,10 +87,10 @@ connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		break;
 	}
 
-	if (sinfo->count.to)
+	if (sinfo->count.to >= sinfo->count.from)
 		return what <= sinfo->count.to && what >= sinfo->count.from;
-	else
-		return what >= sinfo->count.from;
+	else /* inverted */
+		return what < sinfo->count.to || what > sinfo->count.from;
 }
 
 static int connbytes_mt_check(const struct xt_mtchk_param *par)
-- 
cgit v1.2.3