diff options
author | Eric Dumazet <edumazet@google.com> | 2013-10-15 11:54:30 -0700 |
---|---|---|
committer | Luis Henriques <luis.henriques@canonical.com> | 2013-10-25 10:10:05 +0100 |
commit | 58009511de92e75bceda30e2920c41b9a77fa5e3 (patch) | |
tree | 68fad82703adb072d60f421d310d01973af78ea6 | |
parent | 9e6484d8b1db216ee039ac53b7ff6ec89b49d294 (diff) |
tcp: must unclone packets before mangling them
commit c52e2421f7368fd36cbe330d2cf41b10452e39a9 upstream.
TCP stack should make sure it owns skbs before mangling them.
We had various crashes using bnx2x, and it turned out gso_size
was cleared right before bnx2x driver was populating TC descriptor
of the _previous_ packet send. TCP stack can sometime retransmit
packets that are still in Qdisc.
Of course we could make bnx2x driver more robust (using
ACCESS_ONCE(shinfo->gso_size) for example), but the bug is TCP stack.
We have identified two points where skb_unclone() was needed.
This patch adds a WARN_ON_ONCE() to warn us if we missed another
fix of this kind.
Kudos to Neal for finding the root cause of this bug. Its visible
using small MSS.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
[ luis: backported to 3.5, using davem's port to 3.4:
- skb_unclone() function definition to include/linux/skbuff.h ]
Signed-off-by: Luis Henriques <luis.henriques@canonical.com>
-rw-r--r-- | include/linux/skbuff.h | 10 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 9 |
2 files changed, 16 insertions, 3 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e1c1e641778c..0ec478895617 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -774,6 +774,16 @@ static inline int skb_cloned(const struct sk_buff *skb) (atomic_read(&skb_shinfo(skb)->dataref) & SKB_DATAREF_MASK) != 1; } +static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) +{ + might_sleep_if(pri & __GFP_WAIT); + + if (skb_cloned(skb)) + return pskb_expand_head(skb, 0, 0, pri); + + return 0; +} + /** * skb_header_cloned - is the header a clone * @skb: buffer to check diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4a1e84093091..d06b130e40f1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -934,6 +934,9 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { + /* Make sure we own this skb before messing gso_size/gso_segs */ + WARN_ON_ONCE(skb_cloned(skb)); + if (skb->len <= mss_now || !sk_can_gso(sk) || skb->ip_summed == CHECKSUM_NONE) { /* Avoid the costly divide in the normal @@ -1015,9 +1018,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, if (nsize < 0) nsize = 0; - if (skb_cloned(skb) && - skb_is_nonlinear(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) return -ENOMEM; /* Get a new skb... force flag on. */ @@ -2150,6 +2151,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) int oldpcount = tcp_skb_pcount(skb); if (unlikely(oldpcount > 1)) { + if (skb_unclone(skb, GFP_ATOMIC)) + return -ENOMEM; tcp_init_tso_segs(sk, skb, cur_mss); tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb)); } |