aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@g5.osdl.org>2006-06-23 08:00:01 -0700
committerLinus Torvalds <torvalds@g5.osdl.org>2006-06-23 08:00:01 -0700
commit199f4c9f76fd8b030405abddf294e771f888de03 (patch)
treeee4f104a7562e1fd76882bc40f2de7d90812e1df /net
parent37224470c8c6d90a4062e76a08d4dc1fcf91fc89 (diff)
parentca6bb5d7ab22ac79f608fe6cbc6b12de6a5a19f0 (diff)
Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
* master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6: [NET]: Require CAP_NET_ADMIN to create tuntap devices. [NET]: fix net-core kernel-doc [TCP]: Move inclusion of <linux/dmaengine.h> to correct place in <linux/tcp.h> [IPSEC]: Handle GSO packets [NET]: Added GSO toggle [NET]: Add software TSOv4 [NET]: Add generic segmentation offload [NET]: Merge TSO/UFO fields in sk_buff [NET]: Prevent transmission after dev_deactivate [IPV6] ADDRCONF: Fix default source address selection without CONFIG_IPV6_PRIVACY [IPV6]: Fix source address selection. [NET]: Avoid allocating skb in skb_pad
Diffstat (limited to 'net')
-rw-r--r--net/bridge/br_forward.c4
-rw-r--r--net/bridge/br_if.c17
-rw-r--r--net/bridge/br_netfilter.c2
-rw-r--r--net/core/dev.c137
-rw-r--r--net/core/ethtool.c29
-rw-r--r--net/core/skbuff.c178
-rw-r--r--net/ipv4/af_inet.c51
-rw-r--r--net/ipv4/ip_output.c16
-rw-r--r--net/ipv4/tcp.c66
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/tcp_output.c47
-rw-r--r--net/ipv4/xfrm4_output.c54
-rw-r--r--net/ipv6/addrconf.c9
-rw-r--r--net/ipv6/ip6_output.c7
-rw-r--r--net/ipv6/xfrm6_output.c39
-rw-r--r--net/sched/sch_generic.c29
16 files changed, 602 insertions, 85 deletions
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 0dca027ceb8..8be9f2123e5 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -34,8 +34,8 @@ static inline unsigned packet_length(const struct sk_buff *skb)
int br_dev_queue_push_xmit(struct sk_buff *skb)
{
- /* drop mtu oversized packets except tso */
- if (packet_length(skb) > skb->dev->mtu && !skb_shinfo(skb)->tso_size)
+ /* drop mtu oversized packets except gso */
+ if (packet_length(skb) > skb->dev->mtu && !skb_shinfo(skb)->gso_size)
kfree_skb(skb);
else {
#ifdef CONFIG_BRIDGE_NETFILTER
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index fdec773f5b5..07956ecf545 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -376,15 +376,20 @@ void br_features_recompute(struct net_bridge *br)
features = br->feature_mask & ~NETIF_F_ALL_CSUM;
list_for_each_entry(p, &br->port_list, list) {
- if (checksum & NETIF_F_NO_CSUM &&
- !(p->dev->features & NETIF_F_NO_CSUM))
+ unsigned long feature = p->dev->features;
+
+ if (checksum & NETIF_F_NO_CSUM && !(feature & NETIF_F_NO_CSUM))
checksum ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
- if (checksum & NETIF_F_HW_CSUM &&
- !(p->dev->features & NETIF_F_HW_CSUM))
+ if (checksum & NETIF_F_HW_CSUM && !(feature & NETIF_F_HW_CSUM))
checksum ^= NETIF_F_HW_CSUM | NETIF_F_IP_CSUM;
- if (!(p->dev->features & NETIF_F_IP_CSUM))
+ if (!(feature & NETIF_F_IP_CSUM))
checksum = 0;
- features &= p->dev->features;
+
+ if (feature & NETIF_F_GSO)
+ feature |= NETIF_F_TSO;
+ feature |= NETIF_F_GSO;
+
+ features &= feature;
}
br->dev->features = features | checksum | NETIF_F_LLTX;
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 3e41f9d6d51..8298a5179ae 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -761,7 +761,7 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb)
{
if (skb->protocol == htons(ETH_P_IP) &&
skb->len > skb->dev->mtu &&
- !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
+ !skb_shinfo(skb)->gso_size)
return ip_fragment(skb, br_dev_queue_push_xmit);
else
return br_dev_queue_push_xmit(skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index 195a5e96b2d..ea2469398bd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -116,6 +116,7 @@
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
+#include <linux/err.h>
/*
* The list of packet types we will receive (as opposed to discard)
@@ -1048,7 +1049,7 @@ static inline void net_timestamp(struct sk_buff *skb)
* taps currently in use.
*/
-void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
+static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
struct packet_type *ptype;
@@ -1186,6 +1187,40 @@ out:
return ret;
}
+/**
+ * skb_gso_segment - Perform segmentation on skb.
+ * @skb: buffer to segment
+ * @sg: whether scatter-gather is supported on the target.
+ *
+ * This function segments the given skb and returns a list of segments.
+ */
+struct sk_buff *skb_gso_segment(struct sk_buff *skb, int sg)
+{
+ struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
+ struct packet_type *ptype;
+ int type = skb->protocol;
+
+ BUG_ON(skb_shinfo(skb)->frag_list);
+ BUG_ON(skb->ip_summed != CHECKSUM_HW);
+
+ skb->mac.raw = skb->data;
+ skb->mac_len = skb->nh.raw - skb->data;
+ __skb_pull(skb, skb->mac_len);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
+ if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
+ segs = ptype->gso_segment(skb, sg);
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return segs;
+}
+
+EXPORT_SYMBOL(skb_gso_segment);
+
/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
@@ -1222,6 +1257,86 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
#define illegal_highdma(dev, skb) (0)
#endif
+struct dev_gso_cb {
+ void (*destructor)(struct sk_buff *skb);
+};
+
+#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
+
+static void dev_gso_skb_destructor(struct sk_buff *skb)
+{
+ struct dev_gso_cb *cb;
+
+ do {
+ struct sk_buff *nskb = skb->next;
+
+ skb->next = nskb->next;
+ nskb->next = NULL;
+ kfree_skb(nskb);
+ } while (skb->next);
+
+ cb = DEV_GSO_CB(skb);
+ if (cb->destructor)
+ cb->destructor(skb);
+}
+
+/**
+ * dev_gso_segment - Perform emulated hardware segmentation on skb.
+ * @skb: buffer to segment
+ *
+ * This function segments the given skb and stores the list of segments
+ * in skb->next.
+ */
+static int dev_gso_segment(struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+ struct sk_buff *segs;
+
+ segs = skb_gso_segment(skb, dev->features & NETIF_F_SG &&
+ !illegal_highdma(dev, skb));
+ if (unlikely(IS_ERR(segs)))
+ return PTR_ERR(segs);
+
+ skb->next = segs;
+ DEV_GSO_CB(skb)->destructor = skb->destructor;
+ skb->destructor = dev_gso_skb_destructor;
+
+ return 0;
+}
+
+int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ if (likely(!skb->next)) {
+ if (netdev_nit)
+ dev_queue_xmit_nit(skb, dev);
+
+ if (!netif_needs_gso(dev, skb))
+ return dev->hard_start_xmit(skb, dev);
+
+ if (unlikely(dev_gso_segment(skb)))
+ goto out_kfree_skb;
+ }
+
+ do {
+ struct sk_buff *nskb = skb->next;
+ int rc;
+
+ skb->next = nskb->next;
+ nskb->next = NULL;
+ rc = dev->hard_start_xmit(nskb, dev);
+ if (unlikely(rc)) {
+ skb->next = nskb;
+ return rc;
+ }
+ } while (skb->next);
+
+ skb->destructor = DEV_GSO_CB(skb)->destructor;
+
+out_kfree_skb:
+ kfree_skb(skb);
+ return 0;
+}
+
#define HARD_TX_LOCK(dev, cpu) { \
if ((dev->features & NETIF_F_LLTX) == 0) { \
netif_tx_lock(dev); \
@@ -1266,6 +1381,10 @@ int dev_queue_xmit(struct sk_buff *skb)
struct Qdisc *q;
int rc = -ENOMEM;
+ /* GSO will handle the following emulations directly. */
+ if (netif_needs_gso(dev, skb))
+ goto gso;
+
if (skb_shinfo(skb)->frag_list &&
!(dev->features & NETIF_F_FRAGLIST) &&
__skb_linearize(skb))
@@ -1290,12 +1409,13 @@ int dev_queue_xmit(struct sk_buff *skb)
if (skb_checksum_help(skb, 0))
goto out_kfree_skb;
+gso:
spin_lock_prefetch(&dev->queue_lock);
/* Disable soft irqs for various locks below. Also
* stops preemption for RCU.
*/
- local_bh_disable();
+ rcu_read_lock_bh();
/* Updates of qdisc are serialized by queue_lock.
* The struct Qdisc which is pointed to by qdisc is now a
@@ -1346,11 +1466,8 @@ int dev_queue_xmit(struct sk_buff *skb)
HARD_TX_LOCK(dev, cpu);
if (!netif_queue_stopped(dev)) {
- if (netdev_nit)
- dev_queue_xmit_nit(skb, dev);
-
rc = 0;
- if (!dev->hard_start_xmit(skb, dev)) {
+ if (!dev_hard_start_xmit(skb, dev)) {
HARD_TX_UNLOCK(dev);
goto out;
}
@@ -1369,13 +1486,13 @@ int dev_queue_xmit(struct sk_buff *skb)
}
rc = -ENETDOWN;
- local_bh_enable();
+ rcu_read_unlock_bh();
out_kfree_skb:
kfree_skb(skb);
return rc;
out:
- local_bh_enable();
+ rcu_read_unlock_bh();
return rc;
}
@@ -3301,8 +3418,8 @@ static void net_dma_rebalance(void)
/**
* netdev_dma_event - event callback for the net_dma_client
* @client: should always be net_dma_client
- * @chan:
- * @event:
+ * @chan: DMA channel for the event
+ * @event: event type
*/
static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
enum dma_event event)
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 33ce7ed6afc..27ce1683caf 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -614,6 +614,29 @@ static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr)
return dev->ethtool_ops->set_ufo(dev, edata.data);
}
+static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
+{
+ struct ethtool_value edata = { ETHTOOL_GGSO };
+
+ edata.data = dev->features & NETIF_F_GSO;
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+}
+
+static int ethtool_set_gso(struct net_device *dev, char __user *useraddr)
+{
+ struct ethtool_value edata;
+
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
+ if (edata.data)
+ dev->features |= NETIF_F_GSO;
+ else
+ dev->features &= ~NETIF_F_GSO;
+ return 0;
+}
+
static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
{
struct ethtool_test test;
@@ -905,6 +928,12 @@ int dev_ethtool(struct ifreq *ifr)
case ETHTOOL_SUFO:
rc = ethtool_set_ufo(dev, useraddr);
break;
+ case ETHTOOL_GGSO:
+ rc = ethtool_get_gso(dev, useraddr);
+ break;
+ case ETHTOOL_SGSO:
+ rc = ethtool_set_gso(dev, useraddr);
+ break;
default:
rc = -EOPNOTSUPP;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index bb7210f4005..8e5044ba3ab 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -172,9 +172,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
shinfo = skb_shinfo(skb);
atomic_set(&shinfo->dataref, 1);
shinfo->nr_frags = 0;
- shinfo->tso_size = 0;
- shinfo->tso_segs = 0;
- shinfo->ufo_size = 0;
+ shinfo->gso_size = 0;
+ shinfo->gso_segs = 0;
+ shinfo->gso_type = 0;
shinfo->ip6_frag_id = 0;
shinfo->frag_list = NULL;
@@ -238,8 +238,9 @@ struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
atomic_set(&(skb_shinfo(skb)->dataref), 1);
skb_shinfo(skb)->nr_frags = 0;
- skb_shinfo(skb)->tso_size = 0;
- skb_shinfo(skb)->tso_segs = 0;
+ skb_shinfo(skb)->gso_size = 0;
+ skb_shinfo(skb)->gso_segs = 0;
+ skb_shinfo(skb)->gso_type = 0;
skb_shinfo(skb)->frag_list = NULL;
out:
return skb;
@@ -528,8 +529,9 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#endif
skb_copy_secmark(new, old);
atomic_set(&new->users, 1);
- skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size;
- skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs;
+ skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
+ skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
+ skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
}
/**
@@ -781,24 +783,40 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
* filled. Used by network drivers which may DMA or transfer data
* beyond the buffer end onto the wire.
*
- * May return NULL in out of memory cases.
+ * May return error in out of memory cases. The skb is freed on error.
*/
-struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
+int skb_pad(struct sk_buff *skb, int pad)
{
- struct sk_buff *nskb;
+ int err;
+ int ntail;
/* If the skbuff is non linear tailroom is always zero.. */
- if (skb_tailroom(skb) >= pad) {
+ if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
memset(skb->data+skb->len, 0, pad);
- return skb;
+ return 0;
}
-
- nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC);
+
+ ntail = skb->data_len + pad - (skb->end - skb->tail);
+ if (likely(skb_cloned(skb) || ntail > 0)) {
+ err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC);
+ if (unlikely(err))
+ goto free_skb;
+ }
+
+ /* FIXME: The use of this function with non-linear skb's really needs
+ * to be audited.
+ */
+ err = skb_linearize(skb);
+ if (unlikely(err))
+ goto free_skb;
+
+ memset(skb->data + skb->len, 0, pad);
+ return 0;
+
+free_skb:
kfree_skb(skb);
- if (nskb)
- memset(nskb->data+nskb->len, 0, pad);
- return nskb;
+ return err;
}
/* Trims skb to length len. It can change skb pointers.
@@ -1824,6 +1842,132 @@ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
EXPORT_SYMBOL_GPL(skb_pull_rcsum);
+/**
+ * skb_segment - Perform protocol segmentation on skb.
+ * @skb: buffer to segment
+ * @sg: whether scatter-gather can be used for generated segments
+ *
+ * This function performs segmentation on the given skb. It returns
+ * the segment at the given position. It returns NULL if there are
+ * no more segments to generate, or when an error is encountered.
+ */
+struct sk_buff *skb_segment(struct sk_buff *skb, int sg)
+{
+ struct sk_buff *segs = NULL;
+ struct sk_buff *tail = NULL;
+ unsigned int mss = skb_shinfo(skb)->gso_size;
+ unsigned int doffset = skb->data - skb->mac.raw;
+ unsigned int offset = doffset;
+ unsigned int headroom;
+ unsigned int len;
+ int nfrags = skb_shinfo(skb)->nr_frags;
+ int err = -ENOMEM;
+ int i = 0;
+ int pos;
+
+ __skb_push(skb, doffset);
+ headroom = skb_headroom(skb);
+ pos = skb_headlen(skb);
+
+ do {
+ struct sk_buff *nskb;
+ skb_frag_t *frag;
+ int hsize, nsize;
+ int k;
+ int size;
+
+ len = skb->len - offset;
+ if (len > mss)
+ len = mss;
+
+ hsize = skb_headlen(skb) - offset;
+ if (hsize < 0)
+ hsize = 0;
+ nsize = hsize + doffset;
+ if (nsize > len + doffset || !sg)
+ nsize = len + doffset;
+
+ nskb = alloc_skb(nsize + headroom, GFP_ATOMIC);
+ if (unlikely(!nskb))
+ goto err;
+
+ if (segs)
+ tail->next = nskb;
+ else
+ segs = nskb;
+ tail = nskb;
+
+ nskb->dev = skb->dev;
+ nskb->priority = skb->priority;
+ nskb->protocol = skb->protocol;
+ nskb->dst = dst_clone(skb->dst);
+ memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
+ nskb->pkt_type = skb->pkt_type;
+ nskb->mac_len = skb->mac_len;
+
+ skb_reserve(nskb, headroom);
+ nskb->mac.raw = nskb->data;
+ nskb->nh.raw = nskb->data + skb->mac_len;
+ nskb->h.raw = nskb->nh.raw + (skb->h.raw - skb->nh.raw);
+ memcpy(skb_put(nskb, doffset), skb->data, doffset);
+
+ if (!sg) {
+ nskb->csum = skb_copy_and_csum_bits(skb, offset,
+ skb_put(nskb, len),
+ len, 0);
+ continue;
+ }
+
+ frag = skb_shinfo(nskb)->frags;
+ k = 0;
+
+ nskb->ip_summed = CHECKSUM_HW;
+ nskb->csum = skb->csum;
+ memcpy(skb_put(nskb, hsize), skb->data + offset, hsize);
+
+ while (pos < offset + len) {
+ BUG_ON(i >= nfrags);
+
+ *frag = skb_shinfo(skb)->frags[i];
+ get_page(frag->page);
+ size = frag->size;
+
+ if (pos < offset) {
+ frag->page_offset += offset - pos;
+ frag->size -= offset - pos;
+ }
+
+ k++;
+
+ if (pos + size <= offset + len) {
+ i++;
+ pos += size;
+ } else {
+ frag->size -= pos + size - (offset + len);
+ break;
+ }
+
+ frag++;
+ }
+
+ skb_shinfo(nskb)->nr_frags = k;
+ nskb->data_len = len - hsize;
+ nskb->len += nskb->data_len;
+ nskb->truesize += nskb->data_len;
+ } while ((offset += len) < skb->len);
+
+ return segs;
+
+err:
+ while ((skb = segs)) {
+ segs = skb->next;
+ kfree(skb);
+ }
+ return ERR_PTR(err);
+}
+
+EXPORT_SYMBOL_GPL(skb_segment);
+
void __init skb_init(void)
{
skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 0a277453526..461216b4794 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -68,6 +68,7 @@
*/
#include <linux/config.h>
+#include <linux/err.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
@@ -1096,6 +1097,54 @@ int inet_sk_rebuild_header(struct sock *sk)
EXPORT_SYMBOL(inet_sk_rebuild_header);
+static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int sg)
+{
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct iphdr *iph;
+ struct net_protocol *ops;
+ int proto;
+ int ihl;
+ int id;
+
+ if (!pskb_may_pull(skb, sizeof(*iph)))
+ goto out;
+
+ iph = skb->nh.iph;
+ ihl = iph->ihl * 4;
+ if (ihl < sizeof(*iph))
+ goto out;
+
+ if (!pskb_may_pull(skb, ihl))
+ goto out;
+
+ skb->h.raw = __skb_pull(skb, ihl);
+ iph = skb->nh.iph;
+ id = ntohs(iph->id);
+ proto = iph->protocol & (MAX_INET_PROTOS - 1);
+ segs = ERR_PTR(-EPROTONOSUPPORT);
+
+ rcu_read_lock();
+ ops = rcu_dereference(inet_protos[proto]);
+ if (ops && ops->gso_segment)
+ segs = ops->gso_segment(skb, sg);
+ rcu_read_unlock();
+
+ if (IS_ERR(segs))
+ goto out;
+
+ skb = segs;
+ do {
+ iph = skb->nh.iph;
+ iph->id = htons(id++);
+ iph->tot_len = htons(skb->len - skb->mac_len);
+ iph->check = 0;
+ iph->check = ip_fast_csum(skb->nh.raw, iph->ihl);
+ } while ((skb = skb->next));
+
+out:
+ return segs;
+}
+
#ifdef CONFIG_IP_MULTICAST
static struct net_protocol igmp_protocol = {
.handler = igmp_rcv,
@@ -1105,6 +1154,7 @@ static struct net_protocol igmp_protocol = {
static struct net_protocol tcp_protocol = {
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
+ .gso_segment = tcp_tso_segment,
.no_policy = 1,
};
@@ -1150,6 +1200,7 @@ static int ipv4_proc_init(void);
static struct packet_type ip_packet_type = {
.type = __constant_htons(ETH_P_IP),
.func = ip_rcv,
+ .gso_segment = inet_gso_segment,
};
static int __init inet_init(void)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8538aac3d14..7624fd1d8f9 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -210,8 +210,7 @@ static inline int ip_finish_output(struct sk_buff *skb)
return dst_output(skb);
}
#endif
- if (skb->len > dst_mtu(skb->dst) &&
- !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
+ if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size)
return ip_fragment(skb, ip_finish_output2);
else
return ip_finish_output2(skb);
@@ -362,7 +361,7 @@ packet_routed:
}
ip_select_ident_more(iph, &rt->u.dst, sk,
- (skb_shinfo(skb)->tso_segs ?: 1) - 1);
+ (skb_shinfo(skb)->gso_segs ?: 1) - 1);
/* Add an IP checksum. */
ip_send_check(iph);
@@ -744,7 +743,8 @@ static inline int ip_ufo_append_data(struct sock *sk,
(length - transhdrlen));
if (!err) {
/* specify the length of each IP datagram fragment*/
- skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
+ skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
+ skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4;
__skb_queue_tail(&sk->sk_write_queue, skb);
return 0;
@@ -1087,14 +1087,16 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
inet->cork.length += size;
if ((sk->sk_protocol == IPPROTO_UDP) &&
- (rt->u.dst.dev->features & NETIF_F_UFO))
- skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
+ (rt->u.dst.dev->features & NETIF_F_UFO)) {
+ skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
+ skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4;
+ }
while (size > 0) {
int i;
- if (skb_shinfo(skb)->ufo_size)
+ if (skb_shinfo(skb)->gso_size)
len = size;
else {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 74998f25007..0e029c4e290 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -258,6 +258,7 @@
#include <linux/random.h>
#include <linux/bootmem.h>
#include <linux/cache.h>
+#include <linux/err.h>
#include <net/icmp.h>
#include <net/tcp.h>
@@ -571,7 +572,7 @@ new_segment:
skb->ip_summed = CHECKSUM_HW;
tp->write_seq += copy;
TCP_SKB_CB(skb)->end_seq += copy;
- skb_shinfo(skb)->tso_segs = 0;
+ skb_shinfo(skb)->gso_segs = 0;
if (!copied)
TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;
@@ -818,7 +819,7 @@ new_segment:
tp->write_seq += copy;
TCP_SKB_CB(skb)->end_seq += copy;
- skb_shinfo(skb)->tso_segs = 0;
+ skb_shinfo(skb)->gso_segs = 0;
from += copy;
copied += copy;
@@ -2144,6 +2145,67 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
EXPORT_SYMBOL(compat_tcp_getsockopt);
#endif
+struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int sg)
+{
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct tcphdr *th;
+ unsigned thlen;
+ unsigned int seq;
+ unsigned int delta;
+ unsigned int oldlen;
+ unsigned int len;
+
+ if (!pskb_may_pull(skb, sizeof(*th)))
+ goto out;
+
+ th = skb->h.th;
+ thlen = th->doff * 4;
+ if (thlen < sizeof(*th))
+ goto out;
+
+ if (!pskb_may_pull(skb, thlen))
+ goto out;
+
+ oldlen = ~htonl(skb->len);
+ __skb_pull(skb, thlen);
+
+ segs = skb_segment(skb, sg);
+ if (IS_ERR(segs))
+ goto out;
+
+ len = skb_shinfo(skb)->gso_size;
+ delta = csum_add(oldlen, htonl(thlen + len));
+
+ skb = segs;
+ th = skb->h.th;
+ seq = ntohl(th->seq);
+
+ do {
+ th->fin = th->psh = 0;
+
+ if (skb->ip_summed == CHECKSUM_NONE) {
+ th->check = csum_fold(csum_partial(
+ skb->h.raw, thlen, csum_add(skb->csum, delta)));
+ }
+
+ seq += len;
+ skb = skb->next;
+ th = skb->h.th;
+
+ th->seq = htonl(seq);
+ th->cwr = 0;
+ } while (skb->next);
+
+ if (skb->ip_summed == CHECKSUM_NONE) {
+ delta = csum_add(oldlen, htonl(skb->tail - skb->h.raw));
+ th->check = csum_fold(csum_partial(
+ skb->h.raw, thlen, csum_add(skb->csum, delta)));
+ }
+
+out:
+ return segs;
+}
+
extern void __skb_cb_too_small_for_tcp(int, int);
extern struct tcp_congestion_ops tcp_reno;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e08245bdda3..94fe5b1f9dc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1073,7 +1073,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
else
pkt_len = (end_seq -
TCP_SKB_CB(skb)->seq);
- if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->tso_size))
+ if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->gso_size))
break;
pcount = tcp_skb_pcount(skb);
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 07bb5a2b375..bdd71db8bf9 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -515,15 +515,17 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned
/* Avoid the costly divide in the normal
* non-TSO case.
*/
- skb_shinfo(skb)->tso_segs = 1;
- skb_shinfo(skb)->tso_size = 0;
+ skb_shinfo(skb)->gso_segs = 1;
+ skb_shinfo(skb)->gso_size = 0;
+ skb_shinfo(skb)->gso_type = 0;
} else {
unsigned int factor;
factor = skb->len + (mss_now - 1);
factor /= mss_now;
- skb_shinfo(skb)->tso_segs = factor;
- skb_shinfo(skb)->tso_size = mss_now;
+ skb_shinfo(skb)->gso_segs = factor;
+ skb_shinfo(skb)->gso_size = mss_now;
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
}
}
@@ -914,7 +916,7 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int
if (!tso_segs ||
(tso_segs > 1 &&
- skb_shinfo(skb)->tso_size != mss_now)) {
+ tcp_skb_mss(skb) != mss_now)) {
tcp_set_skb_tso_segs(sk, skb, mss_now);
tso_segs = tcp_skb_pcount(skb);
}
@@ -1724,8 +1726,9 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
if (!pskb_trim(skb, 0)) {
TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
- skb_shinfo(skb)->tso_segs = 1;
- skb_shinfo(skb)->tso_size = 0;
+ skb_shinfo(skb)->gso_segs = 1;
+ skb_shinfo(skb)->gso_size = 0;
+ skb_shinfo(skb)->gso_type = 0;
skb->ip_summed = CHECKSUM_NONE;
skb->csum = 0;
}
@@ -1930,8 +1933,9 @@ void tcp_send_fin(struct sock *sk)
skb->csum = 0;
TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
TCP_SKB_CB(skb)->sacked = 0;
- skb_shinfo(skb)->tso_segs = 1;
- skb_shinfo(skb)->tso_size = 0;
+ skb_shinfo(skb)->gso_segs = 1;
+ skb_shinfo(skb)->gso_size = 0;
+ skb_shinfo(skb)->gso_type = 0;
/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
TCP_SKB_CB(skb)->seq = tp->write_seq;
@@ -1963,8 +1967,9 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
skb->csum = 0;
TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
TCP_SKB_CB(skb)->sacked = 0;
- skb_shinfo(skb)->tso_segs = 1;
- skb_shinfo(skb)->tso_size = 0;
+ skb_shinfo(skb)->gso_segs = 1;
+ skb_shinfo(skb)->gso_size = 0;
+ skb_shinfo(skb)->gso_type = 0;
/* Send it off. */
TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
@@ -2047,8 +2052,9 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn;
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
TCP_SKB_CB(skb)->sacked = 0;
- skb_shinfo(skb)->tso_segs = 1;
- skb_shinfo(skb)->tso_size = 0;
+ skb_shinfo(skb)->gso_segs = 1;
+ skb_shinfo(skb)->gso_size = 0;
+ skb_shinfo(skb)->gso_type = 0;
th->seq = htonl(TCP_SKB_CB(skb)->seq);
th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
@@ -2152,8 +2158,9 @@ int tcp_connect(struct sock *sk)
TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
TCP_ECN_send_syn(sk, tp, buff);
TCP_SKB_CB(buff)->sacked = 0;
- skb_shinfo(buff)->tso_segs = 1;
- skb_shinfo(buff)->tso_size = 0;
+ skb_shinfo(buff)->gso_segs = 1;
+ skb_shinfo(buff)->gso_size = 0;
+ skb_shinfo(buff)->gso_type = 0;
buff->csum = 0;
TCP_SKB_CB(buff)->seq = tp->write_seq++;
TCP_SKB_CB(buff)->end_seq = tp->write_seq;
@@ -2257,8 +2264,9 @@ void tcp_send_ack(struct sock *sk)
buff->csum = 0;
TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK;
TCP_SKB_CB(buff)->sacked = 0;
- skb_shinfo(buff)->tso_segs = 1;
- skb_shinfo(buff)->tso_size = 0;
+ skb_shinfo(buff)->gso_segs = 1;
+ skb_shinfo(buff)->gso_size = 0;
+ skb_shinfo(buff)->gso_type = 0;
/* Send it off, this clears delayed acks for us. */
TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
@@ -2293,8 +2301,9 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
skb->csum = 0;
TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
TCP_SKB_CB(skb)->sacked = urgent;
- skb_shinfo(skb)->tso_segs = 1;
- skb_shinfo(skb)->tso_size = 0;
+ skb_shinfo(skb)->gso_segs = 1;
+ skb_shinfo(skb)->gso_size = 0;
+ skb_shinfo(skb)->gso_type = 0;
/* Use a previous sequence. This should cause the other
* end to send an ack. Don't queue or clone SKB, just
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index ac9d91d4bb0..193363e2293 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -9,6 +9,8 @@
*/
#include <linux/compiler.h>
+#include <linux/if_ether.h>
+#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <linux/netfilter_ipv4.h>
@@ -97,16 +99,10 @@ error_nolock:
goto out_exit;
}
-static int xfrm4_output_finish(struct sk_buff *skb)
+static int xfrm4_output_finish2(struct sk_buff *skb)
{
int err;
-#ifdef CONFIG_NETFILTER
- if (!skb->dst->xfrm) {
- IPCB(skb)->flags |= IPSKB_REROUTED;
- return dst_output(skb);
- }
-#endif
while (likely((err = xfrm4_output_one(skb)) == 0)) {
nf_reset(skb);
@@ -119,7 +115,7 @@ static int xfrm4_output_finish(struct sk_buff *skb)
return dst_output(skb);
err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL,
- skb->dst->dev, xfrm4_output_finish);
+ skb->dst->dev, xfrm4_output_finish2);
if (unlikely(err != 1))
break;
}
@@ -127,6 +123,48 @@ static int xfrm4_output_finish(struct sk_buff *skb)
return err;
}
+static int xfrm4_output_finish(struct sk_buff *skb)
+{
+ struct sk_buff *segs;
+
+#ifdef CONFIG_NETFILTER
+ if (!skb->dst->xfrm) {
+ IPCB(skb)->flags |= IPSKB_REROUTED;
+ return dst_output(skb);
+ }
+#endif
+
+ if (!skb_shinfo(skb)->gso_size)
+ return xfrm4_output_finish2(skb);
+
+ skb->protocol = htons(ETH_P_IP);
+ segs = skb_gso_segment(skb, 0);
+ kfree_skb(skb);
+ if (unlikely(IS_ERR(segs)))
+ return PTR_ERR(segs);
+
+ do {
+ struct sk_buff *nskb = segs->next;
+ int err;
+
+ segs->next = NULL;
+ err = xfrm4_output_finish2(segs);
+
+ if (unlikely(err)) {
+ while ((segs = nskb)) {
+ nskb = segs->next;
+ segs->next = NULL;
+ kfree_skb(segs);
+ }
+ return err;
+ }
+
+ segs = nskb;
+ } while (segs);
+
+ return 0;
+}
+
int xfrm4_output(struct sk_buff *skb)
{
return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c2c26fa0943..4da664538f5 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -862,6 +862,8 @@ static int inline ipv6_saddr_label(const struct in6_addr *addr, int type)
* 2002::/16 2
* ::/96 3
* ::ffff:0:0/96 4
+ * fc00::/7 5
+ * 2001::/32 6
*/
if (type & IPV6_ADDR_LOOPBACK)
return 0;
@@ -869,8 +871,12 @@ static int inline ipv6_saddr_label(const struct in6_addr *addr, int type)
return 3;
else if (type & IPV6_ADDR_MAPPED)
return 4;
+ else if (addr->s6_addr32[0] == htonl(0x20010000))
+ return 6;
else if (addr->s6_addr16[0] == htons(0x2002))
return 2;
+ else if ((addr->s6_addr[0] & 0xfe) == 0xfc)
+ return 5;
return 1;
}
@@ -1069,6 +1075,9 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
if (hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY)
continue;
}
+#else
+ if (hiscore.rule < 7)
+ hiscore.rule++;
#endif
/* Rule 8: Use longest matching prefix */
if (hiscore.rule < 8) {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index d29620f4910..abb94de3376 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -148,7 +148,7 @@ static int ip6_output2(struct sk_buff *skb)
int ip6_output(struct sk_buff *skb)
{
- if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->ufo_size) ||
+ if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size) ||
dst_allfrag(skb->dst))
return ip6_fragment(skb, ip6_output2);
else
@@ -833,8 +833,9 @@ static inline int ip6_ufo_append_data(struct sock *sk,
struct frag_hdr fhdr;
/* specify the length of each IP datagram fragment*/
- skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen) -
- sizeof(struct frag_hdr);
+ skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
+ sizeof(struct frag_hdr);
+ skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4;
ipv6_select_ident(skb, &fhdr);
skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
__skb_queue_tail(&sk->sk_write_queue, skb);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 16e84254a25..48fccb1eca0 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -94,7 +94,7 @@ error_nolock:
goto out_exit;
}
-static int xfrm6_output_finish(struct sk_buff *skb)
+static int xfrm6_output_finish2(struct sk_buff *skb)
{
int err;
@@ -110,7 +110,7 @@ static int xfrm6_output_finish(struct sk_buff *skb)
return dst_output(skb);
err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL,
- skb->dst->dev, xfrm6_output_finish);
+ skb->dst->dev, xfrm6_output_finish2);
if (unlikely(err != 1))
break;
}
@@ -118,6 +118,41 @@ static int xfrm6_output_finish(struct sk_buff *skb)
return err;
}
+static int xfrm6_output_finish(struct sk_buff *skb)
+{
+ struct sk_buff *segs;
+
+ if (!skb_shinfo(skb)->gso_size)
+ return xfrm6_output_finish2(skb);
+
+ skb->protocol = htons(ETH_P_IP);
+ segs = skb_gso_segment(skb, 0);
+ kfree_skb(skb);
+ if (unlikely(IS_ERR(segs)))
+ return PTR_ERR(segs);
+
+ do {
+ struct sk_buff *nskb = segs->next;
+ int err;
+
+ segs->next = NULL;
+ err = xfrm6_output_finish2(segs);
+
+ if (unlikely(err)) {
+ while ((segs = nskb)) {
+ nskb = segs->next;
+ segs->next = NULL;
+ kfree_skb(segs);
+ }
+ return err;
+ }
+
+ segs = nskb;
+ } while (segs);
+
+ return 0;
+}
+
int xfrm6_output(struct sk_buff *skb)
{
return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev,
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index d7aca8ef524..74d4a1dceee 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -96,8 +96,11 @@ static inline int qdisc_restart(struct net_device *dev)
struct sk_buff *skb;
/* Dequeue packet */
- if ((skb = q->dequeue(q)) != NULL) {
+ if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) {
unsigned nolock = (dev->features & NETIF_F_LLTX);
+
+ dev->gso_skb = NULL;
+
/*
* When the driver has LLTX set it does its own locking
* in start_xmit. No need to add additional overhead by
@@ -134,10 +137,8 @@ static inline int qdisc_restart(struct net_device *dev)
if (!netif_queue_stopped(dev)) {
int ret;
- if (netdev_nit)
- dev_queue_xmit_nit(skb, dev);
- ret = dev->hard_start_xmit(skb, dev);
+ ret = dev_hard_start_xmit(skb, dev);
if (ret == NETDEV_TX_OK) {
if (!nolock) {
netif_tx_unlock(dev);
@@ -171,7 +172,10 @@ static inline int qdisc_restart(struct net_device *dev)
*/
requeue:
- q->ops->requeue(skb, q);
+ if (skb->next)
+ dev->gso_skb = skb;
+ else
+ q->ops->requeue(skb, q);
netif_schedule(dev);
return 1;
}
@@ -181,9 +185,13 @@ requeue:
void __qdisc_run(struct net_device *dev)
{
+ if (unlikely(dev->qdisc == &noop_qdisc))
+ goto out;
+
while (qdisc_restart(dev) < 0 && !netif_queue_stopped(dev))
/* NOTHING */;
+out:
clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state);
}
@@ -583,10 +591,17 @@ void dev_deactivate(struct net_device *dev)
dev_watchdog_down(dev);
- while (test_bit(__LINK_STATE_SCHED, &dev->state))
+ /* Wait for outstanding dev_queue_xmit calls. */
+ synchronize_rcu();
+
+ /* Wait for outstanding qdisc_run calls. */
+ while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
yield();
- spin_unlock_wait(&dev->_xmit_lock);
+ if (dev->gso_skb) {
+ kfree_skb(dev->gso_skb);
+ dev->gso_skb = NULL;
+ }
}
void dev_init_scheduler(struct net_device *dev)