aboutsummaryrefslogtreecommitdiff
path: root/net/core/skbuff.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/skbuff.c')
-rw-r--r--net/core/skbuff.c196
1 files changed, 113 insertions, 83 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 27002dffe7e..ca4db40e75b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -184,11 +184,20 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
goto out;
prefetchw(skb);
- size = SKB_DATA_ALIGN(size);
- data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
- gfp_mask, node);
+ /* We do our best to align skb_shared_info on a separate cache
+ * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
+ * aligned memory blocks, unless SLUB/SLAB debug is enabled.
+ * Both skb->head and skb_shared_info are cache line aligned.
+ */
+ size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ data = kmalloc_node_track_caller(size, gfp_mask, node);
if (!data)
goto nodata;
+ /* kmalloc(size) might give us more room than requested.
+ * Put skb_shared_info exactly at the end of allocated zone,
+ * to allow max possible filling before reallocation.
+ */
+ size = SKB_WITH_OVERHEAD(ksize(data));
prefetchw(data + size);
/*
@@ -197,7 +206,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
* the tail pointer in struct sk_buff!
*/
memset(skb, 0, offsetof(struct sk_buff, tail));
- skb->truesize = size + sizeof(struct sk_buff);
+ /* Account for allocated memory : skb + skb->head */
+ skb->truesize = SKB_TRUESIZE(size);
atomic_set(&skb->users, 1);
skb->head = data;
skb->data = data;
@@ -326,7 +336,7 @@ static void skb_release_data(struct sk_buff *skb)
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ skb_frag_unref(skb, i);
}
/*
@@ -475,6 +485,30 @@ void consume_skb(struct sk_buff *skb)
EXPORT_SYMBOL(consume_skb);
/**
+ * skb_recycle - clean up an skb for reuse
+ * @skb: buffer
+ *
+ * Recycles the skb to be reused as a receive buffer. This
+ * function does any necessary reference count dropping, and
+ * cleans up the skbuff as if it just came from __alloc_skb().
+ */
+void skb_recycle(struct sk_buff *skb)
+{
+ struct skb_shared_info *shinfo;
+
+ skb_release_head_state(skb);
+
+ shinfo = skb_shinfo(skb);
+ memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
+ atomic_set(&shinfo->dataref, 1);
+
+ memset(skb, 0, offsetof(struct sk_buff, tail));
+ skb->data = skb->head + NET_SKB_PAD;
+ skb_reset_tail_pointer(skb);
+}
+EXPORT_SYMBOL(skb_recycle);
+
+/**
* skb_recycle_check - check if skb can be reused for receive
* @skb: buffer
* @skb_size: minimum receive buffer size
@@ -488,33 +522,10 @@ EXPORT_SYMBOL(consume_skb);
*/
bool skb_recycle_check(struct sk_buff *skb, int skb_size)
{
- struct skb_shared_info *shinfo;
-
- if (irqs_disabled())
+ if (!skb_is_recycleable(skb, skb_size))
return false;
- if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY)
- return false;
-
- if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE)
- return false;
-
- skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD);
- if (skb_end_pointer(skb) - skb->head < skb_size)
- return false;
-
- if (skb_shared(skb) || skb_cloned(skb))
- return false;
-
- skb_release_head_state(skb);
-
- shinfo = skb_shinfo(skb);
- memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
- atomic_set(&shinfo->dataref, 1);
-
- memset(skb, 0, offsetof(struct sk_buff, tail));
- skb->data = skb->head + NET_SKB_PAD;
- skb_reset_tail_pointer(skb);
+ skb_recycle(skb);
return true;
}
@@ -529,6 +540,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->mac_header = old->mac_header;
skb_dst_copy(new, old);
new->rxhash = old->rxhash;
+ new->ooo_okay = old->ooo_okay;
+ new->l4_rxhash = old->l4_rxhash;
#ifdef CONFIG_XFRM
new->sp = secpath_get(old->sp);
#endif
@@ -611,8 +624,21 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
}
EXPORT_SYMBOL_GPL(skb_morph);
-/* skb frags copy userspace buffers to kernel */
-static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
+/* skb_copy_ubufs - copy userspace skb frags buffers to kernel
+ * @skb: the skb to modify
+ * @gfp_mask: allocation priority
+ *
+ * This must be called on SKBTX_DEV_ZEROCOPY skb.
+ * It will copy all frags into kernel and drop the reference
+ * to userspace pages.
+ *
+ * If this function is called from an interrupt gfp_mask() must be
+ * %GFP_ATOMIC.
+ *
+ * Returns 0 on success or a negative error code on failure
+ * to allocate kernel memory to copy to.
+ */
+int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
{
int i;
int num_frags = skb_shinfo(skb)->nr_frags;
@@ -634,7 +660,7 @@ static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
}
vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
memcpy(page_address(page),
- vaddr + f->page_offset, f->size);
+ vaddr + f->page_offset, skb_frag_size(f));
kunmap_skb_frag(vaddr);
page->private = (unsigned long)head;
head = page;
@@ -642,16 +668,18 @@ static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
/* skb frags release userspace buffers */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ skb_frag_unref(skb, i);
uarg->callback(uarg);
/* skb frags point to kernel buffers */
for (i = skb_shinfo(skb)->nr_frags; i > 0; i--) {
- skb_shinfo(skb)->frags[i - 1].page_offset = 0;
- skb_shinfo(skb)->frags[i - 1].page = head;
+ __skb_fill_page_desc(skb, i-1, head, 0,
+ skb_shinfo(skb)->frags[i - 1].size);
head = (struct page *)head->private;
}
+
+ skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
return 0;
}
@@ -677,7 +705,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
if (skb_copy_ubufs(skb, gfp_mask))
return NULL;
- skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
}
n = skb + 1;
@@ -803,11 +830,10 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
n = NULL;
goto out;
}
- skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ skb_frag_ref(skb, i);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -896,10 +922,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
if (skb_copy_ubufs(skb, gfp_mask))
goto nofrags;
- skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ skb_frag_ref(skb, i);
if (skb_has_frag_list(skb))
skb_clone_fraglist(skb);
@@ -1166,20 +1191,20 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len)
goto drop_pages;
for (; i < nfrags; i++) {
- int end = offset + skb_shinfo(skb)->frags[i].size;
+ int end = offset + skb_frag_size(&skb_shinfo(skb)->frags[i]);
if (end < len) {
offset = end;
continue;
}
- skb_shinfo(skb)->frags[i++].size = len - offset;
+ skb_frag_size_set(&skb_shinfo(skb)->frags[i++], len - offset);
drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ skb_frag_unref(skb, i);
if (skb_has_frag_list(skb))
skb_drop_fraglist(skb);
@@ -1282,9 +1307,11 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
/* Estimate size of pulled pages. */
eat = delta;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- if (skb_shinfo(skb)->frags[i].size >= eat)
+ int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
+
+ if (size >= eat)
goto pull_pages;
- eat -= skb_shinfo(skb)->frags[i].size;
+ eat -= size;
}
/* If we need update frag list, we are in troubles.
@@ -1347,14 +1374,16 @@ pull_pages:
eat = delta;
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
- eat -= skb_shinfo(skb)->frags[i].size;
+ int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
+
+ if (size <= eat) {
+ skb_frag_unref(skb, i);
+ eat -= size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
if (eat) {
skb_shinfo(skb)->frags[k].page_offset += eat;
- skb_shinfo(skb)->frags[k].size -= eat;
+ skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
eat = 0;
}
k++;
@@ -1409,7 +1438,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
WARN_ON(start > offset + len);
- end = start + skb_shinfo(skb)->frags[i].size;
+ end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
if ((copy = end - offset) > 0) {
u8 *vaddr;
@@ -1607,7 +1636,8 @@ static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
- if (__splice_segment(f->page, f->page_offset, f->size,
+ if (__splice_segment(skb_frag_page(f),
+ f->page_offset, skb_frag_size(f),
offset, len, skb, spd, 0, sk, pipe))
return 1;
}
@@ -1717,7 +1747,7 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
WARN_ON(start > offset + len);
- end = start + frag->size;
+ end = start + skb_frag_size(frag);
if ((copy = end - offset) > 0) {
u8 *vaddr;
@@ -1790,7 +1820,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
WARN_ON(start > offset + len);
- end = start + skb_shinfo(skb)->frags[i].size;
+ end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
if ((copy = end - offset) > 0) {
__wsum csum2;
u8 *vaddr;
@@ -1865,7 +1895,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
WARN_ON(start > offset + len);
- end = start + skb_shinfo(skb)->frags[i].size;
+ end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
if ((copy = end - offset) > 0) {
__wsum csum2;
u8 *vaddr;
@@ -2138,7 +2168,7 @@ static inline void skb_split_no_header(struct sk_buff *skb,
skb->data_len = len - pos;
for (i = 0; i < nfrags; i++) {
- int size = skb_shinfo(skb)->frags[i].size;
+ int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
if (pos + size > len) {
skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -2152,10 +2182,10 @@ static inline void skb_split_no_header(struct sk_buff *skb,
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ skb_frag_ref(skb, i);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
- skb_shinfo(skb1)->frags[0].size -= len - pos;
- skb_shinfo(skb)->frags[i].size = len - pos;
+ skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos);
+ skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos);
skb_shinfo(skb)->nr_frags++;
}
k++;
@@ -2227,12 +2257,13 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
* commit all, so that we don't have to undo partial changes
*/
if (!to ||
- !skb_can_coalesce(tgt, to, fragfrom->page, fragfrom->page_offset)) {
+ !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
+ fragfrom->page_offset)) {
merge = -1;
} else {
merge = to - 1;
- todo -= fragfrom->size;
+ todo -= skb_frag_size(fragfrom);
if (todo < 0) {
if (skb_prepare_for_shift(skb) ||
skb_prepare_for_shift(tgt))
@@ -2242,8 +2273,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
fragfrom = &skb_shinfo(skb)->frags[from];
fragto = &skb_shinfo(tgt)->frags[merge];
- fragto->size += shiftlen;
- fragfrom->size -= shiftlen;
+ skb_frag_size_add(fragto, shiftlen);
+ skb_frag_size_sub(fragfrom, shiftlen);
fragfrom->page_offset += shiftlen;
goto onlymerged;
@@ -2267,20 +2298,20 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
fragfrom = &skb_shinfo(skb)->frags[from];
fragto = &skb_shinfo(tgt)->frags[to];
- if (todo >= fragfrom->size) {
+ if (todo >= skb_frag_size(fragfrom)) {
*fragto = *fragfrom;
- todo -= fragfrom->size;
+ todo -= skb_frag_size(fragfrom);
from++;
to++;
} else {
- get_page(fragfrom->page);
+ __skb_frag_ref(fragfrom);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
- fragto->size = todo;
+ skb_frag_size_set(fragto, todo);
fragfrom->page_offset += todo;
- fragfrom->size -= todo;
+ skb_frag_size_sub(fragfrom, todo);
todo = 0;
to++;
@@ -2295,8 +2326,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
fragfrom = &skb_shinfo(skb)->frags[0];
fragto = &skb_shinfo(tgt)->frags[merge];
- fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ skb_frag_size_add(fragto, skb_frag_size(fragfrom));
+ __skb_frag_unref(fragfrom);
}
/* Reposition in the original skb */
@@ -2393,7 +2424,7 @@ next_skb:
while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
- block_limit = frag->size + st->stepped_offset;
+ block_limit = skb_frag_size(frag) + st->stepped_offset;
if (abs_offset < block_limit) {
if (!st->frag_data)
@@ -2411,7 +2442,7 @@ next_skb:
}
st->frag_idx++;
- st->stepped_offset += frag->size;
+ st->stepped_offset += skb_frag_size(frag);
}
if (st->frag_data) {
@@ -2541,14 +2572,13 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
left = PAGE_SIZE - frag->page_offset;
copy = (length > left)? left : length;
- ret = getfrag(from, (page_address(frag->page) +
- frag->page_offset + frag->size),
+ ret = getfrag(from, skb_frag_address(frag) + skb_frag_size(frag),
offset, copy, 0, skb);
if (ret < 0)
return -EFAULT;
/* copy was successful so update the size parameters */
- frag->size += copy;
+ skb_frag_size_add(frag, copy);
skb->len += copy;
skb->data_len += copy;
offset += copy;
@@ -2694,12 +2724,12 @@ struct sk_buff *skb_segment(struct sk_buff *skb, u32 features)
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
- size = frag->size;
+ __skb_frag_ref(frag);
+ size = skb_frag_size(frag);
if (pos < offset) {
frag->page_offset += offset - pos;
- frag->size -= offset - pos;
+ skb_frag_size_sub(frag, offset - pos);
}
skb_shinfo(nskb)->nr_frags++;
@@ -2708,7 +2738,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, u32 features)
i++;
pos += size;
} else {
- frag->size -= pos + size - (offset + len);
+ skb_frag_size_sub(frag, pos + size - (offset + len));
goto skip_fraglist;
}
@@ -2788,7 +2818,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
} while (--i);
frag->page_offset += offset;
- frag->size -= offset;
+ skb_frag_size_sub(frag, offset);
skb->truesize -= skb->data_len;
skb->len -= skb->data_len;
@@ -2840,7 +2870,7 @@ merge:
unsigned int eat = offset - headlen;
skbinfo->frags[0].page_offset += eat;
- skbinfo->frags[0].size -= eat;
+ skb_frag_size_sub(&skbinfo->frags[0], eat);
skb->data_len -= eat;
skb->len -= eat;
offset = headlen;
@@ -2911,13 +2941,13 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
WARN_ON(start > offset + len);
- end = start + skb_shinfo(skb)->frags[i].size;
+ end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
if ((copy = end - offset) > 0) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
if (copy > len)
copy = len;
- sg_set_page(&sg[elt], frag->page, copy,
+ sg_set_page(&sg[elt], skb_frag_page(frag), copy,
frag->page_offset+offset-start);
elt++;
if (!(len -= copy))