aboutsummaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c36
-rw-r--r--net/core/drop_monitor.c1
-rw-r--r--net/core/dst.c16
-rw-r--r--net/core/fib_rules.c10
-rw-r--r--net/core/filter.c36
-rw-r--r--net/core/flow_dissector.c8
-rw-r--r--net/core/iovec.c7
-rw-r--r--net/core/neighbour.c8
-rw-r--r--net/core/netpoll.c28
-rw-r--r--net/core/pktgen.c7
-rw-r--r--net/core/rtnetlink.c87
-rw-r--r--net/core/secure_seq.c50
-rw-r--r--net/core/skbuff.c69
-rw-r--r--net/core/sock.c59
-rw-r--r--net/core/sock_diag.c4
15 files changed, 262 insertions, 164 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 7ddbb31b10d3..cca7ae0ba915 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2374,7 +2374,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
* 2. No high memory really exists on this machine.
*/
-static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
+static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
int i;
@@ -2454,46 +2454,51 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
}
static netdev_features_t harmonize_features(struct sk_buff *skb,
- __be16 protocol, netdev_features_t features)
+ __be16 protocol,
+ const struct net_device *dev,
+ netdev_features_t features)
{
if (skb->ip_summed != CHECKSUM_NONE &&
!can_checksum_protocol(features, protocol)) {
features &= ~NETIF_F_ALL_CSUM;
- } else if (illegal_highdma(skb->dev, skb)) {
+ } else if (illegal_highdma(dev, skb)) {
features &= ~NETIF_F_SG;
}
return features;
}
-netdev_features_t netif_skb_features(struct sk_buff *skb)
+netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
+ const struct net_device *dev)
{
__be16 protocol = skb->protocol;
- netdev_features_t features = skb->dev->features;
+ netdev_features_t features = dev->features;
- if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
+ if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs)
features &= ~NETIF_F_GSO_MASK;
if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
protocol = veh->h_vlan_encapsulated_proto;
} else if (!vlan_tx_tag_present(skb)) {
- return harmonize_features(skb, protocol, features);
+ return harmonize_features(skb, protocol, dev, features);
}
- features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
+ features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX);
if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) {
- return harmonize_features(skb, protocol, features);
+ return harmonize_features(skb, protocol, dev, features);
} else {
features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX;
- return harmonize_features(skb, protocol, features);
+ return harmonize_features(skb, protocol, dev, features);
}
+
+ return harmonize_features(skb, protocol, dev, features);
}
-EXPORT_SYMBOL(netif_skb_features);
+EXPORT_SYMBOL(netif_skb_dev_features);
/*
* Returns true if either:
@@ -3893,6 +3898,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
skb->vlan_tci = 0;
skb->dev = napi->dev;
skb->skb_iif = 0;
+ skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
napi->skb = skb;
}
@@ -4478,7 +4484,7 @@ static void dev_change_rx_flags(struct net_device *dev, int flags)
{
const struct net_device_ops *ops = dev->netdev_ops;
- if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
+ if (ops->ndo_change_rx_flags)
ops->ndo_change_rx_flags(dev, flags);
}
@@ -4629,6 +4635,7 @@ void __dev_set_rx_mode(struct net_device *dev)
if (ops->ndo_set_rx_mode)
ops->ndo_set_rx_mode(dev);
}
+EXPORT_SYMBOL(__dev_set_rx_mode);
void dev_set_rx_mode(struct net_device *dev)
{
@@ -5820,6 +5827,9 @@ EXPORT_SYMBOL(unregister_netdevice_queue);
/**
* unregister_netdevice_many - unregister many devices
* @head: list of devices
+ *
+ * Note: As most callers use a stack allocated list_head,
+ * we force a list_del() to make sure stack wont be corrupted later.
*/
void unregister_netdevice_many(struct list_head *head)
{
@@ -5829,6 +5839,7 @@ void unregister_netdevice_many(struct list_head *head)
rollback_registered_many(head);
list_for_each_entry(dev, head, unreg_list)
net_set_todo(dev);
+ list_del(head);
}
}
EXPORT_SYMBOL(unregister_netdevice_many);
@@ -6245,7 +6256,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
}
}
unregister_netdevice_many(&dev_kill_list);
- list_del(&dev_kill_list);
rtnl_unlock();
}
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index d23b6682f4e9..a974dfec4bf1 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -64,7 +64,6 @@ static struct genl_family net_drop_monitor_family = {
.hdrsize = 0,
.name = "NET_DM",
.version = 2,
- .maxattr = NET_DM_CMD_MAX,
};
static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
diff --git a/net/core/dst.c b/net/core/dst.c
index df9cc810ec8e..c0e021871df8 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -267,6 +267,15 @@ again:
}
EXPORT_SYMBOL(dst_destroy);
+static void dst_destroy_rcu(struct rcu_head *head)
+{
+ struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
+
+ dst = dst_destroy(dst);
+ if (dst)
+ __dst_free(dst);
+}
+
void dst_release(struct dst_entry *dst)
{
if (dst) {
@@ -274,11 +283,8 @@ void dst_release(struct dst_entry *dst)
newrefcnt = atomic_dec_return(&dst->__refcnt);
WARN_ON(newrefcnt < 0);
- if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) {
- dst = dst_destroy(dst);
- if (dst)
- __dst_free(dst);
- }
+ if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt)
+ call_rcu(&dst->rcu_head, dst_destroy_rcu);
}
}
EXPORT_SYMBOL(dst_release);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index d5a9f8ead0d8..55e08e2de3a1 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -445,7 +445,8 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
if (frh->action && (frh->action != rule->action))
continue;
- if (frh->table && (frh_get_table(frh, tb) != rule->table))
+ if (frh_get_table(frh, tb) &&
+ (frh_get_table(frh, tb) != rule->table))
continue;
if (tb[FRA_PRIORITY] &&
@@ -719,6 +720,13 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
attach_rules(&ops->rules_list, dev);
break;
+ case NETDEV_CHANGENAME:
+ list_for_each_entry(ops, &net->rules_ops, list) {
+ detach_rules(&ops->rules_list, dev);
+ attach_rules(&ops->rules_list, dev);
+ }
+ break;
+
case NETDEV_UNREGISTER:
list_for_each_entry(ops, &net->rules_ops, list)
detach_rules(&ops->rules_list, dev);
diff --git a/net/core/filter.c b/net/core/filter.c
index 6438f29ff266..c6c18d8a2d88 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -36,7 +36,6 @@
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>
-#include <linux/reciprocal_div.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
@@ -166,7 +165,7 @@ unsigned int sk_run_filter(const struct sk_buff *skb,
A /= X;
continue;
case BPF_S_ALU_DIV_K:
- A = reciprocal_divide(A, K);
+ A /= K;
continue;
case BPF_S_ALU_MOD_X:
if (X == 0)
@@ -356,6 +355,8 @@ load_b:
if (skb_is_nonlinear(skb))
return 0;
+ if (skb->len < sizeof(struct nlattr))
+ return 0;
if (A > skb->len - sizeof(struct nlattr))
return 0;
@@ -372,11 +373,13 @@ load_b:
if (skb_is_nonlinear(skb))
return 0;
+ if (skb->len < sizeof(struct nlattr))
+ return 0;
if (A > skb->len - sizeof(struct nlattr))
return 0;
nla = (struct nlattr *)&skb->data[A];
- if (nla->nla_len > A - skb->len)
+ if (nla->nla_len > skb->len - A)
return 0;
nla = nla_find_nested(nla, X);
@@ -553,11 +556,6 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
/* Some instructions need special checks */
switch (code) {
case BPF_S_ALU_DIV_K:
- /* check for division by zero */
- if (ftest->k == 0)
- return -EINVAL;
- ftest->k = reciprocal_value(ftest->k);
- break;
case BPF_S_ALU_MOD_K:
/* check for division by zero */
if (ftest->k == 0)
@@ -853,27 +851,7 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
to->code = decodes[code];
to->jt = filt->jt;
to->jf = filt->jf;
-
- if (code == BPF_S_ALU_DIV_K) {
- /*
- * When loaded this rule user gave us X, which was
- * translated into R = r(X). Now we calculate the
- * RR = r(R) and report it back. If next time this
- * value is loaded and RRR = r(RR) is calculated
- * then the R == RRR will be true.
- *
- * One exception. X == 1 translates into R == 0 and
- * we can't calculate RR out of it with r().
- */
-
- if (filt->k == 0)
- to->k = 1;
- else
- to->k = reciprocal_value(filt->k);
-
- BUG_ON(reciprocal_value(to->k) != filt->k);
- } else
- to->k = filt->k;
+ to->k = filt->k;
}
int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index c99cc371bbd7..f97101b4d373 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -40,7 +40,7 @@ again:
struct iphdr _iph;
ip:
iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
- if (!iph)
+ if (!iph || iph->ihl < 5)
return false;
if (ip_is_fragment(iph))
@@ -149,8 +149,8 @@ ipv6:
if (poff >= 0) {
__be32 *ports, _ports;
- nhoff += poff;
- ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports);
+ ports = skb_header_pointer(skb, nhoff + poff,
+ sizeof(_ports), &_ports);
if (ports)
flow->ports = *ports;
}
@@ -347,7 +347,7 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
if (queue_index != new_index && sk &&
rcu_access_pointer(sk->sk_dst_cache))
- sk_tx_queue_set(sk, queue_index);
+ sk_tx_queue_set(sk, new_index);
queue_index = new_index;
}
diff --git a/net/core/iovec.c b/net/core/iovec.c
index de178e462682..1117a26a8548 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -39,7 +39,7 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a
{
int size, ct, err;
- if (m->msg_namelen) {
+ if (m->msg_name && m->msg_namelen) {
if (mode == VERIFY_READ) {
void __user *namep;
namep = (void __user __force *) m->msg_name;
@@ -51,6 +51,7 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a
m->msg_name = address;
} else {
m->msg_name = NULL;
+ m->msg_namelen = 0;
}
size = m->msg_iovlen * sizeof(struct iovec);
@@ -106,6 +107,10 @@ EXPORT_SYMBOL(memcpy_toiovecend);
int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
int offset, int len)
{
+ /* No data? Done! */
+ if (len == 0)
+ return 0;
+
/* Skip over the finished iovecs */
while (offset >= iov->iov_len) {
offset -= iov->iov_len;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 0034b611fa5e..b49e8bafab17 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -764,9 +764,6 @@ static void neigh_periodic_work(struct work_struct *work)
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
- if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
- goto out;
-
/*
* periodically recompute ReachableTime from random function
*/
@@ -779,6 +776,9 @@ static void neigh_periodic_work(struct work_struct *work)
neigh_rand_reach_time(p->base_reachable_time);
}
+ if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
+ goto out;
+
for (i = 0 ; i < (1 << nht->hash_shift); i++) {
np = &nht->hash_buckets[i];
@@ -1274,7 +1274,7 @@ int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
skb->len) < 0 &&
- dev->header_ops->rebuild(skb))
+ dev_rebuild_header(skb))
return 0;
return dev_queue_xmit(skb);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index cec074be8c43..e861438d5454 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -386,8 +386,14 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
!vlan_hw_offload_capable(netif_skb_features(skb),
skb->vlan_proto)) {
skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb));
- if (unlikely(!skb))
- break;
+ if (unlikely(!skb)) {
+ /* This is actually a packet drop, but we
+ * don't want the code at the end of this
+ * function to try and re-queue a NULL skb.
+ */
+ status = NETDEV_TX_OK;
+ goto unlock_txq;
+ }
skb->vlan_tci = 0;
}
@@ -395,6 +401,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
if (status == NETDEV_TX_OK)
txq_trans_update(txq);
}
+ unlock_txq:
__netif_tx_unlock(txq);
if (status == NETDEV_TX_OK)
@@ -550,7 +557,7 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo
return;
proto = ntohs(eth_hdr(skb)->h_proto);
- if (proto == ETH_P_IP) {
+ if (proto == ETH_P_ARP) {
struct arphdr *arp;
unsigned char *arp_ptr;
/* No arp on this interface */
@@ -738,7 +745,7 @@ static bool pkt_is_ns(struct sk_buff *skb)
struct nd_msg *msg;
struct ipv6hdr *hdr;
- if (skb->protocol != htons(ETH_P_ARP))
+ if (skb->protocol != htons(ETH_P_IPV6))
return false;
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
return false;
@@ -941,6 +948,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
{
char *cur=opt, *delim;
int ipv6;
+ bool ipversion_set = false;
if (*cur != '@') {
if ((delim = strchr(cur, '@')) == NULL)
@@ -953,6 +961,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
cur++;
if (*cur != '/') {
+ ipversion_set = true;
if ((delim = strchr(cur, '/')) == NULL)
goto parse_failed;
*delim = 0;
@@ -995,7 +1004,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
if (ipv6 < 0)
goto parse_failed;
- else if (np->ipv6 != (bool)ipv6)
+ else if (ipversion_set && np->ipv6 != (bool)ipv6)
goto parse_failed;
else
np->ipv6 = (bool)ipv6;
@@ -1289,15 +1298,14 @@ EXPORT_SYMBOL_GPL(__netpoll_free_async);
void netpoll_cleanup(struct netpoll *np)
{
- if (!np->dev)
- return;
-
rtnl_lock();
+ if (!np->dev)
+ goto out;
__netpoll_cleanup(np);
- rtnl_unlock();
-
dev_put(np->dev);
np->dev = NULL;
+out:
+ rtnl_unlock();
}
EXPORT_SYMBOL(netpoll_cleanup);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 11f2704c3810..ebbea5371967 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2515,6 +2515,8 @@ static int process_ipsec(struct pktgen_dev *pkt_dev,
if (x) {
int ret;
__u8 *eth;
+ struct iphdr *iph;
+
nhead = x->props.header_len - skb_headroom(skb);
if (nhead > 0) {
ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
@@ -2536,6 +2538,11 @@ static int process_ipsec(struct pktgen_dev *pkt_dev,
eth = (__u8 *) skb_push(skb, ETH_HLEN);
memcpy(eth, pkt_dev->hh, 12);
*(u16 *) &eth[12] = protocol;
+
+ /* Update IPv4 header len as well as checksum value */
+ iph = ip_hdr(skb);
+ iph->tot_len = htons(skb->len - ETH_HLEN);
+ ip_send_check(iph);
}
}
return 1;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index fd01eca52a13..25c4dd563a79 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -714,7 +714,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
return 0;
}
-static size_t rtnl_port_size(const struct net_device *dev)
+static size_t rtnl_port_size(const struct net_device *dev,
+ u32 ext_filter_mask)
{
size_t port_size = nla_total_size(4) /* PORT_VF */
+ nla_total_size(PORT_PROFILE_MAX) /* PORT_PROFILE */
@@ -730,7 +731,8 @@ static size_t rtnl_port_size(const struct net_device *dev)
size_t port_self_size = nla_total_size(sizeof(struct nlattr))
+ port_size;
- if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent)
+ if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent ||
+ !(ext_filter_mask & RTEXT_FILTER_VF))
return 0;
if (dev_num_vf(dev->dev.parent))
return port_self_size + vf_ports_size +
@@ -765,7 +767,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(ext_filter_mask
& RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
+ rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
- + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
+ + rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
+ rtnl_link_get_size(dev) /* IFLA_LINKINFO */
+ rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */
}
@@ -826,11 +828,13 @@ static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
return 0;
}
-static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev)
+static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev,
+ u32 ext_filter_mask)
{
int err;
- if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent)
+ if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent ||
+ !(ext_filter_mask & RTEXT_FILTER_VF))
return 0;
err = rtnl_port_self_fill(skb, dev);
@@ -985,7 +989,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
nla_nest_end(skb, vfinfo);
}
- if (rtnl_port_fill(skb, dev))
+ if (rtnl_port_fill(skb, dev, ext_filter_mask))
goto nla_put_failure;
if (dev->rtnl_link_ops) {
@@ -1039,6 +1043,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
struct hlist_head *head;
struct nlattr *tb[IFLA_MAX+1];
u32 ext_filter_mask = 0;
+ int err;
+ int hdrlen;
s_h = cb->args[0];
s_idx = cb->args[1];
@@ -1046,8 +1052,17 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
cb->seq = net->dev_base_seq;
- if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
- ifla_policy) >= 0) {
+ /* A hack to preserve kernel<->userspace interface.
+ * The correct header is ifinfomsg. It is consistent with rtnl_getlink.
+ * However, before Linux v3.9 the code here assumed rtgenmsg and that's
+ * what iproute2 < v3.9.0 used.
+ * We can detect the old iproute2. Even including the IFLA_EXT_MASK
+ * attribute, its netlink message is shorter than struct ifinfomsg.
+ */
+ hdrlen = nlmsg_len(cb->nlh) < sizeof(struct ifinfomsg) ?
+ sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
+
+ if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) {
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
@@ -1059,11 +1074,17 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
- if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, 0,
- NLM_F_MULTI,
- ext_filter_mask) <= 0)
+ err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, 0,
+ NLM_F_MULTI,
+ ext_filter_mask);
+ /* If we ran out of room on the first message,
+ * we're in trouble
+ */
+ WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
+
+ if (err <= 0)
goto out;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
@@ -1283,7 +1304,8 @@ static int do_set_master(struct net_device *dev, int ifindex)
return 0;
}
-static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
+static int do_setlink(const struct sk_buff *skb,
+ struct net_device *dev, struct ifinfomsg *ifm,
struct nlattr **tb, char *ifname, int modified)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -1295,7 +1317,8 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
err = PTR_ERR(net);
goto errout;
}
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+ if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+ put_net(net);
err = -EPERM;
goto errout;
}
@@ -1549,7 +1572,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
if (err < 0)
goto errout;
- err = do_setlink(dev, ifm, tb, ifname, 0);
+ err = do_setlink(skb, dev, ifm, tb, ifname, 0);
errout:
return err;
}
@@ -1589,7 +1612,6 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
ops->dellink(dev, &list_kill);
unregister_netdevice_many(&list_kill);
- list_del(&list_kill);
return 0;
}
@@ -1667,7 +1689,8 @@ err:
}
EXPORT_SYMBOL(rtnl_create_link);
-static int rtnl_group_changelink(struct net *net, int group,
+static int rtnl_group_changelink(const struct sk_buff *skb,
+ struct net *net, int group,
struct ifinfomsg *ifm,
struct nlattr **tb)
{
@@ -1676,7 +1699,7 @@ static int rtnl_group_changelink(struct net *net, int group,
for_each_netdev(net, dev) {
if (dev->group == group) {
- err = do_setlink(dev, ifm, tb, NULL, 0);
+ err = do_setlink(skb, dev, ifm, tb, NULL, 0);
if (err < 0)
return err;
}
@@ -1778,12 +1801,12 @@ replay:
modified = 1;
}
- return do_setlink(dev, ifm, tb, ifname, modified);
+ return do_setlink(skb, dev, ifm, tb, ifname, modified);
}
if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
- return rtnl_group_changelink(net,
+ return rtnl_group_changelink(skb, net,
nla_get_u32(tb[IFLA_GROUP]),
ifm, tb);
return -ENODEV;
@@ -1895,9 +1918,13 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
struct nlattr *tb[IFLA_MAX+1];
u32 ext_filter_mask = 0;
u16 min_ifinfo_dump_size = 0;
+ int hdrlen;
+
+ /* Same kernel<->userspace interface hack as in rtnl_dump_ifinfo. */
+ hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ?
+ sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
- if (nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
- ifla_policy) >= 0) {
+ if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) {
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
}
@@ -1973,12 +2000,13 @@ EXPORT_SYMBOL(rtmsg_ifinfo);
static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
struct net_device *dev,
u8 *addr, u32 pid, u32 seq,
- int type, unsigned int flags)
+ int type, unsigned int flags,
+ int nlflags)
{
struct nlmsghdr *nlh;
struct ndmsg *ndm;
- nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), NLM_F_MULTI);
+ nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), nlflags);
if (!nlh)
return -EMSGSIZE;
@@ -2016,7 +2044,7 @@ static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, int type)
if (!skb)
goto errout;
- err = nlmsg_populate_fdb_fill(skb, dev, addr, 0, 0, type, NTF_SELF);
+ err = nlmsg_populate_fdb_fill(skb, dev, addr, 0, 0, type, NTF_SELF, 0);
if (err < 0) {
kfree_skb(skb);
goto errout;
@@ -2167,7 +2195,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
int err = -EINVAL;
__u8 *addr;
- if (!capable(CAP_NET_ADMIN))
+ if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
@@ -2249,7 +2277,8 @@ static int nlmsg_populate_fdb(struct sk_buff *skb,
err = nlmsg_populate_fdb_fill(skb, dev, ha->addr,
portid, seq,
- RTM_NEWNEIGH, NTF_SELF);
+ RTM_NEWNEIGH, NTF_SELF,
+ NLM_F_MULTI);
if (err < 0)
return err;
skip:
@@ -2622,7 +2651,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
sz_idx = type>>2;
kind = type&3;
- if (kind != 2 && !ns_capable(net->user_ns, CAP_NET_ADMIN))
+ if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN))
return -EPERM;
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 6a2f13cee86a..d0afc322b961 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -10,12 +10,27 @@
#include <net/secure_seq.h>
-static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
+#if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET)
+#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4)
-void net_secret_init(void)
+static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
+
+static void net_secret_init(void)
{
- get_random_bytes(net_secret, sizeof(net_secret));
+ u32 tmp;
+ int i;
+
+ if (likely(net_secret[0]))
+ return;
+
+ for (i = NET_SECRET_SIZE; i > 0;) {
+ do {
+ get_random_bytes(&tmp, sizeof(tmp));
+ } while (!tmp);
+ cmpxchg(&net_secret[--i], 0, tmp);
+ }
}
+#endif
#ifdef CONFIG_INET
static u32 seq_scale(u32 seq)
@@ -42,6 +57,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
u32 hash[MD5_DIGEST_WORDS];
u32 i;
+ net_secret_init();
memcpy(hash, saddr, 16);
for (i = 0; i < 4; i++)
secret[i] = net_secret[i] + (__force u32)daddr[i];
@@ -63,6 +79,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
u32 hash[MD5_DIGEST_WORDS];
u32 i;
+ net_secret_init();
memcpy(hash, saddr, 16);
for (i = 0; i < 4; i++)
secret[i] = net_secret[i] + (__force u32) daddr[i];
@@ -78,35 +95,13 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#endif
#ifdef CONFIG_INET
-__u32 secure_ip_id(__be32 daddr)
-{
- u32 hash[MD5_DIGEST_WORDS];
-
- hash[0] = (__force __u32) daddr;
- hash[1] = net_secret[13];
- hash[2] = net_secret[14];
- hash[3] = net_secret[15];
-
- md5_transform(hash, net_secret);
-
- return hash[0];
-}
-
-__u32 secure_ipv6_id(const __be32 daddr[4])
-{
- __u32 hash[4];
-
- memcpy(hash, daddr, 16);
- md5_transform(hash, net_secret);
-
- return hash[0];
-}
__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
__be16 sport, __be16 dport)
{
u32 hash[MD5_DIGEST_WORDS];
+ net_secret_init();
hash[0] = (__force u32)saddr;
hash[1] = (__force u32)daddr;
hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -121,6 +116,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
{
u32 hash[MD5_DIGEST_WORDS];
+ net_secret_init();
hash[0] = (__force u32)saddr;
hash[1] = (__force u32)daddr;
hash[2] = (__force u32)dport ^ net_secret[14];
@@ -140,6 +136,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
u32 hash[MD5_DIGEST_WORDS];
u64 seq;
+ net_secret_init();
hash[0] = (__force u32)saddr;
hash[1] = (__force u32)daddr;
hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -164,6 +161,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
u64 seq;
u32 i;
+ net_secret_init();
memcpy(hash, saddr, 16);
for (i = 0; i < 4; i++)
secret[i] = net_secret[i] + daddr[i];
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1c1738cc4538..6148716884ae 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -47,6 +47,8 @@
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/slab.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
#include <linux/netdevice.h>
#ifdef CONFIG_NET_CLS_ACT
#include <net/pkt_sched.h>
@@ -74,36 +76,6 @@
struct kmem_cache *skbuff_head_cache __read_mostly;
static struct kmem_cache *skbuff_fclone_cache __read_mostly;
-static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
-{
- put_page(buf->page);
-}
-
-static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
-{
- get_page(buf->page);
-}
-
-static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
-{
- return 1;
-}
-
-
-/* Pipe buffer operations for a socket. */
-static const struct pipe_buf_operations sock_pipe_buf_ops = {
- .can_merge = 0,
- .map = generic_pipe_buf_map,
- .unmap = generic_pipe_buf_unmap,
- .confirm = generic_pipe_buf_confirm,
- .release = sock_pipe_buf_release,
- .steal = sock_pipe_buf_steal,
- .get = sock_pipe_buf_get,
-};
-
/**
* skb_panic - private function for out-of-line support
* @skb: buffer
@@ -585,9 +557,6 @@ static void skb_release_head_state(struct sk_buff *skb)
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
nf_conntrack_put(skb->nfct);
#endif
-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
- nf_conntrack_put_reasm(skb->nfct_reasm);
-#endif
#ifdef CONFIG_BRIDGE_NETFILTER
nf_bridge_put(skb->nf_bridge);
#endif
@@ -1814,7 +1783,7 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
.partial = partial,
.nr_pages_max = MAX_SKB_FRAGS,
.flags = flags,
- .ops = &sock_pipe_buf_ops,
+ .ops = &nosteal_pipe_buf_ops,
.spd_release = sock_spd_release,
};
struct sk_buff *frag_iter;
@@ -2841,7 +2810,6 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
tail = nskb;
__copy_skb_header(nskb, skb);
- nskb->mac_len = skb->mac_len;
/* nskb and skb might have different headroom */
if (nskb->ip_summed == CHECKSUM_PARTIAL)
@@ -2851,13 +2819,14 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
skb_set_network_header(nskb, skb->mac_len);
nskb->transport_header = (nskb->network_header +
skb_network_header_len(skb));
+ skb_reset_mac_len(nskb);
skb_copy_from_linear_data_offset(skb, -tnl_hlen,
nskb->data - tnl_hlen,
doffset + tnl_hlen);
if (fskb != skb_shinfo(skb)->frag_list)
- continue;
+ goto perform_csum_check;
if (!sg) {
nskb->ip_summed = CHECKSUM_NONE;
@@ -2875,6 +2844,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
while (pos < offset + len && i < nfrags) {
+ if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
+ goto err;
*frag = skb_shinfo(skb)->frags[i];
__skb_frag_ref(frag);
size = skb_frag_size(frag);
@@ -2921,6 +2892,7 @@ skip_fraglist:
nskb->len += nskb->data_len;
nskb->truesize += nskb->data_len;
+perform_csum_check:
if (!csum) {
nskb->csum = skb_checksum(nskb, doffset,
nskb->len - doffset, 0);
@@ -3503,3 +3475,28 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
return true;
}
EXPORT_SYMBOL(skb_try_coalesce);
+
+/**
+ * skb_gso_transport_seglen - Return length of individual segments of a gso packet
+ *
+ * @skb: GSO skb
+ *
+ * skb_gso_transport_seglen is used to determine the real size of the
+ * individual segments, including Layer4 headers (TCP/UDP).
+ *
+ * The MAC/L2 or network (IP, IPv6) headers are not accounted for.
+ */
+unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
+{
+ const struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+ return tcp_hdrlen(skb) + shinfo->gso_size;
+
+ /* UFO sets gso_size to the size of the fragmentation
+ * payload, i.e. the size of the L4 (UDP) header is already
+ * accounted for.
+ */
+ return shinfo->gso_size;
+}
+EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
diff --git a/net/core/sock.c b/net/core/sock.c
index d6d024cfaaaf..af65d17517b8 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -142,6 +142,55 @@
static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);
+/**
+ * sk_ns_capable - General socket capability test
+ * @sk: Socket to use a capability on or through
+ * @user_ns: The user namespace of the capability to use
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket had when the socket was
+ * created and the current process has the capability @cap in the user
+ * namespace @user_ns.
+ */
+bool sk_ns_capable(const struct sock *sk,
+ struct user_namespace *user_ns, int cap)
+{
+ return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
+ ns_capable(user_ns, cap);
+}
+EXPORT_SYMBOL(sk_ns_capable);
+
+/**
+ * sk_capable - Socket global capability test
+ * @sk: Socket to use a capability on or through
+ * @cap: The global capbility to use
+ *
+ * Test to see if the opener of the socket had when the socket was
+ * created and the current process has the capability @cap in all user
+ * namespaces.
+ */
+bool sk_capable(const struct sock *sk, int cap)
+{
+ return sk_ns_capable(sk, &init_user_ns, cap);
+}
+EXPORT_SYMBOL(sk_capable);
+
+/**
+ * sk_net_capable - Network namespace socket capability test
+ * @sk: Socket to use a capability on or through
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket had when the socke was created
+ * and the current process has the capability @cap over the network namespace
+ * the socket is a member of.
+ */
+bool sk_net_capable(const struct sock *sk, int cap)
+{
+ return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
+}
+EXPORT_SYMBOL(sk_net_capable);
+
+
#ifdef CONFIG_MEMCG_KMEM
int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
@@ -885,7 +934,7 @@ set_rcvbuf:
case SO_PEEK_OFF:
if (sock->ops->set_peek_off)
- sock->ops->set_peek_off(sk, val);
+ ret = sock->ops->set_peek_off(sk, val);
else
ret = -EOPNOTSUPP;
break;
@@ -1814,7 +1863,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
gfp_t gfp = sk->sk_allocation;
if (order)
- gfp |= __GFP_COMP | __GFP_NOWARN;
+ gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
pfrag->page = alloc_pages(gfp, order);
if (likely(pfrag->page)) {
pfrag->offset = 0;
@@ -2271,6 +2320,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_stamp = ktime_set(-1L, 0);
+ sk->sk_pacing_rate = ~0U;
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
@@ -2308,10 +2358,13 @@ void release_sock(struct sock *sk)
if (sk->sk_backlog.tail)
__release_sock(sk);
+ /* Warning : release_cb() might need to release sk ownership,
+ * ie call sock_release_ownership(sk) before us.
+ */
if (sk->sk_prot->release_cb)
sk->sk_prot->release_cb(sk);
- sk->sk_lock.owned = 0;
+ sock_release_ownership(sk);
if (waitqueue_active(&sk->sk_lock.wq))
wake_up(&sk->sk_lock.wq);
spin_unlock_bh(&sk->sk_lock.slock);
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index a0e9cf6379de..c38e7a2b5a8e 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -49,7 +49,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
}
EXPORT_SYMBOL_GPL(sock_diag_put_meminfo);
-int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk,
+int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
struct sk_buff *skb, int attrtype)
{
struct nlattr *attr;
@@ -57,7 +57,7 @@ int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk,
unsigned int len;
int err = 0;
- if (!ns_capable(user_ns, CAP_NET_ADMIN)) {
+ if (!may_report_filterinfo) {
nla_reserve(skb, attrtype, 0);
return 0;
}