diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 36 | ||||
-rw-r--r-- | net/core/drop_monitor.c | 1 | ||||
-rw-r--r-- | net/core/dst.c | 16 | ||||
-rw-r--r-- | net/core/fib_rules.c | 10 | ||||
-rw-r--r-- | net/core/filter.c | 36 | ||||
-rw-r--r-- | net/core/flow_dissector.c | 8 | ||||
-rw-r--r-- | net/core/iovec.c | 7 | ||||
-rw-r--r-- | net/core/neighbour.c | 8 | ||||
-rw-r--r-- | net/core/netpoll.c | 28 | ||||
-rw-r--r-- | net/core/pktgen.c | 7 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 87 | ||||
-rw-r--r-- | net/core/secure_seq.c | 50 | ||||
-rw-r--r-- | net/core/skbuff.c | 69 | ||||
-rw-r--r-- | net/core/sock.c | 59 | ||||
-rw-r--r-- | net/core/sock_diag.c | 4 |
15 files changed, 262 insertions, 164 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 7ddbb31b10d3..cca7ae0ba915 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2374,7 +2374,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault); * 2. No high memory really exists on this machine. */ -static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) +static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_HIGHMEM int i; @@ -2454,46 +2454,51 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) } static netdev_features_t harmonize_features(struct sk_buff *skb, - __be16 protocol, netdev_features_t features) + __be16 protocol, + const struct net_device *dev, + netdev_features_t features) { if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, protocol)) { features &= ~NETIF_F_ALL_CSUM; - } else if (illegal_highdma(skb->dev, skb)) { + } else if (illegal_highdma(dev, skb)) { features &= ~NETIF_F_SG; } return features; } -netdev_features_t netif_skb_features(struct sk_buff *skb) +netdev_features_t netif_skb_dev_features(struct sk_buff *skb, + const struct net_device *dev) { __be16 protocol = skb->protocol; - netdev_features_t features = skb->dev->features; + netdev_features_t features = dev->features; - if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) + if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs) features &= ~NETIF_F_GSO_MASK; if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; } else if (!vlan_tx_tag_present(skb)) { - return harmonize_features(skb, protocol, features); + return harmonize_features(skb, protocol, dev, features); } - features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | + features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) { - return harmonize_features(skb, protocol, features); + return harmonize_features(skb, protocol, dev, features); } else { features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; - return harmonize_features(skb, protocol, features); + return harmonize_features(skb, protocol, dev, features); } + + return harmonize_features(skb, protocol, dev, features); } -EXPORT_SYMBOL(netif_skb_features); +EXPORT_SYMBOL(netif_skb_dev_features); /* * Returns true if either: @@ -3893,6 +3898,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb->vlan_tci = 0; skb->dev = napi->dev; skb->skb_iif = 0; + skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); napi->skb = skb; } @@ -4478,7 +4484,7 @@ static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; - if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags) + if (ops->ndo_change_rx_flags) ops->ndo_change_rx_flags(dev, flags); } @@ -4629,6 +4635,7 @@ void __dev_set_rx_mode(struct net_device *dev) if (ops->ndo_set_rx_mode) ops->ndo_set_rx_mode(dev); } +EXPORT_SYMBOL(__dev_set_rx_mode); void dev_set_rx_mode(struct net_device *dev) { @@ -5820,6 +5827,9 @@ EXPORT_SYMBOL(unregister_netdevice_queue); /** * unregister_netdevice_many - unregister many devices * @head: list of devices + * + * Note: As most callers use a stack allocated list_head, + * we force a list_del() to make sure stack wont be corrupted later. */ void unregister_netdevice_many(struct list_head *head) { @@ -5829,6 +5839,7 @@ void unregister_netdevice_many(struct list_head *head) rollback_registered_many(head); list_for_each_entry(dev, head, unreg_list) net_set_todo(dev); + list_del(head); } } EXPORT_SYMBOL(unregister_netdevice_many); @@ -6245,7 +6256,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) } } unregister_netdevice_many(&dev_kill_list); - list_del(&dev_kill_list); rtnl_unlock(); } diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index d23b6682f4e9..a974dfec4bf1 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -64,7 +64,6 @@ static struct genl_family net_drop_monitor_family = { .hdrsize = 0, .name = "NET_DM", .version = 2, - .maxattr = NET_DM_CMD_MAX, }; static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); diff --git a/net/core/dst.c b/net/core/dst.c index df9cc810ec8e..c0e021871df8 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -267,6 +267,15 @@ again: } EXPORT_SYMBOL(dst_destroy); +static void dst_destroy_rcu(struct rcu_head *head) +{ + struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); + + dst = dst_destroy(dst); + if (dst) + __dst_free(dst); +} + void dst_release(struct dst_entry *dst) { if (dst) { @@ -274,11 +283,8 @@ void dst_release(struct dst_entry *dst) newrefcnt = atomic_dec_return(&dst->__refcnt); WARN_ON(newrefcnt < 0); - if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) { - dst = dst_destroy(dst); - if (dst) - __dst_free(dst); - } + if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) + call_rcu(&dst->rcu_head, dst_destroy_rcu); } } EXPORT_SYMBOL(dst_release); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index d5a9f8ead0d8..55e08e2de3a1 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -445,7 +445,8 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) if (frh->action && (frh->action != rule->action)) continue; - if (frh->table && (frh_get_table(frh, tb) != rule->table)) + if (frh_get_table(frh, tb) && + (frh_get_table(frh, tb) != rule->table)) continue; if (tb[FRA_PRIORITY] && @@ -719,6 +720,13 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, attach_rules(&ops->rules_list, dev); break; + case NETDEV_CHANGENAME: + list_for_each_entry(ops, &net->rules_ops, list) { + detach_rules(&ops->rules_list, dev); + attach_rules(&ops->rules_list, dev); + } + break; + case NETDEV_UNREGISTER: list_for_each_entry(ops, &net->rules_ops, list) detach_rules(&ops->rules_list, dev); diff --git a/net/core/filter.c b/net/core/filter.c index 6438f29ff266..c6c18d8a2d88 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -36,7 +36,6 @@ #include <asm/uaccess.h> #include <asm/unaligned.h> #include <linux/filter.h> -#include <linux/reciprocal_div.h> #include <linux/ratelimit.h> #include <linux/seccomp.h> #include <linux/if_vlan.h> @@ -166,7 +165,7 @@ unsigned int sk_run_filter(const struct sk_buff *skb, A /= X; continue; case BPF_S_ALU_DIV_K: - A = reciprocal_divide(A, K); + A /= K; continue; case BPF_S_ALU_MOD_X: if (X == 0) @@ -356,6 +355,8 @@ load_b: if (skb_is_nonlinear(skb)) return 0; + if (skb->len < sizeof(struct nlattr)) + return 0; if (A > skb->len - sizeof(struct nlattr)) return 0; @@ -372,11 +373,13 @@ load_b: if (skb_is_nonlinear(skb)) return 0; + if (skb->len < sizeof(struct nlattr)) + return 0; if (A > skb->len - sizeof(struct nlattr)) return 0; nla = (struct nlattr *)&skb->data[A]; - if (nla->nla_len > A - skb->len) + if (nla->nla_len > skb->len - A) return 0; nla = nla_find_nested(nla, X); @@ -553,11 +556,6 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) /* Some instructions need special checks */ switch (code) { case BPF_S_ALU_DIV_K: - /* check for division by zero */ - if (ftest->k == 0) - return -EINVAL; - ftest->k = reciprocal_value(ftest->k); - break; case BPF_S_ALU_MOD_K: /* check for division by zero */ if (ftest->k == 0) @@ -853,27 +851,7 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) to->code = decodes[code]; to->jt = filt->jt; to->jf = filt->jf; - - if (code == BPF_S_ALU_DIV_K) { - /* - * When loaded this rule user gave us X, which was - * translated into R = r(X). Now we calculate the - * RR = r(R) and report it back. If next time this - * value is loaded and RRR = r(RR) is calculated - * then the R == RRR will be true. - * - * One exception. X == 1 translates into R == 0 and - * we can't calculate RR out of it with r(). - */ - - if (filt->k == 0) - to->k = 1; - else - to->k = reciprocal_value(filt->k); - - BUG_ON(reciprocal_value(to->k) != filt->k); - } else - to->k = filt->k; + to->k = filt->k; } int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index c99cc371bbd7..f97101b4d373 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -40,7 +40,7 @@ again: struct iphdr _iph; ip: iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); - if (!iph) + if (!iph || iph->ihl < 5) return false; if (ip_is_fragment(iph)) @@ -149,8 +149,8 @@ ipv6: if (poff >= 0) { __be32 *ports, _ports; - nhoff += poff; - ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports); + ports = skb_header_pointer(skb, nhoff + poff, + sizeof(_ports), &_ports); if (ports) flow->ports = *ports; } @@ -347,7 +347,7 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) if (queue_index != new_index && sk && rcu_access_pointer(sk->sk_dst_cache)) - sk_tx_queue_set(sk, queue_index); + sk_tx_queue_set(sk, new_index); queue_index = new_index; } diff --git a/net/core/iovec.c b/net/core/iovec.c index de178e462682..1117a26a8548 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -39,7 +39,7 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a { int size, ct, err; - if (m->msg_namelen) { + if (m->msg_name && m->msg_namelen) { if (mode == VERIFY_READ) { void __user *namep; namep = (void __user __force *) m->msg_name; @@ -51,6 +51,7 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a m->msg_name = address; } else { m->msg_name = NULL; + m->msg_namelen = 0; } size = m->msg_iovlen * sizeof(struct iovec); @@ -106,6 +107,10 @@ EXPORT_SYMBOL(memcpy_toiovecend); int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, int offset, int len) { + /* No data? Done! */ + if (len == 0) + return 0; + /* Skip over the finished iovecs */ while (offset >= iov->iov_len) { offset -= iov->iov_len; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 0034b611fa5e..b49e8bafab17 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -764,9 +764,6 @@ static void neigh_periodic_work(struct work_struct *work) nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); - if (atomic_read(&tbl->entries) < tbl->gc_thresh1) - goto out; - /* * periodically recompute ReachableTime from random function */ @@ -779,6 +776,9 @@ static void neigh_periodic_work(struct work_struct *work) neigh_rand_reach_time(p->base_reachable_time); } + if (atomic_read(&tbl->entries) < tbl->gc_thresh1) + goto out; + for (i = 0 ; i < (1 << nht->hash_shift); i++) { np = &nht->hash_buckets[i]; @@ -1274,7 +1274,7 @@ int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb) if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, skb->len) < 0 && - dev->header_ops->rebuild(skb)) + dev_rebuild_header(skb)) return 0; return dev_queue_xmit(skb); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index cec074be8c43..e861438d5454 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -386,8 +386,14 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, !vlan_hw_offload_capable(netif_skb_features(skb), skb->vlan_proto)) { skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb)); - if (unlikely(!skb)) - break; + if (unlikely(!skb)) { + /* This is actually a packet drop, but we + * don't want the code at the end of this + * function to try and re-queue a NULL skb. + */ + status = NETDEV_TX_OK; + goto unlock_txq; + } skb->vlan_tci = 0; } @@ -395,6 +401,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, if (status == NETDEV_TX_OK) txq_trans_update(txq); } + unlock_txq: __netif_tx_unlock(txq); if (status == NETDEV_TX_OK) @@ -550,7 +557,7 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo return; proto = ntohs(eth_hdr(skb)->h_proto); - if (proto == ETH_P_IP) { + if (proto == ETH_P_ARP) { struct arphdr *arp; unsigned char *arp_ptr; /* No arp on this interface */ @@ -738,7 +745,7 @@ static bool pkt_is_ns(struct sk_buff *skb) struct nd_msg *msg; struct ipv6hdr *hdr; - if (skb->protocol != htons(ETH_P_ARP)) + if (skb->protocol != htons(ETH_P_IPV6)) return false; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg))) return false; @@ -941,6 +948,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) { char *cur=opt, *delim; int ipv6; + bool ipversion_set = false; if (*cur != '@') { if ((delim = strchr(cur, '@')) == NULL) @@ -953,6 +961,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) cur++; if (*cur != '/') { + ipversion_set = true; if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; *delim = 0; @@ -995,7 +1004,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip); if (ipv6 < 0) goto parse_failed; - else if (np->ipv6 != (bool)ipv6) + else if (ipversion_set && np->ipv6 != (bool)ipv6) goto parse_failed; else np->ipv6 = (bool)ipv6; @@ -1289,15 +1298,14 @@ EXPORT_SYMBOL_GPL(__netpoll_free_async); void netpoll_cleanup(struct netpoll *np) { - if (!np->dev) - return; - rtnl_lock(); + if (!np->dev) + goto out; __netpoll_cleanup(np); - rtnl_unlock(); - dev_put(np->dev); np->dev = NULL; +out: + rtnl_unlock(); } EXPORT_SYMBOL(netpoll_cleanup); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 11f2704c3810..ebbea5371967 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2515,6 +2515,8 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, if (x) { int ret; __u8 *eth; + struct iphdr *iph; + nhead = x->props.header_len - skb_headroom(skb); if (nhead > 0) { ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); @@ -2536,6 +2538,11 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, eth = (__u8 *) skb_push(skb, ETH_HLEN); memcpy(eth, pkt_dev->hh, 12); *(u16 *) ð[12] = protocol; + + /* Update IPv4 header len as well as checksum value */ + iph = ip_hdr(skb); + iph->tot_len = htons(skb->len - ETH_HLEN); + ip_send_check(iph); } } return 1; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fd01eca52a13..25c4dd563a79 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -714,7 +714,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, return 0; } -static size_t rtnl_port_size(const struct net_device *dev) +static size_t rtnl_port_size(const struct net_device *dev, + u32 ext_filter_mask) { size_t port_size = nla_total_size(4) /* PORT_VF */ + nla_total_size(PORT_PROFILE_MAX) /* PORT_PROFILE */ @@ -730,7 +731,8 @@ static size_t rtnl_port_size(const struct net_device *dev) size_t port_self_size = nla_total_size(sizeof(struct nlattr)) + port_size; - if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent) + if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent || + !(ext_filter_mask & RTEXT_FILTER_VF)) return 0; if (dev_num_vf(dev->dev.parent)) return port_self_size + vf_ports_size + @@ -765,7 +767,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(ext_filter_mask & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */ + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ - + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ + + rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */ } @@ -826,11 +828,13 @@ static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev) return 0; } -static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev) +static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev, + u32 ext_filter_mask) { int err; - if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent) + if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent || + !(ext_filter_mask & RTEXT_FILTER_VF)) return 0; err = rtnl_port_self_fill(skb, dev); @@ -985,7 +989,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_nest_end(skb, vfinfo); } - if (rtnl_port_fill(skb, dev)) + if (rtnl_port_fill(skb, dev, ext_filter_mask)) goto nla_put_failure; if (dev->rtnl_link_ops) { @@ -1039,6 +1043,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct hlist_head *head; struct nlattr *tb[IFLA_MAX+1]; u32 ext_filter_mask = 0; + int err; + int hdrlen; s_h = cb->args[0]; s_idx = cb->args[1]; @@ -1046,8 +1052,17 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); cb->seq = net->dev_base_seq; - if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, - ifla_policy) >= 0) { + /* A hack to preserve kernel<->userspace interface. + * The correct header is ifinfomsg. It is consistent with rtnl_getlink. + * However, before Linux v3.9 the code here assumed rtgenmsg and that's + * what iproute2 < v3.9.0 used. + * We can detect the old iproute2. Even including the IFLA_EXT_MASK + * attribute, its netlink message is shorter than struct ifinfomsg. + */ + hdrlen = nlmsg_len(cb->nlh) < sizeof(struct ifinfomsg) ? + sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg); + + if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) { if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); @@ -1059,11 +1074,17 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; - if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, 0, - NLM_F_MULTI, - ext_filter_mask) <= 0) + err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, 0, + NLM_F_MULTI, + ext_filter_mask); + /* If we ran out of room on the first message, + * we're in trouble + */ + WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); + + if (err <= 0) goto out; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -1283,7 +1304,8 @@ static int do_set_master(struct net_device *dev, int ifindex) return 0; } -static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, +static int do_setlink(const struct sk_buff *skb, + struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { const struct net_device_ops *ops = dev->netdev_ops; @@ -1295,7 +1317,8 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, err = PTR_ERR(net); goto errout; } - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) { + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { + put_net(net); err = -EPERM; goto errout; } @@ -1549,7 +1572,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) if (err < 0) goto errout; - err = do_setlink(dev, ifm, tb, ifname, 0); + err = do_setlink(skb, dev, ifm, tb, ifname, 0); errout: return err; } @@ -1589,7 +1612,6 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) ops->dellink(dev, &list_kill); unregister_netdevice_many(&list_kill); - list_del(&list_kill); return 0; } @@ -1667,7 +1689,8 @@ err: } EXPORT_SYMBOL(rtnl_create_link); -static int rtnl_group_changelink(struct net *net, int group, +static int rtnl_group_changelink(const struct sk_buff *skb, + struct net *net, int group, struct ifinfomsg *ifm, struct nlattr **tb) { @@ -1676,7 +1699,7 @@ static int rtnl_group_changelink(struct net *net, int group, for_each_netdev(net, dev) { if (dev->group == group) { - err = do_setlink(dev, ifm, tb, NULL, 0); + err = do_setlink(skb, dev, ifm, tb, NULL, 0); if (err < 0) return err; } @@ -1778,12 +1801,12 @@ replay: modified = 1; } - return do_setlink(dev, ifm, tb, ifname, modified); + return do_setlink(skb, dev, ifm, tb, ifname, modified); } if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { if (ifm->ifi_index == 0 && tb[IFLA_GROUP]) - return rtnl_group_changelink(net, + return rtnl_group_changelink(skb, net, nla_get_u32(tb[IFLA_GROUP]), ifm, tb); return -ENODEV; @@ -1895,9 +1918,13 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) struct nlattr *tb[IFLA_MAX+1]; u32 ext_filter_mask = 0; u16 min_ifinfo_dump_size = 0; + int hdrlen; + + /* Same kernel<->userspace interface hack as in rtnl_dump_ifinfo. */ + hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ? + sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg); - if (nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, - ifla_policy) >= 0) { + if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) { if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); } @@ -1973,12 +2000,13 @@ EXPORT_SYMBOL(rtmsg_ifinfo); static int nlmsg_populate_fdb_fill(struct sk_buff *skb, struct net_device *dev, u8 *addr, u32 pid, u32 seq, - int type, unsigned int flags) + int type, unsigned int flags, + int nlflags) { struct nlmsghdr *nlh; struct ndmsg *ndm; - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), NLM_F_MULTI); + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), nlflags); if (!nlh) return -EMSGSIZE; @@ -2016,7 +2044,7 @@ static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, int type) if (!skb) goto errout; - err = nlmsg_populate_fdb_fill(skb, dev, addr, 0, 0, type, NTF_SELF); + err = nlmsg_populate_fdb_fill(skb, dev, addr, 0, 0, type, NTF_SELF, 0); if (err < 0) { kfree_skb(skb); goto errout; @@ -2167,7 +2195,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) int err = -EINVAL; __u8 *addr; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); @@ -2249,7 +2277,8 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, portid, seq, - RTM_NEWNEIGH, NTF_SELF); + RTM_NEWNEIGH, NTF_SELF, + NLM_F_MULTI); if (err < 0) return err; skip: @@ -2622,7 +2651,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) sz_idx = type>>2; kind = type&3; - if (kind != 2 && !ns_capable(net->user_ns, CAP_NET_ADMIN)) + if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 6a2f13cee86a..d0afc322b961 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -10,12 +10,27 @@ #include <net/secure_seq.h> -static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; +#if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET) +#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4) -void net_secret_init(void) +static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned; + +static void net_secret_init(void) { - get_random_bytes(net_secret, sizeof(net_secret)); + u32 tmp; + int i; + + if (likely(net_secret[0])) + return; + + for (i = NET_SECRET_SIZE; i > 0;) { + do { + get_random_bytes(&tmp, sizeof(tmp)); + } while (!tmp); + cmpxchg(&net_secret[--i], 0, tmp); + } } +#endif #ifdef CONFIG_INET static u32 seq_scale(u32 seq) @@ -42,6 +57,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, u32 hash[MD5_DIGEST_WORDS]; u32 i; + net_secret_init(); memcpy(hash, saddr, 16); for (i = 0; i < 4; i++) secret[i] = net_secret[i] + (__force u32)daddr[i]; @@ -63,6 +79,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, u32 hash[MD5_DIGEST_WORDS]; u32 i; + net_secret_init(); memcpy(hash, saddr, 16); for (i = 0; i < 4; i++) secret[i] = net_secret[i] + (__force u32) daddr[i]; @@ -78,35 +95,13 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET -__u32 secure_ip_id(__be32 daddr) -{ - u32 hash[MD5_DIGEST_WORDS]; - - hash[0] = (__force __u32) daddr; - hash[1] = net_secret[13]; - hash[2] = net_secret[14]; - hash[3] = net_secret[15]; - - md5_transform(hash, net_secret); - - return hash[0]; -} - -__u32 secure_ipv6_id(const __be32 daddr[4]) -{ - __u32 hash[4]; - - memcpy(hash, daddr, 16); - md5_transform(hash, net_secret); - - return hash[0]; -} __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport) { u32 hash[MD5_DIGEST_WORDS]; + net_secret_init(); hash[0] = (__force u32)saddr; hash[1] = (__force u32)daddr; hash[2] = ((__force u16)sport << 16) + (__force u16)dport; @@ -121,6 +116,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) { u32 hash[MD5_DIGEST_WORDS]; + net_secret_init(); hash[0] = (__force u32)saddr; hash[1] = (__force u32)daddr; hash[2] = (__force u32)dport ^ net_secret[14]; @@ -140,6 +136,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, u32 hash[MD5_DIGEST_WORDS]; u64 seq; + net_secret_init(); hash[0] = (__force u32)saddr; hash[1] = (__force u32)daddr; hash[2] = ((__force u16)sport << 16) + (__force u16)dport; @@ -164,6 +161,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, u64 seq; u32 i; + net_secret_init(); memcpy(hash, saddr, 16); for (i = 0; i < 4; i++) secret[i] = net_secret[i] + daddr[i]; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1c1738cc4538..6148716884ae 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -47,6 +47,8 @@ #include <linux/in.h> #include <linux/inet.h> #include <linux/slab.h> +#include <linux/tcp.h> +#include <linux/udp.h> #include <linux/netdevice.h> #ifdef CONFIG_NET_CLS_ACT #include <net/pkt_sched.h> @@ -74,36 +76,6 @@ struct kmem_cache *skbuff_head_cache __read_mostly; static struct kmem_cache *skbuff_fclone_cache __read_mostly; -static void sock_pipe_buf_release(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) -{ - put_page(buf->page); -} - -static void sock_pipe_buf_get(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) -{ - get_page(buf->page); -} - -static int sock_pipe_buf_steal(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) -{ - return 1; -} - - -/* Pipe buffer operations for a socket. */ -static const struct pipe_buf_operations sock_pipe_buf_ops = { - .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, - .confirm = generic_pipe_buf_confirm, - .release = sock_pipe_buf_release, - .steal = sock_pipe_buf_steal, - .get = sock_pipe_buf_get, -}; - /** * skb_panic - private function for out-of-line support * @skb: buffer @@ -585,9 +557,6 @@ static void skb_release_head_state(struct sk_buff *skb) #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_conntrack_put(skb->nfct); #endif -#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED - nf_conntrack_put_reasm(skb->nfct_reasm); -#endif #ifdef CONFIG_BRIDGE_NETFILTER nf_bridge_put(skb->nf_bridge); #endif @@ -1814,7 +1783,7 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, .partial = partial, .nr_pages_max = MAX_SKB_FRAGS, .flags = flags, - .ops = &sock_pipe_buf_ops, + .ops = &nosteal_pipe_buf_ops, .spd_release = sock_spd_release, }; struct sk_buff *frag_iter; @@ -2841,7 +2810,6 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) tail = nskb; __copy_skb_header(nskb, skb); - nskb->mac_len = skb->mac_len; /* nskb and skb might have different headroom */ if (nskb->ip_summed == CHECKSUM_PARTIAL) @@ -2851,13 +2819,14 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_set_network_header(nskb, skb->mac_len); nskb->transport_header = (nskb->network_header + skb_network_header_len(skb)); + skb_reset_mac_len(nskb); skb_copy_from_linear_data_offset(skb, -tnl_hlen, nskb->data - tnl_hlen, doffset + tnl_hlen); if (fskb != skb_shinfo(skb)->frag_list) - continue; + goto perform_csum_check; if (!sg) { nskb->ip_summed = CHECKSUM_NONE; @@ -2875,6 +2844,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; while (pos < offset + len && i < nfrags) { + if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) + goto err; *frag = skb_shinfo(skb)->frags[i]; __skb_frag_ref(frag); size = skb_frag_size(frag); @@ -2921,6 +2892,7 @@ skip_fraglist: nskb->len += nskb->data_len; nskb->truesize += nskb->data_len; +perform_csum_check: if (!csum) { nskb->csum = skb_checksum(nskb, doffset, nskb->len - doffset, 0); @@ -3503,3 +3475,28 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, return true; } EXPORT_SYMBOL(skb_try_coalesce); + +/** + * skb_gso_transport_seglen - Return length of individual segments of a gso packet + * + * @skb: GSO skb + * + * skb_gso_transport_seglen is used to determine the real size of the + * individual segments, including Layer4 headers (TCP/UDP). + * + * The MAC/L2 or network (IP, IPv6) headers are not accounted for. + */ +unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) +{ + const struct skb_shared_info *shinfo = skb_shinfo(skb); + + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + return tcp_hdrlen(skb) + shinfo->gso_size; + + /* UFO sets gso_size to the size of the fragmentation + * payload, i.e. the size of the L4 (UDP) header is already + * accounted for. + */ + return shinfo->gso_size; +} +EXPORT_SYMBOL_GPL(skb_gso_transport_seglen); diff --git a/net/core/sock.c b/net/core/sock.c index d6d024cfaaaf..af65d17517b8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -142,6 +142,55 @@ static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); +/** + * sk_ns_capable - General socket capability test + * @sk: Socket to use a capability on or through + * @user_ns: The user namespace of the capability to use + * @cap: The capability to use + * + * Test to see if the opener of the socket had when the socket was + * created and the current process has the capability @cap in the user + * namespace @user_ns. + */ +bool sk_ns_capable(const struct sock *sk, + struct user_namespace *user_ns, int cap) +{ + return file_ns_capable(sk->sk_socket->file, user_ns, cap) && + ns_capable(user_ns, cap); +} +EXPORT_SYMBOL(sk_ns_capable); + +/** + * sk_capable - Socket global capability test + * @sk: Socket to use a capability on or through + * @cap: The global capbility to use + * + * Test to see if the opener of the socket had when the socket was + * created and the current process has the capability @cap in all user + * namespaces. + */ +bool sk_capable(const struct sock *sk, int cap) +{ + return sk_ns_capable(sk, &init_user_ns, cap); +} +EXPORT_SYMBOL(sk_capable); + +/** + * sk_net_capable - Network namespace socket capability test + * @sk: Socket to use a capability on or through + * @cap: The capability to use + * + * Test to see if the opener of the socket had when the socke was created + * and the current process has the capability @cap over the network namespace + * the socket is a member of. + */ +bool sk_net_capable(const struct sock *sk, int cap) +{ + return sk_ns_capable(sk, sock_net(sk)->user_ns, cap); +} +EXPORT_SYMBOL(sk_net_capable); + + #ifdef CONFIG_MEMCG_KMEM int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { @@ -885,7 +934,7 @@ set_rcvbuf: case SO_PEEK_OFF: if (sock->ops->set_peek_off) - sock->ops->set_peek_off(sk, val); + ret = sock->ops->set_peek_off(sk, val); else ret = -EOPNOTSUPP; break; @@ -1814,7 +1863,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag) gfp_t gfp = sk->sk_allocation; if (order) - gfp |= __GFP_COMP | __GFP_NOWARN; + gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY; pfrag->page = alloc_pages(gfp, order); if (likely(pfrag->page)) { pfrag->offset = 0; @@ -2271,6 +2320,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_stamp = ktime_set(-1L, 0); + sk->sk_pacing_rate = ~0U; /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) @@ -2308,10 +2358,13 @@ void release_sock(struct sock *sk) if (sk->sk_backlog.tail) __release_sock(sk); + /* Warning : release_cb() might need to release sk ownership, + * ie call sock_release_ownership(sk) before us. + */ if (sk->sk_prot->release_cb) sk->sk_prot->release_cb(sk); - sk->sk_lock.owned = 0; + sock_release_ownership(sk); if (waitqueue_active(&sk->sk_lock.wq)) wake_up(&sk->sk_lock.wq); spin_unlock_bh(&sk->sk_lock.slock); diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index a0e9cf6379de..c38e7a2b5a8e 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -49,7 +49,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype) } EXPORT_SYMBOL_GPL(sock_diag_put_meminfo); -int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, +int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, struct sk_buff *skb, int attrtype) { struct nlattr *attr; @@ -57,7 +57,7 @@ int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, unsigned int len; int err = 0; - if (!ns_capable(user_ns, CAP_NET_ADMIN)) { + if (!may_report_filterinfo) { nla_reserve(skb, attrtype, 0); return 0; } |