aboutsummaryrefslogtreecommitdiff
path: root/net/core/dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c244
1 files changed, 182 insertions, 62 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 09cb3f6dc40c..d0cbc93fcf32 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -176,8 +176,10 @@
#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
static DEFINE_SPINLOCK(ptype_lock);
+static DEFINE_SPINLOCK(offload_lock);
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
static struct list_head ptype_all __read_mostly; /* Taps */
+static struct list_head offload_base __read_mostly;
/*
* The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -201,6 +203,8 @@ static struct list_head ptype_all __read_mostly; /* Taps */
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);
+DEFINE_SEQLOCK(devnet_rename_seq);
+
static inline void dev_base_seq_inc(struct net *net)
{
while (++net->dev_base_seq == 0);
@@ -470,6 +474,82 @@ void dev_remove_pack(struct packet_type *pt)
}
EXPORT_SYMBOL(dev_remove_pack);
+
+/**
+ * dev_add_offload - register offload handlers
+ * @po: protocol offload declaration
+ *
+ * Add protocol offload handlers to the networking stack. The passed
+ * &proto_offload is linked into kernel lists and may not be freed until
+ * it has been removed from the kernel lists.
+ *
+ * This call does not sleep therefore it can not
+ * guarantee all CPU's that are in middle of receiving packets
+ * will see the new offload handlers (until the next received packet).
+ */
+void dev_add_offload(struct packet_offload *po)
+{
+ struct list_head *head = &offload_base;
+
+ spin_lock(&offload_lock);
+ list_add_rcu(&po->list, head);
+ spin_unlock(&offload_lock);
+}
+EXPORT_SYMBOL(dev_add_offload);
+
+/**
+ * __dev_remove_offload - remove offload handler
+ * @po: packet offload declaration
+ *
+ * Remove a protocol offload handler that was previously added to the
+ * kernel offload handlers by dev_add_offload(). The passed &offload_type
+ * is removed from the kernel lists and can be freed or reused once this
+ * function returns.
+ *
+ * The packet type might still be in use by receivers
+ * and must not be freed until after all the CPU's have gone
+ * through a quiescent state.
+ */
+void __dev_remove_offload(struct packet_offload *po)
+{
+ struct list_head *head = &offload_base;
+ struct packet_offload *po1;
+
+ spin_lock(&offload_lock);
+
+ list_for_each_entry(po1, head, list) {
+ if (po == po1) {
+ list_del_rcu(&po->list);
+ goto out;
+ }
+ }
+
+ pr_warn("dev_remove_offload: %p not found\n", po);
+out:
+ spin_unlock(&offload_lock);
+}
+EXPORT_SYMBOL(__dev_remove_offload);
+
+/**
+ * dev_remove_offload - remove packet offload handler
+ * @po: packet offload declaration
+ *
+ * Remove a packet offload handler that was previously added to the kernel
+ * offload handlers by dev_add_offload(). The passed &offload_type is
+ * removed from the kernel lists and can be freed or reused once this
+ * function returns.
+ *
+ * This call sleeps to guarantee that no CPU is looking at the packet
+ * type after return.
+ */
+void dev_remove_offload(struct packet_offload *po)
+{
+ __dev_remove_offload(po);
+
+ synchronize_net();
+}
+EXPORT_SYMBOL(dev_remove_offload);
+
/******************************************************************************
Device Boot-time Settings Routines
@@ -1013,22 +1093,31 @@ int dev_change_name(struct net_device *dev, const char *newname)
if (dev->flags & IFF_UP)
return -EBUSY;
- if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
+ write_seqlock(&devnet_rename_seq);
+
+ if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
+ write_sequnlock(&devnet_rename_seq);
return 0;
+ }
memcpy(oldname, dev->name, IFNAMSIZ);
err = dev_get_valid_name(net, dev, newname);
- if (err < 0)
+ if (err < 0) {
+ write_sequnlock(&devnet_rename_seq);
return err;
+ }
rollback:
ret = device_rename(&dev->dev, dev->name);
if (ret) {
memcpy(dev->name, oldname, IFNAMSIZ);
+ write_sequnlock(&devnet_rename_seq);
return ret;
}
+ write_sequnlock(&devnet_rename_seq);
+
write_lock_bh(&dev_base_lock);
hlist_del_rcu(&dev->name_hlist);
write_unlock_bh(&dev_base_lock);
@@ -1046,6 +1135,7 @@ rollback:
/* err >= 0 after dev_alloc_name() or stores the first errno */
if (err >= 0) {
err = ret;
+ write_seqlock(&devnet_rename_seq);
memcpy(dev->name, oldname, IFNAMSIZ);
goto rollback;
} else {
@@ -1075,10 +1165,8 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
return -EINVAL;
if (!len) {
- if (dev->ifalias) {
- kfree(dev->ifalias);
- dev->ifalias = NULL;
- }
+ kfree(dev->ifalias);
+ dev->ifalias = NULL;
return 0;
}
@@ -1666,7 +1754,7 @@ static inline int deliver_skb(struct sk_buff *skb,
static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
{
- if (ptype->af_packet_priv == NULL)
+ if (!ptype->af_packet_priv || !skb->sk)
return false;
if (ptype->id_match)
@@ -1994,7 +2082,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
- struct packet_type *ptype;
+ struct packet_offload *ptype;
__be16 type = skb->protocol;
int vlan_depth = ETH_HLEN;
int err;
@@ -2023,18 +2111,17 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
}
rcu_read_lock();
- list_for_each_entry_rcu(ptype,
- &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
- if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
+ list_for_each_entry_rcu(ptype, &offload_base, list) {
+ if (ptype->type == type && ptype->callbacks.gso_segment) {
if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
- err = ptype->gso_send_check(skb);
+ err = ptype->callbacks.gso_send_check(skb);
segs = ERR_PTR(err);
if (err || skb_gso_ok(skb, features))
break;
__skb_push(skb, (skb->data -
skb_network_header(skb)));
}
- segs = ptype->gso_segment(skb, features);
+ segs = ptype->callbacks.gso_segment(skb, features);
break;
}
}
@@ -2237,6 +2324,13 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
skb->vlan_tci = 0;
}
+ /* If encapsulation offload request, verify we are testing
+ * hardware encapsulation features instead of standard
+ * features for the netdev
+ */
+ if (skb->encapsulation)
+ features &= dev->hw_enc_features;
+
if (netif_needs_gso(skb, features)) {
if (unlikely(dev_gso_segment(skb, features)))
goto out_kfree_skb;
@@ -2252,8 +2346,12 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
* checksumming here.
*/
if (skb->ip_summed == CHECKSUM_PARTIAL) {
- skb_set_transport_header(skb,
- skb_checksum_start_offset(skb));
+ if (skb->encapsulation)
+ skb_set_inner_transport_header(skb,
+ skb_checksum_start_offset(skb));
+ else
+ skb_set_transport_header(skb,
+ skb_checksum_start_offset(skb));
if (!(features & NETIF_F_ALL_CSUM) &&
skb_checksum_help(skb))
goto out_kfree_skb;
@@ -2818,8 +2916,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
if (unlikely(tcpu != next_cpu) &&
(tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
((int)(per_cpu(softnet_data, tcpu).input_queue_head -
- rflow->last_qtail)) >= 0))
+ rflow->last_qtail)) >= 0)) {
+ tcpu = next_cpu;
rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
+ }
if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
*rflowp = rflow;
@@ -3444,11 +3544,13 @@ static void flush_backlog(void *arg)
static int napi_gro_complete(struct sk_buff *skb)
{
- struct packet_type *ptype;
+ struct packet_offload *ptype;
__be16 type = skb->protocol;
- struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+ struct list_head *head = &offload_base;
int err = -ENOENT;
+ BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
+
if (NAPI_GRO_CB(skb)->count == 1) {
skb_shinfo(skb)->gso_size = 0;
goto out;
@@ -3456,10 +3558,10 @@ static int napi_gro_complete(struct sk_buff *skb)
rcu_read_lock();
list_for_each_entry_rcu(ptype, head, list) {
- if (ptype->type != type || ptype->dev || !ptype->gro_complete)
+ if (ptype->type != type || !ptype->callbacks.gro_complete)
continue;
- err = ptype->gro_complete(skb);
+ err = ptype->callbacks.gro_complete(skb);
break;
}
rcu_read_unlock();
@@ -3503,12 +3605,34 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old)
}
EXPORT_SYMBOL(napi_gro_flush);
-enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
+{
+ struct sk_buff *p;
+ unsigned int maclen = skb->dev->hard_header_len;
+
+ for (p = napi->gro_list; p; p = p->next) {
+ unsigned long diffs;
+
+ diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
+ diffs |= p->vlan_tci ^ skb->vlan_tci;
+ if (maclen == ETH_HLEN)
+ diffs |= compare_ether_header(skb_mac_header(p),
+ skb_gro_mac_header(skb));
+ else if (!diffs)
+ diffs = memcmp(skb_mac_header(p),
+ skb_gro_mac_header(skb),
+ maclen);
+ NAPI_GRO_CB(p)->same_flow = !diffs;
+ NAPI_GRO_CB(p)->flush = 0;
+ }
+}
+
+static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
struct sk_buff **pp = NULL;
- struct packet_type *ptype;
+ struct packet_offload *ptype;
__be16 type = skb->protocol;
- struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+ struct list_head *head = &offload_base;
int same_flow;
int mac_len;
enum gro_result ret;
@@ -3519,9 +3643,11 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
if (skb_is_gso(skb) || skb_has_frag_list(skb))
goto normal;
+ gro_list_prepare(napi, skb);
+
rcu_read_lock();
list_for_each_entry_rcu(ptype, head, list) {
- if (ptype->type != type || ptype->dev || !ptype->gro_receive)
+ if (ptype->type != type || !ptype->callbacks.gro_receive)
continue;
skb_set_network_header(skb, skb_gro_offset(skb));
@@ -3531,7 +3657,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
NAPI_GRO_CB(skb)->flush = 0;
NAPI_GRO_CB(skb)->free = 0;
- pp = ptype->gro_receive(&napi->gro_list, skb);
+ pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
break;
}
rcu_read_unlock();
@@ -3594,34 +3720,9 @@ normal:
ret = GRO_NORMAL;
goto pull;
}
-EXPORT_SYMBOL(dev_gro_receive);
-static inline gro_result_t
-__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
-{
- struct sk_buff *p;
- unsigned int maclen = skb->dev->hard_header_len;
- for (p = napi->gro_list; p; p = p->next) {
- unsigned long diffs;
-
- diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
- diffs |= p->vlan_tci ^ skb->vlan_tci;
- if (maclen == ETH_HLEN)
- diffs |= compare_ether_header(skb_mac_header(p),
- skb_gro_mac_header(skb));
- else if (!diffs)
- diffs = memcmp(skb_mac_header(p),
- skb_gro_mac_header(skb),
- maclen);
- NAPI_GRO_CB(p)->same_flow = !diffs;
- NAPI_GRO_CB(p)->flush = 0;
- }
-
- return dev_gro_receive(napi, skb);
-}
-
-gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
+static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
{
switch (ret) {
case GRO_NORMAL:
@@ -3647,7 +3748,6 @@ gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
return ret;
}
-EXPORT_SYMBOL(napi_skb_finish);
static void skb_gro_reset_offset(struct sk_buff *skb)
{
@@ -3670,7 +3770,7 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
skb_gro_reset_offset(skb);
- return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
+ return napi_skb_finish(dev_gro_receive(napi, skb), skb);
}
EXPORT_SYMBOL(napi_gro_receive);
@@ -3699,7 +3799,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
}
EXPORT_SYMBOL(napi_get_frags);
-gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
+static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
gro_result_t ret)
{
switch (ret) {
@@ -3724,7 +3824,6 @@ gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
return ret;
}
-EXPORT_SYMBOL(napi_frags_finish);
static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
{
@@ -3769,7 +3868,7 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
if (!skb)
return GRO_DROP;
- return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
+ return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
}
EXPORT_SYMBOL(napi_gro_frags);
@@ -4071,6 +4170,7 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
{
struct net_device *dev;
struct ifreq ifr;
+ unsigned seq;
/*
* Fetch the caller's info block.
@@ -4079,6 +4179,8 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
return -EFAULT;
+retry:
+ seq = read_seqbegin(&devnet_rename_seq);
rcu_read_lock();
dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
if (!dev) {
@@ -4088,6 +4190,8 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
strcpy(ifr.ifr_name, dev->name);
rcu_read_unlock();
+ if (read_seqretry(&devnet_rename_seq, seq))
+ goto retry;
if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
return -EFAULT;
@@ -4880,7 +4984,7 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
else
dev->mtu = new_mtu;
- if (!err && dev->flags & IFF_UP)
+ if (!err)
call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
return err;
}
@@ -5200,7 +5304,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCGMIIPHY:
case SIOCGMIIREG:
case SIOCSIFNAME:
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
dev_load(net, ifr.ifr_name);
rtnl_lock();
@@ -5221,16 +5325,25 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
* - require strict serialization.
* - do not return a value
*/
+ case SIOCSIFMAP:
+ case SIOCSIFTXQLEN:
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ /* fall through */
+ /*
+ * These ioctl calls:
+ * - require local superuser power.
+ * - require strict serialization.
+ * - do not return a value
+ */
case SIOCSIFFLAGS:
case SIOCSIFMETRIC:
case SIOCSIFMTU:
- case SIOCSIFMAP:
case SIOCSIFHWADDR:
case SIOCSIFSLAVE:
case SIOCADDMULTI:
case SIOCDELMULTI:
case SIOCSIFHWBROADCAST:
- case SIOCSIFTXQLEN:
case SIOCSMIIREG:
case SIOCBONDENSLAVE:
case SIOCBONDRELEASE:
@@ -5239,7 +5352,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCBRADDIF:
case SIOCBRDELIF:
case SIOCSHWTSTAMP:
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
/* fall through */
case SIOCBONDSLAVEINFOQUERY:
@@ -6264,7 +6377,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
goto out;
/* Ensure the device has been registrered */
- err = -EINVAL;
if (dev->reg_state != NETREG_REGISTERED)
goto out;
@@ -6319,6 +6431,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
dev_uc_flush(dev);
dev_mc_flush(dev);
+ /* Send a netdev-removed uevent to the old namespace */
+ kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);
+
/* Actually switch the network namespace */
dev_net_set(dev, net);
@@ -6330,6 +6445,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
dev->iflink = dev->ifindex;
}
+ /* Send a netdev-add uevent to the new namespace */
+ kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
+
/* Fixup kobjects */
err = device_rename(&dev->dev, dev->name);
WARN_ON(err);
@@ -6662,6 +6780,8 @@ static int __init net_dev_init(void)
for (i = 0; i < PTYPE_HASH_SIZE; i++)
INIT_LIST_HEAD(&ptype_base[i]);
+ INIT_LIST_HEAD(&offload_base);
+
if (register_pernet_subsys(&netdev_net_ops))
goto out;