aboutsummaryrefslogtreecommitdiff
path: root/net/openvswitch
diff options
context:
space:
mode:
Diffstat (limited to 'net/openvswitch')
-rw-r--r-- net/openvswitch/actions.c 6
-rw-r--r-- net/openvswitch/datapath.c 421
-rw-r--r-- net/openvswitch/datapath.h 72
-rw-r--r-- net/openvswitch/dp_notify.c 82
-rw-r--r-- net/openvswitch/flow.c 8
-rw-r--r-- net/openvswitch/flow.h 21
-rw-r--r-- net/openvswitch/vport-internal_dev.c 22
-rw-r--r-- net/openvswitch/vport-netdev.c 15
-rw-r--r-- net/openvswitch/vport-netdev.h 1
-rw-r--r-- net/openvswitch/vport.c 58
-rw-r--r-- net/openvswitch/vport.h 21
11 files changed, 420 insertions, 307 deletions
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index d4d5363c7ba..894b6cbdd92 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -98,7 +98,7 @@ static int pop_vlan(struct sk_buff *skb)
if (unlikely(err))
return err;
- __vlan_hwaccel_put_tag(skb, ntohs(tci));
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(tci));
return 0;
}
@@ -110,7 +110,7 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla
/* push down current VLAN tag */
current_tag = vlan_tx_tag_get(skb);
- if (!__vlan_put_tag(skb, current_tag))
+ if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
return -ENOMEM;
if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -118,7 +118,7 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla
+ (2 * ETH_ALEN), VLAN_HLEN, 0));
}
- __vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
+ __vlan_hwaccel_put_tag(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
return 0;
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 6980c3e6f06..d12d6b8b5e8 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -44,6 +44,7 @@
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
+#include <linux/lockdep.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
@@ -55,39 +56,61 @@
#include "datapath.h"
#include "flow.h"
#include "vport-internal_dev.h"
+#include "vport-netdev.h"
-/**
- * struct ovs_net - Per net-namespace data for ovs.
- * @dps: List of datapaths to enable dumping them all out.
- * Protected by genl_mutex.
- */
-struct ovs_net {
- struct list_head dps;
-};
-
-static int ovs_net_id __read_mostly;
#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
static void rehash_flow_table(struct work_struct *work);
static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
+int ovs_net_id __read_mostly;
+
+static void ovs_notify(struct sk_buff *skb, struct genl_info *info,
+ struct genl_multicast_group *grp)
+{
+ genl_notify(skb, genl_info_net(info), info->snd_portid,
+ grp->id, info->nlhdr, GFP_KERNEL);
+}
+
/**
* DOC: Locking:
*
- * Writes to device state (add/remove datapath, port, set operations on vports,
- * etc.) are protected by RTNL.
- *
- * Writes to other state (flow table modifications, set miscellaneous datapath
- * parameters, etc.) are protected by genl_mutex. The RTNL lock nests inside
- * genl_mutex.
+ * All writes, e.g. writes to device state (add/remove datapath, port, set
+ * operations on vports, etc.) and writes to other state (flow table
+ * modifications, set miscellaneous datapath parameters, etc.), are protected
+ * by ovs_mutex, which is taken with ovs_lock() and released with ovs_unlock().
*
* Reads are protected by RCU.
*
* There are a few special cases (mostly stats) that have their own
* synchronization but they nest under all of above and don't interact with
* each other.
+ *
+ * The RTNL lock nests inside ovs_mutex.
*/
+static DEFINE_MUTEX(ovs_mutex);
+
+void ovs_lock(void)
+{
+ mutex_lock(&ovs_mutex);
+}
+
+void ovs_unlock(void)
+{
+ mutex_unlock(&ovs_mutex);
+}
+
+#ifdef CONFIG_LOCKDEP
+int lockdep_ovsl_is_held(void)
+{
+ if (debug_locks)
+ return lockdep_is_held(&ovs_mutex);
+ else
+ return 1;
+}
+#endif
+
static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *,
const struct dp_upcall_info *);
@@ -95,7 +118,7 @@ static int queue_userspace_packet(struct net *, int dp_ifindex,
struct sk_buff *,
const struct dp_upcall_info *);
-/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
+/* Must be called with rcu_read_lock or ovs_mutex. */
static struct datapath *get_dp(struct net *net, int dp_ifindex)
{
struct datapath *dp = NULL;
@@ -113,10 +136,10 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex)
return dp;
}
-/* Must be called with rcu_read_lock or RTNL lock. */
+/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
- struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL);
+ struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
return vport->ops->get_name(vport);
}
@@ -129,7 +152,7 @@ static int get_dpifindex(struct datapath *dp)
local = ovs_vport_rcu(dp, OVSP_LOCAL);
if (local)
- ifindex = local->ops->get_ifindex(local);
+ ifindex = netdev_vport_priv(local)->dev->ifindex;
else
ifindex = 0;
@@ -168,7 +191,7 @@ struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
return NULL;
}
-/* Called with RTNL lock and genl_lock. */
+/* Called with ovs_mutex. */
static struct vport *new_vport(const struct vport_parms *parms)
{
struct vport *vport;
@@ -180,14 +203,12 @@ static struct vport *new_vport(const struct vport_parms *parms)
hlist_add_head_rcu(&vport->dp_hash_node, head);
}
-
return vport;
}
-/* Called with RTNL lock. */
void ovs_dp_detach_port(struct vport *p)
{
- ASSERT_RTNL();
+ ASSERT_OVSL();
/* First drop references to device. */
hlist_del_rcu(&p->dp_hash_node);
@@ -250,7 +271,8 @@ static struct genl_family dp_packet_genl_family = {
.name = OVS_PACKET_FAMILY,
.version = OVS_PACKET_VERSION,
.maxattr = OVS_PACKET_ATTR_MAX,
- .netnsok = true
+ .netnsok = true,
+ .parallel_ops = true,
};
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
@@ -337,6 +359,35 @@ static int queue_gso_packets(struct net *net, int dp_ifindex,
return err;
}
+static size_t key_attr_size(void)
+{
+ return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
+ + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
+ + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
+ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
+ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ + nla_total_size(4) /* OVS_KEY_ATTR_8021Q */
+ + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */
+ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */
+ + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */
+ + nla_total_size(28); /* OVS_KEY_ATTR_ND */
+}
+
+static size_t upcall_msg_size(const struct sk_buff *skb,
+ const struct nlattr *userdata)
+{
+ size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
+ + nla_total_size(skb->len) /* OVS_PACKET_ATTR_PACKET */
+ + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */
+
+ /* OVS_PACKET_ATTR_USERDATA */
+ if (userdata)
+ size += NLA_ALIGN(userdata->nla_len);
+
+ return size;
+}
+
static int queue_userspace_packet(struct net *net, int dp_ifindex,
struct sk_buff *skb,
const struct dp_upcall_info *upcall_info)
@@ -345,7 +396,6 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
struct sk_buff *nskb = NULL;
struct sk_buff *user_skb; /* to be queued to userspace */
struct nlattr *nla;
- unsigned int len;
int err;
if (vlan_tx_tag_present(skb)) {
@@ -353,7 +403,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
if (!nskb)
return -ENOMEM;
- nskb = __vlan_put_tag(nskb, vlan_tx_tag_get(nskb));
+ nskb = __vlan_put_tag(nskb, nskb->vlan_proto, vlan_tx_tag_get(nskb));
if (!nskb)
return -ENOMEM;
@@ -366,13 +416,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
goto out;
}
- len = sizeof(struct ovs_header);
- len += nla_total_size(skb->len);
- len += nla_total_size(FLOW_BUFSIZE);
- if (upcall_info->cmd == OVS_PACKET_CMD_ACTION)
- len += nla_total_size(8);
-
- user_skb = genlmsg_new(len, GFP_ATOMIC);
+ user_skb = genlmsg_new(upcall_msg_size(skb, upcall_info->userdata), GFP_ATOMIC);
if (!user_skb) {
err = -ENOMEM;
goto out;
@@ -387,8 +431,9 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
nla_nest_end(user_skb, nla);
if (upcall_info->userdata)
- nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA,
- nla_get_u64(upcall_info->userdata));
+ __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
+ nla_len(upcall_info->userdata),
+ nla_data(upcall_info->userdata));
nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
@@ -402,13 +447,13 @@ out:
return err;
}
-/* Called with genl_mutex. */
+/* Called with ovs_mutex. */
static int flush_flows(struct datapath *dp)
{
struct flow_table *old_table;
struct flow_table *new_table;
- old_table = genl_dereference(dp->table);
+ old_table = ovsl_dereference(dp->table);
new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
if (!new_table)
return -ENOMEM;
@@ -544,7 +589,7 @@ static int validate_userspace(const struct nlattr *attr)
{
static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
[OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
- [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 },
+ [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
};
struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
int error;
@@ -661,8 +706,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
err = -EINVAL;
if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
- !a[OVS_PACKET_ATTR_ACTIONS] ||
- nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN)
+ !a[OVS_PACKET_ATTR_ACTIONS])
goto err;
len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
@@ -672,7 +716,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
goto err;
skb_reserve(packet, NET_IP_ALIGN);
- memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len);
+ nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
skb_reset_mac_header(packet);
eth = eth_hdr(packet);
@@ -680,7 +724,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
/* Normally, setting the skb 'protocol' field would be handled by a
* call to eth_type_trans(), but it assumes there's a sending
* device, which we may not have. */
- if (ntohs(eth->h_proto) >= 1536)
+ if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
packet->protocol = eth->h_proto;
else
packet->protocol = htons(ETH_P_802_2);
@@ -743,7 +787,7 @@ err:
}
static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
- [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
+ [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
};
@@ -759,7 +803,7 @@ static struct genl_ops dp_packet_genl_ops[] = {
static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
{
int i;
- struct flow_table *table = genl_dereference(dp->table);
+ struct flow_table *table = ovsl_dereference(dp->table);
stats->n_flows = ovs_flow_tbl_count(table);
@@ -794,14 +838,25 @@ static struct genl_family dp_flow_genl_family = {
.name = OVS_FLOW_FAMILY,
.version = OVS_FLOW_VERSION,
.maxattr = OVS_FLOW_ATTR_MAX,
- .netnsok = true
+ .netnsok = true,
+ .parallel_ops = true,
};
static struct genl_multicast_group ovs_dp_flow_multicast_group = {
.name = OVS_FLOW_MCGROUP
};
-/* Called with genl_lock. */
+static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
+{
+ return NLMSG_ALIGN(sizeof(struct ovs_header))
+ + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
+ + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
+ + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
+ + nla_total_size(8) /* OVS_FLOW_ATTR_USED */
+ + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
+}
+
+/* Called with ovs_mutex. */
static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
struct sk_buff *skb, u32 portid,
u32 seq, u32 flags, u8 cmd)
@@ -815,8 +870,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
u8 tcp_flags;
int err;
- sf_acts = rcu_dereference_protected(flow->sf_acts,
- lockdep_genl_is_held());
+ sf_acts = ovsl_dereference(flow->sf_acts);
ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
if (!ovs_header)
@@ -879,25 +933,10 @@ error:
static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
{
const struct sw_flow_actions *sf_acts;
- int len;
- sf_acts = rcu_dereference_protected(flow->sf_acts,
- lockdep_genl_is_held());
+ sf_acts = ovsl_dereference(flow->sf_acts);
- /* OVS_FLOW_ATTR_KEY */
- len = nla_total_size(FLOW_BUFSIZE);
- /* OVS_FLOW_ATTR_ACTIONS */
- len += nla_total_size(sf_acts->actions_len);
- /* OVS_FLOW_ATTR_STATS */
- len += nla_total_size(sizeof(struct ovs_flow_stats));
- /* OVS_FLOW_ATTR_TCP_FLAGS */
- len += nla_total_size(1);
- /* OVS_FLOW_ATTR_USED */
- len += nla_total_size(8);
-
- len += NLMSG_ALIGN(sizeof(struct ovs_header));
-
- return genlmsg_new(len, GFP_KERNEL);
+ return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL);
}
static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
@@ -946,12 +985,13 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
goto error;
}
+ ovs_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
error = -ENODEV;
if (!dp)
- goto error;
+ goto err_unlock_ovs;
- table = genl_dereference(dp->table);
+ table = ovsl_dereference(dp->table);
flow = ovs_flow_tbl_lookup(table, &key, key_len);
if (!flow) {
struct sw_flow_actions *acts;
@@ -959,7 +999,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
/* Bail out if we're not allowed to create a new flow. */
error = -ENOENT;
if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
- goto error;
+ goto err_unlock_ovs;
/* Expand table, if necessary, to make room. */
if (ovs_flow_tbl_need_to_expand(table)) {
@@ -969,7 +1009,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
if (!IS_ERR(new_table)) {
rcu_assign_pointer(dp->table, new_table);
ovs_flow_tbl_deferred_destroy(table);
- table = genl_dereference(dp->table);
+ table = ovsl_dereference(dp->table);
}
}
@@ -977,7 +1017,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
flow = ovs_flow_alloc();
if (IS_ERR(flow)) {
error = PTR_ERR(flow);
- goto error;
+ goto err_unlock_ovs;
}
flow->key = key;
clear_stats(flow);
@@ -1010,11 +1050,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
error = -EEXIST;
if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
- goto error;
+ goto err_unlock_ovs;
/* Update actions. */
- old_acts = rcu_dereference_protected(flow->sf_acts,
- lockdep_genl_is_held());
+ old_acts = ovsl_dereference(flow->sf_acts);
acts_attrs = a[OVS_FLOW_ATTR_ACTIONS];
if (acts_attrs &&
(old_acts->actions_len != nla_len(acts_attrs) ||
@@ -1025,7 +1064,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
new_acts = ovs_flow_actions_alloc(acts_attrs);
error = PTR_ERR(new_acts);
if (IS_ERR(new_acts))
- goto error;
+ goto err_unlock_ovs;
rcu_assign_pointer(flow->sf_acts, new_acts);
ovs_flow_deferred_free_acts(old_acts);
@@ -1041,11 +1080,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
spin_unlock_bh(&flow->lock);
}
}
+ ovs_unlock();
if (!IS_ERR(reply))
- genl_notify(reply, genl_info_net(info), info->snd_portid,
- ovs_dp_flow_multicast_group.id, info->nlhdr,
- GFP_KERNEL);
+ ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
else
netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
@@ -1053,6 +1091,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
error_free_flow:
ovs_flow_free(flow);
+err_unlock_ovs:
+ ovs_unlock();
error:
return error;
}
@@ -1075,21 +1115,32 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
+ ovs_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
- if (!dp)
- return -ENODEV;
+ if (!dp) {
+ err = -ENODEV;
+ goto unlock;
+ }
- table = genl_dereference(dp->table);
+ table = ovsl_dereference(dp->table);
flow = ovs_flow_tbl_lookup(table, &key, key_len);
- if (!flow)
- return -ENOENT;
+ if (!flow) {
+ err = -ENOENT;
+ goto unlock;
+ }
reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
info->snd_seq, OVS_FLOW_CMD_NEW);
- if (IS_ERR(reply))
- return PTR_ERR(reply);
+ if (IS_ERR(reply)) {
+ err = PTR_ERR(reply);
+ goto unlock;
+ }
+ ovs_unlock();
return genlmsg_reply(reply, info);
+unlock:
+ ovs_unlock();
+ return err;
}
static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
@@ -1104,25 +1155,33 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
int err;
int key_len;
+ ovs_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
- if (!dp)
- return -ENODEV;
-
- if (!a[OVS_FLOW_ATTR_KEY])
- return flush_flows(dp);
+ if (!dp) {
+ err = -ENODEV;
+ goto unlock;
+ }
+ if (!a[OVS_FLOW_ATTR_KEY]) {
+ err = flush_flows(dp);
+ goto unlock;
+ }
err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
if (err)
- return err;
+ goto unlock;
- table = genl_dereference(dp->table);
+ table = ovsl_dereference(dp->table);
flow = ovs_flow_tbl_lookup(table, &key, key_len);
- if (!flow)
- return -ENOENT;
+ if (!flow) {
+ err = -ENOENT;
+ goto unlock;
+ }
reply = ovs_flow_cmd_alloc_info(flow);
- if (!reply)
- return -ENOMEM;
+ if (!reply) {
+ err = -ENOMEM;
+ goto unlock;
+ }
ovs_flow_tbl_remove(table, flow);
@@ -1131,10 +1190,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
BUG_ON(err < 0);
ovs_flow_deferred_free(flow);
+ ovs_unlock();
- genl_notify(reply, genl_info_net(info), info->snd_portid,
- ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
+ ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
return 0;
+unlock:
+ ovs_unlock();
+ return err;
}
static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1143,11 +1205,14 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct datapath *dp;
struct flow_table *table;
+ ovs_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
- if (!dp)
+ if (!dp) {
+ ovs_unlock();
return -ENODEV;
+ }
- table = genl_dereference(dp->table);
+ table = ovsl_dereference(dp->table);
for (;;) {
struct sw_flow *flow;
@@ -1168,6 +1233,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[0] = bucket;
cb->args[1] = obj;
}
+ ovs_unlock();
return skb->len;
}
@@ -1206,13 +1272,24 @@ static struct genl_family dp_datapath_genl_family = {
.name = OVS_DATAPATH_FAMILY,
.version = OVS_DATAPATH_VERSION,
.maxattr = OVS_DP_ATTR_MAX,
- .netnsok = true
+ .netnsok = true,
+ .parallel_ops = true,
};
static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
.name = OVS_DATAPATH_MCGROUP
};
+static size_t ovs_dp_cmd_msg_size(void)
+{
+ size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
+
+ msgsize += nla_total_size(IFNAMSIZ);
+ msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
+
+ return msgsize;
+}
+
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
u32 portid, u32 seq, u32 flags, u8 cmd)
{
@@ -1251,7 +1328,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
struct sk_buff *skb;
int retval;
- skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ skb = genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
if (!skb)
return ERR_PTR(-ENOMEM);
@@ -1263,7 +1340,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
return skb;
}
-/* Called with genl_mutex and optionally with RTNL lock also. */
+/* Called with ovs_mutex. */
static struct datapath *lookup_datapath(struct net *net,
struct ovs_header *ovs_header,
struct nlattr *a[OVS_DP_ATTR_MAX + 1])
@@ -1297,12 +1374,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
goto err;
- rtnl_lock();
+ ovs_lock();
err = -ENOMEM;
dp = kzalloc(sizeof(*dp), GFP_KERNEL);
if (dp == NULL)
- goto err_unlock_rtnl;
+ goto err_unlock_ovs;
ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
@@ -1353,37 +1430,34 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
list_add_tail(&dp->list_node, &ovs_net->dps);
- rtnl_unlock();
- genl_notify(reply, genl_info_net(info), info->snd_portid,
- ovs_dp_datapath_multicast_group.id, info->nlhdr,
- GFP_KERNEL);
+ ovs_unlock();
+
+ ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
return 0;
err_destroy_local_port:
- ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
+ ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
err_destroy_ports_array:
kfree(dp->ports);
err_destroy_percpu:
free_percpu(dp->stats_percpu);
err_destroy_table:
- ovs_flow_tbl_destroy(genl_dereference(dp->table));
+ ovs_flow_tbl_destroy(ovsl_dereference(dp->table));
err_free_dp:
release_net(ovs_dp_get_net(dp));
kfree(dp);
-err_unlock_rtnl:
- rtnl_unlock();
+err_unlock_ovs:
+ ovs_unlock();
err:
return err;
}
-/* Called with genl_mutex. */
+/* Called with ovs_mutex. */
static void __dp_destroy(struct datapath *dp)
{
int i;
- rtnl_lock();
-
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
struct vport *vport;
struct hlist_node *n;
@@ -1394,14 +1468,11 @@ static void __dp_destroy(struct datapath *dp)
}
list_del(&dp->list_node);
- ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
- /* rtnl_unlock() will wait until all the references to devices that
- * are pending unregistration have been dropped. We do it here to
- * ensure that any internal devices (which contain DP pointers) are
- * fully destroyed before freeing the datapath.
+ /* OVSP_LOCAL is the datapath's internal port. We need to make sure that
+ * all ports in the datapath are destroyed before freeing the datapath.
*/
- rtnl_unlock();
+ ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
call_rcu(&dp->rcu, destroy_dp_rcu);
}
@@ -1412,24 +1483,27 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
int err;
+ ovs_lock();
dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
err = PTR_ERR(dp);
if (IS_ERR(dp))
- return err;
+ goto unlock;
reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
info->snd_seq, OVS_DP_CMD_DEL);
err = PTR_ERR(reply);
if (IS_ERR(reply))
- return err;
+ goto unlock;
__dp_destroy(dp);
+ ovs_unlock();
- genl_notify(reply, genl_info_net(info), info->snd_portid,
- ovs_dp_datapath_multicast_group.id, info->nlhdr,
- GFP_KERNEL);
+ ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
return 0;
+unlock:
+ ovs_unlock();
+ return err;
}
static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
@@ -1438,9 +1512,11 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
int err;
+ ovs_lock();
dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+ err = PTR_ERR(dp);
if (IS_ERR(dp))
- return PTR_ERR(dp);
+ goto unlock;
reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
info->snd_seq, OVS_DP_CMD_NEW);
@@ -1448,31 +1524,45 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
err = PTR_ERR(reply);
netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
ovs_dp_datapath_multicast_group.id, err);
- return 0;
+ err = 0;
+ goto unlock;
}
- genl_notify(reply, genl_info_net(info), info->snd_portid,
- ovs_dp_datapath_multicast_group.id, info->nlhdr,
- GFP_KERNEL);
+ ovs_unlock();
+ ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
return 0;
+unlock:
+ ovs_unlock();
+ return err;
}
static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
struct sk_buff *reply;
struct datapath *dp;
+ int err;
+ ovs_lock();
dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
- if (IS_ERR(dp))
- return PTR_ERR(dp);
+ if (IS_ERR(dp)) {
+ err = PTR_ERR(dp);
+ goto unlock;
+ }
reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
info->snd_seq, OVS_DP_CMD_NEW);
- if (IS_ERR(reply))
- return PTR_ERR(reply);
+ if (IS_ERR(reply)) {
+ err = PTR_ERR(reply);
+ goto unlock;
+ }
+ ovs_unlock();
return genlmsg_reply(reply, info);
+
+unlock:
+ ovs_unlock();
+ return err;
}
static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1482,6 +1572,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
int skip = cb->args[0];
int i = 0;
+ ovs_lock();
list_for_each_entry(dp, &ovs_net->dps, list_node) {
if (i >= skip &&
ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
@@ -1490,6 +1581,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
break;
i++;
}
+ ovs_unlock();
cb->args[0] = i;
@@ -1535,14 +1627,15 @@ static struct genl_family dp_vport_genl_family = {
.name = OVS_VPORT_FAMILY,
.version = OVS_VPORT_VERSION,
.maxattr = OVS_VPORT_ATTR_MAX,
- .netnsok = true
+ .netnsok = true,
+ .parallel_ops = true,
};
struct genl_multicast_group ovs_dp_vport_multicast_group = {
.name = OVS_VPORT_MCGROUP
};
-/* Called with RTNL lock or RCU read lock. */
+/* Called with ovs_mutex or RCU read lock. */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
u32 portid, u32 seq, u32 flags, u8 cmd)
{
@@ -1581,7 +1674,7 @@ error:
return err;
}
-/* Called with RTNL lock or RCU read lock. */
+/* Called with ovs_mutex or RCU read lock. */
struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
u32 seq, u8 cmd)
{
@@ -1598,7 +1691,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
return skb;
}
-/* Called with RTNL lock or RCU read lock. */
+/* Called with ovs_mutex or RCU read lock. */
static struct vport *lookup_vport(struct net *net,
struct ovs_header *ovs_header,
struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
@@ -1624,9 +1717,9 @@ static struct vport *lookup_vport(struct net *net,
if (!dp)
return ERR_PTR(-ENODEV);
- vport = ovs_vport_rtnl_rcu(dp, port_no);
+ vport = ovs_vport_ovsl_rcu(dp, port_no);
if (!vport)
- return ERR_PTR(-ENOENT);
+ return ERR_PTR(-ENODEV);
return vport;
} else
return ERR_PTR(-EINVAL);
@@ -1648,7 +1741,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
!a[OVS_VPORT_ATTR_UPCALL_PID])
goto exit;
- rtnl_lock();
+ ovs_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
err = -ENODEV;
if (!dp)
@@ -1661,7 +1754,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (port_no >= DP_MAX_PORTS)
goto exit_unlock;
- vport = ovs_vport_rtnl_rcu(dp, port_no);
+ vport = ovs_vport_ovsl(dp, port_no);
err = -EBUSY;
if (vport)
goto exit_unlock;
@@ -1671,7 +1764,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
err = -EFBIG;
goto exit_unlock;
}
- vport = ovs_vport_rtnl(dp, port_no);
+ vport = ovs_vport_ovsl(dp, port_no);
if (!vport)
break;
}
@@ -1697,11 +1790,11 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_dp_detach_port(vport);
goto exit_unlock;
}
- genl_notify(reply, genl_info_net(info), info->snd_portid,
- ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
+
+ ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
exit_unlock:
- rtnl_unlock();
+ ovs_unlock();
exit:
return err;
}
@@ -1713,7 +1806,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
struct vport *vport;
int err;
- rtnl_lock();
+ ovs_lock();
vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
@@ -1742,8 +1835,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
info->snd_seq, 0, OVS_VPORT_CMD_NEW);
BUG_ON(err < 0);
- genl_notify(reply, genl_info_net(info), info->snd_portid,
- ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
+ ovs_unlock();
+ ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
+ return 0;
rtnl_unlock();
return 0;
@@ -1751,7 +1845,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
exit_free:
kfree_skb(reply);
exit_unlock:
- rtnl_unlock();
+ ovs_unlock();
return err;
}
@@ -1762,7 +1856,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
struct vport *vport;
int err;
- rtnl_lock();
+ ovs_lock();
vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
@@ -1782,11 +1876,10 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
err = 0;
ovs_dp_detach_port(vport);
- genl_notify(reply, genl_info_net(info), info->snd_portid,
- ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
+ ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
exit_unlock:
- rtnl_unlock();
+ ovs_unlock();
return err;
}
@@ -1946,13 +2039,13 @@ static void rehash_flow_table(struct work_struct *work)
struct datapath *dp;
struct net *net;
- genl_lock();
+ ovs_lock();
rtnl_lock();
for_each_net(net) {
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
list_for_each_entry(dp, &ovs_net->dps, list_node) {
- struct flow_table *old_table = genl_dereference(dp->table);
+ struct flow_table *old_table = ovsl_dereference(dp->table);
struct flow_table *new_table;
new_table = ovs_flow_tbl_rehash(old_table);
@@ -1963,8 +2056,7 @@ static void rehash_flow_table(struct work_struct *work)
}
}
rtnl_unlock();
- genl_unlock();
-
+ ovs_unlock();
schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
}
@@ -1973,18 +2065,21 @@ static int __net_init ovs_init_net(struct net *net)
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
INIT_LIST_HEAD(&ovs_net->dps);
+ INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
return 0;
}
static void __net_exit ovs_exit_net(struct net *net)
{
- struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
struct datapath *dp, *dp_next;
+ struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
- genl_lock();
+ ovs_lock();
list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
__dp_destroy(dp);
- genl_unlock();
+ ovs_unlock();
+
+ cancel_work_sync(&ovs_net->dp_notify_work);
}
static struct pernet_operations ovs_net_ops = {
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 031dfbf37c9..16b84069521 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -57,10 +57,9 @@ struct dp_stats_percpu {
* struct datapath - datapath for flow-based packet switching
* @rcu: RCU callback head for deferred destruction.
* @list_node: Element in global 'dps' list.
- * @n_flows: Number of flows currently in flow table.
- * @table: Current flow table. Protected by genl_lock and RCU.
+ * @table: Current flow table. Protected by ovs_mutex and RCU.
* @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by
- * RTNL and RCU.
+ * ovs_mutex and RCU.
* @stats_percpu: Per-CPU datapath statistics.
* @net: Reference to net namespace.
*
@@ -86,26 +85,6 @@ struct datapath {
#endif
};
-struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
-
-static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
-{
- WARN_ON_ONCE(!rcu_read_lock_held());
- return ovs_lookup_vport(dp, port_no);
-}
-
-static inline struct vport *ovs_vport_rtnl_rcu(const struct datapath *dp, int port_no)
-{
- WARN_ON_ONCE(!rcu_read_lock_held() && !rtnl_is_locked());
- return ovs_lookup_vport(dp, port_no);
-}
-
-static inline struct vport *ovs_vport_rtnl(const struct datapath *dp, int port_no)
-{
- ASSERT_RTNL();
- return ovs_lookup_vport(dp, port_no);
-}
-
/**
* struct ovs_skb_cb - OVS data in skb CB
* @flow: The flow associated with this packet. May be %NULL if no flow.
@@ -119,7 +98,7 @@ struct ovs_skb_cb {
* struct dp_upcall - metadata to include with a packet to send to userspace
* @cmd: One of %OVS_PACKET_CMD_*.
* @key: Becomes %OVS_PACKET_ATTR_KEY. Must be nonnull.
- * @userdata: If nonnull, its u64 value is extracted and passed to userspace as
+ * @userdata: If nonnull, its variable-length value is passed to userspace as
* %OVS_PACKET_ATTR_USERDATA.
* @pid: Netlink PID to which packet should be sent. If @pid is 0 then no
* packet is sent and the packet is accounted in the datapath's @n_lost
@@ -132,6 +111,30 @@ struct dp_upcall_info {
u32 portid;
};
+/**
+ * struct ovs_net - Per net-namespace data for ovs.
+ * @dps: List of datapaths to enable dumping them all out.
+ * Protected by ovs_mutex.
+ */
+struct ovs_net {
+ struct list_head dps;
+ struct work_struct dp_notify_work;
+};
+
+extern int ovs_net_id;
+void ovs_lock(void);
+void ovs_unlock(void);
+
+#ifdef CONFIG_LOCKDEP
+int lockdep_ovsl_is_held(void);
+#else
+#define lockdep_ovsl_is_held() 1
+#endif
+
+#define ASSERT_OVSL() WARN_ON(unlikely(!lockdep_ovsl_is_held()))
+#define ovsl_dereference(p) \
+ rcu_dereference_protected(p, lockdep_ovsl_is_held())
+
static inline struct net *ovs_dp_get_net(struct datapath *dp)
{
return read_pnet(&dp->net);
@@ -142,6 +145,26 @@ static inline void ovs_dp_set_net(struct datapath *dp, struct net *net)
write_pnet(&dp->net, net);
}
+struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
+
+static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
+{
+ WARN_ON_ONCE(!rcu_read_lock_held());
+ return ovs_lookup_vport(dp, port_no);
+}
+
+static inline struct vport *ovs_vport_ovsl_rcu(const struct datapath *dp, int port_no)
+{
+ WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
+ return ovs_lookup_vport(dp, port_no);
+}
+
+static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_no)
+{
+ ASSERT_OVSL();
+ return ovs_lookup_vport(dp, port_no);
+}
+
extern struct notifier_block ovs_dp_device_notifier;
extern struct genl_multicast_group ovs_dp_vport_multicast_group;
@@ -155,4 +178,5 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
u8 cmd);
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
+void ovs_dp_notify_wq(struct work_struct *work);
#endif /* datapath.h */
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index 5558350e0d3..ef4feec6cd8 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -18,46 +18,78 @@
#include <linux/netdevice.h>
#include <net/genetlink.h>
+#include <net/netns/generic.h>
#include "datapath.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"
+static void dp_detach_port_notify(struct vport *vport)
+{
+ struct sk_buff *notify;
+ struct datapath *dp;
+
+ dp = vport->dp;
+ notify = ovs_vport_cmd_build_info(vport, 0, 0,
+ OVS_VPORT_CMD_DEL);
+ ovs_dp_detach_port(vport);
+ if (IS_ERR(notify)) {
+ netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0,
+ ovs_dp_vport_multicast_group.id,
+ PTR_ERR(notify));
+ return;
+ }
+
+ genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0,
+ ovs_dp_vport_multicast_group.id,
+ GFP_KERNEL);
+}
+
+void ovs_dp_notify_wq(struct work_struct *work)
+{
+ struct ovs_net *ovs_net = container_of(work, struct ovs_net, dp_notify_work);
+ struct datapath *dp;
+
+ ovs_lock();
+ list_for_each_entry(dp, &ovs_net->dps, list_node) {
+ int i;
+
+ for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
+ struct vport *vport;
+ struct hlist_node *n;
+
+ hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) {
+ struct netdev_vport *netdev_vport;
+
+ if (vport->ops->type != OVS_VPORT_TYPE_NETDEV)
+ continue;
+
+ netdev_vport = netdev_vport_priv(vport);
+ if (netdev_vport->dev->reg_state == NETREG_UNREGISTERED ||
+ netdev_vport->dev->reg_state == NETREG_UNREGISTERING)
+ dp_detach_port_notify(vport);
+ }
+ }
+ }
+ ovs_unlock();
+}
+
static int dp_device_event(struct notifier_block *unused, unsigned long event,
void *ptr)
{
+ struct ovs_net *ovs_net;
struct net_device *dev = ptr;
- struct vport *vport;
+ struct vport *vport = NULL;
- if (ovs_is_internal_dev(dev))
- vport = ovs_internal_dev_get_vport(dev);
- else
+ if (!ovs_is_internal_dev(dev))
vport = ovs_netdev_get_vport(dev);
if (!vport)
return NOTIFY_DONE;
- switch (event) {
- case NETDEV_UNREGISTER:
- if (!ovs_is_internal_dev(dev)) {
- struct sk_buff *notify;
- struct datapath *dp = vport->dp;
-
- notify = ovs_vport_cmd_build_info(vport, 0, 0,
- OVS_VPORT_CMD_DEL);
- ovs_dp_detach_port(vport);
- if (IS_ERR(notify)) {
- netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0,
- ovs_dp_vport_multicast_group.id,
- PTR_ERR(notify));
- break;
- }
-
- genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0,
- ovs_dp_vport_multicast_group.id,
- GFP_KERNEL);
- }
- break;
+ if (event == NETDEV_UNREGISTER) {
+ ovs_net = net_generic(dev_net(dev), ovs_net_id);
+ queue_work(system_wq, &ovs_net->dp_notify_work);
}
return NOTIFY_DONE;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 67a2b783fe7..b15321a2228 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -211,7 +211,7 @@ struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions)
return ERR_PTR(-ENOMEM);
sfa->actions_len = actions_len;
- memcpy(sfa->actions, nla_data(actions), actions_len);
+ nla_memcpy(sfa->actions, actions, actions_len);
return sfa;
}
@@ -466,7 +466,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
proto = *(__be16 *) skb->data;
__skb_pull(skb, sizeof(__be16));
- if (ntohs(proto) >= 1536)
+ if (ntohs(proto) >= ETH_P_802_3_MIN)
return proto;
if (skb->len < sizeof(struct llc_snap_hdr))
@@ -483,7 +483,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
__skb_pull(skb, sizeof(struct llc_snap_hdr));
- if (ntohs(llc->ethertype) >= 1536)
+ if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN)
return llc->ethertype;
return htons(ETH_P_802_2);
@@ -1038,7 +1038,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
- if (ntohs(swkey->eth.type) < 1536)
+ if (ntohs(swkey->eth.type) < ETH_P_802_3_MIN)
return -EINVAL;
attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
} else {
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index a7bb60ff3b5..0875fde65b9 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -138,27 +138,6 @@ int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *,
void ovs_flow_used(struct sw_flow *, struct sk_buff *);
u64 ovs_flow_used_time(unsigned long flow_jiffies);
-/* Upper bound on the length of a nlattr-formatted flow key. The longest
- * nlattr-formatted flow key would be:
- *
- * struct pad nl hdr total
- * ------ --- ------ -----
- * OVS_KEY_ATTR_PRIORITY 4 -- 4 8
- * OVS_KEY_ATTR_IN_PORT 4 -- 4 8
- * OVS_KEY_ATTR_SKB_MARK 4 -- 4 8
- * OVS_KEY_ATTR_ETHERNET 12 -- 4 16
- * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype)
- * OVS_KEY_ATTR_8021Q 4 -- 4 8
- * OVS_KEY_ATTR_ENCAP 0 -- 4 4 (VLAN encapsulation)
- * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (inner VLAN ethertype)
- * OVS_KEY_ATTR_IPV6 40 -- 4 44
- * OVS_KEY_ATTR_ICMPV6 2 2 4 8
- * OVS_KEY_ATTR_ND 28 -- 4 32
- * -------------------------------------------------
- * total 152
- */
-#define FLOW_BUFSIZE 152
-
int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
const struct nlattr *);
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 0531de6c7a4..84e0a037918 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -63,16 +63,6 @@ static struct rtnl_link_stats64 *internal_dev_get_stats(struct net_device *netde
return stats;
}
-static int internal_dev_mac_addr(struct net_device *dev, void *p)
-{
- struct sockaddr *addr = p;
-
- if (!is_valid_ether_addr(addr->sa_data))
- return -EADDRNOTAVAIL;
- memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
- return 0;
-}
-
/* Called with rcu_read_lock_bh. */
static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
{
@@ -126,7 +116,7 @@ static const struct net_device_ops internal_dev_netdev_ops = {
.ndo_open = internal_dev_open,
.ndo_stop = internal_dev_stop,
.ndo_start_xmit = internal_dev_xmit,
- .ndo_set_mac_address = internal_dev_mac_addr,
+ .ndo_set_mac_address = eth_mac_addr,
.ndo_change_mtu = internal_dev_change_mtu,
.ndo_get_stats64 = internal_dev_get_stats,
};
@@ -138,6 +128,7 @@ static void do_setup(struct net_device *netdev)
netdev->netdev_ops = &internal_dev_netdev_ops;
netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
netdev->destructor = internal_dev_destructor;
SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops);
netdev->tx_queue_len = 0;
@@ -146,7 +137,7 @@ static void do_setup(struct net_device *netdev)
NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO;
netdev->vlan_features = netdev->features;
- netdev->features |= NETIF_F_HW_VLAN_TX;
+ netdev->features |= NETIF_F_HW_VLAN_CTAG_TX;
netdev->hw_features = netdev->features & ~NETIF_F_LLTX;
eth_hw_addr_random(netdev);
}
@@ -182,16 +173,19 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
if (vport->port_no == OVSP_LOCAL)
netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL;
+ rtnl_lock();
err = register_netdevice(netdev_vport->dev);
if (err)
goto error_free_netdev;
dev_set_promiscuity(netdev_vport->dev, 1);
+ rtnl_unlock();
netif_start_queue(netdev_vport->dev);
return vport;
error_free_netdev:
+ rtnl_unlock();
free_netdev(netdev_vport->dev);
error_free_vport:
ovs_vport_free(vport);
@@ -204,10 +198,13 @@ static void internal_dev_destroy(struct vport *vport)
struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
netif_stop_queue(netdev_vport->dev);
+ rtnl_lock();
dev_set_promiscuity(netdev_vport->dev, -1);
/* unregister_netdevice() waits for an RCU grace period. */
unregister_netdevice(netdev_vport->dev);
+
+ rtnl_unlock();
}
static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
@@ -235,7 +232,6 @@ const struct vport_ops ovs_internal_vport_ops = {
.create = internal_dev_create,
.destroy = internal_dev_destroy,
.get_name = ovs_netdev_get_name,
- .get_ifindex = ovs_netdev_get_ifindex,
.send = internal_dev_recv,
};
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 2130d61c384..4f01c6d2ffa 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -100,16 +100,20 @@ static struct vport *netdev_create(const struct vport_parms *parms)
goto error_put;
}
+ rtnl_lock();
err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,
vport);
if (err)
- goto error_put;
+ goto error_unlock;
dev_set_promiscuity(netdev_vport->dev, 1);
netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
+ rtnl_unlock();
return vport;
+error_unlock:
+ rtnl_unlock();
error_put:
dev_put(netdev_vport->dev);
error_free_vport:
@@ -131,9 +135,11 @@ static void netdev_destroy(struct vport *vport)
{
struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+ rtnl_lock();
netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
netdev_rx_handler_unregister(netdev_vport->dev);
dev_set_promiscuity(netdev_vport->dev, -1);
+ rtnl_unlock();
call_rcu(&netdev_vport->rcu, free_port_rcu);
}
@@ -144,12 +150,6 @@ const char *ovs_netdev_get_name(const struct vport *vport)
return netdev_vport->dev->name;
}
-int ovs_netdev_get_ifindex(const struct vport *vport)
-{
- const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
- return netdev_vport->dev->ifindex;
-}
-
static unsigned int packet_length(const struct sk_buff *skb)
{
unsigned int length = skb->len - ETH_HLEN;
@@ -200,6 +200,5 @@ const struct vport_ops ovs_netdev_vport_ops = {
.create = netdev_create,
.destroy = netdev_destroy,
.get_name = ovs_netdev_get_name,
- .get_ifindex = ovs_netdev_get_ifindex,
.send = netdev_send,
};
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h
index 6478079b341..a3cb3a32cd7 100644
--- a/net/openvswitch/vport-netdev.h
+++ b/net/openvswitch/vport-netdev.h
@@ -40,6 +40,5 @@ netdev_vport_priv(const struct vport *vport)
const char *ovs_netdev_get_name(const struct vport *);
const char *ovs_netdev_get_config(const struct vport *);
-int ovs_netdev_get_ifindex(const struct vport *);
#endif /* vport_netdev.h */
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index f6b8132ce4c..720623190ea 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -40,7 +40,7 @@ static const struct vport_ops *vport_ops_list[] = {
&ovs_internal_vport_ops,
};
-/* Protected by RCU read lock for reading, RTNL lock for writing. */
+/* Protected by RCU read lock for reading, ovs_mutex for writing. */
static struct hlist_head *dev_table;
#define VPORT_HASH_BUCKETS 1024
@@ -80,7 +80,7 @@ static struct hlist_head *hash_bucket(struct net *net, const char *name)
*
* @name: name of port to find
*
- * Must be called with RTNL or RCU read lock.
+ * Must be called with ovs_mutex or RCU read lock.
*/
struct vport *ovs_vport_locate(struct net *net, const char *name)
{
@@ -128,7 +128,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
vport->ops = ops;
INIT_HLIST_NODE(&vport->dp_hash_node);
- vport->percpu_stats = alloc_percpu(struct vport_percpu_stats);
+ vport->percpu_stats = alloc_percpu(struct pcpu_tstats);
if (!vport->percpu_stats) {
kfree(vport);
return ERR_PTR(-ENOMEM);
@@ -161,7 +161,7 @@ void ovs_vport_free(struct vport *vport)
* @parms: Information about new vport.
*
* Creates a new vport with the specified configuration (which is dependent on
- * device type). RTNL lock must be held.
+ * device type). ovs_mutex must be held.
*/
struct vport *ovs_vport_add(const struct vport_parms *parms)
{
@@ -169,8 +169,6 @@ struct vport *ovs_vport_add(const struct vport_parms *parms)
int err = 0;
int i;
- ASSERT_RTNL();
-
for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) {
if (vport_ops_list[i]->type == parms->type) {
struct hlist_head *bucket;
@@ -201,12 +199,10 @@ out:
* @port: New configuration.
*
* Modifies an existing device with the specified configuration (which is
- * dependent on device type). RTNL lock must be held.
+ * dependent on device type). ovs_mutex must be held.
*/
int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
{
- ASSERT_RTNL();
-
if (!vport->ops->set_options)
return -EOPNOTSUPP;
return vport->ops->set_options(vport, options);
@@ -218,11 +214,11 @@ int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
* @vport: vport to delete.
*
* Detaches @vport from its datapath and destroys it. It is possible to fail
- * for reasons such as lack of memory. RTNL lock must be held.
+ * for reasons such as lack of memory. ovs_mutex must be held.
*/
void ovs_vport_del(struct vport *vport)
{
- ASSERT_RTNL();
+ ASSERT_OVSL();
hlist_del_rcu(&vport->hash_node);
@@ -237,7 +233,7 @@ void ovs_vport_del(struct vport *vport)
*
* Retrieves transmit, receive, and error stats for the given device.
*
- * Must be called with RTNL lock or rcu_read_lock.
+ * Must be called with ovs_mutex or rcu_read_lock.
*/
void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
{
@@ -264,16 +260,16 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
spin_unlock_bh(&vport->stats_lock);
for_each_possible_cpu(i) {
- const struct vport_percpu_stats *percpu_stats;
- struct vport_percpu_stats local_stats;
+ const struct pcpu_tstats *percpu_stats;
+ struct pcpu_tstats local_stats;
unsigned int start;
percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
do {
- start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
+ start = u64_stats_fetch_begin_bh(&percpu_stats->syncp);
local_stats = *percpu_stats;
- } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
+ } while (u64_stats_fetch_retry_bh(&percpu_stats->syncp, start));
stats->rx_bytes += local_stats.rx_bytes;
stats->rx_packets += local_stats.rx_packets;
@@ -296,22 +292,24 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
* negative error code if a real error occurred. If an error occurs, @skb is
* left unmodified.
*
- * Must be called with RTNL lock or rcu_read_lock.
+ * Must be called with ovs_mutex or rcu_read_lock.
*/
int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
{
struct nlattr *nla;
+ int err;
+
+ if (!vport->ops->get_options)
+ return 0;
nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS);
if (!nla)
return -EMSGSIZE;
- if (vport->ops->get_options) {
- int err = vport->ops->get_options(vport, skb);
- if (err) {
- nla_nest_cancel(skb, nla);
- return err;
- }
+ err = vport->ops->get_options(vport, skb);
+ if (err) {
+ nla_nest_cancel(skb, nla);
+ return err;
}
nla_nest_end(skb, nla);
@@ -329,13 +327,13 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
*/
void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
{
- struct vport_percpu_stats *stats;
+ struct pcpu_tstats *stats;
stats = this_cpu_ptr(vport->percpu_stats);
- u64_stats_update_begin(&stats->sync);
+ u64_stats_update_begin(&stats->syncp);
stats->rx_packets++;
stats->rx_bytes += skb->len;
- u64_stats_update_end(&stats->sync);
+ u64_stats_update_end(&stats->syncp);
ovs_dp_process_received_packet(vport, skb);
}
@@ -346,7 +344,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
* @vport: vport on which to send the packet
* @skb: skb to send
*
- * Sends the given packet and returns the length of data sent. Either RTNL
+ * Sends the given packet and returns the length of data sent. Either ovs
* lock or rcu_read_lock must be held.
*/
int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
@@ -354,14 +352,14 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
int sent = vport->ops->send(vport, skb);
if (likely(sent)) {
- struct vport_percpu_stats *stats;
+ struct pcpu_tstats *stats;
stats = this_cpu_ptr(vport->percpu_stats);
- u64_stats_update_begin(&stats->sync);
+ u64_stats_update_begin(&stats->syncp);
stats->tx_packets++;
stats->tx_bytes += sent;
- u64_stats_update_end(&stats->sync);
+ u64_stats_update_end(&stats->syncp);
}
return sent;
}
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 3f7961ea3c5..68a377bc084 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -19,6 +19,7 @@
#ifndef VPORT_H
#define VPORT_H 1
+#include <linux/if_tunnel.h>
#include <linux/list.h>
#include <linux/netlink.h>
#include <linux/openvswitch.h>
@@ -50,14 +51,6 @@ int ovs_vport_send(struct vport *, struct sk_buff *);
/* The following definitions are for implementers of vport devices: */
-struct vport_percpu_stats {
- u64 rx_bytes;
- u64 rx_packets;
- u64 tx_bytes;
- u64 tx_packets;
- struct u64_stats_sync sync;
-};
-
struct vport_err_stats {
u64 rx_dropped;
u64 rx_errors;
@@ -68,10 +61,10 @@ struct vport_err_stats {
/**
* struct vport - one port within a datapath
* @rcu: RCU callback head for deferred destruction.
- * @port_no: Index into @dp's @ports array.
* @dp: Datapath to which this port belongs.
* @upcall_portid: The Netlink port to use for packets received on this port that
* miss the flow table.
+ * @port_no: Index into @dp's @ports array.
* @hash_node: Element in @dev_table hash table in vport.c.
* @dp_hash_node: Element in @datapath->ports hash table in datapath.c.
* @ops: Class structure.
@@ -81,15 +74,15 @@ struct vport_err_stats {
*/
struct vport {
struct rcu_head rcu;
- u16 port_no;
struct datapath *dp;
u32 upcall_portid;
+ u16 port_no;
struct hlist_node hash_node;
struct hlist_node dp_hash_node;
const struct vport_ops *ops;
- struct vport_percpu_stats __percpu *percpu_stats;
+ struct pcpu_tstats __percpu *percpu_stats;
spinlock_t stats_lock;
struct vport_err_stats err_stats;
@@ -131,24 +124,22 @@ struct vport_parms {
* have any configuration.
* @get_name: Get the device's name.
* @get_config: Get the device's configuration.
- * @get_ifindex: Get the system interface index associated with the device.
* May be null if the device does not have an ifindex.
* @send: Send a packet on the device. Returns the length of the packet sent.
*/
struct vport_ops {
enum ovs_vport_type type;
- /* Called with RTNL lock. */
+ /* Called with ovs_mutex. */
struct vport *(*create)(const struct vport_parms *);
void (*destroy)(struct vport *);
int (*set_options)(struct vport *, struct nlattr *);
int (*get_options)(const struct vport *, struct sk_buff *);
- /* Called with rcu_read_lock or RTNL lock. */
+ /* Called with rcu_read_lock or ovs_mutex. */
const char *(*get_name)(const struct vport *);
void (*get_config)(const struct vport *, void *);
- int (*get_ifindex)(const struct vport *);
int (*send)(struct vport *, struct sk_buff *);
};