aboutsummaryrefslogtreecommitdiff
path: root/include/net/cls_cgroup.h
diff options
context:
space:
mode:
authorHerbert Xu <herbert@gondor.apana.org.au>2010-05-24 00:12:34 -0700
committerDavid S. Miller <davem@davemloft.net>2010-05-24 00:12:34 -0700
commitf845172531fb7410c7fb7780b1a6e51ee6df7d52 (patch)
treeef1030d0ad9d9dbc8fe800a145c587f04be50ade /include/net/cls_cgroup.h
parenteda6e6f86b5f95b982ac7ebf7cf5be2a29a291e9 (diff)
cls_cgroup: Store classid in struct sock
Up until now cls_cgroup has relied on fetching the classid out of the current executing thread. This runs into trouble when a packet processing is delayed in which case it may execute out of another thread's context. Furthermore, even when a packet is not delayed we may fail to classify it if soft IRQs have been disabled, because this scenario is indistinguishable from one where a packet unrelated to the current thread is processed by a real soft IRQ. In fact, the current semantics is inherently broken, as a single skb may be constructed out of the writes of two different tasks. A different manifestation of this problem is when the TCP stack transmits in response of an incoming ACK. This is currently unclassified. As we already have a concept of packet ownership for accounting purposes in the skb->sk pointer, this is a natural place to store the classid in a persistent manner. This patch adds the cls_cgroup classid in struct sock, filling up an existing hole on 64-bit :) The value is set at socket creation time. So all sockets created via socket(2) automatically gains the ID of the thread creating it. Whenever another process touches the socket by either reading or writing to it, we will change the socket classid to that of the process if it has a valid (non-zero) classid. For sockets created on inbound connections through accept(2), we inherit the classid of the original listening socket through sk_clone, possibly preceding the actual accept(2) call. In order to minimise risks, I have not made this the authoritative classid. For now it is only used as a backup when we execute with soft IRQs disabled. Once we're completely happy with its semantics we can use it as the sole classid. Footnote: I have rearranged the error path on cls_group module creation. If we didn't do this, then there is a window where someone could create a tc rule using cls_group before the cgroup subsystem has been registered. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net/cls_cgroup.h')
-rw-r--r--include/net/cls_cgroup.h63
1 files changed, 63 insertions, 0 deletions
diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
new file mode 100644
index 00000000000..ef2df1475b5
--- /dev/null
+++ b/include/net/cls_cgroup.h
@@ -0,0 +1,63 @@
+/*
+ * cls_cgroup.h Control Group Classifier
+ *
+ * Authors: Thomas Graf <tgraf@suug.ch>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#ifndef _NET_CLS_CGROUP_H
+#define _NET_CLS_CGROUP_H
+
+#include <linux/cgroup.h>
+#include <linux/hardirq.h>
+#include <linux/rcupdate.h>
+
+#ifdef CONFIG_CGROUPS
+struct cgroup_cls_state
+{
+ struct cgroup_subsys_state css;
+ u32 classid;
+};
+
+#ifdef CONFIG_NET_CLS_CGROUP
+static inline u32 task_cls_classid(struct task_struct *p)
+{
+ if (in_interrupt())
+ return 0;
+
+ return container_of(task_subsys_state(p, net_cls_subsys_id),
+ struct cgroup_cls_state, css).classid;
+}
+#else
+extern int net_cls_subsys_id;
+
+static inline u32 task_cls_classid(struct task_struct *p)
+{
+ int id;
+ u32 classid;
+
+ if (in_interrupt())
+ return 0;
+
+ rcu_read_lock();
+ id = rcu_dereference(net_cls_subsys_id);
+ if (id >= 0)
+ classid = container_of(task_subsys_state(p, id),
+ struct cgroup_cls_state, css)->classid;
+ rcu_read_unlock();
+
+ return classid;
+}
+#endif
+#else
+static inline u32 task_cls_classid(struct task_struct *p)
+{
+ return 0;
+}
+#endif
+#endif /* _NET_CLS_CGROUP_H */