From 7e47682ea555e7c1edef1d8fd96e2aa4c12abe59 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Tue, 9 Jun 2015 21:32:09 +1000 Subject: cgroup: allow a cgroup subsystem to reject a fork Add a new cgroup subsystem callback can_fork that conditionally states whether or not the fork is accepted or rejected by a cgroup policy. In addition, add a cancel_fork callback so that if an error occurs later in the forking process, any state modified by can_fork can be reverted. Allow for a private opaque pointer to be passed from cgroup_can_fork to cgroup_post_fork, allowing for the fork state to be stored by each subsystem separately. Also add a tagging system for cgroup_subsys.h to allow for CGROUP_ enumerations to be be defined and used. In addition, explicitly add a CGROUP_CANFORK_COUNT macro to make arrays easier to define. This is in preparation for implementing the pids cgroup subsystem. Signed-off-by: Aleksa Sarai Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 10 +++++++++- include/linux/cgroup.h | 15 +++++++++++++-- include/linux/cgroup_subsys.h | 23 +++++++++++++++++++++++ 3 files changed, 45 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 93755a629299..83e37d8c4d80 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -34,12 +34,17 @@ struct seq_file; /* define the enumeration of all cgroup subsystems */ #define SUBSYS(_x) _x ## _cgrp_id, +#define SUBSYS_TAG(_t) CGROUP_ ## _t, \ + __unused_tag_ ## _t = CGROUP_ ## _t - 1, enum cgroup_subsys_id { #include CGROUP_SUBSYS_COUNT, }; +#undef SUBSYS_TAG #undef SUBSYS +#define CGROUP_CANFORK_COUNT (CGROUP_CANFORK_END - CGROUP_CANFORK_START) + /* bits in struct cgroup_subsys_state flags field */ enum { CSS_NO_REF = (1 << 0), /* no reference counting for this css */ @@ -406,7 +411,9 @@ struct cgroup_subsys { struct cgroup_taskset *tset); void (*attach)(struct cgroup_subsys_state *css, struct cgroup_taskset *tset); - void (*fork)(struct task_struct *task); + int (*can_fork)(struct task_struct *task, void **priv_p); + void (*cancel_fork)(struct task_struct *task, void *priv); + void (*fork)(struct task_struct *task, void *priv); void (*exit)(struct cgroup_subsys_state *css, struct cgroup_subsys_state *old_css, struct task_struct *task); @@ -491,6 +498,7 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) #else /* CONFIG_CGROUPS */ +#define CGROUP_CANFORK_COUNT 0 #define CGROUP_SUBSYS_COUNT 0 static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) {} diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a593e299162e..a71fe2a3984e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -62,7 +62,12 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk); void cgroup_fork(struct task_struct *p); -void cgroup_post_fork(struct task_struct *p); +extern int cgroup_can_fork(struct task_struct *p, + void *ss_priv[CGROUP_CANFORK_COUNT]); +extern void cgroup_cancel_fork(struct task_struct *p, + void *ss_priv[CGROUP_CANFORK_COUNT]); +extern void cgroup_post_fork(struct task_struct *p, + void *old_ss_priv[CGROUP_CANFORK_COUNT]); void cgroup_exit(struct task_struct *p); int cgroup_init_early(void); @@ -524,7 +529,13 @@ static inline int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) { return -EINVAL; } static inline void cgroup_fork(struct task_struct *p) {} -static inline void cgroup_post_fork(struct task_struct *p) {} +static inline int cgroup_can_fork(struct task_struct *p, + void *ss_priv[CGROUP_CANFORK_COUNT]) +{ return 0; } +static inline void cgroup_cancel_fork(struct task_struct *p, + void *ss_priv[CGROUP_CANFORK_COUNT]) {} +static inline void cgroup_post_fork(struct task_struct *p, + void *ss_priv[CGROUP_CANFORK_COUNT]) {} static inline void cgroup_exit(struct task_struct *p) {} static inline int cgroup_init_early(void) { return 0; } diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index e4a96fb14403..ec43bce7e1ea 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -3,6 +3,17 @@ * * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS. */ + +/* + * This file *must* be included with SUBSYS() defined. + * SUBSYS_TAG() is a noop if undefined. + */ + +#ifndef SUBSYS_TAG +#define __TMP_SUBSYS_TAG +#define SUBSYS_TAG(_x) +#endif + #if IS_ENABLED(CONFIG_CPUSETS) SUBSYS(cpuset) #endif @@ -47,12 +58,24 @@ SUBSYS(net_prio) SUBSYS(hugetlb) #endif +/* + * Subsystems that implement the can_fork() family of callbacks. + */ +SUBSYS_TAG(CANFORK_START) +SUBSYS_TAG(CANFORK_END) + /* * The following subsystems are not supported on the default hierarchy. */ #if IS_ENABLED(CONFIG_CGROUP_DEBUG) SUBSYS(debug) #endif + +#ifdef __TMP_SUBSYS_TAG +#undef __TMP_SUBSYS_TAG +#undef SUBSYS_TAG +#endif + /* * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS. */ -- cgit v1.2.3 From 49b786ea146f69c371df18e81ce0a2d5839f865c Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Tue, 9 Jun 2015 21:32:10 +1000 Subject: cgroup: implement the PIDs subsystem Adds a new single-purpose PIDs subsystem to limit the number of tasks that can be forked inside a cgroup. Essentially this is an implementation of RLIMIT_NPROC that applies to a cgroup rather than a process tree. However, it should be noted that organisational operations (adding and removing tasks from a PIDs hierarchy) will *not* be prevented. Rather, the number of tasks in the hierarchy cannot exceed the limit through forking. This is due to the fact that, in the unified hierarchy, attach cannot fail (and it is not possible for a task to overcome its PIDs cgroup policy limit by attaching to a child cgroup -- even if migrating mid-fork it must be able to fork in the parent first). PIDs are fundamentally a global resource, and it is possible to reach PID exhaustion inside a cgroup without hitting any reasonable kmemcg policy. Once you've hit PID exhaustion, you're only in a marginally better state than OOM. This subsystem allows PID exhaustion inside a cgroup to be prevented. Signed-off-by: Aleksa Sarai Signed-off-by: Tejun Heo --- include/linux/cgroup_subsys.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index ec43bce7e1ea..1f36945fd23d 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -62,6 +62,11 @@ SUBSYS(hugetlb) * Subsystems that implement the can_fork() family of callbacks. */ SUBSYS_TAG(CANFORK_START) + +#if IS_ENABLED(CONFIG_CGROUP_PIDS) +SUBSYS(pids) +#endif + SUBSYS_TAG(CANFORK_END) /* -- cgit v1.2.3