From: Matt Mackall Add a pair of rlimits for allowing non-root tasks to raise nice and rt priorities. Defaults to traditional behavior. Originally written by Chris Wright. The patch implements a simple rlimit ceiling for the RT (and nice) priorities a task can set. The rlimit defaults to 0, meaning no change in behavior by default. A value of 50 means RT priority levels 1-50 are allowed. A value of 100 means all 99 RT priority levels from 1 to 99 are allowed. CAP_SYS_NICE is blanket permission. (akpm: see http://www.uwsg.iu.edu/hypermail/linux/kernel/0503.1/1921.html for tips on integrating this with PAM). Signed-off-by: Matt Mackall Acked-by: Ingo Molnar Signed-off-by: Andrew Morton --- 25-akpm/include/asm-generic/resource.h | 7 ++++++- 25-akpm/include/linux/sched.h | 1 + 25-akpm/kernel/sched.c | 25 +++++++++++++++++++------ 25-akpm/kernel/sys.c | 2 +- 4 files changed, 27 insertions(+), 8 deletions(-) diff -puN include/asm-generic/resource.h~nice-and-rt-prio-rlimits include/asm-generic/resource.h --- 25/include/asm-generic/resource.h~nice-and-rt-prio-rlimits 2005-03-16 21:42:03.000000000 -0800 +++ 25-akpm/include/asm-generic/resource.h 2005-03-16 21:42:03.000000000 -0800 @@ -41,8 +41,11 @@ #define RLIMIT_LOCKS 10 /* maximum file locks held */ #define RLIMIT_SIGPENDING 11 /* max number of pending signals */ #define RLIMIT_MSGQUEUE 12 /* maximum bytes in POSIX mqueues */ +#define RLIMIT_NICE 13 /* max nice prio allowed to raise to + 0-39 for nice level 19 .. -20 */ +#define RLIMIT_RTPRIO 14 /* maximum realtime priority */ -#define RLIM_NLIMITS 13 +#define RLIM_NLIMITS 15 /* * SuS says limits have to be unsigned. 
@@ -81,6 +84,8 @@ [RLIMIT_LOCKS] = { RLIM_INFINITY, RLIM_INFINITY }, \ [RLIMIT_SIGPENDING] = { 0, 0 }, \ [RLIMIT_MSGQUEUE] = { MQ_BYTES_MAX, MQ_BYTES_MAX }, \ + [RLIMIT_NICE] = { 0, 0 }, \ + [RLIMIT_RTPRIO] = { 0, 0 }, \ } #endif /* __KERNEL__ */ diff -puN include/linux/sched.h~nice-and-rt-prio-rlimits include/linux/sched.h --- 25/include/linux/sched.h~nice-and-rt-prio-rlimits 2005-03-16 21:42:03.000000000 -0800 +++ 25-akpm/include/linux/sched.h 2005-03-16 21:42:03.000000000 -0800 @@ -862,6 +862,7 @@ extern void sched_idle_next(void); extern void set_user_nice(task_t *p, long nice); extern int task_prio(const task_t *p); extern int task_nice(const task_t *p); +extern int can_nice(const task_t *p, const int nice); extern int task_curr(const task_t *p); extern int idle_cpu(int cpu); extern int sched_setscheduler(struct task_struct *, int, struct sched_param *); diff -puN kernel/sched.c~nice-and-rt-prio-rlimits kernel/sched.c --- 25/kernel/sched.c~nice-and-rt-prio-rlimits 2005-03-16 21:42:03.000000000 -0800 +++ 25-akpm/kernel/sched.c 2005-03-16 21:42:03.000000000 -0800 @@ -3223,6 +3223,19 @@ out_unlock: EXPORT_SYMBOL(set_user_nice); +/* + * can_nice - check if a task can reduce its nice value + * @p: task + * @nice: nice value + */ +int can_nice(const task_t *p, const int nice) +{ + /* convert nice value [19,-20] to rlimit style value [0,39] */ + int nice_rlim = 19 - nice; + return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur || + capable(CAP_SYS_NICE)); +} + #ifdef __ARCH_WANT_SYS_NICE /* @@ -3242,12 +3255,8 @@ asmlinkage long sys_nice(int increment) * We don't have to worry. Conceptually one call occurs first * and we have a single winner. 
*/ - if (increment < 0) { - if (!capable(CAP_SYS_NICE)) - return -EPERM; - if (increment < -40) - increment = -40; - } + if (increment < -40) + increment = -40; if (increment > 40) increment = 40; @@ -3257,6 +3266,9 @@ asmlinkage long sys_nice(int increment) if (nice > 19) nice = 19; + if (increment < 0 && !can_nice(current, nice)) + return -EPERM; + retval = security_task_setnice(current, nice); if (retval) return retval; @@ -3372,6 +3384,7 @@ recheck: return -EINVAL; if ((policy == SCHED_FIFO || policy == SCHED_RR) && + param->sched_priority > p->signal->rlim[RLIMIT_RTPRIO].rlim_cur && !capable(CAP_SYS_NICE)) return -EPERM; if ((current->euid != p->euid) && (current->euid != p->uid) && diff -puN kernel/sys.c~nice-and-rt-prio-rlimits kernel/sys.c --- 25/kernel/sys.c~nice-and-rt-prio-rlimits 2005-03-16 21:42:03.000000000 -0800 +++ 25-akpm/kernel/sys.c 2005-03-16 21:42:03.000000000 -0800 @@ -227,7 +227,7 @@ static int set_one_prio(struct task_stru error = -EPERM; goto out; } - if (niceval < task_nice(p) && !capable(CAP_SYS_NICE)) { + if (niceval < task_nice(p) && !can_nice(p, niceval)) { error = -EACCES; goto out; } _