Signed-Off-By: Andrea Arcangeli Index: linux-2.5/arch/i386/Kconfig =================================================================== RCS file: /home/andrea/crypto/cvs/linux-2.5/arch/i386/Kconfig,v retrieving revision 1.131 diff -u -p -r1.131 Kconfig --- linux-2.5/arch/i386/Kconfig 13 Sep 2004 18:32:00 -0000 1.131 +++ linux-2.5/arch/i386/Kconfig 12 Oct 2004 01:04:09 -0000 @@ -33,6 +33,10 @@ config GENERIC_IOMAP bool default y +config SECCOMP + bool + default y + source "init/Kconfig" menu "Processor type and features" Index: linux-2.5/arch/i386/kernel/entry.S =================================================================== RCS file: /home/andrea/crypto/cvs/linux-2.5/arch/i386/kernel/entry.S,v retrieving revision 1.90 diff -u -p -r1.90 entry.S --- linux-2.5/arch/i386/kernel/entry.S 8 Sep 2004 14:49:36 -0000 1.90 +++ linux-2.5/arch/i386/kernel/entry.S 12 Oct 2004 00:52:07 -0000 @@ -157,12 +157,19 @@ do_lcall: movl %edx,EIP(%ebp) # Now we move them to their "normal" places movl %ecx,CS(%ebp) # GET_THREAD_INFO_WITH_ESP(%ebp) # GET_THREAD_INFO + /* call gates cannot run with SECCOMP enabled */ + testw $_TIF_SECCOMP,TI_flags(%ebp) + jnz sigkill movl TI_exec_domain(%ebp), %edx # Get the execution domain call *EXEC_DOMAIN_handler(%edx) # Call the handler for the domain addl $4, %esp popl %eax jmp resume_userspace +sigkill: + pushl $9 + call do_exit + ENTRY(lcall27) pushfl # We get a different stack layout with call # gates, which has to be cleaned up later.. 
@@ -256,7 +263,7 @@ sysenter_past_esp: SAVE_ALL GET_THREAD_INFO(%ebp) - testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) + testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp) jnz syscall_trace_entry cmpl $(nr_syscalls), %eax jae syscall_badsys @@ -279,7 +286,7 @@ ENTRY(system_call) SAVE_ALL GET_THREAD_INFO(%ebp) # system call tracing in operation - testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) + testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp) jnz syscall_trace_entry cmpl $(nr_syscalls), %eax jae syscall_badsys Index: linux-2.5/arch/i386/kernel/ptrace.c =================================================================== RCS file: /home/andrea/crypto/cvs/linux-2.5/arch/i386/kernel/ptrace.c,v retrieving revision 1.26 diff -u -p -r1.26 ptrace.c --- linux-2.5/arch/i386/kernel/ptrace.c 23 Aug 2004 19:40:02 -0000 1.26 +++ linux-2.5/arch/i386/kernel/ptrace.c 12 Oct 2004 00:52:07 -0000 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -530,6 +531,10 @@ out: __attribute__((regparm(3))) void do_syscall_trace(struct pt_regs *regs, int entryexit) { + /* do the secure computing check first */ + if (unlikely(test_thread_flag(TIF_SECCOMP))) + secure_computing(regs->orig_eax); + if (unlikely(current->audit_context)) { if (!entryexit) audit_syscall_entry(current, regs->orig_eax, Index: linux-2.5/arch/x86_64/Kconfig =================================================================== RCS file: /home/andrea/crypto/cvs/linux-2.5/arch/x86_64/Kconfig,v retrieving revision 1.56 diff -u -p -r1.56 Kconfig --- linux-2.5/arch/x86_64/Kconfig 6 Oct 2004 15:14:04 -0000 1.56 +++ linux-2.5/arch/x86_64/Kconfig 12 Oct 2004 01:01:18 -0000 @@ -82,6 +82,10 @@ config GENERIC_IOMAP bool default y +config SECCOMP + bool + default y + source "init/Kconfig" Index: linux-2.5/arch/x86_64/ia32/ia32entry.S =================================================================== RCS file: 
/home/andrea/crypto/cvs/linux-2.5/arch/x86_64/ia32/ia32entry.S,v retrieving revision 1.39 diff -u -p -r1.39 ia32entry.S --- linux-2.5/arch/x86_64/ia32/ia32entry.S 31 Aug 2004 17:35:25 -0000 1.39 +++ linux-2.5/arch/x86_64/ia32/ia32entry.S 12 Oct 2004 00:52:07 -0000 @@ -78,7 +78,7 @@ ENTRY(ia32_sysenter_target) .quad 1b,ia32_badarg .previous GET_THREAD_INFO(%r10) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10) + testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) jnz sysenter_tracesys sysenter_do_call: cmpl $(IA32_NR_syscalls),%eax @@ -163,7 +163,7 @@ ENTRY(ia32_cstar_target) .quad 1b,ia32_badarg .previous GET_THREAD_INFO(%r10) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10) + testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) jnz cstar_tracesys cstar_do_call: cmpl $IA32_NR_syscalls,%eax @@ -236,7 +236,7 @@ ENTRY(ia32_syscall) this could be a problem. */ SAVE_ARGS 0,0,1 GET_THREAD_INFO(%r10) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10) + testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) jnz ia32_tracesys ia32_do_syscall: cmpl $(IA32_NR_syscalls),%eax Index: linux-2.5/arch/x86_64/kernel/entry.S =================================================================== RCS file: /home/andrea/crypto/cvs/linux-2.5/arch/x86_64/kernel/entry.S,v retrieving revision 1.24 diff -u -p -r1.24 entry.S --- linux-2.5/arch/x86_64/kernel/entry.S 24 Aug 2004 18:20:09 -0000 1.24 +++ linux-2.5/arch/x86_64/kernel/entry.S 12 Oct 2004 00:52:07 -0000 @@ -185,7 +185,7 @@ ENTRY(system_call) movq %rax,ORIG_RAX-ARGOFFSET(%rsp) movq %rcx,RIP-ARGOFFSET(%rsp) GET_THREAD_INFO(%rcx) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx) + testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) jnz tracesys cmpq $__NR_syscall_max,%rax ja badsys Index: linux-2.5/arch/x86_64/kernel/ptrace.c 
=================================================================== RCS file: /home/andrea/crypto/cvs/linux-2.5/arch/x86_64/kernel/ptrace.c,v retrieving revision 1.18 diff -u -p -r1.18 ptrace.c --- linux-2.5/arch/x86_64/kernel/ptrace.c 24 Aug 2004 18:20:09 -0000 1.18 +++ linux-2.5/arch/x86_64/kernel/ptrace.c 12 Oct 2004 00:52:07 -0000 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -519,6 +520,10 @@ static void syscall_trace(struct pt_regs asmlinkage void syscall_trace_enter(struct pt_regs *regs) { + /* do the secure computing check first */ + if (unlikely(test_thread_flag(TIF_SECCOMP))) + secure_computing(regs->orig_rax); + if (unlikely(current->audit_context)) audit_syscall_entry(current, regs->orig_rax, regs->rdi, regs->rsi, Index: linux-2.5/fs/proc/base.c =================================================================== RCS file: /home/andrea/crypto/cvs/linux-2.5/fs/proc/base.c,v retrieving revision 1.82 diff -u -p -r1.82 base.c --- linux-2.5/fs/proc/base.c 8 Sep 2004 14:50:33 -0000 1.82 +++ linux-2.5/fs/proc/base.c 12 Oct 2004 01:02:59 -0000 @@ -32,6 +32,9 @@ #include #include #include +#ifdef CONFIG_SECCOMP +#include +#endif /* * For hysterical raisins we keep the same inumbers as in the old procfs. 
@@ -48,6 +51,9 @@ enum pid_directory_inos {
 	PROC_TGID_TASK,
 	PROC_TGID_STATUS,
 	PROC_TGID_MEM,
+#ifdef CONFIG_SECCOMP
+	PROC_TGID_SECCOMP,
+#endif
 	PROC_TGID_CWD,
 	PROC_TGID_ROOT,
 	PROC_TGID_EXE,
@@ -74,6 +80,9 @@ enum pid_directory_inos {
 	PROC_TID_INO,
 	PROC_TID_STATUS,
 	PROC_TID_MEM,
+#ifdef CONFIG_SECCOMP
+	PROC_TID_SECCOMP,
+#endif
 	PROC_TID_CWD,
 	PROC_TID_ROOT,
 	PROC_TID_EXE,
@@ -119,6 +128,9 @@ static struct pid_entry tgid_base_stuff[
 	E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO),
 	E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO),
 	E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR),
+#ifdef CONFIG_SECCOMP
+	E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
+#endif
 	E(PROC_TGID_CWD, "cwd", S_IFLNK|S_IRWXUGO),
 	E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO),
 	E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO),
@@ -144,6 +156,9 @@ static struct pid_entry tid_base_stuff[]
 	E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO),
 	E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO),
 	E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR),
+#ifdef CONFIG_SECCOMP
+	E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
+#endif
 	E(PROC_TID_CWD, "cwd", S_IFLNK|S_IRWXUGO),
 	E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO),
 	E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO),
@@ -701,6 +716,79 @@ static struct inode_operations proc_mem_
 	.permission = proc_permission,
 };
 
+#ifdef CONFIG_SECCOMP
+/*
+ * Read handler for the per-task "seccomp" proc file: formats the task's
+ * seccomp_mode as "%u\n" and copies the part selected by *ppos and count
+ * to buf.  Returns the number of bytes copied, 0 once *ppos is at or past
+ * the end of the text, or -EFAULT if the copy to user space fails.
+ */
+static ssize_t seccomp_read(struct file * file, char * buf,
+			    size_t count, loff_t *ppos)
+{
+	struct task_struct * tsk = proc_task(file->f_dentry->d_inode);
+	char __buf[20];
+	loff_t __ppos = *ppos;
+	size_t len;
+
+	/* the terminating NUL is not part of the file content */
+	len = sprintf(__buf, "%u\n", tsk->seccomp_mode);
+	if (__ppos >= len)
+		return 0;
+	if (count > len-__ppos)
+		count = len-__ppos;
+	if (copy_to_user(buf, __buf + __ppos, count))
+		return -EFAULT;
+	*ppos += count;
+	return count;
+}
+
+/*
+ * Write handler for the per-task "seccomp" proc file: parses a number
+ * with simple_strtoul() (base argument 0), optionally followed by a
+ * newline, and switches the task into that seccomp mode.
+ *
+ * Returns the number of bytes consumed, -EPERM if a mode is already set,
+ * -EFAULT if the user buffer cannot be read, -EIO if no number could be
+ * parsed, or -EINVAL if the number is not in 1..NR_SECCOMP_MODES.
+ */
+static ssize_t seccomp_write(struct file * file, const char * buf,
+			     size_t count, loff_t *ppos)
+{
+	struct task_struct * tsk = proc_task(file->f_dentry->d_inode);
+	char __buf[20], * end;
+	unsigned int seccomp_mode;
+
+	/* can set it only once to be even more secure */
+	if (unlikely(tsk->seccomp_mode))
+		return -EPERM;
+
+	/* zero-fill and cap the copy so __buf is always NUL terminated */
+	memset(__buf, 0, sizeof(__buf));
+	if (count > sizeof(__buf) - 1)
+		count = sizeof(__buf) - 1;
+	if (copy_from_user(__buf, buf, count))
+		return -EFAULT;
+	seccomp_mode = simple_strtoul(__buf, &end, 0);
+	if (unlikely(end == __buf))
+		return -EIO;
+	if (*end == '\n')
+		end++;
+	/* an unknown mode must fail loudly, not look like a successful write */
+	if (!seccomp_mode || seccomp_mode > NR_SECCOMP_MODES)
+		return -EINVAL;
+	tsk->seccomp_mode = seccomp_mode;
+	set_tsk_thread_flag(tsk, TIF_SECCOMP);
+	return end - __buf;
+}
+
+/* file operations installed on the "seccomp" proc entries */
+static struct file_operations proc_seccomp_operations = {
+	.read = seccomp_read,
+	.write = seccomp_write,
+};
+#endif /* CONFIG_SECCOMP */
+
 static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct inode *inode = dentry->d_inode;
@@ -1338,6 +1426,12 @@ static struct dentry *proc_pident_lookup
 			inode->i_op = &proc_mem_inode_operations;
 			inode->i_fop = &proc_mem_operations;
 			break;
+#ifdef CONFIG_SECCOMP
+		case PROC_TID_SECCOMP:
+		case PROC_TGID_SECCOMP:
+			inode->i_fop = &proc_seccomp_operations;
+			break;
+#endif /* CONFIG_SECCOMP */
 		case PROC_TID_MOUNTS:
 		case PROC_TGID_MOUNTS:
 			inode->i_fop = &proc_mounts_operations;
Index: linux-2.5/include/asm-i386/thread_info.h
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/include/asm-i386/thread_info.h,v
retrieving revision 1.21
diff -u -p -r1.21 thread_info.h
--- linux-2.5/include/asm-i386/thread_info.h 23 Aug 2004 19:36:54 -0000 1.21
+++ linux-2.5/include/asm-i386/thread_info.h 12 Oct 2004 00:52:07 -0000
@@ -144,6 +144,7 @@ static inline unsigned long current_stac
 #define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */
 #define TIF_IRET 5 /* return with iret */
 #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
+#define TIF_SECCOMP 8 /* secure computing */
 #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */
 
 #define _TIF_SYSCALL_TRACE (1<
+ *
+ * This defines
a simple but solid secure-computing mode.
+ */
+
+#include
+#include
+#include
+#ifdef TIF_IA32
+#include
+#endif
+
+/* #define SECCOMP_DEBUG 1 */
+
+/*
+ * Secure computing mode 1 allows only read/write/exit/sigreturn.
+ * To be fully secure this must be combined with rlimit
+ * to limit the stack allocations too.
+ *
+ * The table ends with -1 rather than 0: 0 is a real syscall number on
+ * some architectures (__NR_read on x86-64), so it cannot double as the
+ * terminator without depending on the order of the entries.
+ */
+static const int mode1_syscalls[] = {
+	__NR_read, __NR_write, __NR_exit,
+	/*
+	 * Allow either sigreturn or rt_sigreturn, newer archs
+	 * like x86-64 only defines __NR_rt_sigreturn.
+	 */
+#ifdef __NR_sigreturn
+	__NR_sigreturn,
+#else
+	__NR_rt_sigreturn,
+#endif
+	-1, /* negative terminated */
+};
+
+#ifdef TIF_IA32
+/* Mode 1 whitelist used for tasks with TIF_IA32 set; also -1 terminated. */
+static const int mode1_syscalls_32bit[] = {
+	__NR_ia32_read, __NR_ia32_write, __NR_ia32_exit,
+	__NR_ia32_sigreturn,
+	-1, /* negative terminated */
+};
+#endif
+
+/*
+ * Enforce the current task's seccomp mode for one syscall.
+ *
+ * this_syscall is the syscall number to check.  Returns normally when
+ * mode 1 lists it in the applicable whitelist; otherwise the task is
+ * ended with do_exit(SIGKILL).  Any mode other than 1 hits BUG().
+ */
+void secure_computing(int this_syscall)
+{
+	int mode = current->seccomp_mode;
+	const int * syscall;
+
+	switch (mode) {
+	case 1:
+		syscall = mode1_syscalls;
+#ifdef TIF_IA32
+		if (test_thread_flag(TIF_IA32))
+			syscall = mode1_syscalls_32bit;
+#endif
+		/* every table has at least one real entry before the -1 */
+		do {
+			if (*syscall == this_syscall)
+				return;
+		} while (*++syscall != -1);
+		break;
+	default:
+		BUG();
+	}
+
+#ifdef SECCOMP_DEBUG
+	dump_stack();
+#endif
+	do_exit(SIGKILL);
+}
--- /dev/null 2004-04-06 15:27:52.000000000 +0200
+++ linux-2.5/include/linux/seccomp.h 2004-10-12 02:52:07.105845704 +0200
@@ -0,0 +1,8 @@
+#ifndef _LINUX_SECCOMP_H
+#define _LINUX_SECCOMP_H
+
+#define NR_SECCOMP_MODES 1
+
+extern void secure_computing(int);
+
+#endif /* _LINUX_SECCOMP_H */