commit		098fc560ef2bbd1bde80845c898fa95db616eb6c (merge)
author		Linus Torvalds <torvalds@ppc970.osdl.org>	2004-10-18 20:50:22 -0700
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2004-10-18 20:50:22 -0700
tree		ca722c6fdbdffe9b7cfd31d61e8f4aae906a319c	/kernel
parent		bffe01870598b7a0a77073e25ee94e026bc98e6b
parent		2a136606fe21b603a0ce484fc578f862f8e8384d
download	history-098fc560ef2bbd1bde80845c898fa95db616eb6c.tar.gz
Trivial Makefile merge
Diffstat (limited to 'kernel')

 kernel/Makefile   |   2
 kernel/exit.c     |   4
 kernel/fork.c     | 139
 kernel/kallsyms.c | 194
 kernel/kfifo.c    | 170
 kernel/panic.c    |  16
 kernel/pid.c      | 117
 kernel/power/pm.c |  31
 kernel/printk.c   |   8
 kernel/profile.c  | 258
 kernel/signal.c   |  47
 kernel/sys.c      |  20
 kernel/sysctl.c   |   6
 kernel/timer.c    |   5
 kernel/user.c     |  16
 kernel/wait.c     | 246

 16 files changed, 928 insertions, 351 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index c4337751cee716..abab504f01e120 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -7,7 +7,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    sysctl.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o intermodule.o extable.o params.o posix-timers.o \
-	    kthread.o
+	    kthread.o wait.o kfifo.o
 
 obj-$(CONFIG_FUTEX) += futex.o
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
diff --git a/kernel/exit.c b/kernel/exit.c
index 55d85339252454..a8ae81ed1d41af 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -14,6 +14,7 @@
 #include <linux/personality.h>
 #include <linux/tty.h>
 #include <linux/namespace.h>
+#include <linux/key.h>
 #include <linux/security.h>
 #include <linux/cpu.h>
 #include <linux/acct.h>
@@ -511,8 +512,6 @@ void exit_mm(struct task_struct *tsk)
 	__exit_mm(tsk);
 }
 
-EXPORT_SYMBOL(exit_mm);
-
 static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper)
 {
 	/*
@@ -816,6 +815,7 @@ asmlinkage NORET_TYPE void do_exit(long code)
 	__exit_fs(tsk);
 	exit_namespace(tsk);
 	exit_thread();
+	exit_keys(tsk);
 
 	if (tsk->signal->leader)
 		disassociate_ctty(1);
diff --git a/kernel/fork.c b/kernel/fork.c
index 3020dccc548ffb..96714c501cc831 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -24,6 +24,7 @@
 #include <linux/mempolicy.h>
 #include <linux/sem.h>
 #include <linux/file.h>
+#include <linux/key.h>
 #include <linux/binfmts.h>
 #include <linux/mman.h>
 #include <linux/fs.h>
@@ -100,131 +101,6 @@ void __put_task_struct(struct task_struct *tsk)
 	free_task(tsk);
 }
 
-void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
-{
-	unsigned long flags;
-
-	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
-	__add_wait_queue(q, wait);
-	spin_unlock_irqrestore(&q->lock, flags);
-}
-
-EXPORT_SYMBOL(add_wait_queue);
-
-void fastcall add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)
-{
-	unsigned long flags;
-
-	wait->flags |= WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
-	__add_wait_queue_tail(q, wait);
-	spin_unlock_irqrestore(&q->lock, flags);
-}
-
-EXPORT_SYMBOL(add_wait_queue_exclusive);
-
-void fastcall remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&q->lock, flags);
-	__remove_wait_queue(q, wait);
-	spin_unlock_irqrestore(&q->lock, flags);
-}
-
-EXPORT_SYMBOL(remove_wait_queue);
-
-
-/*
- * Note: we use "set_current_state()" _after_ the wait-queue add,
- * because we need a memory barrier there on SMP, so that any
- * wake-function that tests for the wait-queue being active
- * will be guaranteed to see waitqueue addition _or_ subsequent
- * tests in this thread will see the wakeup having taken place.
- *
- * The spin_unlock() itself is semi-permeable and only protects
- * one way (it only protects stuff inside the critical region and
- * stops them from bleeding out - it would still allow subsequent
- * loads to move into the the critical region).
- */
-void fastcall prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
-{
-	unsigned long flags;
-
-	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
-	if (list_empty(&wait->task_list))
-		__add_wait_queue(q, wait);
-	/*
-	 * don't alter the task state if this is just going to
-	 * queue an async wait queue callback
-	 */
-	if (is_sync_wait(wait))
-		set_current_state(state);
-	spin_unlock_irqrestore(&q->lock, flags);
-}
-
-EXPORT_SYMBOL(prepare_to_wait);
-
-void fastcall
-prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
-{
-	unsigned long flags;
-
-	wait->flags |= WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
-	if (list_empty(&wait->task_list))
-		__add_wait_queue_tail(q, wait);
-	/*
-	 * don't alter the task state if this is just going to
-	 * queue an async wait queue callback
-	 */
-	if (is_sync_wait(wait))
-		set_current_state(state);
-	spin_unlock_irqrestore(&q->lock, flags);
-}
-
-EXPORT_SYMBOL(prepare_to_wait_exclusive);
-
-void fastcall finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
-{
-	unsigned long flags;
-
-	__set_current_state(TASK_RUNNING);
-	/*
-	 * We can check for list emptiness outside the lock
-	 * IFF:
-	 * - we use the "careful" check that verifies both
-	 *   the next and prev pointers, so that there cannot
-	 *   be any half-pending updates in progress on other
-	 *   CPU's that we haven't seen yet (and that might
-	 *   still change the stack area.
-	 * and
-	 * - all other users take the lock (ie we can only
-	 *   have _one_ other CPU that looks at or modifies
-	 *   the list).
-	 */
-	if (!list_empty_careful(&wait->task_list)) {
-		spin_lock_irqsave(&q->lock, flags);
-		list_del_init(&wait->task_list);
-		spin_unlock_irqrestore(&q->lock, flags);
-	}
-}
-
-EXPORT_SYMBOL(finish_wait);
-
-int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
-{
-	int ret = default_wake_function(wait, mode, sync, key);
-
-	if (ret)
-		list_del_init(&wait->task_list);
-	return ret;
-}
-
-EXPORT_SYMBOL(autoremove_wake_function);
-
 void __init fork_init(unsigned long mempages)
 {
 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
@@ -426,6 +302,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
 	atomic_set(&mm->mm_count, 1);
 	init_rwsem(&mm->mmap_sem);
 	mm->core_waiters = 0;
+	mm->nr_ptes = 0;
 	mm->page_table_lock = SPIN_LOCK_UNLOCKED;
 	mm->ioctx_list_lock = RW_LOCK_UNLOCKED;
 	mm->ioctx_list = NULL;
@@ -1019,6 +896,10 @@ static task_t *copy_process(unsigned long clone_flags,
 	}
 #endif
 
+	p->tgid = p->pid;
+	if (clone_flags & CLONE_THREAD)
+		p->tgid = current->tgid;
+
 	if ((retval = security_task_alloc(p)))
 		goto bad_fork_cleanup_policy;
 	if ((retval = audit_alloc(p)))
@@ -1036,8 +917,10 @@ static task_t *copy_process(unsigned long clone_flags,
 		goto bad_fork_cleanup_sighand;
 	if ((retval = copy_mm(clone_flags, p)))
 		goto bad_fork_cleanup_signal;
-	if ((retval = copy_namespace(clone_flags, p)))
+	if ((retval = copy_keys(clone_flags, p)))
 		goto bad_fork_cleanup_mm;
+	if ((retval = copy_namespace(clone_flags, p)))
+		goto bad_fork_cleanup_keys;
 	retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
 	if (retval)
 		goto bad_fork_cleanup_namespace;
@@ -1071,7 +954,6 @@ static task_t *copy_process(unsigned long clone_flags,
 	 * Ok, make it visible to the rest of the system.
 	 * We dont wake it up yet.
 	 */
-	p->tgid = p->pid;
 	p->group_leader = p;
 	INIT_LIST_HEAD(&p->ptrace_children);
 	INIT_LIST_HEAD(&p->ptrace_list);
@@ -1119,7 +1001,6 @@ static task_t *copy_process(unsigned long clone_flags,
 			retval = -EAGAIN;
 			goto bad_fork_cleanup_namespace;
 		}
-		p->tgid = current->tgid;
 		p->group_leader = current->group_leader;
 
 		if (current->signal->group_stop_count > 0) {
@@ -1159,6 +1040,8 @@ fork_out:
 
 bad_fork_cleanup_namespace:
 	exit_namespace(p);
+bad_fork_cleanup_keys:
+	exit_keys(p);
bad_fork_cleanup_mm:
 	if (p->mm)
 		mmput(p->mm);
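The copy_keys()/copy_namespace() hunk above threads a new resource into copy_process()'s goto-based unwind ladder: each successful acquisition adds a cleanup label, and a failure jumps to the label that releases everything acquired so far, in reverse order. A minimal user-space sketch of the same idiom; every name in it is invented for illustration:

#include <stdio.h>

struct object { int a, b, c; };

/* Hypothetical setup/teardown pairs standing in for copy_keys(),
 * copy_namespace(), copy_thread() and their cleanup counterparts. */
static int setup_a(struct object *o) { o->a = 1; return 0; }
static int setup_b(struct object *o) { o->b = 1; return 0; }
static int setup_c(struct object *o) { o->c = 1; return 0; }
static void teardown_a(struct object *o) { o->a = 0; }
static void teardown_b(struct object *o) { o->b = 0; }

static int create_object(struct object *obj)
{
	int err;

	if ((err = setup_a(obj)))
		goto fail;
	if ((err = setup_b(obj)))
		goto cleanup_a;		/* b failed: undo a only */
	if ((err = setup_c(obj)))
		goto cleanup_b;		/* c failed: undo b, then a */
	return 0;

cleanup_b:
	teardown_b(obj);
cleanup_a:
	teardown_a(obj);
fail:
	return err;
}

int main(void)
{
	struct object o = { 0, 0, 0 };
	printf("create_object: %d\n", create_object(&o));
	return 0;
}

Inserting bad_fork_cleanup_keys between the namespace and mm labels is exactly this pattern: the label order mirrors the reverse of the acquisition order.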
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 74ba3cb2180945..8f3c6c1d1ce7a6 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -4,7 +4,12 @@
  * Rewritten and vastly simplified by Rusty Russell for in-kernel
  * module loader:
  *   Copyright 2002 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
- * Stem compression by Andi Kleen.
+ *
+ * ChangeLog:
+ *
+ * (25/Aug/2004) Paulo Marques <pmarques@grupopie.com>
+ *      Changed the compression method from stem compression to "table lookup"
+ *      compression (see scripts/kallsyms.c for a more complete description)
  */
 #include <linux/kallsyms.h>
 #include <linux/module.h>
@@ -17,7 +22,12 @@
 /* These will be re-linked against their real values during the second link stage */
 extern unsigned long kallsyms_addresses[] __attribute__((weak));
 extern unsigned long kallsyms_num_syms __attribute__((weak));
-extern char kallsyms_names[] __attribute__((weak));
+extern u8 kallsyms_names[] __attribute__((weak));
+
+extern u8 kallsyms_token_table[] __attribute__((weak));
+extern u16 kallsyms_token_index[] __attribute__((weak));
+
+extern unsigned long kallsyms_markers[] __attribute__((weak));
 
 /* Defined by the linker script. */
 extern char _stext[], _etext[], _sinittext[], _einittext[];
@@ -37,21 +47,88 @@ static inline int is_kernel_text(unsigned long addr)
 	return 0;
 }
 
+/* expand a compressed symbol into the resulting uncompressed string,
+   given the offset to where the symbol is in the compressed stream */
+static unsigned int kallsyms_expand_symbol(unsigned int off, char *result)
+{
+	int len, skipped_first = 0;
+	u8 *tptr, *data;
+
+	/* get the compressed symbol length from the first symbol byte */
+	data = &kallsyms_names[off];
+	len = *data;
+	data++;
+
+	/* update the offset to return the offset for the next symbol on
+	 * the compressed stream */
+	off += len + 1;
+
+	/* for every byte on the compressed symbol data, copy the table
+	   entry for that byte */
+	while(len) {
+		tptr = &kallsyms_token_table[ kallsyms_token_index[*data] ];
+		data++;
+		len--;
+
+		while (*tptr) {
+			if(skipped_first) {
+				*result = *tptr;
+				result++;
+			} else
+				skipped_first = 1;
+			tptr++;
+		}
+	}
+
+	*result = '\0';
+
+	/* return the offset to the next symbol */
+	return off;
+}
+
+/* get symbol type information. This is encoded as a single char at the
+ * beginning of the symbol name */
+static char kallsyms_get_symbol_type(unsigned int off)
+{
+	/* get just the first code, look it up in the token table, and return the
+	 * first char from this token */
+	return kallsyms_token_table[ kallsyms_token_index[ kallsyms_names[off+1] ] ];
+}
+
+
+/* find the offset on the compressed stream given an index in the
+ * kallsyms array */
+static unsigned int get_symbol_offset(unsigned long pos)
+{
+	u8 *name;
+	int i;
+
+	/* use the closest marker we have. We have markers every 256 positions,
+	 * so that should be close enough */
+	name = &kallsyms_names[ kallsyms_markers[pos>>8] ];
+
+	/* sequentially scan all the symbols up to the point we're searching for.
+	 * Every symbol is stored in a [<len>][<len> bytes of data] format, so we
+	 * just need to add the len to the current pointer for every symbol we
+	 * wish to skip */
+	for(i = 0; i < (pos&0xFF); i++)
+		name = name + (*name) + 1;
+
+	return name - kallsyms_names;
+}
+
 /* Lookup the address for this symbol. Returns 0 if not found. */
 unsigned long kallsyms_lookup_name(const char *name)
 {
 	char namebuf[KSYM_NAME_LEN+1];
 	unsigned long i;
-	char *knames;
+	unsigned int off;
 
-	for (i = 0, knames = kallsyms_names; i < kallsyms_num_syms; i++) {
-		unsigned prefix = *knames++;
+	for (i = 0, off = 0; i < kallsyms_num_syms; i++) {
+		off = kallsyms_expand_symbol(off, namebuf);
 
-		strlcpy(namebuf + prefix, knames, KSYM_NAME_LEN - prefix);
 		if (strcmp(namebuf, name) == 0)
 			return kallsyms_addresses[i];
-
-		knames += strlen(knames) + 1;
 	}
 	return module_kallsyms_lookup_name(name);
 }
@@ -62,7 +139,7 @@ const char *kallsyms_lookup(unsigned long addr,
 			    unsigned long *offset,
 			    char **modname, char *namebuf)
 {
-	unsigned long i, best = 0;
+	unsigned long i, low, high, mid;
 
 	/* This kernel should never had been booted. */
 	BUG_ON(!kallsyms_addresses);
@@ -71,40 +148,45 @@ const char *kallsyms_lookup(unsigned long addr,
 	namebuf[0] = 0;
 
 	if (is_kernel_text(addr) || is_kernel_inittext(addr)) {
-		unsigned long symbol_end;
-		char *name = kallsyms_names;
-
-		/* They're sorted, we could be clever here, but who cares? */
-		for (i = 0; i < kallsyms_num_syms; i++) {
-			if (kallsyms_addresses[i] > kallsyms_addresses[best] &&
-			    kallsyms_addresses[i] <= addr)
-				best = i;
-		}
+		unsigned long symbol_end=0;
 
-		/* Grab name */
-		for (i = 0; i <= best; i++) {
-			unsigned prefix = *name++;
-			strncpy(namebuf + prefix, name, KSYM_NAME_LEN - prefix);
-			name += strlen(name) + 1;
+		/* do a binary search on the sorted kallsyms_addresses array */
+		low = 0;
+		high = kallsyms_num_syms;
+
+		while (high-low > 1) {
+			mid = (low + high) / 2;
+			if (kallsyms_addresses[mid] <= addr) low = mid;
+			else high = mid;
 		}
 
-		/* At worst, symbol ends at end of section. */
-		if (is_kernel_inittext(addr))
-			symbol_end = (unsigned long)_einittext;
-		else
-			symbol_end = (unsigned long)_etext;
+		/* search for the first aliased symbol. Aliased symbols are
+		   symbols with the same address */
+		while (low && kallsyms_addresses[low - 1] == kallsyms_addresses[low])
+			--low;
+
+		/* Grab name */
+		kallsyms_expand_symbol(get_symbol_offset(low), namebuf);
 
 		/* Search for next non-aliased symbol */
-		for (i = best+1; i < kallsyms_num_syms; i++) {
-			if (kallsyms_addresses[i] > kallsyms_addresses[best]) {
+		for (i = low + 1; i < kallsyms_num_syms; i++) {
+			if (kallsyms_addresses[i] > kallsyms_addresses[low]) {
 				symbol_end = kallsyms_addresses[i];
 				break;
 			}
 		}
 
-		*symbolsize = symbol_end - kallsyms_addresses[best];
+		/* if we found no next symbol, we use the end of the section */
+		if (!symbol_end) {
+			if (is_kernel_inittext(addr))
+				symbol_end = (unsigned long)_einittext;
+			else
+				symbol_end = (unsigned long)_etext;
+		}
+
+		*symbolsize = symbol_end - kallsyms_addresses[low];
 		*modname = NULL;
-		*offset = addr - kallsyms_addresses[best];
+		*offset = addr - kallsyms_addresses[low];
 		return namebuf;
 	}
 
@@ -135,7 +217,7 @@ void __print_symbol(const char *fmt, unsigned long address)
 	printk(fmt, buffer);
 }
 
-/* To avoid O(n^2) iteration, we carry prefix along. */
+/* To avoid using get_symbol_offset for every symbol, we carry prefix along. */
 struct kallsym_iter
 {
 	loff_t pos;
@@ -168,31 +250,23 @@ static int get_ksymbol_mod(struct kallsym_iter *iter)
 /* Returns space to next name. */
 static unsigned long get_ksymbol_core(struct kallsym_iter *iter)
 {
-	unsigned stemlen, off = iter->nameoff;
-
-	/* First char of each symbol name indicates prefix length
-	   shared with previous name (stem compression). */
-	stemlen = kallsyms_names[off++];
+	unsigned off = iter->nameoff;
 
-	strlcpy(iter->name+stemlen, kallsyms_names + off,
-		KSYM_NAME_LEN+1-stemlen);
-	off += strlen(kallsyms_names + off) + 1;
 	iter->owner = NULL;
 	iter->value = kallsyms_addresses[iter->pos];
-	if (is_kernel_text(iter->value) || is_kernel_inittext(iter->value))
-		iter->type = 't';
-	else
-		iter->type = 'd';
 
-	upcase_if_global(iter);
+	iter->type = kallsyms_get_symbol_type(off);
+
+	off = kallsyms_expand_symbol(off, iter->name);
+
 	return off - iter->nameoff;
 }
 
-static void reset_iter(struct kallsym_iter *iter)
+static void reset_iter(struct kallsym_iter *iter, loff_t new_pos)
 {
 	iter->name[0] = '\0';
-	iter->nameoff = 0;
-	iter->pos = 0;
+	iter->nameoff = get_symbol_offset(new_pos);
+	iter->pos = new_pos;
 }
 
 /* Returns false if pos at or past end of file. */
@@ -204,16 +278,13 @@ static int update_iter(struct kallsym_iter *iter, loff_t pos)
 		return get_ksymbol_mod(iter);
 	}
 
-	/* If we're past the desired position, reset to start. */
-	if (pos < iter->pos)
-		reset_iter(iter);
-
-	/* We need to iterate through the previous symbols: can be slow */
-	for (; iter->pos != pos; iter->pos++) {
-		iter->nameoff += get_ksymbol_core(iter);
-		cond_resched();
-	}
-	get_ksymbol_core(iter);
+	/* If we're not on the desired position, reset to new position. */
+	if (pos != iter->pos)
+		reset_iter(iter, pos);
+
+	iter->nameoff += get_ksymbol_core(iter);
+	iter->pos++;
+
 	return 1;
 }
 
@@ -267,14 +338,15 @@ struct seq_operations kallsyms_op = {
 static int kallsyms_open(struct inode *inode, struct file *file)
 {
 	/* We keep iterator in m->private, since normal case is to
-	 * s_start from where we left off, so we avoid O(N^2). */
+	 * s_start from where we left off, so we avoid calling
+	 * get_symbol_offset for every symbol */
 	struct kallsym_iter *iter;
 	int ret;
 
 	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
 	if (!iter)
 		return -ENOMEM;
-	reset_iter(iter);
+	reset_iter(iter, 0);
 
 	ret = seq_open(file, &kallsyms_op);
 	if (ret == 0)
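The stream format driving kallsyms_expand_symbol() above is [length][token indices...] per symbol, with the first expanded character encoding the symbol type and being skipped when the name is built. A user-space model of the decoder; the two-token table here (token 0 = "Tk", token 1 = "_print") is hypothetical, while the real kallsyms_names/kallsyms_token_table/kallsyms_token_index arrays are emitted by scripts/kallsyms at link time:

#include <stdio.h>

static const unsigned char names[] = { 2, 0, 1 };	/* one symbol: 2 tokens */
static const char token_table[] = "Tk\0_print";		/* NUL-separated tokens */
static const unsigned short token_index[] = { 0, 3 };	/* offsets into table */

static unsigned int expand_symbol(unsigned int off, char *result)
{
	int len = names[off++], skipped_first = 0;

	while (len--) {
		const char *t = &token_table[token_index[names[off++]]];

		for (; *t; t++) {
			if (skipped_first)
				*result++ = *t;
			else
				skipped_first = 1;	/* drop the type char */
		}
	}
	*result = '\0';
	return off;				/* offset of the next symbol */
}

int main(void)
{
	char buf[64];
	unsigned int next = expand_symbol(0, buf);

	/* mirrors kallsyms_get_symbol_type(): first char of the first token */
	printf("type %c, name %s, next symbol at offset %u\n",
	       token_table[token_index[names[1]]], buf, next);
	return 0;
}

This prints "type T, name k_print, next symbol at offset 3". The returned offset is where the next symbol's length byte sits, which is exactly the contract get_symbol_offset() relies on when it skips *name + 1 bytes per symbol from the nearest 256-symbol marker.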
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
new file mode 100644
index 00000000000000..9a5e17b507fe7d
--- /dev/null
+++ b/kernel/kfifo.c
@@ -0,0 +1,170 @@
+/*
+ * A simple kernel FIFO implementation.
+ *
+ * Copyright (C) 2004 Stelian Pop <stelian@popies.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/kfifo.h>
+
+/*
+ * kfifo_init - allocates a new FIFO using a preallocated buffer
+ * @buffer: the preallocated buffer to be used.
+ * @size: the size of the internal buffer, this has to be a power of 2.
+ * @gfp_mask: get_free_pages mask, passed to kmalloc()
+ * @lock: the lock to be used to protect the fifo buffer
+ *
+ * Do NOT pass the kfifo to kfifo_free() after use! Simply free the
+ * struct kfifo with kfree().
+ */
+struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size,
+			 int gfp_mask, spinlock_t *lock)
+{
+	struct kfifo *fifo;
+
+	/* size must be a power of 2 */
+	BUG_ON(size & (size - 1));
+
+	fifo = kmalloc(sizeof(struct kfifo), gfp_mask);
+	if (!fifo)
+		return ERR_PTR(-ENOMEM);
+
+	fifo->buffer = buffer;
+	fifo->size = size;
+	fifo->in = fifo->out = 0;
+	fifo->lock = lock;
+
+	return fifo;
+}
+EXPORT_SYMBOL(kfifo_init);
+
+/*
+ * kfifo_alloc - allocates a new FIFO and its internal buffer
+ * @size: the size of the internal buffer to be allocated.
+ * @gfp_mask: get_free_pages mask, passed to kmalloc()
+ * @lock: the lock to be used to protect the fifo buffer
+ *
+ * The size will be rounded up to a power of 2.
+ */
+struct kfifo *kfifo_alloc(unsigned int size, int gfp_mask, spinlock_t *lock)
+{
+	unsigned int newsize;
+	unsigned char *buffer;
+	struct kfifo *ret;
+
+	/*
+	 * round up to the next power of 2, since our 'let the indices
+	 * wrap' technique works only in this case.
+	 */
+	newsize = size;
+	if (size & (size - 1)) {
+		BUG_ON(size > 0x80000000);
+		newsize = roundup_pow_of_two(size);
+	}
+
+	buffer = kmalloc(newsize, gfp_mask);
+	if (!buffer)
+		return ERR_PTR(-ENOMEM);
+
+	ret = kfifo_init(buffer, size, gfp_mask, lock);
+
+	if (IS_ERR(ret))
+		kfree(buffer);
+
+	return ret;
+}
+EXPORT_SYMBOL(kfifo_alloc);
+
+/*
+ * kfifo_free - frees the FIFO
+ * @fifo: the fifo to be freed.
+ */
+void kfifo_free(struct kfifo *fifo)
+{
+	kfree(fifo->buffer);
+	kfree(fifo);
+}
+EXPORT_SYMBOL(kfifo_free);
+
+/*
+ * __kfifo_put - puts some data into the FIFO, no locking version
+ * @fifo: the fifo to be used.
+ * @buffer: the data to be added.
+ * @len: the length of the data to be added.
+ *
+ * This function copies at most 'len' bytes from the 'buffer' into
+ * the FIFO depending on the free space, and returns the number of
+ * bytes copied.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+unsigned int __kfifo_put(struct kfifo *fifo,
+			 unsigned char *buffer, unsigned int len)
+{
+	unsigned int l;
+
+	len = min(len, fifo->size - fifo->in + fifo->out);
+
+	/* first put the data starting from fifo->in to buffer end */
+	l = min(len, fifo->size - (fifo->in & (fifo->size - 1)));
+	memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), buffer, l);
+
+	/* then put the rest (if any) at the beginning of the buffer */
+	memcpy(fifo->buffer, buffer + l, len - l);
+
+	fifo->in += len;
+
+	return len;
+}
+EXPORT_SYMBOL(__kfifo_put);
+
+/*
+ * __kfifo_get - gets some data from the FIFO, no locking version
+ * @fifo: the fifo to be used.
+ * @buffer: where the data must be copied.
+ * @len: the size of the destination buffer.
+ *
+ * This function copies at most 'len' bytes from the FIFO into the
+ * 'buffer' and returns the number of copied bytes.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+unsigned int __kfifo_get(struct kfifo *fifo,
+			 unsigned char *buffer, unsigned int len)
+{
+	unsigned int l;
+
+	len = min(len, fifo->in - fifo->out);
+
+	/* first get the data from fifo->out until the end of the buffer */
+	l = min(len, fifo->size - (fifo->out & (fifo->size - 1)));
+	memcpy(buffer, fifo->buffer + (fifo->out & (fifo->size - 1)), l);
+
+	/* then get the rest (if any) from the beginning of the buffer */
+	memcpy(buffer + l, fifo->buffer, len - l);
+
+	fifo->out += len;
+
+	return len;
+}
+EXPORT_SYMBOL(__kfifo_get);
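Because the buffer size is a power of two, fifo->in and fifo->out are free-running unsigned counters: they are never reset, in - out is always the number of queued bytes even across 32-bit wraparound, and index & (size - 1) is the buffer position. A minimal usage sketch under the single-writer/single-reader assumption stated above, so the lockless __kfifo_put()/__kfifo_get() variants suffice; error handling and any surrounding driver context are omitted:

static spinlock_t demo_lock = SPIN_LOCK_UNLOCKED;

static int kfifo_demo(void)
{
	unsigned char msg[] = "hello", out[8];
	struct kfifo *fifo;
	unsigned int n;

	fifo = kfifo_alloc(64, GFP_KERNEL, &demo_lock);	/* 64 is already 2^6 */
	if (IS_ERR(fifo))
		return PTR_ERR(fifo);

	n = __kfifo_put(fifo, msg, sizeof(msg));	/* returns bytes queued */
	n = __kfifo_get(fifo, out, sizeof(out));	/* returns bytes drained */

	kfifo_free(fifo);
	return n == sizeof(msg) ? 0 : -EIO;
}

With more than one writer or reader, the caller would instead take the spinlock passed at allocation time around each put/get.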
diff --git a/kernel/panic.c b/kernel/panic.c
index fce7f4030d0a75..c7ab9981c7aa76 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -110,6 +110,9 @@ EXPORT_SYMBOL(panic);
  *  'P' - Proprietary module has been loaded.
  *  'F' - Module has been forcibly loaded.
  *  'S' - SMP with CPUs not designed for SMP.
+ *  'R' - User forced a module unload.
+ *  'M' - Machine had a machine check experience.
+ *  'B' - System has hit bad_page.
  *
  *	The string is overwritten by the next call to print_taint().
  */
@@ -118,12 +121,21 @@ const char *print_tainted(void)
 {
 	static char buf[20];
 	if (tainted) {
-		snprintf(buf, sizeof(buf), "Tainted: %c%c%c",
+		snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c",
 			tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G',
 			tainted & TAINT_FORCED_MODULE ? 'F' : ' ',
-			tainted & TAINT_UNSAFE_SMP ? 'S' : ' ');
+			tainted & TAINT_UNSAFE_SMP ? 'S' : ' ',
+			tainted & TAINT_FORCED_RMMOD ? 'R' : ' ',
+			tainted & TAINT_MACHINE_CHECK ? 'M' : ' ',
+			tainted & TAINT_BAD_PAGE ? 'B' : ' ');
 	}
	else
 		snprintf(buf, sizeof(buf), "Not tainted");
 	return(buf);
 }
+
+void add_taint(unsigned flag)
+{
+	tainted |= flag;
+}
+EXPORT_SYMBOL(add_taint);
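print_tainted() above renders the taint bitmask as a fixed-width letter string. A user-space sketch decoding the same mask; the numeric values assume the TAINT_* flags of this era occupy bits 0 through 5, and the live mask can be read from /proc/sys/kernel/tainted:

#include <stdio.h>

static void print_taint_flags(unsigned tainted)
{
	printf("Tainted: %c%c%c%c%c%c\n",
	       tainted & 1  ? 'P' : 'G',	/* proprietary module */
	       tainted & 2  ? 'F' : ' ',	/* forced module load */
	       tainted & 4  ? 'S' : ' ',	/* SMP with non-SMP CPUs */
	       tainted & 8  ? 'R' : ' ',	/* forced module unload */
	       tainted & 16 ? 'M' : ' ',	/* machine check */
	       tainted & 32 ? 'B' : ' ');	/* bad page */
}

int main(void)
{
	print_taint_flags(1 | 8);	/* prints "Tainted: P  R  " */
	return 0;
}

The exported add_taint() lets other subsystems set these bits with a single call instead of touching the tainted variable directly.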
diff --git a/kernel/pid.c b/kernel/pid.c
index 83008f812f4974..21024b7ae37c2f 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -1,8 +1,9 @@
 /*
  * Generic pidhash and scalable, time-bounded PID allocator
  *
- * (C) 2002 William Irwin, IBM
- * (C) 2002 Ingo Molnar, Red Hat
+ * (C) 2002-2003 William Irwin, IBM
+ * (C) 2004 William Irwin, Oracle
+ * (C) 2002-2004 Ingo Molnar, Red Hat
  *
  * pid-structures are backing objects for tasks sharing a given ID to chain
  * against. There is very little to them aside from hashing them and
@@ -35,9 +36,15 @@ int last_pid;
 
 #define RESERVED_PIDS		300
 
-#define PIDMAP_ENTRIES		(PID_MAX_LIMIT/PAGE_SIZE/8)
+int pid_max_min = RESERVED_PIDS + 1;
+int pid_max_max = PID_MAX_LIMIT;
+
+#define PIDMAP_ENTRIES		((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
 #define BITS_PER_PAGE		(PAGE_SIZE*8)
 #define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)
+#define mk_pid(map, off)	(((map) - pidmap_array)*BITS_PER_PAGE + (off))
+#define find_next_offset(map, off) \
+		find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
 
 /*
  * PID-map pages start out as NULL, they get allocated upon
@@ -53,8 +60,6 @@ typedef struct pidmap {
 static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
 	 { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
 
-static pidmap_t *map_limit = pidmap_array + PIDMAP_ENTRIES;
-
 static spinlock_t pidmap_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 
 fastcall void free_pidmap(int pid)
@@ -66,15 +71,18 @@ fastcall void free_pidmap(int pid)
 	atomic_inc(&map->nr_free);
 }
 
-/*
- * Here we search for the next map that has free bits left.
- * Normally the next map has free PIDs.
- */
-static inline pidmap_t *next_free_map(pidmap_t *map, int *max_steps)
+int alloc_pidmap(void)
 {
-	while (--*max_steps) {
-		if (++map == map_limit)
-			map = pidmap_array;
+	int i, offset, max_scan, pid, last = last_pid;
+	pidmap_t *map;
+
+	pid = last + 1;
+	if (pid >= pid_max)
+		pid = RESERVED_PIDS;
+	offset = pid & BITS_PER_PAGE_MASK;
+	map = &pidmap_array[pid/BITS_PER_PAGE];
+	max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
+	for (i = 0; i <= max_scan; ++i) {
 		if (unlikely(!map->page)) {
 			unsigned long page = get_zeroed_page(GFP_KERNEL);
 			/*
@@ -87,62 +95,39 @@ static inline pidmap_t *next_free_map(pidmap_t *map, int *max_steps)
 			else
 				map->page = (void *)page;
 			spin_unlock(&pidmap_lock);
-
-			if (!map->page)
+			if (unlikely(!map->page))
 				break;
 		}
-		if (atomic_read(&map->nr_free))
-			return map;
-	}
-	return NULL;
-}
-
-int alloc_pidmap(void)
-{
-	int pid, offset, max_steps = PIDMAP_ENTRIES + 1;
-	pidmap_t *map;
-
-	pid = last_pid + 1;
-	if (pid >= pid_max)
-		pid = RESERVED_PIDS;
-
-	offset = pid & BITS_PER_PAGE_MASK;
-	map = pidmap_array + pid / BITS_PER_PAGE;
-
-	if (likely(map->page && !test_and_set_bit(offset, map->page))) {
-		/*
-		 * There is a small window for last_pid updates to race,
-		 * but in that case the next allocation will go into the
-		 * slowpath and that fixes things up.
-		 */
-return_pid:
-		atomic_dec(&map->nr_free);
-		last_pid = pid;
-		return pid;
-	}
-
-	if (!offset || !atomic_read(&map->nr_free)) {
-next_map:
-		map = next_free_map(map, &max_steps);
-		if (!map)
-			goto failure;
-		offset = 0;
+		if (likely(atomic_read(&map->nr_free))) {
+			do {
+				if (!test_and_set_bit(offset, map->page)) {
+					atomic_dec(&map->nr_free);
+					last_pid = pid;
+					return pid;
+				}
+				offset = find_next_offset(map, offset);
+				pid = mk_pid(map, offset);
+			/*
+			 * find_next_offset() found a bit, the pid from it
+			 * is in-bounds, and if we fell back to the last
+			 * bitmap block and the final block was the same
+			 * as the starting point, pid is before last_pid.
+			 */
+			} while (offset < BITS_PER_PAGE && pid < pid_max &&
+					(i != max_scan || pid < last ||
+					    !((last+1) & BITS_PER_PAGE_MASK)));
+		}
+		if (map < &pidmap_array[(pid_max-1)/BITS_PER_PAGE]) {
+			++map;
+			offset = 0;
+		} else {
+			map = &pidmap_array[0];
+			offset = RESERVED_PIDS;
+			if (unlikely(last == offset))
+				break;
+		}
+		pid = mk_pid(map, offset);
 	}
-	/*
-	 * Find the next zero bit:
-	 */
-scan_more:
-	offset = find_next_zero_bit(map->page, BITS_PER_PAGE, offset);
-	if (offset >= BITS_PER_PAGE)
-		goto next_map;
-	if (test_and_set_bit(offset, map->page))
-		goto scan_more;
-
-	/* we got the PID: */
-	pid = (map - pidmap_array) * BITS_PER_PAGE + offset;
-	goto return_pid;
-
-failure:
 	return -1;
 }
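The new alloc_pidmap() is a circular scan over per-page bitmaps: start one past last_pid, find the next zero bit, wrap back to RESERVED_PIDS at pid_max, and give up after one full cycle. A toy single-page, single-threaded model of that scan shape; the real code spans multiple pages and uses the atomic test_and_set_bit():

#include <stdio.h>
#include <string.h>

#define NBITS 64

static unsigned char page[NBITS / 8];	/* bit i set => pid i in use */

static int test_and_set(int off)
{
	int was = page[off / 8] & (1 << (off % 8));
	page[off / 8] |= 1 << (off % 8);
	return was;
}

static int find_next_zero(int off)
{
	while (off < NBITS && (page[off / 8] & (1 << (off % 8))))
		off++;
	return off;	/* NBITS means "page full" */
}

static int alloc_pid(int last)
{
	int off = (last + 1) % NBITS;
	int scans;

	for (scans = 0; scans < 2; scans++) {	/* at most one wraparound */
		off = find_next_zero(off);
		if (off < NBITS && !test_and_set(off))
			return off;
		off = 0;	/* wrap, like falling back to pidmap_array[0] */
	}
	return -1;		/* bitmap exhausted */
}

int main(void)
{
	memset(page, 0xff, 2);				/* pids 0..15 busy */
	printf("allocated pid %d\n", alloc_pid(7));	/* prints 16 */
	return 0;
}

The time bound in the real allocator comes from max_scan: the loop visits each bitmap page at most once per allocation, so a full pid space costs one pass rather than an unbounded retry.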
diff --git a/kernel/power/pm.c b/kernel/power/pm.c
index d1bc943072d409..8fca5822a80776 100644
--- a/kernel/power/pm.c
+++ b/kernel/power/pm.c
@@ -256,41 +256,10 @@ int pm_send_all(pm_request_t rqst, void *data)
 	return 0;
 }
 
-/**
- * pm_find - find a device
- * @type: type of device
- * @from: where to start looking
- *
- * Scan the power management list for devices of a specific type. The
- * return value for a matching device may be passed to further calls
- * to this function to find further matches. A %NULL indicates the end
- * of the list.
- *
- * To search from the beginning pass %NULL as the @from value.
- *
- * The caller MUST hold the pm_devs_lock lock when calling this
- * function. The instant that the lock is dropped all pointers returned
- * may become invalid.
- */
-
-struct pm_dev *pm_find(pm_dev_t type, struct pm_dev *from)
-{
-	struct list_head *entry = from ? from->entry.next:pm_devs.next;
-	while (entry != &pm_devs) {
-		struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
-		if (type == PM_UNKNOWN_DEV || dev->type == type)
-			return dev;
-		entry = entry->next;
-	}
-	return NULL;
-}
-
 EXPORT_SYMBOL(pm_register);
 EXPORT_SYMBOL(pm_unregister);
 EXPORT_SYMBOL(pm_unregister_all);
-EXPORT_SYMBOL(pm_send);
 EXPORT_SYMBOL(pm_send_all);
-EXPORT_SYMBOL(pm_find);
 EXPORT_SYMBOL(pm_active);
diff --git a/kernel/printk.c b/kernel/printk.c
index c02ec626f38404..390396fc6d017f 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -661,12 +661,10 @@ EXPORT_SYMBOL(release_console_sem);
  *
  * Must be called within acquire_console_sem().
  */
-void console_conditional_schedule(void)
+void __sched console_conditional_schedule(void)
 {
-	if (console_may_schedule && need_resched()) {
-		set_current_state(TASK_RUNNING);
-		schedule();
-	}
+	if (console_may_schedule)
+		cond_resched();
 }
 EXPORT_SYMBOL(console_conditional_schedule);
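console_conditional_schedule() now collapses to cond_resched() whenever the console path is allowed to sleep, so each chunk boundary in a long console transfer becomes a potential preemption point instead of an open-coded set_current_state()/schedule() pair. A hypothetical sketch of the kind of loop this serves; struct my_console and next_chunk() are invented names, while acquire_console_sem()/release_console_sem() and console_conditional_schedule() are the real 2.6-era API:

struct my_console;				/* hypothetical driver state */
extern int next_chunk(struct my_console *con);	/* invented: emits one chunk */

static void my_console_flush(struct my_console *con)
{
	acquire_console_sem();
	while (next_chunk(con))
		console_conditional_schedule();	/* no-op when scheduling is off */
	release_console_sem();
}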
diff --git a/kernel/profile.c b/kernel/profile.c
index 1c4375fad9230f..e7ff9b32d8220d 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -1,5 +1,16 @@
 /*
  *  linux/kernel/profile.c
+ *  Simple profiling. Manages a direct-mapped profile hit count buffer,
+ *  with configurable resolution, support for restricting the cpus on
+ *  which profiling is done, and switching between cpu time and
+ *  schedule() calls via kernel command line parameters passed at boot.
+ *
+ *  Scheduler profiling support, Arjan van de Ven and Ingo Molnar,
+ *	Red Hat, July 2004
+ *  Consolidation of architecture support code for profiling,
+ *	William Irwin, Oracle, July 2004
+ *  Amortized hit count accounting via per-cpu open-addressed hashtables
+ *	to resolve timer interrupt livelocks, William Irwin, Oracle, 2004
  */
 
 #include <linux/config.h>
@@ -9,13 +20,29 @@
 #include <linux/notifier.h>
 #include <linux/mm.h>
 #include <linux/cpumask.h>
+#include <linux/cpu.h>
 #include <linux/profile.h>
+#include <linux/highmem.h>
 #include <asm/sections.h>
+#include <asm/semaphore.h>
+
+struct profile_hit {
+	u32 pc, hits;
+};
+#define PROFILE_GRPSHIFT 3
+#define PROFILE_GRPSZ (1 << PROFILE_GRPSHIFT)
+#define NR_PROFILE_HIT (PAGE_SIZE/sizeof(struct profile_hit))
+#define NR_PROFILE_GRP (NR_PROFILE_HIT/PROFILE_GRPSZ)
 
 static atomic_t *prof_buffer;
 static unsigned long prof_len, prof_shift;
 static int prof_on;
 static cpumask_t prof_cpu_mask = CPU_MASK_ALL;
+#ifdef CONFIG_SMP
+static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits);
+static DEFINE_PER_CPU(int, cpu_profile_flip);
+static DECLARE_MUTEX(profile_flip_mutex);
+#endif /* CONFIG_SMP */
 
 static int __init profile_setup(char * str)
 {
@@ -181,6 +208,179 @@ EXPORT_SYMBOL_GPL(task_handoff_unregister);
 EXPORT_SYMBOL_GPL(profile_event_register);
 EXPORT_SYMBOL_GPL(profile_event_unregister);
 
+#ifdef CONFIG_SMP
+/*
+ * Each cpu has a pair of open-addressed hashtables for pending
+ * profile hits. read_profile() IPI's all cpus to request them
+ * to flip buffers and flushes their contents to prof_buffer itself.
+ * Flip requests are serialized by the profile_flip_mutex. The sole
+ * use of having a second hashtable is for avoiding cacheline
+ * contention that would otherwise happen during flushes of pending
+ * profile hits required for the accuracy of reported profile hits
+ * and so resurrect the interrupt livelock issue.
+ *
+ * The open-addressed hashtables are indexed by profile buffer slot
+ * and hold the number of pending hits to that profile buffer slot on
+ * a cpu in an entry. When the hashtable overflows, all pending hits
+ * are accounted to their corresponding profile buffer slots with
+ * atomic_add() and the hashtable emptied. As numerous pending hits
+ * may be accounted to a profile buffer slot in a hashtable entry,
+ * this amortizes a number of atomic profile buffer increments likely
+ * to be far larger than the number of entries in the hashtable,
+ * particularly given that the number of distinct profile buffer
+ * positions to which hits are accounted during short intervals (e.g.
+ * several seconds) is usually very small. Exclusion from buffer
+ * flipping is provided by interrupt disablement (note that for
+ * SCHED_PROFILING profile_hit() may be called from process context).
+ * The hash function is meant to be lightweight as opposed to strong,
+ * and was vaguely inspired by ppc64 firmware-supported inverted
+ * pagetable hash functions, but uses a full hashtable full of finite
+ * collision chains, not just pairs of them.
+ *
+ * -- wli
+ */
+static void __profile_flip_buffers(void *unused)
+{
+	int cpu = smp_processor_id();
+
+	per_cpu(cpu_profile_flip, cpu) = !per_cpu(cpu_profile_flip, cpu);
+}
+
+static void profile_flip_buffers(void)
+{
+	int i, j, cpu;
+
+	down(&profile_flip_mutex);
+	j = per_cpu(cpu_profile_flip, get_cpu());
+	put_cpu();
+	on_each_cpu(__profile_flip_buffers, NULL, 0, 1);
+	for_each_online_cpu(cpu) {
+		struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j];
+		for (i = 0; i < NR_PROFILE_HIT; ++i) {
+			if (!hits[i].hits) {
+				if (hits[i].pc)
+					hits[i].pc = 0;
+				continue;
+			}
+			atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]);
+			hits[i].hits = hits[i].pc = 0;
+		}
+	}
+	up(&profile_flip_mutex);
+}
+
+static void profile_discard_flip_buffers(void)
+{
+	int i, cpu;
+
+	down(&profile_flip_mutex);
+	i = per_cpu(cpu_profile_flip, get_cpu());
+	put_cpu();
+	on_each_cpu(__profile_flip_buffers, NULL, 0, 1);
+	for_each_online_cpu(cpu) {
+		struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i];
+		memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit));
+	}
+	up(&profile_flip_mutex);
+}
+
+void profile_hit(int type, void *__pc)
+{
+	unsigned long primary, secondary, flags, pc = (unsigned long)__pc;
+	int i, j, cpu;
+	struct profile_hit *hits;
+
+	if (prof_on != type || !prof_buffer)
+		return;
+	pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1);
+	i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
+	secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
+	cpu = get_cpu();
+	hits = per_cpu(cpu_profile_hits, cpu)[per_cpu(cpu_profile_flip, cpu)];
+	if (!hits) {
+		put_cpu();
+		return;
+	}
+	local_irq_save(flags);
+	do {
+		for (j = 0; j < PROFILE_GRPSZ; ++j) {
+			if (hits[i + j].pc == pc) {
+				hits[i + j].hits++;
+				goto out;
+			} else if (!hits[i + j].hits) {
+				hits[i + j].pc = pc;
+				hits[i + j].hits = 1;
+				goto out;
+			}
+		}
+		i = (i + secondary) & (NR_PROFILE_HIT - 1);
+	} while (i != primary);
+	atomic_inc(&prof_buffer[pc]);
+	for (i = 0; i < NR_PROFILE_HIT; ++i) {
+		atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]);
+		hits[i].pc = hits[i].hits = 0;
+	}
+out:
+	local_irq_restore(flags);
+	put_cpu();
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int __devinit profile_cpu_callback(struct notifier_block *info,
+					unsigned long action, void *__cpu)
+{
+	int node, cpu = (unsigned long)__cpu;
+	struct page *page;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+		node = cpu_to_node(cpu);
+		per_cpu(cpu_profile_flip, cpu) = 0;
+		if (!per_cpu(cpu_profile_hits, cpu)[1]) {
+			page = alloc_pages_node(node, GFP_KERNEL, 0);
+			if (!page)
+				return NOTIFY_BAD;
+			clear_highpage(page);
+			per_cpu(cpu_profile_hits, cpu)[1] = page_address(page);
+		}
+		if (!per_cpu(cpu_profile_hits, cpu)[0]) {
+			page = alloc_pages_node(node, GFP_KERNEL, 0);
+			if (!page)
+				goto out_free;
+			clear_highpage(page);
+			per_cpu(cpu_profile_hits, cpu)[0] = page_address(page);
+		}
+		break;
+	out_free:
+		page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
+		per_cpu(cpu_profile_hits, cpu)[1] = NULL;
+		__free_page(page);
+		return NOTIFY_BAD;
+	case CPU_ONLINE:
+		cpu_set(cpu, prof_cpu_mask);
+		break;
+	case CPU_UP_CANCELED:
+	case CPU_DEAD:
+		cpu_clear(cpu, prof_cpu_mask);
+		if (per_cpu(cpu_profile_hits, cpu)[0]) {
+			page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]);
+			per_cpu(cpu_profile_hits, cpu)[0] = NULL;
+			__free_page(page);
+		}
+		if (per_cpu(cpu_profile_hits, cpu)[1]) {
+			page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
+			per_cpu(cpu_profile_hits, cpu)[1] = NULL;
+			__free_page(page);
+		}
+		break;
+	}
+	return NOTIFY_OK;
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+#else /* !CONFIG_SMP */
+#define profile_flip_buffers()		do { } while (0)
+#define profile_discard_flip_buffers()	do { } while (0)
+
 void profile_hit(int type, void *__pc)
 {
 	unsigned long pc;
@@ -190,6 +390,7 @@ void profile_hit(int type, void *__pc)
 	pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift;
 	atomic_inc(&prof_buffer[min(pc, prof_len - 1)]);
 }
+#endif /* !CONFIG_SMP */
 
 void profile_tick(int type, struct pt_regs *regs)
 {
@@ -256,6 +457,7 @@ read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 	char * pnt;
 	unsigned int sample_step = 1 << prof_shift;
 
+	profile_flip_buffers();
 	if (p >= (prof_len+1)*sizeof(unsigned int))
 		return 0;
 	if (count > (prof_len+1)*sizeof(unsigned int) - p)
@@ -296,7 +498,7 @@ static ssize_t write_profile(struct file *file, const char __user *buf,
 		return -EINVAL;
 	}
 #endif
-
+	profile_discard_flip_buffers();
 	memset(prof_buffer, 0, prof_len * sizeof(atomic_t));
 	return count;
 }
@@ -306,16 +508,70 @@ static struct file_operations proc_profile_operations = {
 	.write		= write_profile,
 };
 
+#ifdef CONFIG_SMP
+static void __init profile_nop(void *unused)
+{
+}
+
+static int __init create_hash_tables(void)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		int node = cpu_to_node(cpu);
+		struct page *page;
+
+		page = alloc_pages_node(node, GFP_KERNEL, 0);
+		if (!page)
+			goto out_cleanup;
+		clear_highpage(page);
+		per_cpu(cpu_profile_hits, cpu)[1]
+				= (struct profile_hit *)page_address(page);
+		page = alloc_pages_node(node, GFP_KERNEL, 0);
+		if (!page)
+			goto out_cleanup;
+		clear_highpage(page);
+		per_cpu(cpu_profile_hits, cpu)[0]
+				= (struct profile_hit *)page_address(page);
+	}
+	return 0;
+out_cleanup:
+	prof_on = 0;
+	mb();
+	on_each_cpu(profile_nop, NULL, 0, 1);
+	for_each_online_cpu(cpu) {
+		struct page *page;
+
+		if (per_cpu(cpu_profile_hits, cpu)[0]) {
+			page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]);
+			per_cpu(cpu_profile_hits, cpu)[0] = NULL;
+			__free_page(page);
+		}
+		if (per_cpu(cpu_profile_hits, cpu)[1]) {
+			page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
+			per_cpu(cpu_profile_hits, cpu)[1] = NULL;
+			__free_page(page);
+		}
+	}
+	return -1;
+}
+#else
+#define create_hash_tables()			({ 0; })
+#endif
+
 static int __init create_proc_profile(void)
 {
 	struct proc_dir_entry *entry;
 
 	if (!prof_on)
 		return 0;
+	if (create_hash_tables())
+		return -1;
 	if (!(entry = create_proc_entry("profile", S_IWUSR | S_IRUGO, NULL)))
 		return 0;
 	entry->proc_fops = &proc_profile_operations;
 	entry->size = (1+prof_len) * sizeof(atomic_t);
+	hotcpu_notifier(profile_cpu_callback, 0);
 	return 0;
 }
 module_init(create_proc_profile);
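profile_hit() above probes the per-cpu table in groups of PROFILE_GRPSZ entries and, when a group is full, jumps by a pc-derived secondary stride until it returns to the primary group. A user-space model of that probe; the constants mirror a 4096-byte page of 8-byte entries (512 slots, 64 groups), and the full-table case is where the kernel flushes everything to prof_buffer:

#include <stdio.h>

#define GRPSHIFT 3
#define GRPSZ    (1 << GRPSHIFT)
#define NR_HIT   512
#define NR_GRP   (NR_HIT / GRPSZ)

struct hit { unsigned pc, hits; };
static struct hit table[NR_HIT];

static int record_hit(unsigned pc)
{
	unsigned primary = (pc & (NR_GRP - 1)) << GRPSHIFT;
	unsigned secondary = (~(pc << 1) & (NR_GRP - 1)) << GRPSHIFT;
	unsigned i = primary;
	int j;

	do {
		for (j = 0; j < GRPSZ; j++) {
			if (table[i + j].pc == pc) {
				table[i + j].hits++;	/* amortized hit */
				return 0;
			}
			if (!table[i + j].hits) {
				table[i + j].pc = pc;	/* claim empty slot */
				table[i + j].hits = 1;
				return 0;
			}
		}
		i = (i + secondary) & (NR_HIT - 1);	/* next group */
	} while (i != primary);
	return -1;	/* table full: kernel flushes to prof_buffer here */
}

int main(void)
{
	record_hit(42);
	record_hit(42);
	printf("hits for pc 42: %u\n",
	       table[(42 & (NR_GRP - 1)) << GRPSHIFT].hits);	/* 2 */
	return 0;
}

Many timer-interrupt hits on the same pc thus cost one cached increment each, and only the flip/flush path touches the shared prof_buffer with atomic_add(), which is what breaks the livelock described in the block comment.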
diff --git a/kernel/signal.c b/kernel/signal.c
index f67390806d7367..ba039fab37e8b5 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1143,36 +1143,6 @@ kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
 	return retval;
 }
 
-/*
- * kill_sl_info() sends a signal to the session leader: this is used
- * to send SIGHUP to the controlling process of a terminal when
- * the connection is lost.
- */
-
-
-int
-kill_sl_info(int sig, struct siginfo *info, pid_t sid)
-{
-	int err, retval = -EINVAL;
-	struct task_struct *p;
-
-	if (sid <= 0)
-		goto out;
-
-	retval = -ESRCH;
-	read_lock(&tasklist_lock);
-	do_each_task_pid(sid, PIDTYPE_SID, p) {
-		if (!p->signal->leader)
-			continue;
-		err = group_send_sig_info(sig, info, p);
-		if (retval)
-			retval = err;
-	} while_each_task_pid(sid, PIDTYPE_SID, p);
-	read_unlock(&tasklist_lock);
-out:
-	return retval;
-}
-
 int
 kill_proc_info(int sig, struct siginfo *info, pid_t pid)
 {
@@ -1309,12 +1279,6 @@ kill_pg(pid_t pgrp, int sig, int priv)
 }
 
 int
-kill_sl(pid_t sess, int sig, int priv)
-{
-	return kill_sl_info(sig, (void *)(long)(priv != 0), sess);
-}
-
-int
 kill_proc(pid_t pid, int sig, int priv)
 {
 	return kill_proc_info(sig, (void *)(long)(priv != 0), pid);
@@ -1978,22 +1942,11 @@ relock:
 EXPORT_SYMBOL(recalc_sigpending);
 EXPORT_SYMBOL_GPL(dequeue_signal);
 EXPORT_SYMBOL(flush_signals);
-EXPORT_SYMBOL(force_sig);
-EXPORT_SYMBOL(force_sig_info);
 EXPORT_SYMBOL(kill_pg);
-EXPORT_SYMBOL(kill_pg_info);
 EXPORT_SYMBOL(kill_proc);
-EXPORT_SYMBOL(kill_proc_info);
-EXPORT_SYMBOL(kill_sl);
-EXPORT_SYMBOL(kill_sl_info);
 EXPORT_SYMBOL(ptrace_notify);
 EXPORT_SYMBOL(send_sig);
 EXPORT_SYMBOL(send_sig_info);
-EXPORT_SYMBOL(send_group_sig_info);
-EXPORT_SYMBOL(sigqueue_alloc);
-EXPORT_SYMBOL(sigqueue_free);
-EXPORT_SYMBOL(send_sigqueue);
-EXPORT_SYMBOL(send_group_sigqueue);
 EXPORT_SYMBOL(sigprocmask);
 EXPORT_SYMBOL(block_all_signals);
 EXPORT_SYMBOL(unblock_all_signals);
diff --git a/kernel/sys.c b/kernel/sys.c
index a95e3900dc1e23..e6dbc2940751a3 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -19,6 +19,7 @@
 #include <linux/fs.h>
 #include <linux/workqueue.h>
 #include <linux/device.h>
+#include <linux/key.h>
 #include <linux/times.h>
 #include <linux/security.h>
 #include <linux/dcookies.h>
@@ -282,6 +283,9 @@ cond_syscall(sys_set_mempolicy)
 cond_syscall(compat_mbind)
 cond_syscall(compat_get_mempolicy)
 cond_syscall(compat_set_mempolicy)
+cond_syscall(sys_add_key)
+cond_syscall(sys_request_key)
+cond_syscall(sys_keyctl)
 
 /* arch-specific weak syscall entries */
 cond_syscall(sys_pciconfig_read)
@@ -605,6 +609,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
 	current->fsgid = new_egid;
 	current->egid = new_egid;
 	current->gid = new_rgid;
+	key_fsgid_changed(current);
 	return 0;
 }
 
@@ -642,6 +647,8 @@ asmlinkage long sys_setgid(gid_t gid)
 	}
 	else
 		return -EPERM;
+
+	key_fsgid_changed(current);
 	return 0;
 }
 
@@ -730,6 +737,8 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
 		current->suid = current->euid;
 	current->fsuid = current->euid;
 
+	key_fsuid_changed(current);
+
 	return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE);
 }
 
@@ -775,6 +784,8 @@ asmlinkage long sys_setuid(uid_t uid)
 	current->fsuid = current->euid = uid;
 	current->suid = new_suid;
 
+	key_fsuid_changed(current);
+
 	return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID);
 }
 
@@ -821,6 +832,8 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
 	if (suid != (uid_t) -1)
 		current->suid = suid;
 
+	key_fsuid_changed(current);
+
 	return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES);
 }
 
@@ -870,6 +883,8 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
 		current->gid = rgid;
 	if (sgid != (gid_t) -1)
 		current->sgid = sgid;
+
+	key_fsgid_changed(current);
 	return 0;
 }
 
@@ -911,6 +926,8 @@ asmlinkage long sys_setfsuid(uid_t uid)
 		current->fsuid = uid;
 	}
 
+	key_fsuid_changed(current);
+
 	security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS);
 
 	return old_fsuid;
@@ -937,6 +954,7 @@ asmlinkage long sys_setfsgid(gid_t gid)
 			wmb();
 		}
 		current->fsgid = gid;
+		key_fsgid_changed(current);
 	}
 	return old_fsgid;
 }
@@ -1669,7 +1687,7 @@ asmlinkage long sys_umask(int mask)
 asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
 			  unsigned long arg4, unsigned long arg5)
 {
-	int error;
+	long error;
 	int sig;
 
 	error = security_task_prctl(option, arg2, arg3, arg4, arg5);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 469cf0c2f26ece..80bf15f035cde2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -66,6 +66,7 @@ extern int sysctl_lower_zone_protection;
 extern int min_free_kbytes;
 extern int printk_ratelimit_jiffies;
 extern int printk_ratelimit_burst;
+extern int pid_max_min, pid_max_max;
 
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(__i386__)
 int unknown_nmi_panic;
@@ -575,7 +576,10 @@ static ctl_table kern_table[] = {
 		.data		= &pid_max,
 		.maxlen		= sizeof (int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= sysctl_intvec,
+		.extra1		= &pid_max_min,
+		.extra2		= &pid_max_max,
 	},
 	{
 		.ctl_name	= KERN_PANIC_ON_OOPS,
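With proc_dointvec_minmax and the extra1/extra2 bounds in place, a value written to /proc/sys/kernel/pid_max outside [pid_max_min, pid_max_max] (301 up to PID_MAX_LIMIT) is not stored. A quick user-space probe, assuming root and a kernel carrying this change:

#include <stdio.h>

static int read_pid_max(void)
{
	FILE *f = fopen("/proc/sys/kernel/pid_max", "r");
	int v = -1;

	if (f) {
		fscanf(f, "%d", &v);
		fclose(f);
	}
	return v;
}

int main(void)
{
	FILE *f;
	int before = read_pid_max(), after;

	f = fopen("/proc/sys/kernel/pid_max", "w");
	if (!f)
		return 1;
	fprintf(f, "100\n");	/* below pid_max_min (301): not stored */
	fclose(f);
	after = read_pid_max();
	printf("pid_max: %d -> %d\n", before, after);
	return 0;
}

The lower bound exists because alloc_pidmap() recycles from RESERVED_PIDS upward; allowing pid_max at or below that would leave no allocatable pids at all.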
diff --git a/kernel/timer.c b/kernel/timer.c
index e3c9b5fcd52f56..ac9386e22bd332 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -959,11 +959,6 @@ static inline void update_times(void)
 void do_timer(struct pt_regs *regs)
 {
 	jiffies_64++;
-#ifndef CONFIG_SMP
-	/* SMP process accounting uses the local APIC timer */
-
-	update_process_times(user_mode(regs));
-#endif
 	update_times();
 }
diff --git a/kernel/user.c b/kernel/user.c
index 523175afeecdca..693487dc940e6e 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -12,6 +12,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/bitops.h>
+#include <linux/key.h>
 
 /*
  * UID task count cache, to get fast user lookup in "alloc_uid"
@@ -34,6 +35,10 @@ struct user_struct root_user = {
 	.sigpending	= ATOMIC_INIT(0),
 	.mq_bytes	= 0,
 	.locked_shm     = 0,
+#ifdef CONFIG_KEYS
+	.uid_keyring	= &root_user_keyring,
+	.session_keyring = &root_session_keyring,
+#endif
 };
 
 /*
@@ -87,6 +92,8 @@ void free_uid(struct user_struct *up)
 {
 	if (up && atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
 		uid_hash_remove(up);
+		key_put(up->uid_keyring);
+		key_put(up->session_keyring);
 		kmem_cache_free(uid_cachep, up);
 		spin_unlock(&uidhash_lock);
 	}
@@ -116,6 +123,11 @@ struct user_struct * alloc_uid(uid_t uid)
 		new->mq_bytes = 0;
 		new->locked_shm = 0;
 
+		if (alloc_uid_keyring(new) < 0) {
+			kmem_cache_free(uid_cachep, new);
+			return NULL;
+		}
+
 		/*
 		 * Before adding this, check whether we raced
 		 * on adding the same user already..
@@ -123,6 +135,8 @@ struct user_struct * alloc_uid(uid_t uid)
 		spin_lock(&uidhash_lock);
 		up = uid_hash_find(uid, hashent);
 		if (up) {
+			key_put(new->uid_keyring);
+			key_put(new->session_keyring);
 			kmem_cache_free(uid_cachep, new);
 		} else {
 			uid_hash_insert(new, hashent);
@@ -146,8 +160,10 @@ void switch_uid(struct user_struct *new_user)
 	old_user = current->user;
 	atomic_inc(&new_user->processes);
 	atomic_dec(&old_user->processes);
+	switch_uid_keyring(new_user);
 	current->user = new_user;
 	free_uid(old_user);
+	suid_keys(current);
 }
diff --git a/kernel/wait.c b/kernel/wait.c
new file mode 100644
index 00000000000000..791681cfea981d
--- /dev/null
+++ b/kernel/wait.c
@@ -0,0 +1,246 @@
+/*
+ * Generic waiting primitives.
+ *
+ * (C) 2004 William Irwin, Oracle
+ */
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/wait.h>
+#include <linux/hash.h>
+
+void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
+{
+	unsigned long flags;
+
+	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&q->lock, flags);
+	__add_wait_queue(q, wait);
+	spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(add_wait_queue);
+
+void fastcall add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
+{
+	unsigned long flags;
+
+	wait->flags |= WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&q->lock, flags);
+	__add_wait_queue_tail(q, wait);
+	spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(add_wait_queue_exclusive);
+
+void fastcall remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&q->lock, flags);
+	__remove_wait_queue(q, wait);
+	spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(remove_wait_queue);
+
+
+/*
+ * Note: we use "set_current_state()" _after_ the wait-queue add,
+ * because we need a memory barrier there on SMP, so that any
+ * wake-function that tests for the wait-queue being active
+ * will be guaranteed to see waitqueue addition _or_ subsequent
+ * tests in this thread will see the wakeup having taken place.
+ *
+ * The spin_unlock() itself is semi-permeable and only protects
+ * one way (it only protects stuff inside the critical region and
+ * stops them from bleeding out - it would still allow subsequent
+ * loads to move into the critical region).
+ */
+void fastcall
+prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
+{
+	unsigned long flags;
+
+	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&q->lock, flags);
+	if (list_empty(&wait->task_list))
+		__add_wait_queue(q, wait);
+	/*
+	 * don't alter the task state if this is just going to
+	 * queue an async wait queue callback
+	 */
+	if (is_sync_wait(wait))
+		set_current_state(state);
+	spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(prepare_to_wait);
+
+void fastcall
+prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
+{
+	unsigned long flags;
+
+	wait->flags |= WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&q->lock, flags);
+	if (list_empty(&wait->task_list))
+		__add_wait_queue_tail(q, wait);
+	/*
+	 * don't alter the task state if this is just going to
+	 * queue an async wait queue callback
+	 */
+	if (is_sync_wait(wait))
+		set_current_state(state);
+	spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(prepare_to_wait_exclusive);
+
+void fastcall finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
+{
+	unsigned long flags;
+
+	__set_current_state(TASK_RUNNING);
+	/*
+	 * We can check for list emptiness outside the lock
+	 * IFF:
+	 * - we use the "careful" check that verifies both
+	 *   the next and prev pointers, so that there cannot
+	 *   be any half-pending updates in progress on other
+	 *   CPU's that we haven't seen yet (and that might
+	 *   still change the stack area).
+	 * and
+	 * - all other users take the lock (ie we can only
+	 *   have _one_ other CPU that looks at or modifies
+	 *   the list).
+	 */
+	if (!list_empty_careful(&wait->task_list)) {
+		spin_lock_irqsave(&q->lock, flags);
+		list_del_init(&wait->task_list);
+		spin_unlock_irqrestore(&q->lock, flags);
+	}
+}
+EXPORT_SYMBOL(finish_wait);
+
+int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+	int ret = default_wake_function(wait, mode, sync, key);
+
+	if (ret)
+		list_del_init(&wait->task_list);
+	return ret;
+}
+EXPORT_SYMBOL(autoremove_wake_function);
+
+int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
+{
+	struct wait_bit_key *key = arg;
+	struct wait_bit_queue *wait_bit
+		= container_of(wait, struct wait_bit_queue, wait);
+
+	if (wait_bit->key.flags != key->flags ||
+			wait_bit->key.bit_nr != key->bit_nr ||
+			test_bit(key->bit_nr, key->flags))
+		return 0;
+	else
+		return autoremove_wake_function(wait, mode, sync, key);
+}
+EXPORT_SYMBOL(wake_bit_function);
+
+/*
+ * To allow interruptible waiting and asynchronous (i.e. nonblocking)
+ * waiting, the actions passed to __wait_on_bit() and __wait_on_bit_lock()
+ * are permitted to return error codes. Nonzero return codes halt waiting
+ * and are passed back to the caller.
+ */
+int __sched fastcall
+__wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
+			int (*action)(void *), unsigned mode)
+{
+	int ret = 0;
+
+	do {
+		prepare_to_wait(wq, &q->wait, mode);
+		if (test_bit(q->key.bit_nr, q->key.flags))
+			ret = (*action)(q->key.flags);
+	} while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
+	finish_wait(wq, &q->wait);
+	return ret;
+}
+EXPORT_SYMBOL(__wait_on_bit);
+
+int __sched fastcall out_of_line_wait_on_bit(void *word, int bit,
+					int (*action)(void *), unsigned mode)
+{
+	wait_queue_head_t *wq = bit_waitqueue(word, bit);
+	DEFINE_WAIT_BIT(wait, word, bit);
+
+	return __wait_on_bit(wq, &wait, action, mode);
+}
+EXPORT_SYMBOL(out_of_line_wait_on_bit);
+
+int __sched fastcall
+__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
+			int (*action)(void *), unsigned mode)
+{
+	int ret = 0;
+
+	do {
+		prepare_to_wait_exclusive(wq, &q->wait, mode);
+		if (test_bit(q->key.bit_nr, q->key.flags)) {
+			if ((ret = (*action)(q->key.flags)))
+				break;
+		}
+	} while (test_and_set_bit(q->key.bit_nr, q->key.flags));
+	finish_wait(wq, &q->wait);
+	return ret;
+}
+EXPORT_SYMBOL(__wait_on_bit_lock);
+
+int __sched fastcall out_of_line_wait_on_bit_lock(void *word, int bit,
+					int (*action)(void *), unsigned mode)
+{
+	wait_queue_head_t *wq = bit_waitqueue(word, bit);
+	DEFINE_WAIT_BIT(wait, word, bit);
+
+	return __wait_on_bit_lock(wq, &wait, action, mode);
+}
+EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
+
+void fastcall __wake_up_bit(wait_queue_head_t *wq, void *word, int bit)
+{
+	struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
+	if (waitqueue_active(wq))
+		__wake_up(wq, TASK_INTERRUPTIBLE|TASK_UNINTERRUPTIBLE, 1, &key);
+}
+EXPORT_SYMBOL(__wake_up_bit);
+
+/**
+ * wake_up_bit - wake up a waiter on a bit
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ *
+ * There is a standard hashed waitqueue table for generic use. This
+ * is the part of the hashtable's accessor API that wakes up waiters
+ * on a bit. For instance, if one were to have waiters on a bitflag,
+ * one would call wake_up_bit() after clearing the bit.
+ *
+ * In order for this to function properly, as it uses waitqueue_active()
+ * internally, some kind of memory barrier must be done prior to calling
+ * this. Typically, this will be smp_mb__after_clear_bit(), but in some
+ * cases where bitflags are manipulated non-atomically under a lock, one
+ * may need to use a less regular barrier, such as fs/inode.c's smp_mb(),
+ * because spin_unlock() does not guarantee a memory barrier.
+ */
+void fastcall wake_up_bit(void *word, int bit)
+{
+	__wake_up_bit(bit_waitqueue(word, bit), word, bit);
+}
+EXPORT_SYMBOL(wake_up_bit);
+
+fastcall wait_queue_head_t *bit_waitqueue(void *word, int bit)
+{
+	const int shift = BITS_PER_LONG == 32 ? 5 : 6;
+	const struct zone *zone = page_zone(virt_to_page(word));
+	unsigned long val = (unsigned long)word << shift | bit;
+
+	return &zone->wait_table[hash_long(val, zone->wait_table_bits)];
+}
+EXPORT_SYMBOL(bit_waitqueue);
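The canonical consumer of prepare_to_wait()/finish_wait() is an open-coded wait loop: the task queues itself and sets its state with the barrier ordering described in the comment above, then re-tests the condition before sleeping, so a wakeup arriving between the test and schedule() cannot be lost. A sketch in kernel context; my_waitqueue and my_condition are hypothetical, and the waker's side would set the flag and call wake_up(&my_waitqueue):

static wait_queue_head_t my_waitqueue;	/* init_waitqueue_head() at setup */
static volatile int my_condition;

static void wait_for_condition(void)
{
	DEFINE_WAIT(wait);	/* uses autoremove_wake_function() */

	for (;;) {
		prepare_to_wait(&my_waitqueue, &wait, TASK_INTERRUPTIBLE);
		if (my_condition)
			break;
		if (signal_pending(current))
			break;	/* caller would return -ERESTARTSYS */
		schedule();
	}
	finish_wait(&my_waitqueue, &wait);
}

The __wait_on_bit()/__wait_on_bit_lock() helpers above package exactly this loop for the common case where the condition is a single bit in a word, hashing the word's address into the per-zone wait tables via bit_waitqueue() so no per-object waitqueue head is needed.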