author     Linus Torvalds <torvalds@ppc970.osdl.org>  2004-10-18 20:50:22 -0700
committer  Linus Torvalds <torvalds@ppc970.osdl.org>  2004-10-18 20:50:22 -0700
commit     098fc560ef2bbd1bde80845c898fa95db616eb6c (patch)
tree       ca722c6fdbdffe9b7cfd31d61e8f4aae906a319c /kernel
parent     bffe01870598b7a0a77073e25ee94e026bc98e6b (diff)
parent     2a136606fe21b603a0ce484fc578f862f8e8384d (diff)
download   history-098fc560ef2bbd1bde80845c898fa95db616eb6c.tar.gz
Trivial Makefile merge
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile   |    2
-rw-r--r--  kernel/exit.c     |    4
-rw-r--r--  kernel/fork.c     |  139
-rw-r--r--  kernel/kallsyms.c |  194
-rw-r--r--  kernel/kfifo.c    |  170
-rw-r--r--  kernel/panic.c    |   16
-rw-r--r--  kernel/pid.c      |  117
-rw-r--r--  kernel/power/pm.c |   31
-rw-r--r--  kernel/printk.c   |    8
-rw-r--r--  kernel/profile.c  |  258
-rw-r--r--  kernel/signal.c   |   47
-rw-r--r--  kernel/sys.c      |   20
-rw-r--r--  kernel/sysctl.c   |    6
-rw-r--r--  kernel/timer.c    |    5
-rw-r--r--  kernel/user.c     |   16
-rw-r--r--  kernel/wait.c     |  246
16 files changed, 928 insertions, 351 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index c4337751cee716..abab504f01e120 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -7,7 +7,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
sysctl.o capability.o ptrace.o timer.o user.o \
signal.o sys.o kmod.o workqueue.o pid.o \
rcupdate.o intermodule.o extable.o params.o posix-timers.o \
- kthread.o
+ kthread.o wait.o kfifo.o
obj-$(CONFIG_FUTEX) += futex.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
diff --git a/kernel/exit.c b/kernel/exit.c
index 55d85339252454..a8ae81ed1d41af 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -14,6 +14,7 @@
#include <linux/personality.h>
#include <linux/tty.h>
#include <linux/namespace.h>
+#include <linux/key.h>
#include <linux/security.h>
#include <linux/cpu.h>
#include <linux/acct.h>
@@ -511,8 +512,6 @@ void exit_mm(struct task_struct *tsk)
__exit_mm(tsk);
}
-EXPORT_SYMBOL(exit_mm);
-
static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper)
{
/*
@@ -816,6 +815,7 @@ asmlinkage NORET_TYPE void do_exit(long code)
__exit_fs(tsk);
exit_namespace(tsk);
exit_thread();
+ exit_keys(tsk);
if (tsk->signal->leader)
disassociate_ctty(1);
diff --git a/kernel/fork.c b/kernel/fork.c
index 3020dccc548ffb..96714c501cc831 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -24,6 +24,7 @@
#include <linux/mempolicy.h>
#include <linux/sem.h>
#include <linux/file.h>
+#include <linux/key.h>
#include <linux/binfmts.h>
#include <linux/mman.h>
#include <linux/fs.h>
@@ -100,131 +101,6 @@ void __put_task_struct(struct task_struct *tsk)
free_task(tsk);
}
-void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
-{
- unsigned long flags;
-
- wait->flags &= ~WQ_FLAG_EXCLUSIVE;
- spin_lock_irqsave(&q->lock, flags);
- __add_wait_queue(q, wait);
- spin_unlock_irqrestore(&q->lock, flags);
-}
-
-EXPORT_SYMBOL(add_wait_queue);
-
-void fastcall add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)
-{
- unsigned long flags;
-
- wait->flags |= WQ_FLAG_EXCLUSIVE;
- spin_lock_irqsave(&q->lock, flags);
- __add_wait_queue_tail(q, wait);
- spin_unlock_irqrestore(&q->lock, flags);
-}
-
-EXPORT_SYMBOL(add_wait_queue_exclusive);
-
-void fastcall remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&q->lock, flags);
- __remove_wait_queue(q, wait);
- spin_unlock_irqrestore(&q->lock, flags);
-}
-
-EXPORT_SYMBOL(remove_wait_queue);
-
-
-/*
- * Note: we use "set_current_state()" _after_ the wait-queue add,
- * because we need a memory barrier there on SMP, so that any
- * wake-function that tests for the wait-queue being active
- * will be guaranteed to see waitqueue addition _or_ subsequent
- * tests in this thread will see the wakeup having taken place.
- *
- * The spin_unlock() itself is semi-permeable and only protects
- * one way (it only protects stuff inside the critical region and
- * stops them from bleeding out - it would still allow subsequent
- * loads to move into the the critical region).
- */
-void fastcall prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
-{
- unsigned long flags;
-
- wait->flags &= ~WQ_FLAG_EXCLUSIVE;
- spin_lock_irqsave(&q->lock, flags);
- if (list_empty(&wait->task_list))
- __add_wait_queue(q, wait);
- /*
- * don't alter the task state if this is just going to
- * queue an async wait queue callback
- */
- if (is_sync_wait(wait))
- set_current_state(state);
- spin_unlock_irqrestore(&q->lock, flags);
-}
-
-EXPORT_SYMBOL(prepare_to_wait);
-
-void fastcall
-prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
-{
- unsigned long flags;
-
- wait->flags |= WQ_FLAG_EXCLUSIVE;
- spin_lock_irqsave(&q->lock, flags);
- if (list_empty(&wait->task_list))
- __add_wait_queue_tail(q, wait);
- /*
- * don't alter the task state if this is just going to
- * queue an async wait queue callback
- */
- if (is_sync_wait(wait))
- set_current_state(state);
- spin_unlock_irqrestore(&q->lock, flags);
-}
-
-EXPORT_SYMBOL(prepare_to_wait_exclusive);
-
-void fastcall finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
-{
- unsigned long flags;
-
- __set_current_state(TASK_RUNNING);
- /*
- * We can check for list emptiness outside the lock
- * IFF:
- * - we use the "careful" check that verifies both
- * the next and prev pointers, so that there cannot
- * be any half-pending updates in progress on other
- * CPU's that we haven't seen yet (and that might
- * still change the stack area.
- * and
- * - all other users take the lock (ie we can only
- * have _one_ other CPU that looks at or modifies
- * the list).
- */
- if (!list_empty_careful(&wait->task_list)) {
- spin_lock_irqsave(&q->lock, flags);
- list_del_init(&wait->task_list);
- spin_unlock_irqrestore(&q->lock, flags);
- }
-}
-
-EXPORT_SYMBOL(finish_wait);
-
-int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
-{
- int ret = default_wake_function(wait, mode, sync, key);
-
- if (ret)
- list_del_init(&wait->task_list);
- return ret;
-}
-
-EXPORT_SYMBOL(autoremove_wake_function);
-
void __init fork_init(unsigned long mempages)
{
#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
@@ -426,6 +302,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
atomic_set(&mm->mm_count, 1);
init_rwsem(&mm->mmap_sem);
mm->core_waiters = 0;
+ mm->nr_ptes = 0;
mm->page_table_lock = SPIN_LOCK_UNLOCKED;
mm->ioctx_list_lock = RW_LOCK_UNLOCKED;
mm->ioctx_list = NULL;
@@ -1019,6 +896,10 @@ static task_t *copy_process(unsigned long clone_flags,
}
#endif
+ p->tgid = p->pid;
+ if (clone_flags & CLONE_THREAD)
+ p->tgid = current->tgid;
+
if ((retval = security_task_alloc(p)))
goto bad_fork_cleanup_policy;
if ((retval = audit_alloc(p)))
@@ -1036,8 +917,10 @@ static task_t *copy_process(unsigned long clone_flags,
goto bad_fork_cleanup_sighand;
if ((retval = copy_mm(clone_flags, p)))
goto bad_fork_cleanup_signal;
- if ((retval = copy_namespace(clone_flags, p)))
+ if ((retval = copy_keys(clone_flags, p)))
goto bad_fork_cleanup_mm;
+ if ((retval = copy_namespace(clone_flags, p)))
+ goto bad_fork_cleanup_keys;
retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
if (retval)
goto bad_fork_cleanup_namespace;
@@ -1071,7 +954,6 @@ static task_t *copy_process(unsigned long clone_flags,
* Ok, make it visible to the rest of the system.
* We dont wake it up yet.
*/
- p->tgid = p->pid;
p->group_leader = p;
INIT_LIST_HEAD(&p->ptrace_children);
INIT_LIST_HEAD(&p->ptrace_list);
@@ -1119,7 +1001,6 @@ static task_t *copy_process(unsigned long clone_flags,
retval = -EAGAIN;
goto bad_fork_cleanup_namespace;
}
- p->tgid = current->tgid;
p->group_leader = current->group_leader;
if (current->signal->group_stop_count > 0) {
@@ -1159,6 +1040,8 @@ fork_out:
bad_fork_cleanup_namespace:
exit_namespace(p);
+bad_fork_cleanup_keys:
+ exit_keys(p);
bad_fork_cleanup_mm:
if (p->mm)
mmput(p->mm);
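The copy_keys()/bad_fork_cleanup_keys hunks above follow copy_process()'s layered goto-cleanup convention: each acquired resource gets an unwind label, and the labels run in reverse acquisition order, so adding copy_keys() between copy_mm() and copy_namespace() requires a cleanup label between bad_fork_cleanup_namespace and bad_fork_cleanup_mm. A minimal userspace sketch of that pattern, with made-up resource names standing in for the kernel's copy_*() helpers:

#include <stdlib.h>

/* Illustrative stand-ins for copy_mm(), copy_keys(), copy_namespace(). */
static int copy_a(void **p) { *p = malloc(16); return *p ? 0 : -1; }
static int copy_b(void **p) { *p = malloc(16); return *p ? 0 : -1; }
static int copy_c(void **p) { *p = malloc(16); return *p ? 0 : -1; }

struct object { void *a, *b, *c; };

static int create_object(struct object *obj)
{
	int retval;

	if ((retval = copy_a(&obj->a)))
		goto out;		/* nothing acquired yet */
	if ((retval = copy_b(&obj->b)))
		goto cleanup_a;		/* undo A only */
	if ((retval = copy_c(&obj->c)))
		goto cleanup_b;		/* undo B, then fall through to A */
	return 0;			/* success: obj now owns all three */

cleanup_b:
	free(obj->b);
cleanup_a:
	free(obj->a);
out:
	return retval;
}

int main(void)
{
	struct object obj;

	return create_object(&obj) ? EXIT_FAILURE : EXIT_SUCCESS;
}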
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 74ba3cb2180945..8f3c6c1d1ce7a6 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -4,7 +4,12 @@
* Rewritten and vastly simplified by Rusty Russell for in-kernel
* module loader:
* Copyright 2002 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
- * Stem compression by Andi Kleen.
+ *
+ * ChangeLog:
+ *
+ * (25/Aug/2004) Paulo Marques <pmarques@grupopie.com>
+ * Changed the compression method from stem compression to "table lookup"
+ * compression (see scripts/kallsyms.c for a more complete description)
*/
#include <linux/kallsyms.h>
#include <linux/module.h>
@@ -17,7 +22,12 @@
/* These will be re-linked against their real values during the second link stage */
extern unsigned long kallsyms_addresses[] __attribute__((weak));
extern unsigned long kallsyms_num_syms __attribute__((weak));
-extern char kallsyms_names[] __attribute__((weak));
+extern u8 kallsyms_names[] __attribute__((weak));
+
+extern u8 kallsyms_token_table[] __attribute__((weak));
+extern u16 kallsyms_token_index[] __attribute__((weak));
+
+extern unsigned long kallsyms_markers[] __attribute__((weak));
/* Defined by the linker script. */
extern char _stext[], _etext[], _sinittext[], _einittext[];
@@ -37,21 +47,88 @@ static inline int is_kernel_text(unsigned long addr)
return 0;
}
+/* expand a compressed symbol data into the resulting uncompressed string,
+ given the offset to where the symbol is in the compressed stream */
+static unsigned int kallsyms_expand_symbol(unsigned int off, char *result)
+{
+ int len, skipped_first = 0;
+ u8 *tptr, *data;
+
+ /* get the compressed symbol length from the first symbol byte */
+ data = &kallsyms_names[off];
+ len = *data;
+ data++;
+
+ /* update the offset to return the offset for the next symbol on
+ * the compressed stream */
+ off += len + 1;
+
+ /* for every byte on the compressed symbol data, copy the table
+ entry for that byte */
+ while(len) {
+ tptr = &kallsyms_token_table[ kallsyms_token_index[*data] ];
+ data++;
+ len--;
+
+ while (*tptr) {
+ if(skipped_first) {
+ *result = *tptr;
+ result++;
+ } else
+ skipped_first = 1;
+ tptr++;
+ }
+ }
+
+ *result = '\0';
+
+ /* return the offset to the next symbol */
+ return off;
+}
+
+/* get symbol type information. This is encoded as a single char at the
+ * beginning of the symbol name */
+static char kallsyms_get_symbol_type(unsigned int off)
+{
+ /* get just the first code, look it up in the token table, and return the
+ * first char from this token */
+ return kallsyms_token_table[ kallsyms_token_index[ kallsyms_names[off+1] ] ];
+}
+
+
+/* find the offset on the compressed stream given an index in the
+ * kallsyms array */
+static unsigned int get_symbol_offset(unsigned long pos)
+{
+ u8 *name;
+ int i;
+
+ /* use the closest marker we have. We have markers every 256 positions,
+ * so that should be close enough */
+ name = &kallsyms_names[ kallsyms_markers[pos>>8] ];
+
+ /* sequentially scan all the symbols up to the point we're searching for.
+ * Every symbol is stored in a [<len>][<len> bytes of data] format, so we
+ * just need to add the len to the current pointer for every symbol we
+ * wish to skip */
+ for(i = 0; i < (pos&0xFF); i++)
+ name = name + (*name) + 1;
+
+ return name - kallsyms_names;
+}
+
/* Lookup the address for this symbol. Returns 0 if not found. */
unsigned long kallsyms_lookup_name(const char *name)
{
char namebuf[KSYM_NAME_LEN+1];
unsigned long i;
- char *knames;
+ unsigned int off;
- for (i = 0, knames = kallsyms_names; i < kallsyms_num_syms; i++) {
- unsigned prefix = *knames++;
+ for (i = 0, off = 0; i < kallsyms_num_syms; i++) {
+ off = kallsyms_expand_symbol(off, namebuf);
- strlcpy(namebuf + prefix, knames, KSYM_NAME_LEN - prefix);
if (strcmp(namebuf, name) == 0)
return kallsyms_addresses[i];
-
- knames += strlen(knames) + 1;
}
return module_kallsyms_lookup_name(name);
}
@@ -62,7 +139,7 @@ const char *kallsyms_lookup(unsigned long addr,
unsigned long *offset,
char **modname, char *namebuf)
{
- unsigned long i, best = 0;
+ unsigned long i, low, high, mid;
/* This kernel should never had been booted. */
BUG_ON(!kallsyms_addresses);
@@ -71,40 +148,45 @@ const char *kallsyms_lookup(unsigned long addr,
namebuf[0] = 0;
if (is_kernel_text(addr) || is_kernel_inittext(addr)) {
- unsigned long symbol_end;
- char *name = kallsyms_names;
-
- /* They're sorted, we could be clever here, but who cares? */
- for (i = 0; i < kallsyms_num_syms; i++) {
- if (kallsyms_addresses[i] > kallsyms_addresses[best] &&
- kallsyms_addresses[i] <= addr)
- best = i;
- }
+ unsigned long symbol_end=0;
- /* Grab name */
- for (i = 0; i <= best; i++) {
- unsigned prefix = *name++;
- strncpy(namebuf + prefix, name, KSYM_NAME_LEN - prefix);
- name += strlen(name) + 1;
+ /* do a binary search on the sorted kallsyms_addresses array */
+ low = 0;
+ high = kallsyms_num_syms;
+
+ while (high-low > 1) {
+ mid = (low + high) / 2;
+ if (kallsyms_addresses[mid] <= addr) low = mid;
+ else high = mid;
}
- /* At worst, symbol ends at end of section. */
- if (is_kernel_inittext(addr))
- symbol_end = (unsigned long)_einittext;
- else
- symbol_end = (unsigned long)_etext;
+ /* search for the first aliased symbol. Aliased symbols are
+ symbols with the same address */
+ while (low && kallsyms_addresses[low - 1] == kallsyms_addresses[low])
+ --low;
+
+ /* Grab name */
+ kallsyms_expand_symbol(get_symbol_offset(low), namebuf);
/* Search for next non-aliased symbol */
- for (i = best+1; i < kallsyms_num_syms; i++) {
- if (kallsyms_addresses[i] > kallsyms_addresses[best]) {
+ for (i = low + 1; i < kallsyms_num_syms; i++) {
+ if (kallsyms_addresses[i] > kallsyms_addresses[low]) {
symbol_end = kallsyms_addresses[i];
break;
}
}
- *symbolsize = symbol_end - kallsyms_addresses[best];
+ /* if we found no next symbol, we use the end of the section */
+ if (!symbol_end) {
+ if (is_kernel_inittext(addr))
+ symbol_end = (unsigned long)_einittext;
+ else
+ symbol_end = (unsigned long)_etext;
+ }
+
+ *symbolsize = symbol_end - kallsyms_addresses[low];
*modname = NULL;
- *offset = addr - kallsyms_addresses[best];
+ *offset = addr - kallsyms_addresses[low];
return namebuf;
}
@@ -135,7 +217,7 @@ void __print_symbol(const char *fmt, unsigned long address)
printk(fmt, buffer);
}
-/* To avoid O(n^2) iteration, we carry prefix along. */
+/* To avoid using get_symbol_offset for every symbol, we carry prefix along. */
struct kallsym_iter
{
loff_t pos;
@@ -168,31 +250,23 @@ static int get_ksymbol_mod(struct kallsym_iter *iter)
/* Returns space to next name. */
static unsigned long get_ksymbol_core(struct kallsym_iter *iter)
{
- unsigned stemlen, off = iter->nameoff;
-
- /* First char of each symbol name indicates prefix length
- shared with previous name (stem compression). */
- stemlen = kallsyms_names[off++];
+ unsigned off = iter->nameoff;
- strlcpy(iter->name+stemlen, kallsyms_names + off,
- KSYM_NAME_LEN+1-stemlen);
- off += strlen(kallsyms_names + off) + 1;
iter->owner = NULL;
iter->value = kallsyms_addresses[iter->pos];
- if (is_kernel_text(iter->value) || is_kernel_inittext(iter->value))
- iter->type = 't';
- else
- iter->type = 'd';
- upcase_if_global(iter);
+ iter->type = kallsyms_get_symbol_type(off);
+
+ off = kallsyms_expand_symbol(off, iter->name);
+
return off - iter->nameoff;
}
-static void reset_iter(struct kallsym_iter *iter)
+static void reset_iter(struct kallsym_iter *iter, loff_t new_pos)
{
iter->name[0] = '\0';
- iter->nameoff = 0;
- iter->pos = 0;
+ iter->nameoff = get_symbol_offset(new_pos);
+ iter->pos = new_pos;
}
/* Returns false if pos at or past end of file. */
@@ -204,16 +278,13 @@ static int update_iter(struct kallsym_iter *iter, loff_t pos)
return get_ksymbol_mod(iter);
}
- /* If we're past the desired position, reset to start. */
- if (pos < iter->pos)
- reset_iter(iter);
-
- /* We need to iterate through the previous symbols: can be slow */
- for (; iter->pos != pos; iter->pos++) {
- iter->nameoff += get_ksymbol_core(iter);
- cond_resched();
- }
- get_ksymbol_core(iter);
+ /* If we're not on the desired position, reset to new position. */
+ if (pos != iter->pos)
+ reset_iter(iter, pos);
+
+ iter->nameoff += get_ksymbol_core(iter);
+ iter->pos++;
+
return 1;
}
@@ -267,14 +338,15 @@ struct seq_operations kallsyms_op = {
static int kallsyms_open(struct inode *inode, struct file *file)
{
/* We keep iterator in m->private, since normal case is to
- * s_start from where we left off, so we avoid O(N^2). */
+ * s_start from where we left off, so we avoid
+ * using get_symbol_offset for every symbol */
struct kallsym_iter *iter;
int ret;
iter = kmalloc(sizeof(*iter), GFP_KERNEL);
if (!iter)
return -ENOMEM;
- reset_iter(iter);
+ reset_iter(iter, 0);
ret = seq_open(file, &kallsyms_op);
if (ret == 0)
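The decompression helpers above assume the layout generated by scripts/kallsyms.c: each kallsyms_names entry is one length byte followed by that many token indices, each index selects a NUL-terminated fragment from kallsyms_token_table via kallsyms_token_index, and kallsyms_markers records the stream offset of every 256th symbol so get_symbol_offset() never walks more than 255 length-prefixed entries. A standalone sketch of the expansion step with toy tables (the table contents below are invented for illustration; the in-kernel version additionally treats the first expanded character as the symbol type and skips it):

#include <stdio.h>

/* Toy token table: fragments packed back to back, each NUL-terminated. */
static const unsigned char token_table[] = "sys_\0open\0close\0_read\0";
static const unsigned short token_index[] = { 0, 5, 10, 16 };

/* Toy compressed stream: [len][<len> token indices] per symbol. */
static const unsigned char names[] = {
	2, 0, 1,	/* "sys_" "open"          -> "sys_open"       */
	3, 0, 2, 3,	/* "sys_" "close" "_read" -> "sys_close_read" */
};

/* Expand the symbol at 'off'; return the offset of the next symbol. */
static unsigned int expand_symbol(unsigned int off, char *result)
{
	int len = names[off++];

	while (len--) {
		const unsigned char *tptr = &token_table[token_index[names[off++]]];

		while (*tptr)
			*result++ = *tptr++;
	}
	*result = '\0';
	return off;
}

int main(void)
{
	char buf[64];
	unsigned int off = 0, i;

	for (i = 0; i < 2; i++) {
		off = expand_symbol(off, buf);
		printf("%s\n", buf);	/* sys_open, then sys_close_read */
	}
	return 0;
}

This layout is also what lets kallsyms_lookup() binary-search kallsyms_addresses and expand only the one entry it lands on, instead of walking every name while carrying a shared prefix as the removed stem-compression code had to.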
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
new file mode 100644
index 00000000000000..9a5e17b507fe7d
--- /dev/null
+++ b/kernel/kfifo.c
@@ -0,0 +1,170 @@
+/*
+ * A simple kernel FIFO implementation.
+ *
+ * Copyright (C) 2004 Stelian Pop <stelian@popies.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/kfifo.h>
+
+/*
+ * kfifo_init - allocates a new FIFO using a preallocated buffer
+ * @buffer: the preallocated buffer to be used.
+ * @size: the size of the internal buffer; this has to be a power of 2.
+ * @gfp_mask: get_free_pages mask, passed to kmalloc()
+ * @lock: the lock to be used to protect the fifo buffer
+ *
+ * Do NOT pass the kfifo to kfifo_free() after use ! Simply free the
+ * struct kfifo with kfree().
+ */
+struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size,
+ int gfp_mask, spinlock_t *lock)
+{
+ struct kfifo *fifo;
+
+ /* size must be a power of 2 */
+ BUG_ON(size & (size - 1));
+
+ fifo = kmalloc(sizeof(struct kfifo), gfp_mask);
+ if (!fifo)
+ return ERR_PTR(-ENOMEM);
+
+ fifo->buffer = buffer;
+ fifo->size = size;
+ fifo->in = fifo->out = 0;
+ fifo->lock = lock;
+
+ return fifo;
+}
+EXPORT_SYMBOL(kfifo_init);
+
+/*
+ * kfifo_alloc - allocates a new FIFO and its internal buffer
+ * @size: the size of the internal buffer to be allocated.
+ * @gfp_mask: get_free_pages mask, passed to kmalloc()
+ * @lock: the lock to be used to protect the fifo buffer
+ *
+ * The size will be rounded-up to a power of 2.
+ */
+struct kfifo *kfifo_alloc(unsigned int size, int gfp_mask, spinlock_t *lock)
+{
+ unsigned int newsize;
+ unsigned char *buffer;
+ struct kfifo *ret;
+
+ /*
+ * round up to the next power of 2, since our 'let the indices
+ * wrap' technique works only in this case.
+ */
+ newsize = size;
+ if (size & (size - 1)) {
+ BUG_ON(size > 0x80000000);
+ newsize = roundup_pow_of_two(size);
+ }
+
+ buffer = kmalloc(newsize, gfp_mask);
+ if (!buffer)
+ return ERR_PTR(-ENOMEM);
+
+ ret = kfifo_init(buffer, newsize, gfp_mask, lock);
+
+ if (IS_ERR(ret))
+ kfree(buffer);
+
+ return ret;
+}
+EXPORT_SYMBOL(kfifo_alloc);
+
+/*
+ * kfifo_free - frees the FIFO
+ * @fifo: the fifo to be freed.
+ */
+void kfifo_free(struct kfifo *fifo)
+{
+ kfree(fifo->buffer);
+ kfree(fifo);
+}
+EXPORT_SYMBOL(kfifo_free);
+
+/*
+ * __kfifo_put - puts some data into the FIFO, no locking version
+ * @fifo: the fifo to be used.
+ * @buffer: the data to be added.
+ * @len: the length of the data to be added.
+ *
+ * This function copies at most 'len' bytes from the 'buffer' into
+ * the FIFO depending on the free space, and returns the number of
+ * bytes copied.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+unsigned int __kfifo_put(struct kfifo *fifo,
+ unsigned char *buffer, unsigned int len)
+{
+ unsigned int l;
+
+ len = min(len, fifo->size - fifo->in + fifo->out);
+
+ /* first put the data starting from fifo->in to buffer end */
+ l = min(len, fifo->size - (fifo->in & (fifo->size - 1)));
+ memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), buffer, l);
+
+ /* then put the rest (if any) at the beginning of the buffer */
+ memcpy(fifo->buffer, buffer + l, len - l);
+
+ fifo->in += len;
+
+ return len;
+}
+EXPORT_SYMBOL(__kfifo_put);
+
+/*
+ * __kfifo_get - gets some data from the FIFO, no locking version
+ * @fifo: the fifo to be used.
+ * @buffer: where the data must be copied.
+ * @len: the size of the destination buffer.
+ *
+ * This function copies at most 'len' bytes from the FIFO into the
+ * 'buffer' and returns the number of copied bytes.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+unsigned int __kfifo_get(struct kfifo *fifo,
+ unsigned char *buffer, unsigned int len)
+{
+ unsigned int l;
+
+ len = min(len, fifo->in - fifo->out);
+
+ /* first get the data from fifo->out until the end of the buffer */
+ l = min(len, fifo->size - (fifo->out & (fifo->size - 1)));
+ memcpy(buffer, fifo->buffer + (fifo->out & (fifo->size - 1)), l);
+
+ /* then get the rest (if any) from the beginning of the buffer */
+ memcpy(buffer + l, fifo->buffer, len - l);
+
+ fifo->out += len;
+
+ return len;
+}
+EXPORT_SYMBOL(__kfifo_get);
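__kfifo_put() and __kfifo_get() above depend on fifo->in and fifo->out being free-running unsigned counters: because the size is a power of 2, the fill level is simply in - out (still correct after the counters wrap, since unsigned arithmetic is modular) and the buffer position is counter & (size - 1), so no index is ever reset. A minimal userspace model of that index arithmetic, single producer and single consumer, no locking (the names below are illustrative, not the kernel API):

#include <stdio.h>
#include <string.h>

#define FIFO_SIZE	8	/* must be a power of 2 */
#define MIN(a, b)	((a) < (b) ? (a) : (b))

struct byte_fifo {
	unsigned char buf[FIFO_SIZE];
	unsigned int in, out;	/* free-running counters, allowed to wrap */
};

static unsigned int fifo_put(struct byte_fifo *f,
			     const unsigned char *src, unsigned int len)
{
	unsigned int l;

	len = MIN(len, FIFO_SIZE - f->in + f->out);	/* clamp to free space */
	/* copy up to the end of the ring ... */
	l = MIN(len, FIFO_SIZE - (f->in & (FIFO_SIZE - 1)));
	memcpy(f->buf + (f->in & (FIFO_SIZE - 1)), src, l);
	/* ... then wrap to the start for the remainder */
	memcpy(f->buf, src + l, len - l);
	f->in += len;
	return len;
}

static unsigned int fifo_get(struct byte_fifo *f,
			     unsigned char *dst, unsigned int len)
{
	unsigned int l;

	len = MIN(len, f->in - f->out);			/* clamp to used space */
	l = MIN(len, FIFO_SIZE - (f->out & (FIFO_SIZE - 1)));
	memcpy(dst, f->buf + (f->out & (FIFO_SIZE - 1)), l);
	memcpy(dst + l, f->buf, len - l);
	f->out += len;
	return len;
}

int main(void)
{
	struct byte_fifo f = { .in = 0, .out = 0 };
	unsigned char tmp[16];
	unsigned int n;

	fifo_put(&f, (const unsigned char *)"abcdef", 6);
	n = fifo_get(&f, tmp, sizeof(tmp));
	tmp[n] = '\0';
	printf("got %u bytes: %s\n", n, tmp);	/* got 6 bytes: abcdef */
	return 0;
}

The power-of-2 constraint is the whole trick: masking replaces a modulo on every access, and the in - out difference stays meaningful across wrap-around, which is why kfifo_alloc() rounds the requested size up with roundup_pow_of_two().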
diff --git a/kernel/panic.c b/kernel/panic.c
index fce7f4030d0a75..c7ab9981c7aa76 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -110,6 +110,9 @@ EXPORT_SYMBOL(panic);
* 'P' - Proprietary module has been loaded.
* 'F' - Module has been forcibly loaded.
* 'S' - SMP with CPUs not designed for SMP.
+ * 'R' - User forced a module unload.
+ * 'M' - Machine had a machine check experience.
+ * 'B' - System has hit bad_page.
*
* The string is overwritten by the next call to print_taint().
*/
@@ -118,12 +121,21 @@ const char *print_tainted(void)
{
static char buf[20];
if (tainted) {
- snprintf(buf, sizeof(buf), "Tainted: %c%c%c",
+ snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c",
tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G',
tainted & TAINT_FORCED_MODULE ? 'F' : ' ',
- tainted & TAINT_UNSAFE_SMP ? 'S' : ' ');
+ tainted & TAINT_UNSAFE_SMP ? 'S' : ' ',
+ tainted & TAINT_FORCED_RMMOD ? 'R' : ' ',
+ tainted & TAINT_MACHINE_CHECK ? 'M' : ' ',
+ tainted & TAINT_BAD_PAGE ? 'B' : ' ');
}
else
snprintf(buf, sizeof(buf), "Not tainted");
return(buf);
}
+
+void add_taint(unsigned flag)
+{
+ tainted |= flag;
+}
+EXPORT_SYMBOL(add_taint);
diff --git a/kernel/pid.c b/kernel/pid.c
index 83008f812f4974..21024b7ae37c2f 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -1,8 +1,9 @@
/*
* Generic pidhash and scalable, time-bounded PID allocator
*
- * (C) 2002 William Irwin, IBM
- * (C) 2002 Ingo Molnar, Red Hat
+ * (C) 2002-2003 William Irwin, IBM
+ * (C) 2004 William Irwin, Oracle
+ * (C) 2002-2004 Ingo Molnar, Red Hat
*
* pid-structures are backing objects for tasks sharing a given ID to chain
* against. There is very little to them aside from hashing them and
@@ -35,9 +36,15 @@ int last_pid;
#define RESERVED_PIDS 300
-#define PIDMAP_ENTRIES (PID_MAX_LIMIT/PAGE_SIZE/8)
+int pid_max_min = RESERVED_PIDS + 1;
+int pid_max_max = PID_MAX_LIMIT;
+
+#define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
#define BITS_PER_PAGE (PAGE_SIZE*8)
#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
+#define mk_pid(map, off) (((map) - pidmap_array)*BITS_PER_PAGE + (off))
+#define find_next_offset(map, off) \
+ find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
/*
* PID-map pages start out as NULL, they get allocated upon
@@ -53,8 +60,6 @@ typedef struct pidmap {
static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
{ [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
-static pidmap_t *map_limit = pidmap_array + PIDMAP_ENTRIES;
-
static spinlock_t pidmap_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
fastcall void free_pidmap(int pid)
@@ -66,15 +71,18 @@ fastcall void free_pidmap(int pid)
atomic_inc(&map->nr_free);
}
-/*
- * Here we search for the next map that has free bits left.
- * Normally the next map has free PIDs.
- */
-static inline pidmap_t *next_free_map(pidmap_t *map, int *max_steps)
+int alloc_pidmap(void)
{
- while (--*max_steps) {
- if (++map == map_limit)
- map = pidmap_array;
+ int i, offset, max_scan, pid, last = last_pid;
+ pidmap_t *map;
+
+ pid = last + 1;
+ if (pid >= pid_max)
+ pid = RESERVED_PIDS;
+ offset = pid & BITS_PER_PAGE_MASK;
+ map = &pidmap_array[pid/BITS_PER_PAGE];
+ max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
+ for (i = 0; i <= max_scan; ++i) {
if (unlikely(!map->page)) {
unsigned long page = get_zeroed_page(GFP_KERNEL);
/*
@@ -87,62 +95,39 @@ static inline pidmap_t *next_free_map(pidmap_t *map, int *max_steps)
else
map->page = (void *)page;
spin_unlock(&pidmap_lock);
-
- if (!map->page)
+ if (unlikely(!map->page))
break;
}
- if (atomic_read(&map->nr_free))
- return map;
- }
- return NULL;
-}
-
-int alloc_pidmap(void)
-{
- int pid, offset, max_steps = PIDMAP_ENTRIES + 1;
- pidmap_t *map;
-
- pid = last_pid + 1;
- if (pid >= pid_max)
- pid = RESERVED_PIDS;
-
- offset = pid & BITS_PER_PAGE_MASK;
- map = pidmap_array + pid / BITS_PER_PAGE;
-
- if (likely(map->page && !test_and_set_bit(offset, map->page))) {
- /*
- * There is a small window for last_pid updates to race,
- * but in that case the next allocation will go into the
- * slowpath and that fixes things up.
- */
-return_pid:
- atomic_dec(&map->nr_free);
- last_pid = pid;
- return pid;
- }
-
- if (!offset || !atomic_read(&map->nr_free)) {
-next_map:
- map = next_free_map(map, &max_steps);
- if (!map)
- goto failure;
- offset = 0;
+ if (likely(atomic_read(&map->nr_free))) {
+ do {
+ if (!test_and_set_bit(offset, map->page)) {
+ atomic_dec(&map->nr_free);
+ last_pid = pid;
+ return pid;
+ }
+ offset = find_next_offset(map, offset);
+ pid = mk_pid(map, offset);
+ /*
+ * find_next_offset() found a bit, the pid from it
+ * is in-bounds, and if we fell back to the last
+ * bitmap block and the final block was the same
+ * as the starting point, pid is before last_pid.
+ */
+ } while (offset < BITS_PER_PAGE && pid < pid_max &&
+ (i != max_scan || pid < last ||
+ !((last+1) & BITS_PER_PAGE_MASK)));
+ }
+ if (map < &pidmap_array[(pid_max-1)/BITS_PER_PAGE]) {
+ ++map;
+ offset = 0;
+ } else {
+ map = &pidmap_array[0];
+ offset = RESERVED_PIDS;
+ if (unlikely(last == offset))
+ break;
+ }
+ pid = mk_pid(map, offset);
}
- /*
- * Find the next zero bit:
- */
-scan_more:
- offset = find_next_zero_bit(map->page, BITS_PER_PAGE, offset);
- if (offset >= BITS_PER_PAGE)
- goto next_map;
- if (test_and_set_bit(offset, map->page))
- goto scan_more;
-
- /* we got the PID: */
- pid = (map - pidmap_array) * BITS_PER_PAGE + offset;
- goto return_pid;
-
-failure:
return -1;
}
diff --git a/kernel/power/pm.c b/kernel/power/pm.c
index d1bc943072d409..8fca5822a80776 100644
--- a/kernel/power/pm.c
+++ b/kernel/power/pm.c
@@ -256,41 +256,10 @@ int pm_send_all(pm_request_t rqst, void *data)
return 0;
}
-/**
- * pm_find - find a device
- * @type: type of device
- * @from: where to start looking
- *
- * Scan the power management list for devices of a specific type. The
- * return value for a matching device may be passed to further calls
- * to this function to find further matches. A %NULL indicates the end
- * of the list.
- *
- * To search from the beginning pass %NULL as the @from value.
- *
- * The caller MUST hold the pm_devs_lock lock when calling this
- * function. The instant that the lock is dropped all pointers returned
- * may become invalid.
- */
-
-struct pm_dev *pm_find(pm_dev_t type, struct pm_dev *from)
-{
- struct list_head *entry = from ? from->entry.next:pm_devs.next;
- while (entry != &pm_devs) {
- struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
- if (type == PM_UNKNOWN_DEV || dev->type == type)
- return dev;
- entry = entry->next;
- }
- return NULL;
-}
-
EXPORT_SYMBOL(pm_register);
EXPORT_SYMBOL(pm_unregister);
EXPORT_SYMBOL(pm_unregister_all);
-EXPORT_SYMBOL(pm_send);
EXPORT_SYMBOL(pm_send_all);
-EXPORT_SYMBOL(pm_find);
EXPORT_SYMBOL(pm_active);
diff --git a/kernel/printk.c b/kernel/printk.c
index c02ec626f38404..390396fc6d017f 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -661,12 +661,10 @@ EXPORT_SYMBOL(release_console_sem);
*
* Must be called within acquire_console_sem().
*/
-void console_conditional_schedule(void)
+void __sched console_conditional_schedule(void)
{
- if (console_may_schedule && need_resched()) {
- set_current_state(TASK_RUNNING);
- schedule();
- }
+ if (console_may_schedule)
+ cond_resched();
}
EXPORT_SYMBOL(console_conditional_schedule);
diff --git a/kernel/profile.c b/kernel/profile.c
index 1c4375fad9230f..e7ff9b32d8220d 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -1,5 +1,16 @@
/*
* linux/kernel/profile.c
+ * Simple profiling. Manages a direct-mapped profile hit count buffer,
+ * with configurable resolution, support for restricting the cpus on
+ * which profiling is done, and switching between cpu time and
+ * schedule() calls via kernel command line parameters passed at boot.
+ *
+ * Scheduler profiling support, Arjan van de Ven and Ingo Molnar,
+ * Red Hat, July 2004
+ * Consolidation of architecture support code for profiling,
+ * William Irwin, Oracle, July 2004
+ * Amortized hit count accounting via per-cpu open-addressed hashtables
+ * to resolve timer interrupt livelocks, William Irwin, Oracle, 2004
*/
#include <linux/config.h>
@@ -9,13 +20,29 @@
#include <linux/notifier.h>
#include <linux/mm.h>
#include <linux/cpumask.h>
+#include <linux/cpu.h>
#include <linux/profile.h>
+#include <linux/highmem.h>
#include <asm/sections.h>
+#include <asm/semaphore.h>
+
+struct profile_hit {
+ u32 pc, hits;
+};
+#define PROFILE_GRPSHIFT 3
+#define PROFILE_GRPSZ (1 << PROFILE_GRPSHIFT)
+#define NR_PROFILE_HIT (PAGE_SIZE/sizeof(struct profile_hit))
+#define NR_PROFILE_GRP (NR_PROFILE_HIT/PROFILE_GRPSZ)
static atomic_t *prof_buffer;
static unsigned long prof_len, prof_shift;
static int prof_on;
static cpumask_t prof_cpu_mask = CPU_MASK_ALL;
+#ifdef CONFIG_SMP
+static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits);
+static DEFINE_PER_CPU(int, cpu_profile_flip);
+static DECLARE_MUTEX(profile_flip_mutex);
+#endif /* CONFIG_SMP */
static int __init profile_setup(char * str)
{
@@ -181,6 +208,179 @@ EXPORT_SYMBOL_GPL(task_handoff_unregister);
EXPORT_SYMBOL_GPL(profile_event_register);
EXPORT_SYMBOL_GPL(profile_event_unregister);
+#ifdef CONFIG_SMP
+/*
+ * Each cpu has a pair of open-addressed hashtables for pending
+ * profile hits. read_profile() IPI's all cpus to request them
+ * to flip buffers and flushes their contents to prof_buffer itself.
+ * Flip requests are serialized by the profile_flip_mutex. The sole
+ * use of having a second hashtable is for avoiding cacheline
+ * contention that would otherwise happen during flushes of pending
+ * profile hits required for the accuracy of reported profile hits
+ * and so resurrect the interrupt livelock issue.
+ *
+ * The open-addressed hashtables are indexed by profile buffer slot
+ * and hold the number of pending hits to that profile buffer slot on
+ * a cpu in an entry. When the hashtable overflows, all pending hits
+ * are accounted to their corresponding profile buffer slots with
+ * atomic_add() and the hashtable emptied. As numerous pending hits
+ * may be accounted to a profile buffer slot in a hashtable entry,
+ * this amortizes a number of atomic profile buffer increments likely
+ * to be far larger than the number of entries in the hashtable,
+ * particularly given that the number of distinct profile buffer
+ * positions to which hits are accounted during short intervals (e.g.
+ * several seconds) is usually very small. Exclusion from buffer
+ * flipping is provided by interrupt disablement (note that for
+ * SCHED_PROFILING profile_hit() may be called from process context).
+ * The hash function is meant to be lightweight as opposed to strong,
+ * and was vaguely inspired by ppc64 firmware-supported inverted
+ * pagetable hash functions, but uses a full hashtable full of finite
+ * collision chains, not just pairs of them.
+ *
+ * -- wli
+ */
+static void __profile_flip_buffers(void *unused)
+{
+ int cpu = smp_processor_id();
+
+ per_cpu(cpu_profile_flip, cpu) = !per_cpu(cpu_profile_flip, cpu);
+}
+
+static void profile_flip_buffers(void)
+{
+ int i, j, cpu;
+
+ down(&profile_flip_mutex);
+ j = per_cpu(cpu_profile_flip, get_cpu());
+ put_cpu();
+ on_each_cpu(__profile_flip_buffers, NULL, 0, 1);
+ for_each_online_cpu(cpu) {
+ struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j];
+ for (i = 0; i < NR_PROFILE_HIT; ++i) {
+ if (!hits[i].hits) {
+ if (hits[i].pc)
+ hits[i].pc = 0;
+ continue;
+ }
+ atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]);
+ hits[i].hits = hits[i].pc = 0;
+ }
+ }
+ up(&profile_flip_mutex);
+}
+
+static void profile_discard_flip_buffers(void)
+{
+ int i, cpu;
+
+ down(&profile_flip_mutex);
+ i = per_cpu(cpu_profile_flip, get_cpu());
+ put_cpu();
+ on_each_cpu(__profile_flip_buffers, NULL, 0, 1);
+ for_each_online_cpu(cpu) {
+ struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i];
+ memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit));
+ }
+ up(&profile_flip_mutex);
+}
+
+void profile_hit(int type, void *__pc)
+{
+ unsigned long primary, secondary, flags, pc = (unsigned long)__pc;
+ int i, j, cpu;
+ struct profile_hit *hits;
+
+ if (prof_on != type || !prof_buffer)
+ return;
+ pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1);
+ i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
+ secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
+ cpu = get_cpu();
+ hits = per_cpu(cpu_profile_hits, cpu)[per_cpu(cpu_profile_flip, cpu)];
+ if (!hits) {
+ put_cpu();
+ return;
+ }
+ local_irq_save(flags);
+ do {
+ for (j = 0; j < PROFILE_GRPSZ; ++j) {
+ if (hits[i + j].pc == pc) {
+ hits[i + j].hits++;
+ goto out;
+ } else if (!hits[i + j].hits) {
+ hits[i + j].pc = pc;
+ hits[i + j].hits = 1;
+ goto out;
+ }
+ }
+ i = (i + secondary) & (NR_PROFILE_HIT - 1);
+ } while (i != primary);
+ atomic_inc(&prof_buffer[pc]);
+ for (i = 0; i < NR_PROFILE_HIT; ++i) {
+ atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]);
+ hits[i].pc = hits[i].hits = 0;
+ }
+out:
+ local_irq_restore(flags);
+ put_cpu();
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int __devinit profile_cpu_callback(struct notifier_block *info,
+ unsigned long action, void *__cpu)
+{
+ int node, cpu = (unsigned long)__cpu;
+ struct page *page;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ node = cpu_to_node(cpu);
+ per_cpu(cpu_profile_flip, cpu) = 0;
+ if (!per_cpu(cpu_profile_hits, cpu)[1]) {
+ page = alloc_pages_node(node, GFP_KERNEL, 0);
+ if (!page)
+ return NOTIFY_BAD;
+ clear_highpage(page);
+ per_cpu(cpu_profile_hits, cpu)[1] = page_address(page);
+ }
+ if (!per_cpu(cpu_profile_hits, cpu)[0]) {
+ page = alloc_pages_node(node, GFP_KERNEL, 0);
+ if (!page)
+ goto out_free;
+ clear_highpage(page);
+ per_cpu(cpu_profile_hits, cpu)[0] = page_address(page);
+ }
+ break;
+ out_free:
+ page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
+ per_cpu(cpu_profile_hits, cpu)[1] = NULL;
+ __free_page(page);
+ return NOTIFY_BAD;
+ case CPU_ONLINE:
+ cpu_set(cpu, prof_cpu_mask);
+ break;
+ case CPU_UP_CANCELED:
+ case CPU_DEAD:
+ cpu_clear(cpu, prof_cpu_mask);
+ if (per_cpu(cpu_profile_hits, cpu)[0]) {
+ page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]);
+ per_cpu(cpu_profile_hits, cpu)[0] = NULL;
+ __free_page(page);
+ }
+ if (per_cpu(cpu_profile_hits, cpu)[1]) {
+ page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
+ per_cpu(cpu_profile_hits, cpu)[1] = NULL;
+ __free_page(page);
+ }
+ break;
+ }
+ return NOTIFY_OK;
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+#else /* !CONFIG_SMP */
+#define profile_flip_buffers() do { } while (0)
+#define profile_discard_flip_buffers() do { } while (0)
+
void profile_hit(int type, void *__pc)
{
unsigned long pc;
@@ -190,6 +390,7 @@ void profile_hit(int type, void *__pc)
pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift;
atomic_inc(&prof_buffer[min(pc, prof_len - 1)]);
}
+#endif /* !CONFIG_SMP */
void profile_tick(int type, struct pt_regs *regs)
{
@@ -256,6 +457,7 @@ read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos)
char * pnt;
unsigned int sample_step = 1 << prof_shift;
+ profile_flip_buffers();
if (p >= (prof_len+1)*sizeof(unsigned int))
return 0;
if (count > (prof_len+1)*sizeof(unsigned int) - p)
@@ -296,7 +498,7 @@ static ssize_t write_profile(struct file *file, const char __user *buf,
return -EINVAL;
}
#endif
-
+ profile_discard_flip_buffers();
memset(prof_buffer, 0, prof_len * sizeof(atomic_t));
return count;
}
@@ -306,16 +508,70 @@ static struct file_operations proc_profile_operations = {
.write = write_profile,
};
+#ifdef CONFIG_SMP
+static void __init profile_nop(void *unused)
+{
+}
+
+static int __init create_hash_tables(void)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ int node = cpu_to_node(cpu);
+ struct page *page;
+
+ page = alloc_pages_node(node, GFP_KERNEL, 0);
+ if (!page)
+ goto out_cleanup;
+ clear_highpage(page);
+ per_cpu(cpu_profile_hits, cpu)[1]
+ = (struct profile_hit *)page_address(page);
+ page = alloc_pages_node(node, GFP_KERNEL, 0);
+ if (!page)
+ goto out_cleanup;
+ clear_highpage(page);
+ per_cpu(cpu_profile_hits, cpu)[0]
+ = (struct profile_hit *)page_address(page);
+ }
+ return 0;
+out_cleanup:
+ prof_on = 0;
+ mb();
+ on_each_cpu(profile_nop, NULL, 0, 1);
+ for_each_online_cpu(cpu) {
+ struct page *page;
+
+ if (per_cpu(cpu_profile_hits, cpu)[0]) {
+ page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]);
+ per_cpu(cpu_profile_hits, cpu)[0] = NULL;
+ __free_page(page);
+ }
+ if (per_cpu(cpu_profile_hits, cpu)[1]) {
+ page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
+ per_cpu(cpu_profile_hits, cpu)[1] = NULL;
+ __free_page(page);
+ }
+ }
+ return -1;
+}
+#else
+#define create_hash_tables() ({ 0; })
+#endif
+
static int __init create_proc_profile(void)
{
struct proc_dir_entry *entry;
if (!prof_on)
return 0;
+ if (create_hash_tables())
+ return -1;
if (!(entry = create_proc_entry("profile", S_IWUSR | S_IRUGO, NULL)))
return 0;
entry->proc_fops = &proc_profile_operations;
entry->size = (1+prof_len) * sizeof(atomic_t);
+ hotcpu_notifier(profile_cpu_callback, 0);
return 0;
}
module_init(create_proc_profile);
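The SMP profile_hit() above batches counts in a small per-cpu open-addressed hashtable so that a hot profile slot costs one local increment instead of one cross-cpu atomic per tick: a primary hash of the profile position picks a group of slots, the group is probed linearly, a secondary stride moves to the next group on a miss, and only when the whole table is full are the pending hits drained into prof_buffer with atomic_add(). A single-threaded userspace model of that amortization (constants and names below are chosen for illustration, and plain adds stand in for the atomics):

#include <stdio.h>

#define NR_SLOTS	1024			/* stand-in for prof_len */
#define GRPSHIFT	3
#define GRPSZ		(1 << GRPSHIFT)
#define NR_HIT		64			/* pending-table entries */
#define NR_GRP		(NR_HIT / GRPSZ)

struct pending { unsigned int pc, hits; };

static unsigned long buffer[NR_SLOTS];		/* shared counters (prof_buffer) */
static struct pending table[NR_HIT];		/* "per-cpu" pending hits */

static void flush_pending(void)
{
	int i;

	for (i = 0; i < NR_HIT; ++i) {
		buffer[table[i].pc] += table[i].hits;	/* atomic_add() in the kernel */
		table[i].pc = table[i].hits = 0;
	}
}

/* Caller guarantees pc < NR_SLOTS (profile_hit() clamps with min()). */
static void record_hit(unsigned int pc)
{
	unsigned int primary = (pc & (NR_GRP - 1)) << GRPSHIFT;
	unsigned int secondary = (~(pc << 1) & (NR_GRP - 1)) << GRPSHIFT;
	unsigned int i = primary, j;

	do {
		for (j = 0; j < GRPSZ; ++j) {
			if (table[i + j].pc == pc) {	/* already pending: batch it */
				table[i + j].hits++;
				return;
			}
			if (!table[i + j].hits) {	/* free slot: claim it */
				table[i + j].pc = pc;
				table[i + j].hits = 1;
				return;
			}
		}
		i = (i + secondary) & (NR_HIT - 1);	/* probe the next group */
	} while (i != primary);

	buffer[pc]++;					/* table full: count directly, */
	flush_pending();				/* then drain and start over */
}

int main(void)
{
	unsigned int pc;

	for (pc = 0; pc < 100000; ++pc)
		record_hit(pc % 5);			/* hammer a few hot slots */
	flush_pending();
	for (pc = 0; pc < 5; ++pc)
		printf("slot %u: %lu hits\n", pc, buffer[pc]);
	return 0;
}

read_profile() gets a consistent view by flipping each cpu to its second hashtable and draining the idle one, which is why the per-cpu state above is a pair of tables rather than a single one.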
diff --git a/kernel/signal.c b/kernel/signal.c
index f67390806d7367..ba039fab37e8b5 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1143,36 +1143,6 @@ kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
return retval;
}
-/*
- * kill_sl_info() sends a signal to the session leader: this is used
- * to send SIGHUP to the controlling process of a terminal when
- * the connection is lost.
- */
-
-
-int
-kill_sl_info(int sig, struct siginfo *info, pid_t sid)
-{
- int err, retval = -EINVAL;
- struct task_struct *p;
-
- if (sid <= 0)
- goto out;
-
- retval = -ESRCH;
- read_lock(&tasklist_lock);
- do_each_task_pid(sid, PIDTYPE_SID, p) {
- if (!p->signal->leader)
- continue;
- err = group_send_sig_info(sig, info, p);
- if (retval)
- retval = err;
- } while_each_task_pid(sid, PIDTYPE_SID, p);
- read_unlock(&tasklist_lock);
-out:
- return retval;
-}
-
int
kill_proc_info(int sig, struct siginfo *info, pid_t pid)
{
@@ -1309,12 +1279,6 @@ kill_pg(pid_t pgrp, int sig, int priv)
}
int
-kill_sl(pid_t sess, int sig, int priv)
-{
- return kill_sl_info(sig, (void *)(long)(priv != 0), sess);
-}
-
-int
kill_proc(pid_t pid, int sig, int priv)
{
return kill_proc_info(sig, (void *)(long)(priv != 0), pid);
@@ -1978,22 +1942,11 @@ relock:
EXPORT_SYMBOL(recalc_sigpending);
EXPORT_SYMBOL_GPL(dequeue_signal);
EXPORT_SYMBOL(flush_signals);
-EXPORT_SYMBOL(force_sig);
-EXPORT_SYMBOL(force_sig_info);
EXPORT_SYMBOL(kill_pg);
-EXPORT_SYMBOL(kill_pg_info);
EXPORT_SYMBOL(kill_proc);
-EXPORT_SYMBOL(kill_proc_info);
-EXPORT_SYMBOL(kill_sl);
-EXPORT_SYMBOL(kill_sl_info);
EXPORT_SYMBOL(ptrace_notify);
EXPORT_SYMBOL(send_sig);
EXPORT_SYMBOL(send_sig_info);
-EXPORT_SYMBOL(send_group_sig_info);
-EXPORT_SYMBOL(sigqueue_alloc);
-EXPORT_SYMBOL(sigqueue_free);
-EXPORT_SYMBOL(send_sigqueue);
-EXPORT_SYMBOL(send_group_sigqueue);
EXPORT_SYMBOL(sigprocmask);
EXPORT_SYMBOL(block_all_signals);
EXPORT_SYMBOL(unblock_all_signals);
diff --git a/kernel/sys.c b/kernel/sys.c
index a95e3900dc1e23..e6dbc2940751a3 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -19,6 +19,7 @@
#include <linux/fs.h>
#include <linux/workqueue.h>
#include <linux/device.h>
+#include <linux/key.h>
#include <linux/times.h>
#include <linux/security.h>
#include <linux/dcookies.h>
@@ -282,6 +283,9 @@ cond_syscall(sys_set_mempolicy)
cond_syscall(compat_mbind)
cond_syscall(compat_get_mempolicy)
cond_syscall(compat_set_mempolicy)
+cond_syscall(sys_add_key)
+cond_syscall(sys_request_key)
+cond_syscall(sys_keyctl)
/* arch-specific weak syscall entries */
cond_syscall(sys_pciconfig_read)
@@ -605,6 +609,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
current->fsgid = new_egid;
current->egid = new_egid;
current->gid = new_rgid;
+ key_fsgid_changed(current);
return 0;
}
@@ -642,6 +647,8 @@ asmlinkage long sys_setgid(gid_t gid)
}
else
return -EPERM;
+
+ key_fsgid_changed(current);
return 0;
}
@@ -730,6 +737,8 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
current->suid = current->euid;
current->fsuid = current->euid;
+ key_fsuid_changed(current);
+
return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE);
}
@@ -775,6 +784,8 @@ asmlinkage long sys_setuid(uid_t uid)
current->fsuid = current->euid = uid;
current->suid = new_suid;
+ key_fsuid_changed(current);
+
return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID);
}
@@ -821,6 +832,8 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
if (suid != (uid_t) -1)
current->suid = suid;
+ key_fsuid_changed(current);
+
return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES);
}
@@ -870,6 +883,8 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
current->gid = rgid;
if (sgid != (gid_t) -1)
current->sgid = sgid;
+
+ key_fsgid_changed(current);
return 0;
}
@@ -911,6 +926,8 @@ asmlinkage long sys_setfsuid(uid_t uid)
current->fsuid = uid;
}
+ key_fsuid_changed(current);
+
security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS);
return old_fsuid;
@@ -937,6 +954,7 @@ asmlinkage long sys_setfsgid(gid_t gid)
wmb();
}
current->fsgid = gid;
+ key_fsgid_changed(current);
}
return old_fsgid;
}
@@ -1669,7 +1687,7 @@ asmlinkage long sys_umask(int mask)
asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
unsigned long arg4, unsigned long arg5)
{
- int error;
+ long error;
int sig;
error = security_task_prctl(option, arg2, arg3, arg4, arg5);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 469cf0c2f26ece..80bf15f035cde2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -66,6 +66,7 @@ extern int sysctl_lower_zone_protection;
extern int min_free_kbytes;
extern int printk_ratelimit_jiffies;
extern int printk_ratelimit_burst;
+extern int pid_max_min, pid_max_max;
#if defined(CONFIG_X86_LOCAL_APIC) && defined(__i386__)
int unknown_nmi_panic;
@@ -575,7 +576,10 @@ static ctl_table kern_table[] = {
.data = &pid_max,
.maxlen = sizeof (int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = sysctl_intvec,
+ .extra1 = &pid_max_min,
+ .extra2 = &pid_max_max,
},
{
.ctl_name = KERN_PANIC_ON_OOPS,
diff --git a/kernel/timer.c b/kernel/timer.c
index e3c9b5fcd52f56..ac9386e22bd332 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -959,11 +959,6 @@ static inline void update_times(void)
void do_timer(struct pt_regs *regs)
{
jiffies_64++;
-#ifndef CONFIG_SMP
- /* SMP process accounting uses the local APIC timer */
-
- update_process_times(user_mode(regs));
-#endif
update_times();
}
diff --git a/kernel/user.c b/kernel/user.c
index 523175afeecdca..693487dc940e6e 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -12,6 +12,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/bitops.h>
+#include <linux/key.h>
/*
* UID task count cache, to get fast user lookup in "alloc_uid"
@@ -34,6 +35,10 @@ struct user_struct root_user = {
.sigpending = ATOMIC_INIT(0),
.mq_bytes = 0,
.locked_shm = 0,
+#ifdef CONFIG_KEYS
+ .uid_keyring = &root_user_keyring,
+ .session_keyring = &root_session_keyring,
+#endif
};
/*
@@ -87,6 +92,8 @@ void free_uid(struct user_struct *up)
{
if (up && atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
uid_hash_remove(up);
+ key_put(up->uid_keyring);
+ key_put(up->session_keyring);
kmem_cache_free(uid_cachep, up);
spin_unlock(&uidhash_lock);
}
@@ -116,6 +123,11 @@ struct user_struct * alloc_uid(uid_t uid)
new->mq_bytes = 0;
new->locked_shm = 0;
+ if (alloc_uid_keyring(new) < 0) {
+ kmem_cache_free(uid_cachep, new);
+ return NULL;
+ }
+
/*
* Before adding this, check whether we raced
* on adding the same user already..
@@ -123,6 +135,8 @@ struct user_struct * alloc_uid(uid_t uid)
spin_lock(&uidhash_lock);
up = uid_hash_find(uid, hashent);
if (up) {
+ key_put(new->uid_keyring);
+ key_put(new->session_keyring);
kmem_cache_free(uid_cachep, new);
} else {
uid_hash_insert(new, hashent);
@@ -146,8 +160,10 @@ void switch_uid(struct user_struct *new_user)
old_user = current->user;
atomic_inc(&new_user->processes);
atomic_dec(&old_user->processes);
+ switch_uid_keyring(new_user);
current->user = new_user;
free_uid(old_user);
+ suid_keys(current);
}
diff --git a/kernel/wait.c b/kernel/wait.c
new file mode 100644
index 00000000000000..791681cfea981d
--- /dev/null
+++ b/kernel/wait.c
@@ -0,0 +1,246 @@
+/*
+ * Generic waiting primitives.
+ *
+ * (C) 2004 William Irwin, Oracle
+ */
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/wait.h>
+#include <linux/hash.h>
+
+void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
+{
+ unsigned long flags;
+
+ wait->flags &= ~WQ_FLAG_EXCLUSIVE;
+ spin_lock_irqsave(&q->lock, flags);
+ __add_wait_queue(q, wait);
+ spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(add_wait_queue);
+
+void fastcall add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
+{
+ unsigned long flags;
+
+ wait->flags |= WQ_FLAG_EXCLUSIVE;
+ spin_lock_irqsave(&q->lock, flags);
+ __add_wait_queue_tail(q, wait);
+ spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(add_wait_queue_exclusive);
+
+void fastcall remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&q->lock, flags);
+ __remove_wait_queue(q, wait);
+ spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(remove_wait_queue);
+
+
+/*
+ * Note: we use "set_current_state()" _after_ the wait-queue add,
+ * because we need a memory barrier there on SMP, so that any
+ * wake-function that tests for the wait-queue being active
+ * will be guaranteed to see waitqueue addition _or_ subsequent
+ * tests in this thread will see the wakeup having taken place.
+ *
+ * The spin_unlock() itself is semi-permeable and only protects
+ * one way (it only protects stuff inside the critical region and
+ * stops them from bleeding out - it would still allow subsequent
+ * loads to move into the critical region).
+ */
+void fastcall
+prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
+{
+ unsigned long flags;
+
+ wait->flags &= ~WQ_FLAG_EXCLUSIVE;
+ spin_lock_irqsave(&q->lock, flags);
+ if (list_empty(&wait->task_list))
+ __add_wait_queue(q, wait);
+ /*
+ * don't alter the task state if this is just going to
+ * queue an async wait queue callback
+ */
+ if (is_sync_wait(wait))
+ set_current_state(state);
+ spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(prepare_to_wait);
+
+void fastcall
+prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
+{
+ unsigned long flags;
+
+ wait->flags |= WQ_FLAG_EXCLUSIVE;
+ spin_lock_irqsave(&q->lock, flags);
+ if (list_empty(&wait->task_list))
+ __add_wait_queue_tail(q, wait);
+ /*
+ * don't alter the task state if this is just going to
+ * queue an async wait queue callback
+ */
+ if (is_sync_wait(wait))
+ set_current_state(state);
+ spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(prepare_to_wait_exclusive);
+
+void fastcall finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
+{
+ unsigned long flags;
+
+ __set_current_state(TASK_RUNNING);
+ /*
+ * We can check for list emptiness outside the lock
+ * IFF:
+ * - we use the "careful" check that verifies both
+ * the next and prev pointers, so that there cannot
+ * be any half-pending updates in progress on other
+ * CPU's that we haven't seen yet (and that might
+ * still change the stack area.
+ * and
+ * - all other users take the lock (ie we can only
+ * have _one_ other CPU that looks at or modifies
+ * the list).
+ */
+ if (!list_empty_careful(&wait->task_list)) {
+ spin_lock_irqsave(&q->lock, flags);
+ list_del_init(&wait->task_list);
+ spin_unlock_irqrestore(&q->lock, flags);
+ }
+}
+EXPORT_SYMBOL(finish_wait);
+
+int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+ int ret = default_wake_function(wait, mode, sync, key);
+
+ if (ret)
+ list_del_init(&wait->task_list);
+ return ret;
+}
+EXPORT_SYMBOL(autoremove_wake_function);
+
+int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
+{
+ struct wait_bit_key *key = arg;
+ struct wait_bit_queue *wait_bit
+ = container_of(wait, struct wait_bit_queue, wait);
+
+ if (wait_bit->key.flags != key->flags ||
+ wait_bit->key.bit_nr != key->bit_nr ||
+ test_bit(key->bit_nr, key->flags))
+ return 0;
+ else
+ return autoremove_wake_function(wait, mode, sync, key);
+}
+EXPORT_SYMBOL(wake_bit_function);
+
+/*
+ * To allow interruptible waiting and asynchronous (i.e. nonblocking)
+ * waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are
+ * permitted return codes. Nonzero return codes halt waiting and return.
+ */
+int __sched fastcall
+__wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
+ int (*action)(void *), unsigned mode)
+{
+ int ret = 0;
+
+ do {
+ prepare_to_wait(wq, &q->wait, mode);
+ if (test_bit(q->key.bit_nr, q->key.flags))
+ ret = (*action)(q->key.flags);
+ } while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
+ finish_wait(wq, &q->wait);
+ return ret;
+}
+EXPORT_SYMBOL(__wait_on_bit);
+
+int __sched fastcall out_of_line_wait_on_bit(void *word, int bit,
+ int (*action)(void *), unsigned mode)
+{
+ wait_queue_head_t *wq = bit_waitqueue(word, bit);
+ DEFINE_WAIT_BIT(wait, word, bit);
+
+ return __wait_on_bit(wq, &wait, action, mode);
+}
+EXPORT_SYMBOL(out_of_line_wait_on_bit);
+
+int __sched fastcall
+__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
+ int (*action)(void *), unsigned mode)
+{
+ int ret = 0;
+
+ do {
+ prepare_to_wait_exclusive(wq, &q->wait, mode);
+ if (test_bit(q->key.bit_nr, q->key.flags)) {
+ if ((ret = (*action)(q->key.flags)))
+ break;
+ }
+ } while (test_and_set_bit(q->key.bit_nr, q->key.flags));
+ finish_wait(wq, &q->wait);
+ return ret;
+}
+EXPORT_SYMBOL(__wait_on_bit_lock);
+
+int __sched fastcall out_of_line_wait_on_bit_lock(void *word, int bit,
+ int (*action)(void *), unsigned mode)
+{
+ wait_queue_head_t *wq = bit_waitqueue(word, bit);
+ DEFINE_WAIT_BIT(wait, word, bit);
+
+ return __wait_on_bit_lock(wq, &wait, action, mode);
+}
+EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
+
+void fastcall __wake_up_bit(wait_queue_head_t *wq, void *word, int bit)
+{
+ struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
+ if (waitqueue_active(wq))
+ __wake_up(wq, TASK_INTERRUPTIBLE|TASK_UNINTERRUPTIBLE, 1, &key);
+}
+EXPORT_SYMBOL(__wake_up_bit);
+
+/**
+ * wake_up_bit - wake up a waiter on a bit
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ *
+ * There is a standard hashed waitqueue table for generic use. This
+ * is the part of the hashtable's accessor API that wakes up waiters
+ * on a bit. For instance, if one were to have waiters on a bitflag,
+ * one would call wake_up_bit() after clearing the bit.
+ *
+ * In order for this to function properly, as it uses waitqueue_active()
+ * internally, some kind of memory barrier must be done prior to calling
+ * this. Typically, this will be smp_mb__after_clear_bit(), but in some
+ * cases where bitflags are manipulated non-atomically under a lock, one
+ * may need to use a less regular barrier, such as fs/inode.c's smp_mb(),
+ * because spin_unlock() does not guarantee a memory barrier.
+ */
+void fastcall wake_up_bit(void *word, int bit)
+{
+ __wake_up_bit(bit_waitqueue(word, bit), word, bit);
+}
+EXPORT_SYMBOL(wake_up_bit);
+
+fastcall wait_queue_head_t *bit_waitqueue(void *word, int bit)
+{
+ const int shift = BITS_PER_LONG == 32 ? 5 : 6;
+ const struct zone *zone = page_zone(virt_to_page(word));
+ unsigned long val = (unsigned long)word << shift | bit;
+
+ return &zone->wait_table[hash_long(val, zone->wait_table_bits)];
+}
+EXPORT_SYMBOL(bit_waitqueue);
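A typical pairing of the new bit-wait primitives, following the wake_up_bit() comment above: the sleeping side calls out_of_line_wait_on_bit() with an action callback that simply schedules, and the completing side clears the flag, issues the barrier the comment asks for, then kicks the hashed waitqueue. This is a hedged kernel-style sketch, not buildable on its own; the struct, flag bit and function names are invented, and only the wait/wake calls, clear_bit() and smp_mb__after_clear_bit() come from the code and comments above:

#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/bitops.h>

#define MY_FLAG_BUSY	0		/* bit number inside obj->flags */

struct my_object {
	unsigned long flags;		/* MY_FLAG_BUSY set while work is in flight */
};

/* Action callback: invoked while the bit is still set; sleep and retry. */
static int my_wait_action(void *word)
{
	schedule();
	return 0;			/* nonzero would abort the wait */
}

/* Waiter: block until MY_FLAG_BUSY has been cleared. */
static void my_wait_until_idle(struct my_object *obj)
{
	out_of_line_wait_on_bit(&obj->flags, MY_FLAG_BUSY,
				my_wait_action, TASK_UNINTERRUPTIBLE);
}

/* Completion side: clear the bit, then wake anyone waiting on it. */
static void my_mark_idle(struct my_object *obj)
{
	clear_bit(MY_FLAG_BUSY, &obj->flags);
	smp_mb__after_clear_bit();	/* ordering before waitqueue_active() in __wake_up_bit() */
	wake_up_bit(&obj->flags, MY_FLAG_BUSY);
}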