commit		098fc560ef2bbd1bde80845c898fa95db616eb6c (merge)
author		Linus Torvalds <torvalds@ppc970.osdl.org>	2004-10-18 20:50:22 -0700
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2004-10-18 20:50:22 -0700
tree		ca722c6fdbdffe9b7cfd31d61e8f4aae906a319c	/kernel
parent		bffe01870598b7a0a77073e25ee94e026bc98e6b
parent		2a136606fe21b603a0ce484fc578f862f8e8384d
download	history-098fc560ef2bbd1bde80845c898fa95db616eb6c.tar.gz
Trivial Makefile merge
Diffstat (limited to 'kernel')

 kernel/Makefile   |   2
 kernel/exit.c     |   4
 kernel/fork.c     | 139
 kernel/kallsyms.c | 194
 kernel/kfifo.c    | 170
 kernel/panic.c    |  16
 kernel/pid.c      | 117
 kernel/power/pm.c |  31
 kernel/printk.c   |   8
 kernel/profile.c  | 258
 kernel/signal.c   |  47
 kernel/sys.c      |  20
 kernel/sysctl.c   |   6
 kernel/timer.c    |   5
 kernel/user.c     |  16
 kernel/wait.c     | 246

 16 files changed, 928 insertions, 351 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index c4337751cee716..abab504f01e120 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -7,7 +7,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    sysctl.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o intermodule.o extable.o params.o posix-timers.o \
-	    kthread.o
+	    kthread.o wait.o kfifo.o
 
 obj-$(CONFIG_FUTEX) += futex.o
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
diff --git a/kernel/exit.c b/kernel/exit.c
index 55d85339252454..a8ae81ed1d41af 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -14,6 +14,7 @@
 #include <linux/personality.h>
 #include <linux/tty.h>
 #include <linux/namespace.h>
+#include <linux/key.h>
 #include <linux/security.h>
 #include <linux/cpu.h>
 #include <linux/acct.h>
@@ -511,8 +512,6 @@ void exit_mm(struct task_struct *tsk)
 	__exit_mm(tsk);
 }
 
-EXPORT_SYMBOL(exit_mm);
-
 static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper)
 {
 	/*
@@ -816,6 +815,7 @@ asmlinkage NORET_TYPE void do_exit(long code)
 	__exit_fs(tsk);
 	exit_namespace(tsk);
 	exit_thread();
+	exit_keys(tsk);
 
 	if (tsk->signal->leader)
 		disassociate_ctty(1);
diff --git a/kernel/fork.c b/kernel/fork.c
index 3020dccc548ffb..96714c501cc831 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -24,6 +24,7 @@
 #include <linux/mempolicy.h>
 #include <linux/sem.h>
 #include <linux/file.h>
+#include <linux/key.h>
 #include <linux/binfmts.h>
 #include <linux/mman.h>
 #include <linux/fs.h>
@@ -100,131 +101,6 @@ void __put_task_struct(struct task_struct *tsk)
 	free_task(tsk);
 }
 
-void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
-{
-	unsigned long flags;
-
-	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
-	__add_wait_queue(q, wait);
-	spin_unlock_irqrestore(&q->lock, flags);
-}
-
-EXPORT_SYMBOL(add_wait_queue);
-
-void fastcall add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)
-{
-	unsigned long flags;
-
-	wait->flags |= WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
-	__add_wait_queue_tail(q, wait);
-	spin_unlock_irqrestore(&q->lock, flags);
-}
-
-EXPORT_SYMBOL(add_wait_queue_exclusive);
-
-void fastcall remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&q->lock, flags);
-	__remove_wait_queue(q, wait);
-	spin_unlock_irqrestore(&q->lock, flags);
-}
-
-EXPORT_SYMBOL(remove_wait_queue);
-
-
-/*
- * Note: we use "set_current_state()" _after_ the wait-queue add,
- * because we need a memory barrier there on SMP, so that any
- * wake-function that tests for the wait-queue being active
- * will be guaranteed to see waitqueue addition _or_ subsequent
- * tests in this thread will see the wakeup having taken place.
- *
- * The spin_unlock() itself is semi-permeable and only protects
- * one way (it only protects stuff inside the critical region and
- * stops them from bleeding out - it would still allow subsequent
- * loads to move into the the critical region).
- */
-void fastcall prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
-{
-	unsigned long flags;
-
-	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
-	if (list_empty(&wait->task_list))
-		__add_wait_queue(q, wait);
-	/*
-	 * don't alter the task state if this is just going to
-	 * queue an async wait queue callback
-	 */
-	if (is_sync_wait(wait))
-		set_current_state(state);
-	spin_unlock_irqrestore(&q->lock, flags);
-}
-
-EXPORT_SYMBOL(prepare_to_wait);
-
-void fastcall
-prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
-{
-	unsigned long flags;
-
-	wait->flags |= WQ_FLAG_EXCLUSIVE;
-	spin_lock_irqsave(&q->lock, flags);
-	if (list_empty(&wait->task_list))
-		__add_wait_queue_tail(q, wait);
-	/*
-	 * don't alter the task state if this is just going to
-	 * queue an async wait queue callback
-	 */
-	if (is_sync_wait(wait))
-		set_current_state(state);
-	spin_unlock_irqrestore(&q->lock, flags);
-}
-
-EXPORT_SYMBOL(prepare_to_wait_exclusive);
-
-void fastcall finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
-{
-	unsigned long flags;
-
-	__set_current_state(TASK_RUNNING);
-	/*
-	 * We can check for list emptiness outside the lock
-	 * IFF:
-	 * - we use the "careful" check that verifies both
-	 *   the next and prev pointers, so that there cannot
-	 *   be any half-pending updates in progress on other
-	 *   CPU's that we haven't seen yet (and that might
-	 *   still change the stack area.
-	 * and
-	 * - all other users take the lock (ie we can only
-	 *   have _one_ other CPU that looks at or modifies
-	 *   the list).
-	 */
-	if (!list_empty_careful(&wait->task_list)) {
-		spin_lock_irqsave(&q->lock, flags);
-		list_del_init(&wait->task_list);
-		spin_unlock_irqrestore(&q->lock, flags);
-	}
-}
-
-EXPORT_SYMBOL(finish_wait);
-
-int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
-{
-	int ret = default_wake_function(wait, mode, sync, key);
-
-	if (ret)
-		list_del_init(&wait->task_list);
-	return ret;
-}
-
-EXPORT_SYMBOL(autoremove_wake_function);
-
 void __init fork_init(unsigned long mempages)
 {
 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
@@ -426,6 +302,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
 	atomic_set(&mm->mm_count, 1);
 	init_rwsem(&mm->mmap_sem);
 	mm->core_waiters = 0;
+	mm->nr_ptes = 0;
 	mm->page_table_lock = SPIN_LOCK_UNLOCKED;
 	mm->ioctx_list_lock = RW_LOCK_UNLOCKED;
 	mm->ioctx_list = NULL;
@@ -1019,6 +896,10 @@ static task_t *copy_process(unsigned long clone_flags,
 	}
 #endif
 
+	p->tgid = p->pid;
+	if (clone_flags & CLONE_THREAD)
+		p->tgid = current->tgid;
+
 	if ((retval = security_task_alloc(p)))
 		goto bad_fork_cleanup_policy;
 	if ((retval = audit_alloc(p)))
@@ -1036,8 +917,10 @@ static task_t *copy_process(unsigned long clone_flags,
 		goto bad_fork_cleanup_sighand;
 	if ((retval = copy_mm(clone_flags, p)))
 		goto bad_fork_cleanup_signal;
-	if ((retval = copy_namespace(clone_flags, p)))
+	if ((retval = copy_keys(clone_flags, p)))
 		goto bad_fork_cleanup_mm;
+	if ((retval = copy_namespace(clone_flags, p)))
+		goto bad_fork_cleanup_keys;
 	retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
 	if (retval)
 		goto bad_fork_cleanup_namespace;
@@ -1071,7 +954,6 @@ static task_t *copy_process(unsigned long clone_flags,
 	 * Ok, make it visible to the rest of the system.
 	 * We dont wake it up yet.
 	 */
-	p->tgid = p->pid;
 	p->group_leader = p;
 	INIT_LIST_HEAD(&p->ptrace_children);
 	INIT_LIST_HEAD(&p->ptrace_list);
@@ -1119,7 +1001,6 @@ static task_t *copy_process(unsigned long clone_flags,
 			retval = -EAGAIN;
 			goto bad_fork_cleanup_namespace;
 		}
-		p->tgid = current->tgid;
 		p->group_leader = current->group_leader;
 
 		if (current->signal->group_stop_count > 0) {
@@ -1159,6 +1040,8 @@ fork_out:
 
 bad_fork_cleanup_namespace:
 	exit_namespace(p);
+bad_fork_cleanup_keys:
+	exit_keys(p);
bad_fork_cleanup_mm:
 	if (p->mm)
 		mmput(p->mm);
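The copy_keys()/copy_namespace() hunk above threads a new resource into copy_process()'s goto-based unwind ladder: each successful acquisition adds a cleanup label, and a failure jumps to the label that releases everything acquired so far, in reverse order. A minimal user-space sketch of the same idiom; every name in it is invented for illustration:

#include <stdio.h>

struct object { int a, b, c; };

/* Hypothetical setup/teardown pairs standing in for copy_keys(),
 * copy_namespace(), copy_thread() and their cleanup counterparts. */
static int setup_a(struct object *o) { o->a = 1; return 0; }
static int setup_b(struct object *o) { o->b = 1; return 0; }
static int setup_c(struct object *o) { o->c = 1; return 0; }
static void teardown_a(struct object *o) { o->a = 0; }
static void teardown_b(struct object *o) { o->b = 0; }

static int create_object(struct object *obj)
{
	int err;

	if ((err = setup_a(obj)))
		goto fail;
	if ((err = setup_b(obj)))
		goto cleanup_a;		/* b failed: undo a only */
	if ((err = setup_c(obj)))
		goto cleanup_b;		/* c failed: undo b, then a */
	return 0;

cleanup_b:
	teardown_b(obj);
cleanup_a:
	teardown_a(obj);
fail:
	return err;
}

int main(void)
{
	struct object o = { 0, 0, 0 };
	printf("create_object: %d\n", create_object(&o));
	return 0;
}

Inserting bad_fork_cleanup_keys between the namespace and mm labels is exactly this pattern: the label order mirrors the reverse of the acquisition order.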
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 74ba3cb2180945..8f3c6c1d1ce7a6 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -4,7 +4,12 @@
  * Rewritten and vastly simplified by Rusty Russell for in-kernel
  * module loader:
  *   Copyright 2002 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
- * Stem compression by Andi Kleen.
+ *
+ * ChangeLog:
+ *
+ * (25/Aug/2004) Paulo Marques <pmarques@grupopie.com>
+ *      Changed the compression method from stem compression to "table lookup"
+ *      compression (see scripts/kallsyms.c for a more complete description)
  */
 #include <linux/kallsyms.h>
 #include <linux/module.h>
@@ -17,7 +22,12 @@
 /* These will be re-linked against their real values during the second link stage */
 extern unsigned long kallsyms_addresses[] __attribute__((weak));
 extern unsigned long kallsyms_num_syms __attribute__((weak));
-extern char kallsyms_names[] __attribute__((weak));
+extern u8 kallsyms_names[] __attribute__((weak));
+
+extern u8 kallsyms_token_table[] __attribute__((weak));
+extern u16 kallsyms_token_index[] __attribute__((weak));
+
+extern unsigned long kallsyms_markers[] __attribute__((weak));
 
 /* Defined by the linker script. */
 extern char _stext[], _etext[], _sinittext[], _einittext[];
@@ -37,21 +47,88 @@ static inline int is_kernel_text(unsigned long addr)
 	return 0;
 }
 
+/* expand a compressed symbol into the resulting uncompressed string,
+   given the offset to where the symbol is in the compressed stream */
+static unsigned int kallsyms_expand_symbol(unsigned int off, char *result)
+{
+	int len, skipped_first = 0;
+	u8 *tptr, *data;
+
+	/* get the compressed symbol length from the first symbol byte */
+	data = &kallsyms_names[off];
+	len = *data;
+	data++;
+
+	/* update the offset to return the offset for the next symbol on
+	 * the compressed stream */
+	off += len + 1;
+
+	/* for every byte on the compressed symbol data, copy the table
+	   entry for that byte */
+	while(len) {
+		tptr = &kallsyms_token_table[ kallsyms_token_index[*data] ];
+		data++;
+		len--;
+
+		while (*tptr) {
+			if(skipped_first) {
+				*result = *tptr;
+				result++;
+			} else
+				skipped_first = 1;
+			tptr++;
+		}
+	}
+
+	*result = '\0';
+
+	/* return the offset to the next symbol */
+	return off;
+}
+
+/* get symbol type information. This is encoded as a single char at the
+ * beginning of the symbol name */
+static char kallsyms_get_symbol_type(unsigned int off)
+{
+	/* get just the first code, look it up in the token table, and return the
+	 * first char from this token */
+	return kallsyms_token_table[ kallsyms_token_index[ kallsyms_names[off+1] ] ];
+}
+
+
+/* find the offset on the compressed stream given an index in the
+ * kallsyms array */
+static unsigned int get_symbol_offset(unsigned long pos)
+{
+	u8 *name;
+	int i;
+
+	/* use the closest marker we have. We have markers every 256 positions,
+	 * so that should be close enough */
+	name = &kallsyms_names[ kallsyms_markers[pos>>8] ];
+
+	/* sequentially scan all the symbols up to the point we're searching for.
+	 * Every symbol is stored in a [<len>][<len> bytes of data] format, so we
+	 * just need to add the len to the current pointer for every symbol we
+	 * wish to skip */
+	for(i = 0; i < (pos&0xFF); i++)
+		name = name + (*name) + 1;
+
+	return name - kallsyms_names;
+}
+
 /* Lookup the address for this symbol. Returns 0 if not found. */
 unsigned long kallsyms_lookup_name(const char *name)
 {
 	char namebuf[KSYM_NAME_LEN+1];
 	unsigned long i;
-	char *knames;
+	unsigned int off;
 
-	for (i = 0, knames = kallsyms_names; i < kallsyms_num_syms; i++) {
-		unsigned prefix = *knames++;
+	for (i = 0, off = 0; i < kallsyms_num_syms; i++) {
+		off = kallsyms_expand_symbol(off, namebuf);
 
-		strlcpy(namebuf + prefix, knames, KSYM_NAME_LEN - prefix);
 		if (strcmp(namebuf, name) == 0)
 			return kallsyms_addresses[i];
-
-		knames += strlen(knames) + 1;
 	}
 	return module_kallsyms_lookup_name(name);
 }
@@ -62,7 +139,7 @@ const char *kallsyms_lookup(unsigned long addr,
 			    unsigned long *offset,
 			    char **modname, char *namebuf)
 {
-	unsigned long i, best = 0;
+	unsigned long i, low, high, mid;
 
 	/* This kernel should never had been booted. */
 	BUG_ON(!kallsyms_addresses);
@@ -71,40 +148,45 @@ const char *kallsyms_lookup(unsigned long addr,
 	namebuf[0] = 0;
 
 	if (is_kernel_text(addr) || is_kernel_inittext(addr)) {
-		unsigned long symbol_end;
-		char *name = kallsyms_names;
-
-		/* They're sorted, we could be clever here, but who cares? */
-		for (i = 0; i < kallsyms_num_syms; i++) {
-			if (kallsyms_addresses[i] > kallsyms_addresses[best] &&
-			    kallsyms_addresses[i] <= addr)
-				best = i;
-		}
+		unsigned long symbol_end=0;
 
-		/* Grab name */
-		for (i = 0; i <= best; i++) {
-			unsigned prefix = *name++;
-			strncpy(namebuf + prefix, name, KSYM_NAME_LEN - prefix);
-			name += strlen(name) + 1;
+		/* do a binary search on the sorted kallsyms_addresses array */
+		low = 0;
+		high = kallsyms_num_syms;
+
+		while (high-low > 1) {
+			mid = (low + high) / 2;
+			if (kallsyms_addresses[mid] <= addr) low = mid;
+			else high = mid;
 		}
 
-		/* At worst, symbol ends at end of section. */
-		if (is_kernel_inittext(addr))
-			symbol_end = (unsigned long)_einittext;
-		else
-			symbol_end = (unsigned long)_etext;
+		/* search for the first aliased symbol. Aliased symbols are
+		   symbols with the same address */
+		while (low && kallsyms_addresses[low - 1] == kallsyms_addresses[low])
+			--low;
+
+		/* Grab name */
+		kallsyms_expand_symbol(get_symbol_offset(low), namebuf);
 
 		/* Search for next non-aliased symbol */
-		for (i = best+1; i < kallsyms_num_syms; i++) {
-			if (kallsyms_addresses[i] > kallsyms_addresses[best]) {
+		for (i = low + 1; i < kallsyms_num_syms; i++) {
+			if (kallsyms_addresses[i] > kallsyms_addresses[low]) {
 				symbol_end = kallsyms_addresses[i];
 				break;
 			}
 		}
 
-		*symbolsize = symbol_end - kallsyms_addresses[best];
+		/* if we found no next symbol, we use the end of the section */
+		if (!symbol_end) {
+			if (is_kernel_inittext(addr))
+				symbol_end = (unsigned long)_einittext;
+			else
+				symbol_end = (unsigned long)_etext;
+		}
+
+		*symbolsize = symbol_end - kallsyms_addresses[low];
 		*modname = NULL;
-		*offset = addr - kallsyms_addresses[best];
+		*offset = addr - kallsyms_addresses[low];
 		return namebuf;
 	}
 
@@ -135,7 +217,7 @@ void __print_symbol(const char *fmt, unsigned long address)
 	printk(fmt, buffer);
 }
 
-/* To avoid O(n^2) iteration, we carry prefix along. */
+/* To avoid using get_symbol_offset for every symbol, we carry prefix along. */
 struct kallsym_iter
 {
 	loff_t pos;
@@ -168,31 +250,23 @@ static int get_ksymbol_mod(struct kallsym_iter *iter)
 /* Returns space to next name. */
 static unsigned long get_ksymbol_core(struct kallsym_iter *iter)
 {
-	unsigned stemlen, off = iter->nameoff;
-
-	/* First char of each symbol name indicates prefix length
-	   shared with previous name (stem compression). */
-	stemlen = kallsyms_names[off++];
+	unsigned off = iter->nameoff;
 
-	strlcpy(iter->name+stemlen, kallsyms_names + off,
-		KSYM_NAME_LEN+1-stemlen);
-	off += strlen(kallsyms_names + off) + 1;
 	iter->owner = NULL;
 	iter->value = kallsyms_addresses[iter->pos];
-	if (is_kernel_text(iter->value) || is_kernel_inittext(iter->value))
-		iter->type = 't';
-	else
-		iter->type = 'd';
 
-	upcase_if_global(iter);
+	iter->type = kallsyms_get_symbol_type(off);
+
+	off = kallsyms_expand_symbol(off, iter->name);
+
 	return off - iter->nameoff;
 }
 
-static void reset_iter(struct kallsym_iter *iter)
+static void reset_iter(struct kallsym_iter *iter, loff_t new_pos)
 {
 	iter->name[0] = '\0';
-	iter->nameoff = 0;
-	iter->pos = 0;
+	iter->nameoff = get_symbol_offset(new_pos);
+	iter->pos = new_pos;
 }
 
 /* Returns false if pos at or past end of file. */
@@ -204,16 +278,13 @@ static int update_iter(struct kallsym_iter *iter, loff_t pos)
 		return get_ksymbol_mod(iter);
 	}
 
-	/* If we're past the desired position, reset to start. */
-	if (pos < iter->pos)
-		reset_iter(iter);
-
-	/* We need to iterate through the previous symbols: can be slow */
-	for (; iter->pos != pos; iter->pos++) {
-		iter->nameoff += get_ksymbol_core(iter);
-		cond_resched();
-	}
-	get_ksymbol_core(iter);
+	/* If we're not on the desired position, reset to new position. */
+	if (pos != iter->pos)
+		reset_iter(iter, pos);
+
+	iter->nameoff += get_ksymbol_core(iter);
+	iter->pos++;
+
 	return 1;
 }
 
@@ -267,14 +338,15 @@ struct seq_operations kallsyms_op = {
 static int kallsyms_open(struct inode *inode, struct file *file)
 {
 	/* We keep iterator in m->private, since normal case is to
-	 * s_start from where we left off, so we avoid O(N^2). */
+	 * s_start from where we left off, so we avoid calling
+	 * get_symbol_offset for every symbol */
 	struct kallsym_iter *iter;
 	int ret;
 
 	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
 	if (!iter)
 		return -ENOMEM;
-	reset_iter(iter);
+	reset_iter(iter, 0);
 
 	ret = seq_open(file, &kallsyms_op);
 	if (ret == 0)
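The stream format driving kallsyms_expand_symbol() above is [length][token indices...] per symbol, with the first expanded character encoding the symbol type and being skipped when the name is built. A user-space model of the decoder; the two-token table here (token 0 = "Tk", token 1 = "_print") is hypothetical, while the real kallsyms_names/kallsyms_token_table/kallsyms_token_index arrays are emitted by scripts/kallsyms at link time:

#include <stdio.h>

static const unsigned char names[] = { 2, 0, 1 };	/* one symbol: 2 tokens */
static const char token_table[] = "Tk\0_print";		/* NUL-separated tokens */
static const unsigned short token_index[] = { 0, 3 };	/* offsets into table */

static unsigned int expand_symbol(unsigned int off, char *result)
{
	int len = names[off++], skipped_first = 0;

	while (len--) {
		const char *t = &token_table[token_index[names[off++]]];

		for (; *t; t++) {
			if (skipped_first)
				*result++ = *t;
			else
				skipped_first = 1;	/* drop the type char */
		}
	}
	*result = '\0';
	return off;				/* offset of the next symbol */
}

int main(void)
{
	char buf[64];
	unsigned int next = expand_symbol(0, buf);

	/* mirrors kallsyms_get_symbol_type(): first char of the first token */
	printf("type %c, name %s, next symbol at offset %u\n",
	       token_table[token_index[names[1]]], buf, next);
	return 0;
}

This prints "type T, name k_print, next symbol at offset 3". The returned offset is where the next symbol's length byte sits, which is exactly the contract get_symbol_offset() relies on when it skips *name + 1 bytes per symbol from the nearest 256-symbol marker.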
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
new file mode 100644
index 00000000000000..9a5e17b507fe7d
--- /dev/null
+++ b/kernel/kfifo.c
@@ -0,0 +1,170 @@
+/*
+ * A simple kernel FIFO implementation.
+ *
+ * Copyright (C) 2004 Stelian Pop <stelian@popies.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/kfifo.h>
+
+/*
+ * kfifo_init - allocates a new FIFO using a preallocated buffer
+ * @buffer: the preallocated buffer to be used.
+ * @size: the size of the internal buffer, this has to be a power of 2.
+ * @gfp_mask: get_free_pages mask, passed to kmalloc()
+ * @lock: the lock to be used to protect the fifo buffer
+ *
+ * Do NOT pass the kfifo to kfifo_free() after use! Simply free the
+ * struct kfifo with kfree().
+ */
+struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size,
+			 int gfp_mask, spinlock_t *lock)
+{
+	struct kfifo *fifo;
+
+	/* size must be a power of 2 */
+	BUG_ON(size & (size - 1));
+
+	fifo = kmalloc(sizeof(struct kfifo), gfp_mask);
+	if (!fifo)
+		return ERR_PTR(-ENOMEM);
+
+	fifo->buffer = buffer;
+	fifo->size = size;
+	fifo->in = fifo->out = 0;
+	fifo->lock = lock;
+
+	return fifo;
+}
+EXPORT_SYMBOL(kfifo_init);
+
+/*
+ * kfifo_alloc - allocates a new FIFO and its internal buffer
+ * @size: the size of the internal buffer to be allocated.
+ * @gfp_mask: get_free_pages mask, passed to kmalloc()
+ * @lock: the lock to be used to protect the fifo buffer
+ *
+ * The size will be rounded up to a power of 2.
+ */
+struct kfifo *kfifo_alloc(unsigned int size, int gfp_mask, spinlock_t *lock)
+{
+	unsigned int newsize;
+	unsigned char *buffer;
+	struct kfifo *ret;
+
+	/*
+	 * round up to the next power of 2, since our 'let the indices
+	 * wrap' technique works only in this case.
+	 */
+	newsize = size;
+	if (size & (size - 1)) {
+		BUG_ON(size > 0x80000000);
+		newsize = roundup_pow_of_two(size);
+	}
+
+	buffer = kmalloc(newsize, gfp_mask);
+	if (!buffer)
+		return ERR_PTR(-ENOMEM);
+
+	ret = kfifo_init(buffer, size, gfp_mask, lock);
+
+	if (IS_ERR(ret))
+		kfree(buffer);
+
+	return ret;
+}
+EXPORT_SYMBOL(kfifo_alloc);
+
+/*
+ * kfifo_free - frees the FIFO
+ * @fifo: the fifo to be freed.
+ */
+void kfifo_free(struct kfifo *fifo)
+{
+	kfree(fifo->buffer);
+	kfree(fifo);
+}
+EXPORT_SYMBOL(kfifo_free);
+
+/*
+ * __kfifo_put - puts some data into the FIFO, no locking version
+ * @fifo: the fifo to be used.
+ * @buffer: the data to be added.
+ * @len: the length of the data to be added.
+ *
+ * This function copies at most 'len' bytes from the 'buffer' into
+ * the FIFO depending on the free space, and returns the number of
+ * bytes copied.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+unsigned int __kfifo_put(struct kfifo *fifo,
+			 unsigned char *buffer, unsigned int len)
+{
+	unsigned int l;
+
+	len = min(len, fifo->size - fifo->in + fifo->out);
+
+	/* first put the data starting from fifo->in to buffer end */
+	l = min(len, fifo->size - (fifo->in & (fifo->size - 1)));
+	memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), buffer, l);
+
+	/* then put the rest (if any) at the beginning of the buffer */
+	memcpy(fifo->buffer, buffer + l, len - l);
+
+	fifo->in += len;
+
+	return len;
+}
+EXPORT_SYMBOL(__kfifo_put);
+
+/*
+ * __kfifo_get - gets some data from the FIFO, no locking version
+ * @fifo: the fifo to be used.
+ * @buffer: where the data must be copied.
+ * @len: the size of the destination buffer.
+ *
+ * This function copies at most 'len' bytes from the FIFO into the
+ * 'buffer' and returns the number of copied bytes.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+unsigned int __kfifo_get(struct kfifo *fifo,
+			 unsigned char *buffer, unsigned int len)
+{
+	unsigned int l;
+
+	len = min(len, fifo->in - fifo->out);
+
+	/* first get the data from fifo->out until the end of the buffer */
+	l = min(len, fifo->size - (fifo->out & (fifo->size - 1)));
+	memcpy(buffer, fifo->buffer + (fifo->out & (fifo->size - 1)), l);
+
+	/* then get the rest (if any) from the beginning of the buffer */
+	memcpy(buffer + l, fifo->buffer, len - l);
+
+	fifo->out += len;
+
+	return len;
+}
+EXPORT_SYMBOL(__kfifo_get);
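Because the buffer size is a power of two, fifo->in and fifo->out are free-running unsigned counters: they are never reset, in - out is always the number of queued bytes even across 32-bit wraparound, and index & (size - 1) is the buffer position. A minimal usage sketch under the single-writer/single-reader assumption stated above, so the lockless __kfifo_put()/__kfifo_get() variants suffice; error handling and any surrounding driver context are omitted:

static spinlock_t demo_lock = SPIN_LOCK_UNLOCKED;

static int kfifo_demo(void)
{
	unsigned char msg[] = "hello", out[8];
	struct kfifo *fifo;
	unsigned int n;

	fifo = kfifo_alloc(64, GFP_KERNEL, &demo_lock);	/* 64 is already 2^6 */
	if (IS_ERR(fifo))
		return PTR_ERR(fifo);

	n = __kfifo_put(fifo, msg, sizeof(msg));	/* returns bytes queued */
	n = __kfifo_get(fifo, out, sizeof(out));	/* returns bytes drained */

	kfifo_free(fifo);
	return n == sizeof(msg) ? 0 : -EIO;
}

With more than one writer or reader, the caller would instead take the spinlock passed at allocation time around each put/get.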
diff --git a/kernel/panic.c b/kernel/panic.c
index fce7f4030d0a75..c7ab9981c7aa76 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -110,6 +110,9 @@ EXPORT_SYMBOL(panic);
  *  'P' - Proprietary module has been loaded.
  *  'F' - Module has been forcibly loaded.
  *  'S' - SMP with CPUs not designed for SMP.
+ *  'R' - User forced a module unload.
+ *  'M' - Machine had a machine check experience.
+ *  'B' - System has hit bad_page.
  *
  *	The string is overwritten by the next call to print_taint().
  */
@@ -118,12 +121,21 @@ const char *print_tainted(void)
 {
 	static char buf[20];
 	if (tainted) {
-		snprintf(buf, sizeof(buf), "Tainted: %c%c%c",
+		snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c",
 			tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G',
 			tainted & TAINT_FORCED_MODULE ? 'F' : ' ',
-			tainted & TAINT_UNSAFE_SMP ? 'S' : ' ');
+			tainted & TAINT_UNSAFE_SMP ? 'S' : ' ',
+			tainted & TAINT_FORCED_RMMOD ? 'R' : ' ',
+			tainted & TAINT_MACHINE_CHECK ? 'M' : ' ',
+			tainted & TAINT_BAD_PAGE ? 'B' : ' ');
 	}
	else
 		snprintf(buf, sizeof(buf), "Not tainted");
 	return(buf);
 }
+
+void add_taint(unsigned flag)
+{
+	tainted |= flag;
+}
+EXPORT_SYMBOL(add_taint);
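print_tainted() above renders the taint bitmask as a fixed-width letter string. A user-space sketch decoding the same mask; the numeric values assume the TAINT_* flags of this era occupy bits 0 through 5, and the live mask can be read from /proc/sys/kernel/tainted:

#include <stdio.h>

static void print_taint_flags(unsigned tainted)
{
	printf("Tainted: %c%c%c%c%c%c\n",
	       tainted & 1  ? 'P' : 'G',	/* proprietary module */
	       tainted & 2  ? 'F' : ' ',	/* forced module load */
	       tainted & 4  ? 'S' : ' ',	/* SMP with non-SMP CPUs */
	       tainted & 8  ? 'R' : ' ',	/* forced module unload */
	       tainted & 16 ? 'M' : ' ',	/* machine check */
	       tainted & 32 ? 'B' : ' ');	/* bad page */
}

int main(void)
{
	print_taint_flags(1 | 8);	/* prints "Tainted: P  R  " */
	return 0;
}

The exported add_taint() lets other subsystems set these bits with a single call instead of touching the tainted variable directly.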
diff --git a/kernel/pid.c b/kernel/pid.c
index 83008f812f4974..21024b7ae37c2f 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -1,8 +1,9 @@
 /*
  * Generic pidhash and scalable, time-bounded PID allocator
  *
- * (C) 2002 William Irwin, IBM
- * (C) 2002 Ingo Molnar, Red Hat
+ * (C) 2002-2003 William Irwin, IBM
+ * (C) 2004 William Irwin, Oracle
+ * (C) 2002-2004 Ingo Molnar, Red Hat
  *
  * pid-structures are backing objects for tasks sharing a given ID to chain
  * against. There is very little to them aside from hashing them and
@@ -35,9 +36,15 @@ int last_pid;
 
 #define RESERVED_PIDS		300
 
-#define PIDMAP_ENTRIES		(PID_MAX_LIMIT/PAGE_SIZE/8)
+int pid_max_min = RESERVED_PIDS + 1;
+int pid_max_max = PID_MAX_LIMIT;
+
+#define PIDMAP_ENTRIES		((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
 #define BITS_PER_PAGE		(PAGE_SIZE*8)
 #define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)
+#define mk_pid(map, off)	(((map) - pidmap_array)*BITS_PER_PAGE + (off))
+#define find_next_offset(map, off) \
+		find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
 
 /*
  * PID-map pages start out as NULL, they get allocated upon
@@ -53,8 +60,6 @@ typedef struct pidmap {
 static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
 	 { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
 
-static pidmap_t *map_limit = pidmap_array + PIDMAP_ENTRIES;
-
 static spinlock_t pidmap_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 
 fastcall void free_pidmap(int pid)
@@ -66,15 +71,18 @@ fastcall void free_pidmap(int pid)
 	atomic_inc(&map->nr_free);
 }
 
-/*
- * Here we search for the next map that has free bits left.
- * Normally the next map has free PIDs.
- */
-static inline pidmap_t *next_free_map(pidmap_t *map, int *max_steps)
+int alloc_pidmap(void)
 {
-	while (--*max_steps) {
-		if (++map == map_limit)
-			map = pidmap_array;
+	int i, offset, max_scan, pid, last = last_pid;
+	pidmap_t *map;
+
+	pid = last + 1;
+	if (pid >= pid_max)
+		pid = RESERVED_PIDS;
+	offset = pid & BITS_PER_PAGE_MASK;
+	map = &pidmap_array[pid/BITS_PER_PAGE];
+	max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
+	for (i = 0; i <= max_scan; ++i) {
 		if (unlikely(!map->page)) {
 			unsigned long page = get_zeroed_page(GFP_KERNEL);
 			/*
@@ -87,62 +95,39 @@ static inline pidmap_t *next_free_map(pidmap_t *map, int *max_steps)
 			else
 				map->page = (void *)page;
 			spin_unlock(&pidmap_lock);
-
-			if (!map->page)
+			if (unlikely(!map->page))
 				break;
 		}
-		if (atomic_read(&map->nr_free))
-			return map;
-	}
-	return NULL;
-}
-
-int alloc_pidmap(void)
-{
-	int pid, offset, max_steps = PIDMAP_ENTRIES + 1;
-	pidmap_t *map;
-
-	pid = last_pid + 1;
-	if (pid >= pid_max)
-		pid = RESERVED_PIDS;
-
-	offset = pid & BITS_PER_PAGE_MASK;
-	map = pidmap_array + pid / BITS_PER_PAGE;
-
-	if (likely(map->page && !test_and_set_bit(offset, map->page))) {
-		/*
-		 * There is a small window for last_pid updates to race,
-		 * but in that case the next allocation will go into the
-		 * slowpath and that fixes things up.
-		 */
-return_pid:
-		atomic_dec(&map->nr_free);
-		last_pid = pid;
-		return pid;
-	}
-
-	if (!offset || !atomic_read(&map->nr_free)) {
-next_map:
-		map = next_free_map(map, &max_steps);
-		if (!map)
-			goto failure;
-		offset = 0;
+		if (likely(atomic_read(&map->nr_free))) {
+			do {
+				if (!test_and_set_bit(offset, map->page)) {
+					atomic_dec(&map->nr_free);
+					last_pid = pid;
+					return pid;
+				}
+				offset = find_next_offset(map, offset);
+				pid = mk_pid(map, offset);
+			/*
+			 * find_next_offset() found a bit, the pid from it
+			 * is in-bounds, and if we fell back to the last
+			 * bitmap block and the final block was the same
+			 * as the starting point, pid is before last_pid.
+			 */
+			} while (offset < BITS_PER_PAGE && pid < pid_max &&
+					(i != max_scan || pid < last ||
+					    !((last+1) & BITS_PER_PAGE_MASK)));
+		}
+		if (map < &pidmap_array[(pid_max-1)/BITS_PER_PAGE]) {
+			++map;
+			offset = 0;
+		} else {
+			map = &pidmap_array[0];
+			offset = RESERVED_PIDS;
+			if (unlikely(last == offset))
+				break;
+		}
+		pid = mk_pid(map, offset);
 	}
-	/*
-	 * Find the next zero bit:
-	 */
-scan_more:
-	offset = find_next_zero_bit(map->page, BITS_PER_PAGE, offset);
-	if (offset >= BITS_PER_PAGE)
-		goto next_map;
-	if (test_and_set_bit(offset, map->page))
-		goto scan_more;
-
-	/* we got the PID: */
-	pid = (map - pidmap_array) * BITS_PER_PAGE + offset;
-	goto return_pid;
-
-failure:
 	return -1;
 }
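The new alloc_pidmap() is a circular scan over per-page bitmaps: start one past last_pid, find the next zero bit, wrap back to RESERVED_PIDS at pid_max, and give up after one full cycle. A toy single-page, single-threaded model of that scan shape; the real code spans multiple pages and uses the atomic test_and_set_bit():

#include <stdio.h>
#include <string.h>

#define NBITS 64

static unsigned char page[NBITS / 8];	/* bit i set => pid i in use */

static int test_and_set(int off)
{
	int was = page[off / 8] & (1 << (off % 8));
	page[off / 8] |= 1 << (off % 8);
	return was;
}

static int find_next_zero(int off)
{
	while (off < NBITS && (page[off / 8] & (1 << (off % 8))))
		off++;
	return off;	/* NBITS means "page full" */
}

static int alloc_pid(int last)
{
	int off = (last + 1) % NBITS;
	int scans;

	for (scans = 0; scans < 2; scans++) {	/* at most one wraparound */
		off = find_next_zero(off);
		if (off < NBITS && !test_and_set(off))
			return off;
		off = 0;	/* wrap, like falling back to pidmap_array[0] */
	}
	return -1;		/* bitmap exhausted */
}

int main(void)
{
	memset(page, 0xff, 2);				/* pids 0..15 busy */
	printf("allocated pid %d\n", alloc_pid(7));	/* prints 16 */
	return 0;
}

The time bound in the real allocator comes from max_scan: the loop visits each bitmap page at most once per allocation, so a full pid space costs one pass rather than an unbounded retry.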
diff --git a/kernel/power/pm.c b/kernel/power/pm.c
index d1bc943072d409..8fca5822a80776 100644
--- a/kernel/power/pm.c
+++ b/kernel/power/pm.c
@@ -256,41 +256,10 @@ int pm_send_all(pm_request_t rqst, void *data)
 	return 0;
 }
 
-/**
- * pm_find - find a device
- * @type: type of device
- * @from: where to start looking
- *
- * Scan the power management list for devices of a specific type. The
- * return value for a matching device may be passed to further calls
- * to this function to find further matches. A %NULL indicates the end
- * of the list.
- *
- * To search from the beginning pass %NULL as the @from value.
- *
- * The caller MUST hold the pm_devs_lock lock when calling this
- * function. The instant that the lock is dropped all pointers returned
- * may become invalid.
- */
-
-struct pm_dev *pm_find(pm_dev_t type, struct pm_dev *from)
-{
-	struct list_head *entry = from ? from->entry.next:pm_devs.next;
-	while (entry != &pm_devs) {
-		struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
-		if (type == PM_UNKNOWN_DEV || dev->type == type)
-			return dev;
-		entry = entry->next;
-	}
-	return NULL;
-}
-
 EXPORT_SYMBOL(pm_register);
 EXPORT_SYMBOL(pm_unregister);
 EXPORT_SYMBOL(pm_unregister_all);
-EXPORT_SYMBOL(pm_send);
 EXPORT_SYMBOL(pm_send_all);
-EXPORT_SYMBOL(pm_find);
 EXPORT_SYMBOL(pm_active);
diff --git a/kernel/printk.c b/kernel/printk.c
index c02ec626f38404..390396fc6d017f 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -661,12 +661,10 @@ EXPORT_SYMBOL(release_console_sem);
  *
  * Must be called within acquire_console_sem().
  */
-void console_conditional_schedule(void)
+void __sched console_conditional_schedule(void)
 {
-	if (console_may_schedule && need_resched()) {
-		set_current_state(TASK_RUNNING);
-		schedule();
-	}
+	if (console_may_schedule)
+		cond_resched();
 }
 EXPORT_SYMBOL(console_conditional_schedule);
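console_conditional_schedule() now collapses to cond_resched() whenever the console path is allowed to sleep, so each chunk boundary in a long console transfer becomes a potential preemption point instead of an open-coded set_current_state()/schedule() pair. A hypothetical sketch of the kind of loop this serves; struct my_console and next_chunk() are invented names, while acquire_console_sem()/release_console_sem() and console_conditional_schedule() are the real 2.6-era API:

struct my_console;				/* hypothetical driver state */
extern int next_chunk(struct my_console *con);	/* invented: emits one chunk */

static void my_console_flush(struct my_console *con)
{
	acquire_console_sem();
	while (next_chunk(con))
		console_conditional_schedule();	/* no-op when scheduling is off */
	release_console_sem();
}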
diff --git a/kernel/profile.c b/kernel/profile.c
index 1c4375fad9230f..e7ff9b32d8220d 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -1,5 +1,16 @@
 /*
  *  linux/kernel/profile.c
+ *  Simple profiling. Manages a direct-mapped profile hit count buffer,
+ *  with configurable resolution, support for restricting the cpus on
+ *  which profiling is done, and switching between cpu time and
+ *  schedule() calls via kernel command line parameters passed at boot.
+ *
+ *  Scheduler profiling support, Arjan van de Ven and Ingo Molnar,
+ *	Red Hat, July 2004
+ *  Consolidation of architecture support code for profiling,
+ *	William Irwin, Oracle, July 2004
+ *  Amortized hit count accounting via per-cpu open-addressed hashtables
+ *	to resolve timer interrupt livelocks, William Irwin, Oracle, 2004
  */
 
 #include <linux/config.h>
@@ -9,13 +20,29 @@
 #include <linux/notifier.h>
 #include <linux/mm.h>
 #include <linux/cpumask.h>
+#include <linux/cpu.h>
 #include <linux/profile.h>
+#include <linux/highmem.h>
 #include <asm/sections.h>
+#include <asm/semaphore.h>
+
+struct profile_hit {
+	u32 pc, hits;
+};
+#define PROFILE_GRPSHIFT 3
+#define PROFILE_GRPSZ (1 << PROFILE_GRPSHIFT)
+#define NR_PROFILE_HIT (PAGE_SIZE/sizeof(struct profile_hit))
+#define NR_PROFILE_GRP (NR_PROFILE_HIT/PROFILE_GRPSZ)
 
 static atomic_t *prof_buffer;
 static unsigned long prof_len, prof_shift;
 static int prof_on;
 static cpumask_t prof_cpu_mask = CPU_MASK_ALL;
+#ifdef CONFIG_SMP
+static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits);
+static DEFINE_PER_CPU(int, cpu_profile_flip);
+static DECLARE_MUTEX(profile_flip_mutex);
+#endif /* CONFIG_SMP */
 
 static int __init profile_setup(char * str)
 {
@@ -181,6 +208,179 @@ EXPORT_SYMBOL_GPL(task_handoff_unregister);
 EXPORT_SYMBOL_GPL(profile_event_register);
 EXPORT_SYMBOL_GPL(profile_event_unregister);
 
+#ifdef CONFIG_SMP
+/*
+ * Each cpu has a pair of open-addressed hashtables for pending
+ * profile hits. read_profile() IPI's all cpus to request them
+ * to flip buffers and flushes their contents to prof_buffer itself.
+ * Flip requests are serialized by the profile_flip_mutex. The sole
+ * use of having a second hashtable is for avoiding cacheline
+ * contention that would otherwise happen during flushes of pending
+ * profile hits required for the accuracy of reported profile hits
+ * and so resurrect the interrupt livelock issue.
+ *
+ * The open-addressed hashtables are indexed by profile buffer slot
+ * and hold the number of pending hits to that profile buffer slot on
+ * a cpu in an entry. When the hashtable overflows, all pending hits
+ * are accounted to their corresponding profile buffer slots with
+ * atomic_add() and the hashtable emptied. As numerous pending hits
+ * may be accounted to a profile buffer slot in a hashtable entry,
+ * this amortizes a number of atomic profile buffer increments likely
+ * to be far larger than the number of entries in the hashtable,
+ * particularly given that the number of distinct profile buffer
+ * positions to which hits are accounted during short intervals (e.g.
+ * several seconds) is usually very small. Exclusion from buffer
+ * flipping is provided by interrupt disablement (note that for
+ * SCHED_PROFILING profile_hit() may be called from process context).
+ * The hash function is meant to be lightweight as opposed to strong,
+ * and was vaguely inspired by ppc64 firmware-supported inverted
+ * pagetable hash functions, but uses a full hashtable full of finite
+ * collision chains, not just pairs of them.
+ *
+ * -- wli
+ */
+static void __profile_flip_buffers(void *unused)
+{
+	int cpu = smp_processor_id();
+
+	per_cpu(cpu_profile_flip, cpu) = !per_cpu(cpu_profile_flip, cpu);
+}
+
+static void profile_flip_buffers(void)
+{
+	int i, j, cpu;
+
+	down(&profile_flip_mutex);
+	j = per_cpu(cpu_profile_flip, get_cpu());
+	put_cpu();
+	on_each_cpu(__profile_flip_buffers, NULL, 0, 1);
+	for_each_online_cpu(cpu) {
+		struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j];
+		for (i = 0; i < NR_PROFILE_HIT; ++i) {
+			if (!hits[i].hits) {
+				if (hits[i].pc)
+					hits[i].pc = 0;
+				continue;
+			}
+			atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]);
+			hits[i].hits = hits[i].pc = 0;
+		}
+	}
+	up(&profile_flip_mutex);
+}
+
+static void profile_discard_flip_buffers(void)
+{
+	int i, cpu;
+
+	down(&profile_flip_mutex);
+	i = per_cpu(cpu_profile_flip, get_cpu());
+	put_cpu();
+	on_each_cpu(__profile_flip_buffers, NULL, 0, 1);
+	for_each_online_cpu(cpu) {
+		struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i];
+		memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit));
+	}
+	up(&profile_flip_mutex);
+}
+
+void profile_hit(int type, void *__pc)
+{
+	unsigned long primary, secondary, flags, pc = (unsigned long)__pc;
+	int i, j, cpu;
+	struct profile_hit *hits;
+
+	if (prof_on != type || !prof_buffer)
+		return;
+	pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1);
+	i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
+	secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
+	cpu = get_cpu();
+	hits = per_cpu(cpu_profile_hits, cpu)[per_cpu(cpu_profile_flip, cpu)];
+	if (!hits) {
+		put_cpu();
+		return;
+	}
+	local_irq_save(flags);
+	do {
+		for (j = 0; j < PROFILE_GRPSZ; ++j) {
+			if (hits[i + j].pc == pc) {
+				hits[i + j].hits++;
+				goto out;
+			} else if (!hits[i + j].hits) {
+				hits[i + j].pc = pc;
+				hits[i + j].hits = 1;
+				goto out;
+			}
+		}
+		i = (i + secondary) & (NR_PROFILE_HIT - 1);
+	} while (i != primary);
+	atomic_inc(&prof_buffer[pc]);
+	for (i = 0; i < NR_PROFILE_HIT; ++i) {
+		atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]);
+		hits[i].pc = hits[i].hits = 0;
+	}
+out:
+	local_irq_restore(flags);
+	put_cpu();
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int __devinit profile_cpu_callback(struct notifier_block *info,
+					unsigned long action, void *__cpu)
+{
+	int node, cpu = (unsigned long)__cpu;
+	struct page *page;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+		node = cpu_to_node(cpu);
+		per_cpu(cpu_profile_flip, cpu) = 0;
+		if (!per_cpu(cpu_profile_hits, cpu)[1]) {
+			page = alloc_pages_node(node, GFP_KERNEL, 0);
+			if (!page)
+				return NOTIFY_BAD;
+			clear_highpage(page);
+			per_cpu(cpu_profile_hits, cpu)[1] = page_address(page);
+		}
+		if (!per_cpu(cpu_profile_hits, cpu)[0]) {
+			page = alloc_pages_node(node, GFP_KERNEL, 0);
+			if (!page)
+				goto out_free;
+			clear_highpage(page);
+			per_cpu(cpu_profile_hits, cpu)[0] = page_address(page);
+		}
+		break;
+	out_free:
+		page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
+		per_cpu(cpu_profile_hits, cpu)[1] = NULL;
+		__free_page(page);
+		return NOTIFY_BAD;
+	case CPU_ONLINE:
+		cpu_set(cpu, prof_cpu_mask);
+		break;
+	case CPU_UP_CANCELED:
+	case CPU_DEAD:
+		cpu_clear(cpu, prof_cpu_mask);
+		if (per_cpu(cpu_profile_hits, cpu)[0]) {
+			page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]);
+			per_cpu(cpu_profile_hits, cpu)[0] = NULL;
+			__free_page(page);
+		}
+		if (per_cpu(cpu_profile_hits, cpu)[1]) {
+			page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
+			per_cpu(cpu_profile_hits, cpu)[1] = NULL;
+			__free_page(page);
+		}
+		break;
+	}
+	return NOTIFY_OK;
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+#else /* !CONFIG_SMP */
+#define profile_flip_buffers()		do { } while (0)
+#define profile_discard_flip_buffers()	do { } while (0)
+
 void profile_hit(int type, void *__pc)
 {
 	unsigned long pc;
@@ -190,6 +390,7 @@ void profile_hit(int type, void *__pc)
 	pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift;
 	atomic_inc(&prof_buffer[min(pc, prof_len - 1)]);
 }
+#endif /* !CONFIG_SMP */
 
 void profile_tick(int type, struct pt_regs *regs)
 {
@@ -256,6 +457,7 @@ read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 	char * pnt;
 	unsigned int sample_step = 1 << prof_shift;
 
+	profile_flip_buffers();
 	if (p >= (prof_len+1)*sizeof(unsigned int))
 		return 0;
 	if (count > (prof_len+1)*sizeof(unsigned int) - p)
@@ -296,7 +498,7 @@ static ssize_t write_profile(struct file *file, const char __user *buf,
 		return -EINVAL;
 	}
 #endif
-
+	profile_discard_flip_buffers();
 	memset(prof_buffer, 0, prof_len * sizeof(atomic_t));
 	return count;
 }
@@ -306,16 +508,70 @@ static struct file_operations proc_profile_operations = {
 	.write		= write_profile,
 };
 
+#ifdef CONFIG_SMP
+static void __init profile_nop(void *unused)
+{
+}
+
+static int __init create_hash_tables(void)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		int node = cpu_to_node(cpu);
+		struct page *page;
+
+		page = alloc_pages_node(node, GFP_KERNEL, 0);
+		if (!page)
+			goto out_cleanup;
+		clear_highpage(page);
+		per_cpu(cpu_profile_hits, cpu)[1]
+				= (struct profile_hit *)page_address(page);
+		page = alloc_pages_node(node, GFP_KERNEL, 0);
+		if (!page)
+			goto out_cleanup;
+		clear_highpage(page);
+		per_cpu(cpu_profile_hits, cpu)[0]
+				= (struct profile_hit *)page_address(page);
+	}
+	return 0;
+out_cleanup:
+	prof_on = 0;
+	mb();
+	on_each_cpu(profile_nop, NULL, 0, 1);
+	for_each_online_cpu(cpu) {
+		struct page *page;
+
+		if (per_cpu(cpu_profile_hits, cpu)[0]) {
+			page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]);
+			per_cpu(cpu_profile_hits, cpu)[0] = NULL;
+			__free_page(page);
+		}
+		if (per_cpu(cpu_profile_hits, cpu)[1]) {
+			page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
+			per_cpu(cpu_profile_hits, cpu)[1] = NULL;
+			__free_page(page);
+		}
+	}
+	return -1;
+}
+#else
+#define create_hash_tables()			({ 0; })
+#endif
+
 static int __init create_proc_profile(void)
 {
 	struct proc_dir_entry *entry;
 
 	if (!prof_on)
 		return 0;
+	if (create_hash_tables())
+		return -1;
 	if (!(entry = create_proc_entry("profile", S_IWUSR | S_IRUGO, NULL)))
 		return 0;
 	entry->proc_fops = &proc_profile_operations;
 	entry->size = (1+prof_len) * sizeof(atomic_t);
+	hotcpu_notifier(profile_cpu_callback, 0);
 	return 0;
 }
 module_init(create_proc_profile);
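profile_hit() above probes the per-cpu table in groups of PROFILE_GRPSZ entries and, when a group is full, jumps by a pc-derived secondary stride until it returns to the primary group. A user-space model of that probe; the constants mirror a 4096-byte page of 8-byte entries (512 slots, 64 groups), and the full-table case is where the kernel flushes everything to prof_buffer:

#include <stdio.h>

#define GRPSHIFT 3
#define GRPSZ    (1 << GRPSHIFT)
#define NR_HIT   512
#define NR_GRP   (NR_HIT / GRPSZ)

struct hit { unsigned pc, hits; };
static struct hit table[NR_HIT];

static int record_hit(unsigned pc)
{
	unsigned primary = (pc & (NR_GRP - 1)) << GRPSHIFT;
	unsigned secondary = (~(pc << 1) & (NR_GRP - 1)) << GRPSHIFT;
	unsigned i = primary;
	int j;

	do {
		for (j = 0; j < GRPSZ; j++) {
			if (table[i + j].pc == pc) {
				table[i + j].hits++;	/* amortized hit */
				return 0;
			}
			if (!table[i + j].hits) {
				table[i + j].pc = pc;	/* claim empty slot */
				table[i + j].hits = 1;
				return 0;
			}
		}
		i = (i + secondary) & (NR_HIT - 1);	/* next group */
	} while (i != primary);
	return -1;	/* table full: kernel flushes to prof_buffer here */
}

int main(void)
{
	record_hit(42);
	record_hit(42);
	printf("hits for pc 42: %u\n",
	       table[(42 & (NR_GRP - 1)) << GRPSHIFT].hits);	/* 2 */
	return 0;
}

Many timer-interrupt hits on the same pc thus cost one cached increment each, and only the flip/flush path touches the shared prof_buffer with atomic_add(), which is what breaks the livelock described in the block comment.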
diff --git a/kernel/signal.c b/kernel/signal.c
index f67390806d7367..ba039fab37e8b5 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1143,36 +1143,6 @@ kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
 	return retval;
 }
 
-/*
- * kill_sl_info() sends a signal to the session leader: this is used
- * to send SIGHUP to the controlling process of a terminal when
- * the connection is lost.
- */
-
-
-int
-kill_sl_info(int sig, struct siginfo *info, pid_t sid)
-{
-	int err, retval = -EINVAL;
-	struct task_struct *p;
-
-	if (sid <= 0)
-		goto out;
-
-	retval = -ESRCH;
-	read_lock(&tasklist_lock);
-	do_each_task_pid(sid, PIDTYPE_SID, p) {
-		if (!p->signal->leader)
-			continue;
-		err = group_send_sig_info(sig, info, p);
-		if (retval)
-			retval = err;
-	} while_each_task_pid(sid, PIDTYPE_SID, p);
-	read_unlock(&tasklist_lock);
-out:
-	return retval;
-}
-
 int
 kill_proc_info(int sig, struct siginfo *info, pid_t pid)
 {
@@ -1309,12 +1279,6 @@ kill_pg(pid_t pgrp, int sig, int priv)
 }
 
 int
-kill_sl(pid_t sess, int sig, int priv)
-{
-	return kill_sl_info(sig, (void *)(long)(priv != 0), sess);
-}
-
-int
 kill_proc(pid_t pid, int sig, int priv)
 {
 	return kill_proc_info(sig, (void *)(long)(priv != 0), pid);
@@ -1978,22 +1942,11 @@ relock:
 EXPORT_SYMBOL(recalc_sigpending);
 EXPORT_SYMBOL_GPL(dequeue_signal);
 EXPORT_SYMBOL(flush_signals);
-EXPORT_SYMBOL(force_sig);
-EXPORT_SYMBOL(force_sig_info);
 EXPORT_SYMBOL(kill_pg);
-EXPORT_SYMBOL(kill_pg_info);
 EXPORT_SYMBOL(kill_proc);
-EXPORT_SYMBOL(kill_proc_info);
-EXPORT_SYMBOL(kill_sl);
-EXPORT_SYMBOL(kill_sl_info);
 EXPORT_SYMBOL(ptrace_notify);
 EXPORT_SYMBOL(send_sig);
 EXPORT_SYMBOL(send_sig_info);
-EXPORT_SYMBOL(send_group_sig_info);
-EXPORT_SYMBOL(sigqueue_alloc);
-EXPORT_SYMBOL(sigqueue_free);
-EXPORT_SYMBOL(send_sigqueue);
-EXPORT_SYMBOL(send_group_sigqueue);
 EXPORT_SYMBOL(sigprocmask);
 EXPORT_SYMBOL(block_all_signals);
 EXPORT_SYMBOL(unblock_all_signals);
diff --git a/kernel/sys.c b/kernel/sys.c
index a95e3900dc1e23..e6dbc2940751a3 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -19,6 +19,7 @@
 #include <linux/fs.h>
 #include <linux/workqueue.h>
 #include <linux/device.h>
+#include <linux/key.h>
 #include <linux/times.h>
 #include <linux/security.h>
 #include <linux/dcookies.h>
@@ -282,6 +283,9 @@ cond_syscall(sys_set_mempolicy)
 cond_syscall(compat_mbind)
 cond_syscall(compat_get_mempolicy)
 cond_syscall(compat_set_mempolicy)
+cond_syscall(sys_add_key)
+cond_syscall(sys_request_key)
+cond_syscall(sys_keyctl)
 
 /* arch-specific weak syscall entries */
 cond_syscall(sys_pciconfig_read)
@@ -605,6 +609,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
 	current->fsgid = new_egid;
 	current->egid = new_egid;
 	current->gid = new_rgid;
+	key_fsgid_changed(current);
 	return 0;
 }
 
@@ -642,6 +647,8 @@ asmlinkage long sys_setgid(gid_t gid)
 	}
 	else
 		return -EPERM;
+
+	key_fsgid_changed(current);
 	return 0;
 }
 
@@ -730,6 +737,8 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
 		current->suid = current->euid;
 	current->fsuid = current->euid;
 
+	key_fsuid_changed(current);
+
 	return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE);
 }
 
@@ -775,6 +784,8 @@ asmlinkage long sys_setuid(uid_t uid)
 	current->fsuid = current->euid = uid;
 	current->suid = new_suid;
 
+	key_fsuid_changed(current);
+
 	return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID);
 }
 
@@ -821,6 +832,8 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
 	if (suid != (uid_t) -1)
 		current->suid = suid;
 
+	key_fsuid_changed(current);
+
 	return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES);
 }
 
@@ -870,6 +883,8 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
 		current->gid = rgid;
 	if (sgid != (gid_t) -1)
 		current->sgid = sgid;
+
+	key_fsgid_changed(current);
 	return 0;
 }
 
@@ -911,6 +926,8 @@ asmlinkage long sys_setfsuid(uid_t uid)
 		current->fsuid = uid;
 	}
 
+	key_fsuid_changed(current);
+
 	security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS);
 
 	return old_fsuid;
@@ -937,6 +954,7 @@ asmlinkage long sys_setfsgid(gid_t gid)
 			wmb();
 		}
 		current->fsgid = gid;
+		key_fsgid_changed(current);
 	}
 	return old_fsgid;
 }
@@ -1669,7 +1687,7 @@ asmlinkage long sys_umask(int mask)
 asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
 			  unsigned long arg4, unsigned long arg5)
 {
-	int error;
+	long error;
 	int sig;
 
 	error = security_task_prctl(option, arg2, arg3, arg4, arg5);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 469cf0c2f26ece..80bf15f035cde2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -66,6 +66,7 @@ extern int sysctl_lower_zone_protection;
 extern int min_free_kbytes;
 extern int printk_ratelimit_jiffies;
 extern int printk_ratelimit_burst;
+extern int pid_max_min, pid_max_max;
 
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(__i386__)
 int unknown_nmi_panic;
@@ -575,7 +576,10 @@ static ctl_table kern_table[] = {
 		.data		= &pid_max,
 		.maxlen		= sizeof (int),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= sysctl_intvec,
+		.extra1		= &pid_max_min,
+		.extra2		= &pid_max_max,
 	},
 	{
 		.ctl_name	= KERN_PANIC_ON_OOPS,
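With proc_dointvec_minmax and the extra1/extra2 bounds in place, a value written to /proc/sys/kernel/pid_max outside [pid_max_min, pid_max_max] (301 up to PID_MAX_LIMIT) is not stored. A quick user-space probe, assuming root and a kernel carrying this change:

#include <stdio.h>

static int read_pid_max(void)
{
	FILE *f = fopen("/proc/sys/kernel/pid_max", "r");
	int v = -1;

	if (f) {
		fscanf(f, "%d", &v);
		fclose(f);
	}
	return v;
}

int main(void)
{
	FILE *f;
	int before = read_pid_max(), after;

	f = fopen("/proc/sys/kernel/pid_max", "w");
	if (!f)
		return 1;
	fprintf(f, "100\n");	/* below pid_max_min (301): not stored */
	fclose(f);
	after = read_pid_max();
	printf("pid_max: %d -> %d\n", before, after);
	return 0;
}

The lower bound exists because alloc_pidmap() recycles from RESERVED_PIDS upward; allowing pid_max at or below that would leave no allocatable pids at all.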
diff --git a/kernel/timer.c b/kernel/timer.c
index e3c9b5fcd52f56..ac9386e22bd332 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -959,11 +959,6 @@ static inline void update_times(void)
 void do_timer(struct pt_regs *regs)
 {
 	jiffies_64++;
-#ifndef CONFIG_SMP
-	/* SMP process accounting uses the local APIC timer */
-
-	update_process_times(user_mode(regs));
-#endif
 	update_times();
 }
diff --git a/kernel/user.c b/kernel/user.c
index 523175afeecdca..693487dc940e6e 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -12,6 +12,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/bitops.h>
+#include <linux/key.h>
 
 /*
  * UID task count cache, to get fast user lookup in "alloc_uid"
@@ -34,6 +35,10 @@ struct user_struct root_user = {
 	.sigpending	= ATOMIC_INIT(0),
 	.mq_bytes	= 0,
 	.locked_shm     = 0,
+#ifdef CONFIG_KEYS
+	.uid_keyring	= &root_user_keyring,
+	.session_keyring = &root_session_keyring,
+#endif
 };
 
 /*
@@ -87,6 +92,8 @@ void free_uid(struct user_struct *up)
 {
 	if (up && atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
 		uid_hash_remove(up);
+		key_put(up->uid_keyring);
+		key_put(up->session_keyring);
 		kmem_cache_free(uid_cachep, up);
 		spin_unlock(&uidhash_lock);
 	}
@@ -116,6 +123,11 @@ struct user_struct * alloc_uid(uid_t uid)
 		new->mq_bytes = 0;
 		new->locked_shm = 0;
 
+		if (alloc_uid_keyring(new) < 0) {
+			kmem_cache_free(uid_cachep, new);
+			return NULL;
+		}
+
 		/*
 		 * Before adding this, check whether we raced
 		 * on adding the same user already..
@@ -123,6 +135,8 @@ struct user_struct * alloc_uid(uid_t uid)
 		spin_lock(&uidhash_lock);
 		up = uid_hash_find(uid, hashent);
 		if (up) {
+			key_put(new->uid_keyring);
+			key_put(new->session_keyring);
 			kmem_cache_free(uid_cachep, new);
 		} else {
 			uid_hash_insert(new, hashent);
@@ -146,8 +160,10 @@ void switch_uid(struct user_struct *new_user)
 	old_user = current->user;
 	atomic_inc(&new_user->processes);
 	atomic_dec(&old_user->processes);
+	switch_uid_keyring(new_user);
 	current->user = new_user;
 	free_uid(old_user);
+	suid_keys(current);
 }
diff --git a/kernel/wait.c b/kernel/wait.c
new file mode 100644
index 00000000000000..791681cfea981d
--- /dev/null
+++ b/kernel/wait.c
@@ -0,0 +1,246 @@
+/*
+ * Generic waiting primitives.
+ *
+ * (C) 2004 William Irwin, Oracle
+ */
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/wait.h>
+#include <linux/hash.h>
+
+void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
+{
+	unsigned long flags;
+
+	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&q->lock, flags);
+	__add_wait_queue(q, wait);
+	spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(add_wait_queue);
+
+void fastcall add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
+{
+	unsigned long flags;
+
+	wait->flags |= WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&q->lock, flags);
+	__add_wait_queue_tail(q, wait);
+	spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(add_wait_queue_exclusive);
+
+void fastcall remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&q->lock, flags);
+	__remove_wait_queue(q, wait);
+	spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(remove_wait_queue);
+
+
+/*
+ * Note: we use "set_current_state()" _after_ the wait-queue add,
+ * because we need a memory barrier there on SMP, so that any
+ * wake-function that tests for the wait-queue being active
+ * will be guaranteed to see waitqueue addition _or_ subsequent
+ * tests in this thread will see the wakeup having taken place.
+ *
+ * The spin_unlock() itself is semi-permeable and only protects
+ * one way (it only protects stuff inside the critical region and
+ * stops them from bleeding out - it would still allow subsequent
+ * loads to move into the critical region).
+ */
+void fastcall
+prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
+{
+	unsigned long flags;
+
+	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&q->lock, flags);
+	if (list_empty(&wait->task_list))
+		__add_wait_queue(q, wait);
+	/*
+	 * don't alter the task state if this is just going to
+	 * queue an async wait queue callback
+	 */
+	if (is_sync_wait(wait))
+		set_current_state(state);
+	spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(prepare_to_wait);
+
+void fastcall
+prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
+{
+	unsigned long flags;
+
+	wait->flags |= WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&q->lock, flags);
+	if (list_empty(&wait->task_list))
+		__add_wait_queue_tail(q, wait);
+	/*
+	 * don't alter the task state if this is just going to
+	 * queue an async wait queue callback
+	 */
+	if (is_sync_wait(wait))
+		set_current_state(state);
+	spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(prepare_to_wait_exclusive);
+
+void fastcall finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
+{
+	unsigned long flags;
+
+	__set_current_state(TASK_RUNNING);
+	/*
+	 * We can check for list emptiness outside the lock
+	 * IFF:
+	 * - we use the "careful" check that verifies both
+	 *   the next and prev pointers, so that there cannot
+	 *   be any half-pending updates in progress on other
+	 *   CPU's that we haven't seen yet (and that might
+	 *   still change the stack area).
+	 * and
+	 * - all other users take the lock (ie we can only
+	 *   have _one_ other CPU that looks at or modifies
+	 *   the list).
+	 */
+	if (!list_empty_careful(&wait->task_list)) {
+		spin_lock_irqsave(&q->lock, flags);
+		list_del_init(&wait->task_list);
+		spin_unlock_irqrestore(&q->lock, flags);
+	}
+}
+EXPORT_SYMBOL(finish_wait);
+
+int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+	int ret = default_wake_function(wait, mode, sync, key);
+
+	if (ret)
+		list_del_init(&wait->task_list);
+	return ret;
+}
+EXPORT_SYMBOL(autoremove_wake_function);
+
+int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
+{
+	struct wait_bit_key *key = arg;
+	struct wait_bit_queue *wait_bit
+		= container_of(wait, struct wait_bit_queue, wait);
+
+	if (wait_bit->key.flags != key->flags ||
+			wait_bit->key.bit_nr != key->bit_nr ||
+			test_bit(key->bit_nr, key->flags))
+		return 0;
+	else
+		return autoremove_wake_function(wait, mode, sync, key);
+}
+EXPORT_SYMBOL(wake_bit_function);
+
+/*
+ * To allow interruptible waiting and asynchronous (i.e. nonblocking)
+ * waiting, the actions passed to __wait_on_bit() and __wait_on_bit_lock()
+ * are permitted to return error codes. Nonzero return codes halt waiting
+ * and are passed back to the caller.
+ */
+int __sched fastcall
+__wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
+			int (*action)(void *), unsigned mode)
+{
+	int ret = 0;
+
+	do {
+		prepare_to_wait(wq, &q->wait, mode);
+		if (test_bit(q->key.bit_nr, q->key.flags))
+			ret = (*action)(q->key.flags);
+	} while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
+	finish_wait(wq, &q->wait);
+	return ret;
+}
+EXPORT_SYMBOL(__wait_on_bit);
+
+int __sched fastcall out_of_line_wait_on_bit(void *word, int bit,
+					int (*action)(void *), unsigned mode)
+{
+	wait_queue_head_t *wq = bit_waitqueue(word, bit);
+	DEFINE_WAIT_BIT(wait, word, bit);
+
+	return __wait_on_bit(wq, &wait, action, mode);
+}
+EXPORT_SYMBOL(out_of_line_wait_on_bit);
+
+int __sched fastcall
+__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
+			int (*action)(void *), unsigned mode)
+{
+	int ret = 0;
+
+	do {
+		prepare_to_wait_exclusive(wq, &q->wait, mode);
+		if (test_bit(q->key.bit_nr, q->key.flags)) {
+			if ((ret = (*action)(q->key.flags)))
+				break;
+		}
+	} while (test_and_set_bit(q->key.bit_nr, q->key.flags));
+	finish_wait(wq, &q->wait);
+	return ret;
+}
+EXPORT_SYMBOL(__wait_on_bit_lock);
+
+int __sched fastcall out_of_line_wait_on_bit_lock(void *word, int bit,
+					int (*action)(void *), unsigned mode)
+{
+	wait_queue_head_t *wq = bit_waitqueue(word, bit);
+	DEFINE_WAIT_BIT(wait, word, bit);
+
+	return __wait_on_bit_lock(wq, &wait, action, mode);
+}
+EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
+
+void fastcall __wake_up_bit(wait_queue_head_t *wq, void *word, int bit)
+{
+	struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
+	if (waitqueue_active(wq))
+		__wake_up(wq, TASK_INTERRUPTIBLE|TASK_UNINTERRUPTIBLE, 1, &key);
+}
+EXPORT_SYMBOL(__wake_up_bit);
+
+/**
+ * wake_up_bit - wake up a waiter on a bit
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ *
+ * There is a standard hashed waitqueue table for generic use. This
+ * is the part of the hashtable's accessor API that wakes up waiters
+ * on a bit. For instance, if one were to have waiters on a bitflag,
+ * one would call wake_up_bit() after clearing the bit.
+ *
+ * In order for this to function properly, as it uses waitqueue_active()
+ * internally, some kind of memory barrier must be done prior to calling
+ * this. Typically, this will be smp_mb__after_clear_bit(), but in some
+ * cases where bitflags are manipulated non-atomically under a lock, one
+ * may need to use a less regular barrier, such as fs/inode.c's smp_mb(),
+ * because spin_unlock() does not guarantee a memory barrier.
+ */
+void fastcall wake_up_bit(void *word, int bit)
+{
+	__wake_up_bit(bit_waitqueue(word, bit), word, bit);
+}
+EXPORT_SYMBOL(wake_up_bit);
+
+fastcall wait_queue_head_t *bit_waitqueue(void *word, int bit)
+{
+	const int shift = BITS_PER_LONG == 32 ? 5 : 6;
+	const struct zone *zone = page_zone(virt_to_page(word));
+	unsigned long val = (unsigned long)word << shift | bit;
+
+	return &zone->wait_table[hash_long(val, zone->wait_table_bits)];
+}
+EXPORT_SYMBOL(bit_waitqueue);
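The canonical consumer of prepare_to_wait()/finish_wait() is an open-coded wait loop: the task queues itself and sets its state with the barrier ordering described in the comment above, then re-tests the condition before sleeping, so a wakeup arriving between the test and schedule() cannot be lost. A sketch in kernel context; my_waitqueue and my_condition are hypothetical, and the waker's side would set the flag and call wake_up(&my_waitqueue):

static wait_queue_head_t my_waitqueue;	/* init_waitqueue_head() at setup */
static volatile int my_condition;

static void wait_for_condition(void)
{
	DEFINE_WAIT(wait);	/* uses autoremove_wake_function() */

	for (;;) {
		prepare_to_wait(&my_waitqueue, &wait, TASK_INTERRUPTIBLE);
		if (my_condition)
			break;
		if (signal_pending(current))
			break;	/* caller would return -ERESTARTSYS */
		schedule();
	}
	finish_wait(&my_waitqueue, &wait);
}

The __wait_on_bit()/__wait_on_bit_lock() helpers above package exactly this loop for the common case where the condition is a single bit in a word, hashing the word's address into the per-zone wait tables via bit_waitqueue() so no per-object waitqueue head is needed.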