aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2004-08-22 22:26:07 -0700
committerLinus Torvalds <torvalds@ppc970.osdl.org>2004-08-22 22:26:07 -0700
commita55702bb86b58773e7e880956f00ee55cb939355 (patch)
treec733b8ad4de2f54285a8c69413c0f5b0afd31ae1 /arch
parent14729dbe26e736a6cc5479d3acb9e288b56a9a1b (diff)
downloadhistory-a55702bb86b58773e7e880956f00ee55cb939355.tar.gz
[PATCH] context-switching overhead in X, ioport()
while debugging/improving scheduling latencies i got the following strange latency report from Lee Revell: http://krustophenia.net/testresults.php?dataset=2.6.8.1-P6#/var/www/2.6.8.1-P6 this trace shows a 120 usec latency caused by XFree86, on a 600 MHz x86 system. Looking closer reveals: 00000002 0.006ms (+0.003ms): __switch_to (schedule) 00000002 0.088ms (+0.082ms): finish_task_switch (schedule) it took more than 80 usecs for XFree86 to do a context-switch! it turns out that the reason for this (massive) context-switching overhead is the following change in 2.6.8: [PATCH] larger IO bitmaps To demonstrate the effect of this change i've written ioperm-latency.c (attached), which gives the following on vanilla 2.6.8.1: # ./ioperm-latency default no ioperm: scheduling latency: 2528 cycles turning on port 80 ioperm: scheduling latency: 10563 cycles turning on port 65535 ioperm: scheduling latency: 10517 cycles the ChangeSet says: Now, with the lazy bitmap allocation and per-CPU TSS, this will really not drain any resources I think. this is plain wrong. An increase in the IO bitmap size introduces per-context-switch overhead as well: we now have to copy an 8K bitmap every time XFree86 context-switches - even though XFree86 never uses ports higher than 1024! I've straced XFree86 on a number of x86 systems and in every instance ioperm() was used - so i'd say the majority of x86 Linux systems running 2.6.8.1 are affected by this problem. This not only causes lots of overhead, it also trashes ~16K out of the L1 and L2 caches, on every context-switch. It's as if XFree86 did a L1 cache flush on every context-switch ... the simple solution would be to revert IO_BITMAP_BITS back to 1024 and release 2.6.8.2? I've implemented another solution as well, which tracks the highest-enabled port # for every task and does the copying of the bitmap intelligently. (patch attached) The patched kernel gives: # ./ioperm-latency default no ioperm: scheduling latency: 2423 cycles turning on port 80 ioperm: scheduling latency: 2503 cycles turning on port 65535 ioperm: scheduling latency: 10607 cycles this is much more acceptable - the full overhead only occurs in the very unlikely event of a task using the high ioport range. X doesnt suffer any significant overhead. (tracking the maximum allowed port # also allows a simplification of io_bitmap handling: e.g. we dont do the invalid-offset trick anymore - the IO bitmap in the TSS is always valid and secure.) I tested the patch on x86 SMP and UP, it works fine for me. I tested boundary conditions as well, it all seems secure. Ingo #include <errno.h> #include <stdio.h> #include <sched.h> #include <signal.h> #include <sys/io.h> #include <stdlib.h> #include <unistd.h> #include <linux/unistd.h> #define CYCLES(x) asm volatile ("rdtsc" :"=a" (x)::"edx") #define __NR_sched_set_affinity 241 _syscall3 (int, sched_set_affinity, pid_t, pid, unsigned int, mask_len, unsigned long *, mask) /* * Use a pair of RT processes bound to the same CPU to measure * context-switch overhead: */ static void measure(void) { unsigned long i, min = ~0UL, pid, mask = 1, t1, t2; sched_set_affinity(0, sizeof(mask), &mask); pid = fork(); if (!pid) for (;;) { asm volatile ("sti; nop; cli"); sched_yield(); } sched_yield(); for (i = 0; i < 100; i++) { asm volatile ("sti; nop; cli"); CYCLES(t1); sched_yield(); CYCLES(t2); if (i > 10) { if (t2 - t1 < min) min = t2 - t1; } } asm volatile ("sti"); kill(pid, 9); printf("scheduling latency: %ld cycles\n", min); sched_yield(); } int main(void) { struct sched_param p = { sched_priority: 2 }; unsigned long mask = 1; if (iopl(3)) { printf("need to run as root!\n"); exit(-1); } sched_setscheduler(0, SCHED_FIFO, &p); sched_set_affinity(0, sizeof(mask), &mask); printf("default no ioperm: "); measure(); printf("turning on port 80 ioperm: "); ioperm(0x80,1,1); measure(); printf("turning on port 65535 ioperm: "); if (ioperm(0xffff,1,1)) printf("FAILED - older kernel.\n"); else measure(); return 0; } Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/i386/kernel/ioport.c33
-rw-r--r--arch/i386/kernel/process.c44
2 files changed, 49 insertions, 28 deletions
diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c
index 3346657c3dd21f..e4350d22b5f6ea 100644
--- a/arch/i386/kernel/ioport.c
+++ b/arch/i386/kernel/ioport.c
@@ -56,6 +56,7 @@ static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int ex
*/
asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
{
+ unsigned int i, max_long, bytes, bytes_updated;
struct thread_struct * t = &current->thread;
struct tss_struct * tss;
unsigned long *bitmap;
@@ -81,16 +82,34 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
/*
* do it in the per-thread copy and in the TSS ...
+ *
+ * Disable preemption via get_cpu() - we must not switch away
+ * because the ->io_bitmap_max value must match the bitmap
+ * contents:
*/
- set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
tss = init_tss + get_cpu();
- if (tss->io_bitmap_base == IO_BITMAP_OFFSET) { /* already active? */
- set_bitmap(tss->io_bitmap, from, num, !turn_on);
- } else {
- memcpy(tss->io_bitmap, t->io_bitmap_ptr, IO_BITMAP_BYTES);
- tss->io_bitmap_base = IO_BITMAP_OFFSET; /* Activate it in the TSS */
- }
+
+ set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
+
+ /*
+ * Search for a (possibly new) maximum. This is simple and stupid,
+ * to keep it obviously correct:
+ */
+ max_long = 0;
+ for (i = 0; i < IO_BITMAP_LONGS; i++)
+ if (t->io_bitmap_ptr[i] != ~0UL)
+ max_long = i;
+
+ bytes = (max_long + 1) * sizeof(long);
+ bytes_updated = max(bytes, t->io_bitmap_max);
+
+ t->io_bitmap_max = bytes;
+
+ /* Update the TSS: */
+ memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated);
+
put_cpu();
+
return 0;
}
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 75b200985224b0..bf3cdbfe27d113 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -294,14 +294,20 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
void exit_thread(void)
{
struct task_struct *tsk = current;
+ struct thread_struct *t = &tsk->thread;
/* The process may have allocated an io port bitmap... nuke it. */
- if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) {
+ if (unlikely(NULL != t->io_bitmap_ptr)) {
int cpu = get_cpu();
struct tss_struct *tss = init_tss + cpu;
- kfree(tsk->thread.io_bitmap_ptr);
- tsk->thread.io_bitmap_ptr = NULL;
- tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
+
+ kfree(t->io_bitmap_ptr);
+ t->io_bitmap_ptr = NULL;
+ /*
+ * Careful, clear this in the TSS too:
+ */
+ memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
+ t->io_bitmap_max = 0;
put_cpu();
}
}
@@ -369,8 +375,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
tsk = current;
if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) {
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
- if (!p->thread.io_bitmap_ptr)
+ if (!p->thread.io_bitmap_ptr) {
+ p->thread.io_bitmap_max = 0;
return -ENOMEM;
+ }
memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr,
IO_BITMAP_BYTES);
}
@@ -401,8 +409,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
err = 0;
out:
- if (err && p->thread.io_bitmap_ptr)
+ if (err && p->thread.io_bitmap_ptr) {
kfree(p->thread.io_bitmap_ptr);
+ p->thread.io_bitmap_max = 0;
+ }
return err;
}
@@ -552,26 +562,18 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
}
if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
- if (next->io_bitmap_ptr) {
+ if (next->io_bitmap_ptr)
/*
- * 4 cachelines copy ... not good, but not that
- * bad either. Anyone got something better?
- * This only affects processes which use ioperm().
- * [Putting the TSSs into 4k-tlb mapped regions
- * and playing VM tricks to switch the IO bitmap
- * is not really acceptable.]
+ * Copy the relevant range of the IO bitmap.
+ * Normally this is 128 bytes or less:
*/
memcpy(tss->io_bitmap, next->io_bitmap_ptr,
- IO_BITMAP_BYTES);
- tss->io_bitmap_base = IO_BITMAP_OFFSET;
- } else
+ max(prev->io_bitmap_max, next->io_bitmap_max));
+ else
/*
- * a bitmap offset pointing outside of the TSS limit
- * causes a nicely controllable SIGSEGV if a process
- * tries to use a port IO instruction. The first
- * sys_ioperm() call sets up the bitmap properly.
+ * Clear any possible leftover bits:
*/
- tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
+ memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
}
return prev_p;
}