From: Roland Dreier Get rid of the x86 SSE code for atomic 64-bit writes to doorbell registers. Saving/setting CR0 plus a clts instruction are too expensive for it to ever be a win, and the config option was just confusing. Signed-off-by: Roland Dreier Signed-off-by: Andrew Morton --- 25-akpm/drivers/infiniband/hw/mthca/Kconfig | 10 --- 25-akpm/drivers/infiniband/hw/mthca/mthca_doorbell.h | 52 ++----------------- 25-akpm/drivers/infiniband/hw/mthca/mthca_main.c | 20 ------- 3 files changed, 6 insertions(+), 76 deletions(-) diff -puN drivers/infiniband/hw/mthca/Kconfig~infiniband-mthca-remove-x86-sse-pessimization drivers/infiniband/hw/mthca/Kconfig --- 25/drivers/infiniband/hw/mthca/Kconfig~infiniband-mthca-remove-x86-sse-pessimization 2005-01-23 22:25:08.093371792 -0800 +++ 25-akpm/drivers/infiniband/hw/mthca/Kconfig 2005-01-23 22:25:08.515307648 -0800 @@ -14,13 +14,3 @@ config INFINIBAND_MTHCA_DEBUG This option causes the mthca driver produce a bunch of debug messages. Select this is you are developing the driver or trying to diagnose a problem. - -config INFINIBAND_MTHCA_SSE_DOORBELL - bool "SSE doorbell code" - depends on INFINIBAND_MTHCA && X86 && !X86_64 - default n - ---help--- - This option will have the mthca driver use SSE instructions - to ring hardware doorbell registers. This may improve - performance for some workloads, but the driver will not run - on processors without SSE instructions. diff -puN drivers/infiniband/hw/mthca/mthca_doorbell.h~infiniband-mthca-remove-x86-sse-pessimization drivers/infiniband/hw/mthca/mthca_doorbell.h --- 25/drivers/infiniband/hw/mthca/mthca_doorbell.h~infiniband-mthca-remove-x86-sse-pessimization 2005-01-23 22:25:08.094371640 -0800 +++ 25-akpm/drivers/infiniband/hw/mthca/mthca_doorbell.h 2005-01-23 22:25:08.516307496 -0800 @@ -32,9 +32,7 @@ * $Id: mthca_doorbell.h 1349 2004-12-16 21:09:43Z roland $ */ -#include #include -#include #define MTHCA_RD_DOORBELL 0x00 #define MTHCA_SEND_DOORBELL 0x10 @@ -59,51 +57,13 @@ static inline void mthca_write64(u32 val __raw_writeq(*(u64 *) val, dest); } -#elif defined(CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL) -/* Use SSE to write 64 bits atomically without a lock. */ - -#define MTHCA_DECLARE_DOORBELL_LOCK(name) -#define MTHCA_INIT_DOORBELL_LOCK(ptr) do { } while (0) -#define MTHCA_GET_DOORBELL_LOCK(ptr) (NULL) - -static inline unsigned long mthca_get_fpu(void) -{ - unsigned long cr0; - - preempt_disable(); - asm volatile("mov %%cr0,%0; clts" : "=r" (cr0)); - return cr0; -} - -static inline void mthca_put_fpu(unsigned long cr0) -{ - asm volatile("mov %0,%%cr0" : : "r" (cr0)); - preempt_enable(); -} - -static inline void mthca_write64(u32 val[2], void __iomem *dest, - spinlock_t *doorbell_lock) -{ - /* i386 stack is aligned to 8 bytes, so this should be OK: */ - u8 xmmsave[8] __attribute__((aligned(8))); - unsigned long cr0; - - cr0 = mthca_get_fpu(); - - asm volatile ( - "movlps %%xmm0,(%0); \n\t" - "movlps (%1),%%xmm0; \n\t" - "movlps %%xmm0,(%2); \n\t" - "movlps (%0),%%xmm0; \n\t" - : - : "r" (xmmsave), "r" (val), "r" (dest) - : "memory" ); - - mthca_put_fpu(cr0); -} - #else -/* Just fall back to a spinlock to protect the doorbell */ + +/* + * Just fall back to a spinlock to protect the doorbell if + * BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit + * MMIO writes. + */ #define MTHCA_DECLARE_DOORBELL_LOCK(name) spinlock_t name; #define MTHCA_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr) diff -puN drivers/infiniband/hw/mthca/mthca_main.c~infiniband-mthca-remove-x86-sse-pessimization drivers/infiniband/hw/mthca/mthca_main.c --- 25/drivers/infiniband/hw/mthca/mthca_main.c~infiniband-mthca-remove-x86-sse-pessimization 2005-01-23 22:25:08.096371336 -0800 +++ 25-akpm/drivers/infiniband/hw/mthca/mthca_main.c 2005-01-23 22:25:08.516307496 -0800 @@ -40,10 +40,6 @@ #include #include -#ifdef CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL -#include -#endif - #include "mthca_dev.h" #include "mthca_config_reg.h" #include "mthca_cmd.h" @@ -1117,22 +1113,6 @@ static int __init mthca_init(void) { int ret; - /* - * TODO: measure whether dynamically choosing doorbell code at - * runtime affects our performance. Is there a "magic" way to - * choose without having to follow a function pointer every - * time we ring a doorbell? - */ -#ifdef CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL - if (!cpu_has_xmm) { - printk(KERN_ERR PFX "mthca was compiled with SSE doorbell code, but\n"); - printk(KERN_ERR PFX "the current CPU does not support SSE.\n"); - printk(KERN_ERR PFX "Turn off CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL " - "and recompile.\n"); - return -ENODEV; - } -#endif - ret = pci_register_driver(&mthca_driver); return ret < 0 ? ret : 0; } _