To: marcelo@conectiva.com.br
From: davej@suse.de
Subject: Intel cache handling fixes.

- Pentium III Tualatin workaround happens in wrong place.
- Add missing cache descriptors
  This has been reported to affect performance quite
  dramatically, but this surprises me a little.
- Handle P4 trace cache

Same patches are in 2.5 for some weeks now.

diff -urpN --exclude-from=/home/davej/.exclude linux-2.4.20-pre8/arch/i386/kernel/setup.c linux-2.4.20-pre8-leakfix/arch/i386/kernel/setup.c
--- linux-2.4.20-pre8/arch/i386/kernel/setup.c	2002-09-26 15:29:12.000000000 +0100
+++ linux-2.4.20-pre8-leakfix/arch/i386/kernel/setup.c	2002-09-30 23:03:04.000000000 +0100
@@ -1256,15 +1256,6 @@ static void __init display_cacheinfo(str
 			l2size = 256;
 	}
 
-	/* Intel PIII Tualatin. This comes in two flavours.
-	 * One has 256kb of cache, the other 512. We have no way
-	 * to determine which, so we use a boottime override
-	 * for the 512kb model, and assume 256 otherwise.
-	 */
-	if ((c->x86_vendor == X86_VENDOR_INTEL) && (c->x86 == 6) &&
-		(c->x86_model == 11) && (l2size == 0))
-		l2size = 256;
-
 	/* VIA C3 CPUs (670-68F) need further shifting. */
 	if (c->x86_vendor == X86_VENDOR_CENTAUR && (c->x86 == 6) &&
 		((c->x86_model == 7) || (c->x86_model == 8))) {
@@ -2180,6 +2171,7 @@ extern void trap_init_f00f_bug(void);
 #define LVL_1_DATA      2
 #define LVL_2           3
 #define LVL_3           4
+#define LVL_TRACE       5
 
 struct _cache_table
 {
@@ -2199,6 +2191,8 @@ static struct _cache_table cache_table[]
 	{ 0x23, LVL_3,      1024 },
 	{ 0x25, LVL_3,      2048 },
 	{ 0x29, LVL_3,      4096 },
+	{ 0x39, LVL_2,      128 },
+	{ 0x3C, LVL_2,      256 },
 	{ 0x41, LVL_2,      128 },
 	{ 0x42, LVL_2,      256 },
 	{ 0x43, LVL_2,      512 },
@@ -2207,11 +2201,15 @@ static struct _cache_table cache_table[]
 	{ 0x66, LVL_1_DATA, 8 },
 	{ 0x67, LVL_1_DATA, 16 },
 	{ 0x68, LVL_1_DATA, 32 },
+	{ 0x70, LVL_TRACE,  12 },
+	{ 0x71, LVL_TRACE,  16 },
+	{ 0x72, LVL_TRACE,  32 },
 	{ 0x79, LVL_2,      128 },
 	{ 0x7A, LVL_2,      256 },
 	{ 0x7B, LVL_2,      512 },
 	{ 0x7C, LVL_2,      1024 },
 	{ 0x82, LVL_2,      256 },
+	{ 0x83, LVL_2,      512 },
 	{ 0x84, LVL_2,      1024 },
 	{ 0x85, LVL_2,      2048 },
 	{ 0x00, 0, 0}
@@ -2219,7 +2217,7 @@ static struct _cache_table cache_table[]
 
 static void __init init_intel(struct cpuinfo_x86 *c)
 {
-	unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
+	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
 	char *p = NULL;
 #ifndef CONFIG_X86_F00F_WORKS_OK
 	static int f00f_workaround_enabled = 0;
@@ -2279,8 +2277,10 @@ static void __init init_intel(struct cpu
 						case LVL_3:
 							l3 += cache_table[k].size;
 							break;
+						case LVL_TRACE:
+							trace += cache_table[k].size;
+							break;
 						}
-
 						break;
 					}
 
@@ -2288,9 +2288,25 @@ static void __init init_intel(struct cpu
 				}
 			}
 		}
-		if ( l1i || l1d )
-			printk(KERN_INFO "CPU: L1 I cache: %dK, L1 D cache: %dK\n",
-			       l1i, l1d);
+
+		/* Intel PIII Tualatin. This comes in two flavours.
+		 * One has 256kb of cache, the other 512. We have no way
+		 * to determine which, so we use a boottime override
+		 * for the 512kb model, and assume 256 otherwise.
+		 */
+		if ((c->x86 == 6) && (c->x86_model == 11) && (l2 == 0))
+			l2 = 256;
+		/* Allow user to override all this if necessary. */
+		if (cachesize_override != -1)
+			l2 = cachesize_override;
+
+		if ( trace )
+			printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
+		else if ( l1i )
+			printk (KERN_INFO "CPU: L1 I cache: %dK", l1i);
+		if ( l1d )
+			printk(", L1 D cache: %dK\n", l1d);
+
 		if ( l2 )
 			printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
 		if ( l3 )