From: "Eric W. Biederman" To enable bootloaders to boot to directly load the x86_64 vmlinux and to enable the x86_64 kernel to switch into 64bit mode earlier this patch refactors the x86_64 entry code so there is a native 64bit entry point to the kernel. I ran this by Andi Kleen and he agreed it looks fairly sane. Signed-off-by: Eric Biederman Signed-off-by: Andrew Morton --- 25-akpm/arch/x86_64/kernel/head.S | 112 +++++++++++++++++-------------- 25-akpm/arch/x86_64/kernel/smpboot.c | 2 25-akpm/arch/x86_64/kernel/trampoline.S | 22 +----- 25-akpm/arch/x86_64/kernel/vmlinux.lds.S | 3 4 files changed, 69 insertions(+), 70 deletions(-) diff -puN arch/x86_64/kernel/head.S~x86_64-entry64 arch/x86_64/kernel/head.S --- 25/arch/x86_64/kernel/head.S~x86_64-entry64 2005-03-07 12:14:12.000000000 -0800 +++ 25-akpm/arch/x86_64/kernel/head.S 2005-03-07 12:14:12.000000000 -0800 @@ -26,6 +26,7 @@ .text .code32 + .globl startup_32 /* %bx: 1 if coming from smp trampoline on secondary cpu */ startup_32: @@ -37,11 +38,13 @@ startup_32: * There is no stack until we set one up. */ - movl %ebx,%ebp /* Save trampoline flag */ - + /* Initialize the %ds segment register */ movl $__KERNEL_DS,%eax movl %eax,%ds - + + /* Load new GDT with the 64bit segments using 32bit descriptor */ + lgdt pGDT32 - __START_KERNEL_map + /* If the CPU doesn't support CPUID this will double fault. * Unfortunately it is hard to check for CPUID without a stack. */ @@ -57,16 +60,13 @@ startup_32: btl $29, %edx jnc no_long_mode - movl %edx,%edi - /* * Prepare for entering 64bits mode */ - /* Enable PAE mode and PGE */ + /* Enable PAE mode */ xorl %eax, %eax btsl $5, %eax - btsl $7, %eax movl %eax, %cr4 /* Setup early boot stage 4 level pagetables */ @@ -79,14 +79,6 @@ startup_32: /* Enable Long Mode */ btsl $_EFER_LME, %eax - /* Enable System Call */ - btsl $_EFER_SCE, %eax - - /* No Execute supported? */ - btl $20,%edi - jnc 1f - btsl $_EFER_NX, %eax -1: /* Make changes effective */ wrmsr @@ -94,38 +86,69 @@ startup_32: xorl %eax, %eax btsl $31, %eax /* Enable paging and in turn activate Long Mode */ btsl $0, %eax /* Enable protected mode */ - btsl $1, %eax /* Enable MP */ - btsl $4, %eax /* Enable ET */ - btsl $5, %eax /* Enable NE */ - btsl $16, %eax /* Enable WP */ - btsl $18, %eax /* Enable AM */ /* Make changes effective */ movl %eax, %cr0 - jmp reach_compatibility_mode -reach_compatibility_mode: - /* * At this point we're in long mode but in 32bit compatibility mode * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn - * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we load + * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use * the new gdt/idt that has __KERNEL_CS with CS.L = 1. */ - - testw %bp,%bp /* secondary CPU? */ - jnz second - - /* Load new GDT with the 64bit segment using 32bit descriptor */ - movl $(pGDT32 - __START_KERNEL_map), %eax - lgdt (%eax) - -second: - movl $(ljumpvector - __START_KERNEL_map), %eax - /* Finally jump in 64bit mode */ - ljmp *(%eax) + ljmp $__KERNEL_CS, $(startup_64 - __START_KERNEL_map) .code64 .org 0x100 -reach_long64: + .globl startup_64 +startup_64: + /* We come here either from startup_32 + * or directly from a 64bit bootloader. + * Since we may have come directly from a bootloader we + * reload the page tables here. + */ + + /* Enable PAE mode and PGE */ + xorq %rax, %rax + btsq $5, %rax + btsq $7, %rax + movq %rax, %cr4 + + /* Setup early boot stage 4 level pagetables. 
+	movq	$(init_level4_pgt - __START_KERNEL_map), %rax
+	movq	%rax, %cr3
+
+	/* Check if nx is implemented */
+	movl	$0x80000001, %eax
+	cpuid
+	movl	%edx,%edi
+
+	/* Setup EFER (Extended Feature Enable Register) */
+	movl	$MSR_EFER, %ecx
+	rdmsr
+
+	/* Enable System Call */
+	btsl	$_EFER_SCE, %eax
+
+	/* No Execute supported? */
+	btl	$20,%edi
+	jnc	1f
+	btsl	$_EFER_NX, %eax
+1:
+	/* Make changes effective */
+	wrmsr
+
+	/* Setup cr0 */
+	xorq	%rax, %rax
+	btsq	$31, %rax		/* Enable paging */
+	btsq	$0, %rax		/* Enable protected mode */
+	btsq	$1, %rax		/* Enable MP */
+	btsq	$4, %rax		/* Enable ET */
+	btsq	$5, %rax		/* Enable NE */
+	btsq	$16, %rax		/* Enable WP */
+	btsq	$18, %rax		/* Enable AM */
+	/* Make changes effective */
+	movq	%rax, %cr0
+
+	/* Setup a boot time stack */
 	movq init_rsp(%rip),%rsp
 
 	/* zero EFLAGS after setting rsp */
@@ -199,13 +222,8 @@ ENTRY(no_long_mode)
 	.org 0xf00
 	.globl pGDT32
 pGDT32:
-	.word	gdt32_end-gdt_table32
-	.long	gdt_table32-__START_KERNEL_map
-
-.org 0xf10
-ljumpvector:
-	.long	reach_long64-__START_KERNEL_map
-	.word	__KERNEL_CS
+	.word	gdt_end-cpu_gdt_table
+	.long	cpu_gdt_table-__START_KERNEL_map
 
 ENTRY(stext)
 ENTRY(_stext)
@@ -335,12 +353,6 @@ gdt:
 	.endr
 #endif
 
-ENTRY(gdt_table32)
-	.quad	0x0000000000000000	/* This one is magic */
-	.quad	0x0000000000000000	/* unused */
-	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
-gdt32_end:
-
 /* We need valid kernel segments for data and code in long mode too
  * IRET will check the segment types  kkeil 2000/10/28
  * Also sysret mandates a special GDT layout
diff -puN arch/x86_64/kernel/smpboot.c~x86_64-entry64 arch/x86_64/kernel/smpboot.c
--- 25/arch/x86_64/kernel/smpboot.c~x86_64-entry64	2005-03-07 12:14:12.000000000 -0800
+++ 25-akpm/arch/x86_64/kernel/smpboot.c	2005-03-07 12:14:12.000000000 -0800
@@ -91,8 +91,6 @@ extern unsigned char trampoline_end [];
 static unsigned long __init setup_trampoline(void)
 {
 	void *tramp = __va(SMP_TRAMPOLINE_BASE);
-	extern volatile __u32 tramp_gdt_ptr;
-	tramp_gdt_ptr = __pa_symbol(&cpu_gdt_table);
 	memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
 	return virt_to_phys(tramp);
 }
diff -puN arch/x86_64/kernel/trampoline.S~x86_64-entry64 arch/x86_64/kernel/trampoline.S
--- 25/arch/x86_64/kernel/trampoline.S~x86_64-entry64	2005-03-07 12:14:12.000000000 -0800
+++ 25-akpm/arch/x86_64/kernel/trampoline.S	2005-03-07 12:14:12.000000000 -0800
@@ -37,7 +37,6 @@ r_base = .
 	mov	%cs, %ax	# Code and data in the same place
 	mov	%ax, %ds
 
-	mov	$1, %bx		# Flag an SMP trampoline
 	cli			# We should be safe anyway
 
 	movl	$0xA5A5A5A5, trampoline_data - r_base
@@ -46,31 +45,20 @@ r_base = .
 	lidt	idt_48 - r_base	# load idt with 0, 0
 	lgdt	gdt_48 - r_base	# load gdt with whatever is appropriate
 
-	movw	$__KERNEL_DS,%ax
-	movw	%ax,%ds
-	movw	%ax,%es
-
 	xor	%ax, %ax
 	inc	%ax		# protected mode (PE) bit
 	lmsw	%ax		# into protected mode
-	jmp	flush_instr
-flush_instr:
-	ljmpl	$__KERNEL32_CS, $0x00100000
-			# jump to startup_32 in arch/x86_64/kernel/head.S
+	# flush prefetch and jump to startup_32 in arch/x86_64/kernel/head.S
+	ljmpl	$__KERNEL32_CS, $(startup_32-__START_KERNEL_map)
+	# Careful: these need to be in the same 64K segment as the above
 idt_48:
 	.word	0			# idt limit = 0
 	.word	0, 0			# idt base = 0L
 
 gdt_48:
-	.short	0x0800			# gdt limit = 2048, 256 GDT entries
-	.globl tramp_gdt_ptr
-tramp_gdt_ptr:
-	.long	0			# gdt base = gdt (first SMP CPU)
-					# this is filled in by C because the 64bit
-					# linker doesn't support absolute 32bit
-					# relocations.
-
+	.short	__KERNEL32_CS + 7	# gdt limit
+	.long	cpu_gdt_table-__START_KERNEL_map
 
 .globl trampoline_end
 trampoline_end:
diff -puN arch/x86_64/kernel/vmlinux.lds.S~x86_64-entry64 arch/x86_64/kernel/vmlinux.lds.S
--- 25/arch/x86_64/kernel/vmlinux.lds.S~x86_64-entry64	2005-03-07 12:14:12.000000000 -0800
+++ 25-akpm/arch/x86_64/kernel/vmlinux.lds.S	2005-03-07 12:14:12.000000000 -0800
@@ -10,11 +10,12 @@
 
 OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
 OUTPUT_ARCH(i386:x86-64)
-ENTRY(stext)
+ENTRY(phys_startup_64)
 jiffies_64 = jiffies;
 SECTIONS
 {
   . = __START_KERNEL;
+  phys_startup_64 = startup_64 - LOAD_OFFSET;
   _text = .;			/* Text and read-only data */
   .text : AT(ADDR(.text) - LOAD_OFFSET) { *(.text) _
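
With ENTRY(phys_startup_64) in the linker script and phys_startup_64 defined
as startup_64 - LOAD_OFFSET, the ELF e_entry field of vmlinux now holds the
physical address of startup_64, so a 64bit bootloader can find the native
entry point straight from the ELF header.  Below is a minimal loader-side
sketch of that, for illustration only (not part of this patch); it assumes a
hosted build environment with <elf.h> and a plain ELF64 vmlinux:

/* Hypothetical helper, not part of this patch: print the native 64bit
 * entry point of a vmlinux image.  With ENTRY(phys_startup_64) in
 * vmlinux.lds.S, e_entry already holds startup_64 - __START_KERNEL_map,
 * i.e. a physical address a 64bit bootloader can jump to directly.
 */
#include <elf.h>
#include <stdio.h>
#include <string.h>

int main(int argc, char **argv)
{
	Elf64_Ehdr ehdr;
	FILE *fp;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <vmlinux>\n", argv[0]);
		return 1;
	}
	fp = fopen(argv[1], "rb");
	if (!fp || fread(&ehdr, sizeof(ehdr), 1, fp) != 1) {
		perror(argv[1]);
		return 1;
	}
	if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
	    ehdr.e_machine != EM_X86_64) {
		fprintf(stderr, "%s: not an x86_64 ELF image\n", argv[1]);
		fclose(fp);
		return 1;
	}
	/* phys_startup_64 = startup_64 - LOAD_OFFSET (see vmlinux.lds.S) */
	printf("phys_startup_64 = %#llx\n", (unsigned long long)ehdr.e_entry);
	fclose(fp);
	return 0;
}

A bootloader entering at that address still has to be in 64bit mode with page
tables good enough to reach the entry point; startup_64 then reloads %cr3
with init_level4_pgt itself, as the comment in head.S above notes, precisely
because it cannot assume which page tables the bootloader left behind.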