author		Ard Biesheuvel <ardb@kernel.org>	2024-01-19 16:49:24 +0100
committer	Ard Biesheuvel <ardb@kernel.org>	2024-01-22 00:03:18 +0100
commit		8f1e677e8f0468563a755275036b9a9b11c0a521 (patch)
tree		8d26dac7e7cc1128989216118c4db0eff95abf81
parent		74af0e9f91202ed3ecf819e0acd079ee22e9d880 (diff)
download	linux-x86-pie-v2.tar.gz
x86/boot: Permit GOTPCREL relocations for x86_64 builds
Some of the early x86_64 startup code is written in C, and executes in the early 1:1 mapping of the kernel, which is not the address it was linked at. This requires special care when accessing global variables, and is currently dealt with on an ad-hoc basis, primarily in head64.c, using explicit pointer fixups. It would be better to rely on the compiler for this, by using -fPIE to generate code that can run at any address, and which uses RIP-relative accesses to refer to global variables.

While it is possible to avoid most of the GOT based symbol references that the compiler typically emits in -fPIE mode, by giving symbols 'hidden' visibility, there are cases where the compiler will always rely on the GOT, for instance for weak external references (which may remain unsatisfied at link time). This means the build may produce a small number of GOT entries nonetheless.

So update the reloc processing host tool to add support for this, and place the GOT in the .text section rather than discard it.

Note that multiple GOT based references to the same symbol share a single GOT entry, so naively emitting a relocation for the GOT entry each time a reference to it is encountered could produce duplicates. Work around this by relying on the fact that the relocation lists are sorted, and deduplicate the 64-bit relocations as they are emitted by comparing each entry with the previous one.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
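To illustrate the weak-reference case: the sketch below is hypothetical code (the symbol name is invented, and it is not part of this patch). Built with -fPIE, the address test cannot be satisfied with a plain RIP-relative reference, because the weak symbol may remain unsatisfied at link time, so compilers emit a load through the GOT, i.e. the kind of GOTPCREL reference this patch teaches the relocs tool to handle:

/*
 * Hypothetical sketch: a weak external reference that -fPIE can only
 * resolve via the GOT. 'hidden' visibility does not help here, since
 * the symbol may remain NULL (unsatisfied) at link time.
 */
extern void maybe_hook(void) __attribute__((weak));

void call_hook_if_present(void)
{
	/*
	 * Typically compiles to something like:
	 *   movq  maybe_hook@GOTPCREL(%rip), %rax
	 *   testq %rax, %rax
	 * so the address lives in a GOT entry, which must itself be
	 * relocated when the kernel runs away from its link address.
	 */
	if (maybe_hook)
		maybe_hook();
}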
-rw-r--r--	arch/x86/kernel/vmlinux.lds.S	5
-rw-r--r--	arch/x86/tools/relocs.c	21
2 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index a349dbfc6d5ab4..6ec818078905df 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -137,6 +137,9 @@ SECTIONS
*(.text..__x86.return_thunk)
#endif
STATIC_CALL_TEXT
+#ifdef CONFIG_X86_64
+ *(.got) *(.igot.*)
+#endif
ALIGN_ENTRY_TEXT_BEGIN
*(.text..__x86.rethunk_untrain)
@@ -462,10 +465,12 @@ SECTIONS
* Sections that should stay zero sized, which is safer to
* explicitly check instead of blindly discarding.
*/
+#ifdef CONFIG_X86_32
.got : {
*(.got) *(.igot.*)
}
ASSERT(SIZEOF(.got) == 0, "Unexpected GOT entries detected!")
+#endif
.plt : {
*(.plt) *(.plt.*) *(.iplt)
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index a2d3df11a8a4bf..27e8abb114e419 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -818,6 +818,7 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
case R_X86_64_32:
case R_X86_64_32S:
case R_X86_64_64:
+ case R_X86_64_GOTPCREL:
/*
* References to the percpu area don't need to be adjusted.
*/
@@ -837,6 +838,21 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
break;
}
+ if (r_type == R_X86_64_GOTPCREL) {
+ Elf_Shdr *s = &secs[sec->shdr.sh_info].shdr;
+ unsigned file_off = offset - s->sh_addr + s->sh_offset;
+
+ /*
+ * GOTPCREL relocations refer to instructions that load
+ * a 64-bit address via a 32-bit relative reference to
+ * the GOT. In this case, it is the GOT entry that
+ * needs to be fixed up, not the immediate offset in
+ * the opcode.
+ */
+ offset += (int32_t)get_unaligned_le32(elf_image + file_off)
+           - rel->r_addend;
+ }
+
/*
* Relocation offsets for 64 bit kernels are output
* as 32 bits and sign extended back to 64 bits when
@@ -846,7 +862,7 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
if ((int32_t)offset != (int64_t)offset)
die("Relocation offset doesn't fit in 32 bits\n");
- if (r_type == R_X86_64_64)
+ if (r_type == R_X86_64_64 || r_type == R_X86_64_GOTPCREL)
add_reloc(&relocs64, offset);
else
add_reloc(&relocs32, offset);
@@ -1067,7 +1083,8 @@ static void emit_relocs(int as_text, int use_real_mode)
/* Now print each relocation */
for (i = 0; i < relocs64.count; i++)
- write_reloc(relocs64.offset[i], stdout);
+ if (!i || relocs64.offset[i] != relocs64.offset[i - 1])
+ write_reloc(relocs64.offset[i], stdout);
/* Print a stop */
write_reloc(0, stdout);
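For reference, the address arithmetic performed by the new GOTPCREL branch in do_reloc64() can be summarized as below. This is an illustrative sketch based on the x86-64 psABI definition of the relocation (field = G + GOT + A - P), not code from the patch: after linking, the 32-bit field at r_offset holds got_entry + addend - r_offset, so the GOT slot address is recovered by adding the stored displacement and subtracting the addend (typically -4). That recovered address, rather than the instruction's immediate, is what lands in the 64-bit relocation list, where the sorted-list deduplication above collapses multiple references to the same GOT entry into one:

#include <stdint.h>

/*
 * Illustrative sketch (not from the patch): recover the GOT slot
 * address from a linked R_X86_64_GOTPCREL site. The psABI defines
 * the relocated field as G + GOT + A - P, so the slot address is
 * P + field - A; with the usual A = -4 this is the RIP-relative
 * target of the instruction, i.e. four bytes past the field plus
 * the stored displacement.
 */
static uint64_t got_slot_address(uint64_t p,    /* rel->r_offset   */
                                 int32_t field, /* disp32 in image */
                                 int64_t a)     /* rel->r_addend   */
{
	return p + (int64_t)field - a;
}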