I'm trying to follow the OSDev "Higher Half x86 Bare Bones" tutorial (after having done it multiple times) and modify it to send the kernel to the higher half of the PML4 as opposed to the higher half of a 32-bit page table. The reason being because of mixed syntaxes in the tutorials: the BB ones use GNU syntax, while the only 64-bit tutorial on there uses MASM syntax which isn't compatible.
So far, I've got this 235-line mess:
# In 32-bit mode until we get to _start
# GRUB (Multiboot 1) hands control to us in 32-bit protected mode.
.code32
# Declare constants for the multiboot header.
.set ALIGN, 1<<0 # align loaded modules on page boundaries
.set MEMINFO, 1<<1 # provide memory map
.set FLAGS, ALIGN | MEMINFO # this is the Multiboot 'flag' field
.set MAGIC, 0x1BADB002 # 'magic number' lets bootloader find the header
.set CHECKSUM, -(MAGIC + FLAGS) # checksum of above, to prove we are multiboot
# Declare a header as in the Multiboot Standard.
# The header must be 4-byte aligned and land within the first 8 KiB of the
# image file for the bootloader to find it (spec requirement).
.section .multiboot
.align 4
.long MAGIC
.long FLAGS
.long CHECKSUM
# Boot stack: @nobits means it occupies no space in the binary, only in memory.
# 16-byte alignment satisfies the SysV AMD64 ABI stack requirement.
.section .boot_stack, "aw", @nobits
.align 16
stack_bottom:
.skip 16384 # 16 KiB
stack_top:
.section .bss, "aw", @nobits
# 64-bit higher half page tables
# Each 4 KiB table holds 512 eight-byte entries; all four levels must be
# page-aligned because CR3 and the table entries hold 4 KiB-aligned
# physical addresses.
.align 4096
.global pml4_root
pml4_root:
.skip 4096
.global pml4_pdptr
pml4_pdptr:
.skip 4096
.global pml4_dir
pml4_dir:
.skip 4096
.global pml4_bpt0
pml4_bpt0:
.skip 4096
#TODO: PML5
#64-bit kernel GDT
.section .gdt
gdt_start:
#NOTE(review): the null descriptor is normally all zeros; a 0xffff limit and
#a nonzero granularity byte here look like leftovers from a 32-bit GDT — confirm intent.
null:
.word 0xffff #Limit
.word 0 #Base (low)
.byte 0 #Base (middle)
.byte 0 #Access
.byte 1 #Granularity
.byte 0 #Base (high)
#Code descriptor: access 0x9A (present, ring 0, executable, readable);
#granularity 0b10101111 sets the L bit (bit 5) marking a 64-bit code segment.
code:
.word 0 #Limit
.word 0 #Base (low)
.byte 0 #Base (middle)
.byte 0b10011010 #Access (755)
.byte 0b10101111 #Granularity
.byte 0 #Base (high)
#Data descriptor: access 0x92 (present, ring 0, writable data).
data:
.word 0 #Limit
.word 0 #Base (low)
.byte 0 #Base (middle)
.byte 0b10010010 #Access (777)
.byte 0b00000000 #Granularity
.byte 0 #Base (high)
gdt_end:
#NOTE(review): lgdt needs a 10-byte pseudo-descriptor in memory
#(2-byte limit followed by 8-byte base address). gdtp has no data after it —
#no .word/.quad is emitted here, so there is nothing valid to load.
#Also, .align 8 placed AFTER the label can leave gdtp pointing at padding.
.global gdtp
gdtp:
.align 8
.equ gdt_len, gdt_end - gdt_start - 1
#NOTE(review): '$' is an operand prefix in AT&T instruction syntax, not valid
#in a .equ expression — confirm this line even assembles as intended.
.equ gdt_addr, $0xffff000000000000
# The kernel entry point.
.section .text
#NOTE(review): these "handlers" place raw .ascii bytes in .text; jumping to
#NoLongMode/NoCPUID executes the string bytes as instructions before reaching
#hlt. Nothing here writes to the screen. Also, 'loop' decrements %ecx and
#falls through when it hits zero, so this is not actually an infinite loop.
.global NoLongMode
NoLongMode:
.ascii "Error\: Long Mode not detected"
hlt
loop NoLongMode #Infinite loop because we've got nothing better to do
.global NoCPUID
NoCPUID:
.ascii "Error\: could not determine CPUID"
hlt
loop NoCPUID #Infinite loop because we've got nothing better to do
.global _start
.type _start, @function
_start:
setup_64:
#Block interrupts until we have the IDT
cli
#CPUID: flags
#Standard CPUID-presence probe: try to flip EFLAGS bit 21 (ID); if it
#sticks, CPUID exists.
pushfl
popl %eax
#CPUID: compare
movl %eax, %ecx
#CPUID: ID bit
xorl $(1<<21), %eax
#FLAGS
pushl %eax
popfl
pushfl
popl %eax
pushl %ecx
popfl
#If no CPUID functionality exists
xorl %ecx, %eax
jz NoCPUID
#NOTE(review): this 'ret' is unconditional — everything below it (the long
#mode detection and the whole mode switch) is unreachable. Worse, no stack
#has been set up yet (%esp is whatever the bootloader left), so 'ret' pops a
#garbage return address. This line should almost certainly be deleted.
ret
#Long mode detection, part 1
#Ask for the highest extended CPUID leaf; need at least 0x80000001.
movl $0x80000000, %eax
cpuid
cmpl $0x80000001, %eax
jb NoLongMode
#Long mode detection, part 2
#Leaf 0x80000001: EDX bit 29 = Long Mode available.
movl $0x80000001, %eax
cpuid
testl $(1<<29), %edx
jz NoLongMode
#Temporarily disable paging until we've got it properly set up
#(clears CR0.PG, bit 31)
movl %cr0, %eax
andl $0b01111111111111111111111111111111, %eax
movl %eax, %cr0
#PAE
#CR4.PAE (bit 5) is mandatory for long mode.
movl %cr4, %eax
orl $(1<<5), %eax
movl %eax, %cr4
#LM-bit
#EFER MSR (0xC0000080), bit 8 = LME (long mode enable).
movl $0xC0000080, %ecx
rdmsr
orl $(1<<8), %eax
wrmsr
#Reenable paging
#NOTE(review): CR3 is never loaded before this point (it is only set later,
#in longmode:), so setting CR0.PG here activates paging with whatever stale
#CR3 the bootloader left — this will fault. Load CR3 with the PML4 physical
#address BEFORE re-enabling paging.
movl %cr0, %eax
orl $(1<<31), %eax
movl %eax, %cr0
#Clear all 32-bit registers to shut linker up
movl $0, %eax
movl $0, %ecx
#GDT + LM jump
#NOTE(review): lgdt expects the address of a limit+base pseudo-descriptor;
#gdt_len is an .equ length constant, so (gdt_len) dereferences a small
#absolute address. It should load a properly built gdtp descriptor.
lgdt (gdt_len)
#NOTE(review): this is a near jump — CS is not reloaded, so the CPU never
#enters the 64-bit code segment. A far jump (ljmp $code_selector, $longmode)
#is required here.
jmp longmode
#Actually enter 64-bit mode for good
.code64
longmode:
#Physical address of first boot page table
#Subtracting the 0xffff000000000000 link base converts the linked (virtual)
#symbol addresses to the physical addresses they are loaded at.
movabsq $(pml4_bpt0 - 0xffff000000000000), %rdi #Physical address of first boot page table
movabsq $0, %rsi #First address to map
#64-bit entries are double the size of 32-bit entries but table size is the same
#NOTE(review): a table holds 512 entries; starting the counter at 511 while
#reserving entry 511 for the VGA mapping below — confirm the intended count.
movabsq $511, %rcx
1:
#Kernel mapping
#Identity-map only the [_kernel_start, _kernel_end) physical range.
cmpq $(_kernel_start - 0xffff000000000000), %rsi
jl 2f
cmpq $(_kernel_end - 0xffff000000000000), %rsi
jge 3f
#Map physical address space as present+writable
#(flags 0x003 = present | writable)
movq %rsi, %rdx
orq $0x003, %rdx
movq %rdx, (%rdi)
2:
addq $4096, %rsi #page size in bytes
addq $8, %rdi #size of page entries
#NOTE(review): the legacy 'loop' instruction is slow on modern Intel CPUs;
#dec %rcx / jnz 1b is the usual form.
loop 1b #loop if unfinished
3:
#Video memory location
#NOTE(review): these 'movq %rax, sym - 0xffff... + N' stores use the symbol
#difference as an absolute displacement, which must fit in a sign-extended
#32-bit field (R_X86_64_32S) — the same class of relocation the linker is
#complaining about. They only work because the subtraction yields a small
#physical address; a pointer register would be cleaner.
movabsq $(0x00000000000B8000 | 0x003), %rax
movq %rax, pml4_bpt0 - 0xffff000000000000 + 511 * 8
#Map first kernel page to the first kernel PDT
#Each level is linked twice: entry 0 (identity, low half) and entry 384
#(higher half, corresponding to virtual base 0xffff000000000000).
movabsq $(pml4_bpt0 - 0xffff000000000000 + 0x003), %rax
movq %rax, pml4_dir - 0xffff000000000000 + 0
movabsq $(pml4_bpt0 - 0xffff000000000000 + 0x003), %rax
movq %rax, pml4_dir - 0xffff000000000000 + 384 * 8
#Map first kernel PDT to first kernel PDPT
movabsq $(pml4_dir - 0xffff000000000000 + 0x003), %rax
movq %rax, pml4_pdptr - 0xffff000000000000 + 0
movabsq $(pml4_dir - 0xffff000000000000 + 0x003), %rax
movq %rax, pml4_pdptr - 0xffff000000000000 + 384 * 8
#Map first kernel PDPT to the PML4T
movabsq $(pml4_pdptr - 0xffff000000000000 + 0x003), %rax
movq %rax, pml4_root - 0xffff000000000000 + 0
movabsq $(pml4_pdptr - 0xffff000000000000 + 0x003), %rax
movq %rax, pml4_root - 0xffff000000000000 + 384 * 8
#Set third control register to address of PML4T
#CR3 takes the PHYSICAL address of the top-level table.
movabsq $(pml4_root - 0xffff000000000000), %rcx
movq %rcx, %cr3
#Jump to 64-bit higher half
#NOTE(review): this is the line producing "relocation truncated to fit:
#R_X86_64_32S against `.text'". leaq with a bare symbol uses a sign-extended
#32-bit absolute displacement, and label 4f links at 0xffff000000000000+,
#which cannot fit. Use 'movabs $4f, %rcx' (64-bit immediate) instead —
#NOT 'leaq 4f(%rip), %rcx', which would defeat the purpose by computing an
#address relative to the current (low) RIP.
leaq 4f, %rcx
jmpq *%rcx
4:
#Reload PML4T along with all of its children, incl kernel pages
#(writing CR3 flushes the non-global TLB entries)
movq %cr3, %rcx
movq %rcx, %cr3
#Switch to the kernel's own stack before calling into C.
movabsq $stack_top, %rsp
#Self-explanatory
callq kernel_main
#If kernel_main ever returns, mask interrupts and halt forever.
cli
5: hlt
jmp 5b
.size _start, . - _start
It had a lot of linker errors before I started using movabs, etc. which got the linker woes from about 20 down to just 1:
boot64.o: in function `longmode':
(.text+0x18b): relocation truncated to fit: R_X86_64_32S against `.text'
collect2: error: ld returned 1 exit status
This would be easy to solve if the linker actually specified line numbers to find errors on ― but it doesn't. So if anyone can help find the offending line, I'd appreciate it.
The linker script is identical to the one used in the tutorial with only one exception (the hardcoded address is 0xFFFF000000000000 instead of 0xC0000000), if that helps any.
The original 32-bit code is using lea 4f, %ecx
/ jmp *%ecx
to set EIP to an absolute address that depends on the linker script, not the current EIP. (lea 4f, %ecx
is an inefficient equivalent to mov $4f, %ecx
, putting a 32-bit absolute address into a register)
lea 4f, %rcx
can only work with an absolute address that fits in a 32-bit sign-extended disp32 addressing mode. (Because that's how x86-64 addressing modes work). That's what relocation truncated to fit: R_X86_64_32S against `.text'
means: the 32S
relocation in the object file metadata specifies that the correct absolute address should be encoded into a 32-bit sign-extended value. But since you presumably adjusted the linker script to put . = 0xFFFF000000000000
instead of . = 0xC0100000;
, the label 4
has too many significant digits.
lea 4f(%rip), %rcx
will assemble but defeats the entire purpose; you might as well jmp 4f
or just nop
or nothing. It calculates the address relative to the current RIP, not based on the linker script. If you had single-stepped looked at RIP in a debugger you would have seen that RIP wasn't what you wanted with this suggestion.
You want movabs $4f, %rcx
which can use a 64-bit immediate to hold the full 64-bit address. The purpose of that indirect jump is to set RIP to a known absolute high address, so you must not calculate the address relative to the current RIP. You need to avoid position-independent methods here, despite the fact that x86-64 makes position-independent code easier.
Remember that before that jmp *%rcx
, your code is executing from a RIP that doesn't match what you used in the linker script. You can see this if you single-step it in the debugger built-in to BOCHS, for example.
If you had put your kernel within 2GiB of the top of virtual address space, lea 4f, %rcx
would have Just Worked. (But mov $4f, %rcx
would still have been better.) 7-byte mov $sign_extended_imm32, %rcx
is more efficient than 10-byte movabs $imm64, %rcx
; all else being equal, smaller code-size is better.
High-half kernels are the rare case where mov $sign_extended_imm32, %r64
is a good option for putting a static address in a register; normally (outside of bootstrap / setup code like this) you want a RIP-relative LEA. Or mov $imm32, %r32
if your address is known to be in the low 2GiB of virtual address space, e.g. in user-space in a non-PIE Linux executable.
Having your kernel's static code/data within 2GiB of the top of virtual address space also means you can use addressing modes like array(%rdx)
, where array
's address is encoded as a sign-extended disp32. So it's the same as a Linux non-PIE executable except only sign-extended works, not zero-extended.
I'd recommend doing like @MichaelPetch suggested and using 0xFFFFFFFF80000000
as your kernel base address.
BTW, if you know the absolute virtual address where your image will be running from before the jmp
, you could use a direct relative jmp rel32
with a large negative displacement to wrap RIP around from small positive to within that 2GiB "high half". Not sure if there's a simple way to get the linker to calculate that for you, though, so it's certainly easier to mov $abs_address, %rcx
/ jmp *%rcx
, and this startup code can be reclaimed once your kernel is up and running. So code-size here only matters for total size of the kernel image.
#Clear all 32-bit registers to shut the linker up
movl $0, %eax
movl $0, %ecx
What? That makes no sense. Also, if you want to zero a register, xor %eax,%eax
is the optimal way.
#64-bit GDT must be loaded BEFORE the switch to actual 64-bit address space ― see https://wiki.osdev.org/Creating_a_64-bit_kernel for more details
lgdt (gdtp)
GAS accepts that, but the standard syntax for a memory operand is just the bare symbol name. lgdt
isn't special, it still uses a ModR/M addressing mode just like add gdtp, %eax
. lgdt
loads a pointer + length from its memory operand.
lgdt gdtp
would be more standard syntax for using the absolute address of a symbol as the addressing mode. But if you like (symbol)
as a reminder that it's a memory operand, that's ok.
Some of your other code looks inefficient; lots of absolute addresses being used instead of simple pointer increments or offsets.
User contributions licensed under CC BY-SA 3.0