I'm trying to develop a basic kernel and I'd like to have it perform a task switch to some code that I load into memory from an attached disk.
I've tried following chapter 7 in intel's manual but my attempts all seem to result in triple faulting.
In particular, I think I have a problem with the setup of paging. I want the task that is switched to to run with its own arbitrary page mapping. As you'll see in the code snippet below, a virtual address of 0x0 in this task should map to a physical address of 0x2000000, which is also where I load the code.
My attempt to set up paging defines arrays of ints (filled in by the function setup_page_directory_and_page_tables, shown below)
for the user and kernel page tables and page directories. The kernel page mapping is an identity mapping while the user's is not.
One interesting thing to note is that when I change the paging setup so that the user's page mapping is also an identity mapping, the task switch is successful.
The entire code can be gotten here (note: relevant branch is dev branch).
mm.c
#include "mm.h"
#include "task.h"
// System memory map as told by BIOS.
extern unsigned int mem_map_buf_addr;
extern unsigned int mem_map_buf_entry_count;
// Kernel uninitialized data start and end.
extern int _bss_start;
extern int _bss_end;
// TSS for kernel and user tasks.
extern tss kernel_tss __attribute__((aligned(0x1000)));
extern tss user_tss __attribute__((aligned(0x1000)));
// Memory management structures for the kernel's use.
static uint32_t kernel_mem_bitmap;
static struct bios_mem_map* bmm;
#define NUM_GDT_ENTRIES 8
// Kernel structures for segmentation.
struct gdt_entry pm_gdt[NUM_GDT_ENTRIES];
struct gdt_info pm_gdt_info = {
.len = sizeof(pm_gdt),
.addr = (unsigned int) pm_gdt,
};
// Kernel structures for paging.
// Page directories AND page tables must start on a 4 KiB boundary: the
// PDE/CR3 formats only hold bits 31:12 of the address, and the setup code
// masks the table addresses with 0xfffff000. Without explicit alignment on
// the tables, a misplaced array would make every PDE point at the wrong page.
unsigned int kernel_page_directory[1024]__attribute__((aligned(0x1000)));
unsigned int kernel_page_tables[1024][1024]__attribute__((aligned(0x1000)));
unsigned int user_page_directory[USER_PAGE_DIR_SIZE]__attribute__((aligned(0x1000)));
unsigned int user_page_tables[USER_PAGE_DIR_SIZE][USER_PAGE_TABLE_SIZE]__attribute__((aligned(0x1000)));
extern void setup_and_enable_paging(void);
extern void *USER_PHY_ADDR;
/*
 * Bring up memory management: remember where the BIOS memory map lives,
 * then build the TSS, build and load the GDT, fill in the page tables and
 * finally switch paging on. The four setup calls run in this exact order.
 */
void init_mm(void) {
    // Bookkeeping only; nothing reads these two values yet.
    kernel_mem_bitmap = ~0u;
    bmm = (struct bios_mem_map*) mem_map_buf_addr;

    setup_tss();                             // defined in task.c
    setup_pm_gdt();                          // also loads the GDT
    setup_page_directory_and_page_tables();
    setup_and_enable_paging();               // in kernel_entry.asm
}
extern unsigned int kernel_entry;
/*
 * Fill in the kernel and user page directories and page tables.
 *
 * Kernel: PDE i points at kernel_page_tables[i]; the PTEs map page n to
 * frame n (identity mapping). Low bits 0x3 = present + writable.
 *
 * User: PDE i points at user_page_tables[i]; the PTEs map virtual page n to
 * the physical address USER_PHY_ADDR + n * 0x1000. Low bits 0x7 = present +
 * writable + user. Filling stops as soon as the next frame address would
 * reach 0xfffff000; entries after that point are not written here.
 *
 * NOTE(review): on a hardware task switch the CPU loads CR3 from the new TSS
 * and afterwards reads the new task's segment descriptors from the GDT, i.e.
 * through the NEW page tables (Intel SDM Vol. 3A, task management chapter).
 * With the offset mapping built here, the GDT, TSSs, IDT and kernel code are
 * not at their usual linear addresses in the user address space, which would
 * explain a triple fault that disappears with an identity mapping. Confirm,
 * and map those pages into the user directory if so.
 */
void setup_page_directory_and_page_tables(void) {
    unsigned int i, j;
    unsigned int frame;

    // Kernel directory: one PDE per page table.
    for (i = 0; i < 1024; i++) {
        kernel_page_directory[i] =
            ((unsigned int) &kernel_page_tables[i] & 0xfffff000) | 0x3;
    }

    // Kernel tables: identity-map the whole 4 GiB space, frame by frame.
    frame = 0;
    for (i = 0; i < 1024; i++) {
        for (j = 0; j < 1024; j++) {
            kernel_page_tables[i][j] = frame | 0x3;
            frame += 0x1000;
        }
    }

    // User directory: one PDE per page table, user-accessible.
    for (i = 0; i < USER_PAGE_DIR_SIZE; i++) {
        user_page_directory[i] =
            ((unsigned int) &user_page_tables[i] & 0xfffff000) | 0x7;
    }

    // User tables: virtual 0 maps to USER_PHY_ADDR, and so on upwards.
    // The explicit cast replaces an implicit pointer-to-integer conversion.
    frame = (unsigned int) USER_PHY_ADDR;
    for (i = 0; i < USER_PAGE_DIR_SIZE; i++) {
        for (j = 0; j < USER_PAGE_TABLE_SIZE; j++) {
            user_page_tables[i][j] = frame | 0x7;
            frame += 0x1000;
            // Stop before the frame address can wrap around 4 GiB.
            if (frame >= 0xfffff000) {
                return;
            }
        }
    }
}
/*
 * Populate the protected-mode GDT and load it.
 *
 * Entries written here:
 *   0    null descriptor
 *   1    kernel code  (base 0, limit 0xfffff, flags 0xc9: S=1, DPL=0, P=1, D/B=1, G=1)
 *   2    kernel data  (same base/limit/flags, type 0x2)
 *   3,4  not written here (slots kept for user code/data segments)
 *   5    TSS descriptor for kernel_tss (type 0x9, DPL 0)
 *   6    TSS descriptor for user_tss   (type 0x9, DPL 3)
 *   7    task gate (type 0x5, DPL 3) whose selector field is 0x30, i.e. entry 6
 *
 * A descriptor limit is the offset of the LAST valid byte, so the TSS
 * descriptors use sizeof(...) - 1 rather than sizeof(...).
 */
void setup_pm_gdt(void) {
    pm_gdt_info.len = sizeof(pm_gdt);
    pm_gdt_info.addr = (unsigned int) pm_gdt;

    make_gdt_entry(&pm_gdt[0], 0x0, 0x0, 0x0, 0x0);
    // KERNEL_CODE_SEGMENT
    make_gdt_entry(&pm_gdt[1], 0xfffff, 0x0, 0xa, 0xc9);
    // KERNEL_DATA_SEGMENT
    make_gdt_entry(&pm_gdt[2], 0xfffff, 0x0, 0x2, 0xc9);
    // USER_CODE_SEGMENT (disabled)
    // make_gdt_entry(&pm_gdt[3], 0xfffff, 0x0, 0xa, 0xcf);
    // USER_DATA_SEGMENT (disabled)
    // make_gdt_entry(&pm_gdt[4], 0xfffff, 0x0, 0x2, 0xcf);

    // Add entry for kernel task (TSS descriptor).
    make_gdt_entry(&pm_gdt[5], sizeof(kernel_tss) - 1,
                   (unsigned int) &kernel_tss, 0x9, 0x18);
    // Add entry for user task (TSS descriptor).
    make_gdt_entry(&pm_gdt[6], sizeof(user_tss) - 1,
                   (unsigned int) &user_tss, 0x9, 0x1e);
    // Add task gate for user TSS. make_gdt_entry stores the "base" low word
    // in bytes 2-3, which is where a task gate keeps its TSS selector.
    make_gdt_entry(&pm_gdt[7], sizeof(user_tss), 0x30, 0x5, 0xe);

    load_pm_gdt();
}
extern void pm_jump(void);
// Load the GDT register from pm_gdt_info, then call pm_jump, which far-jumps
// to selector 0x8 and reloads the data segment registers with 0x10.
void load_pm_gdt(void) {
// lgdt reads the length/address pair stored in pm_gdt_info.
asm volatile("lgdt %0" : : "m"(pm_gdt_info));
pm_jump(); // in kernel_entry.asm
}
/*
 * Encode one 8-byte descriptor into *entry.
 *
 *   limit  20-bit limit (bits above 19 are discarded)
 *   base   32-bit base address
 *   type   4-bit type field (only the low nibble is used)
 *   flags  low nibble  -> S, DPL (2 bits), P   (upper half of the access byte)
 *          high nibble -> AVL, L, D/B, G       (upper half of the limit/flags byte)
 */
void make_gdt_entry(struct gdt_entry* entry,
                    unsigned int limit,
                    unsigned int base,
                    char type,
                    /*flags format: S_DPL_P_AVL_L_DB_G*/
                    /*bits:         1_2___1_1___1_1__1*/
                    char flags) {
    // Base address, split over three fields: bits 0-15, 16-23 and 24-31.
    entry->base0_15 = base & 0xffff;
    entry->base16_23 = (base >> 16) & 0xff;
    entry->base24_31 = (base >> 24) & 0xff;

    // Limit bits 0-15 get their own field ...
    entry->limit0_15 = limit & 0xffff;
    // ... while limit bits 16-19 share a byte with AVL/L/DB/G.
    entry->limit16_19_avl_l_db_g = ((limit >> 16) & 0xf) | (flags & 0xf0);

    // Access byte: type in the low nibble, S/DPL/P in the high nibble.
    entry->type_s_dpl_p = (type & 0xf) | ((flags & 0xf) << 4);
}
task.c
#include "task.h"
extern unsigned int user_page_directory[USER_PAGE_TABLE_SIZE]__attribute__((aligned(0x1000)));
tss user_tss;
/*
 * Initialise user_tss for the task that will be switched to.
 *
 * CR3 is pointed at user_page_directory, EIP is set to 0, and the segment
 * selectors, ESP and EBP are copied from the CPU state at the time of this
 * call. No other field is written here.
 *
 * ESP and EBP are 32-bit registers and are stored with movl. The previous
 * `movw %esp` / `movw %ebp` could store at most the low 16 bits (newer
 * assemblers reject the register/suffix mismatch outright).
 *
 * NOTE(review): the captured ESP/EBP belong to this function's own stack
 * frame, so they are only placeholders for a task that never uses its stack.
 */
void setup_tss(void) {
    tss *tss_ = &user_tss;

    // NOTE(review): the "| 0x3" is kept from the original; CR3 has no
    // present/writable flags, so confirm whether these low bits are wanted.
    tss_->CR3 = (unsigned int) user_page_directory | 0x3;
    tss_->EIP = 0x0;

    // Segment selectors are 16 bits wide; each lands in the low half of its
    // 32-bit TSS slot.
    __asm__(" movw %%es, %0 \n" : "=m" (tss_->ES_l16b) : );
    __asm__(" movw %%cs, %0 \n" : "=m" (tss_->CS_l16b) : );
    __asm__(" movw %%ss, %0 \n" : "=m" (tss_->SS_l16b) : );
    __asm__(" movw %%ds, %0 \n" : "=m" (tss_->DS_l16b) : );
    __asm__(" movw %%fs, %0 \n" : "=m" (tss_->FS_l16b) : );
    __asm__(" movw %%gs, %0 \n" : "=m" (tss_->GS_l16b) : );

    // Full 32-bit stores for the stack and frame pointers.
    __asm__(" movl %%esp, %0 \n" : "=m" (tss_->ESP) : );
    __asm__(" movl %%ebp, %0 \n" : "=m" (tss_->EBP) : );
}
// Load the task register with the kernel TSS selector, then switch to the
// user task, printing a progress message around each step.
void do_task_switch(void) {
print("attempting to loading task register.\n");
// ltr with the kernel TSS selector (GDT entry 5).
load_kernel_tr(); // in kernel_entry.asm
print("successfully loaded task register.\n");
print("attempting task switch.\n");
// Far call through the user task gate (GDT entry 7). Presumably control only
// reaches the next line if the user task switches back to this one.
switch_task(); // in kernel_entry.asm
print("task switch successful.\n");
}
switch_task
never returns as the CPU triple faults. I realize that in setting up the user's TSS, simply copying over the current segment registers and stack pointer is probably not what I want to do but the first instruction in the app is a jmp $
so since I'm not jumping across segments or using the stack I thought it would work still.
task.h
#ifndef __TASK_H__
// Without this #define the guard never takes effect, and a second inclusion
// would redefine everything in this header.
#define __TASK_H__
#include "system.h"
// Note: Fields ending in `_{l,u}16b` only use 16 bits.
// If such a field is defined with `unsigned int`,
// it occupies the upper (u) or lower (l) 16 bits.
/*
 * 32-bit Task State Segment, laid out as the CPU reads and writes it.
 * Every member is one dword; the trailing comment gives its byte offset.
 *
 * The hardware format has a dword at offset 100 (debug-trap flag T in bit 0,
 * I/O map base address in the upper 16 bits) between the LDT selector and
 * SSP. It was missing, which put SSP at offset 100 instead of 104; it is now
 * present as T_IOMAP_BASE_u16b.
 */
struct task_state_segment {
    unsigned int previous_task_link_l16b;   //   0
    unsigned int ESP0;                      //   4
    unsigned int SS0_l16b;                  //   8
    unsigned int ESP1;                      //  12
    unsigned int SS1_l16b;                  //  16
    unsigned int ESP2;                      //  20
    unsigned int SS2_l16b;                  //  24
    unsigned int CR3;                       //  28: page-directory base
    unsigned int EIP;                       //  32
    unsigned int EFLAGS;                    //  36
    unsigned int EAX;                       //  40
    unsigned int ECX;                       //  44
    unsigned int EDX;                       //  48
    unsigned int EBX;                       //  52
    unsigned int ESP;                       //  56
    unsigned int EBP;                       //  60
    unsigned int ESI;                       //  64
    unsigned int EDI;                       //  68
    unsigned int ES_l16b;                   //  72
    unsigned int CS_l16b;                   //  76
    unsigned int SS_l16b;                   //  80
    unsigned int DS_l16b;                   //  84
    unsigned int FS_l16b;                   //  88
    unsigned int GS_l16b;                   //  92
    // 96: LDT selector. The hardware keeps it in the LOW 16 bits; the
    // "_u16b" suffix is kept only so existing users of the name still compile.
    unsigned int LDT_u16b;
    unsigned int T_IOMAP_BASE_u16b;         // 100: bit 0 = T, bits 31:16 = I/O map base
    unsigned int SSP;                       // 104: shadow stack pointer
}__attribute__((packed));
typedef struct task_state_segment tss;
void setup_tss();
void do_task_switch();
// #define USER_PAGE_DIR_SIZE 1024
// #define USER_PAGE_TABLE_SIZE 1024
#define USER_PAGE_DIR_SIZE 1024
#define USER_PAGE_TABLE_SIZE 1024
#endif // __TASK_H__
kernel_entry.asm
[bits 32]
global initialize_idt
global mem_map_buf_addr
global mem_map_buf_entry_count
global kernel_entry
extern main
extern idt_info_ptr
; kernel_entry expects the following information about the
; BIOS's memory map to be put on the stack:
; the address of the buffer holding the memory map (top of stack)
; the number of entries in the memory map.
; Kernel entry point: copy the two memory-map arguments from the stack into
; mem_map_buf_addr / mem_map_buf_entry_count, then call the C `main`.
kernel_entry:
mov eax, [esp]                      ; top of stack: memory-map buffer address
mov [mem_map_buf_addr], eax
mov eax, [esp+4]                    ; next slot: number of memory-map entries
mov [mem_map_buf_entry_count], eax
call main
jmp $                               ; spin here if main ever returns
mem_map_buf_addr: dd 0x0
mem_map_buf_entry_count: dd 0x0
global pm_jump
; Reload every segment register: a far jump sets CS to selector 0x8, then
; DS/SS/ES/FS/GS are loaded with selector 0x10, and control returns to the caller.
pm_jump:
jmp 0x8:pm_jmp_ret                  ; far jump: loads CS with 0x8
pm_jmp_ret:
mov ax, 0x10                        ; selector used for all data segments
mov ds, ax
mov ss, ax
mov es, ax
mov fs, ax
mov gs, ax
ret
extern kernel_page_directory
global setup_and_enable_paging
; Point CR3 at kernel_page_directory and turn paging on by setting CR0.PG.
; Called from C, so only eax is used as scratch: ebx is callee-saved in the
; i386 C calling convention and the previous version overwrote it.
setup_and_enable_paging:
; point CR3 to page directory
; NOTE(review): the "or eax, 0x3" is kept from the original; CR3 has no
; present/writable flags, so confirm whether these low bits are wanted.
mov eax, kernel_page_directory
or eax, 0x3
mov cr3, eax
; set CR0.PG (bit 31, the left-most bit) to 1
mov eax, cr0
or eax, 0x80000000
mov cr0, eax
ret
USER_TASK_GATE_GDT_IDX equ 7
USER_TASK_GATE equ 8 * USER_TASK_GATE_GDT_IDX
global switch_task
; Switch to the user task with a far call through the task gate selector
; USER_TASK_GATE (GDT entry 7). The offset operand (0x0) is a placeholder.
switch_task:
call USER_TASK_GATE: 0x0
ret                                 ; reached only if execution resumes after the call
KERNEL_TASK_SEG_IDX equ 5
KERNEL_TASK_SEG equ 8 * KERNEL_TASK_SEG_IDX
kernel_task_selector: dw KERNEL_TASK_SEG
; Function to load kernel task register.
global load_kernel_tr
; Load the task register with the selector stored at kernel_task_selector
; (KERNEL_TASK_SEG, GDT entry 5).
load_kernel_tr:
ltr [kernel_task_selector]
ret
global dummy_branch
; Put the marker value 0xfadefade in eax and execute iret.
; NOTE(review): no caller is visible in this file; presumably a debugging aid.
dummy_branch:
mov eax, 0xfadefade
iret
; Load the IDT register from the structure at idt_info_ptr (defined elsewhere).
initialize_idt:
lidt [idt_info_ptr]
ret
global enable_interrupts
; Set the interrupt flag (sti) and return.
enable_interrupts:
sti
ret
global disable_interrupts
; Clear the interrupt flag (cli) and return.
disable_interrupts:
cli
ret
extern fault_handler
extern irq_handler
global isr_common
global isr0
global isr1
global isr2
global isr3
global isr4
global isr5
global isr6
global isr7
global isr8
global isr9
global isr10
global isr11
global isr12
global isr13
global isr14
global isr15
global isr16
global isr17
global isr18
global isr19
global isr20
global isr21
global isr22
global isr23
global isr24
global isr25
global isr26
global isr27
global isr28
global isr29
global isr30
global isr31
global irq0
global irq1
global irq2
global irq3
global irq4
global irq5
global irq6
global irq7
global irq8
global irq9
global irq10
global irq11
global irq12
global irq13
global irq14
global irq15
; Exception entry stubs for vectors 0-16.
; Each stub disables interrupts, makes sure two dwords sit on the stack
; (error code, then vector number) and jumps to isr_common, which removes
; both with `add esp, 8` before iret.
; Most stubs push a dummy error code of 0. isr8 and isr10-isr14 push only the
; vector number, presumably because the CPU has already pushed an error code
; for those exceptions.
isr0:
cli
push byte 0
push byte 0
jmp isr_common
isr1:
cli
push byte 0
push byte 1
jmp isr_common
isr2:
cli
push byte 0
push byte 2
jmp isr_common
isr3:
cli
push byte 0
push byte 3
jmp isr_common
isr4:
cli
push byte 0
push byte 4
jmp isr_common
isr5:
cli
push byte 0
push byte 5
jmp isr_common
isr6:
cli
push byte 0
push byte 6
jmp isr_common
isr7:
cli
push byte 0
push byte 7
jmp isr_common
; no dummy error code pushed for vector 8
isr8:
cli
push byte 8
jmp isr_common
isr9:
cli
push byte 0
push byte 9
jmp isr_common
; no dummy error code pushed for vectors 10-14
isr10:
cli
push byte 10
jmp isr_common
isr11:
cli
push byte 11
jmp isr_common
isr12:
cli
push byte 12
jmp isr_common
isr13:
cli
push byte 13
jmp isr_common
isr14:
cli
push byte 14
jmp isr_common
isr15:
cli
push byte 0
push byte 15
jmp isr_common
isr16:
cli
push byte 0
push byte 16
jmp isr_common
; Vector 17 (#AC, alignment check): the CPU itself pushes an error code for
; this exception, so only the vector number is pushed here, as in isr8 and
; isr10-isr14. An extra dummy 0 would leave one dword too many on the stack
; for isr_common's `add esp, 8` / `iret`.
isr17:
cli
push byte 17
jmp isr_common
; Exception entry stubs for vectors 18-31.
; Each one disables interrupts, pushes a dummy error code of 0 followed by
; its vector number, and jumps to isr_common.
; NOTE(review): some newer CPUs define a hardware error code for a few
; vectors in this range (e.g. 21); confirm before relying on those stubs.
isr18:
cli
push byte 0
push byte 18
jmp isr_common
isr19:
cli
push byte 0
push byte 19
jmp isr_common
isr20:
cli
push byte 0
push byte 20
jmp isr_common
isr21:
cli
push byte 0
push byte 21
jmp isr_common
isr22:
cli
push byte 0
push byte 22
jmp isr_common
isr23:
cli
push byte 0
push byte 23
jmp isr_common
isr24:
cli
push byte 0
push byte 24
jmp isr_common
isr25:
cli
push byte 0
push byte 25
jmp isr_common
isr26:
cli
push byte 0
push byte 26
jmp isr_common
isr27:
cli
push byte 0
push byte 27
jmp isr_common
isr28:
cli
push byte 0
push byte 28
jmp isr_common
isr29:
cli
push byte 0
push byte 29
jmp isr_common
isr30:
cli
push byte 0
push byte 30
jmp isr_common
isr31:
cli
push byte 0
push byte 31
jmp isr_common
; Common tail for all exception stubs: save registers, switch the data
; segments to selector 0x10, call fault_handler with a pointer to the saved
; state, restore everything and return from the interrupt.
isr_common:
pusha                               ; save general-purpose registers
push ds
push es
push fs
push gs
mov ax, 0x10                        ; data segment selector used by the handler
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
mov eax, esp                        ; esp now points at the saved state ...
push eax                            ; ... and is passed as the single argument
mov eax, fault_handler
call eax
pop eax                             ; drop the argument
pop gs
pop fs
pop es
pop ds
popa
add esp, 8                          ; drop vector number and error code
iret
; Hardware interrupt entry stubs. The stub for IRQ n disables interrupts,
; pushes a dummy error code of 0 followed by the number 32 + n, and jumps to
; irq_common.
irq0:
cli
push byte 0
push byte 32
jmp irq_common
irq1:
cli
push byte 0
push byte 33
jmp irq_common
irq2:
cli
push byte 0
push byte 34
jmp irq_common
irq3:
cli
push byte 0
push byte 35
jmp irq_common
irq4:
cli
push byte 0
push byte 36
jmp irq_common
irq5:
cli
push byte 0
push byte 37
jmp irq_common
irq6:
cli
push byte 0
push byte 38
jmp irq_common
irq7:
cli
push byte 0
push byte 39
jmp irq_common
irq8:
cli
push byte 0
push byte 40
jmp irq_common
irq9:
cli
push byte 0
push byte 41
jmp irq_common
irq10:
cli
push byte 0
push byte 42
jmp irq_common
irq11:
cli
push byte 0
push byte 43
jmp irq_common
irq12:
cli
push byte 0
push byte 44
jmp irq_common
irq13:
cli
push byte 0
push byte 45
jmp irq_common
irq14:
cli
push byte 0
push byte 46
jmp irq_common
irq15:
cli
push byte 0
push byte 47
jmp irq_common
; Common tail for all IRQ stubs: same save/restore sequence as isr_common,
; but the C function called is irq_handler.
irq_common:
pusha                               ; save general-purpose registers
push ds
push es
push fs
push gs
mov ax, 0x10                        ; data segment selector used by the handler
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
mov eax, esp                        ; pointer to the saved state ...
push eax                            ; ... passed as the single argument
mov eax, irq_handler
call eax
pop eax                             ; drop the argument
pop gs
pop fs
pop es
pop ds
popa
add esp, 8                          ; drop the pushed number and dummy error code
iret
The kernel is compiled with:
# All C sources that make up the kernel image.
C_SOURCES = $(wildcard kernel/*.c kernel/**/*.c drivers/**/*.c fs/*.c)
# 32-bit, freestanding, unoptimised build with debug info and no libc.
C_FLAGS = -Wall -O0 -m32 -fno-pie -fno-stack-protector -ffreestanding -fno-hosted -nolibc -nostdlib -g
C_FLAGS += -I./
# One object file per C source.
OBJ = $(patsubst %.c, %.o, ${C_SOURCES})
# Link everything into a flat binary; kernel_entry.o is listed first.
kernel.bin: kernel/kernel_entry.o ${OBJ}
ld -o kernel.bin -m elf_i386 $^ --oformat binary -T kernel.ld
# Compile C sources.
%.o: %.c
gcc ${C_FLAGS} -c $< -o $@
# Assemble NASM sources as ELF objects.
%.o: %.asm
nasm $< -f elf -g -o $@
kernel.ld
/*
 * Kernel image layout: code and initialised data start at 0x1000,
 * zero-initialised data starts at 0x100000. _bss_start and _bss_end
 * bracket the .bss output section for use from C.
 */
SECTIONS
{
    . = 0x1000;
    .text : { *(.text) }
    .data : { *(.data) }
    .bss 0x100000 : {
        _bss_start = ( ADDR(.bss) ) ;
        *(.bss)
        /* Common symbols live in the input section named COMMON (no leading
           dot); "*(.COMMON)" matched nothing and left them outside .bss. */
        *(COMMON)
        _bss_end = ( ADDR(.bss ) + SIZEOF(.bss) ) ;
    }
}
ENTRY(kernel_entry)
The application that is loaded is app.s (never mind that it does nothing useful)
# Test application. Only the first instruction matters: it jumps to itself,
# so nothing after it is reached unless that instruction is removed.
.globl main
.intel_syntax noprefix
main:
jmp $                 # infinite loop: jump to this same instruction
jmp 0xbaba
iret
add eax, ebx
loop:
mov eax, 0x1
cmp eax, 0x1
jmp 0x8: 0x119f       # far jump to selector 0x8
exit:
iret
jmp 0x0:0x119d        # far jump to selector 0x0
The app is compiled with:
gcc -o app.bin -m32 -fno-pic -fno-pie -flinker-output=exec -Ttext=0x400000 -Wl,-emain app.s
I verify that the binary (app.bin) is correctly loaded into memory by manually inspecting the bytes at 0x2000000 with Qemu command xp /16i 0x2000000
and comparing them with hexdump -C app.bin -n 16
.
Interestingly, the output of objdump -d app.bin
, while containing a section similar to the original app.s, doesn't seem to have matching bytes. It starts with f3 0f 1e ..
whereas app.bin starts with 7f 45 4c ...
.
I'll paste it here with some parts cut out.
objdump -d app.bin (truncated):
app.bin: file format elf32-i386
Disassembly of section .text:
00400000 <_start>:
400000: f3 0f 1e fb endbr32
400004: 31 ed xor %ebp,%ebp
400006: 5e pop %esi
400007: 89 e1 mov %esp,%ecx
400009: 83 e4 f0 and $0xfffffff0,%esp
40000c: 50 push %eax
40000d: 54 push %esp
40000e: 52 push %edx
40000f: e8 22 00 00 00 call 400036 <_start+0x36>
400014: 81 c3 c8 2f 00 00 add $0x2fc8,%ebx
40001a: 8d 83 f4 d1 ff ff lea -0x2e0c(%ebx),%eax
400020: 50 push %eax
400021: 8d 83 84 d1 ff ff lea -0x2e7c(%ebx),%eax
400027: 50 push %eax
400028: 51 push %ecx
400029: 56 push %esi
40002a: ff b3 1c 00 00 00 pushl 0x1c(%ebx)
400030: e8 0b 10 c0 ff call 1040 <__libc_start_main@plt>
400035: f4 hlt
400036: 8b 1c 24 mov (%esp),%ebx
400039: c3 ret
40003a: 66 90 xchg %ax,%ax
40003c: 66 90 xchg %ax,%ax
40003e: 66 90 xchg %ax,%ax
...
00400139 <__x86.get_pc_thunk.dx>:
400139: 8b 14 24 mov (%esp),%edx
40013c: c3 ret
0040013d <main>:
40013d: eb fe jmp 40013d <main>
40013f: e9 76 b9 c0 ff jmp baba <__cxa_finalize@plt+0xaa6a>
400144: cf iret
400145: 01 d8 add %ebx,%eax
00400147 <loop>:
400147: b8 01 00 00 00 mov $0x1,%eax
40014c: 83 f8 01 cmp $0x1,%eax
40014f: ea 9f 11 00 00 08 00 ljmp $0x8,$0x119f
00400156 <exit>:
400156: cf iret
400157: ea 9d 11 00 00 00 00 ljmp $0x0,$0x119d
40015e: 66 90 xchg %ax,%ax
...
An additional thing I observed:
At one point in my investigation, the objdump output DID match the hexdump
and xp
bytes (I'm not sure what I've done that now keeps this from happening :() and I observed that after loading the binary to 0x2000000, the instruction at 0x2000000 + offset_of_main (0x13d in this case), would end up being:
i. jmp 0x2000000 + offset_of_main
if I used jmp $
(I would expect it to have been jmp offset_of_main
)
ii.jmp 0x2000000 + arbitrary_constant
if I used jmp arbitrary_constant
e.g jmp 0x13d
would appear as jmp 0x200013d
when I run xp /16 0x200013d
(I would have expected it to be jmp 0x13d
)
(I think this is relevant because I'm assuming that whatever Qemu reports via xp is exactly how the CPU sees the instructions.)
User contributions licensed under CC BY-SA 3.0