How to correctly set up a task switch via x86 TSS

2

I'm trying to develop a basic kernel and I'd like to have it perform a task switch to some code that I load into memory from an attached disk.

I've tried following chapter 7 in intel's manual but my attempts all seem to result in triple faulting.

In particular, I think I have a problem with the setup of paging. I want the task that is switched to to run with its own arbitrary page mapping, so, as you'll see in the code snippet below, a virtual address of 0x0 in this task should map to a physical address of 0x2000000, which is also where I load the code.

My attempt to set up paging defines arrays of ints for the user and kernel page tables and page directories, which are filled in by the function setup_page_directory_and_page_tables. The kernel page mapping is an identity mapping while the user's is not. One interesting thing to note is that when I change the paging setup so that the user's page mapping is also an identity mapping, the task switch is successful.

The entire code can be gotten here (note: relevant branch is dev branch).

mm.c

#include "mm.h"

#include "task.h"

// System memory map as told by BIOS.
extern unsigned int mem_map_buf_addr;
extern unsigned int mem_map_buf_entry_count;

// Kernel uninitialized data start and end.
extern int _bss_start;
extern int _bss_end;

// TSS for kernel and user tasks.
extern tss kernel_tss __attribute__((aligned(0x1000)));
extern tss user_tss __attribute__((aligned(0x1000)));

// Memory management structures for the kernel's use.
static uint32_t kernel_mem_bitmap;
static struct bios_mem_map* bmm;

#define NUM_GDT_ENTRIES 8
// Kernel structures for segmentation.
// pm_gdt is the protected-mode descriptor table; pm_gdt_info is the
// limit/base pair that load_pm_gdt() hands to `lgdt`.
struct gdt_entry pm_gdt[NUM_GDT_ENTRIES];
struct gdt_info pm_gdt_info = {
    // The GDTR limit is the offset of the last valid byte, i.e. size - 1.
    .len = sizeof(pm_gdt) - 1,
    .addr = (unsigned int) pm_gdt,
};

// Kernel structures for paging.
// Every page directory AND every page table must start on a 4 KiB boundary:
// setup_page_directory_and_page_tables() keeps only the upper 20 bits of a
// table's address when it builds the page-directory entries, so a misaligned
// table would silently be looked up at the wrong address.
unsigned int kernel_page_directory[1024]__attribute__((aligned(0x1000)));
unsigned int kernel_page_tables[1024][1024]__attribute__((aligned(0x1000)));

unsigned int user_page_directory[USER_PAGE_DIR_SIZE]__attribute__((aligned(0x1000)));
unsigned int user_page_tables[USER_PAGE_DIR_SIZE][USER_PAGE_TABLE_SIZE]__attribute__((aligned(0x1000)));

extern void setup_and_enable_paging(void);

extern void *USER_PHY_ADDR;

/*
 * Bring up memory management.
 *
 * Order of operations: fill in the user TSS, build and load the
 * protected-mode GDT, build the kernel and user page tables, then switch
 * paging on.
 *
 * NOTE(review): setup_tss() snapshots the segment registers BEFORE
 * setup_pm_gdt() reloads them, so the selectors stored in the user TSS are
 * the ones that were live on entry -- confirm they name equivalent
 * descriptors in pm_gdt.
 */
void init_mm(void) {
    // Bookkeeping only; nothing in this file reads either value yet.
    kernel_mem_bitmap = ~0u;
    bmm = (struct bios_mem_map*) mem_map_buf_addr;

    setup_tss();                            // defined in task.c
    setup_pm_gdt();                         // builds pm_gdt and loads GDTR
    setup_page_directory_and_page_tables(); // fills kernel + user tables
    setup_and_enable_paging();              // kernel_entry.asm: CR3 + CR0.PG
}

extern unsigned int kernel_entry;
/*
 * Fill in the kernel and user page directories and page tables.
 *
 * Kernel: all 1024 directory entries point at consecutive kernel page
 * tables, and the tables map linear address N to physical address N
 * (identity mapping) with flags 0x3 (present | writable).
 *
 * User: all directory entries point at consecutive user page tables, and
 * the tables map linear address 0 upward onto physical memory starting at
 * USER_PHY_ADDR with flags 0x7 (present | writable | user). Filling stops
 * before physical frame 0xfffff000 is reached so the frame counter never
 * wraps; entries that were not reached are left untouched.
 *
 * NOTE(review): the user mapping does not contain the kernel's own code,
 * GDT, IDT or TSS at their linear addresses. After a hardware task switch
 * loads the user CR3, the CPU still has to read those structures through
 * the new mapping -- verify this is not the cause of the reported triple
 * fault (it would explain why an identity user mapping works).
 *
 * NOTE(review): USER_PHY_ADDR is declared as a `void *` variable. If it is
 * really an assembler/linker symbol whose ADDRESS is the value of
 * interest, this must read `&USER_PHY_ADDR` instead -- confirm against its
 * definition.
 */
void setup_page_directory_and_page_tables(void) {
    unsigned int addr = (unsigned int) &kernel_page_tables[0];
    unsigned int i, j, page_frame = 0;

    // Setup kernel paging.
    for (i = 0; i < 1024; i++) {
        // PDE i -> kernel page table i.
        kernel_page_directory[i] = (addr & 0xfffff000) | 0x3;
        addr += 0x1000;
    }
    for (i = 0; i < 1024; i++) {
        for (j = 0; j < 1024; j++) {
            // Identity map: one 4 KiB frame per entry, in ascending order.
            kernel_page_tables[i][j] = page_frame | 0x3;
            page_frame += 0x1000;
        }
    }

    // Setup user paging.
    addr = (unsigned int) &user_page_tables[0];
    for (i = 0; i < USER_PAGE_DIR_SIZE; i++) {
        // PDE i -> user page table i.
        user_page_directory[i] = (addr & 0xfffff000) | 0x7;
        addr += 0x1000;
    }

    // Explicit pointer-to-integer conversion, rounded down to a page
    // boundary so the low 12 bits can never leak into the flag bits.
    page_frame = (unsigned int) USER_PHY_ADDR & 0xfffff000;
    for (i = 0; i < USER_PAGE_DIR_SIZE; i++) {
        for (j = 0; j < USER_PAGE_TABLE_SIZE; j++) {
            user_page_tables[i][j] = page_frame | 0x7;
            page_frame += 0x1000;

            // Stop before the frame counter could wrap past 4 GiB.
            if (page_frame >= 0xfffff000)
                return;
        }
    }
}

/*
 * Build the protected-mode GDT and make it the active one.
 *
 * Layout (selector = 8 * index):
 *   0  null descriptor
 *   1  kernel code  (0x08)
 *   2  kernel data  (0x10)
 *   3  reserved for user code (currently left zeroed)
 *   4  reserved for user data (currently left zeroed)
 *   5  kernel TSS descriptor (0x28)
 *   6  user TSS descriptor   (0x30)
 *   7  task gate referring to the user TSS (0x38)
 *
 * See make_gdt_entry() for how the `type` and `flags` bytes are split.
 */
void setup_pm_gdt(void) {
    // The GDTR limit is the offset of the last valid byte: size - 1.
    pm_gdt_info.len = sizeof(pm_gdt) - 1;
    pm_gdt_info.addr = (long unsigned int) pm_gdt;

    make_gdt_entry(&pm_gdt[0], 0x0, 0x0, 0x0, 0x0);
    // KERNEL_CODE_SEGMENT
    make_gdt_entry(&pm_gdt[1], 0xfffff, 0x0, 0xa, 0xc9);
    // KERNEL_DATA_SEGMENT
    make_gdt_entry(&pm_gdt[2], 0xfffff, 0x0, 0x2, 0xc9);
    // USER_CODE_SEGMENT
    // make_gdt_entry(&pm_gdt[3], 0xfffff, 0x0, 0xa, 0xcf);
    // USER_DATA_SEGMENT
    // make_gdt_entry(&pm_gdt[4], 0xfffff, 0x0, 0x2, 0xcf);

    // Add entry for kernel task (TSS descriptor).
    // A segment limit is the last valid offset, hence sizeof - 1.
    make_gdt_entry(&pm_gdt[5], sizeof(kernel_tss) - 1, (unsigned int) &kernel_tss, 0x9, 0x18);
    // Add entry for user task (TSS descriptor).
    make_gdt_entry(&pm_gdt[6], sizeof(user_tss) - 1, (unsigned int) &user_tss, 0x9, 0x1e);
    // Add task gate for user TSS. Only the selector (passed through the
    // low 16 bits of `base`), type and DPL/P bits are meaningful in a task
    // gate; the limit bits are reserved, so they are written as zero.
    make_gdt_entry(&pm_gdt[7], 0x0, 0x30, 0x5, 0xe);

    load_pm_gdt();
}

extern void pm_jump(void);
/*
 * Point GDTR at pm_gdt and reload every segment register from it.
 */
void load_pm_gdt(void) {
    // Load GDTR from the limit/base pair stored in pm_gdt_info.
    asm volatile("lgdt %0" : : "m"(pm_gdt_info));
    // Far-jumps through selector 0x8 to reload CS, then loads 0x10 into
    // DS, SS, ES, FS and GS.
    pm_jump(); // in kernel_entry.asm
}

/*
 * Encode one 8-byte descriptor into *entry.
 *
 *   limit  20-bit limit; bits 0-15 and 16-19 are stored separately.
 *   base   32-bit base; split into bits 0-15, 16-23 and 24-31.
 *   type   4-bit descriptor type (only the low nibble is used).
 *   flags  low nibble  = S, DPL (2 bits), P   -> upper half of the access byte
 *          high nibble = AVL, L, D/B, G       -> upper half of the limit/flags byte
 */
void make_gdt_entry(struct gdt_entry* entry,
                    unsigned int limit,
                    unsigned int base,
                    char type,
                    /*flags format: S_DPL_P_AVL_L_DB_G*/
                    /*bits:         1_2___1_1___1_1__1*/
                    char flags) {
    // Limit bits 0-15 and base bits 0-15.
    entry->limit0_15 = limit & 0xffff;
    entry->base0_15 = base & 0xffff;

    // Base bits 16-23 and 24-31.
    entry->base16_23 = (base >> 16) & 0xff;
    entry->base24_31 = (base >> 24) & 0xff;

    // Access byte: 4-bit type in the low nibble, S/DPL/P in the high nibble.
    entry->type_s_dpl_p = (type & 0xf) | ((flags & 0xf) << 4);

    // Limit bits 16-19 in the low nibble, AVL/L/DB/G in the high nibble.
    entry->limit16_19_avl_l_db_g = ((limit >> 16) & 0xf) | (flags & 0xf0);
}

task.c

#include "task.h"

extern unsigned int user_page_directory[USER_PAGE_TABLE_SIZE]__attribute__((aligned(0x1000)));

tss user_tss;

/*
 * Initialise the user task's TSS.
 *
 * CR3 is pointed at user_page_directory, EIP is set to 0, and the segment
 * selectors, ESP and EBP are copied from the CPU state at the time of the
 * call. All other fields keep whatever value user_tss already holds.
 */
void setup_tss(void) {
    tss *tss_ = &user_tss;

    tss_->CR3 = (unsigned int) user_page_directory | 0x3;
    tss_->EIP = 0x0;
    // Segment registers are 16 bits wide: store them into the low half of
    // each 32-bit slot.
    __asm__("   movw %%es, %0 \n" : "=m" (tss_->ES_l16b) : );
    __asm__("   movw %%cs, %0 \n" : "=m" (tss_->CS_l16b) : );
    __asm__("   movw %%ss, %0 \n" : "=m" (tss_->SS_l16b) : );
    __asm__("   movw %%ds, %0 \n" : "=m" (tss_->DS_l16b) : );
    __asm__("   movw %%fs, %0 \n" : "=m" (tss_->FS_l16b) : );
    __asm__("   movw %%gs, %0 \n" : "=m" (tss_->GS_l16b) : );
    // ESP and EBP are full 32-bit registers, so they need a 32-bit store.
    // A `w` suffix with these registers is at best a 16-bit truncation and
    // at worst rejected by the assembler.
    __asm__("   movl %%esp, %0 \n" : "=m" (tss_->ESP) : );
    __asm__("   movl %%ebp, %0 \n" : "=m" (tss_->EBP) : );
}

/*
 * Load the task register and then transfer control to the user task.
 *
 * load_kernel_tr() executes `ltr` with the kernel TSS selector (GDT index
 * 5); switch_task() performs a far call through the task gate at GDT
 * index 7. Progress messages are printed around each step so the last
 * message shown identifies the step that failed.
 */
void do_task_switch(void) {
    print("attempting to loading task register.\n");
    load_kernel_tr(); // in kernel_entry.asm
    print("successfully loaded task register.\n");

    print("attempting task switch.\n");
    switch_task(); // in kernel_entry.asm
    // Only reached if the far call through the task gate comes back.
    print("task switch successful.\n");
}

switch_task never returns as the CPU triple faults. I realize that in setting up the user's TSS, simply copying over the current segment registers and stack pointer is probably not what I want to do but the first instruction in the app is a jmp $ so since I'm not jumping across segments or using the stack I thought it would work still.

task.h

#ifndef __TASK_H__
#define __TASK_H__

#include "system.h"

// Note: Fields ending in `_{l,u}16b` only use 16 bits.
// If such a field is defined with `unsigned int`, 
// it occupies the upper (u) or lower (l) 16 bits.
/*
 * In-memory layout of a 32-bit Task State Segment.
 *
 * Every member is one 32-bit slot, so member N sits at byte offset 4*N.
 * Slots that architecturally hold a 16-bit value carry the `_l16b` /
 * `_u16b` suffix described above.
 *
 * The slot at offset 0x64 (debug-trap flag + I/O map base) was missing,
 * which placed SSP at 0x64 instead of 0x68; it is now present.
 *
 * NOTE(review): the LDT selector is normally kept in the LOWER 16 bits of
 * its slot, despite the `_u16b` suffix on LDT_u16b -- the name is kept for
 * compatibility; confirm before writing to it.
 */
struct task_state_segment {
    unsigned int previous_task_link_l16b;   // 0x00
    unsigned int ESP0;                      // 0x04
    unsigned int SS0_l16b;                  // 0x08
    unsigned int ESP1;                      // 0x0c
    unsigned int SS1_l16b;                  // 0x10
    unsigned int ESP2;                      // 0x14
    unsigned int SS2_l16b;                  // 0x18
    unsigned int CR3;                       // 0x1c
    unsigned int EIP;                       // 0x20
    unsigned int EFLAGS;                    // 0x24
    unsigned int EAX;                       // 0x28
    unsigned int ECX;                       // 0x2c
    unsigned int EDX;                       // 0x30
    unsigned int EBX;                       // 0x34
    unsigned int ESP;                       // 0x38
    unsigned int EBP;                       // 0x3c
    unsigned int ESI;                       // 0x40
    unsigned int EDI;                       // 0x44
    unsigned int ES_l16b;                   // 0x48
    unsigned int CS_l16b;                   // 0x4c
    unsigned int SS_l16b;                   // 0x50
    unsigned int DS_l16b;                   // 0x54
    unsigned int FS_l16b;                   // 0x58
    unsigned int GS_l16b;                   // 0x5c
    unsigned int LDT_u16b;                  // 0x60
    unsigned int IO_MAP_BASE_u16b;          // 0x64: bit 0 = T flag, bits 16-31 = I/O map base
    unsigned int SSP;                       // 0x68
}__attribute__((packed));

typedef struct task_state_segment tss;

void setup_tss();

void do_task_switch();

// #define USER_PAGE_DIR_SIZE 1024
// #define USER_PAGE_TABLE_SIZE 1024

#define USER_PAGE_DIR_SIZE 1024
#define USER_PAGE_TABLE_SIZE 1024

#endif // __TASK_H__

kernel_entry.asm

[bits 32]

global initialize_idt
global mem_map_buf_addr
global mem_map_buf_entry_count
global kernel_entry

extern main
extern idt_info_ptr

; kernel_entry expects the following information about the
; BIOS's memory map to be put on the stack:
;   the address of the buffer holding the memory map (top of stack)
;   the number of entries in the memory map.
kernel_entry:
  ; Copy the two arguments left on the stack by the caller into the
  ; globals that the C code reads.
  mov eax, [esp]                        ; memory-map buffer address
  mov [mem_map_buf_addr], eax
  mov eax, [esp+4]                      ; number of memory-map entries
  mov [mem_map_buf_entry_count], eax

  call main
  ; main is not expected to return; spin here if it does.
jmp $

mem_map_buf_addr: dd 0x0
mem_map_buf_entry_count: dd 0x0

global pm_jump
; Reload all segment registers after a new GDT has been loaded.
; CS can only be changed by a far transfer, hence the far jump to the very
; next instruction.
pm_jump:
  jmp 0x8:pm_jmp_ret                    ; CS <- 0x8 (GDT index 1, kernel code)
pm_jmp_ret:
  mov ax, 0x10                          ; 0x10 = GDT index 2, kernel data
    mov ds, ax
    mov ss, ax
    mov es, ax
    mov fs, ax
    mov gs, ax
  ret

extern kernel_page_directory
global setup_and_enable_paging
; Load CR3 with the kernel page directory and turn paging on.
; Clobbers eax and ebx.
setup_and_enable_paging:
  ; point CR3 to page directory
  mov eax, kernel_page_directory
  or eax, 0x3                           ; NOTE(review): sets the two low bits of CR3; confirm they are meant to be set
  mov cr3, eax

  ; set CR0.PG to 1
  mov ebx, cr0  ; read-modify-write so the other CR0 bits are preserved
    or ebx, 0x80000000                  ; bit 31 = PG
    mov cr0, ebx

  ret

; Selector of the task gate that refers to the user TSS: GDT index 7,
; i.e. byte offset 8 * 7 = 0x38, with RPL 0.
USER_TASK_GATE_GDT_IDX equ 7
USER_TASK_GATE equ 8 * USER_TASK_GATE_GDT_IDX

; Switch to the user task with a far call through its task gate.
global switch_task
switch_task:
  call USER_TASK_GATE: 0x0              ; far call: selector = task gate, offset = 0x0
  ret                                   ; reached only if control comes back here

; Selector of the kernel TSS descriptor: GDT index 5, i.e. 8 * 5 = 0x28.
KERNEL_TASK_SEG_IDX equ 5
KERNEL_TASK_SEG equ 8 * KERNEL_TASK_SEG_IDX
; `ltr` below takes its operand from memory, so the selector is kept in a word.
kernel_task_selector: dw KERNEL_TASK_SEG

; Function to load kernel task register.
global load_kernel_tr
load_kernel_tr:
  ltr [kernel_task_selector]            ; TR <- kernel TSS selector
  ret

; Test target: puts a recognisable marker value in eax and executes iret.
; NOTE(review): no caller is visible in this file -- presumably a debugging aid.
global dummy_branch
dummy_branch:
  mov eax, 0xfadefade
  iret

; Load IDTR from the descriptor stored at idt_info_ptr (defined elsewhere).
initialize_idt:
  lidt [idt_info_ptr]
  ret

; Set the interrupt flag so maskable interrupts are delivered.
global enable_interrupts
enable_interrupts:
  sti
  ret

; Clear the interrupt flag so maskable interrupts are held off.
global disable_interrupts
disable_interrupts:
  cli
  ret

extern fault_handler
extern irq_handler

global isr_common
global isr0
global isr1
global isr2
global isr3
global isr4
global isr5
global isr6
global isr7
global isr8
global isr9
global isr10
global isr11
global isr12
global isr13
global isr14
global isr15
global isr16
global isr17
global isr18
global isr19
global isr20
global isr21
global isr22
global isr23
global isr24
global isr25
global isr26
global isr27
global isr28
global isr29
global isr30
global isr31

global irq0
global irq1
global irq2
global irq3
global irq4
global irq5
global irq6
global irq7
global irq8
global irq9
global irq10
global irq11
global irq12
global irq13
global irq14
global irq15

; Exception stubs isr0..isr31 all have the same shape: disable interrupts,
; make sure two values sit on top of the CPU-pushed frame -- an error code
; and then the vector number -- and jump to isr_common, which later drops
; both with `add esp, 8`. Stubs for vectors where the CPU supplies no error
; code push a dummy 0 first, as this one does.
isr0:
  cli
  push byte 0                           ; dummy error code
  push byte 0                           ; vector number
  jmp isr_common

isr1:
  cli
  push byte 0
  push byte 1
  jmp isr_common

isr2:
  cli
  push byte 0
  push byte 2
  jmp isr_common

isr3:
  cli
  push byte 0
  push byte 3
  jmp isr_common

isr4:
  cli
  push byte 0
  push byte 4
  jmp isr_common

isr5:
  cli
  push byte 0
  push byte 5
  jmp isr_common

isr6:
  cli
  push byte 0
  push byte 6
  jmp isr_common

isr7:
  cli
  push byte 0
  push byte 7
  jmp isr_common

; Vector 8: no dummy error code is pushed here because the CPU is expected
; to have pushed a real one already; only the vector number is added.
isr8:
  cli
  push byte 8                           ; vector number
  jmp isr_common

isr9:
  cli
  push byte 0
  push byte 9
  jmp isr_common

isr10:
  cli
  push byte 10
  jmp isr_common

isr11:
  cli
  push byte 11
  jmp isr_common

isr12:
  cli
  push byte 12
  jmp isr_common

isr13:
  cli
  push byte 13
  jmp isr_common

isr14:
  cli
  push byte 14
  jmp isr_common

isr15:
  cli
  push byte 0
  push byte 15
  jmp isr_common

isr16:
  cli
  push byte 0
  push byte 16
  jmp isr_common

; Vector 17 (alignment check, #AC): the CPU pushes an error code itself,
; so only the vector number is pushed here (same shape as isr8 and
; isr10..isr14). Pushing an extra dummy 0 would leave one word too many on
; the stack when isr_common executes `add esp, 8` / `iret`.
isr17:
  cli
  push byte 17                          ; vector number
  jmp isr_common

isr18:
  cli
  push byte 0
  push byte 18
  jmp isr_common

isr19:
  cli
  push byte 0
  push byte 19
  jmp isr_common

isr20:
  cli
  push byte 0
  push byte 20
  jmp isr_common

isr21:
  cli
  push byte 0
  push byte 21
  jmp isr_common

isr22:
  cli
  push byte 0
  push byte 22
  jmp isr_common

isr23:
  cli
  push byte 0
  push byte 23
  jmp isr_common

isr24:
  cli
  push byte 0
  push byte 24
  jmp isr_common

isr25:
  cli
  push byte 0
  push byte 25
  jmp isr_common

isr26:
  cli
  push byte 0
  push byte 26
  jmp isr_common

isr27:
  cli
  push byte 0
  push byte 27
  jmp isr_common

isr28:
  cli
  push byte 0
  push byte 28
  jmp isr_common

isr29:
  cli
  push byte 0
  push byte 29
  jmp isr_common

isr30:
  cli
  push byte 0
  push byte 30
  jmp isr_common

isr31:
  cli
  push byte 0
  push byte 31
  jmp isr_common

; Common tail for all exception stubs.
; On entry the stack holds (top first): vector number, error code, then the
; frame pushed by the CPU. This routine saves the general-purpose and data
; segment registers, switches the data segments to selector 0x10, and calls
; fault_handler with a single argument: a pointer to the saved state.
isr_common:
  pusha                                 ; save eax, ecx, edx, ebx, esp, ebp, esi, edi
  push ds
  push es
  push fs
  push gs
  mov ax, 0x10                          ; kernel data selector
  mov ds, ax
  mov es, ax
  mov fs, ax
  mov gs, ax
  mov eax, esp                          ; eax = address of the saved-register block
  push eax                              ; argument for fault_handler
  mov eax, fault_handler
  call eax
  pop eax                               ; discard the argument
  pop gs                                ; restore in reverse order of the pushes
  pop fs
  pop es
  pop ds
  popa
  add esp, 8                            ; drop vector number and error code
  iret

; Hardware-interrupt stubs irq0..irq15 share one shape: disable interrupts,
; push a dummy error code of 0, push the vector number (32 + IRQ number),
; and jump to irq_common.
irq0:
  cli
  push byte 0                           ; dummy error code
  push byte 32                          ; vector number for IRQ 0
  jmp irq_common

irq1:
  cli
  push byte 0
  push byte 33
  jmp irq_common

irq2:
  cli
  push byte 0
  push byte 34
  jmp irq_common

irq3:
  cli
  push byte 0
  push byte 35
  jmp irq_common

irq4:
  cli
  push byte 0
  push byte 36
  jmp irq_common

irq5:
  cli
  push byte 0
  push byte 37
  jmp irq_common

irq6:
  cli
  push byte 0
  push byte 38
  jmp irq_common

irq7:
  cli
  push byte 0
  push byte 39
  jmp irq_common

irq8:
  cli
  push byte 0
  push byte 40
  jmp irq_common

irq9:
  cli
  push byte 0
  push byte 41
  jmp irq_common

irq10:
  cli
  push byte 0
  push byte 42
  jmp irq_common

irq11:
  cli
  push byte 0
  push byte 43
  jmp irq_common

irq12:
  cli
  push byte 0
  push byte 44
  jmp irq_common

irq13:
  cli
  push byte 0
  push byte 45
  jmp irq_common

irq14:
  cli
  push byte 0
  push byte 46
  jmp irq_common

irq15:
  cli
  push byte 0
  push byte 47
  jmp irq_common


; Common tail for all IRQ stubs. Identical to isr_common except that the
; C routine called is irq_handler. The handler receives one argument: a
; pointer to the register block saved below.
irq_common:
  pusha                                 ; save general-purpose registers
  push ds
  push es
  push fs
  push gs
  mov ax, 0x10                          ; kernel data selector
  mov ds, ax
  mov es, ax
  mov fs, ax
  mov gs, ax
  mov eax, esp                          ; eax = address of the saved-register block
  push eax                              ; argument for irq_handler
  mov eax, irq_handler
  call eax
  pop eax                               ; discard the argument
  pop gs                                ; restore in reverse order of the pushes
  pop fs
  pop es
  pop ds
  popa
  add esp, 8                            ; drop vector number and dummy error code
  iret

The kernel is compiled with:

# All C sources that make up the kernel image.
C_SOURCES = $(wildcard kernel/*.c kernel/**/*.c drivers/**/*.c fs/*.c)

# 32-bit, freestanding, unoptimised build with debug info; no libc and no
# stack protector.
C_FLAGS = -Wall -O0 -m32 -fno-pie -fno-stack-protector -ffreestanding -fno-hosted -nolibc -nostdlib -g
C_FLAGS += -I./

OBJ = $(patsubst %.c, %.o, ${C_SOURCES})
# kernel_entry.o is listed first so it is the first object handed to the
# linker; the image is emitted as a flat binary laid out by kernel.ld.
kernel.bin: kernel/kernel_entry.o ${OBJ}
    ld -o kernel.bin -m elf_i386 $^ --oformat binary -T kernel.ld
# Compile each C file with the flags above.
%.o: %.c
    gcc ${C_FLAGS} -c $< -o $@

# Assemble each NASM file to a 32-bit ELF object.
%.o: %.asm
    nasm $< -f elf -g -o $@

kernel.ld

/*
 * Kernel image layout: code and initialised data start at 0x1000,
 * uninitialised data is placed at 1 MiB. _bss_start and _bss_end bracket
 * the .bss output section for the C code.
 */
SECTIONS
{
  . = 0x1000;
  .text : { *(.text) }
  .data : { *(.data) }
  .bss 0x100000 : {
   _bss_start = ( ADDR(.bss) ) ;
    *(.bss)
    /* Common symbols are collected from the input section named COMMON
       (no leading dot); `.COMMON` matches nothing. */
    *(COMMON)
   _bss_end = ( ADDR(.bss ) + SIZEOF(.bss) ) ;
  }
}

ENTRY(kernel_entry)

The application that is loaded is app.s (never mind that it does nothing useful)

    .globl main
    .intel_syntax noprefix

# Test payload. Only the first instruction matters: it jumps to itself, so
# the instructions after it are never executed.
main:
    jmp $               # spin forever at the entry point
    jmp 0xbaba
    iret
    add eax, ebx
loop:
    mov eax, 0x1
    cmp eax, 0x1
    jmp 0x8: 0x119f     # far jump: selector 0x8, offset 0x119f
exit:
    iret
    jmp 0x0:0x119d      # far jump: selector 0x0, offset 0x119d

The app is compiled with:

gcc -o app.bin -m32 -fno-pic -fno-pie -flinker-output=exec -Ttext=0x400000 -Wl,-emain app.s

I verify that the binary (app.bin) is correctly loaded into memory by manually inspecting the bytes at 0x2000000 with the Qemu command xp /16i 0x2000000 and comparing them with hexdump -C app.bin -n 16. Interestingly, the output of objdump -d app.bin, while containing a section similar to the original app.s, doesn't seem to have matching bytes. It starts with f3 0f 1e ..., whereas app.bin starts with 7f 45 4c .... I'll paste it here with some parts cut out.

objdump -d app.bin (truncated):

app.bin:     file format elf32-i386


Disassembly of section .text:

00400000 <_start>:
  400000:   f3 0f 1e fb             endbr32 
  400004:   31 ed                   xor    %ebp,%ebp
  400006:   5e                      pop    %esi
  400007:   89 e1                   mov    %esp,%ecx
  400009:   83 e4 f0                and    $0xfffffff0,%esp
  40000c:   50                      push   %eax
  40000d:   54                      push   %esp
  40000e:   52                      push   %edx
  40000f:   e8 22 00 00 00          call   400036 <_start+0x36>
  400014:   81 c3 c8 2f 00 00       add    $0x2fc8,%ebx
  40001a:   8d 83 f4 d1 ff ff       lea    -0x2e0c(%ebx),%eax
  400020:   50                      push   %eax
  400021:   8d 83 84 d1 ff ff       lea    -0x2e7c(%ebx),%eax
  400027:   50                      push   %eax
  400028:   51                      push   %ecx
  400029:   56                      push   %esi
  40002a:   ff b3 1c 00 00 00       pushl  0x1c(%ebx)
  400030:   e8 0b 10 c0 ff          call   1040 <__libc_start_main@plt>
  400035:   f4                      hlt    
  400036:   8b 1c 24                mov    (%esp),%ebx
  400039:   c3                      ret    
  40003a:   66 90                   xchg   %ax,%ax
  40003c:   66 90                   xchg   %ax,%ax
  40003e:   66 90                   xchg   %ax,%ax

...


00400139 <__x86.get_pc_thunk.dx>:
  400139:   8b 14 24                mov    (%esp),%edx
  40013c:   c3                      ret    

0040013d <main>:
  40013d:   eb fe                   jmp    40013d <main>
  40013f:   e9 76 b9 c0 ff          jmp    baba <__cxa_finalize@plt+0xaa6a>
  400144:   cf                      iret   
  400145:   01 d8                   add    %ebx,%eax

00400147 <loop>:
  400147:   b8 01 00 00 00          mov    $0x1,%eax
  40014c:   83 f8 01                cmp    $0x1,%eax
  40014f:   ea 9f 11 00 00 08 00    ljmp   $0x8,$0x119f

00400156 <exit>:
  400156:   cf                      iret   
  400157:   ea 9d 11 00 00 00 00    ljmp   $0x0,$0x119d
  40015e:   66 90                   xchg   %ax,%ax
...

An additional thing I observed: At one point in my investigation, the objdump output DID match the hexdump and xp bytes (I'm not sure what I've done that now keeps this from happening :() and I observed that after loading the binary to 0x2000000, the instruction at 0x2000000 + offset_of_main (0x13d in this case), would end up being:

i. jmp 0x2000000 + offset_of_main if I used jmp $ (I would expect it to have been jmp offset_of_main)

ii. jmp 0x2000000 + arbitrary_constant if I used jmp arbitrary_constant, e.g. jmp 0x13d would appear as jmp 0x200013d when I run xp /16 0x200013d (I would have expected it to be jmp 0x13d)

(I think this is relevant because I'm assuming that whatever Qemu reports via xp is exactly how the CPU sees the instructions.)

assembly
gcc
x86
osdev
task-switching
asked on Stack Overflow Nov 21, 2020 by David • edited Nov 22, 2020 by David

0 Answers

Nobody has answered this question yet.


User contributions licensed under CC BY-SA 3.0