How to map the section to the segment from an ELF output file?

4

I have written a bootloader in assembly and am trying to load a C kernel from it.

This is the bootloader:

bits 16
xor ax,ax
jmp 0x0000:boot

extern kernel_main

global boot
boot:
    mov ah, 0x02             ; load second stage to memory
    mov al, 1                ; numbers of sectors to read into memory
    mov dl, 0x80             ; sector read from fixed/usb disk ;0 for floppy; 0x80 for hd
    mov ch, 0                ; cylinder number
    mov dh, 0                ; head number
    mov cl, 2                ; sector number
    mov bx, 0x8000           ; load into es:bx segment :offset of buffer
    int 0x13                 ; disk I/O interrupt

    mov ax, 0x2401
    int 0x15 ; enable A20 bit
    mov ax, 0x3
    int 0x10 ; set vga text mode 3


    cli

    lgdt [gdt_pointer] ; load the gdt table
    mov eax, cr0
    or eax,0x1 ; set the protected mode bit on special CPU reg cr0
    mov cr0, eax
    jmp CODE_SEG:boot2 ; long jump to the code segment


gdt_start:
    dq 0x0
gdt_code:
    dw 0xFFFF
    dw 0x0
    db 0x0
    db 10011010b
    db 11001111b
    db 0x0
gdt_data:
    dw 0xFFFF
    dw 0x0
    db 0x0
    db 10010010b
    db 11001111b
    db 0x0
gdt_end:


gdt_pointer:
    dw gdt_end - gdt_start
    dd gdt_start
CODE_SEG equ gdt_code - gdt_start
DATA_SEG equ gdt_data - gdt_start

bits 32
boot2:
    mov ax, DATA_SEG
    mov ds, ax
    mov es, ax
    mov fs, ax
    mov gs, ax
    mov ss, ax

;    mov esi,hello
;    mov ebx,0xb8000
;.loop:
;    lodsb
;    or al,al
;    jz haltz
;    or eax,0x0100
;    mov word [ebx], ax
;    add ebx,2
;    jmp .loop
;haltz:
;hello: db "Hello world!",0

mov esp,kernel_stack_top
jmp kernel_main

cli
hlt

times 510 -($-$$) db 0
dw 0xaa55

section .bss
align 4
kernel_stack_bottom: equ $
    resb 16384 ; 16 KB
kernel_stack_top:

and this is the C kernel:

__asm__("cli\n");
void kernel_main(void){
  const char string[] = "012345678901234567890123456789012345678901234567890123456789012";
  volatile unsigned char* vid_mem = (unsigned char*) 0xb8000;
  int j=0;
  while(string[j]!='\0'){

    *vid_mem++ = (unsigned char) string[j++];
    *vid_mem++ = 0x09;
  }

for(;;);

}

Now I am compiling both sources separately into ELF output files, then linking them with a linker script that outputs a raw binary file, which I load with QEMU.

Linker script:

ENTRY(boot)
OUTPUT_FORMAT("binary")

SECTIONS{
  . = 0x7c00;

  .boot1 : {
    *(.boot)
  }

  .kernel : AT(0x7e00){
    *(.text)
    *(.rodata)
    *(.data)
    _bss_start = .;
    *(.bss)
    *(COMMON)
    _bss_end = .;
    *(.comment)
    *(.symtab)
    *(.shstrtab)
    *(.strtab)
  }
  /DISCARD/ : {
        *(.eh_frame)
  }

}

with a build script:

nasm -f elf32 boot.asm -o boot.o
/home/rakesh/Desktop/cross-compiler/i686-elf-4.9.1-Linux-x86_64/bin/i686-elf-gcc -m32 kernel.c -o kernel.o -e kernel_main -Ttext 0x0 -nostdlib -ffreestanding -std=gnu99 -mno-red-zone -fno-exceptions -nostdlib  -Wall -Wextra
/home/rakesh/Desktop/cross-compiler/i686-elf-4.9.1-Linux-x86_64/bin/i686-elf-ld boot.o kernel.o -o kernel.bin -T linker3.ld
qemu-system-x86_64 kernel.bin

But I am running into a little problem. Notice the string in the C kernel:

const char string[] = "012345678901234567890123456789012345678901234567890123456789012";

When its size is less than or equal to 64 bytes (including the null terminator), the program works correctly.

enter image description here

However, when the string size grows beyond 64 bytes, the program no longer seems to work.

enter image description here

I was trying to debug it myself and observed that when the string size is less than or equal to 64 bytes then the output ELF file, the kernel.o have following contents :

ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           Intel 80386
  Version:                           0x1
  Entry point address:               0x1
  Start of program headers:          52 (bytes into file)
  Start of section headers:          4412 (bytes into file)
  Flags:                             0x0
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         1
  Size of section headers:           40 (bytes)
  Number of section headers:         7
  Section header string table index: 4

Section Headers:
  [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
  [ 0]                   NULL            00000000 000000 000000 00      0   0  0
  [ 1] .text             PROGBITS        00000000 001000 0000bd 00  AX  0   0  1
  [ 2] .eh_frame         PROGBITS        000000c0 0010c0 000034 00   A  0   0  4
  [ 3] .comment          PROGBITS        00000000 0010f4 000011 01  MS  0   0  1
  [ 4] .shstrtab         STRTAB          00000000 001105 000034 00      0   0  1
  [ 5] .symtab           SYMTAB          00000000 001254 0000a0 10      6   6  4
  [ 6] .strtab           STRTAB          00000000 0012f4 00002e 00      0   0  1
Key to Flags:
  W (write), A (alloc), X (execute), M (merge), S (strings), I (info),
  L (link order), O (extra OS processing required), G (group), T (TLS),
  C (compressed), x (unknown), o (OS specific), E (exclude),
  p (processor specific)

There are no section groups in this file.

Program Headers:
  Type           Offset   VirtAddr   PhysAddr   FileSiz MemSiz  Flg Align
  LOAD           0x001000 0x00000000 0x00000000 0x000f4 0x000f4 R E 0x1000

 Section to Segment mapping:
  Segment Sections...
   00     .text .eh_frame 

There is no dynamic section in this file.

There are no relocations in this file.

The decoding of unwind sections for machine type Intel 80386 is not currently supported.

Symbol table '.symtab' contains 10 entries:
   Num:    Value  Size Type    Bind   Vis      Ndx Name
     0: 00000000     0 NOTYPE  LOCAL  DEFAULT  UND 
     1: 00000000     0 SECTION LOCAL  DEFAULT    1 
     2: 000000c0     0 SECTION LOCAL  DEFAULT    2 
     3: 00000000     0 SECTION LOCAL  DEFAULT    3 
     4: 00000000     0 FILE    LOCAL  DEFAULT  ABS kernel.c
     5: 00000000     0 FILE    LOCAL  DEFAULT  ABS 
     6: 00000001   188 FUNC    GLOBAL DEFAULT    1 kernel_main
     7: 000010f4     0 NOTYPE  GLOBAL DEFAULT    2 __bss_start
     8: 000010f4     0 NOTYPE  GLOBAL DEFAULT    2 _edata
     9: 000010f4     0 NOTYPE  GLOBAL DEFAULT    2 _end

No version information found in this file.

However when the size of the string is more than 64 bytes the contents are like this:

ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           Intel 80386
  Version:                           0x1
  Entry point address:               0x1
  Start of program headers:          52 (bytes into file)
  Start of section headers:          4432 (bytes into file)
  Flags:                             0x0
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         1
  Size of section headers:           40 (bytes)
  Number of section headers:         8
  Section header string table index: 5

Section Headers:
  [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
  [ 0]                   NULL            00000000 000000 000000 00      0   0  0
  [ 1] .text             PROGBITS        00000000 001000 000083 00  AX  0   0  1
  [ 2] .rodata           PROGBITS        00000084 001084 000041 00   A  0   0  4
  [ 3] .eh_frame         PROGBITS        000000c8 0010c8 000038 00   A  0   0  4
  [ 4] .comment          PROGBITS        00000000 001100 000011 01  MS  0   0  1
  [ 5] .shstrtab         STRTAB          00000000 001111 00003c 00      0   0  1
  [ 6] .symtab           SYMTAB          00000000 001290 0000b0 10      7   7  4
  [ 7] .strtab           STRTAB          00000000 001340 00002e 00      0   0  1
Key to Flags:
  W (write), A (alloc), X (execute), M (merge), S (strings), I (info),
  L (link order), O (extra OS processing required), G (group), T (TLS),
  C (compressed), x (unknown), o (OS specific), E (exclude),
  p (processor specific)

There are no section groups in this file.

Program Headers:
  Type           Offset   VirtAddr   PhysAddr   FileSiz MemSiz  Flg Align
  LOAD           0x001000 0x00000000 0x00000000 0x00100 0x00100 R E 0x1000

 Section to Segment mapping:
  Segment Sections...
   00     .text .rodata .eh_frame 

There is no dynamic section in this file.

There are no relocations in this file.

The decoding of unwind sections for machine type Intel 80386 is not currently supported.

Symbol table '.symtab' contains 11 entries:
   Num:    Value  Size Type    Bind   Vis      Ndx Name
     0: 00000000     0 NOTYPE  LOCAL  DEFAULT  UND 
     1: 00000000     0 SECTION LOCAL  DEFAULT    1 
     2: 00000084     0 SECTION LOCAL  DEFAULT    2 
     3: 000000c8     0 SECTION LOCAL  DEFAULT    3 
     4: 00000000     0 SECTION LOCAL  DEFAULT    4 
     5: 00000000     0 FILE    LOCAL  DEFAULT  ABS kernel.c
     6: 00000000     0 FILE    LOCAL  DEFAULT  ABS 
     7: 00000001   130 FUNC    GLOBAL DEFAULT    1 kernel_main
     8: 00001100     0 NOTYPE  GLOBAL DEFAULT    3 __bss_start
     9: 00001100     0 NOTYPE  GLOBAL DEFAULT    3 _edata
    10: 00001100     0 NOTYPE  GLOBAL DEFAULT    3 _end

No version information found in this file.

I noticed that the string is now in the .rodata section with a size of 41 hex or 65 bytes, which has to be mapped to a segment, possibly the 0th segment which is NULL. And that the program is unable to find the .rodata.

I am unable to make it work. I understand the ELF structure but I don't know how to work with them.

gcc
assembly
x86
elf
osdev
asked on Stack Overflow Sep 25, 2018 by rsonx • edited Sep 16, 2019 by Michael Petch

1 Answer

5

The two serious issues that cause most of the problems are:

  • You load the second sector of the disk to 0x0000:0x8000, while all of the code expects the kernel to be loaded right after the bootloader at 0x0000:0x7e00.
  • You compile your kernel.c straight to an executable named kernel.o. You should compile it to a proper object file so it can go through the expected linking phase when you run ld.

To fix the problem with the kernel being loaded into memory at the wrong memory location, change:

mov bx, 0x8000           ; load into es:bx segment :offset of buffer

to:

mov bx, 0x7e00           ; load into es:bx segment :offset of buffer

To fix the issue of compiling kernel.c to an executable ELF file named kernel.o, remove the -e kernel_main -Ttext 0x0 options and replace them with -c. The -c option forces GCC to produce an object file that can be properly linked with LD. Change:

/home/rakesh/Desktop/cross-compiler/i686-elf-4.9.1-Linux-x86_64/bin/i686-elf-gcc -m32 kernel.c -o kernel.o -e kernel_main -Ttext 0x0 -nostdlib -ffreestanding -std=gnu99 -mno-red-zone -fno-exceptions -nostdlib  -Wall -Wextra

to:

/home/rakesh/Desktop/cross-compiler/i686-elf-4.9.1-Linux-x86_64/bin/i686-elf-gcc -m32 -c kernel.c -o kernel.o -nostdlib -ffreestanding -std=gnu99 -mno-red-zone -fno-exceptions -Wall -Wextra

Reason for Failure with Longer Strings

The reason the string of 64 bytes or fewer (including the terminating NUL) worked is that the compiler generated the code in a position-independent way, initializing the array on the stack with immediate values. When the size exceeded 64 bytes, the compiler placed the string into the .rodata section and then initialized the array on the stack by copying it from .rodata. This made your code position dependent. Your code was loaded at the wrong offset and linked with an incorrect origin point, so it referenced incorrect addresses and failed.


Other Observations

  • You should initialize your BSS (.bss) section to 0 before calling kernel_main. This can be done in assembly by iterating through all the bytes from offset _bss_start to offset _bss_end.
  • The .comment section will be emitted into your binary file wasting bytes as a result. You should put it in the /DISCARD/ section.
  • You should place the BSS section in your linker script after all the others so it doesn't take up space in kernel.bin
  • In boot.asm you should set SS:SP (stack pointer) near the beginning before reading disk sectors. It should be set to a place that won't interfere with your code. This is especially important when reading data into memory from disk since you don't know where the BIOS placed the current stack. You don't want to read on top of the current stack area. Setting it just below the bootloader at 0x0000:0x7c00 should work.
  • Before calling into C code you should clear the direction flag to ensure string instructions use forward movement. You can do this by using the CLD instruction.
  • In boot.asm you can make your code more generic by using the boot drive number passed by the BIOS in the DL register rather than hard coding it to the value 0x80 (0x80 being the first hard drive).
  • You might consider turning on optimization with -O3, or using optimization level -Os to optimize for code size.
  • Your linker script doesn't quite work the way you expect although it produces the correct results. You never declared .boot section in your NASM file so nothing actually gets placed in the .boot1 output section in the linker script. It works because it gets included in the .text section in the .kernel output section.
  • It is preferable to remove the padding and boot signature from the assembly file and move it to the linker script
  • Instead of having your linker script output a binary file directly, it is more useful to output to the default ELF executable format. You can then use OBJCOPY to convert the ELF file to a binary file. This allows you to build with debug information which will appear as part of the ELF executable. The ELF executable can be used to symbolically debug your binary kernel in QEMU.
  • Rather than use LD directly for linking, use GCC. This has the advantage that the libgcc library can be added without specifying the full path to the library. libgcc is a set of routines that may be needed for C code generation with GCC

Revised source code, linker script and build commands with the observations above taken into account:

boot.asm:

; boot.asm - BIOS boot sector (NASM syntax, assembled with -f elf32).
;
; Flow: real mode -> load disk sector 2 to 0x0000:0x7e00 -> enable A20 ->
; set text mode 3 -> load the GDT -> enter 32-bit protected mode ->
; zero the BSS -> call kernel_main.
;
; The 0xaa55 boot signature and the final memory layout are supplied by the
; linker script; _bss_start and _bss_len are symbols it defines.
bits 16

section .boot

extern kernel_main
extern _bss_start
extern _bss_len

global boot

    jmp 0x0000:boot          ; far jump so CS is a known 0x0000
boot:
    ; Make every real-mode segment register match the flat 0x0000-based
    ; addresses the linker assigns. ES is the segment of the INT 13h buffer
    ; (ES:BX) and DS is used by "lgdt [gdt_pointer]", so neither may be left
    ; at whatever value the BIOS happened to have.
    xor ax, ax
    mov ds, ax
    mov es, ax

    ; Place realmode stack pointer below bootloader where it doesn't
    ; get in our way. SP is loaded immediately after SS.
    mov ss, ax
    mov sp, 0x7c00

    ; DL is deliberately left untouched: it still holds the boot drive
    ; number passed in by the BIOS, which is what we want to read from.
    mov ah, 0x02             ; BIOS function: read sectors into memory
    mov al, 1                ; numbers of sectors to read into memory
    mov ch, 0                ; cylinder number
    mov dh, 0                ; head number
    mov cl, 2                ; sector number (1-based; 1 is this boot sector)
    mov bx, 0x7e00           ; load into es:bx segment :offset of buffer
    int 0x13                 ; disk I/O interrupt
    jc disk_error            ; CF set = read failed; don't run unloaded memory

    mov ax, 0x2401
    int 0x15                 ; enable A20 bit
    mov ax, 0x3
    int 0x10                 ; set vga text mode 3

    cli                      ; no IDT is set up for protected mode

    lgdt [gdt_pointer]       ; load the gdt table
    mov eax, cr0
    or eax,0x1               ; set the protected mode bit on special CPU reg cr0
    mov cr0, eax
    jmp CODE_SEG:boot2       ; long jump to the code segment

; The sector could not be read: stop here instead of continuing.
disk_error:
    cli
.hang:
    hlt
    jmp .hang

; Flat GDT: null descriptor, then one code and one data descriptor, both
; with base 0 and limit 0xFFFFF in 4 KiB units (the full 4 GiB).
gdt_start:
    dq 0x0                   ; mandatory null descriptor
gdt_code:
    dw 0xFFFF                ; limit 15:0
    dw 0x0                   ; base 15:0
    db 0x0                   ; base 23:16
    db 10011010b             ; present, ring 0, code, execute/read
    db 11001111b             ; 4 KiB granularity, 32-bit, limit 19:16 = 0xF
    db 0x0                   ; base 31:24
gdt_data:
    dw 0xFFFF                ; limit 15:0
    dw 0x0                   ; base 15:0
    db 0x0                   ; base 23:16
    db 10010010b             ; present, ring 0, data, read/write
    db 11001111b             ; 4 KiB granularity, 32-bit, limit 19:16 = 0xF
    db 0x0                   ; base 31:24
gdt_end:


; Operand for lgdt: 16-bit size field followed by the 32-bit table address.
gdt_pointer:
    dw gdt_end - gdt_start
    dd gdt_start
CODE_SEG equ gdt_code - gdt_start   ; selector of the code descriptor
DATA_SEG equ gdt_data - gdt_start   ; selector of the data descriptor

bits 32
boot2:
    ; Point every data segment register at the flat data descriptor.
    mov ax, DATA_SEG
    mov ds, ax
    mov es, ax
    mov fs, ax
    mov gs, ax
    mov ss, ax

    ; Zero out the BSS area. CLD also leaves the direction flag clear for
    ; the C code that follows.
    cld
    mov edi, _bss_start
    mov ecx, _bss_len
    xor eax, eax
    rep stosb

    ; The stack lives inside the BSS, so it is set up only after the clear.
    mov esp,kernel_stack_top
    call kernel_main

    ; kernel_main is not expected to return; if it does, stay halted even
    ; if the CPU is woken up again.
    cli
.halt:
    hlt
    jmp .halt


section .bss
align 4
kernel_stack_bottom: equ $
    resb 16384 ; 16 KB
kernel_stack_top:

kernel.c:

/*
 * Kernel entry point.
 *
 * Writes a fixed test string to the text-mode video buffer at 0xb8000,
 * one (character, attribute) byte pair per cell with attribute 0x09,
 * and then spins forever. It never returns.
 */
void kernel_main(void){
  const char string[] = "01234567890123456789012345678901234567890123456789012345678901234";
  volatile unsigned char* cell = (unsigned char*) 0xb8000;

  for (int i = 0; string[i] != '\0'; i++) {
    cell[0] = (unsigned char) string[i];   /* character byte */
    cell[1] = 0x09;                        /* attribute byte */
    cell += 2;                             /* advance to the next cell */
  }

  /* Nothing to return to: park the CPU in a busy loop. */
  for(;;);
}

linker3.ld:

/*
 * linker3.ld - lays out the boot sector and the kernel as one flat image.
 *
 *   0x7c00  .boot1   boot sector code (input section .boot)
 *   0x7dfe  .sig     0xaa55 boot signature (placed via its load address)
 *   0x7e00  .kernel  kernel code, read-only data, data, then BSS
 *
 * Exports _bss_start, _bss_end and _bss_len so the startup code can clear
 * the BSS before entering C.
 */
ENTRY(boot)

SECTIONS{
  . = 0x7c00;

  .boot1 : {
    *(.boot);
  }

  .sig : AT(0x7dfe){
     SHORT(0xaa55);
  }

  . = 0x7e00;
  .kernel : AT(0x7e00){
    /*
     * Trailing wildcards also collect compiler-generated sub-sections
     * (.text.*, .data.*, .bss.*), matching what was already done for
     * .rodata. Anything not listed would become an orphan section that
     * the linker places on its own, outside this layout.
     */
    *(.text*);
    *(.rodata*);
    *(.data*);
    _bss_start = .;
    *(.bss*);
    *(COMMON);
    _bss_end = .;
    _bss_len = _bss_end - _bss_start;
  }
  /DISCARD/ : {
    *(.eh_frame);
    *(.comment);
  }

}

/* The boot code must end before the signature at offset 510. */
ASSERT(SIZEOF(.boot1) <= 510, "boot sector code does not fit in 510 bytes")

Commands to build this bootloader and kernel:

# Assemble the boot sector to an ELF32 object with DWARF debug info.
nasm -g -F dwarf -f elf32 boot.asm -o boot.o

# Compile the kernel to an object file only (-c); linking happens below.
i686-elf-gcc -g -O3 kernel.c -c -o kernel.o -ffreestanding -std=gnu99 \
    -mno-red-zone -fno-exceptions -Wall -Wextra

# Link through GCC so -lgcc resolves without a full library path. The result
# is an ELF executable that keeps the debug information.
i686-elf-gcc -nostdlib -Wl,--build-id=none -T linker3.ld boot.o kernel.o \
    -lgcc -o kernel.elf

# Strip the ELF container to get the raw image that is actually booted.
# Uses the cross toolchain's objcopy, like every other step above.
i686-elf-objcopy -O binary kernel.elf kernel.bin

To symbolically debug the 32-bit kernel with QEMU you can launch QEMU this way:

# Boot kernel.bin as a floppy image in the background.
# -S starts the CPU stopped; -s opens a GDB server on TCP port 1234.
qemu-system-i386 -fda kernel.bin -S -s &
# Load symbols from kernel.elf, attach to QEMU, break at kernel_main,
# show the source layout and let the guest run.
gdb kernel.elf \
        -ex 'target remote localhost:1234' \
        -ex 'break *kernel_main' \
        -ex 'layout src' \
        -ex 'continue'

This will start up your kernel.bin file in QEMU and then remotely connect the GDB debugger. The layout should show the source code and break on kernel_main.

answered on Stack Overflow Sep 26, 2018 by Michael Petch • edited Sep 26, 2018 by Michael Petch

User contributions licensed under CC BY-SA 3.0