Replicating `std::thread::spawn` results in stack overflow

0

Description

I am trying to spawn threads on my Windows machine in a #![no_std] crate, but I am running into problems in the __chkstk function. To start off, I created a crate with std and tried to find the places in libstd that are responsible for spawning threads.

Libstd code (shortened to only show the relevant parts)

// libstd/thread/mod.rs
/// Spawns a thread that runs `f` and returns its `JoinHandle`.
///
/// Delegates to `Builder::new().spawn(f)` and panics with
/// "failed to spawn thread" if that call returns an error.
pub fn spawn<F, T>(f: F) -> JoinHandle<T>
where
    F: FnOnce() -> T,
    F: Send + 'static,
    T: Send + 'static,
{
    Builder::new().spawn(f).expect("failed to spawn thread")
}

// libstd/thread/mod.rs
impl Builder {
    /// Safe entry point: requires `F` and `T` to be `'static` and forwards to
    /// `spawn_unchecked`.
    pub fn spawn<F, T>(self, f: F) -> io::Result<JoinHandle<T>>
    where
        F: FnOnce() -> T,
        F: Send + 'static,
        T: Send + 'static,
    {
        unsafe { self.spawn_unchecked(f) }
    }

    /// Same as `spawn`, but `F` and `T` only need to outlive `'a` instead of
    /// `'static`.
    pub unsafe fn spawn_unchecked<'a, F, T>(self, f: F) -> io::Result<JoinHandle<T>>
    where
        F: FnOnce() -> T,
        F: Send + 'a,
        T: Send + 'a,
    {
        // ...

        // `main` is presumably a closure built in the elided code above. It is
        // boxed and typed as a `Box<dyn FnOnce() + 'a>` trait object here; the
        // transmute only changes the lifetime bound from `'a` to `'static`.
        imp::Thread::new(
            mem::transmute::<Box<dyn FnOnce() + 'a>, Box<dyn FnOnce() + 'static>>(Box::new(
                main,
            )),
        )

        // ...
    }
}

// libstd/sys/windows/thread.rs
impl Thread {
    /// Starts an OS thread that runs the boxed closure `p`.
    ///
    /// `p` is boxed a second time so that a thin pointer to the inner
    /// `Box<dyn FnOnce()>` can be passed as the thread parameter.
    ///
    /// Returns `Err(io::Error::last_os_error())` when `CreateThread` returns a
    /// null handle; otherwise returns the `Thread` wrapping the new handle.
    pub unsafe fn new(p: Box<dyn FnOnce()>) -> io::Result<Thread> {
        // Start routine handed to `CreateThread`; forwards the raw parameter
        // to `start_thread`, which rebuilds the box and calls the closure.
        extern "system" fn thread_start(main: *mut c_void) -> c::DWORD {
            unsafe { start_thread(main as *mut u8); }
            0
        }

        // `p` is now a `Box<Box<dyn FnOnce()>>`.
        let p = box p;

        let ret = c::CreateThread(
            // ...
            thread_start,
            // Address of the inner `Box<dyn FnOnce()>`, with the pointee type erased.
            &*p as *const _ as *mut _,
            // ...
        );

        // Tail expression (no trailing `;`) so that the result is actually returned.
        if ret as usize == 0 {
            Err(io::Error::last_os_error())
        } else {
            // Skip `p`'s destructor: `start_thread` takes ownership of the
            // allocation through `Box::from_raw`.
            mem::forget(p);
            Ok(Thread { handle: Handle::new(ret) })
        }
    }
}

// libstd/sys_common/thread.rs
/// Runs the boxed closure that `main` points to.
///
/// `main` is reinterpreted as a `*mut Box<dyn FnOnce()>`; the owning outer box
/// is rebuilt with `Box::from_raw` and the closure is called.
pub unsafe fn start_thread(main: *mut u8) {
    // ...

    // Rebuild the `Box<Box<dyn FnOnce()>>` from the raw pointer and call it.
    Box::from_raw(main as *mut Box<dyn FnOnce()>)()
}

// libstd/sys/windows/c.rs
// Foreign declaration of `CreateThread` using the "system" ABI.
extern "system" {
    // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-createthread
    pub fn CreateThread(
        lpThreadAttributes: LPSECURITY_ATTRIBUTES,
        dwStackSize: SIZE_T,
        // Non-generic start routine: takes one untyped pointer, returns a `DWORD`.
        lpStartAddress: extern "system" fn(*mut c_void) -> DWORD,
        // Untyped pointer argument (see the linked documentation for its use).
        lpParameter: LPVOID,
        dwCreationFlags: DWORD,
        lpThreadId: LPDWORD,
    ) -> HANDLE;
}

My attempt at replicating libstd

I cut the code down for this example; this is the short, single-threaded version of how I tried to replicate libstd:

/// Stand-in for a thread handle: a C-layout wrapper around a `usize`.
/// The spawn functions in this example treat a value of 0 as failure.
#[repr(C)]
struct Handle(usize);

/// Counterpart of `std::thread::spawn` for this example: calls
/// `try_spawn_std_like` and panics with "failed to spawn thread" on `Err`.
fn spawn_std_like<F, T>(f: F) -> Handle
where
    F: FnOnce() -> T,
    F: Send + 'static,
    T: Send + 'static,
{
    try_spawn_std_like(f).expect("failed to spawn thread")
}

/// Imitation of libstd's `Thread::new`: double-boxes `f`, passes a raw pointer
/// to the mock `CreateThread`, and returns its handle (`Err(())` if it is 0).
///
/// NOTE(review): this is the variant that crashes in `__chkstk`. `p` is a
/// `Box<Box<F>>` (both boxes are thin pointers), but `thread_start`
/// reinterprets the inner box as a `Box<dyn FnOnce()>`, which is a fat pointer
/// that carries a vtable pointer. The pointee was never a trait object.
fn try_spawn_std_like<F, T>(f: F) -> Result<Handle, ()>
where
    F: FnOnce() -> T,
    F: Send + 'static,
    T: Send + 'static,
{
    // Start routine: rebuilds the outer box from the raw pointer and calls it.
    extern "system" fn thread_start(main: *mut u8) -> u32 {
        unsafe { Box::from_raw(main as *mut Box<dyn FnOnce()>)(); }
        0
    }

    // Inferred as `Box<Box<F>>`; nothing coerces the inner box to `dyn FnOnce()`.
    let p = Box::new(Box::new(f));

    let handle = CreateThread(
        thread_start,
        // Address of the inner box, with the pointee type erased.
        &*p as *const _ as *mut _
    );

    if handle.0 != 0 {
        // Skip `p`'s destructor: the start routine takes ownership of the
        // allocation through `Box::from_raw`.
        core::mem::forget(p);
        Ok(handle)
    } else {
        Err(())
    }
}

// Minimal version of `kernel32.CreateThread`, with only the relevant parameters.
// It does not create a thread: it calls `start_address(parameter)` synchronously
// on the current thread, ignores the result, and always returns `Handle(4)`.
// The non-snake-case name mirrors the real API, hence the lint allowance.
#[allow(non_snake_case)]
extern "system" fn CreateThread(
    start_address: extern "system" fn(*mut u8) -> u32,
    parameter: *mut u8
) -> Handle {
    start_address(parameter);
    // Emulate successful `CreateThread` call.
    Handle(4)
}

Calling this with spawn_std_like(|| println!("std_like!")); crashes the process with a stack overflow in __chkstk because it uses some memory address instead of the closure size as the "counter" to access memory pages: Exception thrown at 0x00007FF7E41FE948 in example.exe: 0xC00000FD: Stack overflow (parameters: 0x0000000000000001, 0x000000EFEDC06000).

Stack trace:

  • __chkstk() Line 109 (d:\agent_work\5\s\src\vctools\crt\vcstartup\src\misc\amd64\chkstk.asm)
  • std::alloc::boxed::{{impl}}::call_once<(), FnOnce<()>>(core::ops::function::Box<FnOnce<()>> self) Line 1015 (boxed.rs)
  • std::alloc::boxed::{{impl}}::call_once<(), alloc::boxed::Box<FnOnce<()>>>(core::ops::function::Box<FnOnce<()>> * self) Line 1015 (boxed.rs)
  • try_spawn_std_like::thread_start(unsigned char * main) (main.rs)
  • try_spawn_std_like::<closure-0, ()>(main::closure-0) (main.rs)
  • spawn_std_like<closure-0, ()>(main::closure-0 f) (main.rs)
  • main() (main.rs)

Other variants I tried

// Explicitly typed out, `std` style.
// Same flow as `try_spawn_std_like`, with the types in `thread_start` spelled out.
// NOTE(review): `p` is still a `Box<Box<F>>`, while `thread_start` treats the
// pointer as a `*mut Box<dyn FnOnce()>`, so the thin/fat pointer mismatch remains.
fn spawn0<F, T>(f: F) -> Result<Handle, ()>
where
    F: FnOnce() -> T,
    F: Send + 'static,
    T: Send + 'static,
{
    // Start routine: casts the raw pointer, rebuilds the outer box, calls it.
    extern "system" fn thread_start(f: *mut u8) -> u32 {
        let f = f as *mut Box<dyn FnOnce()>;
        let f: Box<Box<dyn FnOnce()>> = unsafe {
            Box::from_raw(f)
        };
        f();
        0
    }

    // Inferred as `Box<Box<F>>`, not `Box<Box<dyn FnOnce()>>`.
    let p = Box::new(Box::new(f));

    let handle = CreateThread(
        thread_start,
        // Address of the inner box, with the pointee type erased.
        &*p as *const _ as *mut _
    );

    if handle.0 != 0 {
        // Skip `p`'s destructor: the start routine owns the allocation.
        core::mem::forget(p);
        Ok(handle)
    } else {
        Err(())
    }
}

// Explicitly typed out, with `into_raw`.
// Uses `Box::into_raw` to give up ownership before the call, instead of
// `mem::forget` afterwards.
// NOTE(review): the annotations show the mismatch: the pointer is created as
// `*mut Box<F>` here but read back as `*mut Box<dyn FnOnce()>` in `thread_start`.
fn spawn1<F, T>(f: F) -> Result<Handle, ()>
where
    F: FnOnce() -> T,
    F: Send + 'static,
    T: Send + 'static,
{
    // Start routine: casts the raw pointer, rebuilds the outer box, calls it.
    extern "system" fn thread_start(f: *mut u8) -> u32 {
        let f = f as *mut Box<dyn FnOnce()>;
        let f: Box<Box<dyn FnOnce()>> = unsafe {
            Box::from_raw(f)
        };
        f();
        0
    }

    let f: Box<Box<F>> = Box::new(Box::new(f));
    // Release ownership of the outer box; `f` is now a raw pointer to the inner box.
    let f: *mut Box<F> = Box::into_raw(f);

    let handle = CreateThread(
        thread_start,
        f as *mut _
    );

    if handle.0 != 0 {
        Ok(handle)
    } else {
        // Failure path: rebuild the box so that it is dropped here.
        unsafe { Box::from_raw(f); }
        Err(())
    }
}

// Implicitly typed `spawn1` variant.
// NOTE(review): same mismatch as `spawn1`: `Box::into_raw` yields a
// `*mut Box<F>`, which `thread_start` reinterprets as `*mut Box<dyn FnOnce()>`.
fn spawn2<F, T>(f: F) -> Result<Handle, ()>
where
    F: FnOnce() -> T,
    F: Send + 'static,
    T: Send + 'static,
{
    // Start routine: rebuilds the outer box from the raw pointer and calls it.
    extern "system" fn thread_start(f: *mut u8) -> u32 {
        unsafe {
            Box::from_raw(
                f as *mut Box<dyn FnOnce()>
            )();
        }
        0
    }

    // Double-box `f` and release ownership; the type is inferred as `*mut Box<F>`.
    let f = Box::into_raw(Box::new(Box::new(f)));

    let handle = CreateThread(
        thread_start,
        f as *mut _
    );

    if handle.0 != 0 {
        Ok(handle)
    } else {
        // Failure path: rebuild the box so that it is dropped here.
        unsafe { Box::from_raw(f); }
        Err(())
    }
}

// Generic `thread_start` routine.
// `thread_start` takes a typed `*mut Box<F>`, so the pointer is read back with
// the same type it was created with and no trait-object cast is involved.
// Unlike the other variants, it calls `thread_start` directly rather than going
// through `CreateThread`.
fn spawn3<F, T>(f: F) -> Result<Handle, ()>
where
    F: FnOnce() -> T,
    F: Send + 'static,
    T: Send + 'static,
{
    // Start routine, generic over the closure type: rebuilds the `Box<Box<F>>`
    // and calls it, then reports success with a non-zero handle.
    extern "system" fn thread_start<F, T>(f: *mut Box<F>) -> Handle
    where
        F: FnOnce() -> T,
        F: Send + 'static,
        T: Send + 'static
    {
        unsafe { Box::from_raw(f)(); }

        Handle(1)
    }

    // Double-box `f` and release ownership; the type is `*mut Box<F>`.
    let f = Box::into_raw(Box::new(Box::new(f)));

    let handle = thread_start(f);

    // `thread_start` always returns `Handle(1)`, so the `Ok` branch is always taken.
    if handle.0 != 0 {
        Ok(handle)
    } else {
        unsafe { Box::from_raw(f); }
        Err(())
    }
}

// More explicit type in type-cast in `thread_start`. Does not compile.
/*
fn spawn4<F, T>(f: F) -> Result<Handle, ()>
where
    F: FnOnce() -> T,
    F: Send + 'static,
    T: Send + 'static,
{
    extern "system" fn thread_start(f: *mut u8) -> u32 {
        unsafe {
            Box::from_raw(
                // f as *mut Box<dyn FnOnce() -> (dyn Send + 'static) + Send + 'static>
                // f as *mut Box<dyn FnOnce() -> (dyn Sized + Send + 'static) + Send + 'static>
            )();
        }
        0
    }

    let f = Box::into_raw(Box::new(Box::new(f)));

    let handle = CreateThread(
        thread_start,
        f as *mut _
    );

    if handle.0 != 0 {
        Ok(handle)
    } else {
        unsafe { Box::from_raw(f); }
        Err(())
    }
}
*/

// Like `spawn2`, but with `+ Send + 'static`.
// NOTE(review): the extra bounds on the trait-object type do not change the
// mismatch: the pointer is still created as `*mut Box<F>` and read back as a
// pointer to a boxed trait object.
fn spawn5<F, T>(f: F) -> Result<Handle, ()>
where
    F: FnOnce() -> T,
    F: Send + 'static,
    T: Send + 'static,
{
    // `kernel32.CreateThread` like start routine.
    extern "system" fn thread_start(f: *mut u8) -> u32 {
        unsafe {
            Box::from_raw(
                f as *mut Box<dyn FnOnce() + Send + 'static>
            )();
        }
        0
    }

    // Double-box `f` and release ownership; the type is inferred as `*mut Box<F>`.
    let f = Box::into_raw(Box::new(Box::new(f)));

    let handle = CreateThread(
        thread_start,
        f as *mut _
    );

    if handle.0 != 0 {
        Ok(handle)
    } else {
        // Failure path: rebuild the box so that it is dropped here.
        unsafe { Box::from_raw(f); }
        Err(())
    }
}

For all versions other than spawn3, the actual code inside the closure is stripped out of the binary by the compiler, so it can never work. I tried this in my minimal #![no_std] crate by calling user32.MessageBox in the closure, and it does not appear in the list of imported functions inside the binary. It also crashes in my implementation of __chkstk. While debugging, I can see that the parameter passed to the function in the rax register (special calling convention) contains a memory address instead of the size of the closure; the loop decrements the parameter by 0x1000 each time and touches the next stack page until the stack overflows.

Generic kernel32.CreateThread

spawn3 is the only variant that actually works. But I cannot use this for the real kernel32.CreateThread, because imported C functions and their parameters cannot be generic in Rust (error[E0044]: foreign items may not have type parameters):

// Attempted generic binding for `kernel32.CreateThread`.
// NOTE: this does not compile. Items in an `extern` block may not have type
// parameters (error[E0044]); it is shown only to illustrate the attempt.
#[link(name = "kernel32", kind = "dylib")]
extern "system" {
    fn CreateThread<
        F: Send + 'static + FnOnce() -> T,
        T: Send + 'static
    >(
        security_attributes: *const u8,
        stack_size: usize,
        start_address: extern "system" fn(*mut Box<F>) -> u32,
        parameter: *mut Box<F>,
        attributes: u32,
        id: *mut u32
    ) -> usize;
}

I guess it should be possible and I'm just doing something wrong, since it works in libstd.

multithreading
rust
asked on Stack Overflow Jan 16, 2020 by Maurice Kayser • edited Jan 16, 2020 by Maurice Kayser

1 Answer

2

On the line

let p = Box::new(Box::new(f));

you are creating a Box<Box<F>>. The two boxes here are thin pointers, because F happens to be Sized here and because Box<T> is always Sized.

On the line

unsafe { Box::from_raw(main as *mut Box<dyn FnOnce()>)(); }

you are trying to interpret the inner Box as a Box<dyn FnOnce()>. Box<dyn FnOnce()> is a fat pointer: a raw pointer coupled with some auxiliary data – in the case of dyn Trait types, the auxiliary data is a pointer to the vtable.

In order for your code to work, you need to actually create a Box<dyn FnOnce()>. To do this, you need to cast the inner Box, like this:

let p = Box::new(Box::new(f) as Box<dyn FnOnce()>);

Now, this isn't enough; the cast isn't valid because F implements FnOnce() -> T, not FnOnce() (which is a shorthand for FnOnce() -> ()). Changing the bound F: FnOnce() -> T to F: FnOnce() and removing the now redundant T on spawn_std_like and try_spawn_std_like will solve this. Another option is to wrap f in a closure that returns ():

let p = Box::new(Box::new(|| { f(); }) as Box<dyn FnOnce()>);
answered on Stack Overflow Jan 16, 2020 by Francis Gagné • edited Jan 16, 2020 by Francis Gagné

User contributions licensed under CC BY-SA 3.0