This page is from the Fall 2013 course.
It may be replaced with an updated version later this semester. [Permalink to this version]

Class 19: Making a Process

Making a Process

let mut prog = run::Process::new(program, argv, options);

What needs to happen for this code to create a new process?

src/libstd/run.rs

impl Process {
    /**
     * Spawns a new Process.
     *
     * # Arguments
     *
     * * prog - The path to an executable.
     * * args - Vector of arguments to pass to the child process.
     * * options - Options to configure the environment of the process,
     *             the working directory and the standard IO streams.
     */
    pub fn new(prog: &str, args: &[~str], options: ProcessOptions) ->
    Process {
        let ProcessOptions { env, dir, in_fd, out_fd, err_fd } =
    options;
        let env = env.as_ref().map(|a| a.as_slice());
        let cwd = dir.as_ref().map(|a| a.as_str().unwrap());
        fn rtify(fd: Option<c_int>, input: bool) ->
    process::StdioContainer {
            match fd {
                Some(fd) => process::InheritFd(fd),
                None => process::CreatePipe(input, !input),
            }
        }
        let rtio = [rtify(in_fd, true), rtify(out_fd, false),
                    rtify(err_fd, false)];
        let rtconfig = process::ProcessConfig {
            program: prog,
            args: args,
            env: env,
            cwd: cwd,
            io: rtio,
        };
        let inner = process::Process::new(rtconfig).unwrap();
        Process { inner: inner }
    }

src/libstd/task/mod.rs

    /**
     * Creates and executes a new child task
     *
     * Sets up a new task with its own call stack and schedules it to
    run
     * the provided unique closure. The task has the properties and
    behavior
     * specified by the task_builder.
     *
     * # Failure
     *
     * When spawning into a new scheduler, the number of threads
    requested
     * must be greater than zero.
     */
    pub fn spawn(&mut self, f: ~fn()) {
        let gen_body = self.gen_body.take();
        let notify_chan = self.opts.notify_chan.take();
        let name = self.opts.name.take();
        let x = self.consume();
        let opts = TaskOpts {
            linked: x.opts.linked,
            supervised: x.opts.supervised,
            watched: x.opts.watched,
            indestructible: x.opts.indestructible,
            notify_chan: notify_chan,
            name: name,
            sched: x.opts.sched,
            stack_size: x.opts.stack_size
        };
        let f = match gen_body {
            Some(gen) => {
                gen(f)
            }
            None => {
                f
            }
        };
        spawn::spawn_raw(opts, f);
    }

What is a GreenTask?

    // The primary function for changing contexts. In the current
    // design the scheduler is just a slightly modified GreenTask, so
    // all context swaps are from Task to Task. The only difference
    // between the various cases is where the inputs come from, and
    // what is done with the resulting task. That is specified by the
    // cleanup function f, which takes the scheduler and the
    // old task as inputs.

    pub fn change_task_context(mut ~self,
                               next_task: ~Task,
                               f: &fn(&mut Scheduler, ~Task)) {
        // The current task is grabbed from TLS, not taken as an input.
        // Doing an unsafe_take to avoid writing back a null pointer -
        // We're going to call `put` later to do that.
        let current_task: ~Task = unsafe { Local::unsafe_take() };

        // Check that the task is not in an atomically() section (e.g.,
        // holding a pthread mutex, which could deadlock the scheduler).
        current_task.death.assert_may_sleep();

        // These transmutes do something fishy with a closure.
        let f_fake_region = unsafe {
            transmute::<&fn(&mut Scheduler, ~Task),
                        &fn(&mut Scheduler, ~Task)>(f)
        };
        let f_opaque = ClosureConverter::from_fn(f_fake_region);

        // The current task is placed inside an enum with the cleanup
        // function. This enum is then placed inside the scheduler.
        self.cleanup_job = Some(CleanupJob::new(current_task,
        f_opaque));

        // The scheduler is then placed inside the next task.
        let mut next_task = next_task;
        next_task.sched = Some(self);

        // However we still need an internal mutable pointer to the
        // original task. The strategy here was "arrange memory, then
        // get pointers", so we crawl back up the chain using
        // transmute to eliminate borrowck errors.
        unsafe {

            let sched: &mut Scheduler =
                transmute_mut_region(*next_task.sched.get_mut_ref());

            let current_task: &mut Task = match sched.cleanup_job {
                Some(CleanupJob { task: ref task, _ }) => {
                    let task_ptr: *~Task = task;
                    transmute_mut_region(*transmute_mut_unsafe(task_ptr))
                }
                None => {
                    rtabort!("no cleanup job");
                }
            };

            let (current_task_context, next_task_context) =
                Scheduler::get_contexts(current_task, next_task);

            // Done with everything - put the next task in TLS. This
            // works because due to transmute the borrow checker
            // believes that we have no internal pointers to
            // next_task.
            Local::put(next_task);

            // The raw context swap operation. The next action taken
            // will be running the cleanup job from the context of the
            // next task.
            Context::swap(current_task_context, next_task_context);
        }

        // When the context swaps back to this task we immediately
        // run the cleanup job, as expected by the previously called
        // swap_contexts function.
        unsafe {
            let task: *mut Task = Local::unsafe_borrow();
            (*task).sched.get_mut_ref().run_cleanup_job();

            // Must happen after running the cleanup job (of course).
            (*task).death.check_killed((*task).unwinder.unwinding);
        }
    }

What is the point of current_task.death.assert_may_sleep()?

src/libstd/rt/context.rs

/* Switch contexts

    Suspend the current execution context and resume another by
    saving the registers values of the executing thread to a Context
    then loading the registers from a previously saved Context.
    */
    pub fn swap(out_context: &mut Context, in_context: &Context) {
        rtdebug!("swapping contexts");
        let out_regs: &mut Registers = match out_context {
            &Context { regs: ~ref mut r, _ } => r
        };
        let in_regs: &Registers = match in_context {
            &Context { regs: ~ref r, _ } => r
        };

        rtdebug!("noting the stack limit and doing raw swap");

        unsafe {
            // Right before we switch to the new context, set the new context's
            // stack limit in the OS-specified TLS slot. This also  means that
            // we cannot call any more rust functions after record_stack_bounds
            // returns because they would all likely fail due to the limit being
            // invalid for the current task. Lucky for us `swap_registers` is a
            // C function so we don't have to worry about that!
            match in_context.stack_bounds {
                Some((lo, hi)) => record_stack_bounds(lo, hi),
                // If we're going back to one of the original contexts or
                // something that's possibly not a "normal task", then reset
                // the stack limit to 0 to make morestack never fail
                None => record_stack_bounds(0, uint::max_value),
            }
            swap_registers(out_regs, in_regs)
        }
    }
#[inline(always)]
pub unsafe fn record_stack_bounds(stack_lo: uint, stack_hi: uint) {
    // When the old runtime had segmented stacks, it used a calculation that was
    // "limit + RED_ZONE + FUDGE". The red zone was for things like dynamic
    // symbol resolution, llvm function calls, etc. In theory this red zone
    // value is 0, but it matters far less when we have gigantic stacks because
    // we don't need to be so exact about our stack budget. The "fudge factor"
    // was because LLVM doesn't emit a stack check for functions < 256 bytes in
    // size. Again though, we have giant stacks, so we round all these
    // calculations up to the nice round number of 20k.
    record_sp_limit(stack_lo + RED_ZONE);

    return target_record_stack_bounds(stack_lo, stack_hi);

    #[cfg(not(windows))] #[cfg(not(target_arch = "x86_64"))] #[inline(always)]
    unsafe fn target_record_stack_bounds(_stack_lo: uint, _stack_hi: uint) {}
    #[cfg(windows, target_arch = "x86_64")] #[inline(always)]
    unsafe fn target_record_stack_bounds(stack_lo: uint, stack_hi: uint) {
        // Windows compiles C functions which may check the stack bounds. This
        // means that if we want to perform valid FFI on windows, then we need
        // to ensure that the stack bounds are what they truly are for this
        // task. More info can be found at:
        //   https://github.com/mozilla/rust/issues/3445#issuecomment-26114839
        //
        // stack range is at TIB: %gs:0x08 (top) and %gs:0x10 (bottom)
        asm!("mov $0, %gs:0x08" :: "r"(stack_lo) :: "volatile");
        asm!("mov $0, %gs:0x10" :: "r"(stack_hi) :: "volatile");
    }
}

Why does this code need #[inline(always)]?

Ltmp4:
        .cfi_def_cfa_register %rbp
    movl    $3405691582, %eax
    ## InlineAsm Start
        movq $0x60+90*8, %rsi
        movq %rax, %gs:(%rsi)
        ## InlineAsm End
        popq    %rbp
        ret
        .cfi_endproc

src/rt/arch/x86_64/_context.S

// swap_registers(registers_t *oregs, registers_t *regs)
.globl SWAP_REGISTERS
SWAP_REGISTERS:
        // n.b. when we enter, the return address is at the top of
        // the stack (i.e., 0(%RSP)) and the argument is in
        // RUSTRT_ARG0_S.  We
        // simply save all NV registers into oregs.
        // We then restore all NV registers from regs.  This restores
        // the old stack pointer, which should include the proper
        // return address. We can therefore just return normally to
        // jump back into the old code.

        // Save instruction pointer:
        pop %rax
        mov %rax, (RUSTRT_IP*8)(RUSTRT_ARG0_S)

        // Save non-volatile integer registers:
        //   (including RSP)
        mov %rbx, (RUSTRT_RBX*8)(ARG0)
        mov %rsp, (RUSTRT_RSP*8)(ARG0)
        mov %rbp, (RUSTRT_RBP*8)(ARG0)
        mov %r12, (RUSTRT_R12*8)(ARG0)
        mov %r13, (RUSTRT_R13*8)(ARG0)
        mov %r14, (RUSTRT_R14*8)(ARG0)
        mov %r15, (RUSTRT_R15*8)(ARG0)

#if defined(__MINGW32__) || defined(_WINDOWS)
        mov %rdi, (RUSTRT_RDI*8)(ARG0)
        mov %rsi, (RUSTRT_RSI*8)(ARG0)
#endif

        // Save 0th argument register:
        mov ARG0, (RUSTRT_ARG0*8)(ARG0)

        // Save non-volatile XMM registers:
#if defined(__MINGW32__) || defined(_WINDOWS)
        movapd %xmm6, (RUSTRT_XMM6*8)(ARG0)
        movapd %xmm7, (RUSTRT_XMM7*8)(ARG0)
        movapd %xmm8, (RUSTRT_XMM8*8)(ARG0)
        movapd %xmm9, (RUSTRT_XMM9*8)(ARG0)
        movapd %xmm10, (RUSTRT_XMM10*8)(ARG0)
        movapd %xmm11, (RUSTRT_XMM11*8)(ARG0)
        movapd %xmm12, (RUSTRT_XMM12*8)(ARG0)
        movapd %xmm13, (RUSTRT_XMM13*8)(ARG0)
        movapd %xmm14, (RUSTRT_XMM14*8)(ARG0)
        movapd %xmm15, (RUSTRT_XMM15*8)(ARG0)
#else
        movapd %xmm0, (RUSTRT_XMM0*8)(ARG0)
        movapd %xmm1, (RUSTRT_XMM1*8)(ARG0)
        movapd %xmm2, (RUSTRT_XMM2*8)(ARG0)
        movapd %xmm3, (RUSTRT_XMM3*8)(ARG0)
        movapd %xmm4, (RUSTRT_XMM4*8)(ARG0)
        movapd %xmm5, (RUSTRT_XMM5*8)(ARG0)
#endif

        // Restore non-volatile integer registers:
        //   (including RSP)
        mov (RUSTRT_RBX*8)(ARG1), %rbx
        mov (RUSTRT_RSP*8)(ARG1), %rsp
        mov (RUSTRT_RBP*8)(ARG1), %rbp
        mov (RUSTRT_R12*8)(ARG1), %r12
        mov (RUSTRT_R13*8)(ARG1), %r13
        mov (RUSTRT_R14*8)(ARG1), %r14
        mov (RUSTRT_R15*8)(ARG1), %r15

#if defined(__MINGW32__) || defined(_WINDOWS)
        mov (RUSTRT_RDI*8)(ARG1), %rdi
        mov (RUSTRT_RSI*8)(ARG1), %rsi
#endif

        // Restore 0th argument register:
        mov (RUSTRT_ARG0*8)(ARG1), ARG0

        // Restore non-volatile XMM registers:
#if defined(__MINGW32__) || defined(_WINDOWS)
        movapd (RUSTRT_XMM6*8)(ARG1), %xmm6
        movapd (RUSTRT_XMM7*8)(ARG1), %xmm7
        movapd (RUSTRT_XMM8*8)(ARG1), %xmm8
        movapd (RUSTRT_XMM9*8)(ARG1), %xmm9
        movapd (RUSTRT_XMM10*8)(ARG1), %xmm10
        movapd (RUSTRT_XMM11*8)(ARG1), %xmm11
        movapd (RUSTRT_XMM12*8)(ARG1), %xmm12
        movapd (RUSTRT_XMM13*8)(ARG1), %xmm13
        movapd (RUSTRT_XMM14*8)(ARG1), %xmm14
        movapd (RUSTRT_XMM15*8)(ARG1), %xmm15
#else
        movapd (RUSTRT_XMM0*8)(ARG1), %xmm0
        movapd (RUSTRT_XMM1*8)(ARG1), %xmm1
        movapd (RUSTRT_XMM2*8)(ARG1), %xmm2
        movapd (RUSTRT_XMM3*8)(ARG1), %xmm3
        movapd (RUSTRT_XMM4*8)(ARG1), %xmm4
        movapd (RUSTRT_XMM5*8)(ARG1), %xmm5
#endif

        // Jump to the instruction pointer
        // found in regs:
        jmp *(RUSTRT_IP*8)(ARG1)

Does the jmp instruction create a new process?

src/libstd/rt/io/native/process.rs

#[cfg(unix)]
fn spawn_process_os(prog: &str, args: &[~str],
                    env: Option<~[(~str, ~str)]>,
                    dir: Option<&Path>,
                    in_fd: c_int, out_fd: c_int, err_fd: c_int) -> SpawnProcessResult {
    #[fixed_stack_segment]; #[inline(never)];

    use libc::funcs::posix88::unistd::{fork, dup2, close, chdir, execvp};
    use libc::funcs::bsd44::getdtablesize;

    mod rustrt {
        #[abi = "cdecl"]
        extern {
            pub fn rust_unset_sigprocmask();
        }
    }

    #[cfg(windows)]
    unsafe fn set_environ(_envp: *c_void) {}
    #[cfg(target_os = "macos")]
    unsafe fn set_environ(envp: *c_void) {
        externfn!(fn _NSGetEnviron() -> *mut *c_void);

        *_NSGetEnviron() = envp;
    }
    #[cfg(not(target_os = "macos"), not(windows))]
    unsafe fn set_environ(envp: *c_void) {
        extern {
            static mut environ: *c_void;
        }
        environ = envp;
    }

    unsafe {

        let pid = fork();
        if pid < 0 {
            fail!("failure in fork: {}", os::last_os_error());
        } else if pid > 0 {
            return SpawnProcessResult {pid: pid, handle: ptr::null()};
        }

        rustrt::rust_unset_sigprocmask();

        if dup2(in_fd, 0) == -1 {
            fail!("failure in dup2(in_fd, 0): {}", os::last_os_error());
        }
        if dup2(out_fd, 1) == -1 {
            fail!("failure in dup2(out_fd, 1): {}", os::last_os_error());
        }
        if dup2(err_fd, 2) == -1 {
            fail!("failure in dup3(err_fd, 2): {}", os::last_os_error());
        }
        // close all other fds
        for fd in range(3, getdtablesize()).invert() {
            close(fd as c_int);
        }

        do with_dirp(dir) |dirp| {
            if !dirp.is_null() && chdir(dirp) == -1 {
                fail!("failure in chdir: {}", os::last_os_error());
            }
        }

        do with_envp(env) |envp| {
            if !envp.is_null() {
                set_environ(envp);
            }
            do with_argv(prog, args) |argv| {
                execvp(*argv, argv);
                // execvp only returns if an error occurred
                fail!("failure in execvp: {}", os::last_os_error());
            }
        }
    }
}