Callback-based C FFI

// C
/* Struct of function pointers for the storage manager.
 * Only the `read_async` member is shown; the other members are elided. */
struct IDiscordStorageManager {
    // ...
    /* Returns nothing itself: the outcome is delivered through `callback`,
     * which receives a result code plus a (pointer, length) pair. */
    void (*read_async) (
        struct IDiscordStorageManager * manager,
        char const * name,
        /* Opaque pointer; presumably handed back unchanged as the first
         * argument of `callback` -- confirm against the library docs. */
        void * callback_data,
        void (*callback) (
            void * callback_data,
            enum EDiscordResult result,
            uint8_t * data,
            uint32_t data_length
        )
    );
    // ...
};

Ok, let's do it step by step:

First, literal C-to-Rust translation

  1. void (*) (...) becomes Option<extern "C" fn(...)> (see the nomicon to understand why there is an Option)

  2. Any argument of type some_type * becomes *mut some_type

    • it can also become Option<non_null_ptr> (such as when non_null_ptr is a Rust reference (&_ or &mut _) or a function pointer, cf. the previous point).

      For instance, if you wanted to use libc's perror function (void perror (char const * s)), it would be perfectly valid to declare its header like this:

      extern "C" { fn perror (s: Option<NonNull<c_char>>); }
      
      • (despite how much we would have liked to use Option<&CStr> here, CStr is currently a DST, thus making &CStr a fat pointer, thus breaking the defined C ABI)

Hence, this is how the struct definition would translate into bare Rust:

// Rust (`-sys`)
use ::std::ffi::CStr;
use ::libc::{c_char, c_int, c_void};

// Do not trust FFI enums: treat them as integers.
// The C side declares `enum EDiscordResult`; on the Rust side the value is
// received as a plain `c_int` instead of a Rust `enum`.
type EDiscordResult = c_int;

#[repr(C)]
struct IDiscordStorageManager {
  // ...
  read_async: Option<extern "C" fn (
    manager: Option<&mut IDiscordStorageManager>,
    name: *const c_char, // not Option<&CStr> because &CStr is fat
    callback_data: *mut c_void,
    callback: Option<extern "C" fn (
      callback_data: *mut c_void,
      result: EDiscordResult,
      data: *mut u8,
      data_length: u32,
    )>,
  )>,
  // ...

This would be a typical definition in a Rust -sys crate. See @kornel's guide for more info about it.

Improving ergonomics

Of course, you shouldn't be carrying such a low-level unsafe struct / type definition around your Rust code. So the first thing to do is to improve the API so that it matches Rust's high-level style:

  • use references instead of pointers,

    • and fat references instead of pointer + size: data: *mut u8 + data_length: u32 above should become data: &mut [u8] in the API

    • in the same vein, callback_data: *mut c_void + callback: fn(callback_data: *mut c_void, ...) should become callback: impl FnMut(...)

    • now you can use &CStr instead of *const c_char, since conversion from the former to the latter is just a matter of calling the .as_ptr() method.

Example

Here is an example for the inner callback, that shows there is no need to use trait objects when resorting to a high level API:

use ::std::*;
use ::libc::{
    c_void,
    c_int,
};

mod lib {
    use super::*;

    // Do not trust FFI enums
    mod enum_discord_result {
        use super::*;

        /// Result code of the C library, as a proper Rust enum.
        ///
        /// Use [`EDiscordResult::try_from_int`] to obtain one from the raw
        /// integer received over FFI.
        ///
        /// The common traits are derived so that results can be copied,
        /// compared (`==`, `assert_eq!`) and used as map keys.
        #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
        #[repr(C)]
        pub
        enum EDiscordResult {
            A,
            B,
            C,
        }

        // Values from FFI cannot be trusted and must thus be seen as ints.
        /// Raw integer representation of an [`EDiscordResult`] as it crosses
        /// the FFI boundary.
        pub
        type _EDiscordResult = c_int;
        // One integer constant per variant, so that they can be used as
        // patterns in `try_from_int`.
        const _EDISCORD_RESULT_A: c_int = EDiscordResult::A as _;
        const _EDISCORD_RESULT_B: c_int = EDiscordResult::B as _;
        const _EDISCORD_RESULT_C: c_int = EDiscordResult::C as _;

        impl EDiscordResult {
            /// Converts a raw integer into the matching variant.
            ///
            /// Returns `None` when `value` is not the discriminant of any
            /// variant.
            pub
            fn try_from_int (value: c_int) -> Option<Self>
            {
                Some(match value {
                    // always use self:: when matching against constants
                    // to prevent binding against a catch-all variable
                    self::_EDISCORD_RESULT_A => EDiscordResult::A,
                    self::_EDISCORD_RESULT_B => EDiscordResult::B,
                    self::_EDISCORD_RESULT_C => EDiscordResult::C,
                    _ => return None,
                })
            }
        }
    }
    pub use enum_discord_result::*;

    /// Nullable pointer to a C-ABI callback (`None` stands for `NULL`).
    ///
    /// The callback receives the opaque `callback_data` pointer, the raw
    /// (unvalidated) result code, and a `(data, data_length)` byte buffer.
    pub type CallbackFptr = Option<
        unsafe extern "C" fn(
            callback_data: *mut c_void,
            result: _EDiscordResult,
            data: *mut u8,
            data_length: u32,
        )
    >;

    /// The `(callback_data, callback)` pair to hand over to the C side.
    ///
    /// Built by `ReadAsyncCallbackArg::new` from a mutable borrow of a
    /// closure; the `'data` lifetime is that borrow's lifetime.
    pub struct ReadAsyncCallbackArg<'data> {
        /// Type-erased borrow of the closure's environment.
        pub data: &'data mut c_void,

        /// Function pointer to pass alongside `data`.
        pub fptr: CallbackFptr,
    }

    impl<'data> ReadAsyncCallbackArg<'data> {
        /// Builds the `(data, fptr)` pair for the closure behind
        /// `at_closure_env`.
        ///
        /// * `data` is the closure borrow with its type erased to `c_void`;
        /// * `fptr` is `c_callback::<Env>`, which casts `data` back to
        ///   `*mut Env` before calling the closure.
        ///
        /// The closure stays borrowed for `'data`.
        pub
        fn new <Env : Sized> (
            at_closure_env: &'data mut Env,
        ) -> Self
        where
            Env : FnMut(EDiscordResult, &mut [u8]),
        {
            // Explicit pointer casts instead of `mem::transmute`: the
            // compiler checks that this is a thin-pointer-to-thin-pointer
            // conversion, and each step is visible to the reader.
            let erased: *mut c_void = at_closure_env as *mut Env as *mut c_void;
            ReadAsyncCallbackArg {
                // SAFETY: `erased` comes from a live `&'data mut Env`, so it
                // is non-null and stays valid for `'data`. This module only
                // uses the value as an opaque address that `c_callback::<Env>`
                // casts back to `*mut Env`.
                // NOTE(review): a `*mut c_void` field would express the
                // erasure more faithfully than `&mut c_void`; kept as a
                // reference so the struct's public field type is unchanged.
                data: unsafe { &mut *erased },
                fptr: Some(
                    // monomorphised function (thus non-generic)
                    c_callback::<Env>
                ),
            }
        }
    }

    // This could be seen as a "function constructor":
    // for each concrete Env type parameter,
    // a new static function is defined by monomorphisation
    //
    /// C-ABI trampoline that forwards a callback invocation to a Rust
    /// closure of type `Env`.
    ///
    /// It validates the raw arguments, turns them into their high-level
    /// counterparts (`EDiscordResult`, `&mut [u8]`) and calls the closure.
    ///
    /// Failure handling:
    /// * a null `callback_data`, a null `data` with a non-zero
    ///   `data_length`, or an unknown `result` value prints a diagnostic and
    ///   exits the process with status 1;
    /// * a null `data` with `data_length == 0` is treated as an empty
    ///   buffer (a common C convention for "no data");
    /// * a panic inside the closure aborts the process instead of unwinding
    ///   into the C caller.
    ///
    /// # Safety
    ///
    /// * `callback_data` must be null or point to a live `Env` that is not
    ///   accessed by anything else for the duration of the call;
    /// * `data` must be null or valid for reads and writes of `data_length`
    ///   bytes for the duration of the call.
    unsafe extern "C"
    fn c_callback<Env : Sized> (
        callback_data: *mut c_void,
        result: _EDiscordResult,
        data: *mut u8,
        data_length: u32,
    )
    where
        Env : FnMut(EDiscordResult, &mut [u8]),
    {
        // Prevent unwinding across the FFI
        ::scopeguard::defer_on_unwind!({
            process::abort();
        });
        // Prints the message and the raw arguments, then exits the process.
        macro_rules! failwith {
            (
                $expr:expr $(, $($extra:tt)* )?
            ) => ({
                eprintln!(
                    "c_callback error: {}",
                    format_args!($expr $(, $($extra)* )?),
                );
                dbg!((
                    callback_data,
                    result,
                    data,
                    data_length,
                ));
                process::exit(1);
            })
        }

        let bytes: &mut [u8] = if data.is_null() {
            // `slice::from_raw_parts_mut` requires a non-null pointer even
            // for a zero length, so the "NULL + 0" case gets a genuine
            // empty slice; a null pointer with a non-zero length is an error.
            if data_length != 0 {
                failwith!("null ptr");
            }
            &mut []
        } else {
            // SAFETY: `data` is non-null, and the caller guarantees it is
            // valid for reads and writes of `data_length` bytes.
            unsafe { slice::from_raw_parts_mut(data, data_length as usize) }
        };

        // Unwraps an `Option` or fails with the given message.
        macro_rules! ffi_unwrap {($expr:expr, $msg:expr $(,)?) => (
            if let Some(inner) = $expr { inner } else { failwith!($msg) }
        )}

        // Undo the type erasure performed when the pointer was created.
        let at_env_raw_ptr: *mut Env = callback_data as *mut Env;
        let at_env: &mut Env = ffi_unwrap!(
            // SAFETY: the caller guarantees that a non-null `callback_data`
            // points to a live, exclusively accessible `Env`.
            unsafe { at_env_raw_ptr.as_mut() }, "null ptr",
        );

        let result = ffi_unwrap!(EDiscordResult::
            try_from_int(result), "Invalid EDiscordResult enum",
        );

        // For each given Env type parameter,
        // Rust knows how to call this since it is using the static address
        // <Env as FnMut<_>>::call_mut(at_env, result, data)
        // (this is the only part of the code that depends on the Env type)
        at_env(result, bytes);
    }
}

// ################
// #  Testing it  #
// ################

use lib::*;

/// Stand-in for the C library: invokes `callback_fptr` (when it is not
/// null) with `callback_data`, a zero result code and a two-byte buffer.
///
/// # Safety
///
/// `callback_data` must satisfy whatever `callback_fptr` requires of its
/// first argument.
unsafe extern "C" fn call_cb_with_dummy_args(
    callback_data: *mut c_void,
    callback_fptr: CallbackFptr,
) {
    // A null function pointer means there is nothing to call.
    let fptr = match callback_fptr {
        Some(fptr) => fptr,
        None => return,
    };

    // create dummy args
    let status: _EDiscordResult = 0;
    let mut payload: [u8; 2] = [0x2a, 0x45];
    let payload_len = payload.len() as u32;

    fptr(callback_data, status, payload.as_mut_ptr(), payload_len);
}

// Demonstration driver: wraps a closure with `ReadAsyncCallbackArg::new`,
// invokes it through `call_cb_with_dummy_args`, and checks that it ran.
fn main ()
{
    let mut flag: bool = false;
    dbg!(flag);
    // Address of `flag`, recorded before the closure below borrows it.
    let flag_address: *mut bool = &mut flag;
    // The closure writes to `flag`, so its environment holds a reference to it.
    let mut env = |result: EDiscordResult, data: &mut [u8]| {
        flag = true;
        dbg!(result);
        dbg!(data);
    };
    // this is just to help illustrate what a closure really is:
    // just the captured environment
    unsafe {
        // The closure value is exactly one pointer wide...
        assert_eq!(mem::size_of_val(&env), mem::size_of::<*mut bool>());
        // ...and those bytes, read back as a `*mut bool`, are the address
        // of `flag`.
        assert_eq!(
            dbg!(mem::transmute_copy::<_, *mut bool>(&env)),
            dbg!(flag_address),
        );
    }
    // Split the closure into the (data pointer, function pointer) pair.
    let c_callback = ReadAsyncCallbackArg::new(&mut env);
    unsafe {
        call_cb_with_dummy_args(
            c_callback.data,
            c_callback.fptr,
        );
    }
    // The closure must have been called and must have set the flag.
    assert!(dbg!(flag));
}
5 Likes