Interator with C list with ffi

Hello,

I'm attempting to wrap some C library in Rust and I'm facing difficulties with this.

I managed to reproduce my use case as simple as possible. In this context he C library has a list structure on which current element members can be accessed. My goal is to be able to iterate over the list in Rust from a for loop. The example in the documentation is not clear enough for my ffi case regarding how the implement the IntoIterator trait should be implemented.
Some corresponding minimal code below reproducing the library implementation.

Does anyone has any better understanding of the relevant involved concepts? Then please feel free to share your hints.
Is what I'd like to achieve feasible at all in Rust? I'm progressively doubting that their is the code interface for this but I lack experience with these topics.

Thanks in advance for constructive contributions.

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

struct tEntry {
    char *name;
    void *ptr;
    /* some other members, like the type
     * to the void pointed variable */
};

struct tList {
    struct tEntry **entry;
    int len;
} data = {NULL, 0};

void free_list(struct tList *l)
{
    if (l != NULL) {
        if (l->entry != NULL) {
            int i;
            for(i=0; l->entry[i]!=NULL; i++) {
                free(l->entry[i]);
                l->entry[i] = NULL;
            }
            free(l->entry);
            l->entry = NULL;
            l->len = 0;
        }
    }
}

static struct tEntry *alloc_entry(const char *name, void *var)
{
    struct tEntry *e;
    if ((e = malloc( sizeof(struct tEntry) )) != NULL) {
        e->ptr = var;
        if ((e->name = strdup(name)) != NULL)
            return e;
        else
            free(e);
    }
    return NULL;
}

static int alloc_list(struct tList *l, const char **names)
{
    if (l == NULL || names == NULL)
        return -1;
    else {
        if (l->entry != NULL)
            free_list(l);

        for(l->len=0; 1; l->len++) {
            if (names[l->len]!=NULL)
                break;
        }

        if ((l->entry = malloc( sizeof(struct tEntry *) * l->len )) == NULL)
            return -1;
        else {
            int i;
            for(i=0; i<l->len; i++) {
                if ((l->entry[i] = alloc_entry(names[i], NULL)) == NULL) {
                    int j;
                    for(j=i-1; j>=0; j--) {
                        free(l->entry[i]->name);
                        free(l->entry[i]);
                    }
                    free(l->entry);
                    return -1;
                }
            }
            return 0;
        }
    }
}

struct tList *alloc_data(void)
{
    struct tList *l;
    if ((l = malloc( sizeof(struct tList) )) != NULL) {
	const char *names[] = {"foo", "bar", NULL};
	l->entry = NULL;
	l->len = 0;
	if (alloc_list(l, names) < 0)
	    free(l);
	else
	    return l;
    }
    return NULL;
}

int get_len(struct tList *l)
{
    return (l == NULL) ? 0 : l->len;
}

char *get_name(struct tList *l, int i)
{
    if (l == NULL)
        return NULL;
    else if (i < 0 || i >= l->len)
        return NULL;
    else
        return l->entry[i]->name;
}

use std::{
    iter::Iterator,
    ptr,
    ffi::{c_int, c_char},
};

#[repr(C)]
struct tList {
    _private: [u8; 0],
}

extern "C" {
    fn alloc_data() -> *const tList;
    fn free_list(l: *const tList);
    
    fn get_len(l: *const tList) -> c_int;
    fn get_name(l: *const tList, i: c_int) -> *const c_char;
}

pub struct Data {
    ptr: *const tList,
}

pub struct DataIter {
    ptr: *const tList,
    // iter related stuff
    len: usize,
    pos: usize,
}

impl Data {
    fn new() -> Option<Self> {
        unsafe {
            let ptr: *const tList = alloc_data();
            match ptr.is_null() {
                true => None,
                false => Some(Data {ptr}),
            }
        }
    }
}

impl Drop for Data {
    fn drop(&mut self) {
        match self.ptr.is_null() {
            true => {},
            false => unsafe {
                free_list(self.ptr);
            }
        }
    }
}

impl IntoIterator for Data {
    type Item = DataIter; // yes it's no Data but DataIter struct, otherwise I'm getting other problems with next() method implementation and I found documentation only regarding into_iter() method impl but not regarding iter() or iter_mut()
    type IntoIter: Iterator<Self::Item>; // I just don't understand 
    // which value should I set to this IntoIter trait variable 

    fn into_iter(self) -> Self::IntoIter {
        match self.ptr.is_null() {
            true => {
                let p: *const tList = ptr::null();
                let i = DataIter {ptr: p, len: 0, pos: 0};
                // no clue how should my i variable be handled 
                // to become an Iterator from here
            }
            false => unsafe {
                let len = get_len(self.ptr) as usize;
                let i = DataIter {ptr: self.ptr, len: len, pos: 0};
                // no clue how should my i variable be handled 
                // to become an Iterator from here
            }
        }
    }
}

Current error faced:

error: associated type in `impl` without body
  --> src\main.rs:73:5
   |
73 |     type IntoIter: Iterator<Self::Item>; // I just don't understand
   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-
   |                                        |
   |                                        help: provide a definition for the type: `= <type>;`

Initially I had a try with the following Iterator implementation but I got so stuck regarding references that I abandoned this idea for the above implementation try:

pub struct Data {
    ptr: *const tList,
    // iter related stuff
    len: usize,
    pos: usize,
}

impl Iterator for Data {
    type Item = Data;
    
    fn next(&mut self) -> Option<Self::Item> {
        self.pos += 1;
        if self.pos < self.len {
            Some(self)
        } else {
            None
        }
    }
}

Corresponding error I couldn't fix:

error[E0308]: mismatched types
  --> src\main.rs:72:18
   |
72 |             Some(self)
   |             ---- ^^^^ expected `Data`, found `&mut Data`
   |             |
   |             arguments to this enum variant are incorrect
   |
help: the type constructed contains `&mut Data` due to the type of the argument passed

It's odd to have DataIter as the item type. I would think you want to implement Iterator for DataIter and then make that the IntoIter type, with the Item type being a (possibly wrapped) tEntry. Also, since you are giving up ownership of Data, DataIter should also implement Drop.

Hmm, there's a lot to cover here.

Let's start with an overview on the iterator traits and how they're typically implemented. This will be a bit different from your situation involving raw pointers, but it will be useful for understanding the concepts.

After that I'll move on to implementing your iterator (well, at least an iterator, I had to guess at what exactly you want). And then we'll iteratively improve the code[1] to add some more unsafe encapsulation.

We won't get all the way to a perfectly encapsulated interface, but I think it will still be informative. There's a playground link at the bottom, but I didn't include all the intermediate steps, so you might have to read this overly-long post to understand why everything is how it is.

Typical iterator patterns

Generally, you have your data-holding struct and your iterator structs. Your main data holding struct shouldn't be your iterator struct, because that forces you to

  • Carry around iterator state all the time
  • Deal with iterator invalidation all the time

Additionally, you have to decide what kind of iterators you want; the three most common are

  • Owning iterators that consume the data from the original struct
  • Shared borrow iterators that hand out, typically, & references to the data; these don't consume the data from the original struct
  • Exclusive borrow iterators that hand out, typically, &mut reference to the data; these don't consume the data from the original struct (but may mutate it even without shared mutability)

Let's call them...

struct IntoIter    { /* ... */ } // Owning iterator
struct Iter<'_>    { /* ... */ } // Shared borrow iterator
struct IterMut<'_> { /* ... */ } // Exclusive borrow iterator

The borrowing iterators carry lifetimes so that the data-owning structure remains borrowed while the iterators still exist (and because that's just how reference-hodling structs work).

And their implementations would look something like

impl Iterator for IntoIter {
    type Item = SomeData;
    fn next(&mut self) -> Option<Self::Item> { ... }
}
impl<'a> Iterator for Iter<'a> {
    type Item = &'a SomeData;
    fn next(&mut self) -> Option<Self::Item> { ... }
}
impl<'a> Iterator for IterMut<'a> {
    type Item = &'a mut SomeData;
    fn next(&mut self) -> Option<Self::Item> { ... }
}

To create the owned version, you typically have an IntoIterator implementation on the owning struct.

impl IntoIterator for Data {
    type Item = SomeData; // The type that Self::IntoIter specified
    type IntoIter = IntoIter; // Your owning iterator type
    fn into_iter(self) -> Self::IntoIter { ... }
}

Note how Item isn't the iterator type, it's the associated Item type of the iterator type. We're just threading that information through the traits. You could rewrite it like so, even:

impl IntoIterator for Data {
    type Item = <IntoIter as Iterator>::Item;
    type IntoIter = IntoIter;
    fn into_iter(self) -> Self::IntoIter { ... }
}

For the borrowing iterators, you usually have methods on your owning struct:

impl Data {
    fn iter(&self) -> Iter<'_> { ... }
    fn iter_mut(&mut self) -> IterMut<'_> { ... }
}

And you can also add IntoIterator definitions on references to your owning struct:

impl<'a> IntoIterator for &'a Data {
    type Item = &'a SomeData;
    type IntoIter = Iter<'a>
    fn into_iter(self) -> Self::IntoIter {
        self.iter()
    }
}
// Similarly for `&'a mut Data` with `IterMut<'a>`

Hopefully that all explains how iterator types and implementations typically work, and what is expected from the associated types like Item and IntoIter.

Your iterator (owning)

Let's apply the above pattern to your code, for the owning iterator pattern -- that will avoid the need to think about lifetimes.

You have your Data type...

pub struct Data {
    ptr: *const tList,
}

impl Data { ... }

...it owns the data and deallocates it when it drops. Then you'll want a different type for your owning iterator, let's say it's DataIter....

pub struct DataIter {
    data: Data,
    // iter related stuff
    pos: i32,
}

A few notes here:

  • Since this is an owning iterator, I hold a Data, not a *const tList
    • Otherwise the data won't be deallocated once the iterator goes away
    • And conversely you could deallocate the Data while the iterator is still around, causing your references to dangle (probably causing UB)[2]
  • I got rid of the len field because it didn't seem needed, as I'll point out later
  • pos is i32 since that's what get_name expects[3]

Next to implement iterator. What sort of items are you expecting? I just guessed here that you're trying to get pointers to the names (hence my note about get_name). That could be this...

impl Iterator for DataIter {
    type Item = *const c_char;
    // ... next returns `Option<*const c_char>` ...
}

...however, I would like to draw your attention to the NonNull<_> type.

Unlike *mut T, the pointer must always be non-null, even if the pointer is never dereferenced. This is so that enums may use this forbidden value as a discriminant – Option<NonNull<T>> has the same size as *mut T.

So Option<NonNull<c_char>> will be similar to a *mut c_char, where null corresponds to None and non-null corresponds to Some(_). This lines up with the iterator trait quite well! We want *const and not *mut, so there will be a bit of casting I suppose, but let's give this a shot:

impl Iterator for DataIter {
    type Item = NonNull<c_char>;
    fn next(&mut self) -> Option<NonNull<c_char>> {
        todo!()
    }
}

Now for the body. We can get a *const c_char -- possibly null -- from the C get_name function. Looking at your implementation, it already does all the checks necessary on the index against the length. This is the reason I dropped the len field from the iterator.[4][5]

     fn next(&mut self) -> Option<NonNull<c_char>> {
+        let item_ptr: *const c_char = unsafe {
+            get_name(self.data.ptr, self.pos)
+        };
         todo!()
     }

For the conversion to Option<NonNull<c_char>>, we can use NonNull::new. It wants a *mut c_char though, so we have to do a bit of casting.

     fn next(&mut self) -> Option<NonNull<c_char>> {
         let item_ptr: *const c_char = unsafe {
             get_name(self.data.ptr, self.pos)
         };
+        let item = NonNull::new(item_ptr.cast_mut());
         todo!()
     }

If we have found a non-null pointer, we want to advance our position so we return the next item next time. (If we got a null (None), we should just leave our position alone so we don't overflow if someone calls next a whole bunch.)

     fn next(&mut self) -> Option<NonNull<c_char>> {
         let item_ptr: *const c_char = unsafe {
             get_name(self.data.ptr, self.pos)
         };
         let item = NonNull::new(item_ptr.cast_mut());
+        if item.is_some() {
+            self.pos += 1;
+        }
         todo!()
     }

And... that's it, we're ready to return the item!

    fn next(&mut self) -> Option<NonNull<c_char>> {
        let item_ptr: *const c_char = unsafe {
            get_name(self.data.ptr, self.pos)
        };
        let item = NonNull::new(item_ptr.cast_mut());
        if item.is_some() {
            self.pos += 1;
        }
        item
    }

Finally, let's implement IntoIterator. Remember that we need to transfer ownership of Data into the owning DataIter. Other than that, it's pretty straightforward, so I'm just going to paste some code.

// A little encapsulation is always nice
impl DataIter {
    fn new(data: Data) -> Self {
        let pos = 0;
        Self { data, pos }
    }
}

// Following the pattern discussed above
impl IntoIterator for Data {
    type Item = <DataIter as Iterator>::Item;
    type IntoIter = DataIter;

    fn into_iter(self) -> Self::IntoIter {
        DataIter::new(self)
    }
}

:partying_face:

Cleaning up some more (unsafe encapsulation)

You generally want to encapsulate your unsafe as much as possible. For example it's not great that we're using unsafe in the middle of our iterator implementation IMO:

        let item_ptr = unsafe { get_name(self.data.ptr, self.pos) };
        let item = NonNull::new(item_ptr.cast_mut());
        // ...

It would be nicer if this conversion was handled for us. You want a tighter border around your FFI interface, and you want a safe Rust interface that mirrors it, to the extent possible.

Let's see if we can wrap up all the unsafety into the Data type. It can keep its field private and do all the unsafe things. If we're lucky, nothing else in Rust should even know what a tList is.

  • alloc_data corresponds to Data::new :white_check_mark:
  • free_list corresponds to impl Drop for Data :white_check_mark:
  • get_len and get_name can be methods on Data :question:

get_name returns a raw pointer, which isn't useful without unsafe, but most this is looking pretty good. Let's see how far we get, starting with new.

        pub fn new() -> Option<Self> {
            let ptr: *const tList = unsafe { alloc_data() };
            match ptr.is_null() {
                true => None,
                false => Some(Data {ptr}),
            }
        }

Hmm, we're doing the same null dance as before. If we're not even going to allow construction of Data when we get a null pointer... let's use NonNull again!

    pub struct Data {
        ptr: NonNull<tList>,
    }

    impl Data {
        pub fn new() -> Option<Self> {
            let ptr: *const tList = unsafe { alloc_data() };
            let ptr = NonNull::new(ptr.cast_mut())?;
            Some(Self { ptr })
        }
    }

The breaks the Drop implementation though. Guess we better tackle that next:

    impl Drop for Data {
        fn drop(&mut self) {
            let ptr = self.ptr.as_ptr().cast_const();
            unsafe { free_list(ptr) };
        }
    }

And now get_name...

    impl Data {
        pub fn get_name(&self, i: c_int) -> *const c_char {
            todo!()
        }

Oop, returning raw pointers that might be null again. Now, probably you want some form of CStr here, but in the interest of brevity I'm going to stick with pointers for right now. But we can at least do the NonNull dance in here.

Let's sort of hide the C-ness a bit and change that c_int to usize while we're here, though.

        pub fn get_name(&self, i: usize) -> Option<NonNull<c_char>> {
            let i: c_int = i.try_into().ok()?;
            let ptr = self.ptr.as_ptr().cast_const();
            let item_ptr = unsafe { get_name(ptr, i) };
            NonNull::new(item_ptr.cast_mut())
        }

That's mostly a repeat of what we were doing in the iterator. Let's clean up the iterator a bit now that we have the method available -- get rid of some unencapsulated unsafe.

pub struct DataIter {
    data: Data,
-    pos: i32,
+    pos: usize, // guess we wanted `usize` afterall!
}
impl Iterator for DataIter {
    type Item = NonNull<c_char>;
    fn next(&mut self) -> Option<NonNull<c_char>> {
        // This returns early if it's `None`
        let item = self.data.get_name(self.pos)?;
        // We know the `item` is not `None` here, so we can increment...
        self.pos += 1;
        // ...and then wrap it back up in `Some`
        Some(item)
    }
}

That's looking a lot nicer.

All that's left is get_len. Let's hide the c_int again.

        pub fn get_len(&self) -> usize {
            let ptr = self.ptr.as_ptr().cast_const();
            let i = unsafe { get_len(ptr) };
            // Interpret negatives as 0
            i.try_into().unwrap_or_default()
        }

We could quible over 16-bit platforms and the possible interpretation of negatives as 0, but we don't even use this function anymore, so I'll just leave it there.

Let's take a break

This reply is already way too long. Here's where we got to:

The next steps would be tackling the CStr issue. If successful, the example in main shouldn't need any unsafe anymore. However, it's a bit involved -- we'd need to change the iterator to be borrowed for things to make the most sense.[6]


  1. get it? ↩︎

  2. i.e. iterator invalidation ↩︎

  3. but also more on that later ↩︎

  4. One could argue more safe guards are better, but I'm going to just move on. ↩︎

  5. If you wanted a double ended iterator, you'd probably need to add it back... but that's a different adventure than the one we're on. ↩︎

  6. (There's no way for Data to give away ownership of parts of the tList, so the owning iterator can't hand out CStrings without cloning the data -- and can't hand out &CStrs soundly and still implement Iterator.) ↩︎

4 Likes

OK, it's not that bad -- if I just marathon the code without taking the time to explain why a borrowing iterator makes the most sense,[1] anyway.

Let's transform DataIter into a borrowing DataIter<'_>.

-pub struct DataIter {
-    data: Data,
+// Added lifetime and made `data` a reference
+pub struct DataIter<'a> {
+    data: &'a Data,
     pos: usize,
 }

...adjust the constructor too...

-impl DataIter {
-    fn new(data: Data) -> Self {
+// Added lifetimes here
+impl DataIter<'_> {
+    fn new(data: &Data) -> DataIter<'_> {
        let pos = 0;
-        Self { data, pos }
+        DataIter { data, pos }
    }
}

...and the Iterator implementation...

-impl Iterator for DataIter {
-    type Item = NonNull<c_char>;
-    fn next(&mut self) -> Option<NonNull<c_char>> {
+// Added lifetime, changed Item to `&'a CStr`
+impl<'a> Iterator for DataIter<'a> {
+    type Item = &'a CStr;
+    fn next(&mut self) -> Option<Self::Item> {
         let item = self.data.get_name(self.pos)?;
         self.pos += 1;
         Some(item)
     }
 }

...neat, the body didn't even change.[2] We need to update the IntoIterator implementation too...

-    impl IntoIterator for Data {
-        type Item = <DataIter as Iterator>::Item;
-        type IntoIter = DataIter;
+    // Added lifetime and reference
+    impl<'a> IntoIterator for &'a Data {
+        type Item = <DataIter<'a> as Iterator>::Item;
+        type IntoIter = DataIter<'a>;

         fn into_iter(self) -> Self::IntoIter {
             DataIter::new(self)
         }
     }

And finally, update the get_name method to return CStrs.

-        pub fn get_name(&self, i: usize) -> Option<NonNull<c_char>> {
+        // Changed this to `CStr`
+        pub fn get_name(&self, i: usize) -> Option<&CStr> {
             let i: c_int = i.try_into().ok()?;
             let ptr = self.ptr.as_ptr().cast_const();
             let item_ptr = unsafe { get_name(ptr, i) };
-            NonNull::new(item_ptr.cast_mut())
+            // Back to checking for null!
+            match item_ptr.is_null() {
+                true => None,
+                false => Some(unsafe { CStr::from_ptr(item_ptr) }),
+            }
         }

And now the example in main looks like this.

    let dummy = Data::new().unwrap();
    // Added reference
    for cstr in &dummy {
        println!("{cstr:?}");
    }

  1. or anything else really ↩︎

  2. This still won't compile until we've finished our changes though. ↩︎

3 Likes

Thank you very very very much @quinedot for the time you took to give all these explanations! This will take me a lot of time to process all this very valuable information for me (because I'm still lacking experience with Rust as you could see) but it's fine and I even prefer it this way.

1 Like