Help with async

Hi everyone

I have the following code, which checks whether each repository of a GitHub organization contains a Cargo project.

use std::sync::Arc;

use async_recursion::async_recursion;
use octocrab::Octocrab;

/// Program entry point: crawls the `rust-lang` organization and prints the
/// error, if the crawl returned one.
#[tokio::main]
async fn main() {
    if let Err(e) = crawl_github("rust-lang").await {
        println!("{}", e);
    }
}

/// Lists the repositories of `org` and, for each one, prints its name, the
/// number of entries in its root directory and whether a `Cargo.toml` was found.
///
/// Only the items of the single response returned by `list_repos().send()` are
/// visited. The first failing request ends the crawl and is returned as `Err`.
async fn crawl_github(org: &str) -> octocrab::Result<()> {
    // The empty path addresses the root directory of a repository.
    const ROOT: &str = "";
    let client = octocrab::instance();

    let page = client.orgs(org).list_repos().send().await?;
    for repository in page.items {
        let name = repository.name.as_str();
        println!("{:?}: ", repository.name);

        let root_listing = client.repos(org, name).get_content().path(ROOT).send().await?;
        println!("\t{} files/dirs in the repo root", root_listing.items.len());

        let is_cargo_project = contains_cargo_dir(org, name, ROOT, Arc::clone(&client)).await?;
        println!("\tis cargo project: {is_cargo_project}\n");
    }

    Ok(())
}

/// Searches `path` inside `owner/repo`, and every directory below it, for a
/// file named `Cargo.toml`.
///
/// The entries of `path` are inspected first. Only when none of them is a
/// `Cargo.toml` file are the sub-directories searched, depth-first and one
/// after another: each recursive call is awaited before the next one starts.
///
/// # Errors
///
/// Returns the octocrab error of the first content request that fails,
/// including requests made while descending into sub-directories.
#[async_recursion]
async fn contains_cargo_dir(
    owner: &str,
    repo: &str,
    path: &str,
    oc: Arc<Octocrab>,
) -> octocrab::Result<bool> {
    let contents = oc
        .repos(owner, repo)
        .get_content()
        .path(path)
        .send()
        .await?;

    // Files first, then directories: a `Cargo.toml` in this directory answers
    // the question without any further request.
    let mut dirs = Vec::new();
    for item in &contents.items {
        match item.r#type.as_str() {
            "dir" => dirs.push(item),
            "file" if item.name == "Cargo.toml" => return Ok(true),
            _ => {}
        }
    }

    for dir in dirs {
        // Propagate a failed request instead of silently treating it as
        // "no Cargo.toml here", which would produce a wrong `false` result.
        if contains_cargo_dir(owner, repo, &dir.path, Arc::clone(&oc)).await? {
            return Ok(true);
        }
    }

    Ok(false)
}

Now, the code execution is entirely serial because I am only using .await, and I wanted to change that. I figured it would make sense, as a first step, to parallelize the recursion into the directories. I did it like this:

use std::sync::Arc;

use async_recursion::async_recursion;
use octocrab::Octocrab;

/// Program entry point: crawls the `SebastianJL` account and prints the error,
/// if the crawl returned one.
#[tokio::main]
async fn main() {
    let outcome = crawl_github("SebastianJL").await;
    if let Err(err) = outcome {
        println!("{}", err);
    }
}

/// Lists the repositories of `org` and, for each one, prints its name, the
/// number of entries in its root directory and whether a `Cargo.toml` was found
/// within the recursion limit passed to `contains_cargo_dir`.
///
/// Only the items of the single response returned by `list_repos().send()` are
/// visited. The first failing request ends the crawl and is returned as `Err`.
async fn crawl_github(org: &str) -> octocrab::Result<()> {
    // The empty path addresses the root directory of a repository.
    const ROOT: &str = "";
    // Recursion limit handed to `contains_cargo_dir`.
    const REC_LIMIT: u32 = 2;

    let client = octocrab::instance();
    let page = client.orgs(org).list_repos().send().await?;

    for repository in page.items {
        let name = repository.name.as_str();
        println!("{:?}: ", repository.name);

        let root_listing = client.repos(org, name).get_content().path(ROOT).send().await?;
        println!("\t{} files/dirs in the repo root", root_listing.items.len());

        let is_cargo_project =
            contains_cargo_dir(org, name, ROOT, Arc::clone(&client), REC_LIMIT).await?;
        println!("\tis cargo project: {is_cargo_project}\n");
    }

    Ok(())
}

/// Searches `path` inside `owner/repo`, and the directories below it, for a
/// file named `Cargo.toml`, giving up once `rec_limit` reaches zero.
///
/// The entries of `path` are inspected first. Only when none of them is a
/// `Cargo.toml` file are the sub-directories searched, each with
/// `rec_limit - 1`. A call with `rec_limit == 0` returns `Ok(false)` without
/// issuing any request.
///
/// # Errors
///
/// Returns the octocrab error of the first content request that fails,
/// including requests made while descending into sub-directories.
#[async_recursion]
async fn contains_cargo_dir(
    owner: &str,
    repo: &str,
    path: &str,
    oc: Arc<Octocrab>,
    rec_limit: u32,
) -> octocrab::Result<bool> {
    // `rec_limit` is unsigned, so an exhausted budget is exactly zero.
    if rec_limit == 0 {
        return Ok(false);
    }

    let contents = oc
        .repos(owner, repo)
        .get_content()
        .path(path)
        .send()
        .await?;

    // Files first, then directories: a `Cargo.toml` in this directory answers
    // the question without any further request.
    let mut dirs = Vec::new();
    for item in &contents.items {
        match item.r#type.as_str() {
            "dir" => dirs.push(item),
            "file" if item.name == "Cargo.toml" => return Ok(true),
            _ => {}
        }
    }

    // Creating the futures does not start them: a future only makes progress
    // while it is being polled.
    let mut futures = Vec::with_capacity(dirs.len());
    for item in dirs {
        let fut = contains_cargo_dir(owner, repo, &item.path, Arc::clone(&oc), rec_limit - 1);
        futures.push(fut);
    }

    // `tokio::join!` with a single future simply drives that one future to
    // completion, so the sub-directories are still searched one after another.
    for fut in futures {
        let (found,) = tokio::join!(fut);
        // Propagate a failed request instead of silently treating it as
        // "no Cargo.toml here", which would produce a wrong `false` result.
        if found? {
            return Ok(true);
        }
    }

    Ok(false)
}

Here I also limited the number of recursions into directories. I think this version is actually still serial, since the futures only get started when tokio::join! is called. But I couldn't figure out how to do it in a way that all directory searches are spawned at the same time and then joined one after another, breaking early when a Cargo.toml is found.
I tried using tokio::task::JoinSet, because adding a task to the set also spawns it. But I get lifetime issues with the oc.

use std::sync::Arc;

use async_recursion::async_recursion;
use octocrab::Octocrab;
use tokio::task::JoinSet;

/// Entry point: crawls the `SebastianJL` account and prints the error, if any.
#[tokio::main]
async fn main() {
    if let Err(failure) = crawl_github("SebastianJL").await {
        println!("{}", failure);
    }
}

/// Walks over the repositories of `org`; for every repository it prints the
/// name, how many entries the root directory has, and whether
/// `contains_cargo_dir` found a `Cargo.toml` within a recursion limit of 2.
///
/// Only the items of the single response returned by `list_repos().send()` are
/// visited. Any failing request aborts the walk and is returned as `Err`.
async fn crawl_github(org: &str) -> octocrab::Result<()> {
    let gh = octocrab::instance();
    // The empty path addresses the root directory of a repository.
    let root = "";

    let listing = gh.orgs(org).list_repos().send().await?;
    for entry in listing.items {
        let repo_name = entry.name.as_str();
        println!("{:?}: ", entry.name);

        let root_dir = gh
            .repos(org, repo_name)
            .get_content()
            .path(root)
            .send()
            .await?;
        let entry_count = root_dir.items.len();
        println!("\t{} files/dirs in the repo root", entry_count);

        let search = contains_cargo_dir(org, repo_name, root, Arc::clone(&gh), 2);
        let is_cargo_project = search.await?;
        println!("\tis cargo project: {is_cargo_project}\n");
    }

    Ok(())
}

/// Checks whether `path` inside `owner/repo`, or a directory below it, contains
/// a file named `Cargo.toml`, giving up once `rec_limit` reaches zero.
///
/// Files of the current directory are checked first; every sub-directory
/// search is then handed to a `JoinSet`, which spawns it as its own task.
///
/// NOTE(review): as written this does not compile (see the E0597/E0521
/// diagnostics quoted right after this listing). `JoinSet::spawn` requires a
/// `'static` future, but the future built in the loop below borrows `owner`,
/// `repo` and `item.path`.
#[async_recursion]
async fn contains_cargo_dir(
    owner: &str,
    repo: &str,
    path: &str,
    oc: Arc<Octocrab>,
    rec_limit: u32,
) -> octocrab::Result<bool> {
    // Recursion budget exhausted: report "not found" without issuing a request.
    if rec_limit <= 0 {
        return Ok(false);
    }

    // A failed content request is returned to the caller via `?`.
    let contents = oc
        .repos(owner, repo)
        .get_content()
        .path(path)
        .send()
        .await?;

    // Holds references into `contents.items`, so it cannot outlive `contents`.
    let mut dirs = Vec::new();

    // Files first, then directories: return as soon as a `Cargo.toml` file is
    // seen and remember the directories for the second phase.
    for item in contents.items.iter() {
        match item.r#type.as_str() {
            "dir" => dirs.push(item),
            "file" => {
                if let "Cargo.toml" = item.name.as_str() {
                    return Ok(true);
                }
            }
            _ => {}
        }
    }

    let mut futures = JoinSet::new();
    for item in dirs {
        // `fut` borrows `owner`, `repo` and `item.path` (which points into
        // `contents.items`); none of these borrows is `'static`.
        let fut = contains_cargo_dir(owner, repo, &item.path, Arc::clone(&oc), rec_limit - 1);
        // This is the call the compiler rejects: a spawned future must be `'static`.
        futures.spawn(fut);
    }

    // Take results as tasks finish and stop at the first `Ok(true)`.
    while let Some(res) = futures.join_next().await {
        // `unwrap` panics if the task itself could not be joined; an `Err`
        // returned by the search is ignored by the pattern below.
        if let Ok(true) = res.unwrap() {
            return Ok(true);
        }
    }
    
    Ok(false)
}
error[E0597]: `contents.items` does not live long enough
  --> src/main.rs:61:17
   |
51 |     let contents = oc
   |         -------- binding `contents` declared here
...
61 |     for item in contents.items.iter() {
   |                 ^^^^^^^^^^^^^^^^^^^^^ borrowed value does not live long enough
...
75 |         let fut = contains_cargo_dir(owner, repo, &item.path, Arc::clone(&oc), rec_limit - 1);
   |                   --------------------------------------------------------------------------- argument requires that `contents.items` is borrowed for `'static`
...
87 | }
   | - `contents.items` dropped here while still borrowed

error[E0521]: borrowed data escapes outside of function
  --> src/main.rs:75:19
   |
39 | #[async_recursion]
   | ------------------ lifetime `'life0` defined here
40 | async fn contains_cargo_dir(
41 |     owner: &str,
   |     ----- `owner` is a reference that is only valid in the function body
...
75 |         let fut = contains_cargo_dir(owner, repo, &item.path, Arc::clone(&oc), rec_limit - 1);
   |                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
   |                   |
   |                   `owner` escapes the function body here
   |                   argument requires that `'life0` must outlive `'static`

error[E0521]: borrowed data escapes outside of function
  --> src/main.rs:75:19
   |
39 | #[async_recursion]
   | ------------------ lifetime `'life1` defined here
...
42 |     repo: &str,
   |     ---- `repo` is a reference that is only valid in the function body
...
75 |         let fut = contains_cargo_dir(owner, repo, &item.path, Arc::clone(&oc), rec_limit - 1);
   |                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
   |                   |
   |                   `repo` escapes the function body here
   |                   argument requires that `'life1` must outlive `'static`

help: the following changes may resolve your lifetime errors
  |
  = help: replace `'life0` with `'static`
  = help: replace `'life1` with `'static`

Some errors have detailed explanations: E0521, E0597.
For more information about an error, try `rustc --explain E0521`.
error: could not compile `github-cli` (bin "github-cli") due to 3 previous errors

Can you help me?

Here the future returned by contains_cargo_dir borrows owner, repo, and item.path (a reference into the local contents.items), and none of those references lives long enough for this call:

futures.spawn(fut);

Spawning a future requires the future itself to have a 'static lifetime.

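You'll need to refactor your calls with this in mind: don't spawn a future that borrows its parameters from another, shorter-lived future. For example, clone owner, repo, and item.path into owned Strings and spawn an async move block that owns them and calls contains_cargo_dir(&owner, &repo, &path, oc, rec_limit - 1) inside.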