Implementing dynamic gitignore parser

Hi, currently i'm creating a cli-tool to count lines of code in project with given extension
It works, but still have a big room to improve

For now i'm trying to implement gitignore parser for multiple files in multiples directories and got a problem with my first one. I tried to do it via &mut Vec but it didn't go well. It did work, but it stored all strings in one common vector, so all directories would use all gitignore lines

Here's my old implementation:
Under it there is new one

use std::{fs, path::{Path, PathBuf}};

use clap::Parser;
use env_logger::Builder;
use regex::Regex;


#[derive(Parser)]
#[command(version, about, long_about = None)]
pub struct Cli {
    path: PathBuf,
    extension: String,

    #[arg(short, long)]
    verbose: bool,
}

fn get_gitignore(dir: &Path) -> Vec<String> {
    let mut gitignore: Vec<String> = vec![];
    if dir.is_dir() {
        'outer: for entry in fs::read_dir(dir).unwrap() {
            let entry = entry.unwrap();
            let path = entry.path();

            if path.file_name().unwrap_or_default().to_str().unwrap_or("") == ".gitignore" {
                let lines = fs::read_to_string(&path).unwrap();
                let lines = lines.lines();
                for line in lines {
                    gitignore.push(line.to_string())
                }
                break 'outer;
            }
        }
    }

    return gitignore;
}

fn visit_dir(dir: &Path, ext: &str, gitignore: &mut Vec<String>) -> std::io::Result<usize> {
    let mut lines = 0usize;
    if dir.is_dir() {
        log::info!("Old gitignore passed into {:?} dir: {:?}", dir.file_name(), gitignore);
        gitignore.append(&mut get_gitignore(dir));
        log::info!("Added new Gitignore in new dir: {:?}", gitignore);
        for entry in fs::read_dir(dir)? {
            let entry = entry?;
            let path = entry.path();

            if path.file_name().unwrap_or_default().to_str().unwrap_or("").starts_with('.') {
                continue;
            }

            
            let contains = gitignore.iter().any(|s| s == path.file_name().unwrap().to_str().unwrap());

            if contains {
                log::info!("Ignored file: {:?}", path.file_name().unwrap());
                continue;
            }

            if path.is_dir() {
                log::info!("Dir name {:?}", path.file_name().unwrap());
                lines += visit_dir(&path, ext, gitignore)?;
            } else {
                if path.file_name().unwrap().to_str().unwrap().ends_with(ext) {
                    log::info!("Good file with good ext");
                    log::info!("Filename name {:?}", path.file_name().unwrap());
                    lines += count_lines(&path);
                } else {
                    continue;
                }
            }
            log::info!("Total amount of lines: {}\n", &lines);
        }
    }
    log::info!("Getting out of {:?}", dir.file_name());
    log::info!("Total lines in {:?}: {}",dir.file_name(), lines);
    Ok(lines)
}

fn count_lines(file: &Path) -> usize {
    let file_str = fs::read_to_string(file).unwrap();

    let new_lines_re = Regex::new(r#"\n{2,}"#).unwrap();
    let multi_comment = Regex::new(r#"/\*[\s\S]*\*/\s"#).unwrap();

    let file_str = new_lines_re.replace_all(&file_str, "\n");
    let file_str = multi_comment.replace_all(&file_str, "");
    let mut lines: Vec<&str> = file_str.lines().collect();
    for (i, line) in lines.clone().into_iter().enumerate() {
        if line.starts_with("//") {
            lines.remove(i);
        }
    }
    log::info!("Lines in {:?}: {}",file.file_name(), lines.clone().len());
    lines.len()
}

fn main() {
    let cli = Cli::parse();

    if cli.verbose {
        Builder::new()
            .filter(None, log::LevelFilter::Info)
            .init();
    } else {
        Builder::new()
            .filter(None, log::LevelFilter::Off)
            .init();
    }

    match fs::metadata(&cli.path) {
        Ok(metadata) => {
            if metadata.is_dir() { 
                log::info!("Path: {}", cli.path.to_str().unwrap());
                log::info!("File extension: {}", cli.extension);
                let lines = visit_dir(&cli.path, &cli.extension, &mut vec![]);
                println!("{}", lines.unwrap());
            }
        },
        Err(_) => ()
    }
}

And here's new one with HashMap<PathBuf, Vec>:

use std::{collections::HashMap, fs, path::{Path, PathBuf}};

use clap::Parser;
use env_logger::Builder;
use regex::Regex;


#[derive(Parser)]
#[command(version, about, long_about = None)]
pub struct Cli {
    path: PathBuf,
    extension: String,

    #[arg(short, long)]
    verbose: bool,
}

fn get_gitignore(dir: &Path) -> Vec<String> {
    let mut gitignore: Vec<String> = vec![];
    if dir.is_dir() {
        'outer: for entry in fs::read_dir(dir).unwrap() {
            let entry = entry.unwrap();
            let path = entry.path();

            if path.file_name().unwrap_or_default().to_str().unwrap_or("") == ".gitignore" {
                let lines = fs::read_to_string(&path).unwrap();
                let lines = lines.lines();
                for line in lines {
                    gitignore.push(line.to_string())
                }
                break 'outer;
            }
        }
    }

    return gitignore;
}

fn visit_dir<'a>(dir: &Path, ext: &str, gitignore_map: &mut HashMap<PathBuf, Vec<String>>) -> std::io::Result<usize> {
    let mut lines = 0usize;
    if dir.is_dir() {
        log::info!("Old gitignore passed into {:?} dir: {:?}", dir.file_name(), gitignore_map);
        let ignore_vec = get_gitignore(dir);
        if ignore_vec.len() > 0 {
            gitignore_map.insert(dir.to_path_buf().clone(),get_gitignore(dir));
            log::debug!("Added new Gitignore in new dir: {:?}", gitignore_map);
        } else {
            log::debug!("No gitignore in dir: {:?}", dir.file_name());
        }
        for entry in fs::read_dir(dir)? {
            let entry = entry?;
            let path = entry.path();

            if path.file_name().unwrap_or_default().to_str().unwrap_or("").starts_with('.') {
                continue;
            }

            
            let contains = gitignore_map.iter().any(|(_k, v)| v.contains(&path.file_name().unwrap().to_str().unwrap().to_string()));

            if contains {
                log::info!("Ignored file: {:?}", path.file_name().unwrap());
                continue;
            }

            if path.is_dir() {
                log::info!("Dir name {:?}", path.file_name().unwrap());
                lines += visit_dir(&path, ext, gitignore_map)?;
            } else {
                if path.file_name().unwrap().to_str().unwrap().ends_with(ext) {
                    log::info!("Good file with good ext");
                    log::info!("Filename name {:?}", path.file_name().unwrap());
                    lines += count_lines(&path);
                } else {
                    continue;
                }
            }
            log::info!("Total amount of lines: {}\n", &lines);
        }
        gitignore_map.remove(&dir.to_path_buf());
    }
    log::info!("Getting out of {:?}", dir.file_name());
    log::info!("Total lines in {:?}: {}",dir.file_name(), lines);
    Ok(lines)
}

fn count_lines(file: &Path) -> usize {
    let file_str = fs::read_to_string(file).unwrap();

    let new_lines_re = Regex::new(r#"\n{2,}"#).unwrap();
    let multi_comment = Regex::new(r#"/\*[\s\S]*\*/\s"#).unwrap();
    let single_comment = Regex::new(r#"\s*//.*"#).unwrap();

    let file_str = new_lines_re.replace_all(&file_str, "\n");
    let file_str = multi_comment.replace_all(&file_str, "");
    let mut lines: Vec<&str> = file_str.lines().collect();

    let mut i = 0;
    let mut len = lines.len();
    while i < len {
        if single_comment.is_match(lines[i]) {
            lines.remove(i);
            i -= 1;
            len -= 1;
        }
        i += 1;
    }
    log::info!("Lines in {:?}: {}",file.file_name(), lines.clone().len());
    lines.len()
}

fn main() {
    let cli = Cli::parse();

    if cli.verbose {
        Builder::new()
            .filter(None, log::LevelFilter::Debug)
            .init();
    } else {
        Builder::new()
            .filter(None, log::LevelFilter::Off)
            .init();
    }

    match fs::metadata(&cli.path) {
        Ok(metadata) => {
            if metadata.is_dir() { 
                log::info!("Path: {}", cli.path.to_str().unwrap());
                log::info!("File extension: {}", cli.extension);
                let mut gitignore_map: HashMap<PathBuf, Vec<String>>= HashMap::new();
                let lines = visit_dir(&cli.path, &cli.extension, &mut gitignore_map);
                println!("{}", lines.unwrap());
            }
        },
        Err(_) => ()
    }
}

I tried to use &PathBuf as key but couldn't make it, if it's even possible. Probably skill issue.
Now it does work correct, but i don't like copying PathBuf every directory. Is there any better way? Or maybe i don't even need to use hash map?

Hi, currently i'm creating a cli-tool to count lines of code in project with given extension
It works, but still have a big room to improve

For now i'm trying to implement gitignore parser for multiple files in multiples directories and [...]

Do you know about the ignore crate? That's what ripgrep uses (and is written by the same author).

No i didn't. Sounds cool. I'm gonna check how they implement it. Thank you