Too much error handling?

Hi,
I would be glad to gather more experienced rustacean's opinion about my last creation. It is a small executable that count characters occurrence in a set of files.
It tooks a glob as input and spawn threads to process files matching the glob. I used this exercise ti work on channels. My point is that most of my code is for handling errors and I would like to know if there is a better way to do what I did.

Thank you for your advices.

use std::{fs::read_dir, path::PathBuf};
use std::time::Duration;
use std::path::Path;
use crossbeam_channel::{Receiver, SendError, Sender};
use structopt::StructOpt;
use simple_logger::SimpleLogger;
use glob::{PatternError, glob};
use rayon::ThreadPool;

const MAX_CHARS: usize = 256;

#[derive(Debug)]
enum ErrorKind {
    Internal,
    External
}

#[derive(Debug)]
struct ErrorData {
    kind: ErrorKind,
    source: Option<&'static str>,
    description: String
}

impl From<std::io::Error> for ErrorData {
    fn from(error: std::io::Error) -> Self {
        ErrorData { kind: ErrorKind::Internal, source: Some("io"), description: error.to_string() }
    }
}

impl<T> From<SendError<T>> for ErrorData {
    fn from(error: SendError<T>) -> Self {
        ErrorData { kind: ErrorKind::Internal, source: Some("channel"), description: error.to_string() }
    }
}

impl From<PatternError> for ErrorData {
    fn from(error: PatternError) -> Self {
        ErrorData { kind: ErrorKind::External, source: Some("input"), description: error.to_string() }
    }
}

type Result<T> = std::result::Result<T, ErrorData>;

enum ResultMsg {
    CharRead(char)
}

enum WorkMsg {
    FilePath(std::path::PathBuf)
}

#[derive(StructOpt)]
struct CliArgs {
    root_path: String,
}

fn extract_chars<P: AsRef<Path>>(filepath: P, sender: Sender<ResultMsg>) -> Result<()> {
    // Open the file in read-only mode (ignoring errors).
    let filepath_str = format!("{}", filepath.as_ref().display());
    let contents = std::fs::read_to_string(filepath)
        .map_err(|e| ErrorData { kind: ErrorKind::Internal, source: Some("io"), description: format!("unable to process {}: {}", &filepath_str, e) })?;
    for c in contents.chars() { 
        sender.send(ResultMsg::CharRead(c))?;
    }
    log::debug!("file '{}' processed", &filepath_str);
    Ok(())
}

fn update_stats(counters: &mut [i32], receiver: Receiver<ResultMsg>) { 
    loop {
        match receiver.recv() {
            Ok(ResultMsg::CharRead(c)) => { 
                let code = c as usize; 
                if code > 31 && code < MAX_CHARS-1 {
                    counters[code-32]+=1;
                }
            }
            Err(e) => { 
                log::info!("stopping state updates: {}", e);
                break
            }
        }
    }
}

fn scan_folder(file_pattern: &str, sender: Sender<WorkMsg>) {
    match glob(file_pattern) {
        Ok(d) => {
            for entry in d {
                match entry {
                    Ok(path) => {
                        if let Err(e) = scan_path(&path, &sender){
                            log::error!("unable to process '{}': {}", path.display(), &e.description);
                        } 
                    }
                    Err(e) => {
                        log::error!("unable to process pattern {}: {}", file_pattern, e);
                    }
                }
            }
        }
        Err(e) => {
            log::error!("unable to process pattern {}: {}", file_pattern, e);
        }
    }
}

fn scan_path(path: &Path, sender: &Sender<WorkMsg>) -> Result<()> {
    log::debug!("processing path '{}'...", path.display()); 
    if path.is_dir() {
        match read_dir(path) {
            Ok(d) => {
                for entry in d {
                    match entry {
                        Ok(entry) => {
                            let subpath = entry.path();
                            if let Err(e) = scan_path(&subpath, sender){
                                log::error!("unable to process path '{}': {}", subpath.display(), &e.description);
                            } 
                        }
                        Err(e) => {log::error!("unable to process path '{}': {}", path.display(), e)}
                    }
                }
            }
            Err(e) => {log::error!("unable to process entry in '{}': {}", path.display(), e)}
        }
    } else {
        if let Err(e) = sender.send(WorkMsg::FilePath((*path).to_path_buf())) {
            return Err(e.into());
        }
    }
    Ok(())
}

fn worker(path: &PathBuf, results: Sender<ResultMsg>) {
    if let Err(e) = extract_chars(path, results) {
        log::error!("unable to process path '{}': {:#?}", path.display(), e);
    }
}

fn manage_workers(pool: ThreadPool, requests: Receiver<WorkMsg>, results: Sender<ResultMsg>) {
    loop {
        match requests.recv_timeout(Duration::from_secs(10)) {
            Ok(WorkMsg::FilePath(dir_entry)) => {
                let result_sender = results.clone();
                pool.spawn(move || worker(&dir_entry, result_sender));
            }
            Err(e) => { 
                // handle receive timeout
                log::warn!("stopping workers: {}", e);
                break
            }
        }
    }
}

fn main() { 
    SimpleLogger::new().init().unwrap();
    let mut counters: [i32;256] = [0;MAX_CHARS];
        
    let (work_sender, work_receiver): (Sender<WorkMsg>,Receiver<WorkMsg>) = crossbeam_channel::unbounded();
    let (result_sender, result_receiver): (Sender<ResultMsg>,Receiver<ResultMsg>) = crossbeam_channel::unbounded();
    let pool = rayon::ThreadPoolBuilder::new()
        .num_threads(8)
        .build()
        .expect("cannot create thread pool");

    // retrieve source folder from command line
    let args = CliArgs::from_args();
    log::debug!("starting charcounter with {}...", &args.root_path); 
    // start workers
    let _t_w = rayon::spawn(move || { manage_workers(pool, work_receiver, result_sender)});
    // start scan on concurrent thread
    let _t_s = rayon::spawn(move || { scan_folder(&args.root_path, work_sender)});
    // execute receiver on main thread
    update_stats(&mut counters, result_receiver);
    println!("{:?}", counters);
}

I think I'm not alone here in saying that 7 levels of indentation is too much.

fn scan_path(path: &Path, sender: &Sender<WorkMsg>) -> Result<()> {
    log::debug!("processing path '{}'...", path.display()); 
    if path.is_dir() {
        match read_dir(path) {
            Ok(d) => {
                for entry in d {
                    match entry {
                        Ok(entry) => {
                            let subpath = entry.path();
                            if let Err(e) = scan_path(&subpath, sender){
                                log::error!("unable to process path '{}': {}", subpath.display(), &e.description);

What you should do is look into the ? operator. It'll let you skip every match ... { Ok(d) => { ... }, ...} and turn it into a normal variable assignment, dropping 2 levels of indentation and letting you write much more straightline code.

For example, you can turn this...

if let Err(e) =  sender.send(WorkMsg::FilePath((*path).to_path_buf())) {
    return Err(e.into());
}

... Into this...

sender.send(WorkMsg::FilePath((*path).to_path_buf()))?;

A rule of thumb I use is that an individual function should have at most 3 levels of indentation. That's enough for a for-loop which contains a match (or if-let) statement where one of the branches has a couple statements in it.

Your scan_path() also logs 3 slightly different variants of * "unable to process path"*... It would be much better to refactor your code to only handle errors once. In this case we're using recursion, so just return errors as-is and let the caller handle them.

For example:

fn scan_path(path: &Path, sender: &Sender<WorkMsg>) -> Result<()> {
    log::debug!("processing path '{}'...", path.display()); 

    if !path.is_dir() {
        let msg = WorkMsg::FilePath(path.to_path_buf());
        sender.send(msg)?;
        return Ok(()); 
    }

    for entry in read_dir(path)? {
        let path = entry?.path();

        // All errors when scanning "path" will be caught and logged here before continuing
        if let Err(e) = scan_path(&subpath, sender) {
            log::error!("unable to process \"{}\": {}", subpath.display(), e); 
        }
    }

    Ok(())
}
5 Likes

Thank you for your answer, I have tried the ? Operator but as you can see, I want to log errors but not interrupt the function execution.
As far as I know the ? will stop the execution in case of an error.
On my case, I want to log some errors without interrupting execution depending on the context.

Yes, the ? will return from the function on error. But it will not terminate the program (unless the function is main). So put the thing that can't be completed on error in a function and call that from the loop that should continue in case of error. Then the caller can do the logging of errors.

You can also use the Result::and_then function to "chain" things that should happen on success, and have a single condition do the logging afterwards.

You can still use control flow to linearize the code and decrease the nesting level.

fn scan_path(path: &Path, sender: &Sender<WorkMsg>) -> Result<()> {
    log::debug!("processing path '{}'...", path.display());
    if !path.is_dir() {
        sender.send(WorkMsg::FilePath((*path).to_path_buf()))?;
        return Ok(());
    }
    let d = match read_dir(path) {
        Ok(d) => d,
        Err(e) => {
            log::error!("unable to process entry in '{}': {}", path.display(), e);
            return Ok(());
        }
    };
    for entry in d {
        let entry = match entry {
            Ok(entry) => entry,
            Err(e) => {
                log::error!("unable to process path '{}': {}", path.display(), e);
                continue;
            }
        };
        let subpath = entry.path();
        if let Err(e) = scan_path(&subpath, sender) {
            log::error!(
                "unable to process path '{}': {}",
                subpath.display(),
                &e.description
            );
        }
    }
    Ok(())
}

Same thing, but reads more top-to-bottom and less left-to-right.

You’re right concerning the nested layout of my code. I may be too much influenced by structured programming rules which emphasize single return statement. But your version is definitely easier to read and that’s what a language stands for.
Concerning error handling, I will try to use and_then function to reduce nesting and usage of if let.
Anyway, thank you all for your time.

I don't get where this comes from. I mean, I've heard of it, but the only times I've ever found myself following such a principle is when writing assembly, where a single return point would often be desirable for restoring the caller's environment. I guess this can be generalized to "cleanup," but it's a very particular kind of cleanup that can't be fixed simply by factoring out a function, and it's very specific to assembly!

In rust, the vast majority of cleanup is automated by RAII, and in the rare circumstance where RAII feels like a poor fit, I can just refactor a function into two: One that has many exit points, and one that calls the first one and then does cleanup.

If the multiple steps bother you, another approach is to break up the steps as well.

// Approximate code, I didn't test with your code base

fn scan_path(path: &Path, sender: &Sender<WorkMsg>) -> Result<()> {
    log::debug!("processing path '{}'...", path.display());
    match path.is_dir() {
        true => Ok(scan_directory(path, sender)),
        false => sender.send(WorkMsg::FilePath(path.into())),
    }
}

fn scan_directory(path: &Path, sender: &Sender<WorkMsg>) {
    match read_dir(path) {
        Ok(d) => scan_subpaths(path, d, sender),
        Err(e) => log::error!("unable to process entry in '{}': {}", path.display(), e),
    }
}

fn scan_subpaths(path: &Path, d: ReadDir, sender: &Sender<WorkMsg>) {
    for entry in d {
        if let Err(e) = entry.and_then(|entry| {
            let subpath = entry.path();
            scan_path(&subpath, sender)
        }) {
            log::error!("unable to process path '{}': {}", path.display(), e);
        }
    }
}

This topic was automatically closed 90 days after the last reply. We invite you to open a new topic if you have further questions or comments.