Macros and no_std and strtol, oh my!

Despite my best efforts, I have been unable to find a pre-existing Rust function or crate for parsing integers from strings that supports autodetection of the radix from a prefix like "0x". The best candidate is parse_int, but that doesn't properly handle signs with prefixes. So, I wrote my own crate for doing this. It's my first no_std crate, and it heavily depends on macros (largely because, if I had opted for a helper trait like in the standard library's from_str_radix implementation, I think it would have led to suboptimal documentation of the supported types). I also suspect that the actual parsing code could be improved a bit.

Any and all critiques are welcome.

Code repository: GitHub - jwodder/strtoint: Parse integers from strings, with support for base prefixes

Code:

//! Parse integers from strings, with support for base prefixes
//!
//! `strtoint` provides a function of the same name for parsing integer
//! literals from strings, with support for the base prefixes `0x`, `0o`, and
//! `0b` for hexadecimal, octal, and binary literals, respectively.
//!
//! This crate supports parsing into all primitive integer types built into
//! Rust, along with their "NonZero" equivalents.
//!
//! If the `std` feature (enabled by default) is disabled, this crate will be
//! built in no-std mode.  The only difference is that [`StrToIntError`] only
//! implements the [`std::error::Error`] trait under `std`.
//!
//! ```
//! use core::num::NonZeroUsize;
//! use strtoint::strtoint;
//!
//! assert_eq!(strtoint::<i32>("123").unwrap(), 123);
//! assert_eq!(strtoint::<u32>("0xabcd_FFFF").unwrap(), 2882404351);
//! assert_eq!(strtoint::<i16>("0o644").unwrap(), 420);
//! assert_eq!(strtoint::<i8>("-0b00101010").unwrap(), -42);
//! assert!(strtoint::<i64>("42.0").is_err());
//!
//! assert_eq!(
//!     strtoint::<NonZeroUsize>("123_456").unwrap(),
//!     NonZeroUsize::new(123456).unwrap()
//! );
//! assert!(strtoint::<NonZeroUsize>("0").is_err());
//! ```
#![no_std]
#![cfg_attr(docsrs, feature(doc_cfg))]
use core::fmt;

#[cfg(feature = "std")]
extern crate std;

/// Convert an integer literal held in a string into a value of type `T`.
///
/// The accepted syntax mirrors [Rust's integer literals][1], except that a
/// leading sign is permitted and integer suffixes are not.  In order, a valid
/// input consists of:
///
/// 1. an optional sign, `+` or `-` (`-` is rejected for unsigned types);
/// 2. an optional base prefix, one of `0x`, `0o`, or `0b` (lowercase only);
/// 3. one or more digits valid for the base, optionally interspersed with
///    underscores.  Without a base prefix, an underscore may not come before
///    the first digit; after a base prefix it may.
///
/// Whitespace is not accepted anywhere in the input, including at the start
/// or end.
///
/// [1]: https://doc.rust-lang.org/stable/reference/tokens.html#integer-literals
///
/// Implementations are provided for every primitive integer type built into
/// Rust and for the corresponding "NonZero" types; all of them use
/// [`StrToIntError`] as their `Err` type.
///
/// # Errors
///
/// An error is returned when:
///
/// - no digits follow the optional sign and base prefix;
/// - the input contains an invalid character: whitespace anywhere, a digit
///   that is not valid for the base, an invalid base prefix, a sign placed
///   after a base prefix, or a `-` sign when `T` is unsigned;
/// - the value denoted by the input does not fit in the range of `T`.
pub fn strtoint<T: StrToInt>(s: &str) -> Result<T, <T as StrToInt>::Err> {
    <T as StrToInt>::strtoint(s)
}

/// Trait used to implement the [`strtoint()`] function
///
/// Call [`strtoint()`] instead of using this trait directly.  You only ever
/// need to import this trait if you're implementing support for a custom
/// numeric type in your own crate.
pub trait StrToInt {
    /// The error type returned when parsing fails.  Every implementation
    /// provided by this crate sets this to [`StrToIntError`].
    type Err;

    /// Parse a string as the type in question
    ///
    /// # Errors
    ///
    /// Returns `Self::Err` when `s` cannot be parsed as a value of the
    /// implementing type.  For the implementations in this crate, the
    /// specific conditions are listed in the documentation of
    /// [`strtoint()`].
    fn strtoint(s: &str) -> Result<Self, Self::Err>
    where
        Self: Sized;
}

/// Error type for the [`strtoint()`] function
///
/// This type is used as the error type for [`strtoint()`] and [`StrToInt`] for
/// all types covered by this crate.
#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
pub enum StrToIntError {
    /// Returned when the input string contained no digits.  This covers the
    /// empty string, a lone sign, a lone base prefix, and a base prefix
    /// followed only by underscores.
    NoDigits,
    /// Returned when the input string contained an invalid character; `c` is
    /// the character in question, and `position` is its byte offset from the
    /// start of the input (counting any sign and base prefix).  A `-` sign
    /// given for an unsigned type is reported at position 0.
    InvalidCharacter { c: char, position: usize },
    /// Returned when the numeric value of the input string was out of range
    /// for the numeric type.  For the "NonZero" types, this is also returned
    /// when the input denotes zero.
    OutOfRange,
}

impl fmt::Display for StrToIntError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            StrToIntError::NoDigits => write!(f, "no digits in input"),
            StrToIntError::InvalidCharacter { c, position } => {
                write!(f, "invalid character {:?} at position {}", c, position)
            }
            StrToIntError::OutOfRange => write!(f, "value is out of range for numeric type"),
        }
    }
}

// The crate is `#![no_std]`, so the `std::error::Error` impl is only compiled
// when the `std` feature is enabled; under `docsrs` the impl is annotated with
// that feature requirement.  No trait methods are overridden.
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
impl std::error::Error for StrToIntError {}

// Generates a `StrToInt` implementation for each primitive integer type
// listed in the invocation (a trailing comma is allowed).
//
// The generated parser works in three stages: it consumes an optional sign,
// then an optional lowercase base prefix, and finally accumulates the digits
// with overflow-checked arithmetic.  Negative values are accumulated by
// subtraction so that the minimum value of a signed type can be reached
// without overflowing on the way.
macro_rules! impl_prim {
    ($($t:ty),* $(,)?) => {
      $(
        impl StrToInt for $t {
            type Err = StrToIntError;

            fn strtoint(s: &str) -> Result<Self, Self::Err>
            where
                Self: Sized,
            {
                // Stage 1: sign.  A `-` is refused outright for unsigned types
                // (those whose minimum value is zero).
                let (negative, body) = match s.strip_prefix('-') {
                    Some(rest) => {
                        if <$t>::MIN == 0 {
                            return Err(StrToIntError::InvalidCharacter {c: '-', position: 0});
                        }
                        (true, rest)
                    }
                    None => (false, s.strip_prefix('+').unwrap_or(s)),
                };

                // Stage 2: base prefix.  The first matching entry wins; with
                // no match the input is treated as decimal.
                let (radix, digits): (u32, &str) = [("0x", 16), ("0o", 8), ("0b", 2)]
                    .iter()
                    .find_map(|&(prefix, base)| {
                        body.strip_prefix(prefix).map(|rest| (base, rest))
                    })
                    .unwrap_or((10, body));

                // Number of bytes taken up by the sign and prefix; added to
                // indices within `digits` to report positions relative to the
                // original input.
                let consumed = s.len() - digits.len();

                // Stage 3: digits.
                let mut acc: $t = 0;
                let mut any_digit = false;
                for (idx, ch) in digits.char_indices() {
                    let bad_char = || StrToIntError::InvalidCharacter {
                        c: ch,
                        position: idx + consumed,
                    };
                    if ch == '_' {
                        // A decimal number may not start with an underscore;
                        // after a base prefix, leading underscores are fine.
                        if radix == 10 && !any_digit {
                            return Err(bad_char());
                        }
                        continue;
                    }
                    let d = ch.to_digit(radix).ok_or_else(bad_char)? as $t;
                    let scaled = acc
                        .checked_mul(radix as $t)
                        .ok_or(StrToIntError::OutOfRange)?;
                    let stepped = if negative {
                        scaled.checked_sub(d)
                    } else {
                        scaled.checked_add(d)
                    };
                    acc = stepped.ok_or(StrToIntError::OutOfRange)?;
                    any_digit = true;
                }

                if any_digit {
                    Ok(acc)
                } else {
                    Err(StrToIntError::NoDigits)
                }
            }
        }
      )*
    }
}

// Generates a `StrToInt` implementation for the "NonZero" type `$t` by
// parsing the input as the underlying primitive `$inner` and then wrapping
// the result.  A parsed value of zero cannot be represented by `$t` and is
// reported as `StrToIntError::OutOfRange`; errors from the underlying parse
// are passed through.
macro_rules! impl_nonzero {
    ($t:ty, $inner:ty) => {
        impl StrToInt for $t {
            type Err = StrToIntError;

            fn strtoint(s: &str) -> Result<Self, Self::Err>
            where
                Self: Sized,
            {
                <$t>::new(<$inner as StrToInt>::strtoint(s)?).ok_or(StrToIntError::OutOfRange)
            }
        }
    };
}

// Implement `StrToInt` for every primitive integer type.
impl_prim!(i8, i16, i32, i64, i128, isize, u8, u16, u32, u64, u128, usize);
// Implement `StrToInt` for each "NonZero" type, pairing it with the primitive
// type it wraps; that primitive must appear in the `impl_prim!` list above,
// since the NonZero parser delegates to the primitive's implementation.
impl_nonzero!(core::num::NonZeroI8, i8);
impl_nonzero!(core::num::NonZeroI16, i16);
impl_nonzero!(core::num::NonZeroI32, i32);
impl_nonzero!(core::num::NonZeroI64, i64);
impl_nonzero!(core::num::NonZeroI128, i128);
impl_nonzero!(core::num::NonZeroIsize, isize);
impl_nonzero!(core::num::NonZeroU8, u8);
impl_nonzero!(core::num::NonZeroU16, u16);
impl_nonzero!(core::num::NonZeroU32, u32);
impl_nonzero!(core::num::NonZeroU64, u64);
impl_nonzero!(core::num::NonZeroU128, u128);
impl_nonzero!(core::num::NonZeroUsize, usize);
3 Likes

This topic was automatically closed 90 days after the last reply. We invite you to open a new topic if you have further questions or comments.