Arti-ureq - Library to use Arti (Tor) in the ureq http client

After an interesting learning journey I finally achieved something pretty cool.

I made a library which allows you to easily use Arti (Tor) with the ureq library. Ureq is a pretty popular HTTP client that focuses on being simple and easy to use. With this new library it is possible to make requests over Tor without adding complex code.

The difficulties I encountered while developing this mainly were:

  • Implementing ureq's Transport trait to intermediate between ureq's buffers and Arti's stream.
  • Supporting both bare HTTP and a secure HTTPS connection over TLS.
  • Keeping it modular (e.g., supporting both rustls and native-tls).
  • Bridging between ureq's sync I/O context and Arti's async I/O.

I am certain my code is far from perfect but I am eager to receive feedback to improve.

  • I think my library can be simplified a bit, and types like ArtiHttpInner and HttpStreamType can be made obsolete.
  • Instead of unwrapping everything I tried to handle errors more gracefully, but I am sure it can be done better, for example by introducing a custom error type that integrates with the tor-error crate.
  • How is my usage of Pin, Box and rt.block_on?
  • It's still WIP and things like testing, error types and exception handling still need to be implemented.
  • Other feedback? Like styling guidelines or readability?

Usage example

// Simple example on how to use the arti-ureq library
// to make a request over Tor using the easy-to-use
// and popular ureq http client.

use arti_ureq;

#[allow(unused_imports)]
use ureq::unversioned::transport::{DefaultConnector, RustlsConnector};
use ureq::unversioned::resolver::DefaultResolver;
use tor_rtcompat::PreferredRuntime;
use arti_client::TorClientConfig;

use regex::Regex;

fn main() {
    // Runtime dedicated to Arti.
    let runtime = PreferredRuntime::create().expect("Failed to create runtime.");
    // TLS connector handed to ureq; RustlsConnector::default() works here as well.
    let tls_connector = DefaultConnector::default();
    // Default configuration for the TorClient.
    let tor_config = TorClientConfig::default();

    // Wrap the ureq connector in an Arti connector that is compatible with ureq.
    let tor_connector = arti_ureq::ArtiConnector::new(runtime, tls_connector, &tor_config);

    // Build the ureq agent on top of the Arti connector.
    // Pass DefaultConnector::default() instead to make requests without Tor.
    let agent = ureq::Agent::with_parts(
        ureq::config::Config::default(),
        tor_connector,
        DefaultResolver::default(),
    );

    // Perform the request through Tor.
    let mut response = agent
        .get("https://check.torproject.org/")
        .call()
        .expect("Failed to make request.");

    // Read the whole response body and decode it as UTF-8.
    let body = response
        .body_mut()
        .read_to_vec()
        .expect("Failed to read body.");
    let page = String::from_utf8(body).expect("Failed to convert to string.");

    // Report whether the request went through Tor.
    output_tor_status(&page);
}

/// Extract the status headline and the reported IP address from the HTML of
/// the Tor check page and print them on one line.
///
/// The headline is printed in red when it contains "not" (case-insensitive)
/// and in green otherwise. Nothing is printed when either the headline or the
/// IP address cannot be found in `html`.
fn output_tor_status(html: &str) {
    let status_pattern =
        Regex::new(r#"<h1 class=\"(not|off)\">([\s\S]*?)</h1>"#).expect("Invalid regex");
    let ip_pattern =
        Regex::new(r#"<strong>(\d{1,3}(?:\.\d{1,3}){3})</strong>"#).expect("Invalid regex");

    // Both pieces of information are required; bail out quietly otherwise.
    let Some(status_caps) = status_pattern.captures(html) else {
        return;
    };
    let Some(ip_caps) = ip_pattern.captures(html) else {
        return;
    };

    // Group 2 of the headline pattern is the headline text,
    // group 1 of the address pattern is the dotted IPv4 address.
    let status = status_caps[2].trim();
    let ip = ip_caps[1].trim();

    // ANSI escape sequences: red for a negative result, green otherwise.
    let color = match status.to_lowercase().contains("not") {
        true => "\x1b[31m",
        false => "\x1b[32m",
    };

    println!("{}{}\x1b[0m ({})", color, status, ip);
}
[package]
name = "arti-ureq-impl-test"
version = "0.1.0"
edition = "2021"

[dependencies]
arti-ureq = { path = "../arti-ureq" }
arti-client = { version = "0.25.0", features = ["rustls"] }
tor-rtcompat= {version =  "0.25.0", features = ["async-std", "rustls"] }
ureq = { version = "=3.0.0-rc4", features = ["charset", "gzip", "rustls"] }
regex = "1.11.1"

Arti-ureq library

use std::pin::Pin;
use std::sync::{Arc, Mutex};

use arti_client::{config::Reconfigure, IntoTorAddr, TorClient, TorClientConfig};
use ureq::unversioned::transport::{
    Buffers, ConnectionDetails, Connector as TlsConn, LazyBuffers, NextTimeout, Transport,
};

use educe::Educe;
use pin_project::pin_project;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tor_proto::stream::{DataReader, DataWriter};
use tor_rtcompat::Runtime;
use ureq::http::{uri::Scheme, Uri};

/// **Main entrypoint**: Create a new `ArtiConnector` to make HTTP/S requests through Tor using ureq.
///
/// It combines the `TorClient` from Arti with the `Connector` trait from ureq.
/// The wrapped connector `TC` performs the TLS upgrade, so the caller decides which
/// TLS implementation (e.g. native-tls or rustls) is used; bare HTTP works as well.
///
/// It also holds the runtime used for bridging between Arti's async I/O and ureq's sync I/O.
#[derive(Educe)]
#[educe(Debug)]
pub struct ArtiConnector<R: Runtime, TC: TlsConn> {
    /// Tor client used to open a stream to the requested host. Omitted from `Debug` output.
    #[educe(Debug(ignore))]
    client: Arc<TorClient<R>>,
    /// Runtime that is blocked on to drive Arti's futures from ureq's synchronous calls.
    rt: R,
    /// ureq connector that upgrades the Tor transport for HTTPS. Omitted from `Debug` output.
    #[educe(Debug(ignore))]
    tls_conn: TC,
}

/// Wrapper type implementing `ureq::unversioned::transport::Transport`.
///
/// It provides the buffers ureq uses to perform a request and moves the buffered
/// data to and from the stream obtained from `arti_client::TorClient`.
#[derive(Educe)]
#[educe(Debug)]
#[pin_project]
struct ArtiHttpConnection<R: Runtime> {
    #[educe(Debug(ignore))]
    #[pin]
    inner: HttpStreamType,      // Bare HTTP buffers or a wrapped HTTPS transport, both through Tor.
    r: Arc<Mutex<DataReader>>,  // Read half of the Tor stream; data is read from it into the ureq buffer.
    w: Arc<Mutex<DataWriter>>,  // Write half of the Tor stream; data from the ureq buffer is written to it.
    rt: R,                      // Runtime bridging the async I/O of Arti and the sync I/O of ureq.
}

/// Wrapper around `ureq::unversioned::transport::LazyBuffers` that provides the
/// input/output buffers for a bare HTTP connection.
#[derive(Educe)]
#[educe(Debug)]
#[pin_project]
struct ArtiHttpInner {
    #[educe(Debug(ignore))]
    buffer: LazyBuffers, // ureq's default buffer implementation; handed out from `Transport::buffers`.
}

/// Intermediary between the ureq buffers and the Arti stream.
///
/// A connection is either plain HTTP, in which case this type owns the buffers
/// itself, or HTTPS, in which case it holds another transport.
#[derive(Educe)]
#[educe(Debug)]
#[pin_project(project = HttpStreamTypeProj)]
enum HttpStreamType {
    Secure(#[pin] Box<dyn Transport>), // HTTPS/TLS over Tor: the transport returned by the TLS connector.
    Unsecure(Pin<Box<ArtiHttpInner>>), // Bare HTTP over Tor: the buffers owned by this connection.
}

/// Whether a connection has to be secured with TLS, as derived from the URI scheme.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
enum UseTls {
    /// No TLS: plain `http`.
    Bare,

    /// TLS required: `https`.
    Tls,
}

/// Implement ureq::unversioned::transport::Transport for ArtiHttpConnection.
/// Due to this implementation ArtiConnector has a valid Transport to be used with ureq.
///
/// In this implementation we map the ureq buffer to the arti stream. And map the
/// methods to receive and transmit data between ureq and arti.
///
/// Here we also bridge between the sync context ureq is usually called from and Arti async I/O
/// by blocking the provided runtime. Preferably a runtime only used for Arti should be provided.
impl<R: Runtime> Transport for ArtiHttpConnection<R> {
    // Obtain buffers used by ureal.
    fn buffers(&mut self) -> &mut dyn Buffers {
        match &mut self.inner {
            HttpStreamType::Secure(t) => t.buffers(),
            HttpStreamType::Unsecure(ahi) => &mut ahi.buffer,
        }
    }

    // Write received data from ureq request to arti stream.
    fn transmit_output(&mut self, amount: usize, timeout: NextTimeout) -> Result<(), ureq::Error> {
        let mut writer = match self.w.lock() {
            Ok(w) => w,
            Err(_) => {
                return Err(ureq::Error::Io(std::io::Error::new(
                    std::io::ErrorKind::Other,
                    "Mutex lock failed",
                )));
            }
        };

        // Inner function writing to arti stream, depening on the buffer
        // parameter the data from the HTTP or the HTTPS buffer is written.
        let mut transmit_output_inner = |rt: R,
                                         amount: usize,
                                         _timeout: NextTimeout,
                                         buffer: &mut [u8]|
         -> Result<(), ureq::Error> {
            let data_to_write = &buffer[..amount];
            rt.block_on(async {
                writer
                    .write_all(data_to_write)
                    .await
                    .map_err(ureq::Error::Io)?;
                writer.flush().await.map_err(ureq::Error::Io)?;
                Ok(())
            })
        };

        let inner = std::pin::Pin::new(&mut self.inner);
        let rt_clone = self.rt.clone();
        match inner.project() {
            HttpStreamTypeProj::Secure(mut t) => {
                // Write data from HTTPS buffer to arti stream.
                if let Err(e) = t.transmit_output(amount, timeout) {
                    println!("Error: {:?}", e);
                }
                let buffer = t.buffers().output();
                transmit_output_inner(rt_clone, amount, timeout, buffer)
            }
            HttpStreamTypeProj::Unsecure(ahi) => {
                // Write data from HTTP buffer to arti stream.
                let buffer = ahi.buffer.output();
                transmit_output_inner(rt_clone, amount, timeout, buffer)
            }
        }
    }

    // Read data from arti stream to ureq buffer.
    fn await_input(&mut self, timeout: NextTimeout) -> Result<bool, ureq::Error> {
        let mut reader = match self.r.lock() {
            Ok(r) => r,
            Err(_) => {
                return Err(ureq::Error::Io(std::io::Error::new(
                    std::io::ErrorKind::Other,
                    "Mutex lock failed",
                )));
            }
        };

        // Inner function reading from arti stream, depending on the buffer
        // parameter the data is read to the HTTP or the HTTPS buffer.
        let mut await_input_inner = |rt: R, buffers: &mut [u8]| -> Result<usize, ureq::Error> {
            rt.block_on(async {
                let mut temp_buf = vec![0; buffers.len()];
                let read_result = reader.read(&mut temp_buf).await;
                match read_result {
                    Ok(size) if size > 0 => {
                        buffers[..size].copy_from_slice(&temp_buf[..size]);
                        Ok(size)
                    }
                    Ok(_) => Ok(0),
                    Err(e) => {
                        println!("Error: {:?}", e);
                        Err(ureq::Error::Io(e))
                    }
                }
            })
        };

        let inner = std::pin::Pin::new(&mut self.inner);
        let rt_clone = self.rt.clone();
        match inner.project() {
            HttpStreamTypeProj::Secure(mut t) => {
                // Read data from arti HTTPS stream to HTTPS buffer.
                if let Err(e) = t.await_input(timeout) {
                    println!("Error: {:?}", e);
                }
                let buffer = t.buffers().input_append_buf();
                let size = await_input_inner(rt_clone, buffer).expect("Error reading from stream.");
                t.buffers().input_appended(size);

                Ok(size > 0)
            }
            HttpStreamTypeProj::Unsecure(ahi) => {
                // Read data from arti HTTP stream to HTTP buffer.
                let buffer = ahi.buffer.input_append_buf();
                let size = await_input_inner(rt_clone, buffer).expect("Error reading from stream.");
                ahi.buffer.input_appended(size);

                Ok(size > 0)
            }
        }
    }

    // Check if the connection is open.
    fn is_open(&mut self) -> bool {
        match &mut self.inner {
            HttpStreamType::Secure(t) => t.is_open(),
            HttpStreamType::Unsecure(_) => {
                let _r = match self.r.lock() {
                    Ok(r) => r,
                    Err(_) => {
                        return false;
                    }
                };

                (*_r).ctrl().is_open()
            }
        }
    }
}

/// The connector produces multiple transports and links them in a chain.
/// First a socket connection is made. If TLS is being used, the connection is upgraded to TLS.
impl<R: Runtime, TC: TlsConn> TlsConn for ArtiConnector<R, TC> {
    fn connect(
        &self,
        details: &ConnectionDetails,
        chained: Option<Box<dyn Transport>>,
    ) -> Result<Option<Box<dyn Transport>>, ureq::Error> {
        // Reuse the chained transport if it is already created.
        if chained.is_some() {
            return Ok(chained);
        }

        let (host, port, use_tls) = uri_to_host_port_tls(details.uri).expect("Error parsing uri.");

        // Connect to Tor using Arti, receive the stream of this
        // connection and split it.
        // The data of this stream will be read/written to/from the ureq buffer.
        let split_stream = |host: String, port: u16| async move {
            // Create a safe Tor address from the host and port.
            let addr = (&host as &str, port)
                .into_tor_addr()
                .expect("Error creating Tor address.");

            // Request stream from TorClient and split.
            let stream = self.client.connect(addr).await.map_err(|e| {
                ureq::Error::Io(std::io::Error::new(
                    std::io::ErrorKind::Other,
                    format!("Error creating stream: {:?}", e),
                ))
            })?;
            let (r, w) = stream.split();

            Ok::<_, ureq::Error>((r, w))
        };

        // Create a transport for bare HTTP.
        // If HTTPS is used we use this bare HTTP transport to upgrade it to a TLS connection.
        let insecure_transport = |host: String, port: u16| async move {
            let (r, w) = split_stream(host, port).await?;
            let inner = Box::new(HttpStreamType::Unsecure(Box::pin(ArtiHttpInner {
                buffer: LazyBuffers::new(2048, 2048),
            })));

            let transport = Box::new(ArtiHttpConnection::<R> {
                inner: *inner,
                r: Arc::new(Mutex::new(r)),
                w: Arc::new(Mutex::new(w)),
                rt: self.rt.clone(),
            }) as Box<dyn Transport>;

            Ok::<Box<dyn Transport>, ureq::Error>(transport)
        };

        // Block the runtime to connect to Tor and create the transport.
        self.rt.block_on(async {
            match use_tls {
                // Create a secure HTTPS connection over TLS through Tor.
                UseTls::Tls => {
                    // First obtain a bare HTTP transport.
                    let chained = if chained.is_none() {
                        Some(insecure_transport(host.to_string(), port).await?)
                    } else {
                        chained
                    };

                    // Upgrade the bare HTTP transport to a TLS connection.
                    let conn = self
                        .tls_conn
                        .connect(details, chained)
                        .map_err(|e| {
                            ureq::Error::Io(std::io::Error::new(
                                std::io::ErrorKind::Other,
                                format!("Error creating TLS connection: {:?}", e),
                            ))
                        })?
                        .expect("TLS connection failed.");

                    // Split the arti stream into a read-write pair and create the transport.
                    let (r, w) = split_stream(host.to_string(), port).await?;
                    let transport = ArtiHttpConnection::<R> {
                        inner: *Box::new(HttpStreamType::Secure(conn)),
                        r: Arc::new(Mutex::new(r)),
                        w: Arc::new(Mutex::new(w)),
                        rt: self.rt.clone(),
                    };

                    Ok::<_, ureq::Error>(Some(Box::new(transport) as Box<dyn Transport>))
                }

                // Create a bare HTTP connection through Tor.
                UseTls::Bare => insecure_transport(host.to_string(), port).await.map(Some),
            }
        })
    }
}

impl<R: Runtime, TC: TlsConn> ArtiConnector<R, TC> {
    /// Create a new ArtiConnector with a TorClient and a TlsConn.
    ///
    /// The Tor client is created unbootstrapped with `config` applied at construction
    /// time; `rt` is used both by the client and for blocking on Arti's async I/O.
    ///
    /// # Panics
    ///
    /// Panics if the Tor client cannot be created from `config`.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// let rt = tor_rtcompat::PreferredRuntime::create().expect("Failed to create runtime.");
    /// let tls = ureq::unversioned::transport::DefaultConnector::default();
    /// let config = arti_client::TorClientConfig::default();
    ///
    /// let arti_connector = arti_ureq::ArtiConnector::new(rt, tls, &config);
    /// let agent = ureq::Agent::with_parts(
    ///     ureq::config::Config::default(),
    ///     arti_connector,
    ///     ureq::unversioned::resolver::DefaultResolver::default(),
    /// );
    ///
    /// agent.get("https://check.torproject.org").call()?;
    /// ```
    pub fn new(rt: R, tls_conn: TC, config: &TorClientConfig) -> Self {
        // Hand the configuration to the builder instead of reconfiguring a client that
        // was built with the defaults: reconfiguration may reject options that cannot
        // be changed once the client exists.
        let client = TorClient::with_runtime(rt.clone())
            .config(config.clone())
            .create_unbootstrapped()
            .expect("Error creating Tor Client.");

        Self {
            client: Arc::new(client),
            rt,
            tls_conn,
        }
    }
}

/// Split a URI into host, port and the TLS requirement implied by its scheme.
///
/// `http` maps to `UseTls::Bare` and `https` to `UseTls::Tls`. When the URI
/// carries no explicit port, 80 is used for bare HTTP and 443 for TLS.
///
/// # Errors
///
/// Returns `"Unsupported scheme"` for a missing or any other scheme and
/// `"Missing hostname"` when the URI has no host.
fn uri_to_host_port_tls(uri: &Uri) -> Result<(String, u16, UseTls), String> {
    let use_tls = match uri.scheme() {
        Some(scheme) if *scheme == Scheme::HTTP => UseTls::Bare,
        Some(scheme) if *scheme == Scheme::HTTPS => UseTls::Tls,
        _ => return Err("Unsupported scheme".to_owned()),
    };

    let hostname = uri.host().ok_or_else(|| "Missing hostname".to_owned())?;

    // Well-known port of the scheme, used when the URI does not name one.
    let default_port = match use_tls {
        UseTls::Bare => 80,
        UseTls::Tls => 443,
    };
    let port = uri.port().map_or(default_port, |p| p.as_u16());

    Ok((hostname.to_owned(), port, use_tls))
}
[package]
name = "arti-ureq"
version = "0.1.0"
edition = "2021"

[dependencies]
arti-client = { version = "0.25.0" }
tor-proto = { version = "0.25.0", features = [ "stream-ctrl", "tokio" ] }
tor-rtcompat= {version =  "0.25.0", features = [ "async-std" ] }
ureq = { version = "=3.0.0-rc4", features = [ "charset", "gzip" ] }
tokio = { version = "1.42.0", features = [ "full" ] }
educe = "=0.4.23"
pin-project = "1.1.8"
1 Like