How to improve Rust deserialization performance in my code

Hi, I am a beginner in Rust and bioinformatics. I know that users can use #[derive(Serialize)] to improve serialization. I would like to learn how to enhance Rust deserialization performance in my code.

use serde_json::Deserializer;
use std::fs::File;
use std::io::{self, Write};

/// Streams JSON values from stdin and writes the selected `node_id`s to
/// `output`, one per line.
///
/// For every parsed value, each element of the `path.mapping` array is passed
/// to [`selected_node_id`]; when that returns an id, it is written out.
/// Values without a `path` object or a `path.mapping` array are ignored.
///
/// Parsing stops at the first malformed value: the error is reported on
/// stderr and the ids collected so far are still flushed to `output`.
///
/// # Panics
///
/// Panics with `"write failed"` if writing to or flushing `output` fails.
pub fn run(output: File) {
    // A bare `File` issues one syscall per `writeln!`; buffering batches the
    // many tiny writes into a few large ones.
    let mut output = io::BufWriter::new(output);

    // from stdin
    let stdin = io::stdin();
    let reader = io::BufReader::new(stdin.lock());

    let stream =
        Deserializer::from_reader(reader).into_iter::<serde_json::Value>();
    for value in stream {
        let json = match value {
            Ok(json) => json,
            Err(err) => {
                eprintln!("Failed to parse JSON: {}", err);
                break;
            }
        };

        let mappings = json
            .get("path")
            .and_then(|p| p.as_object())
            .and_then(|path| path.get("mapping"))
            .and_then(|m| m.as_array());

        if let Some(mappings) = mappings {
            for node_id in mappings.iter().filter_map(selected_node_id) {
                writeln!(output, "{}", node_id).expect("write failed");
            }
        }
    }

    // Dropping a `BufWriter` swallows flush errors, so flush explicitly.
    output.flush().expect("write failed");
}

/// Returns the `position.node_id` string of one `mapping` element, or `None`
/// when the element must be skipped.
///
/// An element is skipped when:
/// * its `edit` array contains an entry that has a `sequence` key or is an
///   object with an odd number of keys,
/// * it has no `position` object,
/// * its `position` has an `offset` key, or
/// * its `position.node_id` is missing or is not a JSON string.
fn selected_node_id(map: &serde_json::Value) -> Option<&str> {
    if let Some(edits) = map.get("edit").and_then(|e| e.as_array()) {
        let skip = edits.iter().any(|edit| {
            edit.get("sequence").is_some()
                || edit
                    .as_object()
                    .map_or(false, |obj| obj.len() % 2 != 0)
        });
        if skip {
            return None;
        }
    }

    let position = map.get("position").and_then(|p| p.as_object())?;
    if position.get("offset").is_some() {
        return None;
    }
    position.get("node_id").and_then(|id| id.as_str())
}

If you have a predefined data schema, you should define your own types instead of using the generic serde_json::Value. For example, I'm assuming your data looks something like this:

{
  "path": {
    "mapping": [
      {
        "edit": [
          {
            "sequence": "acctg"
          }
        ],
        "position": {
          "offset": "12345"
        }
      }
    ]
  }
}

You can define your data types like this and use the serde crate to derive Deserialize:

use serde::Deserialize;z
#[derive(Deserialize)]
struct MyStruct {
    path: Path,
}
#[derive(Deserialize)]
struct Path {
    mappping: Vec<Mapping>,
}
#[derive(Deserialize)]
struct Mapping {
    edit: Vec<Edit>,
    position: Position,
}
#[derive(Deserialize)]
struct Edit {
    sequence: String,
}
#[derive(Deserialize)]
struct Position {
    offset: usize,
}

Then you can use the custom type like this:

let stream = Deserializer::from_reader(reader).into_iter::<MyStruct>();
for value in stream {
    // Each item is a `Result<MyStruct, serde_json::Error>`, so the parse
    // error has to be handled before the fields can be accessed.
    match value {
        Ok(record) => {
            // `first()` instead of `[0]` so an empty array does not panic.
            let first_edit =
                record.path.mapping.first().and_then(|m| m.edit.first());
            if let Some(edit) = first_edit {
                println!("{}", edit.sequence);
            }
        }
        Err(err) => {
            eprintln!("Failed to parse JSON: {}", err);
            break;
        }
    }
}
2 Likes

Thank you, I will try

This topic was automatically closed 90 days after the last reply. We invite you to open a new topic if you have further questions or comments.