Hey everybody!
As a project to learn Rust, I'm trying to port this library from Python. It is basically a bunch of functions that take Yiddish strings as input, modify those strings, and then output them.
This means that there are a lot of repeated string replacements. I have struggled a lot with the borrow checker, but I've found a way of doing these replacements that compiles. Nonetheless, I have a feeling I am being inefficient or unidiomatic, or just doing something wrong in general... For example, the number of `let`s in `replace_punctuation()` seems bad, but I can't figure out another way to do it.
I would be very grateful for any critique or advice on working with strings in Rust! Below is my code:
use regex::Regex;
use std::borrow::Borrow;
//////////
//encoding
//////////
/// Replacement table shared by `replace_with_precombined` and
/// `replace_with_decomposed`.
///
/// `replace_with_precombined` rewrites every occurrence of `.0` to `.1`;
/// `replace_with_decomposed` rewrites `.1` back to `.0`. Both walk the table
/// in the order written here, so the order of the entries is significant.
///
/// NOTE(review): presumably `.0` is the decomposed spelling (base letter(s)
/// plus combining mark) and `.1` the single precombined code point. The two
/// sides of an entry can render identically, so edit them by code point,
/// not by appearance.
const PAIRS: [(&str, &str); 14] = [
("וּ", "וּ"),
("יִ", "יִ"),
("ײַ", "ײַ"),
("וו", "װ"),
("וי", "ױ"),
("יי", "ײ"),
("אַ", "אַ"),
("אָ", "אָ"),
("בֿ", "בֿ"),
("כּ", "כּ"),
("פּ", "פּ"),
("פֿ", "פֿ"),
("שׂ", "שׂ"),
("תּ", "תּ"),
];
/// Returns a copy of `input` in which, for every entry of `PAIRS` taken in
/// table order, each occurrence of the entry's first string has been
/// replaced by its second string.
///
/// After the table has been applied, the dotted beys is replaced by a plain
/// beys, because that diacritic is not used in YIVO orthography.
fn replace_with_precombined(input: &str) -> String {
    let combined = PAIRS
        .iter()
        .fold(input.to_owned(), |text, &(from, to)| text.replace(from, to));

    // Two passes: presumably one per encoding of the dotted beys
    // (the literals may look the same but are kept as two separate steps).
    combined.replace("בּ", "ב").replace("בּ", "ב")
}
/// Returns a copy of `input` in which, for every entry of `PAIRS` taken in
/// table order, each occurrence of the entry's second string has been
/// replaced by its first string.
///
/// When `vov_yud` is `true`, these will be preserved as precombined chars:
/// װ, ײ, ױ. When it is `false`, they are rewritten like every other entry.
///
/// After the table has been applied:
/// - the sequence yud + yud + pasekh is rewritten to the double-yud form
///   with pasekh, regardless of `vov_yud`;
/// - the dotted beys is replaced by a plain beys (diacritic not used in YIVO).
fn replace_with_decomposed(input: &str, vov_yud: bool) -> String {
    let mut result = input.to_string();
    for (decomposed, precombined) in PAIRS {
        // Skip the ligatures only when the caller asked to keep them.
        // (The check used to be negated, which preserved them exactly when
        // `vov_yud` was false, i.e. the opposite of the documented contract.)
        if vov_yud && matches!(precombined, "װ" | "ױ" | "ײ") {
            continue;
        }
        result = result.replace(precombined, decomposed);
    }

    result
        .replace("ייַ", "ײַ")
        .replace("בּ", "ב")
        .replace("בּ", "ב")
}
/// Normalizes punctuation in `input` and returns the result as a new `String`.
///
/// Three substitutions are applied, in this order:
/// - `-` becomes `־` (YIVO-style hyphen);
/// - `′` and `׳` become `'`;
/// - `″` and `״` become `"`.
///
/// Every pattern is just a set of single characters, so `str::replace` is
/// called with a `char` / `[char; N]` pattern. This avoids compiling three
/// regular expressions on every call and removes the chain of shadowed
/// `let` bindings that borrowing from `Regex::replace_all` required.
fn replace_punctuation(input: &str) -> String {
    input
        .replace('-', "־") // YIVO-style hyphen
        .replace(['′', '׳'], "'")
        .replace(['″', '״'], "\"")
}
fn strip_diacritics(input: &str) -> String{
let result = replace_with_decomposed(input, false);
let re = Regex::new(r"[ִַַָּּּּֿֿׂ]").unwrap();
let result = &re.replace_all(result.as_str(), "");
return result.to_string();
}
///////////////////////////////////////////
// transliteration/romanization and reverse
///////////////////////////////////////////
///////////////////////////////////////////
// import loshn-koydesh pronunciation list
///////////////////////////////////////////
/// Demo entry point: strips the diacritics from a sample Yiddish sentence
/// and prints the result to standard output.
fn main() {
    // The previously declared `input` sample was never read and only
    // produced an unused-variable warning, so it has been dropped.
    let stripped = strip_diacritics("װאָס הערט זיך מײַן חבֿר?");
    println!("{}", stripped);
}