I trying to make a simple url parser module. but have a trouble with casting vec to str,please help.
use std::mem;
// '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"':
const HOST_NOT_ALLOWS: [u8; 17] = [33, 34, 36, 38, 39, 40, 41, 42, 43, 44, 58, 59, 60, 61, 62, 91, 93];
// '$', '&', '+', ',', '/', ':', ';', '=', '?', '@':
const RESERVED_CHARACTERS: [u8; 10] = [36, 38, 43, 44, 47, 58, 59, 61, 63, 64];
// '-', '_', '.', '~'
const UNRESERVED_CHARACTERS: [u8; 4] = [45, 46, 95, 126];
const NOT_ALLOWS_PASSWORD: [u8; 4] = [47, 58, 63, 64];
#[derive(PartialEq)]
enum EncodeMode {
Path,
Host,
Zone,
UserPassword,
QueryComponent,
Fragment,
}
fn fast_str(v: &[u8]) -> &'static str {
let x: &str = unsafe { mem::transmute(v) };
x
}
fn should_escape(c: u8, mode: &EncodeMode) -> bool {
if is_alpha_numeric(c) {
return false;
}
match mode {
&EncodeMode::Host | &EncodeMode::Zone => {
if HOST_NOT_ALLOWS.contains(&c) {
return true;
} else {
return false;
}
}
_ => {
if UNRESERVED_CHARACTERS.contains(&c) {
return false;
}
if RESERVED_CHARACTERS.contains(&c) {
let x = match mode {
// 63='?'
&EncodeMode::Path => c == 63,
&EncodeMode::UserPassword => NOT_ALLOWS_PASSWORD.contains(&c),
&EncodeMode::QueryComponent => true,
_ => false,
};
return x;
}
true
}
}
}
fn escape(url: &'static str, mode: &EncodeMode) -> &'static str {
let mut space_count = 0usize;
let mut hex_count = 0usize;
let url_bytes = url.as_bytes();
for b in url_bytes {
if should_escape(*b, mode) {
if *b == 32 && mode == &EncodeMode::QueryComponent {
space_count += 1;
} else {
hex_count += 1;
}
}
}
if space_count == 0 && hex_count == 0 {
return url;
}
let l = url_bytes.len() + 2 * hex_count;
let mut t: Vec<u8> = vec![0;l as usize];
let mut j = 0;
let s = "0123456789ABCDEF".as_bytes();
for v in url_bytes {
if *v == 32 && mode == &EncodeMode::QueryComponent {
t[j] = 43;
j += 1;
}else if should_escape(*v, mode) {
t[j] = 37;
t[j + 1] = s[(v >> 4) as usize];
t[j + 2] = s[(v & 15) as usize];
j += 3;
} else {
t[j] = *v;
j += 1;
}
}
let result: &str = fast_str(&t);
result
}
fn is_alpha_numeric(cc: u8) -> bool {
match cc {
97...122 | 65...90 | 48...57 => true,
_ => false,
}
}
fn main() {
println!("{:?}",
escape("/test?id=32&value=我们", &EncodeMode::Path));
}
Output:
"\u{1}\u{0}\u{0}\u{0}\u{1}\u{0}\u{0}\u{0}\u{1}\u{0}\u{0}\u{0}\u{1}\u{0}\u{0}\u{0}\u{1}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}"
but, if add println!("{:?}",t);
,like this:
use std::mem;
// '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"':
const HOST_NOT_ALLOWS: [u8; 17] = [33, 34, 36, 38, 39, 40, 41, 42, 43, 44, 58, 59, 60, 61, 62, 91, 93];
// '$', '&', '+', ',', '/', ':', ';', '=', '?', '@':
const RESERVED_CHARACTERS: [u8; 10] = [36, 38, 43, 44, 47, 58, 59, 61, 63, 64];
// '-', '_', '.', '~'
const UNRESERVED_CHARACTERS: [u8; 4] = [45, 46, 95, 126];
const NOT_ALLOWS_PASSWORD: [u8; 4] = [47, 58, 63, 64];
#[derive(PartialEq)]
enum EncodeMode {
Path,
Host,
Zone,
UserPassword,
QueryComponent,
Fragment,
}
fn fast_str(v: &[u8]) -> &'static str {
let x: &str = unsafe { mem::transmute(v) };
x
}
fn should_escape(c: u8, mode: &EncodeMode) -> bool {
if is_alpha_numeric(c) {
return false;
}
match mode {
&EncodeMode::Host | &EncodeMode::Zone => {
if HOST_NOT_ALLOWS.contains(&c) {
return true;
} else {
return false;
}
}
_ => {
if UNRESERVED_CHARACTERS.contains(&c) {
return false;
}
if RESERVED_CHARACTERS.contains(&c) {
let x = match mode {
// 63='?'
&EncodeMode::Path => c == 63,
&EncodeMode::UserPassword => NOT_ALLOWS_PASSWORD.contains(&c),
&EncodeMode::QueryComponent => true,
_ => false,
};
return x;
}
true
}
}
}
fn escape(url: &'static str, mode: &EncodeMode) -> &'static str {
let mut space_count = 0usize;
let mut hex_count = 0usize;
let url_bytes = url.as_bytes();
for b in url_bytes {
if should_escape(*b, mode) {
if *b == 32 && mode == &EncodeMode::QueryComponent {
space_count += 1;
} else {
hex_count += 1;
}
}
}
if space_count == 0 && hex_count == 0 {
return url;
}
let l = url_bytes.len() + 2 * hex_count;
let mut t: Vec<u8> = vec![0;l as usize];
let mut j = 0;
let s = "0123456789ABCDEF".as_bytes();
for v in url_bytes {
if *v == 32 && mode == &EncodeMode::QueryComponent {
t[j] = 43;
j += 1;
}else if should_escape(*v, mode) {
t[j] = 37;
t[j + 1] = s[(v >> 4) as usize];
t[j + 2] = s[(v & 15) as usize];
j += 3;
} else {
t[j] = *v;
j += 1;
}
}
println!("{:?}",t);
let result: &str = fast_str(&t);
result
}
fn is_alpha_numeric(cc: u8) -> bool {
match cc {
97...122 | 65...90 | 48...57 => true,
_ => false,
}
}
fn main() {
println!("{:?}",
escape("/test?id=32&value=我们", &EncodeMode::Path));
}
Output:
[47, 116, 101, 115, 116, 37, 51, 70, 105, 100, 61, 51, 50, 38, 118, 97, 108, 117, 101, 61, 37, 69, 54, 37, 56, 56, 37, 57, 49, 37, 69, 52, 37, 66, 66, 37, 65, 67]
"/test%3Fid=32&value=%E6%88%91%E4%BB%AC"
I just get what i wanted. why would this be?