gh0st42
November 18, 2021, 10:46am
1
Is there an easy way to have one struct that encodes Vec<u8>
or &[u8]
differently depending on the used (de)serializer, e.g., serde_json
or serde_cbor
?
The following example unfortunately doesn't work in rust playground (missing serde_cbor
and serde_bytes
crates):
use serde::{Serialize, Deserialize};
/// Packet variant whose `data` bytes are (de)serialized through the local
/// `base64` module, i.e. as a base64 string.
#[derive(Serialize, Deserialize)]
pub struct PacketJson {
// 16-bit id field; uses serde's default handling for `u16`.
id: u16,
// Payload bytes; routed through `base64::serialize` / `base64::deserialize`.
#[serde(with="base64")]
data: Vec<u8>,
}
mod base64 {
use serde::{Serialize, Deserialize};
use serde::{Deserializer, Serializer};
pub fn serialize<S: Serializer>(v: &Vec<u8>, s: S) -> Result<S::Ok, S::Error> {
let base64 = base64::encode(v);
String::serialize(&base64, s)
}
pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<Vec<u8>, D::Error> {
let base64 = String::deserialize(d)?;
base64::decode(base64.as_bytes())
.map_err(|e| serde::de::Error::custom(e))
}
}
/// Packet variant whose `data` bytes are (de)serialized through the
/// `serde_bytes` crate.
#[derive(Serialize, Deserialize)]
pub struct PacketCbor {
// 16-bit id field; uses serde's default handling for `u16`.
id: u16,
// Payload bytes; routed through `serde_bytes::serialize` / `serde_bytes::deserialize`.
#[serde(with="serde_bytes")]
data: Vec<u8>,
}
/// Builds one packet of each flavour and prints its serialized form:
/// the JSON text for `PacketJson`, the raw CBOR bytes for `PacketCbor`.
fn main() {
    println!("Hello, world!");

    let json_packet = PacketJson {
        id: 23,
        data: vec![1, 2, 3, 4],
    };
    let json_text = serde_json::to_string(&json_packet).unwrap();
    println!("{}", json_text);

    let cbor_packet = PacketCbor {
        id: 23,
        data: vec![1, 2, 3, 4],
    };
    let cbor_bytes = serde_cbor::to_vec(&cbor_packet).unwrap();
    println!("{:?}", cbor_bytes);
}
Here, both structs, PacketJson
and PacketCbor
, are basically the same but have different serde-with encodings.
Maintaining different structs is error-prone, and there is probably a more idiomatic way to achieve this.
You could make the case distinction based on the is_human_readable
information that the Serializer and Deserializer provide. Use a base64 string for human-readable formats and the ordinary byte sequence encoding for non-human-readable ones.
use serde::{Deserialize, Serialize};
/// Single packet type whose `data` bytes are (de)serialized through the
/// `base64_or_bytes` module.
#[derive(Serialize, Deserialize)]
pub struct Packet {
// 16-bit id field; uses serde's default handling for `u16`.
id: u16,
// Payload bytes; routed through `base64_or_bytes::serialize` / `base64_or_bytes::deserialize`.
#[serde(with = "base64_or_bytes")]
data: Vec<u8>,
}
mod base64_or_bytes {
use serde::{Deserialize, Serialize};
use serde::{Deserializer, Serializer};
#[allow(clippy::ptr_arg)]
pub fn serialize<S: Serializer>(v: &Vec<u8>, s: S) -> Result<S::Ok, S::Error> {
if s.is_human_readable() {
let base64 = base64::encode(v);
String::serialize(&base64, s)
} else {
serde_bytes::serialize(v, s)
}
}
pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<Vec<u8>, D::Error> {
if d.is_human_readable() {
let base64 = String::deserialize(d)?;
base64::decode(base64.as_bytes()).map_err(serde::de::Error::custom)
} else {
serde_bytes::deserialize(d)
}
}
}
/// Serializes the same `Packet` shape twice — once to JSON text, once to CBOR
/// bytes — and prints both results.
fn main() {
    println!("Hello, world!");

    let json_packet = Packet {
        id: 23,
        data: vec![1, 2, 3, 4],
    };
    let json_text = serde_json::to_string(&json_packet).unwrap();
    println!("{}", json_text);

    let cbor_packet = Packet {
        id: 23,
        data: vec![1, 2, 3, 4],
    };
    let cbor_bytes = serde_cbor::to_vec(&cbor_packet).unwrap();
    println!("{:?}", cbor_bytes);
}
2 Likes
You can also avoid allocating an intermediate buffer:
/// Serializes `v` as base64 text for human-readable serializers, streaming the
/// `Base64Display` adapter through `collect_str`; every other serializer gets
/// the `serde_bytes` encoding.
// `&Vec<u8>` is kept as the parameter type, hence the lint exemption.
#[allow(clippy::ptr_arg)]
pub fn serialize<S: Serializer>(v: &Vec<u8>, s: S) -> Result<S::Ok, S::Error> {
    if !s.is_human_readable() {
        return serde_bytes::serialize(v, s);
    }
    let rendered = Base64Display::with_config(v, base64::STANDARD);
    s.collect_str(&rendered)
}
pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<Vec<u8>, D::Error> {
if d.is_human_readable() {
struct Visitor;
impl<'de> serde::de::Visitor<'de> for Visitor {
type Value = Vec<u8>;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("a string")
}
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
self.visit_bytes(v.as_ref())
}
fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
self.visit_bytes(v.as_ref())
}
fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
base64::decode(v).map_err(serde::de::Error::custom)
}
fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
self.visit_bytes(v.as_ref())
}
}
d.deserialize_str(Visitor)
} else {
serde_bytes::deserialize(d)
}
}
3 Likes
Thank you for the proposed solutions, is_human_readable
seems like a viable solution at least for my use-case.
serde_bytes
does work with Vec<u8>
as well as with &'a [u8]
.
I'm struggling a bit to make either variant work with deserializing into structs containing &'a [u8]
.
Let alone having it work with both at the same time.
No matter what I do I'm always trying to return a reference to data owned by the current function.
I guess the problem is the call to base64::decode
which allocates a new Vec<u8>
.
But switching to base64::decode_config_slice
also does not seem to be what I'm looking for.
Just doing the serde_bytes
deserialization works with references just fine.
system
Closed
March 3, 2022, 4:59pm
6
This topic was automatically closed 90 days after the last reply. We invite you to open a new topic if you have further questions or comments.