Constructing a struct from an array of bytes?

So, I'm writing a driver for NVMe controllers, and the Identify Namespace and Identify Controller data structures look like this in Rust:

// Identify Namespace response data, modeled field-by-field on Figure 247 of
// the NVM Express specification (per this post). The thread states that the
// full response is exactly 4,096 bytes.
// NOTE(review): no #[repr(C)] is shown on this snippet; the author says the
// real code has it. Without it the field layout is not guaranteed.
pub struct IdentifyNamespaceResponse {
    // Namespace size
    // NOTE(review): a reply in this thread says the first three fields
    // (nsez, ncap, nuse) are u64 in the spec, not u128 -- confirm against
    // Figure 247 before relying on this layout.
    // NOTE(review): `nsez` is presumably the spec's NSZE -- confirm spelling.
    pub nsez: u128,
    // Namespace capabilities
    pub ncap: u128,
    // Namespace utilization
    pub nuse: u128,
    // Namespace features
    pub nsfeat: u8,
    // No. of LBA formats
    pub nlbaf: u8,
    // Formatted LBA size
    pub flbas: u8,
    // Metadata capabilities
    pub mc: u8,
    // End-to-end Data Protection Capabilities
    pub dpc: u8,
    // End-to-end Data Protection Type Settings
    pub dps: u8,
    // Namespace Multi-path I/O and Namespace Sharing Capabilities
    pub nmic: u8,
    // Reservation Capabilities
    pub rescap: u8,
    // Format Progress Indicator
    pub fpi: u8,
    // Deallocate Logical Block Features
    pub dlfeat: u8,
    // Namespace Atomic Write Unit Normal
    pub nawun: u16,
    // Namespace Atomic Write Unit Power Fail
    pub nawupf: u16,
    // Namespace Atomic Compare & Write Unit
    pub nacwu: u16,
    // Namespace Atomic Boundary Size Normal
    pub nabsn: u16,
    // Namespace Atomic Boundary Offset
    pub nabo: u16,
    // Namespace Atomic Boundary Size Power Fail
    pub nabspf: u16,
    // Namespace Optimal I/O Boundary
    pub noiob: u16,
    // NVM Capacity
    pub nvmcap: u128,
    // Namespace Preferred Write Granularity
    pub npwg: u16,
    // Namespace Preferred Write Alignment
    pub npwa: u16,
    // Namespace Preferred Deallocate Granularity
    pub npdg: u16,
    // Namespace Preferred Deallocate Alignment
    pub npda: u16,
    // Namespace Optimal Write Size
    pub nows: u16,
    // Reserved
    _rsvd1: [u8; 18],
    // ANA Group Identifier
    pub anagrpid: u32,
    // Reserved. An inclusive byte range 98:96 covers 98 - 96 + 1 = 3 bytes,
    // so a 3-byte array matches the range the author quotes; a u32 would be
    // one byte too long.
    _rsvd2: [u8; 3], // Should this be a u32 (its bytes 98:96)?
    // Namespace attributes
    pub nsattr: u8,
    // NVM Set Identifier
    pub nvmsetid: u16,
    // Endurance Group Identifier
    pub endgid: u16,
    // Namespace Globally Unique Identifier
    // NOTE(review): the thread lists this field (bytes 119:104) as NOT
    // little-endian; a reply suggests storing such fields as [u8; N] with an
    // accessor that converts via from_be_bytes.
    pub nsguid: u128,
    // IEEE Extended Unique Identifier
    // NOTE(review): the thread lists this field (bytes 127:120) as NOT
    // little-endian as well; same suggestion applies.
    pub eui64: u64,
    // LBA Format Support
    pub lbaf: [u32; 16],
    // Reserved
    _rsvd3: [u8; 192],
    // Vendor-specific
    // An inclusive byte range 4095:384 covers 4095 - 384 + 1 = 3712 bytes, so
    // 3712 (not 3713) matches the range the author quotes.
    pub vs: [u8; 3712], // Should this be 3713? Bytes 4095:384.
}

// Identify Controller response data, modeled field-by-field on Figure 249 of
// the NVM Express specification (per this post). The thread states that the
// full response is exactly 4,096 bytes.
// NOTE(review): no #[repr(C)] is shown on this snippet; the author says the
// real code has it. Without it the field layout is not guaranteed.
pub struct IdentifyControllerResponse {
    // PCI Vendor ID
    pub vid: u16,
    // PCI Subsystem Vendor ID
    pub svid: u16,
    // Serial Number
    // NOTE(review): the thread lists SN and MN as not little-endian. A reply
    // notes that fields kept as raw bytes/strings need no byte swapping.
    pub sn: [u8; 20],
    // Model Number
    pub mn: [u8; 40],
    // Firmware Revision
    pub fr: [u8; 8],
    // Recommended Arbitration Burst
    pub rab: u8,
    // IEEE OUI Identifier
    pub ieee: [u8; 3],
    // Controller Multi-Path I/O and Namespace Sharing Capabilities
    pub cmic: u8,
    // Maximum Data Transfer Size
    pub mdts: u8,
    // Controller ID
    pub cntlid: u16,
    // Version
    pub ver: u32,
    // RTD3 Resume Latency
    pub rtd3r: u32,
    // RTD3 Entry Latency
    pub rtd3e: u32,
    // Optional Asynchronous Events Supported
    pub oaes: u32,
    // Controller Attributes
    pub ctratt: u32,
    // Read Recovery Levels Supported
    pub rrls: u16,
    // Reserved
    _rsvd: [u8; 9],
    // Controller Type
    pub cntrltype: u8,
    // FRU Globally Unique Identifier
    // NOTE(review): the thread lists this field (bytes 127:112) as NOT
    // little-endian; a reply suggests storing such fields as [u8; N] with an
    // accessor that converts via from_be_bytes.
    pub fguid: u128,
    // Command Retry Delay Times
    pub crdt: [u16; 3],
    // Reserved
    _rsvd2: [u8; 119],
    // NVM Subsystem Report
    pub nvmsr: u8,
    // VPD Write Cycle Information
    pub vwci: u8,
    // Management Endpoint Capabilities
    pub mec: u8,
    // Optional Admin Command Support
    pub oacs: u16,
    // Abort Command Limit
    pub acl: u8,
    // Asynchronous Event Request Limit
    pub aerl: u8,
    // Firmware Updates
    pub frmw: u8,
    // Log Page Attributes
    pub lpa: u8,
    // Error Log Page Entries
    pub elpe: u8,
    // Number of Power States Support
    pub npss: u8,
    // Admin Vendor Specific Command Configuration
    pub avscc: u8,
    // Autonomous Power State Transition Attributes
    pub apsta: u8,
    // Warning Composite Temperature Threshold
    pub wctemp: u16,
    // Critical Composite Temperature Threshold
    pub cctemp: u16,
    // Maximum Time for Firmware Activation
    pub mtfa: u16,
    // Host Memory Buffer Preferred Size
    pub hmpre: u32,
    // Host Memory Buffer Minimum Size
    pub hmmin: u32,
    // Total NVM Capacity
    pub tnvmcap: u128,
    // Unallocated NVM Capacity
    pub unvmcap: u128,
    // Replay Protected Memory Block Support
    pub rpmbs: u32,
    // Extended Device Self-test Time
    pub edstt: u16,
    // Device Self-test Options
    pub dsto: u8,
    // Firmware Update Granularity
    pub fwug: u8,
    // Keep Alive Support
    pub kas: u16,
    // Host Controlled Thermal Management Attributes
    pub hctma: u16,
    // Minimum Thermal Management Temperature
    pub mntmt: u16,
    // Maximum Thermal Management Temperature
    pub mxtmt: u16,
    // Sanitize Capabilities
    pub sanicap: u32,
    // Host Memory Buffer Minimum Descriptor Entry Size
    pub hmminds: u32,
    // Host Memory Maximum Descriptors Entries
    pub hmmaxd: u16,
    // NVM Set Identifier Maximum
    pub nsetidmax: u16,
    // Endurance Group Identifier Maximum
    pub endgidmax: u16,
    // ANA Transition Time
    pub anatt: u8,
    // Asymmetric Namespace Access Capabilities
    pub anacap: u8,
    // ANA Group Identifier Maximum
    pub anagrpmax: u32,
    // Number of ANA Group Identifiers
    pub nanagrpid: u32,
    // Persistent Event Log Size
    pub pels: u32,
    // Reserved
    _rsvd3: [u8; 156],
    // Submission Queue Entry Size
    pub sqes: u8,
    // Completion Queue Entry Size
    pub cqes: u8,
    // Maximum Outstanding Commands
    pub maxcmd: u16,
    // Number of Namespaces
    pub nn: u32,
    // Optional NVM Command Support
    pub oncs: u16,
    // Fused Operation Support
    pub fuses: u16,
    // Format NVM Attributes
    pub fna: u8,
    // Volatile Write Cache
    pub vwc: u8,
    // Atomic Write Unit Normal
    pub awun: u16,
    // Atomic Write Unit Power Fail
    pub awupf: u16,
    // NVM Vendor Specific Command Configuration
    pub nvscc: u8,
    // Namespace Write Protection Capabilities
    pub nwpc: u8,
    // Atomic Compare & Write Unit
    pub acwu: u16,
    // Reserved
    _rsvd4: u16,
    // SGL Support
    pub sgls: u32,
    // Maximum Number of Allowed Namespaces
    pub mnan: u32,
    // Reserved
    _rsvd5: [u8; 224],
    // NVM Subsystem NVMe Qualified Name
    pub subnqn: [u8; 256],
    // Reserved
    _rsvd6: [u8; 768],
    // NOTE(review): the fields from ioccsz through ofcs are presumably the
    // part that the post says refers to Figure 30 of the NVMe over Fabrics
    // specification -- confirm.
    // I/O Queue Command Capsule Supported Size
    pub ioccsz: u32,
    // I/O Queue Response Capsule Supported Size
    pub iorcsz: u32,
    // In Capsule Data Offset
    pub icdoff: u16,
    // Fabrics Controller Attributes
    pub fcatt: u8,
    // Maximum SGL Data Block Descriptors
    pub msdbd: u8,
    // Optional Fabric Commands Support
    pub ofcs: u16,
    // Reserved
    _rsvd7: [u8; 242],
    // Power State Descriptors
    // NOTE(review): a reply in this thread says psd should have 32 elements
    // (indexes 0..=31); with 31, `vs` starts where PSD31 belongs and the
    // struct comes up short of 4,096 bytes. Confirm against the spec.
    pub psd: [[u128; 2]; 31],
    // Vendor Specific
    pub vs: [u8; 1024],
}

These structures can be found in Figures 247 and 249 of the NVM Express specification (pg. 163-172 and 172-194, sections 5.15.2.1 and 5.15.2.2). Note that figure 249 references figure 30 of the NVMe-Over Fabrics specification (pg. 35, section 4.1). However, I have a couple problems:

  1. Default is not implemented for some of the fields in either data structure (those over 32 items). Implementing Default shouldn't be too difficult, though I don't think it's necessary for these since I shouldn't ever need to initialize them to fill them manually (I hope).
  2. The FromBytes derive macro from the zerocopy crate is not implemented for most of the fields of either data structure. This is a problem because I don't want to pollute my code by attempting to manually fill these data structures and then returning them to the caller (which could easily fill a few hundred lines right there). I know that in C you can do this -- access raw memory and point a structure at it and then access the members -- but I'd like to know if there's a crate that already does this, other than zerocopy, that does not have zerocopy's limitations? Zerocopy is not trivially extensible (in fact, I don't think you can without editing the source code of the crate itself and hacking something together through that). I'll delve into unsafe code if necessary, but I'd like to try and remain in the realm of "safe" code (even if I am working with hardware) before I go and throw down the gauntlet and utilize unsafe member accesses.

Additionally, I'm not precisely sure if my fields are correct as regards the type, though I've done my best to ensure that they do align perfectly within the specification's requirements. However, I can easily check this via other NVMe implementations in C, so I don't need anyone to verify that here (though go ahead if you like).

Since you’re working to an external specification, you probably want to mark those structs #[repr(C)] so that they have a stable layout. Assuming there are no endianness issues, you should be able to mem::transmute between a properly-aligned &[u8; SIZE] and a reference to your struct.

Looking at the zerocopy docs, it should mostly work except for the uncommon array lengths you’re using. Above 64 items, it (mostly) only defines FromBytes for powers of two. If you break up your reserved blocks so that they’re all powers of two in length, you may be able to get it to work.

A potential alternative is to break up the fields into substructures: In figure 249 of the document you linked, for example, the subheadings all appear to be 256-byte aligned, and the reserved bytes tend to be the last item in the section. If you define a #[repr(C, align(256))] struct for each of these subsections, many of the reserved fields can disappear into the padding bytes. Just make sure to verify the data you’re getting matches this larger alignment; I suspect it does, but haven’t looked for confirmation in the spec.

So, assuming that the data is little-endian, I should be able to acquire the bytes from the controller and then do:

transmute::<&[u8], IdentifyControllerResponse>(input_data);

And yes, the struct in the code itself does use #[repr(C)]. Just verifying before I go testing this.

Looks like the first 3 fields are u64 and not u128. I am not sure about endianness, but that needs to be checked before using transmute.

Slice references are a bit problematic with transmute because they’re implemented as fat pointers. I think this should be safe for your structs, but I’d prefer someone more familiar with unsafe code than I am to verify that:

/// Reinterprets `bytes` as a shared reference to a `T` without copying.
///
/// The returned reference borrows from `bytes` and lives exactly as long as
/// the slice does.
///
/// # Safety
///
/// The caller must guarantee that the contents of `bytes` form a valid value
/// of `T` (every bit pattern the slice can hold must be acceptable for `T`,
/// including any padding), and that the byte order of the data matches what
/// `T`'s fields expect. Length and alignment are checked here; validity and
/// endianness are not.
///
/// # Panics
///
/// Panics if `bytes.len()` differs from `size_of::<T>()`, or if the slice does
/// not start at an address suitably aligned for `T`.
unsafe fn cast_ref<'a, T>(bytes: &'a [u8]) -> &'a T {
    // assert correct endianness somehow
    assert_eq!(bytes.len(), mem::size_of::<T>());
    let ptr: *const u8 = bytes.as_ptr();
    // Test the address itself. `align_offset` is documented as being allowed
    // to return `usize::MAX` at any time, so it may only be used as a
    // performance hint and never to decide whether a pointer is aligned.
    assert_eq!(ptr as usize % mem::align_of::<T>(), 0);

    // SAFETY: `ptr` comes from a live slice, so it is non-null and valid for
    // reads of `bytes.len()` bytes for `'a`; the assertions above establish
    // that this is exactly `size_of::<T>()` bytes at an address aligned for
    // `T`. That the bytes are a valid `T` is the caller's obligation.
    unsafe { &*ptr.cast::<T>() }
}

Edit: I tried to harden this a bit and came up with this code, which is probably overkill.

2 Likes

That's the equivalent of:

struct slice { char *data; size_t len; };
struct IdentifyControllerResponse {…};

struct slice input_data;
(struct IdentifyControllerResponse)input_data

so you're reinterpreting a pointer as a struct value.

You probably want ptr::read_unaligned(slice.as_ptr().cast()).

The specification says nothing about the endianness of the data (that I can find anyway). I'm also struggling with size constraints; the spec says that the response is exactly 4,096 bytes in length. So I've added static assertions to ensure that my structs are exactly the right size so that transmutation works properly. However, I'm short a few hundred bits, and not really sure what I've got incorrectly typed. It's on my IdentifyControllerResponse struct, which I currently have defined like this:

// Identify Controller response data, modeled field-by-field on Figure 249 of
// the NVM Express specification (per this thread). #[repr(C)] fixes the field
// order so the layout can be compared against the spec's byte offsets.
// The author reports that this definition currently comes up short of the
// required 4,096 bytes.
// NOTE(review): under #[repr(C)] each field is placed at its natural
// alignment, so any u128 field whose spec offset is not a multiple of
// align_of::<u128>() on the target would get padding inserted before it --
// verify the offsets on the target platform.
#[repr(C)]
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct IdentifyControllerResponse {
    // PCI Vendor ID
    pub vid: u16,
    // PCI Subsystem Vendor ID
    pub svid: u16,
    // Serial Number
    // NOTE(review): the thread lists SN and MN as not little-endian. A reply
    // notes that fields kept as raw bytes/strings need no byte swapping.
    pub sn: [u8; 20],
    // Model Number
    pub mn: [u8; 40],
    // Firmware Revision
    pub fr: [u8; 8],
    // Recommended Arbitration Burst
    pub rab: u8,
    // IEEE OUI Identifier
    pub ieee: [u8; 3],
    // Controller Multi-Path I/O and Namespace Sharing Capabilities
    pub cmic: u8,
    // Maximum Data Transfer Size
    pub mdts: u8,
    // Controller ID
    pub cntlid: u16,
    // Version
    pub ver: u32,
    // RTD3 Resume Latency
    pub rtd3r: u32,
    // RTD3 Entry Latency
    pub rtd3e: u32,
    // Optional Asynchronous Events Supported
    pub oaes: u32,
    // Controller Attributes
    pub ctratt: u32,
    // Read Recovery Levels Supported
    pub rrls: u16,
    // Reserved
    _rsvd1: [u8; 9],
    // Controller Type
    pub cntrltype: u8,
    // FRU Globally Unique Identifier
    // NOTE(review): the thread lists this field (bytes 127:112) as NOT
    // little-endian; a reply suggests storing such fields as [u8; N] with an
    // accessor that converts via from_be_bytes.
    pub fguid: u128,
    // Command Retry Delay Times
    pub crdt: [u16; 3],
    // Reserved
    _rsvd2: [u8; 119],
    // NVM Subsystem Report
    pub nvmsr: u8,
    // VPD Write Cycle Information
    pub vwci: u8,
    // Management Endpoint Capabilities
    pub mec: u8,
    // Optional Admin Command Support
    pub oacs: u16,
    // Abort Command Limit
    pub acl: u8,
    // Asynchronous Event Request Limit
    pub aerl: u8,
    // Firmware Updates
    pub frmw: u8,
    // Log Page Attributes
    pub lpa: u8,
    // Error Log Page Entries
    pub elpe: u8,
    // Number of Power States Support
    pub npss: u8,
    // Admin Vendor Specific Command Configuration
    pub avscc: u8,
    // Autonomous Power State Transition Attributes
    pub apsta: u8,
    // Warning Composite Temperature Threshold
    pub wctemp: u16,
    // Critical Composite Temperature Threshold
    pub cctemp: u16,
    // Maximum Time for Firmware Activation
    pub mtfa: u16,
    // Host Memory Buffer Preferred Size
    pub hmpre: u32,
    // Host Memory Buffer Minimum Size
    pub hmmin: u32,
    // Total NVM Capacity
    pub tnvmcap: u128,
    // Unallocated NVM Capacity
    pub unvmcap: u128,
    // Replay Protected Memory Block Support
    pub rpmbs: u32,
    // Extended Device Self-test Time
    pub edstt: u16,
    // Device Self-test Options
    pub dsto: u8,
    // Firmware Update Granularity
    pub fwug: u8,
    // Keep Alive Support
    pub kas: u16,
    // Host Controlled Thermal Management Attributes
    pub hctma: u16,
    // Minimum Thermal Management Temperature
    pub mntmt: u16,
    // Maximum Thermal Management Temperature
    pub mxtmt: u16,
    // Sanitize Capabilities
    pub sanicap: u32,
    // Host Memory Buffer Minimum Descriptor Entry Size
    pub hmminds: u32,
    // Host Memory Maximum Descriptors Entries
    pub hmmaxd: u16,
    // NVM Set Identifier Maximum
    pub nsetidmax: u16,
    // Endurance Group Identifier Maximum
    pub endgidmax: u16,
    // ANA Transition Time
    pub anatt: u8,
    // Asymmetric Namespace Access Capabilities
    pub anacap: u8,
    // ANA Group Identifier Maximum
    pub anagrpmax: u32,
    // Number of ANA Group Identifiers
    pub nanagrpid: u32,
    // Persistent Event Log Size
    pub pels: u32,
    // Reserved
    _rsvd3: [u8; 156],
    // Submission Queue Entry Size
    pub sqes: u8,
    // Completion Queue Entry Size
    pub cqes: u8,
    // Maximum Outstanding Commands
    pub maxcmd: u16,
    // Number of Namespaces
    pub nn: u32,
    // Optional NVM Command Support
    pub oncs: u16,
    // Fused Operation Support
    pub fuses: u16,
    // Format NVM Attributes
    pub fna: u8,
    // Volatile Write Cache
    pub vwc: u8,
    // Atomic Write Unit Normal
    pub awun: u16,
    // Atomic Write Unit Power Fail
    pub awupf: u16,
    // NVM Vendor Specific Command Configuration
    pub nvscc: u8,
    // Namespace Write Protection Capabilities
    pub nwpc: u8,
    // Atomic Compare & Write Unit
    pub acwu: u16,
    // Reserved
    _rsvd4: u16,
    // SGL Support
    pub sgls: u32,
    // Maximum Number of Allowed Namespaces
    pub mnan: u32,
    // Reserved
    _rsvd5: [u8; 224],
    // NVM Subsystem NVMe Qualified Name
    pub subnqn: [u8; 256],
    // Reserved
    _rsvd6: [u8; 768],
    // NOTE(review): the fields from ioccsz through ofcs are presumably the
    // part that the thread says refers to Figure 30 of the NVMe over Fabrics
    // specification -- confirm.
    // I/O Queue Command Capsule Supported Size
    pub ioccsz: u32,
    // I/O Queue Response Capsule Supported Size
    pub iorcsz: u32,
    // In Capsule Data Offset
    pub icdoff: u16,
    // Fabrics Controller Attributes
    pub fcatt: u8,
    // Maximum SGL Data Block Descriptors
    pub msdbd: u8,
    // Optional Fabric Commands Support
    pub ofcs: u16,
    // Reserved
    _rsvd7: [u8; 242],
    // Power State Descriptors
    // NOTE(review): a reply in this thread says psd should have 32 elements
    // (indexes 0..=31); with 31, `vs` starts where PSD31 belongs, which would
    // account for the size shortfall. Confirm against the spec.
    pub psd: [[u128; 2]; 31],
    // Vendor Specific
    pub vs: [u8; 1024],
}
// Compile-time check that the struct occupies exactly 4,096 bytes, the
// response size the thread quotes from the spec. `assert_eq_size!` is
// presumably the macro from the static_assertions crate -- confirm.
assert_eq_size!(IdentifyControllerResponse, [u8; 4096]);

I know that this is asking a lot but would someone mind helping me verify the types of each field? Static assertions are nice but it'd be neat if they'd tell me what field is incorrectly sized but... hey, that'd mean computers would be able to read the spec (which would actually be cool). But anyway... would someone mind doing that for me? I'm pretty sure that I got the types right, but considering that the Namespace identification struct was mis-typed at the first three fields.... It makes me wonder where else I've mis-typed things.

Look at figure 8. They don’t call it little endian, but that’s the scheme they depict.

It actually says, in 1.8 (thanks for referring me there):

Figure 8 illustrates the relationship between bytes, words and dwords. A qword (quadruple word) is a unit of data that is four times the size of a word; it is not illustrated due to space constraints. Unless otherwise specified, this specification specifies data in a little endian format.

So I guess we've got that down. The fields that are not LE are:

  • Bytes 119:104 of Fig. 247, Namespace Globally Unique Identifier (NGUID)
  • Bytes 127:120 of Fig. 247, IEEE Extended Unique Identifier (EUI64)
  • Bytes 127:112 of Fig. 249, FRU Globally Unique Identifier (FGUID)
  • Serial Number (SN) and Model Number (MN)

(Section 7.10 defines endianness of prominent structure fields, such as SN, MN, EUI64, etc.) I'm not really sure how to deal with this; in a partition parsing library, what I did was I parsed the entire structure as LE, converted the (then incorrect) fields that were supposed to be BE into BE, then was done. Is that even the right way to do that?

If the BE Unique Identifiers and the BE Numbers are parsed as uN integers, or you need to treat them as integers, then you need to switch their endianness before using them as integers. If they are parsed as strings and you only need them as strings then you can leave them unchanged.

1 Like

So that they don’t accidentally get used without conversion, it may make sense to define those fields as [u8; N], and provide accessor methods that pass them through the appropriate from_be_bytes to get an integer type.

Using raw pointers, it’s possible to get the individual field offsets to compare with the corresponding table in the spec. I’d not want to access members this way, but it’s adequate for diagnostics and maybe automated acceptance testing: (Playground)

Edit: In this case, it appears that vs is where the PSD31 information should be— psd should have 32 elements (indexes 0..=31).

// Prints the byte offset of each listed field of a struct type, one per line,
// as "<offset>  <field name>". Intended as a diagnostic for comparing a
// struct's actual layout with a specification's offset table.
//
// Usage: print_offsets!(Type; field_a, field_b, ...);
//
// The offsets are derived from raw field addresses inside uninitialized
// storage. No value of the type is ever created and no reference to a field
// is taken, so this also works for types that have no valid all-zero bit
// pattern (for which `std::mem::zeroed()` is not allowed).
macro_rules! print_offsets {
    ( $root:ty ; $($member:ident),* ) => {
        // Storage with the size and alignment of `$root`; never initialized
        // and never read.
        let storage = ::std::mem::MaybeUninit::<$root>::uninit();
        let base: *const $root = storage.as_ptr();
        $(
        let offset: usize = {
            // SAFETY: `base` points into `storage`, which is live for this
            // whole scope, so the field address is in bounds. `addr_of!`
            // only computes that address; it neither reads the memory nor
            // creates a reference to it.
            let field = unsafe { ::std::ptr::addr_of!((*base).$member) };
            (field as *const u8 as usize) - (base as *const u8 as usize)
        };
        println!("{:4}  {}", offset, stringify!($member));
        )*
    }
}

/// Prints the offsets of a sample of `IdentifyControllerResponse` fields so
/// they can be compared by eye with the offset table in the specification.
fn main() {
    print_offsets!(
        IdentifyControllerResponse;
        vid, cntrltype, nvmsr, sqes, sgls, subnqn, ioccsz, psd, vs
    );
}

Thanks for all your suggestions -- I solved the problem (I think).

If you transmute [u8; 4096] to IdentifyControllerResponse, you will get a compile error when the size does not match.

This topic was automatically closed 90 days after the last reply. We invite you to open a new topic if you have further questions or comments.