Development Documentation (main branch) - For stable release docs, see docs.rs/eidetica
Skip to main content

eidetica/entry/
id.rs

1//! Content-addressable identifier type used throughout Eidetica.
2//!
3//! The `ID` type wraps a CID (Content Identifier) from the IPLD/multiformats spec.
4
5use crate::Result;
6use cid::Cid;
7use multihash_codetable::{Code, MultihashDigest};
8use serde::{Deserialize, Serialize};
9
// The multicodec codes below come from the registry at
// https://github.com/multiformats/multicodec

/// Multicodec code `0x71` (DAG-CBOR): marks CIDs computed over DAG-CBOR encoded content.
const DAG_CBOR_CODEC: u64 = 0x71;

/// Multicodec code `0x55` (raw): marks CIDs computed over opaque, uninterpreted bytes.
const RAW_CODEC: u64 = 0x55;
17
/// Failures that can occur while parsing or validating an ID.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum IdError {
    /// The input is not a valid CID string; the payload describes the problem.
    InvalidFormat(String),
    /// The length does not match what the hash algorithm requires.
    InvalidLength { expected: usize, got: usize },
    /// The hash algorithm is not recognised; the payload names it.
    UnknownAlgorithm(String),
    /// The encoding is invalid; the payload describes the problem.
    InvalidHex(String),
}

impl std::fmt::Display for IdError {
    /// Writes a single-line, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InvalidFormat(detail) => {
                f.write_fmt(format_args!("Invalid ID format: {detail}"))
            }
            Self::InvalidLength { expected, got } => f.write_fmt(format_args!(
                "Invalid ID length: expected {expected}, got {got}"
            )),
            Self::UnknownAlgorithm(name) => {
                f.write_fmt(format_args!("Unknown hash algorithm: {name}"))
            }
            Self::InvalidHex(detail) => f.write_fmt(format_args!("Invalid encoding: {detail}")),
        }
    }
}

// No source or backtrace to expose; the trait's default methods are sufficient.
impl std::error::Error for IdError {}
45
46/// A content-addressable identifier for an `Entry` or other database object.
47///
48/// Wraps a CID v1. Hash algorithm is self-describing via the multihash specification;
49/// BLAKE3 is the default for newly-created IDs, but any multihash algorithm present
50/// in an existing CID will be preserved through parsing and serialization.
51///
52/// String format uses multibase base32lower encoding, producing CID strings like
53/// `bafyr4i...` (dag-cbor + blake3).
54#[derive(Clone, Debug, PartialEq, Eq, Hash, Default)]
55pub struct ID(Option<Cid>);
56
57impl PartialOrd for ID {
58    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
59        Some(self.cmp(other))
60    }
61}
62
63/// Deterministic ordering: empty IDs sort before non-empty, then by CID fields.
64impl Ord for ID {
65    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
66        match (&self.0, &other.0) {
67            (None, None) => std::cmp::Ordering::Equal,
68            (None, Some(_)) => std::cmp::Ordering::Less,
69            (Some(_), None) => std::cmp::Ordering::Greater,
70            (Some(a), Some(b)) => a.cmp(b),
71        }
72    }
73}
74
75impl ID {
76    /// Creates an ID by hashing DAG-CBOR encoded bytes with BLAKE3.
77    ///
78    /// This is the primary way to create an ID from serialized entry content.
79    pub fn from_dagcbor_bytes(data: impl AsRef<[u8]>) -> Self {
80        Self::from_dagcbor_bytes_with(data, Code::Blake3_256)
81    }
82
83    /// Creates an ID by hashing DAG-CBOR encoded bytes with the specified algorithm.
84    pub fn from_dagcbor_bytes_with(data: impl AsRef<[u8]>, code: Code) -> Self {
85        let mh = code.digest(data.as_ref());
86        Self(Some(Cid::new_v1(DAG_CBOR_CODEC, mh)))
87    }
88
89    /// Creates an ID by hashing the given bytes with BLAKE3.
90    ///
91    /// Uses the raw codec (0x55) since the bytes are not DAG-CBOR encoded content.
92    /// Used for opaque blobs and non-entry identifiers.
93    pub fn from_bytes(data: impl AsRef<[u8]>) -> Self {
94        Self::from_bytes_with(data, Code::Blake3_256)
95    }
96
97    /// Creates an ID by hashing the given bytes with the specified algorithm.
98    ///
99    /// Uses the raw codec (0x55) since the bytes are not DAG-CBOR encoded content.
100    pub fn from_bytes_with(data: impl AsRef<[u8]>, code: Code) -> Self {
101        let mh = code.digest(data.as_ref());
102        Self(Some(Cid::new_v1(RAW_CODEC, mh)))
103    }
104
105    /// Parses an ID from its string representation.
106    ///
107    /// Accepts multibase-encoded CID strings (e.g., base32lower `bafyr4i...`).
108    /// An empty string produces the default (empty) ID.
109    pub fn parse(s: &str) -> Result<Self> {
110        if s.is_empty() {
111            return Ok(Self::default());
112        }
113
114        let cid = Cid::try_from(s).map_err(|e| IdError::InvalidFormat(e.to_string()))?;
115        Ok(Self(Some(cid)))
116    }
117
118    /// Returns true if the ID is empty (no CID).
119    pub fn is_empty(&self) -> bool {
120        self.0.is_none()
121    }
122
123    /// Get the underlying CID, if present.
124    pub fn as_cid(&self) -> Option<&Cid> {
125        self.0.as_ref()
126    }
127
128    /// Get the multihash code used for this ID.
129    pub fn hash_code(&self) -> Option<u64> {
130        self.0.as_ref().map(|cid| cid.hash().code())
131    }
132}
133
134impl From<Cid> for ID {
135    fn from(cid: Cid) -> Self {
136        Self(Some(cid))
137    }
138}
139
140impl From<&ID> for ID {
141    fn from(id: &ID) -> Self {
142        id.clone()
143    }
144}
145
146impl std::fmt::Display for ID {
147    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
148        match &self.0 {
149            Some(cid) => {
150                // CID's default Display uses base32lower for v1 CIDs
151                // We want to use the defaults for consistency
152                write!(f, "{cid}")
153            }
154            None => Ok(()),
155        }
156    }
157}
158
159impl From<ID> for String {
160    fn from(id: ID) -> Self {
161        id.to_string()
162    }
163}
164
165impl From<&ID> for String {
166    fn from(id: &ID) -> Self {
167        id.to_string()
168    }
169}
170
171// Serialize as a CID link (CBOR tag 42) in binary formats, or as a string in
172// human-readable formats (JSON). This allows IDs to be used as map keys in JSON
173// while still being proper IPLD links in DAG-CBOR.
174impl Serialize for ID {
175    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
176    where
177        S: serde::Serializer,
178    {
179        // For human-readable formats, serialize as a string (multibase CID)
180        if serializer.is_human_readable() {
181            self.to_string().serialize(serializer)
182        } else {
183            self.0.serialize(serializer)
184        }
185    }
186}
187
188impl<'de> Deserialize<'de> for ID {
189    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
190    where
191        D: serde::Deserializer<'de>,
192    {
193        if deserializer.is_human_readable() {
194            let s = String::deserialize(deserializer)?;
195            if s.is_empty() {
196                Ok(Self(None))
197            } else {
198                Cid::try_from(s.as_str())
199                    .map(|cid| Self(Some(cid)))
200                    .map_err(serde::de::Error::custom)
201            }
202        } else {
203            Ok(Self(Option::<Cid>::deserialize(deserializer)?))
204        }
205    }
206}
207
// Unit tests for `ID`: construction, parsing, string form, serde round-trips and
// ordering.
#[cfg(test)]
mod tests {
    use super::*;

    /// `from_bytes` yields a v1 CID with the raw codec and the default BLAKE3 hash.
    #[test]
    fn test_from_bytes_produces_cid() {
        let data = b"hello world";
        let id = ID::from_bytes(data);

        assert!(!id.is_empty());
        let cid = id.as_cid().unwrap();
        assert_eq!(cid.version(), cid::Version::V1);
        assert_eq!(cid.codec(), RAW_CODEC);
        // Default is BLAKE3 (multihash code 0x1e)
        assert_eq!(cid.hash().code(), 0x1e);
    }

    /// `from_bytes_with` honours an explicitly requested hash algorithm.
    #[test]
    fn test_from_bytes_sha256() {
        let data = b"hello world";
        let id = ID::from_bytes_with(data, Code::Sha2_256);

        assert!(!id.is_empty());
        let cid = id.as_cid().unwrap();
        assert_eq!(cid.version(), cid::Version::V1);
        assert_eq!(cid.codec(), RAW_CODEC);
        // SHA-256 multihash code is 0x12
        assert_eq!(cid.hash().code(), 0x12);
    }

    /// Formatting an ID and parsing the result gives back an equal ID.
    #[test]
    fn test_parse_roundtrip() {
        let id = ID::from_bytes(b"test data");
        let s = id.to_string();

        let parsed = ID::parse(&s).unwrap();
        assert_eq!(id, parsed);
    }

    /// The empty string parses to the empty (default) ID.
    #[test]
    fn test_parse_empty() {
        let id = ID::parse("").unwrap();
        assert!(id.is_empty());
        assert_eq!(id, ID::default());
    }

    /// Hashing is deterministic and input-sensitive.
    #[test]
    fn test_from_bytes_deterministic() {
        let id1 = ID::from_bytes("test_data_foo");
        let id2 = ID::from_bytes("test_data_foo");
        let id3 = ID::from_bytes("test_data_bar");

        // Same data should produce same ID
        assert_eq!(id1, id2);
        // Different data should produce different IDs
        assert_ne!(id1, id3);
    }

    /// The default ID is empty and formats as the empty string.
    #[test]
    fn test_empty_id() {
        let id = ID::default();
        assert!(id.is_empty());
        assert_eq!(id.to_string(), "");
    }

    /// A non-empty ID survives a JSON (human-readable) round-trip.
    #[test]
    fn test_serialization_json() {
        let id = ID::from_bytes("test_data_serialization");

        // Should serialize/deserialize via JSON
        let json = serde_json::to_string(&id).unwrap();
        let deserialized: ID = serde_json::from_str(&json).unwrap();
        assert_eq!(id, deserialized);
    }

    /// A non-empty ID survives a DAG-CBOR (binary) round-trip.
    #[test]
    fn test_serialization_dagcbor() {
        let id = ID::from_bytes("test_data_cbor");

        // Should serialize/deserialize via DAG-CBOR
        let bytes = serde_ipld_dagcbor::to_vec(&id).unwrap();
        let deserialized: ID = serde_ipld_dagcbor::from_slice(&bytes).unwrap();
        assert_eq!(id, deserialized);
    }

    /// The empty ID survives both the JSON and the DAG-CBOR round-trip.
    #[test]
    fn test_empty_id_serialization() {
        let id = ID::default();

        // JSON roundtrip
        let json = serde_json::to_string(&id).unwrap();
        let deserialized: ID = serde_json::from_str(&json).unwrap();
        assert_eq!(id, deserialized);
        assert!(deserialized.is_empty());

        // DAG-CBOR roundtrip
        let bytes = serde_ipld_dagcbor::to_vec(&id).unwrap();
        let deserialized: ID = serde_ipld_dagcbor::from_slice(&bytes).unwrap();
        assert_eq!(id, deserialized);
        assert!(deserialized.is_empty());
    }

    /// The empty ID sorts first, and non-empty IDs are ordered consistently.
    #[test]
    fn test_ordering() {
        let id1 = ID::from_bytes("aaa");
        let id2 = ID::from_bytes("bbb");
        let empty = ID::default();

        // Empty is less than non-empty, seen from both sides
        assert!(empty < id1);
        assert!(empty < id2);
        assert!(id1 > empty);
        assert!(id2 > empty);
        // Two empty IDs compare equal
        assert_eq!(empty.cmp(&ID::default()), std::cmp::Ordering::Equal);

        // Distinct IDs must be strictly ordered, and the order must be antisymmetric
        assert!(id1 != id2);
        assert_ne!(id1.cmp(&id2), std::cmp::Ordering::Equal);
        assert_eq!(id1.cmp(&id2), id2.cmp(&id1).reverse());
        // PartialOrd must agree with Ord
        assert_eq!(id1.partial_cmp(&id2), Some(id1.cmp(&id2)));
    }

    /// A string that is not a CID is rejected by `parse`.
    #[test]
    fn test_parse_invalid() {
        // Invalid CID string
        assert!(ID::parse("not-a-valid-cid").is_err());
    }
}