Development Documentation (main branch) - For stable release docs, see docs.rs/eidetica

eidetica/entry/
id.rs

//! Content-addressable identifier type used throughout Eidetica.
//!
//! The `ID` type represents a content-addressable hash that supports multiple algorithms.
//! SHA-256 and Blake3 are implemented today; further hash types can be added later.
5
6use serde::{Deserialize, Serialize};
7use sha2::{Digest, Sha256};
8
/// Identifies which hash function an `ID` was produced with.
///
/// Variant order matters: the derived ordering places `Sha256` before `Blake3`.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum HashAlgorithm {
    /// SHA-256; the default, kept for backward compatibility.
    Sha256,
    /// Blake3; offered as a faster alternative.
    Blake3,
}
17
18impl HashAlgorithm {
19    /// Get the string prefix for this algorithm
20    pub fn prefix(&self) -> &'static str {
21        match self {
22            HashAlgorithm::Sha256 => "sha256",
23            HashAlgorithm::Blake3 => "blake3",
24        }
25    }
26
27    /// Get expected hash length in bytes
28    pub fn hash_len(&self) -> usize {
29        match self {
30            HashAlgorithm::Sha256 => 32,
31            HashAlgorithm::Blake3 => 32,
32        }
33    }
34
35    /// Get expected hex string length
36    pub fn hex_len(&self) -> usize {
37        self.hash_len() * 2
38    }
39}
40
/// Reasons an ID string can be rejected during parsing or validation.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum IdError {
    /// The input is not in the required prefixed form; carries an explanatory message.
    InvalidFormat(String),
    /// The hex portion has the wrong length for its hash algorithm.
    InvalidLength {
        /// Length the algorithm requires for the hex portion.
        expected: usize,
        /// Length of the hex portion that was actually supplied.
        got: usize,
    },
    /// The algorithm prefix is not recognized; carries the offending prefix.
    UnknownAlgorithm(String),
    /// The hex portion contains invalid characters; carries the offending hex text.
    InvalidHex(String),
}
53
54impl std::fmt::Display for IdError {
55    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
56        match self {
57            IdError::InvalidFormat(s) => write!(f, "Invalid ID format: {s}"),
58            IdError::InvalidLength { expected, got } => {
59                write!(f, "Invalid ID length: expected {expected}, got {got}")
60            }
61            IdError::UnknownAlgorithm(alg) => write!(f, "Unknown hash algorithm: {alg}"),
62            IdError::InvalidHex(s) => write!(f, "Invalid hex characters: {s}"),
63        }
64    }
65}
66
67impl std::error::Error for IdError {}
68
69/// A content-addressable identifier for an `Entry` or other database object.
70///
71/// Supports multiple hash algorithms including SHA-256 and Blake3. IDs can be created
72/// from raw data using various hash algorithms, or parsed from string representations.
73///
74/// String format:
75/// - Current: `sha256:deadbeef123...` or `blake3:abcdef456...` (algorithm prefix required)
76/// - Legacy: `deadbeef123...` (64 hex chars, assumed SHA-256, parsing only)
77#[derive(Debug, Clone, PartialEq, Eq, Hash)]
78pub struct ID {
79    /// String representation for compatibility and serialization
80    repr: String,
81    /// Cached algorithm for efficiency
82    algorithm: HashAlgorithm,
83}
84
85/// Lexicographic ordering by string representation only.
86impl PartialOrd for ID {
87    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
88        Some(self.cmp(other))
89    }
90}
91
92/// Lexicographic ordering by string representation only.
93///
94/// The `algorithm` field is intentionally excluded since it's derived from `repr`.
95impl Ord for ID {
96    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
97        self.repr.cmp(&other.repr)
98    }
99}
100
101impl Default for ID {
102    fn default() -> Self {
103        Self {
104            repr: String::new(),
105            algorithm: HashAlgorithm::Sha256,
106        }
107    }
108}
109
110impl ID {
111    /// Creates a new ID from any string-like input without validation.
112    ///
113    /// For validated creation, use `parse()` or `try_from()`.
114    pub fn new(s: impl Into<String>) -> Self {
115        let repr = s.into();
116        let algorithm = Self::detect_algorithm(&repr);
117        Self { repr, algorithm }
118    }
119
120    /// Creates an ID by hashing the given bytes with SHA-256.
121    pub fn from_bytes(data: impl AsRef<[u8]>) -> Self {
122        Self::from_bytes_with(data, HashAlgorithm::Sha256)
123    }
124
125    /// Creates an ID by hashing the given bytes with the specified algorithm.
126    pub fn from_bytes_with(data: impl AsRef<[u8]>, algorithm: HashAlgorithm) -> Self {
127        let data = data.as_ref();
128        let hash_bytes = match algorithm {
129            HashAlgorithm::Sha256 => {
130                let mut hasher = Sha256::new();
131                hasher.update(data);
132                hasher.finalize().to_vec()
133            }
134            HashAlgorithm::Blake3 => blake3::hash(data).as_bytes().to_vec(),
135        };
136
137        let hex = hex::encode(&hash_bytes);
138        let repr = format!("{}:{}", algorithm.prefix(), hex);
139
140        Self { repr, algorithm }
141    }
142
143    /// Parses an ID from a string, validating the format.
144    ///
145    /// Requires algorithm prefix format: `algorithm:hexhash`
146    pub fn parse(s: &str) -> Result<Self, IdError> {
147        if s.is_empty() {
148            return Ok(Self::default());
149        }
150
151        // Require prefixed format
152        let Some(colon_pos) = s.find(':') else {
153            return Err(IdError::InvalidFormat(
154                "ID must have algorithm prefix (e.g., 'sha256:' or 'blake3:')".to_string(),
155            ));
156        };
157
158        let (prefix, hex_part) = s.split_at(colon_pos);
159        let hex_part = &hex_part[1..]; // Skip the ':'
160
161        let algorithm = match prefix {
162            "sha256" => HashAlgorithm::Sha256,
163            "blake3" => HashAlgorithm::Blake3,
164            _ => return Err(IdError::UnknownAlgorithm(prefix.to_string())),
165        };
166
167        Self::validate_hex_format(hex_part, algorithm)?;
168
169        Ok(Self {
170            repr: s.to_string(),
171            algorithm,
172        })
173    }
174
175    /// Validates that a hex string matches the expected format for an algorithm.
176    fn validate_hex_format(hex: &str, algorithm: HashAlgorithm) -> Result<(), IdError> {
177        let expected_len = algorithm.hex_len();
178
179        if hex.len() != expected_len {
180            return Err(IdError::InvalidLength {
181                expected: expected_len,
182                got: hex.len(),
183            });
184        }
185
186        // Check that all characters are valid lowercase hex
187        if !hex
188            .chars()
189            .all(|c| c.is_ascii_hexdigit() && !c.is_ascii_uppercase())
190        {
191            return Err(IdError::InvalidHex(hex.to_string()));
192        }
193
194        Ok(())
195    }
196
197    /// Detects the algorithm from a string representation (for parsing existing IDs).
198    fn detect_algorithm(s: &str) -> HashAlgorithm {
199        if let Some(colon_pos) = s.find(':') {
200            let prefix = &s[..colon_pos];
201            match prefix {
202                "blake3" => HashAlgorithm::Blake3,
203                _ => HashAlgorithm::Sha256, // Default fallback
204            }
205        } else {
206            HashAlgorithm::Sha256 // Default for empty/malformed IDs
207        }
208    }
209
210    /// Returns the ID as a string slice.
211    pub fn as_str(&self) -> &str {
212        &self.repr
213    }
214
215    /// Returns true if the ID is empty.
216    pub fn is_empty(&self) -> bool {
217        self.repr.is_empty()
218    }
219
220    /// Gets the hash algorithm used for this ID.
221    pub fn algorithm(&self) -> HashAlgorithm {
222        self.algorithm
223    }
224
225    /// Gets the raw hex string without the algorithm prefix.
226    pub fn hex(&self) -> &str {
227        if let Some(colon_pos) = self.repr.find(':') {
228            &self.repr[colon_pos + 1..]
229        } else {
230            &self.repr
231        }
232    }
233
234    /// Gets the hash bytes if the hex is valid.
235    pub fn as_bytes(&self) -> Result<Vec<u8>, hex::FromHexError> {
236        hex::decode(self.hex())
237    }
238}
239
240// Backward compatibility trait implementations
241impl From<String> for ID {
242    fn from(s: String) -> Self {
243        Self::new(s)
244    }
245}
246
247impl From<&str> for ID {
248    fn from(s: &str) -> Self {
249        Self::new(s)
250    }
251}
252
253impl From<&ID> for ID {
254    fn from(id: &ID) -> Self {
255        id.clone()
256    }
257}
258
259impl AsRef<str> for ID {
260    fn as_ref(&self) -> &str {
261        &self.repr
262    }
263}
264
265impl std::fmt::Display for ID {
266    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
267        write!(f, "{}", &self.repr)
268    }
269}
270
271impl std::ops::Deref for ID {
272    type Target = str;
273
274    fn deref(&self) -> &Self::Target {
275        &self.repr
276    }
277}
278
279impl PartialEq<str> for ID {
280    fn eq(&self, other: &str) -> bool {
281        self.repr == other
282    }
283}
284
285impl PartialEq<&str> for ID {
286    fn eq(&self, other: &&str) -> bool {
287        self.repr == *other
288    }
289}
290
291impl PartialEq<String> for ID {
292    fn eq(&self, other: &String) -> bool {
293        &self.repr == other
294    }
295}
296
297impl PartialEq<ID> for str {
298    fn eq(&self, other: &ID) -> bool {
299        self == other.repr
300    }
301}
302
303impl PartialEq<ID> for &str {
304    fn eq(&self, other: &ID) -> bool {
305        *self == other.repr
306    }
307}
308
309impl PartialEq<ID> for String {
310    fn eq(&self, other: &ID) -> bool {
311        self == &other.repr
312    }
313}
314
315impl From<ID> for String {
316    fn from(id: ID) -> Self {
317        id.repr
318    }
319}
320
321impl PartialEq<&ID> for ID {
322    fn eq(&self, other: &&ID) -> bool {
323        self == *other
324    }
325}
326
327impl From<&ID> for String {
328    fn from(id: &ID) -> Self {
329        id.repr.clone()
330    }
331}
332
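// Note: `TryFrom` is intentionally not implemented for `ID`. Because `From<String>` and
// `From<&str>` exist, the standard library's blanket `TryFrom` implementation already
// applies and a manual one would conflict with it.
// Use `ID::parse()` directly for validated parsing.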
335
336// Serialize/Deserialize implementations - serialize as string for compatibility
337impl Serialize for ID {
338    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
339    where
340        S: serde::Serializer,
341    {
342        self.repr.serialize(serializer)
343    }
344}
345
346impl<'de> Deserialize<'de> for ID {
347    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
348    where
349        D: serde::Deserializer<'de>,
350    {
351        let s = String::deserialize(deserializer)?;
352        Ok(Self::new(s))
353    }
354}
355
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed 64-character lowercase hex digest shared by the parsing tests.
    const VALID_HEX: &str = "deadbeef12345678901234567890123456789012345678901234567890123456";

    #[test]
    fn test_sha256_prefixed_format() {
        let data = b"hello world";
        let id = ID::from_bytes(data);

        // Should have sha256: prefix
        assert!(id.as_str().starts_with("sha256:"));
        assert_eq!(id.algorithm(), HashAlgorithm::Sha256);
        assert_eq!(id.as_str().len(), 71); // "sha256:" (7) + hex (64) = 71
    }

    #[test]
    fn test_blake3_prefixed_format() {
        let data = b"hello world";
        let id = ID::from_bytes_with(data, HashAlgorithm::Blake3);

        // Should have blake3: prefix
        assert!(id.as_str().starts_with("blake3:"));
        assert_eq!(id.algorithm(), HashAlgorithm::Blake3);
    }

    #[test]
    fn test_parse_sha256_prefixed() {
        let hex = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
        let prefixed = format!("sha256:{hex}");
        let id = ID::parse(&prefixed).unwrap();

        assert_eq!(id.algorithm(), HashAlgorithm::Sha256);
        assert_eq!(id.hex(), hex);
        assert_eq!(id.as_str(), prefixed);
    }

    #[test]
    fn test_parse_prefixed_blake3() {
        let hex = "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262";
        let prefixed = format!("blake3:{hex}");
        let id = ID::parse(&prefixed).unwrap();

        assert_eq!(id.algorithm(), HashAlgorithm::Blake3);
        assert_eq!(id.hex(), hex);
        assert_eq!(id.as_str(), prefixed);
    }

    #[test]
    fn test_from_bytes_deterministic() {
        let id1 = ID::from_bytes("test_data_foo");
        let id2 = ID::from_bytes("test_data_foo");
        let id3 = ID::from_bytes("test_data_bar");

        // Same data should produce same ID
        assert_eq!(id1, id2);
        // Different data should produce different IDs
        assert_ne!(id1, id3);

        // Should be SHA-256 prefixed format
        assert_eq!(id1.algorithm(), HashAlgorithm::Sha256);
    }

    #[test]
    fn test_validation() {
        // Guard the fixture itself so the length-sensitive cases below mean what they say.
        assert_eq!(VALID_HEX.len(), 64);

        // Missing algorithm prefix (rejected regardless of length)
        assert!(matches!(
            ID::parse("deadbeef"),
            Err(IdError::InvalidFormat(_))
        ));
        assert!(matches!(
            ID::parse(VALID_HEX),
            Err(IdError::InvalidFormat(_))
        ));

        // Too short
        assert_eq!(
            ID::parse("sha256:deadbeef"),
            Err(IdError::InvalidLength {
                expected: 64,
                got: 8
            })
        );

        // Too long (length is checked before the characters are)
        assert_eq!(
            ID::parse(&format!("sha256:{}7g", VALID_HEX)),
            Err(IdError::InvalidLength {
                expected: 64,
                got: 66
            })
        );

        // Invalid hex characters: correct length, so the character check is what fails
        let bad_hex = format!("{}g", &VALID_HEX[..63]);
        assert_eq!(
            ID::parse(&format!("sha256:{}", bad_hex)),
            Err(IdError::InvalidHex(bad_hex.clone()))
        );

        // Uppercase hex is rejected as well
        let upper_hex = VALID_HEX.to_uppercase();
        assert_eq!(
            ID::parse(&format!("sha256:{}", upper_hex)),
            Err(IdError::InvalidHex(upper_hex.clone()))
        );

        // Unknown algorithm
        assert_eq!(
            ID::parse(&format!("unknown:{}", VALID_HEX)),
            Err(IdError::UnknownAlgorithm("unknown".to_string()))
        );

        // Valid cases
        assert!(ID::parse(&format!("sha256:{}", VALID_HEX)).is_ok());
        assert!(ID::parse(&format!("blake3:{}", VALID_HEX)).is_ok());

        // The empty string parses to the default (empty) ID
        assert_eq!(ID::parse(""), Ok(ID::default()));
    }

    #[test]
    fn test_serialization() {
        let id = ID::from_bytes("test_data_serialization");

        // Should serialize/deserialize as string
        let json = serde_json::to_string(&id).unwrap();
        let deserialized: ID = serde_json::from_str(&json).unwrap();

        assert_eq!(id, deserialized);
        assert_eq!(id.algorithm(), deserialized.algorithm());
    }
}