eidetica/entry/mod.rs
1//!
2//! Defines the fundamental data unit (`Entry`) and related types.
3//!
4//! An `Entry` is the core, content-addressable building block of the database,
5//! representing a snapshot of data in the main tree and potentially multiple named subtrees.
6//! This module also defines the `ID` type and `RawData` type.
7
8mod builder;
9pub mod errors;
10pub mod id;
11
12#[cfg(test)]
13mod tests;
14
15use serde::{Deserialize, Serialize};
16
17pub use builder::EntryBuilder;
18pub use errors::EntryError;
19pub use id::ID;
20
21use crate::{Error, Result, auth::types::SigInfo, constants::ROOT, store::StoreError};
22
23use id::IdError;
24
/// Opaque, user-supplied serialized payload (typically JSON).
///
/// Stored as a plain string so that callers stay free to pick their own data
/// structures and serialization formats.
pub type RawData = String;
29
/// Serde `skip_serializing_if` predicate: reports whether a tree height is zero.
///
/// Takes a reference because serde calls the predicate with `&field`.
fn is_zero(h: &u64) -> bool {
    matches!(*h, 0)
}
34
35/// Internal representation of the main tree node within an `Entry`.
36#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
37pub(super) struct TreeNode {
38 /// The ID of the root `Entry` of the tree this node belongs to.
39 pub root: ID,
40 /// IDs of the parent `Entry`s in the main tree history.
41 /// The vector is kept sorted alphabetically.
42 pub parents: Vec<ID>,
43 /// Serialized metadata associated with this `Entry` in the main tree.
44 /// This data is metadata about this specific entry only and is not merged with other entries.
45 ///
46 /// Metadata is used to improve the efficiency of certain operations and for experimentation.
47 ///
48 /// Metadata is optional and may not be present in all entries. Future versions
49 /// may extend metadata to include additional information.
50 pub metadata: Option<RawData>,
51 /// Height of this entry in the tree DAG (longest path from root).
52 /// Root entries have height 0, children have max(parent heights) + 1.
53 #[serde(rename = "h", default, skip_serializing_if = "is_zero")]
54 pub height: u64,
55}
56
57/// Internal representation of a named subtree node within an `Entry`.
58#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
59pub(super) struct SubTreeNode {
60 /// The name of the subtree, analogous to a table name.
61 /// Subtrees are _named_, and not identified by an ID.
62 pub name: String,
63 /// IDs of the parent `Entry`s specific to this subtree's history.
64 /// The vector is kept sorted alphabetically.
65 pub parents: Vec<ID>,
66 /// Serialized data specific to this `Entry` within this named subtree.
67 ///
68 /// `None` indicates that this Entry participates in the subtree but makes no data changes.
69 /// This is used when there is information needed for this subtree found somewhere else (e.g. the `_index`)
70 ///
71 /// `Some(data)` contains the actual serialized data for this subtree.
72 #[serde(skip_serializing_if = "Option::is_none")]
73 pub data: Option<RawData>,
74 /// Height of this entry in the subtree DAG.
75 ///
76 /// `None` means the subtree inherits the tree's height (not serialized).
77 /// `Some(h)` is an independent height for subtrees with their own strategy.
78 #[serde(rename = "h", default, skip_serializing_if = "Option::is_none")]
79 pub height: Option<u64>,
80}
81
82/// The fundamental unit of data in Eidetica, representing a finalized, immutable Database Entry.
83///
84/// An `Entry` represents a snapshot of data within a `Database` and potentially one or more named `Store`s.
85/// It is content-addressable, meaning its `ID` is a cryptographic hash of its contents.
86/// Entries form a Merkle-DAG (Directed Acyclic Graph) structure through parent references.
87///
88/// # Authentication
89///
90/// Each entry contains authentication information with:
91/// - `sig`: Base64-encoded cryptographic signature (optional, allows unsigned entry creation)
92/// - `key`: Authentication key reference path, either:
93/// - A direct key ID defined in this tree's `_settings.auth`
94/// - A delegation path as an ordered list of `{"key": "delegated_tree_1", "tips": ["A", "B"]}`
95/// where the last element must contain only a `"key"` field
96///
97/// # Immutability
98///
99/// `Entry` instances are designed to be immutable once created. To create or modify entries,
100/// use the `EntryBuilder` struct, which provides a mutable API for constructing entries.
101/// Once an entry is built, its content cannot be changed, and its ID is deterministic
102/// based on its content.
103///
104/// # Example
105///
106/// ```
107/// # use eidetica::Entry;
108///
109/// // Create a new root entry (standalone entry that starts a new DAG)
110/// let entry = Entry::root_builder()
111/// .set_subtree_data("users", r#"{"user1":"data"}"#)
112/// .build()
113/// .expect("Entry should build successfully");
114///
115/// // Access entry data
116/// let id = entry.id(); // Calculate content-addressable ID
117/// let user_data = entry.data("users").unwrap();
118/// ```
119///
120/// # Builders
121///
122/// To create an `Entry`, use the associated `EntryBuilder`.
123/// The preferred way to get an `EntryBuilder` is via the static methods
124/// `Entry::builder()` for regular entries or `Entry::root_builder()` for new top-level tree roots.
125///
126/// ```
127/// # use eidetica::entry::{Entry, RawData};
128/// # let root_id: String = "some_root_id".to_string();
129/// # let data: RawData = "{}".to_string();
130/// // For a regular entry:
131/// let builder = Entry::builder(root_id);
132///
133/// // For a new top-level tree root:
134/// let root_builder = Entry::root_builder();
135/// ```
136/// The current entry format version.
137/// v0 indicates this is an unstable protocol subject to breaking changes.
138pub const ENTRY_VERSION: u8 = 0;
139
140/// Helper to check if version is default (0) for serde skip_serializing_if
141fn is_v0(v: &u8) -> bool {
142 *v == 0
143}
144
145/// Validates the entry version during deserialization.
146fn validate_entry_version<'de, D>(deserializer: D) -> std::result::Result<u8, D::Error>
147where
148 D: serde::Deserializer<'de>,
149{
150 let version = u8::deserialize(deserializer)?;
151 if version != ENTRY_VERSION {
152 return Err(serde::de::Error::custom(format!(
153 "unsupported Entry version {version}; only version {ENTRY_VERSION} is supported"
154 )));
155 }
156 Ok(version)
157}
158
159#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
160pub struct Entry {
161 /// Protocol version for this entry format.
162 /// Used to verify that we support reading this entry.
163 #[serde(
164 rename = "_v",
165 default,
166 skip_serializing_if = "is_v0",
167 deserialize_with = "validate_entry_version"
168 )]
169 version: u8,
170 /// The main tree node data, including the root ID, parents in the main tree, and associated data.
171 pub(super) tree: TreeNode,
172 /// A collection of named subtrees this entry contains data for.
173 /// The vector is kept sorted alphabetically by subtree name during the build process.
174 pub(super) subtrees: Vec<SubTreeNode>,
175 /// Authentication information for this entry
176 pub sig: SigInfo,
177}
178
179impl Entry {
180 /// Creates a new `EntryBuilder` for an entry associated with a specific tree root.
181 /// This is a convenience method and preferred over calling `EntryBuilder::new()` directly.
182 ///
183 /// # Arguments
184 /// * `root` - The `ID` of the root `Entry` of the tree this entry will belong to.
185 pub fn builder(root: impl Into<ID>) -> EntryBuilder {
186 EntryBuilder::new(root)
187 }
188
189 /// Creates a new `EntryBuilder` for a top-level (root) entry for a new tree.
190 /// This is a convenience method and preferred over calling `EntryBuilder::new_top_level()` directly.
191 ///
192 /// Root entries have an empty string as their `root` ID and include a special ROOT subtree marker.
193 /// This method is typically used when creating a new tree.
194 pub fn root_builder() -> EntryBuilder {
195 EntryBuilder::new_top_level()
196 }
197
198 /// Get the content-addressable ID of the entry.
199 ///
200 /// The ID is calculated on demand by hashing the serialized JSON representation of the entry.
201 /// Because entries are immutable once created and their contents are deterministically
202 /// serialized, this ensures that identical entries will always have the same ID.
203 pub fn id(&self) -> ID {
204 // Entry itself derives Serialize and contains tree and subtrees.
205 // These are kept sorted and finalized by the EntryBuilder before Entry creation.
206 let json = serde_json::to_string(self).expect("Failed to serialize entry for hashing");
207 ID::from_bytes(json)
208 }
209
210 /// Get the ID of the root `Entry` of the tree this entry belongs to.
211 pub fn root(&self) -> ID {
212 self.tree.root.clone()
213 }
214
215 /// Check if this entry is a root entry (contains the ROOT marker and has no parents).
216 ///
217 /// Root entries are the top-level entries in the database and are distinguished by:
218 /// 1. Containing a subtree with the ROOT marker
219 /// 2. Having no parent entries (they are true tree roots)
220 ///
221 /// This ensures that root entries are actual starting points of trees in the DAG.
222 pub fn is_root(&self) -> bool {
223 self.subtrees.iter().any(|node| node.name == ROOT) && self.tree.parents.is_empty()
224 }
225
226 /// Check if this entry contains data for a specific named subtree.
227 pub fn in_subtree(&self, subtree_name: impl AsRef<str>) -> bool {
228 self.subtrees
229 .iter()
230 .any(|node| node.name == subtree_name.as_ref())
231 }
232
233 /// Check if this entry belongs to a specific tree, identified by its root ID.
234 pub fn in_tree(&self, tree_id: impl AsRef<str>) -> bool {
235 // Entries that are roots exist in both trees
236 self.root() == tree_id.as_ref() || (self.id().as_str() == tree_id.as_ref())
237 }
238
239 /// Get the names of all subtrees this entry contains data for.
240 /// The names are returned in alphabetical order.
241 pub fn subtrees(&self) -> Vec<String> {
242 self.subtrees
243 .iter()
244 .map(|subtree| subtree.name.clone())
245 .collect()
246 }
247
248 /// Get the metadata associated with this entry's tree node.
249 ///
250 /// Metadata is optional information attached to an entry that is not part of the
251 /// main data model and is not merged between entries.
252 pub fn metadata(&self) -> Option<&RawData> {
253 self.tree.metadata.as_ref()
254 }
255
256 /// Get the `RawData` for a specific named subtree within this entry.
257 ///
258 /// Returns an error if the subtree is not found or if the subtree exists but has no data (`None`).
259 pub fn data(&self, subtree_name: impl AsRef<str>) -> Result<&RawData> {
260 self.subtrees
261 .iter()
262 .find(|node| node.name == subtree_name.as_ref())
263 .and_then(|node| node.data.as_ref())
264 .ok_or_else(|| {
265 StoreError::KeyNotFound {
266 store: "entry".to_string(),
267 key: subtree_name.as_ref().to_string(),
268 }
269 .into()
270 })
271 }
272
273 /// Get the IDs of the parent entries in the main tree history.
274 /// The parent IDs are returned in alphabetical order.
275 pub fn parents(&self) -> Result<Vec<ID>> {
276 Ok(self.tree.parents.clone())
277 }
278
279 /// Get the IDs of the parent entries specific to a named subtree's history.
280 /// The parent IDs are returned in alphabetical order.
281 pub fn subtree_parents(&self, subtree_name: impl AsRef<str>) -> Result<Vec<ID>> {
282 self.subtrees
283 .iter()
284 .find(|node| node.name == subtree_name.as_ref())
285 .map(|node| node.parents.clone())
286 .ok_or_else(|| {
287 StoreError::KeyNotFound {
288 store: "entry".to_string(),
289 key: subtree_name.as_ref().to_string(),
290 }
291 .into()
292 })
293 }
294
295 /// Get the height of this entry in the main tree DAG.
296 pub fn height(&self) -> u64 {
297 self.tree.height
298 }
299
300 /// Get the height of this entry in a specific subtree's DAG.
301 ///
302 /// If the subtree has an explicit height (`Some(h)`), that value is returned.
303 /// If the subtree height is `None`, it inherits from the main tree height.
304 ///
305 /// This allows subtrees to either track independent heights (for subtrees
306 /// with their own height strategy) or share the tree's height (default).
307 pub fn subtree_height(&self, subtree_name: impl AsRef<str>) -> Result<u64> {
308 self.subtrees
309 .iter()
310 .find(|node| node.name == subtree_name.as_ref())
311 .map(|node| node.height.unwrap_or_else(|| self.height()))
312 .ok_or_else(|| {
313 StoreError::KeyNotFound {
314 store: "entry".to_string(),
315 key: subtree_name.as_ref().to_string(),
316 }
317 .into()
318 })
319 }
320
321 /// Create a canonical representation of this entry for signing purposes.
322 ///
323 /// This creates a copy of the entry with the signature field removed from auth,
324 /// which is necessary for signature generation and verification.
325 /// The returned entry has deterministic field ordering for consistent signatures.
326 pub fn canonical_for_signing(&self) -> Self {
327 let mut canonical = self.clone();
328 canonical.sig.sig = None;
329 canonical
330 }
331
332 /// Create canonical bytes for signing or ID generation.
333 ///
334 /// This method serializes the entry to JSON with deterministic field ordering.
335 /// For signing purposes, call `canonical_for_signing()` first.
336 pub fn canonical_bytes(&self) -> Result<Vec<u8>> {
337 let json = serde_json::to_string(self).map_err(Error::Serialize)?;
338 Ok(json.into_bytes())
339 }
340
341 /// Create canonical bytes for signing (convenience method).
342 ///
343 /// This combines `canonical_for_signing()` and `canonical_bytes()` for convenience.
344 pub fn signing_bytes(&self) -> Result<Vec<u8>> {
345 self.canonical_for_signing().canonical_bytes()
346 }
347
348 /// Validate the structural integrity of this entry.
349 ///
350 /// This method performs lightweight structural validation that can be done
351 /// without access to the backend database. It checks for obvious structural
352 /// issues while deferring complex DAG relationship validation to the transaction
353 /// and backend layers where full database access is available.
354 ///
355 /// # Validation Rules
356 ///
357 /// ## Critical Main Tree Parent Validation (Prevents "No Common Ancestor" Errors)
358 /// - **Root entries** (containing "_root" subtree): May have empty parents
359 /// - **Non-root entries**: MUST have at least one parent - **HARD REQUIREMENT**
360 /// - **Empty parent IDs**: Always rejected as invalid
361 ///
362 /// This strict enforcement prevents orphaned entries that cause sync failures.
363 ///
364 /// ## Subtree Parent Relationships
365 /// - For root entries: Subtrees may have empty parents (they establish the subtree roots)
366 /// - For non-root entries: Empty subtree parents require deeper validation:
367 /// - Could be legitimate (first entry in a new subtree)
368 /// - Could indicate broken relationships (needs DAG traversal to verify)
369 ///
370 /// ## Multi-Layer Validation System
371 /// Complex validation happens at multiple layers:
372 /// 1. **Entry Layer** (this method): Structural validation, main tree parent enforcement
373 /// 2. **Transaction Layer**: Parent discovery, subtree parent validation with DAG access
374 /// 3. **Backend Storage**: Final validation gate before persistence
375 /// 4. **Sync Operations**: Validation of entries received from peers
376 ///
377 /// # Special Cases
378 /// - The "_root" marker subtree has special handling and skips validation
379 /// - The "_settings" subtree follows standard validation rules
380 /// - Empty subtree parents are logged but deferred to transaction layer
381 ///
382 /// # Returns
383 ///
384 /// - `Ok(())` if the entry is structurally valid
385 /// - `Err(InstanceError::EntryValidationFailed)` if validation fails with specific reason
386 ///
387 /// # Examples
388 ///
389 /// ```rust,no_run
390 /// # use eidetica::Entry;
391 /// # let entry: Entry = unimplemented!();
392 /// // Validate an entry before storage or sync
393 /// match entry.validate() {
394 /// Ok(()) => {
395 /// // Entry is valid, safe to store/sync
396 /// println!("Entry is valid");
397 /// }
398 /// Err(e) => {
399 /// // Entry is invalid, reject it
400 /// eprintln!("Invalid entry: {}", e);
401 /// }
402 /// }
403 /// ```
404 /// Validates that an ID is in the correct format for the hash algorithm used.
405 ///
406 /// This function now supports multiple hash algorithms and uses the structured
407 /// ID validation from the ID type itself.
408 fn validate_id_format(id: &ID, context: &str) -> Result<()> {
409 // Use the ID's built-in validation by attempting to parse its string representation
410 // This ensures we validate according to the actual algorithm and format rules
411 if let Err(id_err) = ID::parse(id.as_str()) {
412 // Add context to the error and convert through the error system
413 let contextual_err = match &id_err {
414 IdError::InvalidFormat(_) => IdError::InvalidFormat(format!(
415 "Invalid ID format in {}: {}",
416 context,
417 id.as_str()
418 )),
419 IdError::InvalidHex(_) => IdError::InvalidHex(format!(
420 "Invalid hex characters in {} ID: {}",
421 context,
422 id.as_str()
423 )),
424 // For length and algorithm errors, the original error is sufficient
425 _ => id_err,
426 };
427 return Err(contextual_err.into());
428 }
429
430 Ok(())
431 }
432
433 pub fn validate(&self) -> Result<()> {
434 use crate::constants::{ROOT, SETTINGS};
435 use crate::instance::errors::InstanceError;
436
437 // CRITICAL VALIDATION: Root entries (with _root marker) cannot have parents
438 // This enforces that root entries are true starting points of trees
439 let has_root_marker = self.subtrees.iter().any(|node| node.name == ROOT);
440 if has_root_marker && !self.tree.parents.is_empty() {
441 return Err(InstanceError::EntryValidationFailed {
442 reason: format!(
443 "Entry {} has _root marker but also has parents. Root entries cannot have parent relationships as they are the starting points of trees.",
444 self.id()
445 ),
446 }.into());
447 }
448
449 // Check if this is a root entry (will be true only if has ROOT marker AND no parents)
450 let is_root_entry = has_root_marker && self.tree.parents.is_empty();
451
452 // Validate root ID format (when not empty)
453 if !self.tree.root.is_empty() {
454 Self::validate_id_format(&self.tree.root, "tree root ID")?;
455 }
456
457 // Validate each subtree
458 for subtree_node in &self.subtrees {
459 let subtree_name = &subtree_node.name;
460 let subtree_parents = &subtree_node.parents;
461
462 // Empty string is not allowed as a subtree name
463 if subtree_name.is_empty() {
464 return Err(InstanceError::EntryValidationFailed {
465 reason: format!(
466 "Entry {} has a subtree with empty name. Store names must be non-empty.",
467 self.id()
468 ),
469 }
470 .into());
471 }
472
473 // Skip validation for the special "_root" marker subtree
474 if subtree_name == ROOT {
475 continue;
476 }
477
478 // For non-root entries with empty subtree parents, this is only valid if:
479 // 1. The entry has no main parents (making it a legitimate subtree root), OR
480 // 2. The subtree is genuinely being established for the first time within the tree
481 //
482 // Note: We can't perform deep validation here without access to the backend,
483 // so we defer complex validation to transaction/backend layers where full
484 // DAG traversal is possible. This basic validation catches obvious structural errors.
485 if !is_root_entry && subtree_parents.is_empty() {
486 // This is a lightweight structural check - more comprehensive validation
487 // happens in transaction/backend layers with full DAG access
488 tracing::debug!(
489 entry_id = %self.id(),
490 subtree = subtree_name,
491 "Entry has empty subtree parents - will be validated in transaction layer"
492 );
493 }
494
495 // Special validation for the critical "_settings" subtree
496 // Note: Settings subtree follows the same rules as other subtrees - empty parents
497 // are valid for the first entry in the subtree. Comprehensive validation happens
498 // in transaction/backend layers with full DAG access.
499 if subtree_name == SETTINGS && !is_root_entry && subtree_parents.is_empty() {
500 tracing::debug!(
501 entry_id = %self.id(),
502 "Settings subtree has empty parents - will be validated in transaction layer"
503 );
504 }
505
506 // Validate that subtree parents are not empty strings and have valid format
507 for parent_id in subtree_parents {
508 if parent_id.is_empty() {
509 return Err(InstanceError::EntryValidationFailed {
510 reason: format!(
511 "Entry {} has subtree '{}' with empty parent ID. Parent IDs must be non-empty valid entry IDs.",
512 self.id(),
513 subtree_name
514 ),
515 }.into());
516 }
517 // Validate parent ID format
518 Self::validate_id_format(
519 parent_id,
520 &format!("subtree '{subtree_name}' parent ID"),
521 )?;
522 }
523 }
524
525 // Enforce main tree parent requirements
526 if !is_root_entry {
527 let main_parents = self.tree.parents.clone();
528 if main_parents.is_empty() {
529 // This is a HARD FAILURE - reject the entry completely
530 // Empty main tree parents create orphaned nodes that break LCA calculations
531 return Err(InstanceError::EntryValidationFailed {
532 reason: format!(
533 "Non-root entry {} has empty main tree parents. All non-root entries must have valid parent relationships in the main tree.",
534 self.id()
535 ),
536 }.into());
537 }
538
539 // Validate that main parents are not empty strings and have valid format
540 for parent_id in &main_parents {
541 if parent_id.is_empty() {
542 return Err(InstanceError::EntryValidationFailed {
543 reason: format!(
544 "Entry {} has empty parent ID in main tree. Parent IDs must be non-empty valid entry IDs.",
545 self.id()
546 ),
547 }.into());
548 }
549 // Validate parent ID format
550 Self::validate_id_format(parent_id, "main tree parent ID")?;
551 }
552 }
553
554 Ok(())
555 }
556}