// Development Documentation (main branch) - For stable release docs, see docs.rs/eidetica
//
// Source file: eidetica/entry/mod.rs

//!
//! Defines the fundamental data unit (`Entry`) and related types.
//!
//! An `Entry` is the core, content-addressable building block of the database,
//! representing a snapshot of data in the main tree and potentially multiple named subtrees.
//! This module also re-exports the `ID` type and defines the `RawData` type.
7
8mod builder;
9pub mod errors;
10pub mod id;
11
12#[cfg(test)]
13mod tests;
14
15use serde::{Deserialize, Serialize};
16
17pub use builder::EntryBuilder;
18pub use errors::EntryError;
19pub use id::ID;
20
21use crate::{Error, Result, auth::types::SigInfo, constants::ROOT, store::StoreError};
22
23use id::IdError;
24
/// Represents serialized data, typically JSON, provided by the user.
///
/// This is a plain alias for `String`; it allows users to manage their own
/// data structures and serialization formats.
pub type RawData = String;
29
/// Predicate for serde's `skip_serializing_if`: returns `true` when the tree
/// height is zero, so a zero height can be left out of the serialized form.
///
/// The argument is a reference because serde hands the field to the predicate
/// by reference.
fn is_zero(h: &u64) -> bool {
    *h == 0
}
34
35/// Internal representation of the main tree node within an `Entry`.
36#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
37pub(super) struct TreeNode {
38    /// The ID of the root `Entry` of the tree this node belongs to.
39    pub root: ID,
40    /// IDs of the parent `Entry`s in the main tree history.
41    /// The vector is kept sorted alphabetically.
42    pub parents: Vec<ID>,
43    /// Serialized metadata associated with this `Entry` in the main tree.
44    /// This data is metadata about this specific entry only and is not merged with other entries.
45    ///
46    /// Metadata is used to improve the efficiency of certain operations and for experimentation.
47    ///
48    /// Metadata is optional and may not be present in all entries. Future versions
49    /// may extend metadata to include additional information.
50    pub metadata: Option<RawData>,
51    /// Height of this entry in the tree DAG (longest path from root).
52    /// Root entries have height 0, children have max(parent heights) + 1.
53    #[serde(rename = "h", default, skip_serializing_if = "is_zero")]
54    pub height: u64,
55}
56
57/// Internal representation of a named subtree node within an `Entry`.
58#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
59pub(super) struct SubTreeNode {
60    /// The name of the subtree, analogous to a table name.
61    /// Subtrees are _named_, and not identified by an ID.
62    pub name: String,
63    /// IDs of the parent `Entry`s specific to this subtree's history.
64    /// The vector is kept sorted alphabetically.
65    pub parents: Vec<ID>,
66    /// Serialized data specific to this `Entry` within this named subtree.
67    ///
68    /// `None` indicates that this Entry participates in the subtree but makes no data changes.
69    /// This is used when there is information needed for this subtree found somewhere else (e.g. the `_index`)
70    ///
71    /// `Some(data)` contains the actual serialized data for this subtree.
72    #[serde(skip_serializing_if = "Option::is_none")]
73    pub data: Option<RawData>,
74    /// Height of this entry in the subtree DAG.
75    ///
76    /// `None` means the subtree inherits the tree's height (not serialized).
77    /// `Some(h)` is an independent height for subtrees with their own strategy.
78    #[serde(rename = "h", default, skip_serializing_if = "Option::is_none")]
79    pub height: Option<u64>,
80}
81
82/// The fundamental unit of data in Eidetica, representing a finalized, immutable Database Entry.
83///
84/// An `Entry` represents a snapshot of data within a `Database` and potentially one or more named `Store`s.
85/// It is content-addressable, meaning its `ID` is a cryptographic hash of its contents.
86/// Entries form a Merkle-DAG (Directed Acyclic Graph) structure through parent references.
87///
88/// # Authentication
89///
90/// Each entry contains authentication information with:
91/// - `sig`: Base64-encoded cryptographic signature (optional, allows unsigned entry creation)
92/// - `key`: Authentication key reference path, either:
93///   - A direct key ID defined in this tree's `_settings.auth`
94///   - A delegation path as an ordered list of `{"key": "delegated_tree_1", "tips": ["A", "B"]}`
95///     where the last element must contain only a `"key"` field
96///
97/// # Immutability
98///
99/// `Entry` instances are designed to be immutable once created. To create or modify entries,
100/// use the `EntryBuilder` struct, which provides a mutable API for constructing entries.
101/// Once an entry is built, its content cannot be changed, and its ID is deterministic
102/// based on its content.
103///
104/// # Example
105///
106/// ```
107/// # use eidetica::Entry;
108///
109/// // Create a new root entry (standalone entry that starts a new DAG)
110/// let entry = Entry::root_builder()
111///     .set_subtree_data("users", r#"{"user1":"data"}"#)
112///     .build()
113///     .expect("Entry should build successfully");
114///
115/// // Access entry data
116/// let id = entry.id(); // Calculate content-addressable ID
117/// let user_data = entry.data("users").unwrap();
118/// ```
119///
120/// # Builders
121///
122/// To create an `Entry`, use the associated `EntryBuilder`.
123/// The preferred way to get an `EntryBuilder` is via the static methods
124/// `Entry::builder()` for regular entries or `Entry::root_builder()` for new top-level tree roots.
125///
126/// ```
127/// # use eidetica::entry::{Entry, RawData};
128/// # let root_id: String = "some_root_id".to_string();
129/// # let data: RawData = "{}".to_string();
130/// // For a regular entry:
131/// let builder = Entry::builder(root_id);
132///
133/// // For a new top-level tree root:
134/// let root_builder = Entry::root_builder();
135/// ```
136/// The current entry format version.
137/// v0 indicates this is an unstable protocol subject to breaking changes.
138pub const ENTRY_VERSION: u8 = 0;
139
140/// Helper to check if version is default (0) for serde skip_serializing_if
141fn is_v0(v: &u8) -> bool {
142    *v == 0
143}
144
145/// Validates the entry version during deserialization.
146fn validate_entry_version<'de, D>(deserializer: D) -> std::result::Result<u8, D::Error>
147where
148    D: serde::Deserializer<'de>,
149{
150    let version = u8::deserialize(deserializer)?;
151    if version != ENTRY_VERSION {
152        return Err(serde::de::Error::custom(format!(
153            "unsupported Entry version {version}; only version {ENTRY_VERSION} is supported"
154        )));
155    }
156    Ok(version)
157}
158
159#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
160pub struct Entry {
161    /// Protocol version for this entry format.
162    /// Used to verify that we support reading this entry.
163    #[serde(
164        rename = "_v",
165        default,
166        skip_serializing_if = "is_v0",
167        deserialize_with = "validate_entry_version"
168    )]
169    version: u8,
170    /// The main tree node data, including the root ID, parents in the main tree, and associated data.
171    pub(super) tree: TreeNode,
172    /// A collection of named subtrees this entry contains data for.
173    /// The vector is kept sorted alphabetically by subtree name during the build process.
174    pub(super) subtrees: Vec<SubTreeNode>,
175    /// Authentication information for this entry
176    pub sig: SigInfo,
177}
178
179impl Entry {
180    /// Creates a new `EntryBuilder` for an entry associated with a specific tree root.
181    /// This is a convenience method and preferred over calling `EntryBuilder::new()` directly.
182    ///
183    /// # Arguments
184    /// * `root` - The `ID` of the root `Entry` of the tree this entry will belong to.
185    pub fn builder(root: impl Into<ID>) -> EntryBuilder {
186        EntryBuilder::new(root)
187    }
188
189    /// Creates a new `EntryBuilder` for a top-level (root) entry for a new tree.
190    /// This is a convenience method and preferred over calling `EntryBuilder::new_top_level()` directly.
191    ///
192    /// Root entries have an empty string as their `root` ID and include a special ROOT subtree marker.
193    /// This method is typically used when creating a new tree.
194    pub fn root_builder() -> EntryBuilder {
195        EntryBuilder::new_top_level()
196    }
197
198    /// Get the content-addressable ID of the entry.
199    ///
200    /// The ID is calculated on demand by hashing the serialized JSON representation of the entry.
201    /// Because entries are immutable once created and their contents are deterministically
202    /// serialized, this ensures that identical entries will always have the same ID.
203    pub fn id(&self) -> ID {
204        // Entry itself derives Serialize and contains tree and subtrees.
205        // These are kept sorted and finalized by the EntryBuilder before Entry creation.
206        let json = serde_json::to_string(self).expect("Failed to serialize entry for hashing");
207        ID::from_bytes(json)
208    }
209
210    /// Get the ID of the root `Entry` of the tree this entry belongs to.
211    pub fn root(&self) -> ID {
212        self.tree.root.clone()
213    }
214
215    /// Check if this entry is a root entry (contains the ROOT marker and has no parents).
216    ///
217    /// Root entries are the top-level entries in the database and are distinguished by:
218    /// 1. Containing a subtree with the ROOT marker
219    /// 2. Having no parent entries (they are true tree roots)
220    ///
221    /// This ensures that root entries are actual starting points of trees in the DAG.
222    pub fn is_root(&self) -> bool {
223        self.subtrees.iter().any(|node| node.name == ROOT) && self.tree.parents.is_empty()
224    }
225
226    /// Check if this entry contains data for a specific named subtree.
227    pub fn in_subtree(&self, subtree_name: impl AsRef<str>) -> bool {
228        self.subtrees
229            .iter()
230            .any(|node| node.name == subtree_name.as_ref())
231    }
232
233    /// Check if this entry belongs to a specific tree, identified by its root ID.
234    pub fn in_tree(&self, tree_id: impl AsRef<str>) -> bool {
235        // Entries that are roots exist in both trees
236        self.root() == tree_id.as_ref() || (self.id().as_str() == tree_id.as_ref())
237    }
238
239    /// Get the names of all subtrees this entry contains data for.
240    /// The names are returned in alphabetical order.
241    pub fn subtrees(&self) -> Vec<String> {
242        self.subtrees
243            .iter()
244            .map(|subtree| subtree.name.clone())
245            .collect()
246    }
247
248    /// Get the metadata associated with this entry's tree node.
249    ///
250    /// Metadata is optional information attached to an entry that is not part of the
251    /// main data model and is not merged between entries.
252    pub fn metadata(&self) -> Option<&RawData> {
253        self.tree.metadata.as_ref()
254    }
255
256    /// Get the `RawData` for a specific named subtree within this entry.
257    ///
258    /// Returns an error if the subtree is not found or if the subtree exists but has no data (`None`).
259    pub fn data(&self, subtree_name: impl AsRef<str>) -> Result<&RawData> {
260        self.subtrees
261            .iter()
262            .find(|node| node.name == subtree_name.as_ref())
263            .and_then(|node| node.data.as_ref())
264            .ok_or_else(|| {
265                StoreError::KeyNotFound {
266                    store: "entry".to_string(),
267                    key: subtree_name.as_ref().to_string(),
268                }
269                .into()
270            })
271    }
272
273    /// Get the IDs of the parent entries in the main tree history.
274    /// The parent IDs are returned in alphabetical order.
275    pub fn parents(&self) -> Result<Vec<ID>> {
276        Ok(self.tree.parents.clone())
277    }
278
279    /// Get the IDs of the parent entries specific to a named subtree's history.
280    /// The parent IDs are returned in alphabetical order.
281    pub fn subtree_parents(&self, subtree_name: impl AsRef<str>) -> Result<Vec<ID>> {
282        self.subtrees
283            .iter()
284            .find(|node| node.name == subtree_name.as_ref())
285            .map(|node| node.parents.clone())
286            .ok_or_else(|| {
287                StoreError::KeyNotFound {
288                    store: "entry".to_string(),
289                    key: subtree_name.as_ref().to_string(),
290                }
291                .into()
292            })
293    }
294
295    /// Get the height of this entry in the main tree DAG.
296    pub fn height(&self) -> u64 {
297        self.tree.height
298    }
299
300    /// Get the height of this entry in a specific subtree's DAG.
301    ///
302    /// If the subtree has an explicit height (`Some(h)`), that value is returned.
303    /// If the subtree height is `None`, it inherits from the main tree height.
304    ///
305    /// This allows subtrees to either track independent heights (for subtrees
306    /// with their own height strategy) or share the tree's height (default).
307    pub fn subtree_height(&self, subtree_name: impl AsRef<str>) -> Result<u64> {
308        self.subtrees
309            .iter()
310            .find(|node| node.name == subtree_name.as_ref())
311            .map(|node| node.height.unwrap_or_else(|| self.height()))
312            .ok_or_else(|| {
313                StoreError::KeyNotFound {
314                    store: "entry".to_string(),
315                    key: subtree_name.as_ref().to_string(),
316                }
317                .into()
318            })
319    }
320
321    /// Create a canonical representation of this entry for signing purposes.
322    ///
323    /// This creates a copy of the entry with the signature field removed from auth,
324    /// which is necessary for signature generation and verification.
325    /// The returned entry has deterministic field ordering for consistent signatures.
326    pub fn canonical_for_signing(&self) -> Self {
327        let mut canonical = self.clone();
328        canonical.sig.sig = None;
329        canonical
330    }
331
332    /// Create canonical bytes for signing or ID generation.
333    ///
334    /// This method serializes the entry to JSON with deterministic field ordering.
335    /// For signing purposes, call `canonical_for_signing()` first.
336    pub fn canonical_bytes(&self) -> Result<Vec<u8>> {
337        let json = serde_json::to_string(self).map_err(Error::Serialize)?;
338        Ok(json.into_bytes())
339    }
340
341    /// Create canonical bytes for signing (convenience method).
342    ///
343    /// This combines `canonical_for_signing()` and `canonical_bytes()` for convenience.
344    pub fn signing_bytes(&self) -> Result<Vec<u8>> {
345        self.canonical_for_signing().canonical_bytes()
346    }
347
348    /// Validate the structural integrity of this entry.
349    ///
350    /// This method performs lightweight structural validation that can be done
351    /// without access to the backend database. It checks for obvious structural
352    /// issues while deferring complex DAG relationship validation to the transaction
353    /// and backend layers where full database access is available.
354    ///
355    /// # Validation Rules
356    ///
357    /// ## Critical Main Tree Parent Validation (Prevents "No Common Ancestor" Errors)
358    /// - **Root entries** (containing "_root" subtree): May have empty parents
359    /// - **Non-root entries**: MUST have at least one parent - **HARD REQUIREMENT**
360    /// - **Empty parent IDs**: Always rejected as invalid
361    ///
362    /// This strict enforcement prevents orphaned entries that cause sync failures.
363    ///
364    /// ## Subtree Parent Relationships
365    /// - For root entries: Subtrees may have empty parents (they establish the subtree roots)
366    /// - For non-root entries: Empty subtree parents require deeper validation:
367    ///   - Could be legitimate (first entry in a new subtree)
368    ///   - Could indicate broken relationships (needs DAG traversal to verify)
369    ///
370    /// ## Multi-Layer Validation System
371    /// Complex validation happens at multiple layers:
372    /// 1. **Entry Layer** (this method): Structural validation, main tree parent enforcement
373    /// 2. **Transaction Layer**: Parent discovery, subtree parent validation with DAG access
374    /// 3. **Backend Storage**: Final validation gate before persistence
375    /// 4. **Sync Operations**: Validation of entries received from peers
376    ///
377    /// # Special Cases
378    /// - The "_root" marker subtree has special handling and skips validation
379    /// - The "_settings" subtree follows standard validation rules
380    /// - Empty subtree parents are logged but deferred to transaction layer
381    ///
382    /// # Returns
383    ///
384    /// - `Ok(())` if the entry is structurally valid
385    /// - `Err(InstanceError::EntryValidationFailed)` if validation fails with specific reason
386    ///
387    /// # Examples
388    ///
389    /// ```rust,no_run
390    /// # use eidetica::Entry;
391    /// # let entry: Entry = unimplemented!();
392    /// // Validate an entry before storage or sync
393    /// match entry.validate() {
394    ///     Ok(()) => {
395    ///         // Entry is valid, safe to store/sync
396    ///         println!("Entry is valid");
397    ///     }
398    ///     Err(e) => {
399    ///         // Entry is invalid, reject it
400    ///         eprintln!("Invalid entry: {}", e);
401    ///     }
402    /// }
403    /// ```
404    /// Validates that an ID is in the correct format for the hash algorithm used.
405    ///
406    /// This function now supports multiple hash algorithms and uses the structured
407    /// ID validation from the ID type itself.
408    fn validate_id_format(id: &ID, context: &str) -> Result<()> {
409        // Use the ID's built-in validation by attempting to parse its string representation
410        // This ensures we validate according to the actual algorithm and format rules
411        if let Err(id_err) = ID::parse(id.as_str()) {
412            // Add context to the error and convert through the error system
413            let contextual_err = match &id_err {
414                IdError::InvalidFormat(_) => IdError::InvalidFormat(format!(
415                    "Invalid ID format in {}: {}",
416                    context,
417                    id.as_str()
418                )),
419                IdError::InvalidHex(_) => IdError::InvalidHex(format!(
420                    "Invalid hex characters in {} ID: {}",
421                    context,
422                    id.as_str()
423                )),
424                // For length and algorithm errors, the original error is sufficient
425                _ => id_err,
426            };
427            return Err(contextual_err.into());
428        }
429
430        Ok(())
431    }
432
433    pub fn validate(&self) -> Result<()> {
434        use crate::constants::{ROOT, SETTINGS};
435        use crate::instance::errors::InstanceError;
436
437        // CRITICAL VALIDATION: Root entries (with _root marker) cannot have parents
438        // This enforces that root entries are true starting points of trees
439        let has_root_marker = self.subtrees.iter().any(|node| node.name == ROOT);
440        if has_root_marker && !self.tree.parents.is_empty() {
441            return Err(InstanceError::EntryValidationFailed {
442                reason: format!(
443                    "Entry {} has _root marker but also has parents. Root entries cannot have parent relationships as they are the starting points of trees.",
444                    self.id()
445                ),
446            }.into());
447        }
448
449        // Check if this is a root entry (will be true only if has ROOT marker AND no parents)
450        let is_root_entry = has_root_marker && self.tree.parents.is_empty();
451
452        // Validate root ID format (when not empty)
453        if !self.tree.root.is_empty() {
454            Self::validate_id_format(&self.tree.root, "tree root ID")?;
455        }
456
457        // Validate each subtree
458        for subtree_node in &self.subtrees {
459            let subtree_name = &subtree_node.name;
460            let subtree_parents = &subtree_node.parents;
461
462            // Empty string is not allowed as a subtree name
463            if subtree_name.is_empty() {
464                return Err(InstanceError::EntryValidationFailed {
465                    reason: format!(
466                        "Entry {} has a subtree with empty name. Store names must be non-empty.",
467                        self.id()
468                    ),
469                }
470                .into());
471            }
472
473            // Skip validation for the special "_root" marker subtree
474            if subtree_name == ROOT {
475                continue;
476            }
477
478            // For non-root entries with empty subtree parents, this is only valid if:
479            // 1. The entry has no main parents (making it a legitimate subtree root), OR
480            // 2. The subtree is genuinely being established for the first time within the tree
481            //
482            // Note: We can't perform deep validation here without access to the backend,
483            // so we defer complex validation to transaction/backend layers where full
484            // DAG traversal is possible. This basic validation catches obvious structural errors.
485            if !is_root_entry && subtree_parents.is_empty() {
486                // This is a lightweight structural check - more comprehensive validation
487                // happens in transaction/backend layers with full DAG access
488                tracing::debug!(
489                    entry_id = %self.id(),
490                    subtree = subtree_name,
491                    "Entry has empty subtree parents - will be validated in transaction layer"
492                );
493            }
494
495            // Special validation for the critical "_settings" subtree
496            // Note: Settings subtree follows the same rules as other subtrees - empty parents
497            // are valid for the first entry in the subtree. Comprehensive validation happens
498            // in transaction/backend layers with full DAG access.
499            if subtree_name == SETTINGS && !is_root_entry && subtree_parents.is_empty() {
500                tracing::debug!(
501                    entry_id = %self.id(),
502                    "Settings subtree has empty parents - will be validated in transaction layer"
503                );
504            }
505
506            // Validate that subtree parents are not empty strings and have valid format
507            for parent_id in subtree_parents {
508                if parent_id.is_empty() {
509                    return Err(InstanceError::EntryValidationFailed {
510                        reason: format!(
511                            "Entry {} has subtree '{}' with empty parent ID. Parent IDs must be non-empty valid entry IDs.",
512                            self.id(),
513                            subtree_name
514                        ),
515                    }.into());
516                }
517                // Validate parent ID format
518                Self::validate_id_format(
519                    parent_id,
520                    &format!("subtree '{subtree_name}' parent ID"),
521                )?;
522            }
523        }
524
525        // Enforce main tree parent requirements
526        if !is_root_entry {
527            let main_parents = self.tree.parents.clone();
528            if main_parents.is_empty() {
529                // This is a HARD FAILURE - reject the entry completely
530                // Empty main tree parents create orphaned nodes that break LCA calculations
531                return Err(InstanceError::EntryValidationFailed {
532                    reason: format!(
533                        "Non-root entry {} has empty main tree parents. All non-root entries must have valid parent relationships in the main tree.",
534                        self.id()
535                    ),
536                }.into());
537            }
538
539            // Validate that main parents are not empty strings and have valid format
540            for parent_id in &main_parents {
541                if parent_id.is_empty() {
542                    return Err(InstanceError::EntryValidationFailed {
543                        reason: format!(
544                            "Entry {} has empty parent ID in main tree. Parent IDs must be non-empty valid entry IDs.",
545                            self.id()
546                        ),
547                    }.into());
548                }
549                // Validate parent ID format
550                Self::validate_id_format(parent_id, "main tree parent ID")?;
551            }
552        }
553
554        Ok(())
555    }
556}