Development Documentation (main branch) - For stable release docs, see docs.rs/eidetica
Skip to main content

eidetica/entry/
mod.rs

//!
//! Defines the fundamental data unit (`Entry`) and related types.
//!
//! An `Entry` is the core, content-addressable building block of the database,
//! representing a snapshot of data in the main tree and potentially multiple named subtrees.
//! This module also defines the `RawData` type and re-exports `ID`, `EntryBuilder`,
//! and `EntryError` from its submodules.
7
8mod builder;
9pub mod errors;
10pub mod id;
11
12#[cfg(test)]
13mod tests;
14
15use serde::{Deserialize, Serialize};
16
17pub use builder::EntryBuilder;
18pub use errors::EntryError;
19pub use id::ID;
20
21use crate::{Result, auth::types::SigInfo, constants::ROOT, store::StoreError};
22
23use id::IdError;
24
/// Opaque payload bytes embedded in an `Entry`.
///
/// Each `Store` owns the format of its own payload (JSON, CBOR, raw binary, etc.);
/// the entry layer treats `RawData` as a byte string and does not interpret it.
///
/// Encoded as a CBOR byte string (major type 2) in DAG-CBOR for IPLD compatibility.
/// The alias itself is a plain `Vec<u8>`; the byte-string encoding comes from the
/// `serde_bytes` adapter applied to the struct fields that hold a `RawData`.
pub type RawData = Vec<u8>;
32
/// `skip_serializing_if` predicate for a tree height.
///
/// Returns `true` when the referenced height is `0`, so serde omits the field.
fn is_zero(h: &u64) -> bool {
    matches!(*h, 0)
}
37
38/// Internal representation of the main tree node within an `Entry`.
39#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
40pub(super) struct TreeNode {
41    /// The ID of the root `Entry` of the tree this node belongs to.
42    /// `None` for root entries (they are their own root).
43    #[serde(skip_serializing_if = "Option::is_none")]
44    pub root: Option<ID>,
45    /// IDs of the parent `Entry`s in the main tree history.
46    /// The vector is kept sorted alphabetically.
47    pub parents: Vec<ID>,
48    /// Serialized metadata associated with this `Entry` in the main tree.
49    /// This data is metadata about this specific entry only and is not merged with other entries.
50    ///
51    /// Metadata is used to improve the efficiency of certain operations and for experimentation.
52    ///
53    /// Metadata is optional and may not be present in all entries. Future versions
54    /// may extend metadata to include additional information.
55    #[serde(default, skip_serializing_if = "Option::is_none", with = "serde_bytes")]
56    pub metadata: Option<RawData>,
57    /// Height of this entry in the tree DAG (longest path from root).
58    /// Root entries have height 0, children have max(parent heights) + 1.
59    #[serde(rename = "h", default, skip_serializing_if = "is_zero")]
60    pub height: u64,
61}
62
63/// Internal representation of a named subtree node within an `Entry`.
64#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
65pub(super) struct SubTreeNode {
66    /// The name of the subtree, analogous to a table name.
67    /// Subtrees are _named_, and not identified by an ID.
68    pub name: String,
69    /// IDs of the parent `Entry`s specific to this subtree's history.
70    /// The vector is kept sorted alphabetically.
71    pub parents: Vec<ID>,
72    /// Serialized data specific to this `Entry` within this named subtree.
73    ///
74    /// `None` indicates that this Entry participates in the subtree but makes no data changes.
75    /// This is used when there is information needed for this subtree found somewhere else (e.g. the `_index`)
76    ///
77    /// `Some(data)` contains the actual serialized data for this subtree.
78    #[serde(default, skip_serializing_if = "Option::is_none", with = "serde_bytes")]
79    pub data: Option<RawData>,
80    /// Height of this entry in the subtree DAG.
81    ///
82    /// `None` means the subtree inherits the tree's height (not serialized).
83    /// `Some(h)` is an independent height for subtrees with their own strategy.
84    #[serde(rename = "h", default, skip_serializing_if = "Option::is_none")]
85    pub height: Option<u64>,
86}
87
88/// The fundamental unit of data in Eidetica, representing a finalized, immutable Database Entry.
89///
90/// An `Entry` represents a snapshot of data within a `Database` and potentially one or more named `Store`s.
91/// It is content-addressable, meaning its `ID` is a cryptographic hash of its contents.
92/// Entries form a Merkle-DAG (Directed Acyclic Graph) structure through parent references.
93///
94/// # Authentication
95///
96/// Each entry contains authentication information with:
97/// - `sig`: Base64-encoded cryptographic signature (optional, allows unsigned entry creation)
98/// - `key`: Authentication key reference path, either:
99///   - A direct key ID defined in this tree's `_settings.auth`
100///   - A delegation path as an ordered list of `{"key": "delegated_tree_1", "tips": ["A", "B"]}`
101///     where the last element must contain only a `"key"` field
102///
103/// # Immutability
104///
105/// `Entry` instances are designed to be immutable once created. To create or modify entries,
106/// use the `EntryBuilder` struct, which provides a mutable API for constructing entries.
107/// Once an entry is built, its content cannot be changed, and its ID is deterministic
108/// based on its content.
109///
110/// # Example
111///
112/// ```
113/// # use eidetica::Entry;
114///
115/// // Create a new root entry (standalone entry that starts a new DAG)
116/// let entry = Entry::root_builder()
117///     .set_subtree_data("users", br#"{"user1":"data"}"#.to_vec())
118///     .build()
119///     .expect("Entry should build successfully");
120///
121/// // Access entry data
122/// let id = entry.id(); // Calculate content-addressable ID
123/// let user_data = entry.data("users").unwrap();
124/// ```
125///
126/// # Builders
127///
128/// To create an `Entry`, use the associated `EntryBuilder`.
129/// The preferred way to get an `EntryBuilder` is via the static methods
130/// `Entry::builder()` for regular entries or `Entry::root_builder()` for new top-level tree roots.
131///
132/// ```
133/// # use eidetica::entry::{Entry, ID, RawData};
134/// # let root_id = ID::from_bytes("some_root_id");
135/// # let data: RawData = b"{}".to_vec();
136/// // For a regular entry:
137/// let builder = Entry::builder(root_id);
138///
139/// // For a new top-level tree root:
140/// let root_builder = Entry::root_builder();
141/// ```
142/// The current entry format version.
143/// v0 indicates this is an unstable protocol subject to breaking changes.
144pub const ENTRY_VERSION: u8 = 0;
145
146/// Helper to check if version is default (0) for serde skip_serializing_if
147fn is_v0(v: &u8) -> bool {
148    *v == 0
149}
150
151/// Validates the entry version during deserialization.
152fn validate_entry_version<'de, D>(deserializer: D) -> std::result::Result<u8, D::Error>
153where
154    D: serde::Deserializer<'de>,
155{
156    let version = u8::deserialize(deserializer)?;
157    if version != ENTRY_VERSION {
158        return Err(serde::de::Error::custom(format!(
159            "unsupported Entry version {version}; only version {ENTRY_VERSION} is supported"
160        )));
161    }
162    Ok(version)
163}
164
165#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
166pub struct Entry {
167    /// Protocol version for this entry format.
168    /// Used to verify that we support reading this entry.
169    #[serde(
170        rename = "_v",
171        default,
172        skip_serializing_if = "is_v0",
173        deserialize_with = "validate_entry_version"
174    )]
175    version: u8,
176    /// The main tree node data, including the root ID, parents in the main tree, and associated data.
177    pub(super) tree: TreeNode,
178    /// A collection of named subtrees this entry contains data for.
179    /// The vector is kept sorted alphabetically by subtree name during the build process.
180    pub(super) subtrees: Vec<SubTreeNode>,
181    /// Authentication information for this entry
182    pub sig: SigInfo,
183}
184
185impl Entry {
    /// Creates a new `EntryBuilder` for an entry associated with a specific tree root.
    ///
    /// This is a convenience method and preferred over calling `EntryBuilder::new()` directly;
    /// it forwards `root` to that constructor unchanged.
    ///
    /// # Arguments
    /// * `root` - The `ID` of the root `Entry` of the tree this entry will belong to.
    ///
    /// # Returns
    /// The `EntryBuilder` produced by `EntryBuilder::new(root)`.
    pub fn builder(root: ID) -> EntryBuilder {
        EntryBuilder::new(root)
    }
194
    /// Creates a new `EntryBuilder` for a top-level (root) entry for a new tree.
    /// This is a convenience method and preferred over calling `EntryBuilder::new_top_level()` directly.
    ///
    /// Root entries carry no `root` ID of their own (`Entry::root()` returns `None` for them)
    /// and are recognized by a special ROOT subtree marker (see `Entry::is_root()`).
    /// NOTE(review): the marker is presumably added by `EntryBuilder::new_top_level()` — confirm
    /// against the builder.
    /// This method is typically used when creating a new tree.
    pub fn root_builder() -> EntryBuilder {
        EntryBuilder::new_top_level()
    }
203
204    /// Get the content-addressable ID of the entry.
205    ///
206    /// The ID is the CID of the DAG-CBOR serialized representation of the Entry.
207    pub fn id(&self) -> ID {
208        let bytes = self
209            .to_dagcbor()
210            .expect("Failed to serialize entry to DAG-CBOR for ID");
211        ID::from_dagcbor_bytes(bytes)
212    }
213
    /// Get the ID of the root `Entry` of the tree this entry belongs to.
    ///
    /// Returns a clone of the stored root ID, or `None` for root entries
    /// (which do not store a root reference).
    pub fn root(&self) -> Option<ID> {
        self.tree.root.clone()
    }
219
220    /// Check if this entry is a root entry (contains the ROOT marker and has no parents).
221    ///
222    /// Root entries are the top-level entries in the database and are distinguished by:
223    /// 1. Containing a subtree with the ROOT marker
224    /// 2. Having no parent entries (they are true tree roots)
225    ///
226    /// This ensures that root entries are actual starting points of trees in the DAG.
227    pub fn is_root(&self) -> bool {
228        // FIXME: better identification of root entries
229        self.subtrees.iter().any(|node| node.name == ROOT)
230            && self.tree.parents.is_empty()
231            && self.tree.root.is_none()
232    }
233
234    /// Check if this entry contains data for a specific named subtree.
235    pub fn in_subtree(&self, subtree_name: impl AsRef<str>) -> bool {
236        self.subtrees
237            .iter()
238            .any(|node| node.name == subtree_name.as_ref())
239    }
240
241    /// Check if this entry belongs to a specific tree, identified by its root ID.
242    pub fn in_tree(&self, tree_id: &ID) -> bool {
243        // Entries that are roots exist in both trees
244        self.tree.root.as_ref() == Some(tree_id) || self.id() == *tree_id
245    }
246
247    /// Get the names of all subtrees this entry contains data for.
248    /// The names are returned in alphabetical order.
249    pub fn subtrees(&self) -> Vec<String> {
250        self.subtrees
251            .iter()
252            .map(|subtree| subtree.name.clone())
253            .collect()
254    }
255
    /// Get the metadata associated with this entry's tree node.
    ///
    /// Metadata is optional information attached to an entry that is not part of the
    /// main data model and is not merged between entries.
    ///
    /// Returns a borrow of the stored bytes, or `None` when the entry has no metadata.
    pub fn metadata(&self) -> Option<&RawData> {
        self.tree.metadata.as_ref()
    }
263
264    /// Get the `RawData` for a specific named subtree within this entry.
265    ///
266    /// Returns an error if the subtree is not found or if the subtree exists but has no data (`None`).
267    pub fn data(&self, subtree_name: impl AsRef<str>) -> Result<&RawData> {
268        self.subtrees
269            .iter()
270            .find(|node| node.name == subtree_name.as_ref())
271            .and_then(|node| node.data.as_ref())
272            .ok_or_else(|| {
273                StoreError::KeyNotFound {
274                    store: "entry".to_string(),
275                    key: subtree_name.as_ref().to_string(),
276                }
277                .into()
278            })
279    }
280
    /// Get the IDs of the parent entries in the main tree history.
    /// The parent IDs are returned in alphabetical order.
    ///
    /// Returns a clone of the stored parent list. As written this method always
    /// returns `Ok`; the `Result` wrapper is part of the signature only.
    pub fn parents(&self) -> Result<Vec<ID>> {
        Ok(self.tree.parents.clone())
    }
286
287    /// Get the IDs of the parent entries specific to a named subtree's history.
288    /// The parent IDs are returned in alphabetical order.
289    pub fn subtree_parents(&self, subtree_name: impl AsRef<str>) -> Result<Vec<ID>> {
290        self.subtrees
291            .iter()
292            .find(|node| node.name == subtree_name.as_ref())
293            .map(|node| node.parents.clone())
294            .ok_or_else(|| {
295                StoreError::KeyNotFound {
296                    store: "entry".to_string(),
297                    key: subtree_name.as_ref().to_string(),
298                }
299                .into()
300            })
301    }
302
    /// Get the height of this entry in the main tree DAG.
    ///
    /// Returns the height stored on the tree node; it is not recomputed here.
    pub fn height(&self) -> u64 {
        self.tree.height
    }
307
308    /// Get the height of this entry in a specific subtree's DAG.
309    ///
310    /// If the subtree has an explicit height (`Some(h)`), that value is returned.
311    /// If the subtree height is `None`, it inherits from the main tree height.
312    ///
313    /// This allows subtrees to either track independent heights (for subtrees
314    /// with their own height strategy) or share the tree's height (default).
315    pub fn subtree_height(&self, subtree_name: impl AsRef<str>) -> Result<u64> {
316        self.subtrees
317            .iter()
318            .find(|node| node.name == subtree_name.as_ref())
319            .map(|node| node.height.unwrap_or_else(|| self.height()))
320            .ok_or_else(|| {
321                StoreError::KeyNotFound {
322                    store: "entry".to_string(),
323                    key: subtree_name.as_ref().to_string(),
324                }
325                .into()
326            })
327    }
328
329    /// Create a canonical representation of this entry for signing purposes.
330    ///
331    /// This creates a copy of the entry with the signature field removed from auth,
332    /// which is necessary for signature generation and verification.
333    /// The returned entry has deterministic field ordering for consistent signatures.
334    pub fn canonical_for_signing(&self) -> Self {
335        let mut canonical = self.clone();
336        canonical.sig.sig = None;
337        canonical
338    }
339
    /// Create canonical bytes for signing or ID generation.
    ///
    /// This is a direct alias for `to_dagcbor()`: it serializes the entry as-is,
    /// including any signature it carries.
    /// For signing purposes, call `canonical_for_signing()` first.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by `to_dagcbor()` when serialization fails.
    pub fn canonical_bytes(&self) -> Result<Vec<u8>> {
        self.to_dagcbor()
    }
347
348    /// Create canonical bytes for signing (convenience method).
349    ///
350    /// This combines `canonical_for_signing()` and `canonical_bytes()` for convenience.
351    pub fn signing_bytes(&self) -> Result<Vec<u8>> {
352        self.canonical_for_signing().canonical_bytes()
353    }
354
355    /// Serialize this entry to DAG-CBOR bytes.
356    ///
357    /// Returns the canonical DAG-CBOR encoding for CID computation.
358    pub fn to_dagcbor(&self) -> Result<Vec<u8>> {
359        serde_ipld_dagcbor::to_vec(self).map_err(|e| {
360            EntryError::SerializationFailed {
361                context: format!("DAG-CBOR serialization failed: {e}"),
362            }
363            .into()
364        })
365    }
366
367    /// Validate the structural integrity of this entry.
368    ///
369    /// This method performs lightweight structural validation that can be done
370    /// without access to the backend database. It checks for obvious structural
371    /// issues while deferring complex DAG relationship validation to the transaction
372    /// and backend layers where full database access is available.
373    ///
374    /// # Validation Rules
375    ///
376    /// ## Critical Main Tree Parent Validation (Prevents "No Common Ancestor" Errors)
377    /// - **Root entries** (containing "_root" subtree): May have empty parents
378    /// - **Non-root entries**: MUST have at least one parent - **HARD REQUIREMENT**
379    /// - **Empty parent IDs**: Always rejected as invalid
380    ///
381    /// This strict enforcement prevents orphaned entries that cause sync failures.
382    ///
383    /// ## Subtree Parent Relationships
384    /// - For root entries: Subtrees may have empty parents (they establish the subtree roots)
385    /// - For non-root entries: Empty subtree parents require deeper validation:
386    ///   - Could be legitimate (first entry in a new subtree)
387    ///   - Could indicate broken relationships (needs DAG traversal to verify)
388    ///
389    /// ## Multi-Layer Validation System
390    /// Complex validation happens at multiple layers:
391    /// 1. **Entry Layer** (this method): Structural validation, main tree parent enforcement
392    /// 2. **Transaction Layer**: Parent discovery, subtree parent validation with DAG access
393    /// 3. **Backend Storage**: Final validation gate before persistence
394    /// 4. **Sync Operations**: Validation of entries received from peers
395    ///
396    /// # Special Cases
397    /// - The "_root" marker subtree has special handling and skips validation
398    /// - The "_settings" subtree follows standard validation rules
399    /// - Empty subtree parents are logged but deferred to transaction layer
400    ///
401    /// # Returns
402    ///
403    /// - `Ok(())` if the entry is structurally valid
404    /// - `Err(InstanceError::EntryValidationFailed)` if validation fails with specific reason
405    ///
406    /// # Examples
407    ///
408    /// ```rust,no_run
409    /// # use eidetica::Entry;
410    /// # let entry: Entry = unimplemented!();
411    /// // Validate an entry before storage or sync
412    /// match entry.validate() {
413    ///     Ok(()) => {
414    ///         // Entry is valid, safe to store/sync
415    ///         println!("Entry is valid");
416    ///     }
417    ///     Err(e) => {
418    ///         // Entry is invalid, reject it
419    ///         eprintln!("Invalid entry: {}", e);
420    ///     }
421    /// }
422    /// ```
423    /// Validates that an ID contains a valid CID.
424    fn validate_id_format(id: &ID, context: &str) -> Result<()> {
425        if id.as_cid().is_none() {
426            return Err(
427                IdError::InvalidFormat(format!("Invalid ID in {context}: ID is empty")).into(),
428            );
429        }
430        Ok(())
431    }
432
433    pub fn validate(&self) -> Result<()> {
434        use crate::constants::{ROOT, SETTINGS};
435        use crate::instance::errors::InstanceError;
436
437        // CRITICAL VALIDATION: Root entries (with _root marker) cannot have parents
438        // This enforces that root entries are true starting points of trees
439        let has_root_marker = self.subtrees.iter().any(|node| node.name == ROOT);
440        if has_root_marker && !self.tree.parents.is_empty() {
441            return Err(InstanceError::EntryValidationFailed {
442                reason: format!(
443                    "Entry {} has _root marker but also has parents. Root entries cannot have parent relationships as they are the starting points of trees.",
444                    self.id()
445                ),
446            }.into());
447        }
448
449        // Check if this is a root entry (will be true only if has ROOT marker AND no parents AND no root)
450        let is_root_entry =
451            has_root_marker && self.tree.parents.is_empty() && self.tree.root.is_none();
452
453        // Validate root ID format (when present)
454        if let Some(root_id) = &self.tree.root {
455            Self::validate_id_format(root_id, "tree root ID")?;
456        }
457
458        // Validate each subtree
459        for subtree_node in &self.subtrees {
460            let subtree_name = &subtree_node.name;
461            let subtree_parents = &subtree_node.parents;
462
463            // Empty string is not allowed as a subtree name
464            if subtree_name.is_empty() {
465                return Err(InstanceError::EntryValidationFailed {
466                    reason: format!(
467                        "Entry {} has a subtree with empty name. Store names must be non-empty.",
468                        self.id()
469                    ),
470                }
471                .into());
472            }
473
474            // Skip validation for the special "_root" marker subtree
475            if subtree_name == ROOT {
476                continue;
477            }
478
479            // For non-root entries with empty subtree parents, this is only valid if:
480            // 1. The entry has no main parents (making it a legitimate subtree root), OR
481            // 2. The subtree is genuinely being established for the first time within the tree
482            //
483            // Note: We can't perform deep validation here without access to the backend,
484            // so we defer complex validation to transaction/backend layers where full
485            // DAG traversal is possible. This basic validation catches obvious structural errors.
486            if !is_root_entry && subtree_parents.is_empty() {
487                // This is a lightweight structural check - more comprehensive validation
488                // happens in transaction/backend layers with full DAG access
489                tracing::debug!(
490                    entry_id = %self.id(),
491                    subtree = subtree_name,
492                    "Entry has empty subtree parents - will be validated in transaction layer"
493                );
494            }
495
496            // Special validation for the critical "_settings" subtree
497            // Note: Settings subtree follows the same rules as other subtrees - empty parents
498            // are valid for the first entry in the subtree. Comprehensive validation happens
499            // in transaction/backend layers with full DAG access.
500            if subtree_name == SETTINGS && !is_root_entry && subtree_parents.is_empty() {
501                tracing::debug!(
502                    entry_id = %self.id(),
503                    "Settings subtree has empty parents - will be validated in transaction layer"
504                );
505            }
506
507            // Validate that subtree parents are not empty strings and have valid format
508            for parent_id in subtree_parents {
509                if parent_id.is_empty() {
510                    return Err(InstanceError::EntryValidationFailed {
511                        reason: format!(
512                            "Entry {} has subtree '{}' with empty parent ID. Parent IDs must be non-empty valid entry IDs.",
513                            self.id(),
514                            subtree_name
515                        ),
516                    }.into());
517                }
518                // Validate parent ID format
519                Self::validate_id_format(
520                    parent_id,
521                    &format!("subtree '{subtree_name}' parent ID"),
522                )?;
523            }
524        }
525
526        // Enforce main tree parent requirements
527        if !is_root_entry {
528            let main_parents = self.tree.parents.clone();
529            if main_parents.is_empty() {
530                // This is a HARD FAILURE - reject the entry completely
531                // Empty main tree parents create orphaned nodes that break LCA calculations
532                return Err(InstanceError::EntryValidationFailed {
533                    reason: format!(
534                        "Non-root entry {} has empty main tree parents. All non-root entries must have valid parent relationships in the main tree.",
535                        self.id()
536                    ),
537                }.into());
538            }
539
540            // Validate that main parents are not empty strings and have valid format
541            for parent_id in &main_parents {
542                if parent_id.is_empty() {
543                    return Err(InstanceError::EntryValidationFailed {
544                        reason: format!(
545                            "Entry {} has empty parent ID in main tree. Parent IDs must be non-empty valid entry IDs.",
546                            self.id()
547                        ),
548                    }.into());
549                }
550                // Validate parent ID format
551                Self::validate_id_format(parent_id, "main tree parent ID")?;
552            }
553        }
554
555        Ok(())
556    }
557}