// eidetica/entry/mod.rs
//!
//! Defines the fundamental data unit (`Entry`) and related types.
//!
//! An `Entry` is the core, content-addressable building block of the database,
//! representing a snapshot of data in the main tree and potentially multiple named subtrees.
//! This module also defines the `ID` type and the `RawData` type.
7
8mod builder;
9pub mod errors;
10pub mod id;
11
12#[cfg(test)]
13mod tests;
14
15use serde::{Deserialize, Serialize};
16
17pub use builder::EntryBuilder;
18pub use errors::EntryError;
19pub use id::ID;
20
21use crate::{Result, auth::types::SigInfo, constants::ROOT, store::StoreError};
22
23use id::IdError;
24
/// Opaque payload bytes embedded in an `Entry`.
///
/// Each `Store` owns the format of its own payload (JSON, CBOR, raw binary, etc.);
/// the entry layer treats `RawData` as a byte string and does not interpret it.
///
/// Encoded as a CBOR byte string (major type 2) in DAG-CBOR for IPLD compatibility.
pub type RawData = Vec<u8>;
32
/// Returns `true` when a tree height is zero.
///
/// Used as the serde `skip_serializing_if` predicate for the main tree height,
/// which is why it takes the value by reference: serde hands the predicate a
/// reference to the field.
fn is_zero(h: &u64) -> bool {
    *h == 0
}
37
38/// Internal representation of the main tree node within an `Entry`.
39#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
40pub(super) struct TreeNode {
41 /// The ID of the root `Entry` of the tree this node belongs to.
42 /// `None` for root entries (they are their own root).
43 #[serde(skip_serializing_if = "Option::is_none")]
44 pub root: Option<ID>,
45 /// IDs of the parent `Entry`s in the main tree history.
46 /// The vector is kept sorted alphabetically.
47 pub parents: Vec<ID>,
48 /// Serialized metadata associated with this `Entry` in the main tree.
49 /// This data is metadata about this specific entry only and is not merged with other entries.
50 ///
51 /// Metadata is used to improve the efficiency of certain operations and for experimentation.
52 ///
53 /// Metadata is optional and may not be present in all entries. Future versions
54 /// may extend metadata to include additional information.
55 #[serde(default, skip_serializing_if = "Option::is_none", with = "serde_bytes")]
56 pub metadata: Option<RawData>,
57 /// Height of this entry in the tree DAG (longest path from root).
58 /// Root entries have height 0, children have max(parent heights) + 1.
59 #[serde(rename = "h", default, skip_serializing_if = "is_zero")]
60 pub height: u64,
61}
62
63/// Internal representation of a named subtree node within an `Entry`.
64#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
65pub(super) struct SubTreeNode {
66 /// The name of the subtree, analogous to a table name.
67 /// Subtrees are _named_, and not identified by an ID.
68 pub name: String,
69 /// IDs of the parent `Entry`s specific to this subtree's history.
70 /// The vector is kept sorted alphabetically.
71 pub parents: Vec<ID>,
72 /// Serialized data specific to this `Entry` within this named subtree.
73 ///
74 /// `None` indicates that this Entry participates in the subtree but makes no data changes.
75 /// This is used when there is information needed for this subtree found somewhere else (e.g. the `_index`)
76 ///
77 /// `Some(data)` contains the actual serialized data for this subtree.
78 #[serde(default, skip_serializing_if = "Option::is_none", with = "serde_bytes")]
79 pub data: Option<RawData>,
80 /// Height of this entry in the subtree DAG.
81 ///
82 /// `None` means the subtree inherits the tree's height (not serialized).
83 /// `Some(h)` is an independent height for subtrees with their own strategy.
84 #[serde(rename = "h", default, skip_serializing_if = "Option::is_none")]
85 pub height: Option<u64>,
86}
87
88/// The fundamental unit of data in Eidetica, representing a finalized, immutable Database Entry.
89///
90/// An `Entry` represents a snapshot of data within a `Database` and potentially one or more named `Store`s.
91/// It is content-addressable, meaning its `ID` is a cryptographic hash of its contents.
92/// Entries form a Merkle-DAG (Directed Acyclic Graph) structure through parent references.
93///
94/// # Authentication
95///
96/// Each entry contains authentication information with:
97/// - `sig`: Base64-encoded cryptographic signature (optional, allows unsigned entry creation)
98/// - `key`: Authentication key reference path, either:
99/// - A direct key ID defined in this tree's `_settings.auth`
100/// - A delegation path as an ordered list of `{"key": "delegated_tree_1", "tips": ["A", "B"]}`
101/// where the last element must contain only a `"key"` field
102///
103/// # Immutability
104///
105/// `Entry` instances are designed to be immutable once created. To create or modify entries,
106/// use the `EntryBuilder` struct, which provides a mutable API for constructing entries.
107/// Once an entry is built, its content cannot be changed, and its ID is deterministic
108/// based on its content.
109///
110/// # Example
111///
112/// ```
113/// # use eidetica::Entry;
114///
115/// // Create a new root entry (standalone entry that starts a new DAG)
116/// let entry = Entry::root_builder()
117/// .set_subtree_data("users", br#"{"user1":"data"}"#.to_vec())
118/// .build()
119/// .expect("Entry should build successfully");
120///
121/// // Access entry data
122/// let id = entry.id(); // Calculate content-addressable ID
123/// let user_data = entry.data("users").unwrap();
124/// ```
125///
126/// # Builders
127///
128/// To create an `Entry`, use the associated `EntryBuilder`.
129/// The preferred way to get an `EntryBuilder` is via the static methods
130/// `Entry::builder()` for regular entries or `Entry::root_builder()` for new top-level tree roots.
131///
132/// ```
133/// # use eidetica::entry::{Entry, ID, RawData};
134/// # let root_id = ID::from_bytes("some_root_id");
135/// # let data: RawData = b"{}".to_vec();
136/// // For a regular entry:
137/// let builder = Entry::builder(root_id);
138///
139/// // For a new top-level tree root:
140/// let root_builder = Entry::root_builder();
141/// ```
142/// The current entry format version.
143/// v0 indicates this is an unstable protocol subject to breaking changes.
144pub const ENTRY_VERSION: u8 = 0;
145
146/// Helper to check if version is default (0) for serde skip_serializing_if
147fn is_v0(v: &u8) -> bool {
148 *v == 0
149}
150
151/// Validates the entry version during deserialization.
152fn validate_entry_version<'de, D>(deserializer: D) -> std::result::Result<u8, D::Error>
153where
154 D: serde::Deserializer<'de>,
155{
156 let version = u8::deserialize(deserializer)?;
157 if version != ENTRY_VERSION {
158 return Err(serde::de::Error::custom(format!(
159 "unsupported Entry version {version}; only version {ENTRY_VERSION} is supported"
160 )));
161 }
162 Ok(version)
163}
164
165#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
166pub struct Entry {
167 /// Protocol version for this entry format.
168 /// Used to verify that we support reading this entry.
169 #[serde(
170 rename = "_v",
171 default,
172 skip_serializing_if = "is_v0",
173 deserialize_with = "validate_entry_version"
174 )]
175 version: u8,
176 /// The main tree node data, including the root ID, parents in the main tree, and associated data.
177 pub(super) tree: TreeNode,
178 /// A collection of named subtrees this entry contains data for.
179 /// The vector is kept sorted alphabetically by subtree name during the build process.
180 pub(super) subtrees: Vec<SubTreeNode>,
181 /// Authentication information for this entry
182 pub sig: SigInfo,
183}
184
185impl Entry {
186 /// Creates a new `EntryBuilder` for an entry associated with a specific tree root.
187 /// This is a convenience method and preferred over calling `EntryBuilder::new()` directly.
188 ///
189 /// # Arguments
190 /// * `root` - The `ID` of the root `Entry` of the tree this entry will belong to.
191 pub fn builder(root: ID) -> EntryBuilder {
192 EntryBuilder::new(root)
193 }
194
195 /// Creates a new `EntryBuilder` for a top-level (root) entry for a new tree.
196 /// This is a convenience method and preferred over calling `EntryBuilder::new_top_level()` directly.
197 ///
198 /// Root entries have an empty string as their `root` ID and include a special ROOT subtree marker.
199 /// This method is typically used when creating a new tree.
200 pub fn root_builder() -> EntryBuilder {
201 EntryBuilder::new_top_level()
202 }
203
204 /// Get the content-addressable ID of the entry.
205 ///
206 /// The ID is the CID of the DAG-CBOR serialized representation of the Entry.
207 pub fn id(&self) -> ID {
208 let bytes = self
209 .to_dagcbor()
210 .expect("Failed to serialize entry to DAG-CBOR for ID");
211 ID::from_dagcbor_bytes(bytes)
212 }
213
214 /// Get the ID of the root `Entry` of the tree this entry belongs to.
215 /// Returns `None` for root entries.
216 pub fn root(&self) -> Option<ID> {
217 self.tree.root.clone()
218 }
219
220 /// Check if this entry is a root entry (contains the ROOT marker and has no parents).
221 ///
222 /// Root entries are the top-level entries in the database and are distinguished by:
223 /// 1. Containing a subtree with the ROOT marker
224 /// 2. Having no parent entries (they are true tree roots)
225 ///
226 /// This ensures that root entries are actual starting points of trees in the DAG.
227 pub fn is_root(&self) -> bool {
228 // FIXME: better identification of root entries
229 self.subtrees.iter().any(|node| node.name == ROOT)
230 && self.tree.parents.is_empty()
231 && self.tree.root.is_none()
232 }
233
234 /// Check if this entry contains data for a specific named subtree.
235 pub fn in_subtree(&self, subtree_name: impl AsRef<str>) -> bool {
236 self.subtrees
237 .iter()
238 .any(|node| node.name == subtree_name.as_ref())
239 }
240
241 /// Check if this entry belongs to a specific tree, identified by its root ID.
242 pub fn in_tree(&self, tree_id: &ID) -> bool {
243 // Entries that are roots exist in both trees
244 self.tree.root.as_ref() == Some(tree_id) || self.id() == *tree_id
245 }
246
247 /// Get the names of all subtrees this entry contains data for.
248 /// The names are returned in alphabetical order.
249 pub fn subtrees(&self) -> Vec<String> {
250 self.subtrees
251 .iter()
252 .map(|subtree| subtree.name.clone())
253 .collect()
254 }
255
256 /// Get the metadata associated with this entry's tree node.
257 ///
258 /// Metadata is optional information attached to an entry that is not part of the
259 /// main data model and is not merged between entries.
260 pub fn metadata(&self) -> Option<&RawData> {
261 self.tree.metadata.as_ref()
262 }
263
264 /// Get the `RawData` for a specific named subtree within this entry.
265 ///
266 /// Returns an error if the subtree is not found or if the subtree exists but has no data (`None`).
267 pub fn data(&self, subtree_name: impl AsRef<str>) -> Result<&RawData> {
268 self.subtrees
269 .iter()
270 .find(|node| node.name == subtree_name.as_ref())
271 .and_then(|node| node.data.as_ref())
272 .ok_or_else(|| {
273 StoreError::KeyNotFound {
274 store: "entry".to_string(),
275 key: subtree_name.as_ref().to_string(),
276 }
277 .into()
278 })
279 }
280
281 /// Get the IDs of the parent entries in the main tree history.
282 /// The parent IDs are returned in alphabetical order.
283 pub fn parents(&self) -> Result<Vec<ID>> {
284 Ok(self.tree.parents.clone())
285 }
286
287 /// Get the IDs of the parent entries specific to a named subtree's history.
288 /// The parent IDs are returned in alphabetical order.
289 pub fn subtree_parents(&self, subtree_name: impl AsRef<str>) -> Result<Vec<ID>> {
290 self.subtrees
291 .iter()
292 .find(|node| node.name == subtree_name.as_ref())
293 .map(|node| node.parents.clone())
294 .ok_or_else(|| {
295 StoreError::KeyNotFound {
296 store: "entry".to_string(),
297 key: subtree_name.as_ref().to_string(),
298 }
299 .into()
300 })
301 }
302
303 /// Get the height of this entry in the main tree DAG.
304 pub fn height(&self) -> u64 {
305 self.tree.height
306 }
307
308 /// Get the height of this entry in a specific subtree's DAG.
309 ///
310 /// If the subtree has an explicit height (`Some(h)`), that value is returned.
311 /// If the subtree height is `None`, it inherits from the main tree height.
312 ///
313 /// This allows subtrees to either track independent heights (for subtrees
314 /// with their own height strategy) or share the tree's height (default).
315 pub fn subtree_height(&self, subtree_name: impl AsRef<str>) -> Result<u64> {
316 self.subtrees
317 .iter()
318 .find(|node| node.name == subtree_name.as_ref())
319 .map(|node| node.height.unwrap_or_else(|| self.height()))
320 .ok_or_else(|| {
321 StoreError::KeyNotFound {
322 store: "entry".to_string(),
323 key: subtree_name.as_ref().to_string(),
324 }
325 .into()
326 })
327 }
328
329 /// Create a canonical representation of this entry for signing purposes.
330 ///
331 /// This creates a copy of the entry with the signature field removed from auth,
332 /// which is necessary for signature generation and verification.
333 /// The returned entry has deterministic field ordering for consistent signatures.
334 pub fn canonical_for_signing(&self) -> Self {
335 let mut canonical = self.clone();
336 canonical.sig.sig = None;
337 canonical
338 }
339
340 /// Create canonical bytes for signing or ID generation.
341 ///
342 /// This method serializes the entry to DAG-CBOR with deterministic field ordering.
343 /// For signing purposes, call `canonical_for_signing()` first.
344 pub fn canonical_bytes(&self) -> Result<Vec<u8>> {
345 self.to_dagcbor()
346 }
347
348 /// Create canonical bytes for signing (convenience method).
349 ///
350 /// This combines `canonical_for_signing()` and `canonical_bytes()` for convenience.
351 pub fn signing_bytes(&self) -> Result<Vec<u8>> {
352 self.canonical_for_signing().canonical_bytes()
353 }
354
355 /// Serialize this entry to DAG-CBOR bytes.
356 ///
357 /// Returns the canonical DAG-CBOR encoding for CID computation.
358 pub fn to_dagcbor(&self) -> Result<Vec<u8>> {
359 serde_ipld_dagcbor::to_vec(self).map_err(|e| {
360 EntryError::SerializationFailed {
361 context: format!("DAG-CBOR serialization failed: {e}"),
362 }
363 .into()
364 })
365 }
366
367 /// Validate the structural integrity of this entry.
368 ///
369 /// This method performs lightweight structural validation that can be done
370 /// without access to the backend database. It checks for obvious structural
371 /// issues while deferring complex DAG relationship validation to the transaction
372 /// and backend layers where full database access is available.
373 ///
374 /// # Validation Rules
375 ///
376 /// ## Critical Main Tree Parent Validation (Prevents "No Common Ancestor" Errors)
377 /// - **Root entries** (containing "_root" subtree): May have empty parents
378 /// - **Non-root entries**: MUST have at least one parent - **HARD REQUIREMENT**
379 /// - **Empty parent IDs**: Always rejected as invalid
380 ///
381 /// This strict enforcement prevents orphaned entries that cause sync failures.
382 ///
383 /// ## Subtree Parent Relationships
384 /// - For root entries: Subtrees may have empty parents (they establish the subtree roots)
385 /// - For non-root entries: Empty subtree parents require deeper validation:
386 /// - Could be legitimate (first entry in a new subtree)
387 /// - Could indicate broken relationships (needs DAG traversal to verify)
388 ///
389 /// ## Multi-Layer Validation System
390 /// Complex validation happens at multiple layers:
391 /// 1. **Entry Layer** (this method): Structural validation, main tree parent enforcement
392 /// 2. **Transaction Layer**: Parent discovery, subtree parent validation with DAG access
393 /// 3. **Backend Storage**: Final validation gate before persistence
394 /// 4. **Sync Operations**: Validation of entries received from peers
395 ///
396 /// # Special Cases
397 /// - The "_root" marker subtree has special handling and skips validation
398 /// - The "_settings" subtree follows standard validation rules
399 /// - Empty subtree parents are logged but deferred to transaction layer
400 ///
401 /// # Returns
402 ///
403 /// - `Ok(())` if the entry is structurally valid
404 /// - `Err(InstanceError::EntryValidationFailed)` if validation fails with specific reason
405 ///
406 /// # Examples
407 ///
408 /// ```rust,no_run
409 /// # use eidetica::Entry;
410 /// # let entry: Entry = unimplemented!();
411 /// // Validate an entry before storage or sync
412 /// match entry.validate() {
413 /// Ok(()) => {
414 /// // Entry is valid, safe to store/sync
415 /// println!("Entry is valid");
416 /// }
417 /// Err(e) => {
418 /// // Entry is invalid, reject it
419 /// eprintln!("Invalid entry: {}", e);
420 /// }
421 /// }
422 /// ```
423 /// Validates that an ID contains a valid CID.
424 fn validate_id_format(id: &ID, context: &str) -> Result<()> {
425 if id.as_cid().is_none() {
426 return Err(
427 IdError::InvalidFormat(format!("Invalid ID in {context}: ID is empty")).into(),
428 );
429 }
430 Ok(())
431 }
432
433 pub fn validate(&self) -> Result<()> {
434 use crate::constants::{ROOT, SETTINGS};
435 use crate::instance::errors::InstanceError;
436
437 // CRITICAL VALIDATION: Root entries (with _root marker) cannot have parents
438 // This enforces that root entries are true starting points of trees
439 let has_root_marker = self.subtrees.iter().any(|node| node.name == ROOT);
440 if has_root_marker && !self.tree.parents.is_empty() {
441 return Err(InstanceError::EntryValidationFailed {
442 reason: format!(
443 "Entry {} has _root marker but also has parents. Root entries cannot have parent relationships as they are the starting points of trees.",
444 self.id()
445 ),
446 }.into());
447 }
448
449 // Check if this is a root entry (will be true only if has ROOT marker AND no parents AND no root)
450 let is_root_entry =
451 has_root_marker && self.tree.parents.is_empty() && self.tree.root.is_none();
452
453 // Validate root ID format (when present)
454 if let Some(root_id) = &self.tree.root {
455 Self::validate_id_format(root_id, "tree root ID")?;
456 }
457
458 // Validate each subtree
459 for subtree_node in &self.subtrees {
460 let subtree_name = &subtree_node.name;
461 let subtree_parents = &subtree_node.parents;
462
463 // Empty string is not allowed as a subtree name
464 if subtree_name.is_empty() {
465 return Err(InstanceError::EntryValidationFailed {
466 reason: format!(
467 "Entry {} has a subtree with empty name. Store names must be non-empty.",
468 self.id()
469 ),
470 }
471 .into());
472 }
473
474 // Skip validation for the special "_root" marker subtree
475 if subtree_name == ROOT {
476 continue;
477 }
478
479 // For non-root entries with empty subtree parents, this is only valid if:
480 // 1. The entry has no main parents (making it a legitimate subtree root), OR
481 // 2. The subtree is genuinely being established for the first time within the tree
482 //
483 // Note: We can't perform deep validation here without access to the backend,
484 // so we defer complex validation to transaction/backend layers where full
485 // DAG traversal is possible. This basic validation catches obvious structural errors.
486 if !is_root_entry && subtree_parents.is_empty() {
487 // This is a lightweight structural check - more comprehensive validation
488 // happens in transaction/backend layers with full DAG access
489 tracing::debug!(
490 entry_id = %self.id(),
491 subtree = subtree_name,
492 "Entry has empty subtree parents - will be validated in transaction layer"
493 );
494 }
495
496 // Special validation for the critical "_settings" subtree
497 // Note: Settings subtree follows the same rules as other subtrees - empty parents
498 // are valid for the first entry in the subtree. Comprehensive validation happens
499 // in transaction/backend layers with full DAG access.
500 if subtree_name == SETTINGS && !is_root_entry && subtree_parents.is_empty() {
501 tracing::debug!(
502 entry_id = %self.id(),
503 "Settings subtree has empty parents - will be validated in transaction layer"
504 );
505 }
506
507 // Validate that subtree parents are not empty strings and have valid format
508 for parent_id in subtree_parents {
509 if parent_id.is_empty() {
510 return Err(InstanceError::EntryValidationFailed {
511 reason: format!(
512 "Entry {} has subtree '{}' with empty parent ID. Parent IDs must be non-empty valid entry IDs.",
513 self.id(),
514 subtree_name
515 ),
516 }.into());
517 }
518 // Validate parent ID format
519 Self::validate_id_format(
520 parent_id,
521 &format!("subtree '{subtree_name}' parent ID"),
522 )?;
523 }
524 }
525
526 // Enforce main tree parent requirements
527 if !is_root_entry {
528 let main_parents = self.tree.parents.clone();
529 if main_parents.is_empty() {
530 // This is a HARD FAILURE - reject the entry completely
531 // Empty main tree parents create orphaned nodes that break LCA calculations
532 return Err(InstanceError::EntryValidationFailed {
533 reason: format!(
534 "Non-root entry {} has empty main tree parents. All non-root entries must have valid parent relationships in the main tree.",
535 self.id()
536 ),
537 }.into());
538 }
539
540 // Validate that main parents are not empty strings and have valid format
541 for parent_id in &main_parents {
542 if parent_id.is_empty() {
543 return Err(InstanceError::EntryValidationFailed {
544 reason: format!(
545 "Entry {} has empty parent ID in main tree. Parent IDs must be non-empty valid entry IDs.",
546 self.id()
547 ),
548 }.into());
549 }
550 // Validate parent ID format
551 Self::validate_id_format(parent_id, "main tree parent ID")?;
552 }
553 }
554
555 Ok(())
556 }
557}