eidetica/backend/database/sql/schema.rs
1//! SQL schema definitions and migrations.
2//!
3//! This module contains the database schema used by SQL backends.
4//! The schema is designed to be portable between SQLite and Postgres.
5//!
6//! # Migration System
7//!
8//! The migration system uses code-based migrations rather than SQL files to handle
9//! dialect differences between SQLite and PostgreSQL. Each migration is a function
10//! that receives the backend and can execute database-specific SQL as needed.
11//!
12//! ## Adding a New Migration
13//!
14//! 1. Increment `SCHEMA_VERSION`
15//! 2. Add a new `migrate_vN_to_vM` async function
16//! 3. Add the migration to the match statement in `run_migration`
17//! 4. Document what the migration does
18
19use crate::Result;
20use crate::backend::errors::BackendError;
21
22use super::{SqlxBackend, SqlxResultExt};
23
24/// Current schema version.
25///
26/// Increment this when making schema changes that require migration.
27/// Version 0 is fully unstable and should not be used in production.
28pub const SCHEMA_VERSION: i64 = 0;
29
30/// SQL statements to create the schema tables.
31///
32/// Each statement uses portable SQL that works on both SQLite and PostgreSQL.
33pub const CREATE_TABLES: &[&str] = &[
34 // Schema version tracking
35 // BIGINT (64-bit) used for portability between SQLite and PostgreSQL
36 "CREATE TABLE IF NOT EXISTS schema_version (
37 version BIGINT PRIMARY KEY
38 )",
39 // Core entry storage
40 // Entries are content-addressable via hash of entry content
41 "CREATE TABLE IF NOT EXISTS entries (
42 id TEXT PRIMARY KEY NOT NULL,
43 tree_id TEXT NOT NULL,
44 is_root BIGINT NOT NULL DEFAULT 0,
45 verification_status BIGINT NOT NULL DEFAULT 0,
46 height BIGINT NOT NULL DEFAULT 0,
47 entry_cbor BYTEA NOT NULL
48 )",
49 // Tree parent relationships (main tree DAG edges)
50 // Each entry can have multiple parents for merge commits
51 "CREATE TABLE IF NOT EXISTS tree_parents (
52 child_id TEXT NOT NULL,
53 parent_id TEXT NOT NULL,
54 PRIMARY KEY (child_id, parent_id)
55 )",
56 // Subtrees - denormalized subtree data for efficient queries
57 // Replaces store_memberships with additional columns for height and data.
58 // `data` is the opaque payload bytes for each store (format chosen by the store).
59 "CREATE TABLE IF NOT EXISTS subtrees (
60 tree_id TEXT NOT NULL,
61 entry_id TEXT NOT NULL,
62 store_name TEXT NOT NULL,
63 height BIGINT NOT NULL,
64 data BLOB,
65 PRIMARY KEY (entry_id, store_name)
66 )",
67 // Store parent relationships (per-store DAG edges)
68 // Parents within a specific store context
69 "CREATE TABLE IF NOT EXISTS store_parents (
70 child_id TEXT NOT NULL,
71 parent_id TEXT NOT NULL,
72 store_name TEXT NOT NULL,
73 PRIMARY KEY (child_id, parent_id, store_name)
74 )",
75 // Tips cache - maintained incrementally
76 // Tips are entries with no children in their tree/store context
77 // store_name uses empty string for tree-level tips (PostgreSQL disallows NULL in PK)
78 "CREATE TABLE IF NOT EXISTS tips (
79 entry_id TEXT NOT NULL,
80 tree_id TEXT NOT NULL,
81 store_name TEXT NOT NULL DEFAULT '',
82 PRIMARY KEY (entry_id, tree_id, store_name)
83 )",
84 // Instance metadata (singleton row pattern)
85 // Contains device key and system database IDs.
86 // Uses singleton=1 constraint to ensure only one row exists.
87 "CREATE TABLE IF NOT EXISTS instance_metadata (
88 singleton BIGINT PRIMARY KEY DEFAULT 1 CHECK (singleton = 1),
89 data TEXT NOT NULL
90 )",
91 // Instance secrets (singleton row pattern)
92 // Contains device signing key. Stored separately from metadata.
93 "CREATE TABLE IF NOT EXISTS instance_secrets (
94 singleton BIGINT PRIMARY KEY DEFAULT 1 CHECK (singleton = 1),
95 data TEXT NOT NULL
96 )",
97 // CRDT state cache (v2: scope-keyed)
98 //
99 // `scope_user_uuid` is the trust scope: the empty string `''` encodes
100 // `CacheScope::Shared` (daemon-computed, visible to every user with
101 // database read permission); any other string encodes
102 // `CacheScope::User(uuid)` (client-attested bytes, visible only to that
103 // user). An empty-string sentinel is used because PostgreSQL disallows
104 // NULL in primary-key columns; user UUIDs are never empty so the
105 // mapping is unambiguous. `state` is opaque bytes (plaintext for Shared;
106 // ciphertext or plaintext for User, decided client-side).
107 //
108 // The pre-unification table `crdt_cache` (no scope column) is left
109 // untouched if it exists on an upgraded database — it just becomes
110 // unreferenced. The cache is performance state, so cold-rebuilding into
111 // `crdt_cache_v2` on first load is fine.
112 "CREATE TABLE IF NOT EXISTS crdt_cache_v2 (
113 scope_user_uuid TEXT NOT NULL,
114 entry_id TEXT NOT NULL,
115 store_name TEXT NOT NULL,
116 state BLOB NOT NULL,
117 PRIMARY KEY (scope_user_uuid, entry_id, store_name)
118 )",
119];
120
121/// SQL statements to create indexes.
122pub const CREATE_INDEXES: &[&str] = &[
123 // Entry lookups and filtering
124 "CREATE INDEX IF NOT EXISTS idx_entries_tree_id ON entries(tree_id)",
125 "CREATE INDEX IF NOT EXISTS idx_entries_tree_height ON entries(tree_id, height DESC, id)",
126 "CREATE INDEX IF NOT EXISTS idx_entries_verification ON entries(verification_status)",
127 "CREATE INDEX IF NOT EXISTS idx_entries_is_root ON entries(is_root)",
128 // Parent relationship traversal
129 "CREATE INDEX IF NOT EXISTS idx_tree_parents_parent ON tree_parents(parent_id)",
130 "CREATE INDEX IF NOT EXISTS idx_tree_parents_child ON tree_parents(child_id)",
131 // Store-specific queries
132 "CREATE INDEX IF NOT EXISTS idx_subtrees_tree_store_height ON subtrees(tree_id, store_name, height DESC, entry_id)",
133 "CREATE INDEX IF NOT EXISTS idx_subtrees_store_height ON subtrees(store_name, height DESC, entry_id)",
134 "CREATE INDEX IF NOT EXISTS idx_store_parents_parent ON store_parents(store_name, parent_id)",
135 "CREATE INDEX IF NOT EXISTS idx_store_parents_child ON store_parents(store_name, child_id)",
136 // Tip lookups
137 "CREATE INDEX IF NOT EXISTS idx_tips_tree_store ON tips(tree_id, store_name)",
138];
139
140/// Initialize the database schema.
141///
142/// Creates tables and indexes if they don't exist, and handles migrations
143/// if the schema version has changed.
144pub async fn initialize(backend: &SqlxBackend) -> Result<()> {
145 let pool = backend.pool();
146
147 // Create tables, adapting dialect-specific types
148 let blob_type = if backend.is_sqlite() { "BLOB" } else { "BYTEA" };
149 for statement in CREATE_TABLES {
150 let statement = statement.replace("BLOB", blob_type);
151 sqlx::query(&statement)
152 .execute(pool)
153 .await
154 .sql_context("Schema creation failed")?;
155 }
156
157 // Check current schema version
158 let row: Option<(i64,)> = sqlx::query_as("SELECT version FROM schema_version")
159 .fetch_optional(pool)
160 .await
161 .sql_context("Failed to check schema version")?;
162
163 if row.is_none() {
164 // First initialization
165 sqlx::query("INSERT INTO schema_version (version) VALUES ($1)")
166 .bind(SCHEMA_VERSION)
167 .execute(pool)
168 .await
169 .sql_context("Failed to initialize schema version")?;
170 } else if let Some((current_version,)) = row
171 && current_version < SCHEMA_VERSION
172 {
173 // Run migrations
174 migrate(backend, current_version, SCHEMA_VERSION).await?;
175 }
176
177 // Create indexes
178 for statement in CREATE_INDEXES {
179 sqlx::query(statement)
180 .execute(pool)
181 .await
182 .sql_context("Index creation failed")?;
183 }
184
185 Ok(())
186}
187
188/// Run migrations sequentially from one schema version to another.
189///
190/// Migrations are run one at a time, incrementing the version after each.
191/// This allows for proper error handling and rollback semantics.
192async fn migrate(backend: &SqlxBackend, from: i64, to: i64) -> Result<()> {
193 tracing::info!(from, to, "Starting SQL schema migration");
194
195 let mut current = from;
196 while current < to {
197 let next = current + 1;
198 tracing::info!(from = current, to = next, "Running migration");
199
200 run_migration(backend, current, next).await?;
201
202 // Update schema version after successful migration
203 sqlx::query("UPDATE schema_version SET version = $1")
204 .bind(next)
205 .execute(backend.pool())
206 .await
207 .sql_context("Failed to update schema version")?;
208
209 tracing::info!(version = next, "Migration completed");
210 current = next;
211 }
212
213 tracing::info!(from, to, "All migrations completed successfully");
214 Ok(())
215}
216
217/// Execute a single migration step.
218///
219/// Each migration is a separate async function that handles the schema change.
220/// Add new migrations here as match arms.
221///
222/// # Adding a New Migration
223///
224/// When incrementing `SCHEMA_VERSION`, add a match arm here:
225///
226/// ```ignore
227/// match from {
228/// 1 => migrate_v1_to_v2(backend).await,
229/// // ... existing migrations ...
230/// _ => { /* error handling */ }
231/// }
232/// ```
233async fn run_migration(backend: &SqlxBackend, from: i64, to: i64) -> Result<()> {
234 // When adding the first migration, replace this with:
235 //
236 // match from {
237 // 1 => migrate_v1_to_v2(backend).await,
238 // _ => Err(BackendError::SqlxError { ... }.into()),
239 // }
240 //
241 // For now, since there are no migrations yet, any attempt to migrate is an error.
242
243 // Suppress unused variable warning until migrations are added
244 let _ = backend;
245
246 Err(BackendError::SqlxError {
247 reason: format!(
248 "Unknown migration path: v{from} to v{to}. \
249 This likely means SCHEMA_VERSION was incremented without adding a migration."
250 ),
251 source: None,
252 }
253 .into())
254}