eidetica/backend/database/sql/mod.rs
1//! SQL-based backend implementations for Eidetica storage.
2//!
3//! This module provides SQL database backends that implement the `BackendImpl` trait,
4//! allowing Eidetica entries to be stored in relational databases.
5//!
6//! ## Available Backends
7//!
8//! - **SQLite** (feature: `sqlite`): Embedded database
9//! - **PostgreSQL** (feature: `postgres`): PostgreSQL database
10//!
11//! ## Architecture
12//!
13//! The SQL backend uses sqlx with `AnyPool` for multi-database support.
14//! All methods are async to match the async `BackendImpl` trait.
15//!
16//! ## Schema and Migrations
17//!
18//! The database schema is defined in the [`schema`] module and automatically
19//! initialized when connecting. Migrations are handled via code-based functions
20//! rather than SQL files to support dialect differences between SQLite and PostgreSQL.
21//!
22//! See [`schema`] module documentation for details on adding migrations.
23
24mod cache;
25mod storage;
26mod traversal;
27
28/// Schema definition and migration system.
29pub mod schema;
30
31use std::any::Any;
32use std::time::Duration;
33
34use async_trait::async_trait;
35use sqlx::AnyPool;
36use sqlx::Executor;
37use sqlx::any::AnyPoolOptions;
38
39use crate::Result;
40use crate::backend::errors::BackendError;
41use crate::backend::{BackendImpl, InstanceMetadata, VerificationStatus};
42use crate::entry::{Entry, ID};
43
44/// Extension trait for sqlx Result types to simplify error handling.
45///
46/// Similar to `anyhow::Context`, this trait adds a method to convert
47/// sqlx errors to `BackendError::SqlxError` with a context message.
48pub(crate) trait SqlxResultExt<T> {
49 /// Convert sqlx error to BackendError with context message.
50 fn sql_context(self, context: &str) -> Result<T>;
51}
52
53impl<T> SqlxResultExt<T> for std::result::Result<T, sqlx::Error> {
54 fn sql_context(self, context: &str) -> Result<T> {
55 self.map_err(|e| {
56 BackendError::SqlxError {
57 reason: format!("{context}: {e}"),
58 source: Some(e),
59 }
60 .into()
61 })
62 }
63}
64
65/// Database backend kind for SQL dialect selection.
66#[derive(Debug, Clone, Copy, PartialEq, Eq)]
67pub enum DbKind {
68 /// SQLite database
69 Sqlite,
70 /// PostgreSQL database
71 Postgres,
72}
73
74/// SQL-based backend implementing `BackendImpl` using sqlx.
75///
76/// This backend supports both SQLite and PostgreSQL through sqlx's `AnyPool`.
77///
78/// # Thread Safety
79///
80/// `SqlxBackend` is `Send + Sync` as required by `BackendImpl`. The underlying
81/// sqlx pool handles connection pooling and thread safety.
82///
83/// # Test Isolation
84///
85/// For PostgreSQL, each backend instance can use its own schema for test isolation.
86/// Use `connect_postgres_isolated()` to create an isolated backend for testing.
87pub struct SqlxBackend {
88 pool: AnyPool,
89 kind: DbKind,
90}
91
92impl SqlxBackend {
93 /// Get a reference to the underlying pool.
94 pub fn pool(&self) -> &AnyPool {
95 &self.pool
96 }
97
98 /// Get the database kind.
99 pub fn kind(&self) -> DbKind {
100 self.kind
101 }
102
103 /// Check if this backend is using SQLite.
104 pub fn is_sqlite(&self) -> bool {
105 self.kind == DbKind::Sqlite
106 }
107
108 /// Check if this backend is using PostgreSQL.
109 pub fn is_postgres(&self) -> bool {
110 self.kind == DbKind::Postgres
111 }
112}
113
114// SQLite-specific implementations
115#[cfg(feature = "sqlite")]
116impl SqlxBackend {
117 /// Open a SQLite database at the given path.
118 ///
119 /// Creates the database file and schema if they don't exist.
120 ///
121 /// # Arguments
122 ///
123 /// * `path` - Path to the SQLite database file
124 ///
125 /// # Example
126 ///
127 /// ```ignore
128 /// use eidetica::backend::database::sql::SqlxBackend;
129 ///
130 /// #[tokio::main]
131 /// async fn main() {
132 /// let backend = SqlxBackend::open_sqlite("my_database.db").await.unwrap();
133 /// }
134 /// ```
135 pub async fn open_sqlite<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
136 // mode=rwc: read-write-create (create file if it doesn't exist)
137 let url = format!("sqlite:{}?mode=rwc", path.as_ref().display());
138 Self::connect_sqlite(&url).await
139 }
140
141 /// Connect to a SQLite database using a connection URL.
142 ///
143 /// # Arguments
144 ///
145 /// * `url` - SQLite connection URL (e.g., "sqlite:./my.db")
146 pub async fn connect_sqlite(url: &str) -> Result<Self> {
147 // Install any driver support
148 sqlx::any::install_default_drivers();
149
150 // Detect if this is an in-memory database
151 let is_in_memory = url.contains("mode=memory");
152
153 // For SQLite in-memory databases with shared cache, we must prevent
154 // all connections from being closed. When the last connection closes,
155 // the in-memory database is destroyed and all data is lost.
156 //
157 // IMPORTANT: SQLite pragmas like busy_timeout and synchronous are per-connection
158 // settings. We use after_connect to ensure every connection in the pool has
159 // these configured, not just one.
160 let pool = if is_in_memory {
161 AnyPoolOptions::new()
162 .max_connections(5)
163 .min_connections(1)
164 .idle_timeout(None)
165 .max_lifetime(None)
166 .after_connect(|conn, _meta| {
167 Box::pin(async move {
168 // In-memory databases don't need WAL mode (all in RAM)
169 // but still need busy_timeout for lock contention
170 conn.execute("PRAGMA busy_timeout = 5000;").await?;
171 Ok(())
172 })
173 })
174 .connect(url)
175 .await
176 .sql_context("Failed to connect to SQLite")?
177 } else {
178 AnyPoolOptions::new()
179 .max_connections(5)
180 .after_connect(|conn, _meta| {
181 Box::pin(async move {
182 // File-based SQLite per-connection settings:
183 // - synchronous=NORMAL: Balanced durability (safe with WAL)
184 // - busy_timeout=5000: Wait up to 5s for locks before failing
185 //
186 // Note: journal_mode=WAL is a database-level setting that persists,
187 // so we only set it once after pool creation, not per-connection.
188 conn.execute("PRAGMA synchronous = NORMAL; PRAGMA busy_timeout = 5000;")
189 .await?;
190 Ok(())
191 })
192 })
193 .connect(url)
194 .await
195 .sql_context("Failed to connect to SQLite")?
196 };
197
198 // Set WAL mode once (database-level setting that persists in the file)
199 if !is_in_memory {
200 sqlx::query("PRAGMA journal_mode = WAL;")
201 .execute(&pool)
202 .await
203 .sql_context("Failed to set SQLite WAL mode")?;
204 }
205
206 let backend = Self {
207 pool,
208 kind: DbKind::Sqlite,
209 };
210
211 // Initialize schema
212 schema::initialize(&backend).await?;
213
214 Ok(backend)
215 }
216
217 /// Create an in-memory SQLite database (async).
218 ///
219 /// The database exists only for the lifetime of this backend instance.
220 /// Useful for testing.
221 ///
222 /// # Example
223 ///
224 /// ```ignore
225 /// use eidetica::backend::database::sql::SqlxBackend;
226 ///
227 /// #[tokio::main]
228 /// async fn main() {
229 /// let backend = SqlxBackend::sqlite_in_memory().await.unwrap();
230 /// }
231 /// ```
232 pub async fn sqlite_in_memory() -> Result<Self> {
233 // Use shared cache mode for in-memory SQLite so all connections in the pool
234 // share the same database. Without this, each connection gets its own
235 // isolated in-memory database.
236 // Use a unique name per instance to avoid sharing between tests.
237 let unique_id = uuid::Uuid::new_v4();
238 let url = format!("sqlite:file:mem_{unique_id}?mode=memory&cache=shared");
239 Self::connect_sqlite(&url).await
240 }
241}
242
243// PostgreSQL-specific implementations
244#[cfg(feature = "postgres")]
245impl SqlxBackend {
246 /// Connect to a PostgreSQL database using a connection URL.
247 ///
248 /// This connects to the default (public) schema. For test isolation,
249 /// use `connect_postgres_isolated()` instead.
250 ///
251 /// # Arguments
252 ///
253 /// * `url` - PostgreSQL connection URL (e.g., "postgres://user:pass@localhost/dbname")
254 ///
255 /// # Example
256 ///
257 /// ```ignore
258 /// use eidetica::backend::database::sql::SqlxBackend;
259 ///
260 /// let backend = SqlxBackend::connect_postgres("postgres://localhost/eidetica").await.unwrap();
261 /// ```
262 pub async fn connect_postgres(url: &str) -> Result<Self> {
263 Self::connect_postgres_with_schema(url, None).await
264 }
265
266 /// Connect to a PostgreSQL database with a specific schema for isolation.
267 ///
268 /// Creates a unique schema if `schema_name` is provided, providing test isolation.
269 /// Each test can use its own schema so they don't interfere with each other.
270 ///
271 /// # Arguments
272 ///
273 /// * `url` - PostgreSQL connection URL
274 /// * `schema_name` - Optional schema name. If None, uses the default (public) schema.
275 async fn connect_postgres_with_schema(url: &str, schema_name: Option<String>) -> Result<Self> {
276 // Install any driver support
277 sqlx::any::install_default_drivers();
278
279 // If schema_name is provided, first create the schema, then use after_connect
280 // to set search_path on each connection. This is more reliable than URL options
281 // which don't work consistently across all network configurations.
282 if let Some(ref schema) = schema_name {
283 // First connect to create the schema if needed
284 let temp_pool = AnyPoolOptions::new()
285 .max_connections(1)
286 .connect(url)
287 .await
288 .sql_context("Failed to connect to PostgreSQL")?;
289
290 // Create schema if it doesn't exist
291 let create_schema = format!("CREATE SCHEMA IF NOT EXISTS {schema}");
292 sqlx::query(&create_schema)
293 .execute(&temp_pool)
294 .await
295 .sql_context(&format!("Failed to create schema {schema}"))?;
296
297 temp_pool.close().await;
298 }
299
300 // Build pool with after_connect hook to set search_path on each connection
301 // For isolated (test) connections, use smaller pool to avoid exhausting
302 // PostgreSQL's max_connections when running many tests in parallel.
303 let schema_for_hook = schema_name.clone();
304 let is_isolated = schema_name.is_some();
305 let mut pool_options = AnyPoolOptions::new();
306
307 if is_isolated {
308 // Test isolation: 2 connections is enough, with longer timeout to wait
309 // rather than fail when many tests run in parallel
310 pool_options = pool_options
311 .max_connections(2)
312 .acquire_timeout(Duration::from_secs(30));
313 } else {
314 // Production: 5 connections for real concurrency needs
315 pool_options = pool_options.max_connections(5);
316 }
317
318 let pool = pool_options
319 .after_connect(move |conn, _meta| {
320 let schema = schema_for_hook.clone();
321 Box::pin(async move {
322 if let Some(ref s) = schema {
323 let set_path = format!("SET search_path TO {s}");
324 conn.execute(set_path.as_str()).await?;
325 }
326 Ok(())
327 })
328 })
329 .connect(url)
330 .await
331 .sql_context("Failed to connect to PostgreSQL")?;
332
333 let backend = Self {
334 pool,
335 kind: DbKind::Postgres,
336 };
337
338 // Initialize schema (tables will be created in the current search_path)
339 schema::initialize(&backend).await?;
340
341 Ok(backend)
342 }
343
344 /// Connect to a PostgreSQL database with test isolation.
345 ///
346 /// Creates a unique schema for this backend instance, ensuring tests
347 /// don't interfere with each other when run in parallel.
348 ///
349 /// # Arguments
350 ///
351 /// * `url` - PostgreSQL connection URL (e.g., "postgres://user:pass@localhost/dbname")
352 ///
353 /// # Example
354 ///
355 /// ```ignore
356 /// use eidetica::backend::database::sql::SqlxBackend;
357 ///
358 /// let backend = SqlxBackend::connect_postgres_isolated("postgres://localhost/eidetica").await.unwrap();
359 /// // This backend uses its own isolated schema
360 /// ```
361 pub async fn connect_postgres_isolated(url: &str) -> Result<Self> {
362 // Generate a unique schema name using UUID
363 // PostgreSQL schema names must start with a letter and be lowercase
364 let unique_id = uuid::Uuid::new_v4().simple().to_string();
365 let schema_name = format!("test_{unique_id}");
366 Self::connect_postgres_with_schema(url, Some(schema_name)).await
367 }
368}
369
370#[async_trait]
371impl BackendImpl for SqlxBackend {
372 async fn get(&self, id: &ID) -> Result<Entry> {
373 storage::get(self, id).await
374 }
375
376 async fn get_verification_status(&self, id: &ID) -> Result<VerificationStatus> {
377 storage::get_verification_status(self, id).await
378 }
379
380 async fn put(&self, verification_status: VerificationStatus, entry: Entry) -> Result<()> {
381 storage::put(self, verification_status, entry).await
382 }
383
384 async fn update_verification_status(
385 &self,
386 id: &ID,
387 verification_status: VerificationStatus,
388 ) -> Result<()> {
389 storage::update_verification_status(self, id, verification_status).await
390 }
391
392 async fn get_entries_by_verification_status(
393 &self,
394 status: VerificationStatus,
395 ) -> Result<Vec<ID>> {
396 storage::get_entries_by_verification_status(self, status).await
397 }
398
399 async fn get_tips(&self, tree: &ID) -> Result<Vec<ID>> {
400 traversal::get_tips(self, tree).await
401 }
402
403 async fn get_store_tips(&self, tree: &ID, store: &str) -> Result<Vec<ID>> {
404 traversal::get_store_tips(self, tree, store).await
405 }
406
407 async fn get_store_tips_up_to_entries(
408 &self,
409 tree: &ID,
410 store: &str,
411 main_entries: &[ID],
412 ) -> Result<Vec<ID>> {
413 traversal::get_store_tips_up_to_entries(self, tree, store, main_entries).await
414 }
415
416 async fn all_roots(&self) -> Result<Vec<ID>> {
417 storage::all_roots(self).await
418 }
419
420 async fn find_merge_base(&self, tree: &ID, store: &str, entry_ids: &[ID]) -> Result<ID> {
421 traversal::find_merge_base(self, tree, store, entry_ids).await
422 }
423
424 async fn collect_root_to_target(
425 &self,
426 tree: &ID,
427 store: &str,
428 target_entry: &ID,
429 ) -> Result<Vec<ID>> {
430 traversal::collect_root_to_target(self, tree, store, target_entry).await
431 }
432
433 fn as_any(&self) -> &dyn Any {
434 self
435 }
436
437 async fn get_tree(&self, tree: &ID) -> Result<Vec<Entry>> {
438 storage::get_tree(self, tree).await
439 }
440
441 async fn get_store(&self, tree: &ID, store: &str) -> Result<Vec<Entry>> {
442 storage::get_store(self, tree, store).await
443 }
444
445 async fn get_tree_from_tips(&self, tree: &ID, tips: &[ID]) -> Result<Vec<Entry>> {
446 traversal::get_tree_from_tips(self, tree, tips).await
447 }
448
449 async fn get_store_from_tips(&self, tree: &ID, store: &str, tips: &[ID]) -> Result<Vec<Entry>> {
450 traversal::get_store_from_tips(self, tree, store, tips).await
451 }
452
453 async fn get_cached_crdt_state(&self, entry_id: &ID, store: &str) -> Result<Option<String>> {
454 storage::get_cached_crdt_state(self, entry_id, store).await
455 }
456
457 async fn cache_crdt_state(&self, entry_id: &ID, store: &str, state: String) -> Result<()> {
458 storage::cache_crdt_state(self, entry_id, store, state).await
459 }
460
461 async fn clear_crdt_cache(&self) -> Result<()> {
462 storage::clear_crdt_cache(self).await
463 }
464
465 async fn get_sorted_store_parents(
466 &self,
467 tree_id: &ID,
468 entry_id: &ID,
469 store: &str,
470 ) -> Result<Vec<ID>> {
471 traversal::get_sorted_store_parents(self, tree_id, entry_id, store).await
472 }
473
474 async fn get_path_from_to(
475 &self,
476 tree_id: &ID,
477 store: &str,
478 from_id: &ID,
479 to_ids: &[ID],
480 ) -> Result<Vec<ID>> {
481 traversal::get_path_from_to(self, tree_id, store, from_id, to_ids).await
482 }
483
484 async fn get_instance_metadata(&self) -> Result<Option<InstanceMetadata>> {
485 storage::get_instance_metadata(self).await
486 }
487
488 async fn set_instance_metadata(&self, metadata: &InstanceMetadata) -> Result<()> {
489 storage::set_instance_metadata(self, metadata).await
490 }
491}
492
493/// Namespace for SQLite database constructors.
494///
495/// Provides ergonomic factory methods for creating SQLite-backed storage.
496/// All methods return `SqlxBackend` which implements `BackendImpl`.
497///
498/// # Example
499///
500/// ```ignore
501/// use eidetica::backend::database::Sqlite;
502///
503/// // File-based storage
504/// let backend = Sqlite::open("my_data.db").await?;
505///
506/// // In-memory (for testing)
507/// let backend = Sqlite::in_memory().await?;
508/// ```
509#[cfg(feature = "sqlite")]
510pub struct Sqlite;
511
512#[cfg(feature = "sqlite")]
513impl Sqlite {
514 /// Open a SQLite database at the given path.
515 ///
516 /// Creates the database file and schema if they don't exist.
517 ///
518 /// # Arguments
519 ///
520 /// * `path` - Path to the SQLite database file
521 pub async fn open<P: AsRef<std::path::Path>>(path: P) -> Result<SqlxBackend> {
522 SqlxBackend::open_sqlite(path).await
523 }
524
525 /// Create an in-memory SQLite database.
526 ///
527 /// The database exists only for the lifetime of the returned backend.
528 /// Useful for testing.
529 pub async fn in_memory() -> Result<SqlxBackend> {
530 SqlxBackend::sqlite_in_memory().await
531 }
532
533 /// Connect to a SQLite database using a connection URL.
534 ///
535 /// # Arguments
536 ///
537 /// * `url` - SQLite connection URL (e.g., "sqlite:./my.db")
538 pub async fn connect(url: &str) -> Result<SqlxBackend> {
539 SqlxBackend::connect_sqlite(url).await
540 }
541}
542
543/// Namespace for PostgreSQL database constructors.
544///
545/// Provides ergonomic factory methods for creating PostgreSQL-backed storage.
546/// All methods return `SqlxBackend` which implements `BackendImpl`.
547///
548/// # Example
549///
550/// ```ignore
551/// use eidetica::backend::database::Postgres;
552///
553/// // Connect to PostgreSQL
554/// let backend = Postgres::connect("postgres://user:pass@localhost/mydb").await?;
555///
556/// // With test isolation (unique schema per instance)
557/// let backend = Postgres::connect_isolated("postgres://localhost/test").await?;
558/// ```
559#[cfg(feature = "postgres")]
560pub struct Postgres;
561
562#[cfg(feature = "postgres")]
563impl Postgres {
564 /// Connect to a PostgreSQL database using a connection URL.
565 ///
566 /// This connects to the default (public) schema. For test isolation,
567 /// use `connect_isolated()` instead.
568 ///
569 /// # Arguments
570 ///
571 /// * `url` - PostgreSQL connection URL (e.g., "postgres://user:pass@localhost/dbname")
572 pub async fn connect(url: &str) -> Result<SqlxBackend> {
573 SqlxBackend::connect_postgres(url).await
574 }
575
576 /// Connect to a PostgreSQL database with test isolation.
577 ///
578 /// Creates a unique schema for this backend instance, ensuring tests
579 /// don't interfere with each other when run in parallel.
580 ///
581 /// # Arguments
582 ///
583 /// * `url` - PostgreSQL connection URL
584 pub async fn connect_isolated(url: &str) -> Result<SqlxBackend> {
585 SqlxBackend::connect_postgres_isolated(url).await
586 }
587}