Development Documentation (main branch) - For stable release docs, see docs.rs/eidetica
Skip to main content

eidetica/instance/
url.rs

1//! Parser for eidetica connection URLs.
2//!
3//! Eidetica has one connection-string entry point that dispatches across all
4//! supported backends and the daemon socket:
5//!
//! - `sqlite://./app.db` — embedded eidetica with the sqlite backend; after
//!   the scheme check the URL is handed through to `sqlx::sqlite` with only
//!   the scheme lowercased, so any sqlx-accepted form works (relative path,
//!   `?mode=rwc&journal_mode=WAL`, etc.).
//! - `postgres://user:pwd@host/db` (or `postgresql://…`) — embedded eidetica
//!   with the postgres backend; the URL is handed through to `sqlx::postgres`
//!   with only the scheme lowercased.
12//! - `unix:///absolute/socket/path` — thin client to a running eidetica
13//!   daemon. Path must be absolute; query strings and fragments are
14//!   rejected.
15//! - `memory://` — ephemeral in-process backend.
16//! - `memory:///absolute/path/snapshot.json` — in-process backend with a
17//!   JSON snapshot file (load-on-start, snapshot on `Instance::flush` /
18//!   best-effort on `Drop`).
19//!
20//! Schemes are matched case-insensitively per RFC 3986 (`SQLITE://`,
21//! `Sqlite://`, and `sqlite://` are all accepted). The scheme portion of
22//! the URL passed through to sqlx is normalised to lowercase so backends
23//! that demand a lowercase scheme don't reject it.
24//!
25//! `sqlite:` and `postgres:`/`postgresql:` also accept the single-colon
26//! URI form (`sqlite:file::memory:?cache=shared`, `sqlite:./app.db`) that
27//! sqlx natively understands. This is required for sqlx's in-memory URLs:
28//! the `://` variant triggers URL authority parsing in sqlx and rejects
29//! `:memory:` as a port number. `unix:` and `memory:` keep the strict
30//! `://` requirement so their typo hints still fire on `unix:/run/sock`.
31//!
32//! Schemes that aren't recognised return [`InstanceError::UnsupportedScheme`]
33//! with a typo hint when possible (`mysql://` → suggests `postgres://`).
34//! URLs missing the `scheme://` separator return [`InstanceError::InvalidUrl`]
35//! with a hint guessing at `sqlite://`.
36
37use std::path::PathBuf;
38
39use crate::Result;
40
41use super::errors::InstanceError;
42
/// Parsed connection URL, one variant per supported scheme.
///
/// The sqlx-backed variants (`Sqlite`, `Postgres`) carry the URL as the
/// caller wrote it except that the scheme is lowercased; everything after
/// the scheme separator is untouched and left for sqlx to interpret. This
/// keeps eidetica from shadowing sqlx's `?param` surface and matches the
/// user-facing expectation of "the URL after the scheme prefix means
/// whatever sqlx says it means."
///
/// The path-based variants (`Unix`, `Memory`) carry the already-validated
/// path extracted from the URL.
///
/// `PartialEq`/`Eq` are derived so parsed values can be compared directly
/// (e.g. with `assert_eq!`) instead of being destructured first.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum ConnectionUrl {
    /// `sqlite://...` or single-colon `sqlite:...` — scheme-lowercased URL
    /// handed through to sqlx.
    #[cfg_attr(not(feature = "sqlite"), allow(dead_code))]
    Sqlite { url: String },
    /// `postgres://...` / `postgresql://...` (or their single-colon forms) —
    /// scheme-lowercased URL handed through to sqlx.
    #[cfg_attr(not(feature = "postgres"), allow(dead_code))]
    Postgres { url: String },
    /// `unix:///path/to/sock` — absolute socket path.
    #[cfg_attr(not(all(unix, feature = "service")), allow(dead_code))]
    Unix { socket_path: PathBuf },
    /// `memory://` (`snapshot_path` is `None`) or
    /// `memory:///path/to/snapshot.json` (`snapshot_path` is `Some`).
    Memory { snapshot_path: Option<PathBuf> },
}
63
64/// Parse an eidetica connection URL.
65///
66/// Returns a structured [`ConnectionUrl`] that the `Instance` dispatcher can
67/// switch on. Errors are [`InstanceError::InvalidUrl`] for malformed input
68/// (with a hint where possible) and [`InstanceError::UnsupportedScheme`] for
69/// recognised-but-typoed schemes (e.g. `mysql://`).
70pub(crate) fn parse(url: &str) -> Result<ConnectionUrl> {
71    if url.is_empty() {
72        return Err(InstanceError::InvalidUrl {
73            url: String::new(),
74            reason: "URL is empty; expected something like `sqlite://./app.db`, \
75                     `postgres://user@host/db`, `unix:///run/eidetica/sock`, or `memory://`"
76                .into(),
77        }
78        .into());
79    }
80
81    let Some((scheme_raw, rest, sep)) = split_scheme(url) else {
82        return Err(missing_scheme_error(url));
83    };
84
85    // RFC 3986: schemes are case-insensitive. Normalise to lowercase and
86    // rebuild so sqlx sees a lowercase prefix. The `rest` is left
87    // untouched (paths and query strings are case-sensitive). Preserve
88    // whichever separator the caller used — `sqlite:` and `sqlite://`
89    // aren't interchangeable for sqlx's in-memory URLs.
90    let scheme = scheme_raw.to_ascii_lowercase();
91    let normalised = format!("{scheme}{sep}{rest}");
92
93    match scheme.as_str() {
94        "sqlite" => Ok(ConnectionUrl::Sqlite { url: normalised }),
95        "postgres" | "postgresql" => Ok(ConnectionUrl::Postgres { url: normalised }),
96        "unix" => parse_unix(url, rest),
97        "memory" => parse_memory(url, rest),
98        // Common typos / unrelated DB URLs people might paste in.
99        "mysql" | "mariadb" => unsupported(scheme, Some("postgres")),
100        "tcp" | "http" | "https" | "ws" | "wss" => unsupported(scheme, Some("unix")),
101        "file" => unsupported(scheme, Some("sqlite")),
102        _ => unsupported(scheme, None),
103    }
104}
105
/// Split `url` into `(scheme, rest, separator)`.
///
/// The scheme always ends at the *first* `:` in the string. If that colon is
/// immediately followed by `//`, the separator is `"://"` and `rest` is
/// everything after it (the standard form for all four backends).
/// Otherwise the single-colon form `scheme:...` is accepted only for the
/// sqlx-backed schemes (`sqlite`, `postgres`, `postgresql`, matched
/// case-insensitively) — sqlx accepts both prefixes natively and needs the
/// single-colon form for its in-memory URLs
/// (`sqlite:file::memory:?cache=shared`). `unix:` and `memory:`
/// intentionally don't get the fallback so their slash-typo hints
/// (`unix:/run/sock`) keep firing.
///
/// Anchoring on the first colon (rather than searching for the first `://`
/// anywhere) matters for single-colon URLs that happen to contain `://`
/// later on, e.g. `sqlite:file:app.db?note=http://x`: the scheme there is
/// `sqlite`, not `sqlite:file:app.db?note=http`.
///
/// Returns `None` when there is no colon at all, or when the single-colon
/// form is used with a scheme that is not sqlx-backed. The returned `scheme`
/// is a slice of `url` in its original case.
fn split_scheme(url: &str) -> Option<(&str, &str, &'static str)> {
    let (scheme, after_colon) = url.split_once(':')?;

    if let Some(rest) = after_colon.strip_prefix("//") {
        return Some((scheme, rest, "://"));
    }

    let sqlx_backed = ["sqlite", "postgres", "postgresql"]
        .iter()
        .any(|known| scheme.eq_ignore_ascii_case(known));
    sqlx_backed.then_some((scheme, after_colon, ":"))
}
125
126fn missing_scheme_error(url: &str) -> crate::Error {
127    // Common typo: `unix:/path` (single slash) or just a bare path.
128    // Match case-insensitively so `UNIX:/path` still gets the hint.
129    let lower = url.to_ascii_lowercase();
130    let hint = if let Some(stripped) = lower.strip_prefix("unix:") {
131        format!(
132            "`unix://` requires two slashes plus an absolute path; did you mean `unix://{stripped}`?"
133        )
134    } else if url.starts_with('/') || url.starts_with("./") || url.ends_with(".db") {
135        format!("missing scheme; did you mean `sqlite://{url}`?")
136    } else {
137        "URL is missing the `scheme://` separator (expected `sqlite://`, `postgres://`, \
138         `unix://`, or `memory://`)"
139            .to_string()
140    };
141    InstanceError::InvalidUrl {
142        url: url.to_string(),
143        reason: hint,
144    }
145    .into()
146}
147
148fn unsupported(scheme: String, suggested: Option<&'static str>) -> Result<ConnectionUrl> {
149    Err(InstanceError::UnsupportedScheme { scheme, suggested }.into())
150}
151
152/// Shared validation for the absolute-path schemes (`unix://`, `memory://`).
153///
154/// Rejects query strings and fragments — both schemes describe a local path
155/// and have no use for HTTP-style decorations.
156fn reject_query_or_fragment(scheme: &'static str, original: &str, rest: &str) -> Result<()> {
157    if rest.contains('?') {
158        return Err(InstanceError::InvalidUrl {
159            url: original.to_string(),
160            reason: format!("`{scheme}://` does not support query strings"),
161        }
162        .into());
163    }
164    if rest.contains('#') {
165        return Err(InstanceError::InvalidUrl {
166            url: original.to_string(),
167            reason: format!("`{scheme}://` does not support fragments"),
168        }
169        .into());
170    }
171    Ok(())
172}
173
174fn parse_unix(original: &str, rest: &str) -> Result<ConnectionUrl> {
175    // `unix://` only describes a local absolute path to a Unix socket file.
176    // No hostname, no query, no fragment.
177    if rest.is_empty() {
178        return Err(InstanceError::InvalidUrl {
179            url: original.to_string(),
180            reason: "`unix://` requires an absolute socket path (e.g. \
181                     `unix:///run/eidetica/service.sock`)"
182                .into(),
183        }
184        .into());
185    }
186    if !rest.starts_with('/') {
187        // Ambiguous: `unix://run/sock` could be either a slash typo (meant
188        // `unix:///run/sock`) or an RFC-3986-style attempt at an authority
189        // (meant `unix:///sock`, treating `run` as a hostname). Unix
190        // sockets have no hostname concept — the kernel identifies them
191        // by filesystem path — so we surface both interpretations and
192        // explain rather than guess.
193        let forgot_slash_hint = format!("unix:///{rest}");
194        let dropped_host_hint = match rest.split_once('/') {
195            Some((_, after)) if !after.is_empty() => format!("unix:///{after}"),
196            _ => "unix:///path/to/sock".to_string(),
197        };
198        return Err(InstanceError::InvalidUrl {
199            url: original.to_string(),
200            reason: format!(
201                "`unix://` path must be absolute (start with `/`); got `{rest}`. \
202                 Two common causes: (1) a slash typo — if you meant the socket file \
203                 at `/{rest}`, use `{forgot_slash_hint}`; (2) treating `unix://` like \
204                 an HTTP URL with an authority — Unix sockets have no hostname (the \
205                 kernel identifies them by filesystem path only), so drop the host \
206                 segment and use `{dropped_host_hint}` instead."
207            ),
208        }
209        .into());
210    }
211    reject_query_or_fragment("unix", original, rest)?;
212    Ok(ConnectionUrl::Unix {
213        socket_path: PathBuf::from(rest),
214    })
215}
216
217fn parse_memory(original: &str, rest: &str) -> Result<ConnectionUrl> {
218    // `memory://` (no path) → ephemeral.
219    // `memory:///absolute/path.json` → load-on-start + snapshot target.
220    if rest.is_empty() {
221        return Ok(ConnectionUrl::Memory {
222            snapshot_path: None,
223        });
224    }
225    if !rest.starts_with('/') {
226        return Err(InstanceError::InvalidUrl {
227            url: original.to_string(),
228            reason: format!(
229                "`memory://` snapshot path must be absolute (start with `/`); got `{rest}`. \
230                 Use `memory://` for ephemeral state, or `memory:///{rest}` for a snapshot path."
231            ),
232        }
233        .into());
234    }
235    // `memory:///` and `memory:////...` aren't usable snapshot targets —
236    // the path resolves to `/` (a directory) and the I/O failure later
237    // would be cryptic. Reject up front.
238    if rest == "/" || rest.starts_with("//") {
239        return Err(InstanceError::InvalidUrl {
240            url: original.to_string(),
241            reason: "`memory://` snapshot path must name a file (e.g. \
242                     `memory:///var/lib/eidetica/snap.json`); got an empty or root path. \
243                     Use `memory://` for an ephemeral in-memory instance with no snapshot."
244                .into(),
245        }
246        .into());
247    }
248    reject_query_or_fragment("memory", original, rest)?;
249    Ok(ConnectionUrl::Memory {
250        snapshot_path: Some(PathBuf::from(rest)),
251    })
252}
253
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `input`, require the `Sqlite` variant, and return its URL.
    fn sqlite_url(input: &str) -> String {
        match parse(input).unwrap() {
            ConnectionUrl::Sqlite { url } => url,
            other => panic!("expected sqlite, got {other:?}"),
        }
    }

    /// Parse `input`, require the `Postgres` variant, and return its URL.
    fn postgres_url(input: &str) -> String {
        match parse(input).unwrap() {
            ConnectionUrl::Postgres { url } => url,
            other => panic!("expected postgres, got {other:?}"),
        }
    }

    /// Parse `input`, require the `Unix` variant, and return its socket path.
    fn unix_socket(input: &str) -> PathBuf {
        match parse(input).unwrap() {
            ConnectionUrl::Unix { socket_path } => socket_path,
            other => panic!("expected unix, got {other:?}"),
        }
    }

    /// Parse `input`, require the `Memory` variant, and return its snapshot path.
    fn memory_snapshot(input: &str) -> Option<PathBuf> {
        match parse(input).unwrap() {
            ConnectionUrl::Memory { snapshot_path } => snapshot_path,
            other => panic!("expected memory, got {other:?}"),
        }
    }

    /// Parse `input`, require failure, and check the rendered error contains
    /// every string in `needles`.
    fn assert_error_mentions(input: &str, needles: &[&str]) {
        let msg = parse(input).unwrap_err().to_string();
        for needle in needles {
            assert!(msg.contains(needle), "{msg}");
        }
    }

    #[test]
    fn parses_sqlite() {
        assert_eq!(sqlite_url("sqlite://./app.db"), "sqlite://./app.db");
    }

    #[test]
    fn parses_sqlite_single_colon_uri_form() {
        // sqlx's native in-memory URL uses the single-colon URI form because
        // the `://` variant triggers URL authority parsing in sqlx and
        // rejects `:memory:` as an invalid port. These pass through as-is.
        assert_eq!(
            sqlite_url("sqlite:file::memory:?cache=shared"),
            "sqlite:file::memory:?cache=shared"
        );
        assert_eq!(sqlite_url("sqlite:./app.db"), "sqlite:./app.db");
        assert_eq!(
            postgres_url("postgres:user@host/db"),
            "postgres:user@host/db"
        );
        // Case-insensitive scheme even on the single-colon form.
        assert_eq!(
            sqlite_url("SQLITE:file::memory:?cache=shared"),
            "sqlite:file::memory:?cache=shared"
        );
    }

    #[test]
    fn parses_postgres_and_postgresql_aliases() {
        assert_eq!(postgres_url("postgres://u@h/db"), "postgres://u@h/db");
        assert_eq!(postgres_url("postgresql://u@h/db"), "postgresql://u@h/db");
    }

    #[test]
    fn scheme_is_case_insensitive() {
        // Upper- and mixed-case scheme names should match and come out
        // lowercase in the URL handed through to sqlx.
        assert_eq!(sqlite_url("SQLITE://./app.db"), "sqlite://./app.db");
        assert_eq!(postgres_url("Postgres://u@h/db"), "postgres://u@h/db");
        assert_eq!(unix_socket("UNIX:///run/sock"), PathBuf::from("/run/sock"));
        assert!(memory_snapshot("Memory://").is_none());
    }

    #[test]
    fn path_case_is_preserved_when_scheme_is_lowercased() {
        // Only the scheme is case-insensitive (RFC 3986). Paths, hostnames,
        // query strings, and filenames are case-sensitive on most
        // filesystems and on the wire, so normalisation must touch nothing
        // but the scheme prefix.
        assert_eq!(sqlite_url("SQLITE://./MyApp.DB"), "sqlite://./MyApp.DB");
        // Single-colon URI form: same rule.
        assert_eq!(
            sqlite_url("SQLITE:file:Mixed-Case.db?Cache=Shared"),
            "sqlite:file:Mixed-Case.db?Cache=Shared"
        );
        assert_eq!(
            postgres_url("Postgres://User:Pass@Host.Example/MyDB"),
            "postgres://User:Pass@Host.Example/MyDB"
        );
        assert_eq!(
            unix_socket("UNIX:///Run/MyDaemon.SOCK"),
            PathBuf::from("/Run/MyDaemon.SOCK")
        );
        assert_eq!(
            memory_snapshot("MEMORY:///Var/Lib/MyApp/Snap.JSON"),
            Some(PathBuf::from("/Var/Lib/MyApp/Snap.JSON"))
        );
    }

    #[test]
    fn parses_unix_absolute_path() {
        assert_eq!(
            unix_socket("unix:///run/eidetica/sock"),
            PathBuf::from("/run/eidetica/sock")
        );
    }

    #[test]
    fn rejects_unix_relative_path() {
        // The hint should surface both readings: the slash-typo form
        // (`unix:///run/sock`) and the dropped-host form (`unix:///sock`).
        // Either could be what the user meant.
        assert_error_mentions(
            "unix://run/sock",
            &["unix:///run/sock", "unix:///sock", "no hostname"],
        );
    }

    #[test]
    fn rejects_unix_relative_no_subpath() {
        // `unix://host` has no `/` after the host, so the dropped-host
        // suggestion falls back to a generic placeholder.
        assert_error_mentions("unix://host", &["unix:///host", "/path/to/sock"]);
    }

    #[test]
    fn rejects_unix_empty_path() {
        assert_error_mentions("unix://", &["requires an absolute"]);
    }

    #[test]
    fn rejects_unix_query_and_fragment() {
        for input in ["unix:///s?foo=bar", "unix:///s#frag"] {
            assert!(parse(input).is_err());
        }
    }

    #[test]
    fn parses_memory_ephemeral() {
        assert!(memory_snapshot("memory://").is_none());
    }

    #[test]
    fn parses_memory_with_snapshot_path() {
        assert_eq!(
            memory_snapshot("memory:///var/lib/snap.json"),
            Some(PathBuf::from("/var/lib/snap.json"))
        );
    }

    #[test]
    fn rejects_memory_relative_snapshot() {
        assert_error_mentions("memory://./snap.json", &["absolute"]);
    }

    #[test]
    fn rejects_memory_root_snapshot() {
        // `memory:///` resolves to `/` — a directory, not a snapshot file.
        assert_error_mentions("memory:///", &["must name a file"]);
        // The double-slash form is a common mistake; reject the same way.
        assert_error_mentions("memory:////etc/passwd", &["must name a file"]);
    }

    #[test]
    fn empty_url_errors_with_hint() {
        assert_error_mentions("", &["sqlite://"]);
    }

    #[test]
    fn missing_scheme_hints_at_sqlite() {
        assert_error_mentions("./app.db", &["sqlite://"]);
    }

    #[test]
    fn unix_single_slash_hints_at_double() {
        assert_error_mentions("unix:/run/sock", &["unix://"]);
    }

    #[test]
    fn unix_single_slash_uppercase_still_hints() {
        // The missing-scheme hint should match `UNIX:` case-insensitively.
        assert_error_mentions("UNIX:/run/sock", &["unix://"]);
    }

    #[test]
    fn mysql_suggests_postgres() {
        assert_error_mentions("mysql://u@h/db", &["postgres"]);
    }

    #[test]
    fn file_suggests_sqlite() {
        assert_error_mentions("file:///app.db", &["sqlite"]);
    }

    #[test]
    fn tcp_suggests_unix() {
        assert_error_mentions("tcp://host:1234", &["unix"]);
    }
}