eidetica/service/server.rs
1//! Service server: accepts Unix socket connections and dispatches `BackendImpl` operations.
2//!
3//! The server wraps an `Instance` (not just a backend) so it can handle write
4//! notifications through the Instance's callback system.
5
6use std::collections::{HashMap, HashSet};
7use std::os::unix::fs::PermissionsExt;
8use std::path::{Path, PathBuf};
9
10use tokio::net::UnixListener;
11use tokio::sync::watch;
12
13use crate::Instance;
14use crate::auth::crypto::{PublicKey, generate_challenge, verify_challenge_response};
15use crate::auth::errors::AuthError;
16use crate::auth::types::{Permission, SigKey};
17use crate::auth::validation::permissions::resolve_identity_permission;
18use crate::backend::CacheScope;
19use crate::database::Database;
20use crate::entry::ID;
21use crate::instance::WriteSource;
22use crate::service::error::ServiceError;
23use crate::service::protocol::{
24 AuthenticatedDbRequest, DatabaseOp, HandshakeAck, MergeState, PROTOCOL_VERSION, ServiceRequest,
25 ServiceResponse, read_frame, write_frame,
26};
27use crate::user::system_databases::lookup_user_record;
28
/// Per-connection authentication state.
///
/// A connection moves `PreAuth → AwaitingProof → Authenticated` on a successful
/// `TrustedLoginUser` / `TrustedLoginProve` exchange. A failed proof drops the
/// connection back to `PreAuth` so the client can retry without reconnecting.
/// The intent is that any other request arriving while in `AwaitingProof`
/// also abandons the half-finished login so it can't be exploited mid-flight.
/// This type only carries the state; that reset has to be performed by the
/// request dispatcher (NOTE(review): confirm the dispatcher enforces it for
/// every non-`TrustedLoginProve` request).
///
/// "Trusted" refers to the assumption that whoever can reach this socket is
/// already authorised by filesystem permissions (mode 0600 under
/// `$XDG_RUNTIME_DIR`); see the protocol module docs and the Service
/// Architecture brain doc § Trusted login threat model.
#[derive(Debug, Clone)]
enum ConnectionState {
    /// No login attempt yet, or last attempt failed/abandoned.
    PreAuth,
    /// `TrustedLoginUser` succeeded; waiting for the client's `TrustedLoginProve`.
    AwaitingProof {
        /// Username supplied in the `TrustedLoginUser` request.
        username: String,
        /// UUID of the user record found for `username`.
        user_uuid: String,
        /// Challenge bytes issued to the client; the proof must sign these.
        challenge: Vec<u8>,
        /// The user's root key id from the stored credentials; the proof
        /// signature is verified against this key.
        expected_pubkey: PublicKey,
    },
    /// Login completed. `login_pubkey` is the verified root pubkey for the
    /// user, established at `TrustedLoginProve` time. `session_keyset` is the
    /// set of pubkeys the client has further proven possession of via
    /// `SessionKeyChallenge`/`SessionKeyRegister`; it always contains
    /// `login_pubkey` and may include additional per-DB keys the user owns.
    /// The dispatch path for `Authenticated`/`AuthenticatedDb` requests
    /// validates the identity hint against this set and gates against the
    /// resulting *acting* pubkey, so a single connection can drive ops on
    /// databases authored by any key the user has proven they hold.
    /// `user_uuid` is the per-user scope key for the unified CRDT-state
    /// cache (see [`crate::backend::CacheScope::User`]).
    /// `pending_key_challenges` holds outstanding registration challenges,
    /// keyed by the pubkey the challenge was issued for. Each challenge is
    /// single-use: the matching `SessionKeyRegister` consumes it whether
    /// verification succeeds or fails.
    #[allow(dead_code)] // username surfaces in audit/logging follow-ups
    Authenticated {
        /// Username carried over from the login exchange (currently unread).
        username: String,
        /// Per-user cache scope key; see the variant docs.
        user_uuid: String,
        /// Root pubkey verified at `TrustedLoginProve` time.
        login_pubkey: PublicKey,
        /// Every pubkey this connection has proven possession of.
        session_keyset: HashSet<PublicKey>,
        /// Outstanding single-use registration challenges, keyed by pubkey.
        pending_key_challenges: HashMap<PublicKey, Vec<u8>>,
    },
}
76
/// Eidetica service server that listens on a Unix domain socket.
///
/// The server wraps a full `Instance` so it can dispatch both storage operations
/// (via the backend) and write callbacks (via `Instance::put_entry()`'s notification path).
///
/// CRDT-state caching lives in the underlying `BackendImpl` (scope-keyed
/// via [`crate::backend::CacheScope`]); wire handlers route through
/// `instance.backend()` directly rather than keeping a separate
/// service-layer cache.
pub struct ServiceServer {
    /// The instance being served; a clone is handed to each connection task.
    instance: Instance,
    /// Filesystem path the Unix listener is bound to while `run` is active.
    socket_path: PathBuf,
}
90
91impl ServiceServer {
92 /// Create a new service server.
93 ///
94 /// # Arguments
95 /// * `instance` - The Instance to serve. The server holds a strong reference.
96 /// * `socket_path` - Path for the Unix domain socket.
97 pub fn new(instance: Instance, socket_path: impl Into<PathBuf>) -> Self {
98 Self {
99 instance,
100 socket_path: socket_path.into(),
101 }
102 }
103
104 /// Get the socket path.
105 pub fn socket_path(&self) -> &Path {
106 &self.socket_path
107 }
108
109 /// Run the server until the shutdown signal is received.
110 ///
111 /// Removes any stale socket file, creates the parent directory, binds the
112 /// listener, and loops accepting connections. Each connection is handled in
113 /// a spawned task. On shutdown, the socket file is cleaned up.
114 ///
115 /// # Arguments
116 /// * `shutdown` - A watch receiver; the server stops when the sender is dropped.
117 pub async fn run(&self, mut shutdown: watch::Receiver<()>) -> crate::Result<()> {
118 // Remove stale socket if it exists
119 if self.socket_path.exists() {
120 tokio::fs::remove_file(&self.socket_path).await?;
121 }
122
123 // Create parent directory with owner-only permissions (0700)
124 if let Some(parent) = self.socket_path.parent() {
125 tokio::fs::create_dir_all(parent).await?;
126 tokio::fs::set_permissions(parent, std::fs::Permissions::from_mode(0o700)).await?;
127 }
128
129 let listener = UnixListener::bind(&self.socket_path)?;
130
131 // Restrict socket to owner-only access (0600)
132 tokio::fs::set_permissions(&self.socket_path, std::fs::Permissions::from_mode(0o600))
133 .await?;
134
135 tracing::info!("Service server listening on {}", self.socket_path.display());
136
137 loop {
138 tokio::select! {
139 accept_result = listener.accept() => {
140 match accept_result {
141 Ok((stream, _addr)) => {
142 let instance = self.instance.clone();
143 tokio::spawn(async move {
144 if let Err(e) = handle_connection(stream, instance).await {
145 tracing::debug!("Connection handler error: {e}");
146 }
147 });
148 }
149 Err(e) => {
150 tracing::error!("Failed to accept connection: {e}");
151 }
152 }
153 }
154 _ = shutdown.changed() => {
155 tracing::info!("Service server shutting down");
156 break;
157 }
158 }
159 }
160
161 // Clean up socket file
162 let _ = tokio::fs::remove_file(&self.socket_path).await;
163 Ok(())
164 }
165}
166
167/// Handle a single client connection.
168async fn handle_connection(
169 stream: tokio::net::UnixStream,
170 instance: Instance,
171) -> crate::Result<()> {
172 let (mut reader, mut writer) = tokio::io::split(stream);
173
174 // 1. Read and validate handshake
175 let handshake: crate::service::protocol::Handshake = match read_frame(&mut reader).await? {
176 Some(h) => h,
177 None => return Ok(()), // Client disconnected before handshake
178 };
179
180 if handshake.protocol_version != PROTOCOL_VERSION {
181 // Send error ack and close
182 let ack = HandshakeAck {
183 protocol_version: PROTOCOL_VERSION,
184 };
185 write_frame(&mut writer, &ack).await?;
186 return Err(crate::Error::Io(std::io::Error::new(
187 std::io::ErrorKind::InvalidData,
188 format!(
189 "Protocol version mismatch: client={}, server={}",
190 handshake.protocol_version, PROTOCOL_VERSION
191 ),
192 )));
193 }
194
195 // Send handshake ack
196 let ack = HandshakeAck {
197 protocol_version: PROTOCOL_VERSION,
198 };
199 write_frame(&mut writer, &ack).await?;
200
201 // 2. Request/response loop with per-connection auth state
202 let mut state = ConnectionState::PreAuth;
203 let loop_result: crate::Result<()> = async {
204 loop {
205 let request: ServiceRequest = match read_frame(&mut reader).await? {
206 Some(req) => req,
207 None => break, // Clean EOF
208 };
209
210 let response = dispatch(&instance, &mut state, request).await;
211 write_frame(&mut writer, &response).await?;
212 }
213 Ok(())
214 }
215 .await;
216
217 // Session teardown: nothing to reclaim. The unified CRDT-state cache
218 // lives in `BackendImpl` and is bounded by its own eviction policy
219 // (byte-bounded LRU on the in-memory backend; disk-bounded on SQL).
220 // Per-user cache slots survive disconnect intentionally so a
221 // reconnecting client recovers materialized state from tier 2 without
222 // recomputing from entries.
223
224 loop_result
225}
226
227/// Dispatch a service request to the appropriate Instance/Backend method.
228async fn dispatch(
229 instance: &Instance,
230 state: &mut ConnectionState,
231 request: ServiceRequest,
232) -> ServiceResponse {
233 match dispatch_inner(instance, state, request).await {
234 Ok(resp) => resp,
235 Err(e) => ServiceResponse::Error(ServiceError::from(&e)),
236 }
237}
238
239/// Inner dispatch that returns Result for ergonomic error handling.
240async fn dispatch_inner(
241 instance: &Instance,
242 state: &mut ConnectionState,
243 request: ServiceRequest,
244) -> crate::Result<ServiceResponse> {
245 match request {
246 // === Pre-auth: login handshake ===
247 ServiceRequest::TrustedLoginUser { username } => {
248 handle_trusted_login_user(instance, state, username).await
249 }
250 ServiceRequest::TrustedLoginProve { signature } => {
251 handle_trusted_login_prove(state, &signature)
252 }
253
254 // === Pre-auth: server identity ===
255 ServiceRequest::GetInstanceMetadata => {
256 let metadata = instance.backend().get_instance_metadata().await?;
257 Ok(ServiceResponse::InstanceMetadata(metadata))
258 }
259
260 // === Post-auth: extend the session keyset ===
261 ServiceRequest::SessionKeyChallenge { pubkey } => {
262 handle_session_key_challenge(state, pubkey)
263 }
264 ServiceRequest::SessionKeyRegister { pubkey, signature } => {
265 handle_session_key_register(state, pubkey, &signature)
266 }
267
268 // === Authenticated storage operations ===
269 //
270 // Gate 1: the connection must have completed `TrustedLogin*`. Gate 2:
271 // the per-tree permission gate. Every `DatabaseOp` carries its target
272 // `root_id` explicitly, so the gate is *unconditional* — there is no
273 // tree-less op to fall through it — with two exceptions handled below
274 // (`SubmitSignedEntry`, verification-gated; `SetInstanceMetadata`,
275 // gated against the server-known `_databases`, not the request root).
276 ServiceRequest::AuthenticatedDb(inner) => {
277 let (login_pubkey, keyset_snapshot, session_user_uuid) = match state {
278 ConnectionState::Authenticated {
279 login_pubkey,
280 session_keyset,
281 user_uuid,
282 ..
283 } => (
284 login_pubkey.clone(),
285 session_keyset.clone(),
286 user_uuid.clone(),
287 ),
288 _ => {
289 return Err(crate::Error::Auth(Box::new(
290 AuthError::InvalidAuthConfiguration {
291 reason: "database operation requires an authenticated connection; \
292 complete TrustedLogin* first"
293 .to_string(),
294 },
295 )));
296 }
297 };
298
299 let AuthenticatedDbRequest {
300 root_id,
301 identity,
302 op,
303 } = *inner;
304
305 // Submit is verification-gated, not session-gated.
306 //
307 // Reads are session-gated (confidentiality boundary); submits
308 // are verification-gated (integrity boundary). `SubmitSignedEntry`
309 // requires only an *authenticated* connection (gate 1, the
310 // `ConnectionState::Authenticated` match above, still applies).
311 // Which tree the entry belongs to, and whether its signer may
312 // write that tree, is decided by the server's own verification
313 // pass in the handler (store `Unverified`, then
314 // `Database::open(...).verify()`) against the tree's *real*
315 // pinned auth lineage — not by who holds the socket. An attacker
316 // without a key the tree's auth grants cannot produce a
317 // `Verified` entry, and unverified junk is excluded from every
318 // default read by the frontier cut, so the per-tree session gate
319 // adds no correctness or isolation property here; it only blocks
320 // a legitimate transporter (e.g. an admin session carrying a
321 // user-signed genesis). See the verification-gated-submit design
322 // doc for the full threat analysis.
323 let is_submit = matches!(op, DatabaseOp::SubmitSignedEntry { .. });
324
325 // `SetInstanceMetadata` rewrites the daemon's pointers to its own
326 // system DBs. It is gated against `_databases` (a server-known
327 // tree), not the request's `root_id`, so an instance admin — by
328 // construction a user with Admin on `_databases` via the
329 // first-user bootstrap — is required. Fail closed (require_existing
330 // = true): `_databases` always exists on an initialized daemon and
331 // this is never a creation flow, so the create-flow passthrough
332 // must not apply (it would let any authenticated user rewrite
333 // system-DB pointers if `_databases` were ever unreadable).
334 let is_set_metadata = matches!(op, DatabaseOp::SetInstanceMetadata { .. });
335
336 // Submit accepts any identity hint (admin transports user-signed
337 // entries); every other op resolves an acting pubkey from the
338 // keyset and gates per-tree against it.
339 let acting_pubkey = if is_submit {
340 // Use the hint if it parses as a pubkey (for submit metadata),
341 // otherwise fall back to login_pubkey — submit doesn't gate
342 // on this value.
343 identity
344 .hint()
345 .pubkey
346 .clone()
347 .unwrap_or_else(|| login_pubkey.clone())
348 } else {
349 resolve_acting_pubkey(&identity, &login_pubkey, &keyset_snapshot)?
350 };
351
352 // Per-tree permission gate. Unconditional for every op *except*
353 // submit (verification in the handler is its boundary) and
354 // set-metadata (gated against `_databases` below). Create-flow
355 // passthrough (false) for the rest, so a not-yet-propagated tree is
356 // waved through and database creation works.
357 if is_set_metadata {
358 gate_tree_permission(
359 instance,
360 &acting_pubkey,
361 &identity,
362 instance.databases_db_id(),
363 Permission::Admin(0),
364 true,
365 )
366 .await?;
367 } else if !is_submit {
368 gate_tree_permission(
369 instance,
370 &acting_pubkey,
371 &identity,
372 &root_id,
373 op.required_permission(),
374 false,
375 )
376 .await?;
377 }
378
379 dispatch_database_op(
380 instance,
381 &acting_pubkey,
382 &identity,
383 &session_user_uuid,
384 root_id,
385 op,
386 )
387 .await
388 }
389 }
390}
391
392/// Resolve the *acting* pubkey for a session-gated op.
393///
394/// The identity hint, when present, must be in the connection's session
395/// keyset (proof of possession registered via `SessionKeyChallenge` /
396/// `SessionKeyRegister`, or established at login time). Returning the hint
397/// as the acting pubkey lets the per-tree gate check the actual key the
398/// caller wants to act as, not the connection-wide login key.
399///
400/// An absent hint defaults to the login pubkey — matches the pre-keyset
401/// behavior where every op acted as the login identity.
402fn resolve_acting_pubkey(
403 identity: &SigKey,
404 login_pubkey: &PublicKey,
405 session_keyset: &HashSet<PublicKey>,
406) -> crate::Result<PublicKey> {
407 match &identity.hint().pubkey {
408 Some(claimed) if session_keyset.contains(claimed) => Ok(claimed.clone()),
409 Some(claimed) => Err(crate::Error::Auth(Box::new(
410 AuthError::SigningKeyMismatch {
411 reason: format!(
412 "request identity claims pubkey '{claimed}' but it is not in the session keyset; \
413 register it first via SessionKeyChallenge/SessionKeyRegister"
414 ),
415 },
416 ))),
417 None => Ok(login_pubkey.clone()),
418 }
419}
420
421/// Dispatch a Database-level op against the server's local `Database`.
422///
423/// Additive sibling of `dispatch_backend_op`. The caller has already verified
424/// the session identity and run the unconditional per-tree permission gate on
425/// `root_id`. Because the server runs the `Database` layer here, verify-on-read
426/// and the Verified frontier are server-side **by construction**.
427async fn dispatch_database_op(
428 instance: &Instance,
429 acting_pubkey: &PublicKey,
430 identity: &SigKey,
431 user_uuid: &str,
432 root_id: ID,
433 op: DatabaseOp,
434) -> crate::Result<ServiceResponse> {
435 match op {
436 DatabaseOp::GetEntry { id } => {
437 let entry = instance.backend().get(&id).await?;
438 // Post-fetch owning-tree Read gate: a raw entry id carries no
439 // inline tree, so the pre-dispatch gate (which keys on `root_id`)
440 // could not cover the entry's real owning tree.
441 gate_entry_read(instance, acting_pubkey, identity, &entry).await?;
442 Ok(ServiceResponse::Entry(entry))
443 }
444
445 DatabaseOp::GetVerifiedTips => {
446 // The server runs the Database layer, so `snapshot()` returns the
447 // Verified frontier by construction — no client-side verify, no
448 // remote-detection heuristic.
449 let db = Database::open(instance, &root_id).await?;
450 let snapshot = db.snapshot().await?;
451 Ok(ServiceResponse::Ids(snapshot.into_tips()))
452 }
453
454 DatabaseOp::SubmitSignedEntry { entry } => {
455 // The client signed this entry; the server does NOT trust its
456 // claimed validity. Store it `Unverified`, then run our OWN
457 // verification pass against the entry's pinned settings. A
458 // poisoned entry never reaches `Verified` and is excluded from
459 // every default read by the frontier cut — D1 is closed by
460 // construction here, not by gate-hardening a raw `Put`.
461 instance
462 .put_entry(
463 &root_id,
464 crate::backend::VerificationStatus::Unverified,
465 *entry,
466 WriteSource::Remote,
467 )
468 .await?;
469 Database::open(instance, &root_id).await?.verify().await?;
470 Ok(ServiceResponse::Ok)
471 }
472
473 DatabaseOp::BeginTransaction { stores, scope } => {
474 // Single-sourced: both this handler and the Phase-3 remote seam
475 // call `Database::transaction_context`, so `Transaction::commit`'s
476 // build-sign path has one source of truth.
477 let db = Database::open(instance, &root_id).await?;
478 let ctx = db.transaction_context(&stores, scope).await?;
479 Ok(ServiceResponse::TransactionContext(ctx))
480 }
481
482 DatabaseOp::GetStoreState { store } => {
483 // Server-materialized merged state (unencrypted stores only).
484 // Encrypted stores must use GetStoreEntries instead — the
485 // ephemeral transaction here has no encryptor, and Doc
486 // deserialization would fail on ciphertext.
487 let db = Database::open(instance, &root_id).await?;
488 let value = db.get_store_state(&store).await?;
489 Ok(ServiceResponse::CrdtValue(value))
490 }
491
492 DatabaseOp::GetStoreEntries { store, tips, scope } => {
493 // Universal primitive (encrypted + unencrypted): returns raw
494 // Entry records with opaque data in canonical CRDT replay order.
495 // For encrypted stores the client decrypts+merges locally.
496 let db = Database::open(instance, &root_id).await?;
497 let entries = db.get_store_entries(&store, &tips, scope).await?;
498 Ok(ServiceResponse::Entries(entries))
499 }
500
501 DatabaseOp::GetStoreTipsUpToEntries { store, up_to } => {
502 let db = Database::open(instance, &root_id).await?;
503 let boundary = crate::Snapshot::from(up_to);
504 let snapshot = db
505 .ops()
506 .store_snapshot_at(&root_id, &store, &boundary)
507 .await?;
508 Ok(ServiceResponse::Ids(snapshot.into_tips()))
509 }
510
511 DatabaseOp::ComputeMergeState { store, entry_ids } => {
512 let db = Database::open(instance, &root_id).await?;
513 let merge_base = db
514 .ops()
515 .find_merge_base(&root_id, &store, &entry_ids)
516 .await?;
517 let path = db
518 .ops()
519 .get_path_from_to(&root_id, &store, &merge_base, &entry_ids)
520 .await?;
521 Ok(ServiceResponse::MergeState(MergeState { merge_base, path }))
522 }
523
524 DatabaseOp::GetCachedCrdtState { store, key } => {
525 // Per-tree Read gate already ran above. Try the caller's own
526 // User-scoped slot first (where client-uploaded ciphertext for
527 // encrypted stores lives), then fall back to Shared (where the
528 // daemon's own materialization of unencrypted stores lives).
529 // The fallback is what gives cross-user dedup on plaintext
530 // stores: alice triggers a server materialization, blob lands
531 // in Shared, bob's later read finds it without recomputing.
532 let backend = instance.require_local_engine()?;
533 let mut blob = backend
534 .get_cached_crdt_state(&CacheScope::User(user_uuid.to_string()), &key, &store)
535 .await?;
536 if blob.is_none() {
537 blob = backend
538 .get_cached_crdt_state(&CacheScope::Shared, &key, &store)
539 .await?;
540 }
541 Ok(ServiceResponse::CachedCrdtState(blob))
542 }
543
544 DatabaseOp::CacheCrdtState { store, key, blob } => {
545 // Per-tree Read gate already ran above. Per-user trust: the
546 // blob is opaque (cipher- or plaintext) and stored verbatim;
547 // only the submitting user can read it back. We never promote
548 // a client upload to Shared — the daemon can't verify the
549 // merge result, so cross-user visibility would be a poison
550 // vector. Shared writes only come from the daemon's own
551 // in-process (LocalBackend) materialization path.
552 instance
553 .require_local_engine()?
554 .cache_crdt_state(CacheScope::User(user_uuid.to_string()), &key, &store, blob)
555 .await?;
556 Ok(ServiceResponse::Ok)
557 }
558
559 DatabaseOp::SetInstanceMetadata { metadata } => {
560 // Admin-on-`_databases` gate already ran in the dispatcher (against
561 // the server-known system tree, not `root_id`).
562 instance.backend().set_instance_metadata(&metadata).await?;
563 Ok(ServiceResponse::Ok)
564 }
565 }
566}
567
568/// Handle `ServiceRequest::TrustedLoginUser`: look up the user's full record,
569/// mint a challenge, and move the connection into `AwaitingProof`.
570async fn handle_trusted_login_user(
571 instance: &Instance,
572 state: &mut ConnectionState,
573 username: String,
574) -> crate::Result<ServiceResponse> {
575 // Look up the full `UserInfo`. The encrypted root key + salt ship to the
576 // client so it can decrypt locally and sign the challenge in the same
577 // round-trip; the daemon never sees the password or the plaintext key.
578 // The non-credential fields (user_database_id, status) ride along so the
579 // client can build the `User` session after proof without a second wire
580 // read of `_users`. If the lookup fails (no such user, disabled,
581 // duplicates), drop back to `PreAuth` and bubble the error.
582 let users_db = instance.users_db().await?;
583 let (user_uuid, user_info) = match lookup_user_record(&users_db, &username).await {
584 Ok(v) => v,
585 Err(e) => {
586 *state = ConnectionState::PreAuth;
587 return Err(e);
588 }
589 };
590
591 let expected_pubkey = user_info.credentials.root_key_id.clone();
592 let challenge = generate_challenge();
593 *state = ConnectionState::AwaitingProof {
594 username,
595 user_uuid: user_uuid.clone(),
596 challenge: challenge.clone(),
597 expected_pubkey,
598 };
599 Ok(ServiceResponse::TrustedLoginChallenge {
600 challenge,
601 user_uuid,
602 user_info,
603 })
604}
605
606/// Handle `ServiceRequest::TrustedLoginProve`: verify the signature against the
607/// stored challenge and either transition to `Authenticated` or drop back
608/// to `PreAuth`.
609fn handle_trusted_login_prove(
610 state: &mut ConnectionState,
611 signature: &[u8],
612) -> crate::Result<ServiceResponse> {
613 let (username, user_uuid, challenge, expected_pubkey) =
614 match std::mem::replace(state, ConnectionState::PreAuth) {
615 ConnectionState::AwaitingProof {
616 username,
617 user_uuid,
618 challenge,
619 expected_pubkey,
620 } => (username, user_uuid, challenge, expected_pubkey),
621 other => {
622 // Restore the (unexpected) state we just took out so subsequent
623 // requests see consistent state.
624 *state = other;
625 return Err(crate::Error::Io(std::io::Error::new(
626 std::io::ErrorKind::InvalidData,
627 "TrustedLoginProve received outside of AwaitingProof state",
628 )));
629 }
630 };
631
632 match verify_challenge_response(&challenge, signature, &expected_pubkey) {
633 Ok(()) => {
634 let mut session_keyset = HashSet::new();
635 session_keyset.insert(expected_pubkey.clone());
636 *state = ConnectionState::Authenticated {
637 username,
638 user_uuid,
639 login_pubkey: expected_pubkey,
640 session_keyset,
641 pending_key_challenges: HashMap::new(),
642 };
643 Ok(ServiceResponse::TrustedLoginOk)
644 }
645 Err(e) => {
646 // Already reset to PreAuth via the mem::replace above.
647 Err(crate::Error::Auth(Box::new(e)))
648 }
649 }
650}
651
652/// Handle `ServiceRequest::SessionKeyChallenge`: mint a single-use challenge
653/// bound to `pubkey` and stash it in the connection's pending-challenges map.
654///
655/// Requires an authenticated connection. A repeat call for the same pubkey
656/// overwrites the prior challenge — last-issued wins, so a stale challenge
657/// can't be replayed.
658fn handle_session_key_challenge(
659 state: &mut ConnectionState,
660 pubkey: PublicKey,
661) -> crate::Result<ServiceResponse> {
662 match state {
663 ConnectionState::Authenticated {
664 pending_key_challenges,
665 ..
666 } => {
667 let challenge = generate_challenge();
668 pending_key_challenges.insert(pubkey, challenge.clone());
669 Ok(ServiceResponse::SessionKeyChallenge { challenge })
670 }
671 _ => Err(crate::Error::Auth(Box::new(
672 AuthError::InvalidAuthConfiguration {
673 reason: "SessionKeyChallenge requires an authenticated connection; \
674 complete TrustedLogin* first"
675 .to_string(),
676 },
677 ))),
678 }
679}
680
681/// Handle `ServiceRequest::SessionKeyRegister`: verify the signature against
682/// the matching pending challenge and, on success, add `pubkey` to the
683/// connection's session keyset.
684///
685/// The challenge is consumed (removed) whether verification succeeds or fails,
686/// so a bad signature can't be retried against the same challenge.
687fn handle_session_key_register(
688 state: &mut ConnectionState,
689 pubkey: PublicKey,
690 signature: &[u8],
691) -> crate::Result<ServiceResponse> {
692 match state {
693 ConnectionState::Authenticated {
694 session_keyset,
695 pending_key_challenges,
696 ..
697 } => {
698 let challenge = pending_key_challenges.remove(&pubkey).ok_or_else(|| {
699 crate::Error::Auth(Box::new(AuthError::InvalidAuthConfiguration {
700 reason: format!(
701 "no outstanding SessionKeyChallenge for pubkey '{pubkey}'; \
702 issue the challenge before registering"
703 ),
704 }))
705 })?;
706 verify_challenge_response(&challenge, signature, &pubkey)
707 .map_err(|e| crate::Error::Auth(Box::new(e)))?;
708 session_keyset.insert(pubkey);
709 Ok(ServiceResponse::Ok)
710 }
711 _ => Err(crate::Error::Auth(Box::new(
712 AuthError::InvalidAuthConfiguration {
713 reason: "SessionKeyRegister requires an authenticated connection; \
714 complete TrustedLogin* first"
715 .to_string(),
716 },
717 ))),
718 }
719}
720
721/// Resolve `pubkey`'s permission against `tree_id`'s `auth_settings` and reject
722/// the request if the resolved level doesn't cover `required`.
723///
724/// If the database doesn't exist on this daemon yet and `require_existing`
725/// is false, the gate passes through so the dispatched op surfaces its own
726/// response (NotFound, empty result, or — for write coordination — a
727/// no-op). This is what keeps the legitimate "create a new database" flow
728/// working: `Database::create` reads tips on the tree before its root entry
729/// has propagated, so an outright denial here would break creation. The
730/// cost is that callers can still distinguish "no such database" from
731/// "exists but no access"; closing that existence-leak channel is filed as
732/// a follow-up.
733///
734/// `require_existing = true` flips that to **fail closed**: an absent
735/// target is denied rather than waved through. Used for the
736/// `SetInstanceMetadata` admin gate (D8) — `_databases` always exists on an
737/// initialized daemon and that op is never a creation flow, so the
738/// create-flow passthrough there would only ever be a fail-open hole that
739/// lets any authenticated user rewrite the daemon's system-DB pointers if
740/// `_databases` were ever unreadable.
741///
742/// When the gate does fire, the denial error is the same shape regardless
743/// of which sub-check failed (key not in auth_settings, mismatched hint,
744/// insufficient permission level): no internal detail about *why* leaks
745/// back over the wire.
746///
747/// System databases (`_users`, `_databases`, `_sync`, `_instance`) are gated
748/// like any other tree: callers must hold the required permission in the
749/// system DB's `auth_settings`. The instance-admin bootstrap (first user on
750/// the device) writes the first user as `Admin(0)` on `_users` and
751/// `_databases`, which is how legitimate administrative access is granted —
752/// the previous hardcoded read exemption is gone. The daemon's device-keyed
753/// local path still handles internal system-database maintenance writes that
754/// originate inside the server.
755async fn gate_tree_permission(
756 instance: &Instance,
757 pubkey: &PublicKey,
758 identity: &SigKey,
759 tree_id: &ID,
760 required: Permission,
761 require_existing: bool,
762) -> crate::Result<()> {
763 let denied = || {
764 crate::Error::Auth(Box::new(AuthError::PermissionDenied {
765 reason: format!("tree {tree_id}: pubkey {pubkey} not permitted for {required:?}"),
766 }))
767 };
768
769 if !instance.has_database(tree_id).await {
770 return if require_existing {
771 Err(denied())
772 } else {
773 Ok(())
774 };
775 }
776
777 let database = Database::open(instance, tree_id).await?;
778 let settings_store = database.get_settings().await?;
779 let auth_settings = settings_store.auth_snapshot().await?;
780
781 let resolved =
782 match resolve_identity_permission(pubkey, identity, &auth_settings, Some(instance)).await {
783 Ok(p) => p,
784 // Resolution failures (key not found, mismatch, etc.) collapse to the
785 // same shape as an insufficient-permission denial, so the client
786 // can't tell whether its identity was unknown or merely too low.
787 Err(_) => return Err(denied()),
788 };
789
790 let allowed = match required {
791 Permission::Read => true,
792 Permission::Write(_) => resolved.can_write(),
793 Permission::Admin(_) => resolved.can_admin(),
794 };
795
796 if !allowed {
797 return Err(denied());
798 }
799
800 Ok(())
801}
802
803/// Per-tree read gate for ops keyed by a raw entry id, which therefore
804/// carry no inline tree id and never hit the pre-dispatch `tree_id()` gate
805/// (`Get`). The tree to authorise against is only knowable *after* the
806/// fetch: it is the entry's claimed `tree.root`, or — for a tree-root
807/// entry, whose `root()` is `None` — the entry's own id.
808///
809/// Model B (hard multi-tenant boundary): system DBs are unencrypted and
810/// protected solely by this gate, so a raw cross-tree `Get` MUST resolve
811/// and check the real owning tree before returning content. Delegates to
812/// `gate_tree_permission`, so the `has_database`-absent passthrough and the
813/// opaque denial shape are identical to the inline-tree-id path.
814async fn gate_entry_read(
815 instance: &Instance,
816 pubkey: &PublicKey,
817 identity: &SigKey,
818 entry: &crate::entry::Entry,
819) -> crate::Result<()> {
820 let owning_tree = entry.root().unwrap_or_else(|| entry.id());
821 // create-flow passthrough (false): a Get against a tree not yet
822 // registered on this daemon must be waved through, same as the
823 // inline-tree-id path.
824 gate_tree_permission(
825 instance,
826 pubkey,
827 identity,
828 &owning_tree,
829 Permission::Read,
830 false,
831 )
832 .await
833}
834
835#[cfg(test)]
836mod tests {
837 use super::*;
838 use crate::backend::database::InMemory;
839 use crate::service::protocol::{Handshake, write_frame};
840
841 /// Helper: start a server on a temp socket, return path + shutdown sender.
842 async fn start_test_server() -> (PathBuf, watch::Sender<()>, Instance) {
843 let dir = tempfile::tempdir().unwrap();
844 let socket_path = dir.keep().join("test.sock");
845 let (instance, _admin) = Instance::create_backend(
846 Box::new(InMemory::new()),
847 crate::NewUser::passwordless("admin"),
848 )
849 .await
850 .unwrap();
851 let (tx, rx) = watch::channel(());
852 let server = ServiceServer::new(instance.clone(), socket_path.clone());
853 tokio::spawn(async move {
854 let _ = server.run(rx).await;
855 });
856 // Wait for the socket to appear (server binds asynchronously). Poll
857 // with a short sleep instead of a fixed delay so a slow sandbox
858 // (where this test was occasionally flaky under `nix build`) doesn't
859 // race the bind step.
860 for _ in 0..50 {
861 if socket_path.exists() {
862 break;
863 }
864 tokio::time::sleep(std::time::Duration::from_millis(10)).await;
865 }
866 (socket_path, tx, instance)
867 }
868
869 #[tokio::test]
870 async fn test_server_starts_and_shuts_down() {
871 let (socket_path, tx, _instance) = start_test_server().await;
872 assert!(socket_path.exists());
873 drop(tx);
874 // Poll for cleanup with the same robustness as the bind wait.
875 for _ in 0..50 {
876 if !socket_path.exists() {
877 break;
878 }
879 tokio::time::sleep(std::time::Duration::from_millis(10)).await;
880 }
881 // Socket should be cleaned up
882 assert!(!socket_path.exists());
883 }
884
885 #[tokio::test]
886 async fn test_wrong_protocol_version() {
887 let (socket_path, _tx, _instance) = start_test_server().await;
888
889 let stream = tokio::net::UnixStream::connect(&socket_path).await.unwrap();
890 let (mut reader, mut writer) = tokio::io::split(stream);
891
892 // Send wrong version
893 let handshake = Handshake {
894 protocol_version: 999,
895 };
896 write_frame(&mut writer, &handshake).await.unwrap();
897
898 // Read ack (server sends its version back)
899 let ack: Option<HandshakeAck> = read_frame(&mut reader).await.unwrap();
900 let ack = ack.unwrap();
901 assert_eq!(ack.protocol_version, PROTOCOL_VERSION);
902
903 // Connection should be closed by server after version mismatch
904 // Next read should get EOF
905 let result: crate::Result<Option<ServiceResponse>> = read_frame(&mut reader).await;
906 assert!(result.unwrap().is_none());
907 }
908
909 /// Load-bearing invariant: `ConnectionState` must never hold plaintext
910 /// signing material in any variant. The daemon participates in storage
911 /// and challenge-response only; the rejected Branch A design held
912 /// decrypted user keys server-side and that boundary was reinstated by
913 /// design (see Service Architecture doc § Decision record).
914 ///
915 /// Structure of the test: construct each variant, destructure with named
916 /// fields (so adding a field forces this test to be edited), and check
917 /// the static type of each field is not `PrivateKey`. A future refactor
918 /// that adds e.g. `decrypted_root_key: PrivateKey` to `Authenticated`
919 /// would either fail the type check at runtime or fail the destructure
920 /// match exhaustiveness at compile time.
921 #[test]
922 fn connection_state_never_holds_private_key() {
923 use crate::auth::crypto::{PrivateKey, generate_keypair};
924 use std::any::TypeId;
925
926 fn assert_not_private_key<T: 'static>(_value: &T, label: &str) {
927 assert_ne!(
928 TypeId::of::<T>(),
929 TypeId::of::<PrivateKey>(),
930 "ConnectionState field `{label}` is PrivateKey — daemon must not hold plaintext keys"
931 );
932 }
933
934 let (_signing, pubkey) = generate_keypair();
935 let states = [
936 ConnectionState::PreAuth,
937 ConnectionState::AwaitingProof {
938 username: "u".to_string(),
939 user_uuid: "uu".to_string(),
940 challenge: vec![1, 2, 3],
941 expected_pubkey: pubkey.clone(),
942 },
943 ConnectionState::Authenticated {
944 username: "u".to_string(),
945 user_uuid: "uu".to_string(),
946 login_pubkey: pubkey.clone(),
947 session_keyset: {
948 let mut s = HashSet::new();
949 s.insert(pubkey);
950 s
951 },
952 pending_key_challenges: HashMap::new(),
953 },
954 ];
955
956 for state in &states {
957 match state {
958 ConnectionState::PreAuth => {}
959 ConnectionState::AwaitingProof {
960 username,
961 user_uuid,
962 challenge,
963 expected_pubkey,
964 } => {
965 assert_not_private_key(username, "AwaitingProof::username");
966 assert_not_private_key(user_uuid, "AwaitingProof::user_uuid");
967 assert_not_private_key(challenge, "AwaitingProof::challenge");
968 assert_not_private_key(expected_pubkey, "AwaitingProof::expected_pubkey");
969 }
970 ConnectionState::Authenticated {
971 username,
972 user_uuid,
973 login_pubkey,
974 session_keyset,
975 pending_key_challenges,
976 } => {
977 assert_not_private_key(username, "Authenticated::username");
978 assert_not_private_key(user_uuid, "Authenticated::user_uuid");
979 assert_not_private_key(login_pubkey, "Authenticated::login_pubkey");
980 for k in session_keyset {
981 assert_not_private_key(k, "Authenticated::session_keyset entry");
982 }
983 for (k, ch) in pending_key_challenges {
984 assert_not_private_key(k, "Authenticated::pending_key_challenges key");
985 assert_not_private_key(
986 ch,
987 "Authenticated::pending_key_challenges challenge",
988 );
989 }
990 }
991 }
992 }
993 }
994
995 #[tokio::test]
996 async fn test_authenticated_request_rejected_without_login() {
997 // Companion to the integration test `test_unauthenticated_backend_op_rejected`
998 // — exercises the same gate path against the raw protocol so a
999 // regression here surfaces immediately, not just at the
1000 // `RemoteConnection` layer.
1001 let (socket_path, _tx, _instance) = start_test_server().await;
1002
1003 let stream = tokio::net::UnixStream::connect(&socket_path).await.unwrap();
1004 let (mut reader, mut writer) = tokio::io::split(stream);
1005
1006 write_frame(
1007 &mut writer,
1008 &Handshake {
1009 protocol_version: PROTOCOL_VERSION,
1010 },
1011 )
1012 .await
1013 .unwrap();
1014 let _ack: Option<HandshakeAck> = read_frame(&mut reader).await.unwrap();
1015
1016 // Send an AuthenticatedDb request without completing TrustedLogin.
1017 write_frame(
1018 &mut writer,
1019 &ServiceRequest::AuthenticatedDb(Box::new(AuthenticatedDbRequest {
1020 root_id: crate::entry::ID::default(),
1021 identity: crate::auth::types::SigKey::default(),
1022 op: DatabaseOp::GetEntry {
1023 id: crate::entry::ID::from_bytes("nonexistent"),
1024 },
1025 })),
1026 )
1027 .await
1028 .unwrap();
1029
1030 let resp: Option<ServiceResponse> = read_frame(&mut reader).await.unwrap();
1031 match resp.unwrap() {
1032 ServiceResponse::Error(e) => {
1033 assert_eq!(
1034 e.module, "auth",
1035 "expected an auth-module error from the gate; got {e:?}"
1036 );
1037 }
1038 other => panic!("Expected gate Error, got {other:?}"),
1039 }
1040 }
1041
1042 #[tokio::test]
1043 async fn test_get_instance_metadata() {
1044 let (socket_path, _tx, _instance) = start_test_server().await;
1045
1046 let stream = tokio::net::UnixStream::connect(&socket_path).await.unwrap();
1047 let (mut reader, mut writer) = tokio::io::split(stream);
1048
1049 // Handshake
1050 write_frame(
1051 &mut writer,
1052 &Handshake {
1053 protocol_version: PROTOCOL_VERSION,
1054 },
1055 )
1056 .await
1057 .unwrap();
1058 let _ack: Option<HandshakeAck> = read_frame(&mut reader).await.unwrap();
1059
1060 // Request metadata
1061 write_frame(&mut writer, &ServiceRequest::GetInstanceMetadata)
1062 .await
1063 .unwrap();
1064
1065 let resp: Option<ServiceResponse> = read_frame(&mut reader).await.unwrap();
1066 match resp.unwrap() {
1067 ServiceResponse::InstanceMetadata(Some(_meta)) => {
1068 // Server was initialized so metadata should exist
1069 }
1070 other => panic!("Expected InstanceMetadata(Some), got {other:?}"),
1071 }
1072 }
1073
1074 #[tokio::test]
1075 async fn test_stale_socket_cleanup() {
1076 let dir = tempfile::tempdir().unwrap();
1077 let socket_path = dir.path().join("test.sock");
1078
1079 // Create a stale socket file
1080 tokio::fs::write(&socket_path, "stale").await.unwrap();
1081 assert!(socket_path.exists());
1082
1083 let (instance, _admin) = Instance::create_backend(
1084 Box::new(InMemory::new()),
1085 crate::NewUser::passwordless("admin"),
1086 )
1087 .await
1088 .unwrap();
1089 let (_tx, rx) = watch::channel(());
1090 let server = ServiceServer::new(instance, socket_path.clone());
1091
1092 // Server should remove stale socket and bind successfully
1093 let handle = tokio::spawn(async move { server.run(rx).await });
1094
1095 // The server binds asynchronously; poll until it accepts a connection
1096 // rather than racing a fixed sleep (flaky under parallel test load).
1097 let mut stream = None;
1098 for _ in 0..200 {
1099 if let Ok(s) = tokio::net::UnixStream::connect(&socket_path).await {
1100 stream = Some(s);
1101 break;
1102 }
1103 tokio::time::sleep(std::time::Duration::from_millis(10)).await;
1104 }
1105 assert!(
1106 stream.is_some(),
1107 "server did not bind a connectable socket in time"
1108 );
1109
1110 handle.abort();
1111 }
1112
1113 /// D8 regression: `require_existing` flips the create-flow passthrough
1114 /// to fail-closed. An absent target is waved through with `false` (the
1115 /// `Database::create` path) but denied with `true` (the
1116 /// `SetInstanceMetadata` admin gate), so an unreadable `_databases`
1117 /// can't become a fail-open hole.
1118 #[tokio::test]
1119 async fn test_gate_require_existing_fails_closed_on_absent_db() {
1120 use crate::auth::crypto::generate_keypair;
1121
1122 let (instance, _admin) = Instance::create_backend(
1123 Box::new(InMemory::new()),
1124 crate::NewUser::passwordless("admin"),
1125 )
1126 .await
1127 .unwrap();
1128 let (_sk, pubkey) = generate_keypair();
1129 let absent = ID::from_bytes("no-such-tree");
1130
1131 gate_tree_permission(
1132 &instance,
1133 &pubkey,
1134 &SigKey::default(),
1135 &absent,
1136 Permission::Admin(0),
1137 false,
1138 )
1139 .await
1140 .expect("create-flow passthrough must wave an absent tree through");
1141
1142 let err = gate_tree_permission(
1143 &instance,
1144 &pubkey,
1145 &SigKey::default(),
1146 &absent,
1147 Permission::Admin(0),
1148 true,
1149 )
1150 .await
1151 .expect_err("require_existing must deny an absent tree");
1152 assert!(
1153 matches!(&err, crate::Error::Auth(b) if matches!(**b, AuthError::PermissionDenied { .. })),
1154 "expected PermissionDenied, got: {err:?}",
1155 );
1156 }
1157}