Development Documentation (main branch) - For stable release docs, see docs.rs/eidetica
Skip to main content

eidetica/service/
server.rs

1//! Service server: accepts Unix socket connections and dispatches `BackendImpl` operations.
2//!
3//! The server wraps an `Instance` (not just a backend) so it can handle write
4//! notifications through the Instance's callback system.
5
6use std::collections::{HashMap, HashSet};
7use std::os::unix::fs::PermissionsExt;
8use std::path::{Path, PathBuf};
9
10use tokio::net::UnixListener;
11use tokio::sync::watch;
12
13use crate::Instance;
14use crate::auth::crypto::{PublicKey, generate_challenge, verify_challenge_response};
15use crate::auth::errors::AuthError;
16use crate::auth::types::{Permission, SigKey};
17use crate::auth::validation::permissions::resolve_identity_permission;
18use crate::backend::CacheScope;
19use crate::database::Database;
20use crate::entry::ID;
21use crate::instance::WriteSource;
22use crate::service::error::ServiceError;
23use crate::service::protocol::{
24    AuthenticatedDbRequest, DatabaseOp, HandshakeAck, MergeState, PROTOCOL_VERSION, ServiceRequest,
25    ServiceResponse, read_frame, write_frame,
26};
27use crate::user::system_databases::lookup_user_record;
28
/// Per-connection authentication state.
///
/// A connection moves `PreAuth → AwaitingProof → Authenticated` on a successful
/// `TrustedLoginUser` / `TrustedLoginProve` exchange. A failed proof drops the
/// connection back to `PreAuth` so the client can retry without reconnecting.
/// The intended contract is that any other request while in `AwaitingProof`
/// also resets the state, so a half-finished login can't be exploited
/// mid-flight.
/// NOTE(review): confirm the request dispatcher actually enforces that reset;
/// the proof handler only covers the `TrustedLoginProve` path.
///
/// "Trusted" refers to the assumption that whoever can reach this socket is
/// already authorised by filesystem permissions (mode 0600 under
/// `$XDG_RUNTIME_DIR`); see the protocol module docs and the Service
/// Architecture brain doc § Trusted login threat model.
#[derive(Debug, Clone)]
enum ConnectionState {
    /// No login attempt yet, or last attempt failed/abandoned.
    PreAuth,
    /// `TrustedLoginUser` succeeded; waiting for the client's `TrustedLoginProve`.
    AwaitingProof {
        /// Username supplied in the `TrustedLoginUser` request.
        username: String,
        /// Identifier of the user's record, as returned by the user lookup.
        user_uuid: String,
        /// Challenge bytes issued to the client; the proof signature is
        /// verified against exactly these bytes.
        challenge: Vec<u8>,
        /// The user's root key (`credentials.root_key_id`); the proof must
        /// verify under this key.
        expected_pubkey: PublicKey,
    },
    /// Login completed. `login_pubkey` is the verified root pubkey for the
    /// user, established at `TrustedLoginProve` time. `session_keyset` is the
    /// set of pubkeys the client has further proven possession of via
    /// `SessionKeyChallenge`/`SessionKeyRegister`; it always contains
    /// `login_pubkey` and may include additional per-DB keys the user owns.
    /// The dispatch path for `Authenticated`/`AuthenticatedDb` requests
    /// validates the identity hint against this set and gates against the
    /// resulting *acting* pubkey, so a single connection can drive ops on
    /// databases authored by any key the user has proven they hold.
    /// `user_uuid` is the per-user scope key for the unified CRDT-state
    /// cache (see [`crate::backend::CacheScope::User`]).
    /// `pending_key_challenges` holds outstanding registration challenges,
    /// keyed by the pubkey the challenge was issued for. Each challenge is
    /// single-use: the matching `SessionKeyRegister` consumes it whether
    /// verification succeeds or fails.
    #[allow(dead_code)] // username surfaces in audit/logging follow-ups
    Authenticated {
        /// Username the session logged in as (currently unread; see the
        /// `allow(dead_code)` note above).
        username: String,
        /// Per-user cache scope key, carried over from `AwaitingProof`.
        user_uuid: String,
        /// Root pubkey verified by the login proof.
        login_pubkey: PublicKey,
        /// Every pubkey this connection has proven possession of; seeded
        /// with `login_pubkey` at login time.
        session_keyset: HashSet<PublicKey>,
        /// Outstanding single-use registration challenges, keyed by the
        /// pubkey each one was issued for.
        pending_key_challenges: HashMap<PublicKey, Vec<u8>>,
    },
}
76
/// Eidetica service server that listens on a Unix domain socket.
///
/// The server wraps a full `Instance` so it can dispatch both storage operations
/// (via the backend) and write callbacks (via `Instance::put_entry()`'s notification path).
///
/// CRDT-state caching lives in the underlying `BackendImpl` (scope-keyed
/// via [`crate::backend::CacheScope`]); wire handlers route through
/// `instance.backend()` directly rather than keeping a separate
/// service-layer cache.
pub struct ServiceServer {
    /// The instance being served; a clone is handed to every connection task.
    instance: Instance,
    /// Filesystem path the Unix domain socket is bound at.
    socket_path: PathBuf,
}
90
91impl ServiceServer {
92    /// Create a new service server.
93    ///
94    /// # Arguments
95    /// * `instance` - The Instance to serve. The server holds a strong reference.
96    /// * `socket_path` - Path for the Unix domain socket.
97    pub fn new(instance: Instance, socket_path: impl Into<PathBuf>) -> Self {
98        Self {
99            instance,
100            socket_path: socket_path.into(),
101        }
102    }
103
104    /// Get the socket path.
105    pub fn socket_path(&self) -> &Path {
106        &self.socket_path
107    }
108
109    /// Run the server until the shutdown signal is received.
110    ///
111    /// Removes any stale socket file, creates the parent directory, binds the
112    /// listener, and loops accepting connections. Each connection is handled in
113    /// a spawned task. On shutdown, the socket file is cleaned up.
114    ///
115    /// # Arguments
116    /// * `shutdown` - A watch receiver; the server stops when the sender is dropped.
117    pub async fn run(&self, mut shutdown: watch::Receiver<()>) -> crate::Result<()> {
118        // Remove stale socket if it exists
119        if self.socket_path.exists() {
120            tokio::fs::remove_file(&self.socket_path).await?;
121        }
122
123        // Create parent directory with owner-only permissions (0700)
124        if let Some(parent) = self.socket_path.parent() {
125            tokio::fs::create_dir_all(parent).await?;
126            tokio::fs::set_permissions(parent, std::fs::Permissions::from_mode(0o700)).await?;
127        }
128
129        let listener = UnixListener::bind(&self.socket_path)?;
130
131        // Restrict socket to owner-only access (0600)
132        tokio::fs::set_permissions(&self.socket_path, std::fs::Permissions::from_mode(0o600))
133            .await?;
134
135        tracing::info!("Service server listening on {}", self.socket_path.display());
136
137        loop {
138            tokio::select! {
139                accept_result = listener.accept() => {
140                    match accept_result {
141                        Ok((stream, _addr)) => {
142                            let instance = self.instance.clone();
143                            tokio::spawn(async move {
144                                if let Err(e) = handle_connection(stream, instance).await {
145                                    tracing::debug!("Connection handler error: {e}");
146                                }
147                            });
148                        }
149                        Err(e) => {
150                            tracing::error!("Failed to accept connection: {e}");
151                        }
152                    }
153                }
154                _ = shutdown.changed() => {
155                    tracing::info!("Service server shutting down");
156                    break;
157                }
158            }
159        }
160
161        // Clean up socket file
162        let _ = tokio::fs::remove_file(&self.socket_path).await;
163        Ok(())
164    }
165}
166
167/// Handle a single client connection.
168async fn handle_connection(
169    stream: tokio::net::UnixStream,
170    instance: Instance,
171) -> crate::Result<()> {
172    let (mut reader, mut writer) = tokio::io::split(stream);
173
174    // 1. Read and validate handshake
175    let handshake: crate::service::protocol::Handshake = match read_frame(&mut reader).await? {
176        Some(h) => h,
177        None => return Ok(()), // Client disconnected before handshake
178    };
179
180    if handshake.protocol_version != PROTOCOL_VERSION {
181        // Send error ack and close
182        let ack = HandshakeAck {
183            protocol_version: PROTOCOL_VERSION,
184        };
185        write_frame(&mut writer, &ack).await?;
186        return Err(crate::Error::Io(std::io::Error::new(
187            std::io::ErrorKind::InvalidData,
188            format!(
189                "Protocol version mismatch: client={}, server={}",
190                handshake.protocol_version, PROTOCOL_VERSION
191            ),
192        )));
193    }
194
195    // Send handshake ack
196    let ack = HandshakeAck {
197        protocol_version: PROTOCOL_VERSION,
198    };
199    write_frame(&mut writer, &ack).await?;
200
201    // 2. Request/response loop with per-connection auth state
202    let mut state = ConnectionState::PreAuth;
203    let loop_result: crate::Result<()> = async {
204        loop {
205            let request: ServiceRequest = match read_frame(&mut reader).await? {
206                Some(req) => req,
207                None => break, // Clean EOF
208            };
209
210            let response = dispatch(&instance, &mut state, request).await;
211            write_frame(&mut writer, &response).await?;
212        }
213        Ok(())
214    }
215    .await;
216
217    // Session teardown: nothing to reclaim. The unified CRDT-state cache
218    // lives in `BackendImpl` and is bounded by its own eviction policy
219    // (byte-bounded LRU on the in-memory backend; disk-bounded on SQL).
220    // Per-user cache slots survive disconnect intentionally so a
221    // reconnecting client recovers materialized state from tier 2 without
222    // recomputing from entries.
223
224    loop_result
225}
226
227/// Dispatch a service request to the appropriate Instance/Backend method.
228async fn dispatch(
229    instance: &Instance,
230    state: &mut ConnectionState,
231    request: ServiceRequest,
232) -> ServiceResponse {
233    match dispatch_inner(instance, state, request).await {
234        Ok(resp) => resp,
235        Err(e) => ServiceResponse::Error(ServiceError::from(&e)),
236    }
237}
238
239/// Inner dispatch that returns Result for ergonomic error handling.
240async fn dispatch_inner(
241    instance: &Instance,
242    state: &mut ConnectionState,
243    request: ServiceRequest,
244) -> crate::Result<ServiceResponse> {
245    match request {
246        // === Pre-auth: login handshake ===
247        ServiceRequest::TrustedLoginUser { username } => {
248            handle_trusted_login_user(instance, state, username).await
249        }
250        ServiceRequest::TrustedLoginProve { signature } => {
251            handle_trusted_login_prove(state, &signature)
252        }
253
254        // === Pre-auth: server identity ===
255        ServiceRequest::GetInstanceMetadata => {
256            let metadata = instance.backend().get_instance_metadata().await?;
257            Ok(ServiceResponse::InstanceMetadata(metadata))
258        }
259
260        // === Post-auth: extend the session keyset ===
261        ServiceRequest::SessionKeyChallenge { pubkey } => {
262            handle_session_key_challenge(state, pubkey)
263        }
264        ServiceRequest::SessionKeyRegister { pubkey, signature } => {
265            handle_session_key_register(state, pubkey, &signature)
266        }
267
268        // === Authenticated storage operations ===
269        //
270        // Gate 1: the connection must have completed `TrustedLogin*`. Gate 2:
271        // the per-tree permission gate. Every `DatabaseOp` carries its target
272        // `root_id` explicitly, so the gate is *unconditional* — there is no
273        // tree-less op to fall through it — with two exceptions handled below
274        // (`SubmitSignedEntry`, verification-gated; `SetInstanceMetadata`,
275        // gated against the server-known `_databases`, not the request root).
276        ServiceRequest::AuthenticatedDb(inner) => {
277            let (login_pubkey, keyset_snapshot, session_user_uuid) = match state {
278                ConnectionState::Authenticated {
279                    login_pubkey,
280                    session_keyset,
281                    user_uuid,
282                    ..
283                } => (
284                    login_pubkey.clone(),
285                    session_keyset.clone(),
286                    user_uuid.clone(),
287                ),
288                _ => {
289                    return Err(crate::Error::Auth(Box::new(
290                        AuthError::InvalidAuthConfiguration {
291                            reason: "database operation requires an authenticated connection; \
292                                 complete TrustedLogin* first"
293                                .to_string(),
294                        },
295                    )));
296                }
297            };
298
299            let AuthenticatedDbRequest {
300                root_id,
301                identity,
302                op,
303            } = *inner;
304
305            // Submit is verification-gated, not session-gated.
306            //
307            // Reads are session-gated (confidentiality boundary); submits
308            // are verification-gated (integrity boundary). `SubmitSignedEntry`
309            // requires only an *authenticated* connection (gate 1, the
310            // `ConnectionState::Authenticated` match above, still applies).
311            // Which tree the entry belongs to, and whether its signer may
312            // write that tree, is decided by the server's own verification
313            // pass in the handler (store `Unverified`, then
314            // `Database::open(...).verify()`) against the tree's *real*
315            // pinned auth lineage — not by who holds the socket. An attacker
316            // without a key the tree's auth grants cannot produce a
317            // `Verified` entry, and unverified junk is excluded from every
318            // default read by the frontier cut, so the per-tree session gate
319            // adds no correctness or isolation property here; it only blocks
320            // a legitimate transporter (e.g. an admin session carrying a
321            // user-signed genesis). See the verification-gated-submit design
322            // doc for the full threat analysis.
323            let is_submit = matches!(op, DatabaseOp::SubmitSignedEntry { .. });
324
325            // `SetInstanceMetadata` rewrites the daemon's pointers to its own
326            // system DBs. It is gated against `_databases` (a server-known
327            // tree), not the request's `root_id`, so an instance admin — by
328            // construction a user with Admin on `_databases` via the
329            // first-user bootstrap — is required. Fail closed (require_existing
330            // = true): `_databases` always exists on an initialized daemon and
331            // this is never a creation flow, so the create-flow passthrough
332            // must not apply (it would let any authenticated user rewrite
333            // system-DB pointers if `_databases` were ever unreadable).
334            let is_set_metadata = matches!(op, DatabaseOp::SetInstanceMetadata { .. });
335
336            // Submit accepts any identity hint (admin transports user-signed
337            // entries); every other op resolves an acting pubkey from the
338            // keyset and gates per-tree against it.
339            let acting_pubkey = if is_submit {
340                // Use the hint if it parses as a pubkey (for submit metadata),
341                // otherwise fall back to login_pubkey — submit doesn't gate
342                // on this value.
343                identity
344                    .hint()
345                    .pubkey
346                    .clone()
347                    .unwrap_or_else(|| login_pubkey.clone())
348            } else {
349                resolve_acting_pubkey(&identity, &login_pubkey, &keyset_snapshot)?
350            };
351
352            // Per-tree permission gate. Unconditional for every op *except*
353            // submit (verification in the handler is its boundary) and
354            // set-metadata (gated against `_databases` below). Create-flow
355            // passthrough (false) for the rest, so a not-yet-propagated tree is
356            // waved through and database creation works.
357            if is_set_metadata {
358                gate_tree_permission(
359                    instance,
360                    &acting_pubkey,
361                    &identity,
362                    instance.databases_db_id(),
363                    Permission::Admin(0),
364                    true,
365                )
366                .await?;
367            } else if !is_submit {
368                gate_tree_permission(
369                    instance,
370                    &acting_pubkey,
371                    &identity,
372                    &root_id,
373                    op.required_permission(),
374                    false,
375                )
376                .await?;
377            }
378
379            dispatch_database_op(
380                instance,
381                &acting_pubkey,
382                &identity,
383                &session_user_uuid,
384                root_id,
385                op,
386            )
387            .await
388        }
389    }
390}
391
392/// Resolve the *acting* pubkey for a session-gated op.
393///
394/// The identity hint, when present, must be in the connection's session
395/// keyset (proof of possession registered via `SessionKeyChallenge` /
396/// `SessionKeyRegister`, or established at login time). Returning the hint
397/// as the acting pubkey lets the per-tree gate check the actual key the
398/// caller wants to act as, not the connection-wide login key.
399///
400/// An absent hint defaults to the login pubkey — matches the pre-keyset
401/// behavior where every op acted as the login identity.
402fn resolve_acting_pubkey(
403    identity: &SigKey,
404    login_pubkey: &PublicKey,
405    session_keyset: &HashSet<PublicKey>,
406) -> crate::Result<PublicKey> {
407    match &identity.hint().pubkey {
408        Some(claimed) if session_keyset.contains(claimed) => Ok(claimed.clone()),
409        Some(claimed) => Err(crate::Error::Auth(Box::new(
410            AuthError::SigningKeyMismatch {
411                reason: format!(
412                    "request identity claims pubkey '{claimed}' but it is not in the session keyset; \
413                     register it first via SessionKeyChallenge/SessionKeyRegister"
414                ),
415            },
416        ))),
417        None => Ok(login_pubkey.clone()),
418    }
419}
420
421/// Dispatch a Database-level op against the server's local `Database`.
422///
423/// Additive sibling of `dispatch_backend_op`. The caller has already verified
424/// the session identity and run the unconditional per-tree permission gate on
425/// `root_id`. Because the server runs the `Database` layer here, verify-on-read
426/// and the Verified frontier are server-side **by construction**.
427async fn dispatch_database_op(
428    instance: &Instance,
429    acting_pubkey: &PublicKey,
430    identity: &SigKey,
431    user_uuid: &str,
432    root_id: ID,
433    op: DatabaseOp,
434) -> crate::Result<ServiceResponse> {
435    match op {
436        DatabaseOp::GetEntry { id } => {
437            let entry = instance.backend().get(&id).await?;
438            // Post-fetch owning-tree Read gate: a raw entry id carries no
439            // inline tree, so the pre-dispatch gate (which keys on `root_id`)
440            // could not cover the entry's real owning tree.
441            gate_entry_read(instance, acting_pubkey, identity, &entry).await?;
442            Ok(ServiceResponse::Entry(entry))
443        }
444
445        DatabaseOp::GetVerifiedTips => {
446            // The server runs the Database layer, so `snapshot()` returns the
447            // Verified frontier by construction — no client-side verify, no
448            // remote-detection heuristic.
449            let db = Database::open(instance, &root_id).await?;
450            let snapshot = db.snapshot().await?;
451            Ok(ServiceResponse::Ids(snapshot.into_tips()))
452        }
453
454        DatabaseOp::SubmitSignedEntry { entry } => {
455            // The client signed this entry; the server does NOT trust its
456            // claimed validity. Store it `Unverified`, then run our OWN
457            // verification pass against the entry's pinned settings. A
458            // poisoned entry never reaches `Verified` and is excluded from
459            // every default read by the frontier cut — D1 is closed by
460            // construction here, not by gate-hardening a raw `Put`.
461            instance
462                .put_entry(
463                    &root_id,
464                    crate::backend::VerificationStatus::Unverified,
465                    *entry,
466                    WriteSource::Remote,
467                )
468                .await?;
469            Database::open(instance, &root_id).await?.verify().await?;
470            Ok(ServiceResponse::Ok)
471        }
472
473        DatabaseOp::BeginTransaction { stores, scope } => {
474            // Single-sourced: both this handler and the Phase-3 remote seam
475            // call `Database::transaction_context`, so `Transaction::commit`'s
476            // build-sign path has one source of truth.
477            let db = Database::open(instance, &root_id).await?;
478            let ctx = db.transaction_context(&stores, scope).await?;
479            Ok(ServiceResponse::TransactionContext(ctx))
480        }
481
482        DatabaseOp::GetStoreState { store } => {
483            // Server-materialized merged state (unencrypted stores only).
484            // Encrypted stores must use GetStoreEntries instead — the
485            // ephemeral transaction here has no encryptor, and Doc
486            // deserialization would fail on ciphertext.
487            let db = Database::open(instance, &root_id).await?;
488            let value = db.get_store_state(&store).await?;
489            Ok(ServiceResponse::CrdtValue(value))
490        }
491
492        DatabaseOp::GetStoreEntries { store, tips, scope } => {
493            // Universal primitive (encrypted + unencrypted): returns raw
494            // Entry records with opaque data in canonical CRDT replay order.
495            // For encrypted stores the client decrypts+merges locally.
496            let db = Database::open(instance, &root_id).await?;
497            let entries = db.get_store_entries(&store, &tips, scope).await?;
498            Ok(ServiceResponse::Entries(entries))
499        }
500
501        DatabaseOp::GetStoreTipsUpToEntries { store, up_to } => {
502            let db = Database::open(instance, &root_id).await?;
503            let boundary = crate::Snapshot::from(up_to);
504            let snapshot = db
505                .ops()
506                .store_snapshot_at(&root_id, &store, &boundary)
507                .await?;
508            Ok(ServiceResponse::Ids(snapshot.into_tips()))
509        }
510
511        DatabaseOp::ComputeMergeState { store, entry_ids } => {
512            let db = Database::open(instance, &root_id).await?;
513            let merge_base = db
514                .ops()
515                .find_merge_base(&root_id, &store, &entry_ids)
516                .await?;
517            let path = db
518                .ops()
519                .get_path_from_to(&root_id, &store, &merge_base, &entry_ids)
520                .await?;
521            Ok(ServiceResponse::MergeState(MergeState { merge_base, path }))
522        }
523
524        DatabaseOp::GetCachedCrdtState { store, key } => {
525            // Per-tree Read gate already ran above. Try the caller's own
526            // User-scoped slot first (where client-uploaded ciphertext for
527            // encrypted stores lives), then fall back to Shared (where the
528            // daemon's own materialization of unencrypted stores lives).
529            // The fallback is what gives cross-user dedup on plaintext
530            // stores: alice triggers a server materialization, blob lands
531            // in Shared, bob's later read finds it without recomputing.
532            let backend = instance.require_local_engine()?;
533            let mut blob = backend
534                .get_cached_crdt_state(&CacheScope::User(user_uuid.to_string()), &key, &store)
535                .await?;
536            if blob.is_none() {
537                blob = backend
538                    .get_cached_crdt_state(&CacheScope::Shared, &key, &store)
539                    .await?;
540            }
541            Ok(ServiceResponse::CachedCrdtState(blob))
542        }
543
544        DatabaseOp::CacheCrdtState { store, key, blob } => {
545            // Per-tree Read gate already ran above. Per-user trust: the
546            // blob is opaque (cipher- or plaintext) and stored verbatim;
547            // only the submitting user can read it back. We never promote
548            // a client upload to Shared — the daemon can't verify the
549            // merge result, so cross-user visibility would be a poison
550            // vector. Shared writes only come from the daemon's own
551            // in-process (LocalBackend) materialization path.
552            instance
553                .require_local_engine()?
554                .cache_crdt_state(CacheScope::User(user_uuid.to_string()), &key, &store, blob)
555                .await?;
556            Ok(ServiceResponse::Ok)
557        }
558
559        DatabaseOp::SetInstanceMetadata { metadata } => {
560            // Admin-on-`_databases` gate already ran in the dispatcher (against
561            // the server-known system tree, not `root_id`).
562            instance.backend().set_instance_metadata(&metadata).await?;
563            Ok(ServiceResponse::Ok)
564        }
565    }
566}
567
568/// Handle `ServiceRequest::TrustedLoginUser`: look up the user's full record,
569/// mint a challenge, and move the connection into `AwaitingProof`.
570async fn handle_trusted_login_user(
571    instance: &Instance,
572    state: &mut ConnectionState,
573    username: String,
574) -> crate::Result<ServiceResponse> {
575    // Look up the full `UserInfo`. The encrypted root key + salt ship to the
576    // client so it can decrypt locally and sign the challenge in the same
577    // round-trip; the daemon never sees the password or the plaintext key.
578    // The non-credential fields (user_database_id, status) ride along so the
579    // client can build the `User` session after proof without a second wire
580    // read of `_users`. If the lookup fails (no such user, disabled,
581    // duplicates), drop back to `PreAuth` and bubble the error.
582    let users_db = instance.users_db().await?;
583    let (user_uuid, user_info) = match lookup_user_record(&users_db, &username).await {
584        Ok(v) => v,
585        Err(e) => {
586            *state = ConnectionState::PreAuth;
587            return Err(e);
588        }
589    };
590
591    let expected_pubkey = user_info.credentials.root_key_id.clone();
592    let challenge = generate_challenge();
593    *state = ConnectionState::AwaitingProof {
594        username,
595        user_uuid: user_uuid.clone(),
596        challenge: challenge.clone(),
597        expected_pubkey,
598    };
599    Ok(ServiceResponse::TrustedLoginChallenge {
600        challenge,
601        user_uuid,
602        user_info,
603    })
604}
605
606/// Handle `ServiceRequest::TrustedLoginProve`: verify the signature against the
607/// stored challenge and either transition to `Authenticated` or drop back
608/// to `PreAuth`.
609fn handle_trusted_login_prove(
610    state: &mut ConnectionState,
611    signature: &[u8],
612) -> crate::Result<ServiceResponse> {
613    let (username, user_uuid, challenge, expected_pubkey) =
614        match std::mem::replace(state, ConnectionState::PreAuth) {
615            ConnectionState::AwaitingProof {
616                username,
617                user_uuid,
618                challenge,
619                expected_pubkey,
620            } => (username, user_uuid, challenge, expected_pubkey),
621            other => {
622                // Restore the (unexpected) state we just took out so subsequent
623                // requests see consistent state.
624                *state = other;
625                return Err(crate::Error::Io(std::io::Error::new(
626                    std::io::ErrorKind::InvalidData,
627                    "TrustedLoginProve received outside of AwaitingProof state",
628                )));
629            }
630        };
631
632    match verify_challenge_response(&challenge, signature, &expected_pubkey) {
633        Ok(()) => {
634            let mut session_keyset = HashSet::new();
635            session_keyset.insert(expected_pubkey.clone());
636            *state = ConnectionState::Authenticated {
637                username,
638                user_uuid,
639                login_pubkey: expected_pubkey,
640                session_keyset,
641                pending_key_challenges: HashMap::new(),
642            };
643            Ok(ServiceResponse::TrustedLoginOk)
644        }
645        Err(e) => {
646            // Already reset to PreAuth via the mem::replace above.
647            Err(crate::Error::Auth(Box::new(e)))
648        }
649    }
650}
651
652/// Handle `ServiceRequest::SessionKeyChallenge`: mint a single-use challenge
653/// bound to `pubkey` and stash it in the connection's pending-challenges map.
654///
655/// Requires an authenticated connection. A repeat call for the same pubkey
656/// overwrites the prior challenge — last-issued wins, so a stale challenge
657/// can't be replayed.
658fn handle_session_key_challenge(
659    state: &mut ConnectionState,
660    pubkey: PublicKey,
661) -> crate::Result<ServiceResponse> {
662    match state {
663        ConnectionState::Authenticated {
664            pending_key_challenges,
665            ..
666        } => {
667            let challenge = generate_challenge();
668            pending_key_challenges.insert(pubkey, challenge.clone());
669            Ok(ServiceResponse::SessionKeyChallenge { challenge })
670        }
671        _ => Err(crate::Error::Auth(Box::new(
672            AuthError::InvalidAuthConfiguration {
673                reason: "SessionKeyChallenge requires an authenticated connection; \
674                     complete TrustedLogin* first"
675                    .to_string(),
676            },
677        ))),
678    }
679}
680
681/// Handle `ServiceRequest::SessionKeyRegister`: verify the signature against
682/// the matching pending challenge and, on success, add `pubkey` to the
683/// connection's session keyset.
684///
685/// The challenge is consumed (removed) whether verification succeeds or fails,
686/// so a bad signature can't be retried against the same challenge.
687fn handle_session_key_register(
688    state: &mut ConnectionState,
689    pubkey: PublicKey,
690    signature: &[u8],
691) -> crate::Result<ServiceResponse> {
692    match state {
693        ConnectionState::Authenticated {
694            session_keyset,
695            pending_key_challenges,
696            ..
697        } => {
698            let challenge = pending_key_challenges.remove(&pubkey).ok_or_else(|| {
699                crate::Error::Auth(Box::new(AuthError::InvalidAuthConfiguration {
700                    reason: format!(
701                        "no outstanding SessionKeyChallenge for pubkey '{pubkey}'; \
702                         issue the challenge before registering"
703                    ),
704                }))
705            })?;
706            verify_challenge_response(&challenge, signature, &pubkey)
707                .map_err(|e| crate::Error::Auth(Box::new(e)))?;
708            session_keyset.insert(pubkey);
709            Ok(ServiceResponse::Ok)
710        }
711        _ => Err(crate::Error::Auth(Box::new(
712            AuthError::InvalidAuthConfiguration {
713                reason: "SessionKeyRegister requires an authenticated connection; \
714                     complete TrustedLogin* first"
715                    .to_string(),
716            },
717        ))),
718    }
719}
720
721/// Resolve `pubkey`'s permission against `tree_id`'s `auth_settings` and reject
722/// the request if the resolved level doesn't cover `required`.
723///
724/// If the database doesn't exist on this daemon yet and `require_existing`
725/// is false, the gate passes through so the dispatched op surfaces its own
726/// response (NotFound, empty result, or — for write coordination — a
727/// no-op). This is what keeps the legitimate "create a new database" flow
728/// working: `Database::create` reads tips on the tree before its root entry
729/// has propagated, so an outright denial here would break creation. The
730/// cost is that callers can still distinguish "no such database" from
731/// "exists but no access"; closing that existence-leak channel is filed as
732/// a follow-up.
733///
734/// `require_existing = true` flips that to **fail closed**: an absent
735/// target is denied rather than waved through. Used for the
736/// `SetInstanceMetadata` admin gate (D8) — `_databases` always exists on an
737/// initialized daemon and that op is never a creation flow, so the
738/// create-flow passthrough there would only ever be a fail-open hole that
739/// lets any authenticated user rewrite the daemon's system-DB pointers if
740/// `_databases` were ever unreadable.
741///
742/// When the gate does fire, the denial error is the same shape regardless
743/// of which sub-check failed (key not in auth_settings, mismatched hint,
744/// insufficient permission level): no internal detail about *why* leaks
745/// back over the wire.
746///
747/// System databases (`_users`, `_databases`, `_sync`, `_instance`) are gated
748/// like any other tree: callers must hold the required permission in the
749/// system DB's `auth_settings`. The instance-admin bootstrap (first user on
750/// the device) writes the first user as `Admin(0)` on `_users` and
751/// `_databases`, which is how legitimate administrative access is granted —
752/// the previous hardcoded read exemption is gone. The daemon's device-keyed
753/// local path still handles internal system-database maintenance writes that
754/// originate inside the server.
755async fn gate_tree_permission(
756    instance: &Instance,
757    pubkey: &PublicKey,
758    identity: &SigKey,
759    tree_id: &ID,
760    required: Permission,
761    require_existing: bool,
762) -> crate::Result<()> {
763    let denied = || {
764        crate::Error::Auth(Box::new(AuthError::PermissionDenied {
765            reason: format!("tree {tree_id}: pubkey {pubkey} not permitted for {required:?}"),
766        }))
767    };
768
769    if !instance.has_database(tree_id).await {
770        return if require_existing {
771            Err(denied())
772        } else {
773            Ok(())
774        };
775    }
776
777    let database = Database::open(instance, tree_id).await?;
778    let settings_store = database.get_settings().await?;
779    let auth_settings = settings_store.auth_snapshot().await?;
780
781    let resolved =
782        match resolve_identity_permission(pubkey, identity, &auth_settings, Some(instance)).await {
783            Ok(p) => p,
784            // Resolution failures (key not found, mismatch, etc.) collapse to the
785            // same shape as an insufficient-permission denial, so the client
786            // can't tell whether its identity was unknown or merely too low.
787            Err(_) => return Err(denied()),
788        };
789
790    let allowed = match required {
791        Permission::Read => true,
792        Permission::Write(_) => resolved.can_write(),
793        Permission::Admin(_) => resolved.can_admin(),
794    };
795
796    if !allowed {
797        return Err(denied());
798    }
799
800    Ok(())
801}
802
803/// Per-tree read gate for ops keyed by a raw entry id, which therefore
804/// carry no inline tree id and never hit the pre-dispatch `tree_id()` gate
805/// (`Get`). The tree to authorise against is only knowable *after* the
806/// fetch: it is the entry's claimed `tree.root`, or — for a tree-root
807/// entry, whose `root()` is `None` — the entry's own id.
808///
809/// Model B (hard multi-tenant boundary): system DBs are unencrypted and
810/// protected solely by this gate, so a raw cross-tree `Get` MUST resolve
811/// and check the real owning tree before returning content. Delegates to
812/// `gate_tree_permission`, so the `has_database`-absent passthrough and the
813/// opaque denial shape are identical to the inline-tree-id path.
814async fn gate_entry_read(
815    instance: &Instance,
816    pubkey: &PublicKey,
817    identity: &SigKey,
818    entry: &crate::entry::Entry,
819) -> crate::Result<()> {
820    let owning_tree = entry.root().unwrap_or_else(|| entry.id());
821    // create-flow passthrough (false): a Get against a tree not yet
822    // registered on this daemon must be waved through, same as the
823    // inline-tree-id path.
824    gate_tree_permission(
825        instance,
826        pubkey,
827        identity,
828        &owning_tree,
829        Permission::Read,
830        false,
831    )
832    .await
833}
834
835#[cfg(test)]
836mod tests {
837    use super::*;
838    use crate::backend::database::InMemory;
839    use crate::service::protocol::{Handshake, write_frame};
840
841    /// Helper: start a server on a temp socket, return path + shutdown sender.
842    async fn start_test_server() -> (PathBuf, watch::Sender<()>, Instance) {
843        let dir = tempfile::tempdir().unwrap();
844        let socket_path = dir.keep().join("test.sock");
845        let (instance, _admin) = Instance::create_backend(
846            Box::new(InMemory::new()),
847            crate::NewUser::passwordless("admin"),
848        )
849        .await
850        .unwrap();
851        let (tx, rx) = watch::channel(());
852        let server = ServiceServer::new(instance.clone(), socket_path.clone());
853        tokio::spawn(async move {
854            let _ = server.run(rx).await;
855        });
856        // Wait for the socket to appear (server binds asynchronously). Poll
857        // with a short sleep instead of a fixed delay so a slow sandbox
858        // (where this test was occasionally flaky under `nix build`) doesn't
859        // race the bind step.
860        for _ in 0..50 {
861            if socket_path.exists() {
862                break;
863            }
864            tokio::time::sleep(std::time::Duration::from_millis(10)).await;
865        }
866        (socket_path, tx, instance)
867    }
868
869    #[tokio::test]
870    async fn test_server_starts_and_shuts_down() {
871        let (socket_path, tx, _instance) = start_test_server().await;
872        assert!(socket_path.exists());
873        drop(tx);
874        // Poll for cleanup with the same robustness as the bind wait.
875        for _ in 0..50 {
876            if !socket_path.exists() {
877                break;
878            }
879            tokio::time::sleep(std::time::Duration::from_millis(10)).await;
880        }
881        // Socket should be cleaned up
882        assert!(!socket_path.exists());
883    }
884
885    #[tokio::test]
886    async fn test_wrong_protocol_version() {
887        let (socket_path, _tx, _instance) = start_test_server().await;
888
889        let stream = tokio::net::UnixStream::connect(&socket_path).await.unwrap();
890        let (mut reader, mut writer) = tokio::io::split(stream);
891
892        // Send wrong version
893        let handshake = Handshake {
894            protocol_version: 999,
895        };
896        write_frame(&mut writer, &handshake).await.unwrap();
897
898        // Read ack (server sends its version back)
899        let ack: Option<HandshakeAck> = read_frame(&mut reader).await.unwrap();
900        let ack = ack.unwrap();
901        assert_eq!(ack.protocol_version, PROTOCOL_VERSION);
902
903        // Connection should be closed by server after version mismatch
904        // Next read should get EOF
905        let result: crate::Result<Option<ServiceResponse>> = read_frame(&mut reader).await;
906        assert!(result.unwrap().is_none());
907    }
908
909    /// Load-bearing invariant: `ConnectionState` must never hold plaintext
910    /// signing material in any variant. The daemon participates in storage
911    /// and challenge-response only; the rejected Branch A design held
912    /// decrypted user keys server-side and that boundary was reinstated by
913    /// design (see Service Architecture doc § Decision record).
914    ///
915    /// Structure of the test: construct each variant, destructure with named
916    /// fields (so adding a field forces this test to be edited), and check
917    /// the static type of each field is not `PrivateKey`. A future refactor
918    /// that adds e.g. `decrypted_root_key: PrivateKey` to `Authenticated`
919    /// would either fail the type check at runtime or fail the destructure
920    /// match exhaustiveness at compile time.
921    #[test]
922    fn connection_state_never_holds_private_key() {
923        use crate::auth::crypto::{PrivateKey, generate_keypair};
924        use std::any::TypeId;
925
926        fn assert_not_private_key<T: 'static>(_value: &T, label: &str) {
927            assert_ne!(
928                TypeId::of::<T>(),
929                TypeId::of::<PrivateKey>(),
930                "ConnectionState field `{label}` is PrivateKey — daemon must not hold plaintext keys"
931            );
932        }
933
934        let (_signing, pubkey) = generate_keypair();
935        let states = [
936            ConnectionState::PreAuth,
937            ConnectionState::AwaitingProof {
938                username: "u".to_string(),
939                user_uuid: "uu".to_string(),
940                challenge: vec![1, 2, 3],
941                expected_pubkey: pubkey.clone(),
942            },
943            ConnectionState::Authenticated {
944                username: "u".to_string(),
945                user_uuid: "uu".to_string(),
946                login_pubkey: pubkey.clone(),
947                session_keyset: {
948                    let mut s = HashSet::new();
949                    s.insert(pubkey);
950                    s
951                },
952                pending_key_challenges: HashMap::new(),
953            },
954        ];
955
956        for state in &states {
957            match state {
958                ConnectionState::PreAuth => {}
959                ConnectionState::AwaitingProof {
960                    username,
961                    user_uuid,
962                    challenge,
963                    expected_pubkey,
964                } => {
965                    assert_not_private_key(username, "AwaitingProof::username");
966                    assert_not_private_key(user_uuid, "AwaitingProof::user_uuid");
967                    assert_not_private_key(challenge, "AwaitingProof::challenge");
968                    assert_not_private_key(expected_pubkey, "AwaitingProof::expected_pubkey");
969                }
970                ConnectionState::Authenticated {
971                    username,
972                    user_uuid,
973                    login_pubkey,
974                    session_keyset,
975                    pending_key_challenges,
976                } => {
977                    assert_not_private_key(username, "Authenticated::username");
978                    assert_not_private_key(user_uuid, "Authenticated::user_uuid");
979                    assert_not_private_key(login_pubkey, "Authenticated::login_pubkey");
980                    for k in session_keyset {
981                        assert_not_private_key(k, "Authenticated::session_keyset entry");
982                    }
983                    for (k, ch) in pending_key_challenges {
984                        assert_not_private_key(k, "Authenticated::pending_key_challenges key");
985                        assert_not_private_key(
986                            ch,
987                            "Authenticated::pending_key_challenges challenge",
988                        );
989                    }
990                }
991            }
992        }
993    }
994
995    #[tokio::test]
996    async fn test_authenticated_request_rejected_without_login() {
997        // Companion to the integration test `test_unauthenticated_backend_op_rejected`
998        // — exercises the same gate path against the raw protocol so a
999        // regression here surfaces immediately, not just at the
1000        // `RemoteConnection` layer.
1001        let (socket_path, _tx, _instance) = start_test_server().await;
1002
1003        let stream = tokio::net::UnixStream::connect(&socket_path).await.unwrap();
1004        let (mut reader, mut writer) = tokio::io::split(stream);
1005
1006        write_frame(
1007            &mut writer,
1008            &Handshake {
1009                protocol_version: PROTOCOL_VERSION,
1010            },
1011        )
1012        .await
1013        .unwrap();
1014        let _ack: Option<HandshakeAck> = read_frame(&mut reader).await.unwrap();
1015
1016        // Send an AuthenticatedDb request without completing TrustedLogin.
1017        write_frame(
1018            &mut writer,
1019            &ServiceRequest::AuthenticatedDb(Box::new(AuthenticatedDbRequest {
1020                root_id: crate::entry::ID::default(),
1021                identity: crate::auth::types::SigKey::default(),
1022                op: DatabaseOp::GetEntry {
1023                    id: crate::entry::ID::from_bytes("nonexistent"),
1024                },
1025            })),
1026        )
1027        .await
1028        .unwrap();
1029
1030        let resp: Option<ServiceResponse> = read_frame(&mut reader).await.unwrap();
1031        match resp.unwrap() {
1032            ServiceResponse::Error(e) => {
1033                assert_eq!(
1034                    e.module, "auth",
1035                    "expected an auth-module error from the gate; got {e:?}"
1036                );
1037            }
1038            other => panic!("Expected gate Error, got {other:?}"),
1039        }
1040    }
1041
1042    #[tokio::test]
1043    async fn test_get_instance_metadata() {
1044        let (socket_path, _tx, _instance) = start_test_server().await;
1045
1046        let stream = tokio::net::UnixStream::connect(&socket_path).await.unwrap();
1047        let (mut reader, mut writer) = tokio::io::split(stream);
1048
1049        // Handshake
1050        write_frame(
1051            &mut writer,
1052            &Handshake {
1053                protocol_version: PROTOCOL_VERSION,
1054            },
1055        )
1056        .await
1057        .unwrap();
1058        let _ack: Option<HandshakeAck> = read_frame(&mut reader).await.unwrap();
1059
1060        // Request metadata
1061        write_frame(&mut writer, &ServiceRequest::GetInstanceMetadata)
1062            .await
1063            .unwrap();
1064
1065        let resp: Option<ServiceResponse> = read_frame(&mut reader).await.unwrap();
1066        match resp.unwrap() {
1067            ServiceResponse::InstanceMetadata(Some(_meta)) => {
1068                // Server was initialized so metadata should exist
1069            }
1070            other => panic!("Expected InstanceMetadata(Some), got {other:?}"),
1071        }
1072    }
1073
1074    #[tokio::test]
1075    async fn test_stale_socket_cleanup() {
1076        let dir = tempfile::tempdir().unwrap();
1077        let socket_path = dir.path().join("test.sock");
1078
1079        // Create a stale socket file
1080        tokio::fs::write(&socket_path, "stale").await.unwrap();
1081        assert!(socket_path.exists());
1082
1083        let (instance, _admin) = Instance::create_backend(
1084            Box::new(InMemory::new()),
1085            crate::NewUser::passwordless("admin"),
1086        )
1087        .await
1088        .unwrap();
1089        let (_tx, rx) = watch::channel(());
1090        let server = ServiceServer::new(instance, socket_path.clone());
1091
1092        // Server should remove stale socket and bind successfully
1093        let handle = tokio::spawn(async move { server.run(rx).await });
1094
1095        // The server binds asynchronously; poll until it accepts a connection
1096        // rather than racing a fixed sleep (flaky under parallel test load).
1097        let mut stream = None;
1098        for _ in 0..200 {
1099            if let Ok(s) = tokio::net::UnixStream::connect(&socket_path).await {
1100                stream = Some(s);
1101                break;
1102            }
1103            tokio::time::sleep(std::time::Duration::from_millis(10)).await;
1104        }
1105        assert!(
1106            stream.is_some(),
1107            "server did not bind a connectable socket in time"
1108        );
1109
1110        handle.abort();
1111    }
1112
1113    /// D8 regression: `require_existing` flips the create-flow passthrough
1114    /// to fail-closed. An absent target is waved through with `false` (the
1115    /// `Database::create` path) but denied with `true` (the
1116    /// `SetInstanceMetadata` admin gate), so an unreadable `_databases`
1117    /// can't become a fail-open hole.
1118    #[tokio::test]
1119    async fn test_gate_require_existing_fails_closed_on_absent_db() {
1120        use crate::auth::crypto::generate_keypair;
1121
1122        let (instance, _admin) = Instance::create_backend(
1123            Box::new(InMemory::new()),
1124            crate::NewUser::passwordless("admin"),
1125        )
1126        .await
1127        .unwrap();
1128        let (_sk, pubkey) = generate_keypair();
1129        let absent = ID::from_bytes("no-such-tree");
1130
1131        gate_tree_permission(
1132            &instance,
1133            &pubkey,
1134            &SigKey::default(),
1135            &absent,
1136            Permission::Admin(0),
1137            false,
1138        )
1139        .await
1140        .expect("create-flow passthrough must wave an absent tree through");
1141
1142        let err = gate_tree_permission(
1143            &instance,
1144            &pubkey,
1145            &SigKey::default(),
1146            &absent,
1147            Permission::Admin(0),
1148            true,
1149        )
1150        .await
1151        .expect_err("require_existing must deny an absent tree");
1152        assert!(
1153            matches!(&err, crate::Error::Auth(b) if matches!(**b, AuthError::PermissionDenied { .. })),
1154            "expected PermissionDenied, got: {err:?}",
1155        );
1156    }
1157}