Browse Source

fix(desktop): server spawn resilience (#13028)

Co-authored-by: Brendan Allan <[email protected]>
Adam 2 weeks ago
parent
commit
7e1247c420

+ 64 - 4
packages/desktop/src-tauri/src/cli.rs

@@ -1,9 +1,10 @@
 use tauri::{AppHandle, Manager, path::BaseDirectory};
 use tauri_plugin_shell::{
     ShellExt,
-    process::{Command, CommandChild, CommandEvent},
+    process::{Command, CommandChild, CommandEvent, TerminatedPayload},
 };
 use tauri_plugin_store::StoreExt;
+use tokio::sync::oneshot;
 
 use crate::{
     LogState,
@@ -273,12 +274,45 @@ pub fn create_command(app: &tauri::AppHandle, args: &str, extra_env: &[(&str, St
     }
 }
 
-pub fn serve(app: &AppHandle, hostname: &str, port: u32, password: &str) -> CommandChild {
+pub fn serve(
+    app: &AppHandle,
+    hostname: &str,
+    port: u32,
+    password: &str,
+) -> (CommandChild, oneshot::Receiver<TerminatedPayload>) {
     let log_state = app.state::<LogState>();
     let log_state_clone = log_state.inner().clone();
 
+    let (exit_tx, exit_rx) = oneshot::channel::<TerminatedPayload>();
+
     println!("spawning sidecar on port {port}");
 
+    if let Ok(mut logs) = log_state_clone.0.lock() {
+        let args =
+            format!("--print-logs --log-level WARN serve --hostname {hostname} --port {port}");
+
+        #[cfg(target_os = "windows")]
+        {
+            logs.push_back(format!("[SPAWN] sidecar=opencode-cli args=\"{args}\"\n"));
+        }
+
+        #[cfg(not(target_os = "windows"))]
+        {
+            let sidecar = get_sidecar_path(app);
+            let shell = get_user_shell();
+            let cmd = if shell.ends_with("/nu") {
+                format!("^\"{}\" {}", sidecar.display(), args)
+            } else {
+                format!("\"{}\" {}", sidecar.display(), args)
+            };
+            logs.push_back(format!("[SPAWN] shell=\"{shell}\" argv=\"-il -c {cmd}\"\n"));
+        }
+
+        while logs.len() > MAX_LOG_ENTRIES {
+            logs.pop_front();
+        }
+    }
+
     let envs = [
         ("OPENCODE_SERVER_USERNAME", "opencode".to_string()),
         ("OPENCODE_SERVER_PASSWORD", password.to_string()),
@@ -286,13 +320,14 @@ pub fn serve(app: &AppHandle, hostname: &str, port: u32, password: &str) -> Comm
 
     let (mut rx, child) = create_command(
         app,
-        format!("serve --hostname {hostname} --port {port}").as_str(),
+        format!("--print-logs --log-level WARN serve --hostname {hostname} --port {port}").as_str(),
         &envs,
     )
     .spawn()
     .expect("Failed to spawn opencode");
 
     tokio::spawn(async move {
+        let mut exit_tx = Some(exit_tx);
         while let Some(event) = rx.recv().await {
             match event {
                 CommandEvent::Stdout(line_bytes) => {
@@ -321,10 +356,35 @@ pub fn serve(app: &AppHandle, hostname: &str, port: u32, password: &str) -> Comm
                         }
                     }
                 }
+                CommandEvent::Error(err) => {
+                    eprintln!("{err}");
+
+                    if let Ok(mut logs) = log_state_clone.0.lock() {
+                        logs.push_back(format!("[ERROR] {err}\n"));
+                        while logs.len() > MAX_LOG_ENTRIES {
+                            logs.pop_front();
+                        }
+                    }
+                }
+                CommandEvent::Terminated(payload) => {
+                    if let Ok(mut logs) = log_state_clone.0.lock() {
+                        logs.push_back(format!(
+                            "[EXIT] code={:?} signal={:?}\n",
+                            payload.code, payload.signal
+                        ));
+                        while logs.len() > MAX_LOG_ENTRIES {
+                            logs.pop_front();
+                        }
+                    }
+
+                    if let Some(tx) = exit_tx.take() {
+                        let _ = tx.send(payload);
+                    }
+                }
                 _ => {}
             }
         }
     });
 
-    child
+    (child, exit_rx)
 }

+ 16 - 5
packages/desktop/src-tauri/src/lib.rs

@@ -582,14 +582,25 @@ async fn initialize(app: AppHandle) {
                     let app = app.clone();
                     Some(
                         async move {
-                            let Ok(Ok(_)) = timeout(Duration::from_secs(30), health_check.0).await
-                            else {
+                            let res = timeout(Duration::from_secs(30), health_check.0).await;
+                            let err = match res {
+                                Ok(Ok(Ok(()))) => None,
+                                Ok(Ok(Err(e))) => Some(e),
+                                Ok(Err(e)) => Some(format!("Health check task failed: {e}")),
+                                Err(_) => Some("Health check timed out".to_string()),
+                            };
+
+                            if let Some(err) = err {
                                 let _ = child.kill();
+
+                                let logs = get_logs(app.clone())
+                                    .await
+                                    .unwrap_or_else(|e| format!("[DESKTOP] Failed to read sidecar logs: {e}\n"));
+
                                 return Err(format!(
-                                    "Failed to spawn OpenCode Server. Logs:\n{}",
-                                    get_logs(app.clone()).await.unwrap()
+                                    "Failed to spawn OpenCode Server ({err}). Logs:\n{logs}"
                                 ));
-                            };
+                            }
 
                             println!("CLI health check OK");
 

+ 25 - 8
packages/desktop/src-tauri/src/server.rs

@@ -113,26 +113,43 @@ pub fn spawn_local_server(
     port: u32,
     password: String,
 ) -> (CommandChild, HealthCheck) {
-    let child = cli::serve(&app, &hostname, port, &password);
+    let (child, exit) = cli::serve(&app, &hostname, port, &password);
 
     let health_check = HealthCheck(tokio::spawn(async move {
         let url = format!("http://{hostname}:{port}");
-
         let timestamp = Instant::now();
-        loop {
-            tokio::time::sleep(Duration::from_millis(100)).await;
 
-            if check_health(&url, Some(&password)).await {
-                println!("Server ready after {:?}", timestamp.elapsed());
-                break;
+        let ready = async {
+            loop {
+                tokio::time::sleep(Duration::from_millis(100)).await;
+
+                if check_health(&url, Some(&password)).await {
+                    println!("Server ready after {:?}", timestamp.elapsed());
+                    return Ok(());
+                }
             }
+        };
+
+        let terminated = async {
+            match exit.await {
+                Ok(payload) => Err(format!(
+                    "Sidecar terminated before becoming healthy (code={:?} signal={:?})",
+                    payload.code, payload.signal
+                )),
+                Err(_) => Err("Sidecar terminated before becoming healthy".to_string()),
+            }
+        };
+
+        tokio::select! {
+            res = ready => res,
+            res = terminated => res,
         }
     }));
 
     (child, health_check)
 }
 
-pub struct HealthCheck(pub JoinHandle<()>);
+pub struct HealthCheck(pub JoinHandle<Result<(), String>>);
 
 pub async fn check_health(url: &str, password: Option<&str>) -> bool {
     let Ok(url) = reqwest::Url::parse(url) else {