use std::collections::VecDeque; use std::ffi::OsString; use std::fs; use std::fs::OpenOptions; use std::io::BufRead; use std::io::BufReader; use std::io::Write; use std::net::TcpListener; use std::net::TcpStream; use std::path::Path; use std::path::PathBuf; use std::process::Child; use std::process::ChildStdin; use std::process::ChildStdout; use std::process::Command; use std::process::Stdio; use std::thread; use std::time::Duration; use std::time::Instant; use std::time::SystemTime; use anyhow::Context; use anyhow::Result; use anyhow::bail; use clap::ArgAction; use clap::Parser; use clap::Subcommand; use codex_app_server_protocol::AccountLoginCompletedNotification; use codex_app_server_protocol::AskForApproval; use codex_app_server_protocol::ClientInfo; use codex_app_server_protocol::ClientRequest; use codex_app_server_protocol::CommandExecutionApprovalDecision; use codex_app_server_protocol::CommandExecutionRequestApprovalParams; use codex_app_server_protocol::CommandExecutionRequestApprovalResponse; use codex_app_server_protocol::CommandExecutionStatus; use codex_app_server_protocol::DynamicToolSpec; use codex_app_server_protocol::FileChangeApprovalDecision; use codex_app_server_protocol::FileChangeRequestApprovalParams; use codex_app_server_protocol::FileChangeRequestApprovalResponse; use codex_app_server_protocol::GetAccountRateLimitsResponse; use codex_app_server_protocol::InitializeCapabilities; use codex_app_server_protocol::InitializeParams; use codex_app_server_protocol::InitializeResponse; use codex_app_server_protocol::JSONRPCMessage; use codex_app_server_protocol::JSONRPCNotification; use codex_app_server_protocol::JSONRPCRequest; use codex_app_server_protocol::JSONRPCResponse; use codex_app_server_protocol::LoginAccountResponse; use codex_app_server_protocol::ModelListParams; use codex_app_server_protocol::ModelListResponse; use codex_app_server_protocol::ReadOnlyAccess; use codex_app_server_protocol::RequestId; use codex_app_server_protocol::SandboxPolicy; use codex_app_server_protocol::ServerNotification; use codex_app_server_protocol::ServerRequest; use codex_app_server_protocol::ThreadDecrementElicitationParams; use codex_app_server_protocol::ThreadDecrementElicitationResponse; use codex_app_server_protocol::ThreadIncrementElicitationParams; use codex_app_server_protocol::ThreadIncrementElicitationResponse; use codex_app_server_protocol::ThreadItem; use codex_app_server_protocol::ThreadListParams; use codex_app_server_protocol::ThreadListResponse; use codex_app_server_protocol::ThreadResumeParams; use codex_app_server_protocol::ThreadResumeResponse; use codex_app_server_protocol::ThreadStartParams; use codex_app_server_protocol::ThreadStartResponse; use codex_app_server_protocol::TurnStartParams; use codex_app_server_protocol::TurnStartResponse; use codex_app_server_protocol::TurnStatus; use codex_app_server_protocol::UserInput as V2UserInput; use codex_core::config::Config; use codex_otel::OtelProvider; use codex_otel::current_span_w3c_trace_context; use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::protocol::W3cTraceContext; use codex_utils_cli::CliConfigOverrides; use serde::Serialize; use serde::de::DeserializeOwned; use serde_json::Value; use tracing::info_span; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; use tungstenite::Message; use tungstenite::WebSocket; use tungstenite::connect; use tungstenite::stream::MaybeTlsStream; use url::Url; use uuid::Uuid; const NOTIFICATIONS_TO_OPT_OUT: &[&str] = &[ // v2 item deltas. "command/exec/outputDelta", "item/agentMessage/delta", "item/plan/delta", "item/fileChange/outputDelta", "item/reasoning/summaryTextDelta", "item/reasoning/textDelta", ]; const APP_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_secs(5); const APP_SERVER_GRACEFUL_SHUTDOWN_POLL_INTERVAL: Duration = Duration::from_millis(100); const DEFAULT_ANALYTICS_ENABLED: bool = true; const OTEL_SERVICE_NAME: &str = "codex-app-server-test-client"; const TRACE_DISABLED_MESSAGE: &str = "Not enabled - enable tracing in $CODEX_HOME/config.toml to get a trace URL!"; /// Minimal launcher that initializes the Codex app-server and logs the handshake. #[derive(Parser)] #[command(author = "Codex", version, about = "Bootstrap Codex app-server", long_about = None)] struct Cli { /// Path to the `codex` CLI binary. When set, requests use stdio by /// spawning `codex app-server` as a child process. #[arg(long, env = "CODEX_BIN", global = true)] codex_bin: Option, /// Existing websocket server URL to connect to. /// /// If neither `--codex-bin` nor `--url` is provided, defaults to /// `ws://127.0.0.1:4222`. #[arg(long, env = "CODEX_APP_SERVER_URL", global = true)] url: Option, /// Forwarded to the `codex` CLI as `--config key=value`. Repeatable. /// /// Example: /// `--config 'model_providers.mock.base_url="http://localhost:4010/v2"'` #[arg( short = 'c', long = "config", value_name = "key=value", action = ArgAction::Append, global = true )] config_overrides: Vec, /// JSON array of dynamic tool specs or a single tool object. /// Prefix a filename with '@' to read from a file. /// /// Example: /// --dynamic-tools '[{"name":"demo","description":"Demo","inputSchema":{"type":"object"}}]' /// --dynamic-tools @/path/to/tools.json #[arg(long, value_name = "json-or-@file", global = true)] dynamic_tools: Option, #[command(subcommand)] command: CliCommand, } #[derive(Subcommand)] enum CliCommand { /// Start `codex app-server` on a websocket endpoint in the background. /// /// Logs are written to: /// `/tmp/codex-app-server-test-client/` Serve { /// WebSocket listen URL passed to `codex app-server --listen`. #[arg(long, default_value = "ws://127.0.0.1:4222")] listen: String, /// Kill any process listening on the same port before starting. #[arg(long, default_value_t = false)] kill: bool, }, /// Send a user message through the Codex app-server. SendMessage { /// User message to send to Codex. user_message: String, }, /// Send a user message through the app-server V2 thread/turn APIs. SendMessageV2 { /// Opt into experimental app-server methods and fields. #[arg(long)] experimental_api: bool, /// User message to send to Codex. user_message: String, }, /// Resume a V2 thread by id, then send a user message. ResumeMessageV2 { /// Existing thread id to resume. thread_id: String, /// User message to send to Codex. user_message: String, }, /// Resume a V2 thread and continuously stream notifications/events. /// /// This command does not auto-exit; stop it with SIGINT/SIGTERM/SIGKILL. ThreadResume { /// Existing thread id to resume. thread_id: String, }, /// Initialize the app-server and dump all inbound messages until interrupted. /// /// This command does not auto-exit; stop it with SIGINT/SIGTERM/SIGKILL. Watch, /// Start a V2 turn that elicits an ExecCommand approval. #[command(name = "trigger-cmd-approval")] TriggerCmdApproval { /// Optional prompt; defaults to a simple python command. user_message: Option, }, /// Start a V2 turn that elicits an ApplyPatch approval. #[command(name = "trigger-patch-approval")] TriggerPatchApproval { /// Optional prompt; defaults to creating a file via apply_patch. user_message: Option, }, /// Start a V2 turn that should not elicit an ExecCommand approval. #[command(name = "no-trigger-cmd-approval")] NoTriggerCmdApproval, /// Send two sequential V2 turns in the same thread to test follow-up behavior. SendFollowUpV2 { /// Initial user message for the first turn. first_message: String, /// Follow-up user message for the second turn. follow_up_message: String, }, /// Trigger zsh-fork multi-subcommand approvals and assert expected approval behavior. #[command(name = "trigger-zsh-fork-multi-cmd-approval")] TriggerZshForkMultiCmdApproval { /// Optional prompt; defaults to an explicit `/usr/bin/true && /usr/bin/true` command. user_message: Option, /// Minimum number of command-approval callbacks expected in the turn. #[arg(long, default_value_t = 2)] min_approvals: usize, /// One-based approval index to abort (e.g. --abort-on 2 aborts the second approval). #[arg(long)] abort_on: Option, }, /// Trigger the ChatGPT login flow and wait for completion. TestLogin { /// Use the device-code login flow instead of the browser callback flow. #[arg(long, default_value_t = false)] device_code: bool, }, /// Fetch the current account rate limits from the Codex app-server. GetAccountRateLimits, /// List the available models from the Codex app-server. #[command(name = "model-list")] ModelList, /// List stored threads from the Codex app-server. #[command(name = "thread-list")] ThreadList { /// Number of threads to return. #[arg(long, default_value_t = 20)] limit: u32, }, /// Increment the out-of-band elicitation pause counter for a thread. #[command(name = "thread-increment-elicitation")] ThreadIncrementElicitation { /// Existing thread id to update. thread_id: String, }, /// Decrement the out-of-band elicitation pause counter for a thread. #[command(name = "thread-decrement-elicitation")] ThreadDecrementElicitation { /// Existing thread id to update. thread_id: String, }, /// Run the live websocket harness that proves elicitation pause prevents a /// 10s unified exec timeout from killing a 15s helper script. #[command(name = "live-elicitation-timeout-pause")] LiveElicitationTimeoutPause { /// Model passed to `thread/start`. #[arg(long, env = "CODEX_E2E_MODEL", default_value = "gpt-5")] model: String, /// Existing workspace path used as the turn cwd. #[arg(long, value_name = "path", default_value = ".")] workspace: PathBuf, /// Helper script to run from the model; defaults to the repo-local /// live elicitation hold script. #[arg(long, value_name = "path")] script: Option, /// Seconds the helper script should sleep while the timeout is paused. #[arg(long, default_value_t = 15)] hold_seconds: u64, }, } pub async fn run() -> Result<()> { let Cli { codex_bin, url, config_overrides, dynamic_tools, command, } = Cli::parse(); let dynamic_tools = parse_dynamic_tools_arg(&dynamic_tools)?; match command { CliCommand::Serve { listen, kill } => { ensure_dynamic_tools_unused(&dynamic_tools, "serve")?; let codex_bin = codex_bin.unwrap_or_else(|| PathBuf::from("codex")); serve(&codex_bin, &config_overrides, &listen, kill) } CliCommand::SendMessage { user_message } => { ensure_dynamic_tools_unused(&dynamic_tools, "send-message")?; let endpoint = resolve_endpoint(codex_bin, url)?; send_message(&endpoint, &config_overrides, user_message).await } CliCommand::SendMessageV2 { experimental_api, user_message, } => { let endpoint = resolve_endpoint(codex_bin, url)?; send_message_v2_endpoint( &endpoint, &config_overrides, user_message, experimental_api, &dynamic_tools, ) .await } CliCommand::ResumeMessageV2 { thread_id, user_message, } => { let endpoint = resolve_endpoint(codex_bin, url)?; resume_message_v2( &endpoint, &config_overrides, thread_id, user_message, &dynamic_tools, ) .await } CliCommand::ThreadResume { thread_id } => { ensure_dynamic_tools_unused(&dynamic_tools, "thread-resume")?; let endpoint = resolve_endpoint(codex_bin, url)?; thread_resume_follow(&endpoint, &config_overrides, thread_id).await } CliCommand::Watch => { ensure_dynamic_tools_unused(&dynamic_tools, "watch")?; let endpoint = resolve_endpoint(codex_bin, url)?; watch(&endpoint, &config_overrides).await } CliCommand::TriggerCmdApproval { user_message } => { let endpoint = resolve_endpoint(codex_bin, url)?; trigger_cmd_approval(&endpoint, &config_overrides, user_message, &dynamic_tools).await } CliCommand::TriggerPatchApproval { user_message } => { let endpoint = resolve_endpoint(codex_bin, url)?; trigger_patch_approval(&endpoint, &config_overrides, user_message, &dynamic_tools).await } CliCommand::NoTriggerCmdApproval => { let endpoint = resolve_endpoint(codex_bin, url)?; no_trigger_cmd_approval(&endpoint, &config_overrides, &dynamic_tools).await } CliCommand::SendFollowUpV2 { first_message, follow_up_message, } => { let endpoint = resolve_endpoint(codex_bin, url)?; send_follow_up_v2( &endpoint, &config_overrides, first_message, follow_up_message, &dynamic_tools, ) .await } CliCommand::TriggerZshForkMultiCmdApproval { user_message, min_approvals, abort_on, } => { let endpoint = resolve_endpoint(codex_bin, url)?; trigger_zsh_fork_multi_cmd_approval( &endpoint, &config_overrides, user_message, min_approvals, abort_on, &dynamic_tools, ) .await } CliCommand::TestLogin { device_code } => { ensure_dynamic_tools_unused(&dynamic_tools, "test-login")?; let endpoint = resolve_endpoint(codex_bin, url)?; test_login(&endpoint, &config_overrides, device_code).await } CliCommand::GetAccountRateLimits => { ensure_dynamic_tools_unused(&dynamic_tools, "get-account-rate-limits")?; let endpoint = resolve_endpoint(codex_bin, url)?; get_account_rate_limits(&endpoint, &config_overrides).await } CliCommand::ModelList => { ensure_dynamic_tools_unused(&dynamic_tools, "model-list")?; let endpoint = resolve_endpoint(codex_bin, url)?; model_list(&endpoint, &config_overrides).await } CliCommand::ThreadList { limit } => { ensure_dynamic_tools_unused(&dynamic_tools, "thread-list")?; let endpoint = resolve_endpoint(codex_bin, url)?; thread_list(&endpoint, &config_overrides, limit).await } CliCommand::ThreadIncrementElicitation { thread_id } => { ensure_dynamic_tools_unused(&dynamic_tools, "thread-increment-elicitation")?; let url = resolve_shared_websocket_url(codex_bin, url, "thread-increment-elicitation")?; thread_increment_elicitation(&url, thread_id) } CliCommand::ThreadDecrementElicitation { thread_id } => { ensure_dynamic_tools_unused(&dynamic_tools, "thread-decrement-elicitation")?; let url = resolve_shared_websocket_url(codex_bin, url, "thread-decrement-elicitation")?; thread_decrement_elicitation(&url, thread_id) } CliCommand::LiveElicitationTimeoutPause { model, workspace, script, hold_seconds, } => { ensure_dynamic_tools_unused(&dynamic_tools, "live-elicitation-timeout-pause")?; live_elicitation_timeout_pause( codex_bin, url, &config_overrides, model, workspace, script, hold_seconds, ) } } } enum Endpoint { SpawnCodex(PathBuf), ConnectWs(String), } struct BackgroundAppServer { process: Child, url: String, } fn resolve_endpoint(codex_bin: Option, url: Option) -> Result { if codex_bin.is_some() && url.is_some() { bail!("--codex-bin and --url are mutually exclusive"); } if let Some(codex_bin) = codex_bin { return Ok(Endpoint::SpawnCodex(codex_bin)); } if let Some(url) = url { return Ok(Endpoint::ConnectWs(url)); } Ok(Endpoint::ConnectWs("ws://127.0.0.1:4222".to_string())) } fn resolve_shared_websocket_url( codex_bin: Option, url: Option, command: &str, ) -> Result { if codex_bin.is_some() { bail!( "{command} requires --url or an already-running websocket app-server; --codex-bin would spawn a private stdio app-server instead" ); } Ok(url.unwrap_or_else(|| "ws://127.0.0.1:4222".to_string())) } impl BackgroundAppServer { fn spawn(codex_bin: &Path, config_overrides: &[String]) -> Result { let listener = TcpListener::bind("127.0.0.1:0") .context("failed to reserve a local port for websocket app-server")?; let addr = listener.local_addr()?; drop(listener); let url = format!("ws://{addr}"); let mut cmd = Command::new(codex_bin); if let Some(codex_bin_parent) = codex_bin.parent() { let mut path = OsString::from(codex_bin_parent.as_os_str()); if let Some(existing_path) = std::env::var_os("PATH") { path.push(":"); path.push(existing_path); } cmd.env("PATH", path); } for override_kv in config_overrides { cmd.arg("--config").arg(override_kv); } let process = cmd .arg("app-server") .arg("--listen") .arg(&url) .stdin(Stdio::null()) .stdout(Stdio::null()) .stderr(Stdio::inherit()) .spawn() .with_context(|| format!("failed to start `{}` app-server", codex_bin.display()))?; Ok(Self { process, url }) } } impl Drop for BackgroundAppServer { fn drop(&mut self) { if let Ok(Some(status)) = self.process.try_wait() { println!("[background app-server exited: {status}]"); return; } let _ = self.process.kill(); let _ = self.process.wait(); } } fn serve(codex_bin: &Path, config_overrides: &[String], listen: &str, kill: bool) -> Result<()> { let runtime_dir = PathBuf::from("/tmp/codex-app-server-test-client"); fs::create_dir_all(&runtime_dir) .with_context(|| format!("failed to create runtime dir {}", runtime_dir.display()))?; let log_path = runtime_dir.join("app-server.log"); if kill { kill_listeners_on_same_port(listen)?; } let log_file = OpenOptions::new() .create(true) .append(true) .open(&log_path) .with_context(|| format!("failed to open log file {}", log_path.display()))?; let log_file_stderr = log_file .try_clone() .with_context(|| format!("failed to clone log file handle {}", log_path.display()))?; let mut cmdline = format!( "tail -f /dev/null | RUST_BACKTRACE=full RUST_LOG=warn,codex_=trace {}", shell_quote(&codex_bin.display().to_string()) ); for override_kv in config_overrides { cmdline.push_str(&format!(" --config {}", shell_quote(override_kv))); } cmdline.push_str(&format!(" app-server --listen {}", shell_quote(listen))); let child = Command::new("nohup") .arg("sh") .arg("-c") .arg(cmdline) .stdin(Stdio::null()) .stdout(Stdio::from(log_file)) .stderr(Stdio::from(log_file_stderr)) .spawn() .with_context(|| format!("failed to start `{}` app-server", codex_bin.display()))?; let pid = child.id(); println!("started codex app-server"); println!("listen: {listen}"); println!("pid: {pid} (launcher process)"); println!("log: {}", log_path.display()); Ok(()) } fn kill_listeners_on_same_port(listen: &str) -> Result<()> { let url = Url::parse(listen).with_context(|| format!("invalid --listen URL `{listen}`"))?; let port = url .port_or_known_default() .with_context(|| format!("unable to infer port from --listen URL `{listen}`"))?; let output = Command::new("lsof") .arg("-nP") .arg(format!("-tiTCP:{port}")) .arg("-sTCP:LISTEN") .output() .with_context(|| format!("failed to run lsof for port {port}"))?; if !output.status.success() { return Ok(()); } let pids: Vec = String::from_utf8_lossy(&output.stdout) .lines() .filter_map(|line| line.trim().parse::().ok()) .collect(); if pids.is_empty() { return Ok(()); } for pid in pids { println!("killing listener pid {pid} on port {port}"); let pid_str = pid.to_string(); let term_status = Command::new("kill") .arg(&pid_str) .status() .with_context(|| format!("failed to send SIGTERM to pid {pid}"))?; if !term_status.success() { continue; } } thread::sleep(Duration::from_millis(300)); let output = Command::new("lsof") .arg("-nP") .arg(format!("-tiTCP:{port}")) .arg("-sTCP:LISTEN") .output() .with_context(|| format!("failed to re-check listeners on port {port}"))?; if !output.status.success() { return Ok(()); } let remaining: Vec = String::from_utf8_lossy(&output.stdout) .lines() .filter_map(|line| line.trim().parse::().ok()) .collect(); for pid in remaining { println!("force killing remaining listener pid {pid} on port {port}"); let _ = Command::new("kill").arg("-9").arg(pid.to_string()).status(); } Ok(()) } fn shell_quote(input: &str) -> String { format!("'{}'", input.replace('\'', "'\\''")) } struct SendMessagePolicies<'a> { command_name: &'static str, experimental_api: bool, approval_policy: Option, sandbox_policy: Option, dynamic_tools: &'a Option>, } async fn send_message( endpoint: &Endpoint, config_overrides: &[String], user_message: String, ) -> Result<()> { let dynamic_tools = None; send_message_v2_with_policies( endpoint, config_overrides, user_message, SendMessagePolicies { command_name: "send-message", experimental_api: false, approval_policy: None, sandbox_policy: None, dynamic_tools: &dynamic_tools, }, ) .await } pub async fn send_message_v2( codex_bin: &Path, config_overrides: &[String], user_message: String, dynamic_tools: &Option>, ) -> Result<()> { let endpoint = Endpoint::SpawnCodex(codex_bin.to_path_buf()); send_message_v2_endpoint( &endpoint, config_overrides, user_message, /*experimental_api*/ true, dynamic_tools, ) .await } async fn send_message_v2_endpoint( endpoint: &Endpoint, config_overrides: &[String], user_message: String, experimental_api: bool, dynamic_tools: &Option>, ) -> Result<()> { if dynamic_tools.is_some() && !experimental_api { bail!("--dynamic-tools requires --experimental-api for send-message-v2"); } send_message_v2_with_policies( endpoint, config_overrides, user_message, SendMessagePolicies { command_name: "send-message-v2", experimental_api, approval_policy: None, sandbox_policy: None, dynamic_tools, }, ) .await } async fn trigger_zsh_fork_multi_cmd_approval( endpoint: &Endpoint, config_overrides: &[String], user_message: Option, min_approvals: usize, abort_on: Option, dynamic_tools: &Option>, ) -> Result<()> { if let Some(abort_on) = abort_on && abort_on == 0 { bail!("--abort-on must be >= 1 when provided"); } let default_prompt = "Run this exact command using shell command execution without rewriting or splitting it: /usr/bin/true && /usr/bin/true"; let message = user_message.unwrap_or_else(|| default_prompt.to_string()); with_client( "trigger-zsh-fork-multi-cmd-approval", endpoint, config_overrides, |client| { let initialize = client.initialize()?; println!("< initialize response: {initialize:?}"); let thread_response = client.thread_start(ThreadStartParams { dynamic_tools: dynamic_tools.clone(), ..Default::default() })?; println!("< thread/start response: {thread_response:?}"); client.command_approval_behavior = match abort_on { Some(index) => CommandApprovalBehavior::AbortOn(index), None => CommandApprovalBehavior::AlwaysAccept, }; client.command_approval_count = 0; client.command_approval_item_ids.clear(); client.command_execution_statuses.clear(); client.last_turn_status = None; let mut turn_params = TurnStartParams { thread_id: thread_response.thread.id.clone(), input: vec![V2UserInput::Text { text: message, text_elements: Vec::new(), }], ..Default::default() }; turn_params.approval_policy = Some(AskForApproval::OnRequest); turn_params.sandbox_policy = Some(SandboxPolicy::ReadOnly { access: ReadOnlyAccess::FullAccess, network_access: false, }); let turn_response = client.turn_start(turn_params)?; println!("< turn/start response: {turn_response:?}"); client.stream_turn(&thread_response.thread.id, &turn_response.turn.id)?; if client.command_approval_count < min_approvals { bail!( "expected at least {min_approvals} command approvals, got {}", client.command_approval_count ); } let mut approvals_per_item = std::collections::BTreeMap::new(); for item_id in &client.command_approval_item_ids { *approvals_per_item.entry(item_id.clone()).or_insert(0usize) += 1; } let max_approvals_for_one_item = approvals_per_item.values().copied().max().unwrap_or(0); if max_approvals_for_one_item < min_approvals { bail!( "expected at least {min_approvals} approvals for one command item, got max {max_approvals_for_one_item} with map {approvals_per_item:?}" ); } let last_command_status = client.command_execution_statuses.last(); if abort_on.is_none() { if last_command_status != Some(&CommandExecutionStatus::Completed) { bail!("expected completed command execution, got {last_command_status:?}"); } if client.last_turn_status != Some(TurnStatus::Completed) { bail!( "expected completed turn in all-accept flow, got {:?}", client.last_turn_status ); } } else if last_command_status == Some(&CommandExecutionStatus::Completed) { bail!( "expected non-completed command execution in mixed approval/decline flow, got {last_command_status:?}" ); } println!( "[zsh-fork multi-approval summary] approvals={}, approvals_per_item={approvals_per_item:?}, command_statuses={:?}, turn_status={:?}", client.command_approval_count, client.command_execution_statuses, client.last_turn_status ); Ok(()) }, ) .await } async fn resume_message_v2( endpoint: &Endpoint, config_overrides: &[String], thread_id: String, user_message: String, dynamic_tools: &Option>, ) -> Result<()> { ensure_dynamic_tools_unused(dynamic_tools, "resume-message-v2")?; with_client("resume-message-v2", endpoint, config_overrides, |client| { let initialize = client.initialize()?; println!("< initialize response: {initialize:?}"); let resume_response = client.thread_resume(ThreadResumeParams { thread_id, ..Default::default() })?; println!("< thread/resume response: {resume_response:?}"); let turn_response = client.turn_start(TurnStartParams { thread_id: resume_response.thread.id.clone(), input: vec![V2UserInput::Text { text: user_message, text_elements: Vec::new(), }], ..Default::default() })?; println!("< turn/start response: {turn_response:?}"); client.stream_turn(&resume_response.thread.id, &turn_response.turn.id)?; Ok(()) }) .await } async fn thread_resume_follow( endpoint: &Endpoint, config_overrides: &[String], thread_id: String, ) -> Result<()> { with_client("thread-resume", endpoint, config_overrides, |client| { let initialize = client.initialize()?; println!("< initialize response: {initialize:?}"); let resume_response = client.thread_resume(ThreadResumeParams { thread_id, ..Default::default() })?; println!("< thread/resume response: {resume_response:?}"); println!("< streaming notifications until process is terminated"); client.stream_notifications_forever() }) .await } async fn watch(endpoint: &Endpoint, config_overrides: &[String]) -> Result<()> { with_client("watch", endpoint, config_overrides, |client| { let initialize = client.initialize()?; println!("< initialize response: {initialize:?}"); println!("< streaming inbound messages until process is terminated"); client.stream_notifications_forever() }) .await } async fn trigger_cmd_approval( endpoint: &Endpoint, config_overrides: &[String], user_message: Option, dynamic_tools: &Option>, ) -> Result<()> { let default_prompt = "Run `touch /tmp/should-trigger-approval` so I can confirm the file exists."; let message = user_message.unwrap_or_else(|| default_prompt.to_string()); send_message_v2_with_policies( endpoint, config_overrides, message, SendMessagePolicies { command_name: "trigger-cmd-approval", experimental_api: true, approval_policy: Some(AskForApproval::OnRequest), sandbox_policy: Some(SandboxPolicy::ReadOnly { access: ReadOnlyAccess::FullAccess, network_access: false, }), dynamic_tools, }, ) .await } async fn trigger_patch_approval( endpoint: &Endpoint, config_overrides: &[String], user_message: Option, dynamic_tools: &Option>, ) -> Result<()> { let default_prompt = "Create a file named APPROVAL_DEMO.txt containing a short hello message using apply_patch."; let message = user_message.unwrap_or_else(|| default_prompt.to_string()); send_message_v2_with_policies( endpoint, config_overrides, message, SendMessagePolicies { command_name: "trigger-patch-approval", experimental_api: true, approval_policy: Some(AskForApproval::OnRequest), sandbox_policy: Some(SandboxPolicy::ReadOnly { access: ReadOnlyAccess::FullAccess, network_access: false, }), dynamic_tools, }, ) .await } async fn no_trigger_cmd_approval( endpoint: &Endpoint, config_overrides: &[String], dynamic_tools: &Option>, ) -> Result<()> { let prompt = "Run `touch should_not_trigger_approval.txt`"; send_message_v2_with_policies( endpoint, config_overrides, prompt.to_string(), SendMessagePolicies { command_name: "no-trigger-cmd-approval", experimental_api: true, approval_policy: None, sandbox_policy: None, dynamic_tools, }, ) .await } async fn send_message_v2_with_policies( endpoint: &Endpoint, config_overrides: &[String], user_message: String, policies: SendMessagePolicies<'_>, ) -> Result<()> { with_client( policies.command_name, endpoint, config_overrides, |client| { let initialize = client.initialize_with_experimental_api(policies.experimental_api)?; println!("< initialize response: {initialize:?}"); let thread_response = client.thread_start(ThreadStartParams { dynamic_tools: policies.dynamic_tools.clone(), ..Default::default() })?; println!("< thread/start response: {thread_response:?}"); let mut turn_params = TurnStartParams { thread_id: thread_response.thread.id.clone(), input: vec![V2UserInput::Text { text: user_message, // Test client sends plain text without UI element ranges. text_elements: Vec::new(), }], ..Default::default() }; turn_params.approval_policy = policies.approval_policy; turn_params.sandbox_policy = policies.sandbox_policy; let turn_response = client.turn_start(turn_params)?; println!("< turn/start response: {turn_response:?}"); client.stream_turn(&thread_response.thread.id, &turn_response.turn.id)?; Ok(()) }, ) .await } async fn send_follow_up_v2( endpoint: &Endpoint, config_overrides: &[String], first_message: String, follow_up_message: String, dynamic_tools: &Option>, ) -> Result<()> { with_client("send-follow-up-v2", endpoint, config_overrides, |client| { let initialize = client.initialize()?; println!("< initialize response: {initialize:?}"); let thread_response = client.thread_start(ThreadStartParams { dynamic_tools: dynamic_tools.clone(), ..Default::default() })?; println!("< thread/start response: {thread_response:?}"); let first_turn_params = TurnStartParams { thread_id: thread_response.thread.id.clone(), input: vec![V2UserInput::Text { text: first_message, // Test client sends plain text without UI element ranges. text_elements: Vec::new(), }], ..Default::default() }; let first_turn_response = client.turn_start(first_turn_params)?; println!("< turn/start response (initial): {first_turn_response:?}"); client.stream_turn(&thread_response.thread.id, &first_turn_response.turn.id)?; let follow_up_params = TurnStartParams { thread_id: thread_response.thread.id.clone(), input: vec![V2UserInput::Text { text: follow_up_message, // Test client sends plain text without UI element ranges. text_elements: Vec::new(), }], ..Default::default() }; let follow_up_response = client.turn_start(follow_up_params)?; println!("< turn/start response (follow-up): {follow_up_response:?}"); client.stream_turn(&thread_response.thread.id, &follow_up_response.turn.id)?; Ok(()) }) .await } async fn test_login( endpoint: &Endpoint, config_overrides: &[String], device_code: bool, ) -> Result<()> { with_client("test-login", endpoint, config_overrides, |client| { let initialize = client.initialize()?; println!("< initialize response: {initialize:?}"); let login_response = if device_code { client.login_account_chatgpt_device_code()? } else { client.login_account_chatgpt()? }; println!("< account/login/start response: {login_response:?}"); let login_id = match login_response { LoginAccountResponse::Chatgpt { login_id, auth_url } => { println!("Open the following URL in your browser to continue:\n{auth_url}"); login_id } LoginAccountResponse::ChatgptDeviceCode { login_id, verification_url, user_code, } => { println!( "Open the following URL and enter the code to continue:\n{verification_url}\n\nCode: {user_code}" ); login_id } _ => bail!("expected chatgpt login response"), }; let completion = client.wait_for_account_login_completion(&login_id)?; println!("< account/login/completed notification: {completion:?}"); if completion.success { println!("Login succeeded."); Ok(()) } else { bail!( "login failed: {}", completion .error .as_deref() .unwrap_or("unknown error from account/login/completed") ); } }) .await } async fn get_account_rate_limits(endpoint: &Endpoint, config_overrides: &[String]) -> Result<()> { with_client( "get-account-rate-limits", endpoint, config_overrides, |client| { let initialize = client.initialize()?; println!("< initialize response: {initialize:?}"); let response = client.get_account_rate_limits()?; println!("< account/rateLimits/read response: {response:?}"); Ok(()) }, ) .await } async fn model_list(endpoint: &Endpoint, config_overrides: &[String]) -> Result<()> { with_client("model-list", endpoint, config_overrides, |client| { let initialize = client.initialize()?; println!("< initialize response: {initialize:?}"); let response = client.model_list(ModelListParams::default())?; println!("< model/list response: {response:?}"); Ok(()) }) .await } async fn thread_list(endpoint: &Endpoint, config_overrides: &[String], limit: u32) -> Result<()> { with_client("thread-list", endpoint, config_overrides, |client| { let initialize = client.initialize()?; println!("< initialize response: {initialize:?}"); let response = client.thread_list(ThreadListParams { cursor: None, limit: Some(limit), sort_key: None, model_providers: None, source_kinds: None, archived: None, cwd: None, search_term: None, })?; println!("< thread/list response: {response:?}"); Ok(()) }) .await } async fn with_client( command_name: &'static str, endpoint: &Endpoint, config_overrides: &[String], f: impl FnOnce(&mut CodexClient) -> Result, ) -> Result { let tracing = TestClientTracing::initialize(config_overrides).await?; let command_span = info_span!( "app_server_test_client.command", otel.kind = "client", otel.name = command_name, app_server_test_client.command = command_name, ); let trace_summary = command_span.in_scope(|| TraceSummary::capture(tracing.traces_enabled)); let result = command_span.in_scope(|| { let mut client = CodexClient::connect(endpoint, config_overrides)?; f(&mut client) }); print_trace_summary(&trace_summary); result } fn thread_increment_elicitation(url: &str, thread_id: String) -> Result<()> { let endpoint = Endpoint::ConnectWs(url.to_string()); let mut client = CodexClient::connect(&endpoint, &[])?; let initialize = client.initialize()?; println!("< initialize response: {initialize:?}"); let response = client.thread_increment_elicitation(ThreadIncrementElicitationParams { thread_id })?; println!("< thread/increment_elicitation response: {response:?}"); Ok(()) } fn thread_decrement_elicitation(url: &str, thread_id: String) -> Result<()> { let endpoint = Endpoint::ConnectWs(url.to_string()); let mut client = CodexClient::connect(&endpoint, &[])?; let initialize = client.initialize()?; println!("< initialize response: {initialize:?}"); let response = client.thread_decrement_elicitation(ThreadDecrementElicitationParams { thread_id })?; println!("< thread/decrement_elicitation response: {response:?}"); Ok(()) } fn live_elicitation_timeout_pause( codex_bin: Option, url: Option, config_overrides: &[String], model: String, workspace: PathBuf, script: Option, hold_seconds: u64, ) -> Result<()> { if cfg!(windows) { bail!("live-elicitation-timeout-pause currently requires a POSIX shell"); } if hold_seconds <= 10 { bail!("--hold-seconds must be greater than 10 to exceed the unified exec timeout"); } let mut _background_server = None; let websocket_url = match (codex_bin, url) { (Some(_), Some(_)) => bail!("--codex-bin and --url are mutually exclusive"), (Some(codex_bin), None) => { let server = BackgroundAppServer::spawn(&codex_bin, config_overrides)?; let websocket_url = server.url.clone(); _background_server = Some(server); websocket_url } (None, Some(url)) => url, (None, None) => "ws://127.0.0.1:4222".to_string(), }; let script_path = script.unwrap_or_else(|| { PathBuf::from(env!("CARGO_MANIFEST_DIR")) .join("scripts") .join("live_elicitation_hold.sh") }); if !script_path.is_file() { bail!("helper script not found: {}", script_path.display()); } let workspace = workspace .canonicalize() .with_context(|| format!("failed to resolve workspace `{}`", workspace.display()))?; let app_server_test_client_bin = std::env::current_exe() .context("failed to resolve codex-app-server-test-client binary path")?; let endpoint = Endpoint::ConnectWs(websocket_url.clone()); let mut client = CodexClient::connect(&endpoint, &[])?; let initialize = client.initialize()?; println!("< initialize response: {initialize:?}"); let thread_response = client.thread_start(ThreadStartParams { model: Some(model), ..Default::default() })?; println!("< thread/start response: {thread_response:?}"); let thread_id = thread_response.thread.id; let command = format!( "APP_SERVER_URL={} APP_SERVER_TEST_CLIENT_BIN={} ELICITATION_HOLD_SECONDS={} sh {}", shell_quote(&websocket_url), shell_quote(&app_server_test_client_bin.display().to_string()), hold_seconds, shell_quote(&script_path.display().to_string()), ); let prompt = format!( "Use the `exec_command` tool exactly once. Set its `cmd` field to the exact shell command below. Do not rewrite it, do not split it, do not call any other tool, do not set `yield_time_ms`, and wait for the command to finish before replying.\n\n{command}\n\nAfter the command finishes, reply with exactly `DONE`." ); let started_at = Instant::now(); let turn_response = client.turn_start(TurnStartParams { thread_id: thread_id.clone(), input: vec![V2UserInput::Text { text: prompt, text_elements: Vec::new(), }], approval_policy: Some(AskForApproval::Never), sandbox_policy: Some(SandboxPolicy::DangerFullAccess), effort: Some(ReasoningEffort::High), cwd: Some(workspace), ..Default::default() })?; println!("< turn/start response: {turn_response:?}"); let stream_result = client.stream_turn(&thread_id, &turn_response.turn.id); let elapsed = started_at.elapsed(); let validation_result = (|| -> Result<()> { stream_result?; let helper_output = client .command_execution_outputs .iter() .find(|output| output.contains("[elicitation-hold]")) .cloned() .ok_or_else(|| anyhow::anyhow!("expected helper script markers in command output"))?; let minimum_elapsed = Duration::from_secs(hold_seconds.saturating_sub(1)); if client.last_turn_status != Some(TurnStatus::Completed) { bail!( "expected completed turn, got {:?} (last error: {:?})", client.last_turn_status, client.last_turn_error_message ); } if !client .command_execution_statuses .contains(&CommandExecutionStatus::Completed) { bail!( "expected a completed command execution, got {:?}", client.command_execution_statuses ); } if !client.helper_done_seen || !helper_output.contains("[elicitation-hold] done") { bail!( "expected helper script completion marker in command output, got: {helper_output:?}" ); } if !client.unexpected_items_before_helper_done.is_empty() { bail!( "turn started new items before helper completion: {:?}", client.unexpected_items_before_helper_done ); } if client.turn_completed_before_helper_done { bail!("turn completed before helper script finished"); } if elapsed < minimum_elapsed { bail!( "turn completed too quickly to prove timeout pause worked: elapsed={elapsed:?}, expected at least {minimum_elapsed:?}" ); } Ok(()) })(); match client.thread_decrement_elicitation(ThreadDecrementElicitationParams { thread_id: thread_id.clone(), }) { Ok(response) => { println!("[cleanup] thread/decrement_elicitation response after harness: {response:?}"); } Err(err) => { eprintln!("[cleanup] thread/decrement_elicitation ignored: {err:#}"); } } validation_result?; println!( "[live elicitation timeout pause summary] thread_id={thread_id}, turn_id={}, elapsed={elapsed:?}, command_statuses={:?}", turn_response.turn.id, client.command_execution_statuses ); Ok(()) } fn ensure_dynamic_tools_unused( dynamic_tools: &Option>, command: &str, ) -> Result<()> { if dynamic_tools.is_some() { bail!( "dynamic tools are only supported for v2 thread/start; remove --dynamic-tools for {command} or use send-message-v2" ); } Ok(()) } fn parse_dynamic_tools_arg(dynamic_tools: &Option) -> Result>> { let Some(raw_arg) = dynamic_tools.as_deref() else { return Ok(None); }; let raw_json = if let Some(path) = raw_arg.strip_prefix('@') { fs::read_to_string(Path::new(path)) .with_context(|| format!("read dynamic tools file {path}"))? } else { raw_arg.to_string() }; let value: Value = serde_json::from_str(&raw_json).context("parse dynamic tools JSON")?; let tools = match value { Value::Array(_) => serde_json::from_value(value).context("decode dynamic tools array")?, Value::Object(_) => vec![serde_json::from_value(value).context("decode dynamic tool")?], _ => bail!("dynamic tools JSON must be an object or array"), }; Ok(Some(tools)) } enum ClientTransport { Stdio { child: Child, stdin: Option, stdout: BufReader, }, WebSocket { url: String, socket: Box>>, }, } struct CodexClient { transport: ClientTransport, pending_notifications: VecDeque, command_approval_behavior: CommandApprovalBehavior, command_approval_count: usize, command_approval_item_ids: Vec, command_execution_statuses: Vec, command_execution_outputs: Vec, command_output_stream: String, command_item_started: bool, helper_done_seen: bool, turn_completed_before_helper_done: bool, unexpected_items_before_helper_done: Vec, last_turn_status: Option, last_turn_error_message: Option, } #[derive(Debug, Clone, Copy)] enum CommandApprovalBehavior { AlwaysAccept, AbortOn(usize), } fn item_started_before_helper_done_is_unexpected( item: &ThreadItem, command_item_started: bool, helper_done_seen: bool, ) -> bool { if !command_item_started || helper_done_seen { return false; } !matches!(item, ThreadItem::UserMessage { .. }) } impl CodexClient { fn connect(endpoint: &Endpoint, config_overrides: &[String]) -> Result { match endpoint { Endpoint::SpawnCodex(codex_bin) => Self::spawn_stdio(codex_bin, config_overrides), Endpoint::ConnectWs(url) => Self::connect_websocket(url), } } fn spawn_stdio(codex_bin: &Path, config_overrides: &[String]) -> Result { let codex_bin_display = codex_bin.display(); let mut cmd = Command::new(codex_bin); if let Some(codex_bin_parent) = codex_bin.parent() { let mut path = OsString::from(codex_bin_parent.as_os_str()); if let Some(existing_path) = std::env::var_os("PATH") { path.push(":"); path.push(existing_path); } cmd.env("PATH", path); } for override_kv in config_overrides { cmd.arg("--config").arg(override_kv); } let mut codex_app_server = cmd .arg("app-server") .stdin(Stdio::piped()) .stdout(Stdio::piped()) .stderr(Stdio::inherit()) .spawn() .with_context(|| format!("failed to start `{codex_bin_display}` app-server"))?; let stdin = codex_app_server .stdin .take() .context("codex app-server stdin unavailable")?; let stdout = codex_app_server .stdout .take() .context("codex app-server stdout unavailable")?; Ok(Self { transport: ClientTransport::Stdio { child: codex_app_server, stdin: Some(stdin), stdout: BufReader::new(stdout), }, pending_notifications: VecDeque::new(), command_approval_behavior: CommandApprovalBehavior::AlwaysAccept, command_approval_count: 0, command_approval_item_ids: Vec::new(), command_execution_statuses: Vec::new(), command_execution_outputs: Vec::new(), command_output_stream: String::new(), command_item_started: false, helper_done_seen: false, turn_completed_before_helper_done: false, unexpected_items_before_helper_done: Vec::new(), last_turn_status: None, last_turn_error_message: None, }) } fn connect_websocket(url: &str) -> Result { let parsed = Url::parse(url).with_context(|| format!("invalid websocket URL `{url}`"))?; let deadline = Instant::now() + Duration::from_secs(10); let (socket, _response) = loop { match connect(parsed.as_str()) { Ok(result) => break result, Err(err) => { if Instant::now() >= deadline { return Err(err).with_context(|| { format!( "failed to connect to websocket app-server at `{url}`; if no server is running, start one with `codex-app-server-test-client serve --listen {url}`" ) }); } thread::sleep(Duration::from_millis(50)); } } }; Ok(Self { transport: ClientTransport::WebSocket { url: url.to_string(), socket: Box::new(socket), }, pending_notifications: VecDeque::new(), command_approval_behavior: CommandApprovalBehavior::AlwaysAccept, command_approval_count: 0, command_approval_item_ids: Vec::new(), command_execution_statuses: Vec::new(), command_execution_outputs: Vec::new(), command_output_stream: String::new(), command_item_started: false, helper_done_seen: false, turn_completed_before_helper_done: false, unexpected_items_before_helper_done: Vec::new(), last_turn_status: None, last_turn_error_message: None, }) } fn note_helper_output(&mut self, output: &str) { self.command_output_stream.push_str(output); if self .command_output_stream .contains("[elicitation-hold] done") { self.helper_done_seen = true; } } fn initialize(&mut self) -> Result { self.initialize_with_experimental_api(/*experimental_api*/ true) } fn initialize_with_experimental_api( &mut self, experimental_api: bool, ) -> Result { let request_id = self.request_id(); let request = ClientRequest::Initialize { request_id: request_id.clone(), params: InitializeParams { client_info: ClientInfo { name: "codex-toy-app-server".to_string(), title: Some("Codex Toy App Server".to_string()), version: env!("CARGO_PKG_VERSION").to_string(), }, capabilities: Some(InitializeCapabilities { experimental_api, opt_out_notification_methods: Some( NOTIFICATIONS_TO_OPT_OUT .iter() .map(|method| (*method).to_string()) .collect(), ), }), }, }; let response: InitializeResponse = self.send_request(request, request_id, "initialize")?; // Complete the initialize handshake. let initialized = JSONRPCMessage::Notification(JSONRPCNotification { method: "initialized".to_string(), params: None, }); self.write_jsonrpc_message(initialized)?; Ok(response) } fn thread_start(&mut self, params: ThreadStartParams) -> Result { let request_id = self.request_id(); let request = ClientRequest::ThreadStart { request_id: request_id.clone(), params, }; self.send_request(request, request_id, "thread/start") } fn thread_resume(&mut self, params: ThreadResumeParams) -> Result { let request_id = self.request_id(); let request = ClientRequest::ThreadResume { request_id: request_id.clone(), params, }; self.send_request(request, request_id, "thread/resume") } fn turn_start(&mut self, params: TurnStartParams) -> Result { let request_id = self.request_id(); let request = ClientRequest::TurnStart { request_id: request_id.clone(), params, }; self.send_request(request, request_id, "turn/start") } fn login_account_chatgpt(&mut self) -> Result { let request_id = self.request_id(); let request = ClientRequest::LoginAccount { request_id: request_id.clone(), params: codex_app_server_protocol::LoginAccountParams::Chatgpt, }; self.send_request(request, request_id, "account/login/start") } fn login_account_chatgpt_device_code(&mut self) -> Result { let request_id = self.request_id(); let request = ClientRequest::LoginAccount { request_id: request_id.clone(), params: codex_app_server_protocol::LoginAccountParams::ChatgptDeviceCode, }; self.send_request(request, request_id, "account/login/start") } fn get_account_rate_limits(&mut self) -> Result { let request_id = self.request_id(); let request = ClientRequest::GetAccountRateLimits { request_id: request_id.clone(), params: None, }; self.send_request(request, request_id, "account/rateLimits/read") } fn model_list(&mut self, params: ModelListParams) -> Result { let request_id = self.request_id(); let request = ClientRequest::ModelList { request_id: request_id.clone(), params, }; self.send_request(request, request_id, "model/list") } fn thread_list(&mut self, params: ThreadListParams) -> Result { let request_id = self.request_id(); let request = ClientRequest::ThreadList { request_id: request_id.clone(), params, }; self.send_request(request, request_id, "thread/list") } fn thread_increment_elicitation( &mut self, params: ThreadIncrementElicitationParams, ) -> Result { let request_id = self.request_id(); let request = ClientRequest::ThreadIncrementElicitation { request_id: request_id.clone(), params, }; self.send_request(request, request_id, "thread/increment_elicitation") } fn thread_decrement_elicitation( &mut self, params: ThreadDecrementElicitationParams, ) -> Result { let request_id = self.request_id(); let request = ClientRequest::ThreadDecrementElicitation { request_id: request_id.clone(), params, }; self.send_request(request, request_id, "thread/decrement_elicitation") } fn wait_for_account_login_completion( &mut self, expected_login_id: &str, ) -> Result { loop { let notification = self.next_notification()?; if let Ok(server_notification) = ServerNotification::try_from(notification) { match server_notification { ServerNotification::AccountLoginCompleted(completion) => { if completion.login_id.as_deref() == Some(expected_login_id) { return Ok(completion); } println!( "[ignoring account/login/completed for unexpected login_id: {:?}]", completion.login_id ); } ServerNotification::AccountRateLimitsUpdated(snapshot) => { println!("< accountRateLimitsUpdated notification: {snapshot:?}"); } _ => {} } } } } fn stream_turn(&mut self, thread_id: &str, turn_id: &str) -> Result<()> { loop { let notification = self.next_notification()?; let Ok(server_notification) = ServerNotification::try_from(notification) else { continue; }; match server_notification { ServerNotification::ThreadStarted(payload) => { if payload.thread.id == thread_id { println!("< thread/started notification: {:?}", payload.thread); } } ServerNotification::TurnStarted(payload) => { if payload.turn.id == turn_id { println!("< turn/started notification: {:?}", payload.turn.status); } } ServerNotification::AgentMessageDelta(delta) => { print!("{}", delta.delta); std::io::stdout().flush().ok(); } ServerNotification::CommandExecutionOutputDelta(delta) => { self.note_helper_output(&delta.delta); print!("{}", delta.delta); std::io::stdout().flush().ok(); } ServerNotification::TerminalInteraction(delta) => { println!("[stdin sent: {}]", delta.stdin); std::io::stdout().flush().ok(); } ServerNotification::ItemStarted(payload) => { if matches!(payload.item, ThreadItem::CommandExecution { .. }) { if self.command_item_started && !self.helper_done_seen { self.unexpected_items_before_helper_done .push(payload.item.clone()); } self.command_item_started = true; } else if item_started_before_helper_done_is_unexpected( &payload.item, self.command_item_started, self.helper_done_seen, ) { self.unexpected_items_before_helper_done .push(payload.item.clone()); } println!("\n< item started: {:?}", payload.item); } ServerNotification::ItemCompleted(payload) => { if let ThreadItem::CommandExecution { status, aggregated_output, .. } = payload.item.clone() { self.command_execution_statuses.push(status); if let Some(aggregated_output) = aggregated_output { self.note_helper_output(&aggregated_output); self.command_execution_outputs.push(aggregated_output); } } println!("< item completed: {:?}", payload.item); } ServerNotification::TurnCompleted(payload) => { if payload.turn.id == turn_id { self.last_turn_status = Some(payload.turn.status.clone()); if self.command_item_started && !self.helper_done_seen { self.turn_completed_before_helper_done = true; } self.last_turn_error_message = payload .turn .error .as_ref() .map(|error| error.message.clone()); println!("\n< turn/completed notification: {:?}", payload.turn.status); if payload.turn.status == TurnStatus::Failed && let Some(error) = payload.turn.error { println!("[turn error] {}", error.message); } break; } } ServerNotification::McpToolCallProgress(payload) => { println!("< MCP tool progress: {}", payload.message); } _ => { println!("[UNKNOWN SERVER NOTIFICATION] {server_notification:?}"); } } } Ok(()) } fn stream_notifications_forever(&mut self) -> Result<()> { loop { let _ = self.next_notification()?; } } fn send_request( &mut self, request: ClientRequest, request_id: RequestId, method: &str, ) -> Result where T: DeserializeOwned, { let request_span = info_span!( "app_server_test_client.request", otel.kind = "client", otel.name = method, rpc.system = "jsonrpc", rpc.method = method, rpc.request_id = ?request_id, ); request_span.in_scope(|| { self.write_request(&request)?; self.wait_for_response(request_id, method) }) } fn write_request(&mut self, request: &ClientRequest) -> Result<()> { let request_value = serde_json::to_value(request)?; let mut request: JSONRPCRequest = serde_json::from_value(request_value) .context("client request was not a valid JSON-RPC request")?; request.trace = current_span_w3c_trace_context(); let request_json = serde_json::to_string(&request)?; let request_pretty = serde_json::to_string_pretty(&request)?; print_multiline_with_prefix("> ", &request_pretty); self.write_payload(&request_json) } fn wait_for_response(&mut self, request_id: RequestId, method: &str) -> Result where T: DeserializeOwned, { loop { let message = self.read_jsonrpc_message()?; match message { JSONRPCMessage::Response(JSONRPCResponse { id, result }) => { if id == request_id { return serde_json::from_value(result) .with_context(|| format!("{method} response missing payload")); } } JSONRPCMessage::Error(err) => { if err.id == request_id { bail!("{method} failed: {err:?}"); } } JSONRPCMessage::Notification(notification) => { self.pending_notifications.push_back(notification); } JSONRPCMessage::Request(request) => { self.handle_server_request(request)?; } } } } fn next_notification(&mut self) -> Result { if let Some(notification) = self.pending_notifications.pop_front() { return Ok(notification); } loop { let message = self.read_jsonrpc_message()?; match message { JSONRPCMessage::Notification(notification) => return Ok(notification), JSONRPCMessage::Response(_) | JSONRPCMessage::Error(_) => { // No outstanding requests, so ignore stray responses/errors for now. continue; } JSONRPCMessage::Request(request) => { self.handle_server_request(request)?; } } } } fn read_jsonrpc_message(&mut self) -> Result { loop { let raw = self.read_payload()?; let trimmed = raw.trim(); if trimmed.is_empty() { continue; } let parsed: Value = serde_json::from_str(trimmed).context("response was not valid JSON-RPC")?; let pretty = serde_json::to_string_pretty(&parsed)?; print_multiline_with_prefix("< ", &pretty); let message: JSONRPCMessage = serde_json::from_value(parsed) .context("response was not a valid JSON-RPC message")?; return Ok(message); } } fn request_id(&self) -> RequestId { RequestId::String(Uuid::new_v4().to_string()) } fn handle_server_request(&mut self, request: JSONRPCRequest) -> Result<()> { let server_request = ServerRequest::try_from(request) .context("failed to deserialize ServerRequest from JSONRPCRequest")?; match server_request { ServerRequest::CommandExecutionRequestApproval { request_id, params } => { self.handle_command_execution_request_approval(request_id, params)?; } ServerRequest::FileChangeRequestApproval { request_id, params } => { self.approve_file_change_request(request_id, params)?; } other => { bail!("received unsupported server request: {other:?}"); } } Ok(()) } fn handle_command_execution_request_approval( &mut self, request_id: RequestId, params: CommandExecutionRequestApprovalParams, ) -> Result<()> { let CommandExecutionRequestApprovalParams { thread_id, turn_id, item_id, approval_id, reason, network_approval_context, command, cwd, command_actions, additional_permissions, proposed_execpolicy_amendment, proposed_network_policy_amendments, available_decisions, } = params; println!( "\n< commandExecution approval requested for thread {thread_id}, turn {turn_id}, item {item_id}, approval {}", approval_id.as_deref().unwrap_or("") ); self.command_approval_count += 1; self.command_approval_item_ids.push(item_id.clone()); if let Some(reason) = reason.as_deref() { println!("< reason: {reason}"); } if let Some(network_approval_context) = network_approval_context.as_ref() { println!("< network approval context: {network_approval_context:?}"); } if let Some(available_decisions) = available_decisions.as_ref() { println!("< available decisions: {available_decisions:?}"); } if let Some(command) = command.as_deref() { println!("< command: {command}"); } if let Some(cwd) = cwd.as_ref() { println!("< cwd: {}", cwd.display()); } if let Some(command_actions) = command_actions.as_ref() && !command_actions.is_empty() { println!("< command actions: {command_actions:?}"); } if let Some(additional_permissions) = additional_permissions.as_ref() { println!("< additional permissions: {additional_permissions:?}"); } if let Some(execpolicy_amendment) = proposed_execpolicy_amendment.as_ref() { println!("< proposed execpolicy amendment: {execpolicy_amendment:?}"); } if let Some(network_policy_amendments) = proposed_network_policy_amendments.as_ref() { println!("< proposed network policy amendments: {network_policy_amendments:?}"); } let decision = match self.command_approval_behavior { CommandApprovalBehavior::AlwaysAccept => CommandExecutionApprovalDecision::Accept, CommandApprovalBehavior::AbortOn(index) if self.command_approval_count == index => { CommandExecutionApprovalDecision::Cancel } CommandApprovalBehavior::AbortOn(_) => CommandExecutionApprovalDecision::Accept, }; let response = CommandExecutionRequestApprovalResponse { decision: decision.clone(), }; self.send_server_request_response(request_id, &response)?; println!( "< commandExecution decision for approval #{} on item {item_id}: {:?}", self.command_approval_count, decision ); Ok(()) } fn approve_file_change_request( &mut self, request_id: RequestId, params: FileChangeRequestApprovalParams, ) -> Result<()> { let FileChangeRequestApprovalParams { thread_id, turn_id, item_id, reason, grant_root, } = params; println!( "\n< fileChange approval requested for thread {thread_id}, turn {turn_id}, item {item_id}" ); if let Some(reason) = reason.as_deref() { println!("< reason: {reason}"); } if let Some(grant_root) = grant_root.as_deref() { println!("< grant root: {}", grant_root.display()); } let response = FileChangeRequestApprovalResponse { decision: FileChangeApprovalDecision::Accept, }; self.send_server_request_response(request_id, &response)?; println!("< approved fileChange request for item {item_id}"); Ok(()) } fn send_server_request_response(&mut self, request_id: RequestId, response: &T) -> Result<()> where T: Serialize, { let message = JSONRPCMessage::Response(JSONRPCResponse { id: request_id, result: serde_json::to_value(response)?, }); self.write_jsonrpc_message(message) } fn write_jsonrpc_message(&mut self, message: JSONRPCMessage) -> Result<()> { let payload = serde_json::to_string(&message)?; let pretty = serde_json::to_string_pretty(&message)?; print_multiline_with_prefix("> ", &pretty); self.write_payload(&payload) } fn write_payload(&mut self, payload: &str) -> Result<()> { match &mut self.transport { ClientTransport::Stdio { stdin, .. } => { if let Some(stdin) = stdin.as_mut() { writeln!(stdin, "{payload}")?; stdin .flush() .context("failed to flush payload to codex app-server")?; return Ok(()); } bail!("codex app-server stdin closed") } ClientTransport::WebSocket { socket, url } => { socket .send(Message::Text(payload.to_string().into())) .with_context(|| format!("failed to write websocket message to `{url}`"))?; Ok(()) } } } fn read_payload(&mut self) -> Result { match &mut self.transport { ClientTransport::Stdio { stdout, .. } => { let mut response_line = String::new(); let bytes = stdout .read_line(&mut response_line) .context("failed to read from codex app-server")?; if bytes == 0 { bail!("codex app-server closed stdout"); } Ok(response_line) } ClientTransport::WebSocket { socket, url } => loop { let frame = socket .read() .with_context(|| format!("failed to read websocket message from `{url}`"))?; match frame { Message::Text(text) => return Ok(text.to_string()), Message::Binary(_) | Message::Ping(_) | Message::Pong(_) => continue, Message::Close(_) => { bail!("websocket app-server at `{url}` closed the connection") } Message::Frame(_) => continue, } }, } } } fn print_multiline_with_prefix(prefix: &str, payload: &str) { for line in payload.lines() { println!("{prefix}{line}"); } } struct TestClientTracing { _otel_provider: Option, traces_enabled: bool, } impl TestClientTracing { async fn initialize(config_overrides: &[String]) -> Result { let cli_kv_overrides = CliConfigOverrides { raw_overrides: config_overrides.to_vec(), } .parse_overrides() .map_err(|e| anyhow::anyhow!("error parsing -c overrides: {e}"))?; let config = Config::load_with_cli_overrides(cli_kv_overrides) .await .context("error loading config")?; let otel_provider = codex_core::otel_init::build_provider( &config, env!("CARGO_PKG_VERSION"), Some(OTEL_SERVICE_NAME), DEFAULT_ANALYTICS_ENABLED, ) .map_err(|e| anyhow::anyhow!("error loading otel config: {e}"))?; let traces_enabled = otel_provider .as_ref() .and_then(|provider| provider.tracer_provider.as_ref()) .is_some(); if let Some(provider) = otel_provider.as_ref() && traces_enabled { let _ = tracing_subscriber::registry() .with(provider.tracing_layer()) .try_init(); } Ok(Self { traces_enabled, _otel_provider: otel_provider, }) } } #[derive(Debug, Clone, PartialEq, Eq)] enum TraceSummary { Enabled { url: String }, Disabled, } impl TraceSummary { fn capture(traces_enabled: bool) -> Self { if !traces_enabled { return Self::Disabled; } current_span_w3c_trace_context() .as_ref() .and_then(trace_url_from_context) .map_or(Self::Disabled, |url| Self::Enabled { url }) } } fn trace_url_from_context(trace: &W3cTraceContext) -> Option { let traceparent = trace.traceparent.as_deref()?; let mut parts = traceparent.split('-'); match (parts.next(), parts.next(), parts.next(), parts.next()) { (Some(_version), Some(trace_id), Some(_span_id), Some(_trace_flags)) if trace_id.len() == 32 => { Some(format!("go/trace/{trace_id}")) } _ => None, } } fn print_trace_summary(trace_summary: &TraceSummary) { println!("\n[Datadog trace]"); match trace_summary { TraceSummary::Enabled { url } => println!("{url}\n"), TraceSummary::Disabled => println!("{TRACE_DISABLED_MESSAGE}\n"), } } impl Drop for CodexClient { fn drop(&mut self) { let ClientTransport::Stdio { child, stdin, .. } = &mut self.transport else { return; }; let _ = stdin.take(); if let Ok(Some(status)) = child.try_wait() { println!("[codex app-server exited: {status}]"); return; } let deadline = SystemTime::now() + APP_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT; loop { if let Ok(Some(status)) = child.try_wait() { println!("[codex app-server exited: {status}]"); return; } if SystemTime::now() >= deadline { break; } thread::sleep(APP_SERVER_GRACEFUL_SHUTDOWN_POLL_INTERVAL); } let _ = child.kill(); let _ = child.wait(); } }