feat(app-server): propagate traces across tasks and core ops (#14387)

## Summary

This PR keeps app-server RPC request trace context alive for the full
lifetime of the work that request kicks off (e.g. for `thread/start`,
this is `app-server rpc handler -> tokio background task -> core op
submissions`). Previously we lose trace lineage once the request handler
returns or hands work off to background tasks.

This approach is especially relevant for `thread/start` and other RPC
handlers that run in a non-blocking way. In the near future we'll most
likely want to make all app-server handlers run in a non-blocking way by
default, and only queue operations that must run in order (e.g.,
thread RPCs ordered per thread), so we want to make sure tracing in app-server
just generally works.

Depends on https://github.com/openai/codex/pull/14300

**Before**
<img width="155" height="207" alt="image"
src="https://github.com/user-attachments/assets/c9487459-36f1-436c-beb7-fafeb40737af"
/>


**After**
<img width="299" height="337" alt="image"
src="https://github.com/user-attachments/assets/727392b2-d072-4427-9dc4-0502d8652dea"
/>

## What changed

- Keep request-scoped trace context around until we send the final
response or error, or the connection closes.
- Thread that trace context through detached `thread/start` work so
background startup stays attached to the originating request.
- Pass request trace context through to downstream core operations,
including:
  - thread creation
  - resume/fork flows
  - turn submission
  - review
  - interrupt
  - realtime conversation operations
- Add tracing tests that verify:
  - remote W3C trace context is preserved for `thread/start`
  - remote W3C trace context is preserved for `turn/start`
  - downstream core spans stay under the originating request span
  - request-scoped tracing state is cleaned up correctly
- Clean up shutdown behavior so detached background tasks and spawned
threads are drained before process exit.
This commit is contained in:
Owen Lin
2026-03-11 20:18:31 -07:00
committed by GitHub
parent bf5e997b31
commit 5bc82c5b93
24 changed files with 1524 additions and 308 deletions

View File

@@ -3,6 +3,7 @@ use crate::CodexAuth;
use crate::ModelProviderInfo;
use crate::agent::AgentControl;
use crate::codex::Codex;
use crate::codex::CodexSpawnArgs;
use crate::codex::CodexSpawnOk;
use crate::codex::INITIAL_SUBMIT_ID;
use crate::codex_thread::CodexThread;
@@ -30,11 +31,15 @@ use codex_protocol::protocol::McpServerRefreshConfig;
use codex_protocol::protocol::Op;
use codex_protocol::protocol::RolloutItem;
use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::W3cTraceContext;
use futures::StreamExt;
use futures::stream::FuturesUnordered;
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;
use std::time::Duration;
use tokio::runtime::Handle;
use tokio::runtime::RuntimeFlavor;
use tokio::sync::RwLock;
@@ -118,6 +123,19 @@ pub struct NewThread {
pub session_configured: SessionConfiguredEvent,
}
/// Per-thread outcome of a bounded shutdown pass over all tracked threads.
///
/// Produced by `shutdown_all_threads_bounded`. Threads listed in `completed`
/// are removed from the manager; the other two buckets remain tracked so
/// callers can retry or inspect them later.
#[derive(Debug, Default, PartialEq, Eq)]
pub struct ThreadShutdownReport {
    /// Threads whose shutdown finished cleanly within the timeout.
    pub completed: Vec<ThreadId>,
    /// Threads whose shutdown attempt returned an error.
    pub submit_failed: Vec<ThreadId>,
    /// Threads that did not finish shutting down before the timeout elapsed.
    pub timed_out: Vec<ThreadId>,
}
/// Internal result of a single thread's bounded shutdown attempt; aggregated
/// into a `ThreadShutdownReport`.
enum ShutdownOutcome {
    /// `shutdown_and_wait` returned `Ok(())` before the timeout.
    Complete,
    /// `shutdown_and_wait` returned an error before the timeout.
    SubmitFailed,
    /// The timeout elapsed before shutdown completed.
    TimedOut,
}
/// [`ThreadManager`] is responsible for creating threads and maintaining
/// them in memory.
pub struct ThreadManager {
@@ -329,6 +347,7 @@ impl ThreadManager {
dynamic_tools,
persist_extended_history,
None,
None,
))
.await
}
@@ -339,6 +358,7 @@ impl ThreadManager {
dynamic_tools: Vec<codex_protocol::dynamic_tools::DynamicToolSpec>,
persist_extended_history: bool,
metrics_service_name: Option<String>,
parent_trace: Option<W3cTraceContext>,
) -> CodexResult<NewThread> {
Box::pin(self.state.spawn_thread(
config,
@@ -348,6 +368,7 @@ impl ThreadManager {
dynamic_tools,
persist_extended_history,
metrics_service_name,
parent_trace,
))
.await
}
@@ -357,10 +378,17 @@ impl ThreadManager {
config: Config,
rollout_path: PathBuf,
auth_manager: Arc<AuthManager>,
parent_trace: Option<W3cTraceContext>,
) -> CodexResult<NewThread> {
let initial_history = RolloutRecorder::get_rollout_history(&rollout_path).await?;
Box::pin(self.resume_thread_with_history(config, initial_history, auth_manager, false))
.await
Box::pin(self.resume_thread_with_history(
config,
initial_history,
auth_manager,
false,
parent_trace,
))
.await
}
pub async fn resume_thread_with_history(
@@ -369,6 +397,7 @@ impl ThreadManager {
initial_history: InitialHistory,
auth_manager: Arc<AuthManager>,
persist_extended_history: bool,
parent_trace: Option<W3cTraceContext>,
) -> CodexResult<NewThread> {
Box::pin(self.state.spawn_thread(
config,
@@ -378,6 +407,7 @@ impl ThreadManager {
Vec::new(),
persist_extended_history,
None,
parent_trace,
))
.await
}
@@ -389,13 +419,55 @@ impl ThreadManager {
self.state.threads.write().await.remove(thread_id)
}
/// Closes all threads open in this ThreadManager
pub async fn remove_and_close_all_threads(&self) -> CodexResult<()> {
for thread in self.state.threads.read().await.values() {
thread.submit(Op::Shutdown).await?;
/// Tries to shut down all tracked threads concurrently within the provided timeout.
/// Threads that complete shutdown are removed from the manager; incomplete shutdowns
/// remain tracked so callers can retry or inspect them later.
pub async fn shutdown_all_threads_bounded(&self, timeout: Duration) -> ThreadShutdownReport {
let threads = {
let threads = self.state.threads.read().await;
threads
.iter()
.map(|(thread_id, thread)| (*thread_id, Arc::clone(thread)))
.collect::<Vec<_>>()
};
let mut shutdowns = threads
.into_iter()
.map(|(thread_id, thread)| async move {
let outcome = match tokio::time::timeout(timeout, thread.shutdown_and_wait()).await
{
Ok(Ok(())) => ShutdownOutcome::Complete,
Ok(Err(_)) => ShutdownOutcome::SubmitFailed,
Err(_) => ShutdownOutcome::TimedOut,
};
(thread_id, outcome)
})
.collect::<FuturesUnordered<_>>();
let mut report = ThreadShutdownReport::default();
while let Some((thread_id, outcome)) = shutdowns.next().await {
match outcome {
ShutdownOutcome::Complete => report.completed.push(thread_id),
ShutdownOutcome::SubmitFailed => report.submit_failed.push(thread_id),
ShutdownOutcome::TimedOut => report.timed_out.push(thread_id),
}
}
self.state.threads.write().await.clear();
Ok(())
let mut tracked_threads = self.state.threads.write().await;
for thread_id in &report.completed {
tracked_threads.remove(thread_id);
}
report
.completed
.sort_by_key(std::string::ToString::to_string);
report
.submit_failed
.sort_by_key(std::string::ToString::to_string);
report
.timed_out
.sort_by_key(std::string::ToString::to_string);
report
}
/// Fork an existing thread by taking messages up to the given position (not including
@@ -408,6 +480,7 @@ impl ThreadManager {
config: Config,
path: PathBuf,
persist_extended_history: bool,
parent_trace: Option<W3cTraceContext>,
) -> CodexResult<NewThread> {
let history = RolloutRecorder::get_rollout_history(&path).await?;
let history = truncate_before_nth_user_message(history, nth_user_message);
@@ -419,6 +492,7 @@ impl ThreadManager {
Vec::new(),
persist_extended_history,
None,
parent_trace,
))
.await
}
@@ -503,6 +577,7 @@ impl ThreadManagerState {
persist_extended_history,
metrics_service_name,
inherited_shell_snapshot,
None,
))
.await
}
@@ -526,6 +601,7 @@ impl ThreadManagerState {
false,
None,
inherited_shell_snapshot,
None,
))
.await
}
@@ -549,6 +625,7 @@ impl ThreadManagerState {
persist_extended_history,
None,
inherited_shell_snapshot,
None,
))
.await
}
@@ -564,6 +641,7 @@ impl ThreadManagerState {
dynamic_tools: Vec<codex_protocol::dynamic_tools::DynamicToolSpec>,
persist_extended_history: bool,
metrics_service_name: Option<String>,
parent_trace: Option<W3cTraceContext>,
) -> CodexResult<NewThread> {
Box::pin(self.spawn_thread_with_source(
config,
@@ -575,6 +653,7 @@ impl ThreadManagerState {
persist_extended_history,
metrics_service_name,
None,
parent_trace,
))
.await
}
@@ -591,28 +670,30 @@ impl ThreadManagerState {
persist_extended_history: bool,
metrics_service_name: Option<String>,
inherited_shell_snapshot: Option<Arc<ShellSnapshot>>,
parent_trace: Option<W3cTraceContext>,
) -> CodexResult<NewThread> {
let watch_registration = self
.file_watcher
.register_config(&config, self.skills_manager.as_ref());
let CodexSpawnOk {
codex, thread_id, ..
} = Codex::spawn(
} = Codex::spawn(CodexSpawnArgs {
config,
auth_manager,
Arc::clone(&self.models_manager),
Arc::clone(&self.skills_manager),
Arc::clone(&self.plugins_manager),
Arc::clone(&self.mcp_manager),
Arc::clone(&self.file_watcher),
initial_history,
models_manager: Arc::clone(&self.models_manager),
skills_manager: Arc::clone(&self.skills_manager),
plugins_manager: Arc::clone(&self.plugins_manager),
mcp_manager: Arc::clone(&self.mcp_manager),
file_watcher: Arc::clone(&self.file_watcher),
conversation_history: initial_history,
session_source,
agent_control,
dynamic_tools,
persist_extended_history,
metrics_service_name,
inherited_shell_snapshot,
)
parent_trace,
})
.await?;
self.finalize_thread_spawn(codex, thread_id, watch_registration)
.await
@@ -672,11 +753,14 @@ fn truncate_before_nth_user_message(history: InitialHistory, n: usize) -> Initia
mod tests {
use super::*;
use crate::codex::make_session_and_context;
use crate::config::test_config;
use assert_matches::assert_matches;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ReasoningItemReasoningSummary;
use codex_protocol::models::ResponseItem;
use pretty_assertions::assert_eq;
use std::time::Duration;
use tempfile::tempdir;
fn user_msg(text: &str) -> ResponseItem {
ResponseItem::Message {
@@ -783,4 +867,40 @@ mod tests {
serde_json::to_value(&expected).unwrap()
);
}
#[tokio::test]
async fn shutdown_all_threads_bounded_submits_shutdown_to_every_thread() {
    // Run against an isolated codex home inside a tempdir so the test never
    // touches real user state.
    let temp_dir = tempdir().expect("tempdir");
    let mut config = test_config();
    config.codex_home = temp_dir.path().join("codex-home");
    config.cwd = config.codex_home.clone();
    std::fs::create_dir_all(&config.codex_home).expect("create codex home");
    let manager = ThreadManager::with_models_provider_and_home_for_tests(
        CodexAuth::from_api_key("dummy"),
        config.model_provider.clone(),
        config.codex_home.clone(),
    );
    // Start two threads so the bounded shutdown has more than one target.
    let thread_1 = manager
        .start_thread(config.clone())
        .await
        .expect("start first thread")
        .thread_id;
    let thread_2 = manager
        .start_thread(config)
        .await
        .expect("start second thread")
        .thread_id;
    // Generous bound: both idle threads are expected to finish well within it.
    let report = manager
        .shutdown_all_threads_bounded(Duration::from_secs(10))
        .await;
    // The report sorts each bucket by the thread id's string form, so sort the
    // expectation the same way before comparing.
    let mut expected_completed = vec![thread_1, thread_2];
    expected_completed.sort_by_key(std::string::ToString::to_string);
    assert_eq!(report.completed, expected_completed);
    assert!(report.submit_failed.is_empty());
    assert!(report.timed_out.is_empty());
    // Completed threads are removed from the manager, leaving it empty.
    assert!(manager.list_thread_ids().await.is_empty());
}
}