feat(app-server): propagate app-server trace context into core (#13368)

### Summary
Propagate trace context originating at app-server RPC method handlers ->
codex core submission loop (so this includes spans such as `run_turn`!).
This implements PR 2 of the app-server tracing rollout.

This also removes the old lower-level env-based reparenting in core so
explicit request/submission ancestry wins instead of being overridden by
ambient `TRACEPARENT` state.

### What changed
- Added `trace: Option<W3cTraceContext>` to codex_protocol::Submission
- Taught `Codex::submit()` / `submit_with_id()` to automatically capture
the current span context when constructing or forwarding a submission
- Wrapped the core submission loop in a submission_dispatch span
parented from Submission.trace
- Warn on invalid submission trace carriers and ignore them cleanly
- Removed the old env-based downstream reparenting path in core task
execution
- Stopped OTEL provider init from implicitly attaching env trace context
process-wide
- Updated mcp-server Submission call sites for the new field

Added focused unit tests for:
- capturing trace context into Submission
- preferring `Submission.trace` when building the core dispatch span

### Why
PR 1 gave us consistent inbound request spans in app-server, but that
only covered the transport boundary. For long-running work like turns
and reviews, the important missing piece was preserving ancestry after
the request handler returns and core continues work on a different async
path.

This change makes that handoff explicit and keeps the parentage rules
simple:
- app-server request span sets the current context
- `Submission.trace` snapshots that context
- core restores it once, at the submission boundary
- deeper core spans inherit naturally

That also lets us stop relying on env-based reparenting for this path,
which was too ambient and could override explicit ancestry.
This commit is contained in:
Owen Lin
2026-03-03 17:03:45 -08:00
committed by GitHub
parent 0fbd84081b
commit 52521a5e40
12 changed files with 489 additions and 196 deletions

View File

@@ -34,6 +34,8 @@ use codex_core::format_exec_policy_error_with_source;
use codex_core::git_info::get_git_repo_root;
use codex_core::models_manager::collaboration_mode_presets::CollaborationModesConfig;
use codex_core::models_manager::manager::RefreshStrategy;
use codex_otel::set_parent_from_context;
use codex_otel::traceparent_context_from_env;
use codex_protocol::approvals::ElicitationAction;
use codex_protocol::config_types::SandboxMode;
use codex_protocol::protocol::AskForApproval;
@@ -58,9 +60,12 @@ use std::path::PathBuf;
use std::sync::Arc;
use supports_color::Stream;
use tokio::sync::Mutex;
use tracing::Instrument;
use tracing::debug;
use tracing::error;
use tracing::field;
use tracing::info;
use tracing::info_span;
use tracing::warn;
use tracing_subscriber::EnvFilter;
use tracing_subscriber::prelude::*;
@@ -94,6 +99,32 @@ struct ThreadEventEnvelope {
suppress_output: bool,
}
struct ExecRunArgs {
command: Option<ExecCommand>,
config: Config,
cursor_ansi: bool,
dangerously_bypass_approvals_and_sandbox: bool,
exec_span: tracing::Span,
images: Vec<PathBuf>,
json_mode: bool,
last_message_file: Option<PathBuf>,
model_provider: Option<String>,
oss: bool,
output_schema_path: Option<PathBuf>,
prompt: Option<String>,
skip_git_repo_check: bool,
stderr_with_ansi: bool,
}
fn exec_root_span() -> tracing::Span {
info_span!(
"codex.exec",
otel.kind = "internal",
thread.id = field::Empty,
turn.id = field::Empty,
)
}
pub async fn run_main(cli: Cli, arg0_paths: Arg0DispatchPaths) -> anyhow::Result<()> {
if let Err(err) = set_default_originator("codex_exec".to_string()) {
tracing::warn!(?err, "Failed to set codex exec originator override {err:?}");
@@ -347,6 +378,48 @@ pub async fn run_main(cli: Cli, arg0_paths: Arg0DispatchPaths) -> anyhow::Result
.with(otel_logger_layer)
.try_init();
let exec_span = exec_root_span();
if let Some(context) = traceparent_context_from_env() {
set_parent_from_context(&exec_span, context);
}
run_exec_session(ExecRunArgs {
command,
config,
cursor_ansi,
dangerously_bypass_approvals_and_sandbox,
exec_span: exec_span.clone(),
images,
json_mode,
last_message_file,
model_provider,
oss,
output_schema_path,
prompt,
skip_git_repo_check,
stderr_with_ansi,
})
.instrument(exec_span)
.await
}
async fn run_exec_session(args: ExecRunArgs) -> anyhow::Result<()> {
let ExecRunArgs {
command,
config,
cursor_ansi,
dangerously_bypass_approvals_and_sandbox,
exec_span,
images,
json_mode,
last_message_file,
model_provider,
oss,
output_schema_path,
prompt,
skip_git_repo_check,
stderr_with_ansi,
} = args;
let mut event_processor: Box<dyn EventProcessor> = match json_mode {
true => Box::new(EventProcessorWithJsonOutput::new(last_message_file.clone())),
_ => Box::new(EventProcessorWithHumanOutput::create_with_ansi(
@@ -435,6 +508,9 @@ pub async fn run_main(cli: Cli, arg0_paths: Arg0DispatchPaths) -> anyhow::Result
} else {
thread_manager.start_thread(config.clone()).await?
};
let primary_thread_id_for_span = primary_thread_id.to_string();
exec_span.record("thread.id", primary_thread_id_for_span.as_str());
let (initial_operation, prompt_summary) = match (command, prompt, images) {
(Some(ExecCommand::Review(review_cli)), _, _) => {
let review_request = build_review_request(review_cli)?;
@@ -554,7 +630,7 @@ pub async fn run_main(cli: Cli, arg0_paths: Arg0DispatchPaths) -> anyhow::Result
});
}
match initial_operation {
let task_id = match initial_operation {
InitialOperation::UserTurn {
items,
output_schema,
@@ -583,6 +659,7 @@ pub async fn run_main(cli: Cli, arg0_paths: Arg0DispatchPaths) -> anyhow::Result
task_id
}
};
exec_span.record("turn.id", task_id.as_str());
// Run the loop until the task is complete.
// Track whether a fatal error was reported by the server so we can
@@ -934,13 +1011,44 @@ fn build_review_request(args: ReviewArgs) -> anyhow::Result<ReviewRequest> {
#[cfg(test)]
mod tests {
use super::*;
use codex_otel::set_parent_from_w3c_trace_context;
use opentelemetry::trace::TraceContextExt;
use opentelemetry::trace::TraceId;
use opentelemetry::trace::TracerProvider as _;
use opentelemetry_sdk::trace::SdkTracerProvider;
use pretty_assertions::assert_eq;
use tracing_opentelemetry::OpenTelemetrySpanExt;
fn test_tracing_subscriber() -> impl tracing::Subscriber + Send + Sync {
let provider = SdkTracerProvider::builder().build();
let tracer = provider.tracer("codex-exec-tests");
tracing_subscriber::registry().with(tracing_opentelemetry::layer().with_tracer(tracer))
}
#[test]
fn exec_defaults_analytics_to_enabled() {
assert_eq!(DEFAULT_ANALYTICS_ENABLED, true);
}
#[test]
fn exec_root_span_can_be_parented_from_trace_context() {
let subscriber = test_tracing_subscriber();
let _guard = tracing::subscriber::set_default(subscriber);
let parent = codex_protocol::protocol::W3cTraceContext {
traceparent: Some("00-00000000000000000000000000000077-0000000000000088-01".into()),
tracestate: Some("vendor=value".into()),
};
let exec_span = exec_root_span();
assert!(set_parent_from_w3c_trace_context(&exec_span, &parent));
let trace_id = exec_span.context().span().span_context().trace_id();
assert_eq!(
trace_id,
TraceId::from_hex("00000000000000000000000000000077").expect("trace id")
);
}
#[test]
fn builds_uncommitted_review_request() {
let request = build_review_request(ReviewArgs {