feat(app-server): propagate app-server trace context into core (#13368)

### Summary
Propagate trace context originating at app-server RPC method handlers into
the codex core submission loop (so this covers spans such as `run_turn`!).
This implements PR 2 of the app-server tracing rollout.

This also removes the old lower-level env-based reparenting in core so
explicit request/submission ancestry wins instead of being overridden by
ambient `TRACEPARENT` state.

### What changed
- Added `trace: Option<W3cTraceContext>` to codex_protocol::Submission
- Taught `Codex::submit()` / `submit_with_id()` to automatically capture
the current span context when constructing or forwarding a submission
- Wrapped the core submission loop in a `submission_dispatch` span
parented from `Submission.trace`
- Warn on invalid submission trace carriers and ignore them cleanly
- Removed the old env-based downstream reparenting path in core task
execution
- Stopped OTEL provider init from implicitly attaching env trace context
process-wide
- Updated mcp-server Submission call sites for the new field

Added focused unit tests for:
- capturing trace context into Submission
- preferring `Submission.trace` when building the core dispatch span

### Why
PR 1 gave us consistent inbound request spans in app-server, but that
only covered the transport boundary. For long-running work like turns
and reviews, the important missing piece was preserving ancestry after
the request handler returns and core continues work on a different async
path.

This change makes that handoff explicit and keeps the parentage rules
simple:
- app-server request span sets the current context
- `Submission.trace` snapshots that context
- core restores it once, at the submission boundary
- deeper core spans inherit naturally

That also lets us stop relying on env-based reparenting for this path,
which was too ambient and could override explicit ancestry.
This commit is contained in:
Owen Lin
2026-03-03 17:03:45 -08:00
committed by GitHub
parent 0fbd84081b
commit 52521a5e40
12 changed files with 489 additions and 196 deletions

View File

@@ -154,6 +154,7 @@ pub(crate) async fn run_codex_thread_one_shot(
.send(Submission {
id: "shutdown".to_string(),
op: Op::Shutdown {},
trace: None,
})
.await;
child_cancel.cancel();
@@ -298,11 +299,11 @@ async fn forward_ops(
cancel_token_ops: CancellationToken,
) {
loop {
let op: Op = match rx_ops.recv().or_cancel(&cancel_token_ops).await {
Ok(Ok(Submission { id: _, op })) => op,
let submission = match rx_ops.recv().or_cancel(&cancel_token_ops).await {
Ok(Ok(submission)) => submission,
Ok(Err(_)) | Err(_) => break,
};
let _ = codex.submit(op).await;
let _ = codex.submit_with_id(submission).await;
}
}
@@ -550,4 +551,47 @@ mod tests {
"expected Shutdown op after cancellation"
);
}
// Regression test for the trace-propagation change: forward_ops must hand
// the whole Submission (id, op, AND trace) through to `submit_with_id`,
// rather than destructuring it down to just the Op as the old code did.
#[tokio::test]
async fn forward_ops_preserves_submission_trace_context() {
// Channels backing the Codex under test; `rx_sub` receives whatever the
// Codex handle submits, letting us inspect the forwarded Submission.
let (tx_sub, rx_sub) = bounded(SUBMISSION_CHANNEL_CAPACITY);
let (_tx_events, rx_events) = bounded(SUBMISSION_CHANNEL_CAPACITY);
let (_agent_status_tx, agent_status) = watch::channel(AgentStatus::PendingInit);
let (session, _ctx, _rx_evt) = crate::codex::make_session_and_context_with_rx().await;
let codex = Arc::new(Codex {
tx_sub,
rx_event: rx_events,
agent_status,
session,
});
// Ops channel feeding forward_ops; capacity 1 is enough for the single
// submission this test sends before closing the channel.
let (tx_ops, rx_ops) = bounded(1);
let cancel = CancellationToken::new();
let forward = tokio::spawn(forward_ops(Arc::clone(&codex), rx_ops, cancel));
// A submission carrying a populated W3C trace carrier (traceparent +
// tracestate), which the old Op-only forwarding path would have dropped.
let submission = Submission {
id: "sub-1".to_string(),
op: Op::Interrupt,
trace: Some(codex_protocol::protocol::W3cTraceContext {
traceparent: Some(
"00-1234567890abcdef1234567890abcdef-1234567890abcdef-01".to_string(),
),
tracestate: Some("vendor=state".to_string()),
}),
};
tx_ops.send(submission.clone()).await.unwrap();
// Dropping the only sender closes the ops channel so forward_ops' recv
// loop observes the closed channel and exits.
drop(tx_ops);
// The forwarded submission must arrive on the core channel promptly...
let forwarded = timeout(Duration::from_secs(1), rx_sub.recv())
.await
.expect("forward_ops hung")
.expect("forwarded submission missing");
// ...with id, op, and trace all preserved verbatim.
assert_eq!(submission.id, forwarded.id);
assert_eq!(submission.op, forwarded.op);
assert_eq!(submission.trace, forwarded.trace);
// forward_ops itself should terminate once its input channel is closed.
timeout(Duration::from_secs(1), forward)
.await
.expect("forward_ops did not exit")
.expect("forward_ops join error");
}
}