Files
codex/codex-rs/otel/src/trace_context.rs
Owen Lin aa3fe8abf8 feat(core): persist trace_id for turns in RolloutItem::TurnContext (#13602)
This PR adds a durable trace linkage for each turn by storing the active
trace ID on the rollout TurnContext record stored in session rollout
files.

Before this change, we propagated trace context at runtime but didn’t
persist a stable per-turn trace key in rollout history. That made
after-the-fact debugging harder (for example, mapping a historical turn
to the corresponding trace in Datadog). This sets us up for much easier
debugging in the future.

### What changed
- Added an optional `trace_id` to TurnContextItem (rollout schema).
- Added a small OTEL helper to read the current span trace ID.
- Captured `trace_id` when creating `TurnContext` and included it in
`to_turn_context_item()`.
- Updated tests and fixtures that construct TurnContextItem so
older/no-trace cases still work.

### Why this approach
TurnContext is already the canonical durable per-turn metadata in
rollout. This keeps ownership clean: trace linkage lives with other
persisted turn metadata.
2026-03-05 13:26:48 -08:00

141 lines
4.5 KiB
Rust

use std::collections::HashMap;
use codex_protocol::protocol::W3cTraceContext;
use opentelemetry::Context;
use opentelemetry::propagation::TextMapPropagator;
use opentelemetry::trace::TraceContextExt;
use opentelemetry_sdk::propagation::TraceContextPropagator;
use tracing::Span;
use tracing_opentelemetry::OpenTelemetrySpanExt;
/// Serializes the current tracing span's OpenTelemetry context into W3C
/// trace-context header values.
///
/// Returns `None` when the current span's span context is not valid.
/// Otherwise the context is injected through a [`TraceContextPropagator`]
/// into a string map, and the `traceparent` / `tracestate` entries (each
/// `None` if the propagator did not write it) are moved into the result.
pub fn current_span_w3c_trace_context() -> Option<W3cTraceContext> {
    let otel_context = Span::current().context();
    let has_valid_span = otel_context.span().span_context().is_valid();

    if has_valid_span {
        let mut carrier: HashMap<String, String> = HashMap::new();
        let propagator = TraceContextPropagator::new();
        propagator.inject_context(&otel_context, &mut carrier);

        Some(W3cTraceContext {
            traceparent: carrier.remove("traceparent"),
            tracestate: carrier.remove("tracestate"),
        })
    } else {
        None
    }
}
/// Returns the trace ID of the current tracing span, rendered with the
/// trace ID's `to_string` formatting.
///
/// Returns `None` when the current span's span context is not valid.
pub fn current_span_trace_id() -> Option<String> {
    let otel_context = Span::current().context();
    let span_ref = otel_context.span();
    let span_context = span_ref.span_context();

    span_context
        .is_valid()
        .then(|| span_context.trace_id().to_string())
}
/// Builds an OpenTelemetry [`Context`] from a [`W3cTraceContext`].
///
/// Borrows the optional `traceparent` / `tracestate` strings and delegates to
/// [`context_from_trace_headers`]; returns whatever that helper returns.
pub fn context_from_w3c_trace_context(trace: &W3cTraceContext) -> Option<Context> {
    let traceparent = trace.traceparent.as_deref();
    let tracestate = trace.tracestate.as_deref();
    context_from_trace_headers(traceparent, tracestate)
}
/// Sets `span`'s parent from the given W3C trace context, if it can be
/// converted into an OpenTelemetry context.
///
/// Returns `true` when a context was obtained from `trace` and handed to
/// [`set_parent_from_context`], and `false` when no context could be built
/// (in which case `span` is left untouched).
pub fn set_parent_from_w3c_trace_context(span: &Span, trace: &W3cTraceContext) -> bool {
    let Some(parent) = context_from_w3c_trace_context(trace) else {
        return false;
    };

    set_parent_from_context(span, parent);
    true
}
pub fn set_parent_from_context(span: &Span, context: Context) {
let _ = span.set_parent(context);
}
/// Extracts an OpenTelemetry [`Context`] from raw W3C trace header values.
///
/// `traceparent` is required: when it is `None` the function returns `None`
/// immediately. `tracestate` is optional and is only added to the carrier
/// when present. The headers are run through a [`TraceContextPropagator`],
/// and the extracted context is returned only if its span context is valid.
pub(crate) fn context_from_trace_headers(
    traceparent: Option<&str>,
    tracestate: Option<&str>,
) -> Option<Context> {
    let traceparent = traceparent?;

    let mut carrier: HashMap<String, String> =
        HashMap::from([("traceparent".to_string(), traceparent.to_string())]);
    // `Option` iterates over zero or one item, so this inserts only when present.
    carrier.extend(tracestate.map(|value| ("tracestate".to_string(), value.to_string())));

    let extracted = TraceContextPropagator::new().extract(&carrier);
    let is_valid = extracted.span().span_context().is_valid();
    is_valid.then_some(extracted)
}
#[cfg(test)]
mod tests {
    use super::context_from_trace_headers;
    use super::context_from_w3c_trace_context;
    use super::current_span_trace_id;
    use codex_protocol::protocol::W3cTraceContext;
    use opentelemetry::trace::SpanId;
    use opentelemetry::trace::TraceContextExt;
    use opentelemetry::trace::TraceId;
    use opentelemetry::trace::TracerProvider as _;
    use opentelemetry_sdk::trace::SdkTracerProvider;
    use pretty_assertions::assert_eq;
    use tracing::trace_span;
    use tracing_subscriber::layer::SubscriberExt;
    use tracing_subscriber::util::SubscriberInitExt;

    /// A well-formed `traceparent` produces a remote span context carrying
    /// the same trace and span IDs.
    #[test]
    fn parses_valid_w3c_trace_context() {
        let trace_id = "00000000000000000000000000000001";
        let span_id = "0000000000000002";
        let incoming = W3cTraceContext {
            traceparent: Some(format!("00-{trace_id}-{span_id}-01")),
            tracestate: None,
        };

        let context = context_from_w3c_trace_context(&incoming).expect("trace context");
        let span = context.span();
        let span_context = span.span_context();

        assert_eq!(
            span_context.trace_id(),
            TraceId::from_hex(trace_id).unwrap()
        );
        assert_eq!(span_context.span_id(), SpanId::from_hex(span_id).unwrap());
        assert!(span_context.is_remote());
    }

    /// A `traceparent` that is not in the expected format yields no context.
    #[test]
    fn invalid_traceparent_returns_none() {
        let parsed = context_from_trace_headers(Some("not-a-traceparent"), None);
        assert!(parsed.is_none());
    }

    /// A `tracestate` on its own, without `traceparent`, yields no context.
    #[test]
    fn missing_traceparent_returns_none() {
        let incoming = W3cTraceContext {
            traceparent: None,
            tracestate: Some("vendor=value".to_string()),
        };
        let parsed = context_from_w3c_trace_context(&incoming);
        assert!(parsed.is_none());
    }

    /// Inside a span recorded through the OpenTelemetry layer, the helper
    /// returns a 32-character, non-zero hexadecimal trace ID.
    #[test]
    fn current_span_trace_id_returns_hex_trace_id() {
        let provider = SdkTracerProvider::builder().build();
        let tracer = provider.tracer("codex-otel-tests");
        let otel_layer = tracing_opentelemetry::layer().with_tracer(tracer);
        // Install the subscriber for this thread only, for the guard's lifetime.
        let _guard = tracing_subscriber::registry().with(otel_layer).set_default();

        let span = trace_span!("test_span");
        let _entered = span.enter();

        let trace_id = current_span_trace_id().expect("trace id");
        assert_eq!(trace_id.len(), 32);
        assert!(trace_id.chars().all(|ch| ch.is_ascii_hexdigit()));
        assert_ne!(trace_id, "00000000000000000000000000000000");
    }
}