fix(otel): make HTTP trace export survive app-server runtimes (#14300)

## Summary

This PR fixes OTLP HTTP trace export in runtimes where the previous
exporter setup was unreliable, especially around app-server usage. It
also removes the old `codex_otel::otel_provider` compatibility shim and
switches remaining call sites over to the crate-root
`codex_otel::OtelProvider` export.

## What changed

- Use a runtime-safe OTLP HTTP trace exporter path for Tokio runtimes.
- Add an async HTTP client path for trace export when we are already
inside a multi-thread Tokio runtime.
- Make provider shutdown flush traces before tearing down the tracer
provider.
- Add loopback coverage that verifies traces are actually sent to
`/v1/traces`:
  - outside Tokio
  - inside a multi-thread Tokio runtime
  - inside a current-thread Tokio runtime
- Remove the `codex_otel::otel_provider` shim and update remaining
imports.

## Why

I hit cases where spans were being created correctly but never made it
to the collector. The issue turned out to be in exporter/runtime
behavior rather than the span plumbing itself. This PR narrows that gap
and gives us regression coverage for the actual export path.
This commit is contained in:
Owen Lin
2026-03-11 09:59:49 -07:00
committed by Michael Bolin
parent 548583198a
commit fa1242c83b
12 changed files with 511 additions and 19 deletions

View File

@@ -23,9 +23,11 @@ use opentelemetry_otlp::tonic_types::transport::ClientTlsConfig;
use opentelemetry_sdk::Resource;
use opentelemetry_sdk::logs::SdkLoggerProvider;
use opentelemetry_sdk::propagation::TraceContextPropagator;
use opentelemetry_sdk::runtime;
use opentelemetry_sdk::trace::BatchSpanProcessor;
use opentelemetry_sdk::trace::SdkTracerProvider;
use opentelemetry_sdk::trace::Tracer;
use opentelemetry_sdk::trace::span_processor_with_async_runtime::BatchSpanProcessor as TokioBatchSpanProcessor;
use opentelemetry_semantic_conventions as semconv;
use std::error::Error;
use tracing::debug;
@@ -50,15 +52,16 @@ pub struct OtelProvider {
impl OtelProvider {
/// Shuts down every configured telemetry provider on a best-effort basis.
///
/// Order of teardown:
/// 1. The tracer provider is flushed and then shut down, so pending spans
///    get a chance to be exported before the provider goes away.
/// 2. The metrics provider is shut down.
/// 3. The logger provider is shut down last, and exactly once.
///
/// Every result is intentionally discarded: teardown must never fail or
/// panic, and there is nothing useful a caller could do with the error.
pub fn shutdown(&self) {
    if let Some(tracer_provider) = &self.tracer_provider {
        // Flush before shutdown so buffered spans are not lost.
        let _ = tracer_provider.force_flush();
        let _ = tracer_provider.shutdown();
    }
    if let Some(metrics) = &self.metrics {
        let _ = metrics.shutdown();
    }
    // The logger used to be shut down both before and after the other
    // providers; a single shutdown at the end is sufficient and keeps the
    // logger available while traces and metrics are being torn down.
    if let Some(logger) = &self.logger {
        let _ = logger.shutdown();
    }
}
pub fn from(settings: &OtelSettings) -> Result<Option<Self>, Box<dyn Error>> {
@@ -159,15 +162,16 @@ impl OtelProvider {
/// Tears down telemetry when the provider goes out of scope.
impl Drop for OtelProvider {
    fn drop(&mut self) {
        // Delegate to the explicit shutdown path instead of duplicating it,
        // so that dropping and calling `shutdown()` always perform the same
        // best-effort teardown (trace flush + provider shutdowns) and cannot
        // drift apart over time.
        self.shutdown();
    }
}
@@ -321,6 +325,34 @@ fn build_tracer_provider(
} => {
debug!("Using OTLP Http exporter for traces: {endpoint}");
if crate::otlp::current_tokio_runtime_is_multi_thread() {
let protocol = match protocol {
OtelHttpProtocol::Binary => Protocol::HttpBinary,
OtelHttpProtocol::Json => Protocol::HttpJson,
};
let mut exporter_builder = SpanExporter::builder()
.with_http()
.with_endpoint(endpoint)
.with_protocol(protocol)
.with_headers(headers);
let client = crate::otlp::build_async_http_client(
tls.as_ref(),
OTEL_EXPORTER_OTLP_TRACES_TIMEOUT,
)?;
exporter_builder = exporter_builder.with_http_client(client);
let processor =
TokioBatchSpanProcessor::builder(exporter_builder.build()?, runtime::Tokio)
.build();
return Ok(SdkTracerProvider::builder()
.with_resource(resource.clone())
.with_span_processor(processor)
.build());
}
let protocol = match protocol {
OtelHttpProtocol::Binary => Protocol::HttpBinary,
OtelHttpProtocol::Json => Protocol::HttpJson,