mirror of
https://github.com/openai/codex.git
synced 2026-05-05 05:42:33 +03:00
feat: show runtime metrics in console (#10278)
Summary of changes:
- Adds a new feature flag: runtime_metrics
  - Declared in core/src/features.rs
  - Added to core/config.schema.json
  - Wired into OTEL init in core/src/otel_init.rs
- Enables on-demand runtime metric snapshots in OTEL
  - Adds runtime_metrics: bool to otel/src/config.rs
  - Enables experimental custom reader features in otel/Cargo.toml
- Adds snapshot/reset/summary APIs in:
  - otel/src/lib.rs
  - otel/src/metrics/client.rs
  - otel/src/metrics/config.rs
  - otel/src/metrics/error.rs
- Defines metric names and a runtime summary builder
  - New files:
    - otel/src/metrics/names.rs
    - otel/src/metrics/runtime_metrics.rs
  - Summarizes totals for:
    - Tool calls
    - API requests
    - SSE/streaming events
- Instruments metrics collection in OTEL manager
  - otel/src/traces/otel_manager.rs now records:
    - API call counts + durations
    - SSE event counts + durations (success/failure)
  - Tool call metrics now use shared constants
- Surfaces runtime metrics in the TUI
  - Resets runtime metrics at turn start in tui/src/chatwidget.rs
  - Displays metrics in the final separator line in tui/src/history_cell.rs
- Adds tests
  - New OTEL tests:
    - otel/tests/suite/snapshot.rs
    - otel/tests/suite/runtime_summary.rs
  - New TUI test:
    - final_message_separator_includes_runtime_metrics in tui/src/history_cell.rs
Scope:
- 19 files changed
- ~652 insertions, 38 deletions
<img width="922" height="169" alt="Screenshot 2026-01-30 at 4 11 34 PM"
src="https://github.com/user-attachments/assets/1efd754d-a16d-4564-83a5-f4442fd2f998"
/>
This commit is contained in:
@@ -22,13 +22,20 @@ use opentelemetry_otlp::WithTonicConfig;
|
||||
use opentelemetry_otlp::tonic_types::metadata::MetadataMap;
|
||||
use opentelemetry_otlp::tonic_types::transport::ClientTlsConfig;
|
||||
use opentelemetry_sdk::Resource;
|
||||
use opentelemetry_sdk::metrics::InstrumentKind;
|
||||
use opentelemetry_sdk::metrics::ManualReader;
|
||||
use opentelemetry_sdk::metrics::PeriodicReader;
|
||||
use opentelemetry_sdk::metrics::Pipeline;
|
||||
use opentelemetry_sdk::metrics::SdkMeterProvider;
|
||||
use opentelemetry_sdk::metrics::Temporality;
|
||||
use opentelemetry_sdk::metrics::data::ResourceMetrics;
|
||||
use opentelemetry_sdk::metrics::reader::MetricReader;
|
||||
use opentelemetry_semantic_conventions as semconv;
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex;
|
||||
use std::sync::Weak;
|
||||
use std::time::Duration;
|
||||
use tracing::debug;
|
||||
|
||||
@@ -37,6 +44,39 @@ const METER_NAME: &str = "codex";
|
||||
const DURATION_UNIT: &str = "ms";
|
||||
const DURATION_DESCRIPTION: &str = "Duration in milliseconds.";
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct SharedManualReader {
|
||||
inner: Arc<ManualReader>,
|
||||
}
|
||||
|
||||
impl SharedManualReader {
|
||||
fn new(inner: Arc<ManualReader>) -> Self {
|
||||
Self { inner }
|
||||
}
|
||||
}
|
||||
|
||||
impl MetricReader for SharedManualReader {
|
||||
fn register_pipeline(&self, pipeline: Weak<Pipeline>) {
|
||||
self.inner.register_pipeline(pipeline);
|
||||
}
|
||||
|
||||
fn collect(&self, rm: &mut ResourceMetrics) -> opentelemetry_sdk::error::OTelSdkResult {
|
||||
self.inner.collect(rm)
|
||||
}
|
||||
|
||||
fn force_flush(&self) -> opentelemetry_sdk::error::OTelSdkResult {
|
||||
self.inner.force_flush()
|
||||
}
|
||||
|
||||
fn shutdown_with_timeout(&self, timeout: Duration) -> opentelemetry_sdk::error::OTelSdkResult {
|
||||
self.inner.shutdown_with_timeout(timeout)
|
||||
}
|
||||
|
||||
fn temporality(&self, kind: InstrumentKind) -> Temporality {
|
||||
self.inner.temporality(kind)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct MetricsClientInner {
|
||||
meter_provider: SdkMeterProvider,
|
||||
@@ -44,6 +84,7 @@ struct MetricsClientInner {
|
||||
counters: Mutex<HashMap<String, Counter<u64>>>,
|
||||
histograms: Mutex<HashMap<String, Histogram<f64>>>,
|
||||
duration_histograms: Mutex<HashMap<String, Histogram<f64>>>,
|
||||
runtime_reader: Option<Arc<ManualReader>>,
|
||||
default_tags: BTreeMap<String, String>,
|
||||
}
|
||||
|
||||
@@ -144,26 +185,41 @@ pub struct MetricsClient(std::sync::Arc<MetricsClientInner>);
|
||||
impl MetricsClient {
|
||||
/// Build a metrics client from configuration and validate defaults.
|
||||
pub fn new(config: MetricsConfig) -> Result<Self> {
|
||||
validate_tags(&config.default_tags)?;
|
||||
let MetricsConfig {
|
||||
environment,
|
||||
service_name,
|
||||
service_version,
|
||||
exporter,
|
||||
export_interval,
|
||||
runtime_reader,
|
||||
default_tags,
|
||||
} = config;
|
||||
|
||||
validate_tags(&default_tags)?;
|
||||
|
||||
let resource = Resource::builder()
|
||||
.with_service_name(config.service_name.clone())
|
||||
.with_service_name(service_name)
|
||||
.with_attributes(vec![
|
||||
KeyValue::new(
|
||||
semconv::attribute::SERVICE_VERSION,
|
||||
config.service_version.clone(),
|
||||
),
|
||||
KeyValue::new(ENV_ATTRIBUTE, config.environment.clone()),
|
||||
KeyValue::new(semconv::attribute::SERVICE_VERSION, service_version),
|
||||
KeyValue::new(ENV_ATTRIBUTE, environment),
|
||||
])
|
||||
.build();
|
||||
|
||||
let (meter_provider, meter) = match config.exporter {
|
||||
let runtime_reader = runtime_reader.then(|| {
|
||||
Arc::new(
|
||||
ManualReader::builder()
|
||||
.with_temporality(Temporality::Delta)
|
||||
.build(),
|
||||
)
|
||||
});
|
||||
|
||||
let (meter_provider, meter) = match exporter {
|
||||
MetricsExporter::InMemory(exporter) => {
|
||||
build_provider(resource, exporter, config.export_interval)
|
||||
build_provider(resource, exporter, export_interval, runtime_reader.clone())
|
||||
}
|
||||
MetricsExporter::Otlp(exporter) => {
|
||||
let exporter = build_otlp_metric_exporter(exporter, Temporality::Delta)?;
|
||||
build_provider(resource, exporter, config.export_interval)
|
||||
build_provider(resource, exporter, export_interval, runtime_reader.clone())
|
||||
}
|
||||
};
|
||||
|
||||
@@ -173,7 +229,8 @@ impl MetricsClient {
|
||||
counters: Mutex::new(HashMap::new()),
|
||||
histograms: Mutex::new(HashMap::new()),
|
||||
duration_histograms: Mutex::new(HashMap::new()),
|
||||
default_tags: config.default_tags,
|
||||
runtime_reader,
|
||||
default_tags,
|
||||
})))
|
||||
}
|
||||
|
||||
@@ -209,6 +266,18 @@ impl MetricsClient {
|
||||
Ok(Timer::new(name, tags, self))
|
||||
}
|
||||
|
||||
/// Collect a runtime metrics snapshot without shutting down the provider.
|
||||
pub fn snapshot(&self) -> Result<ResourceMetrics> {
|
||||
let Some(reader) = &self.0.runtime_reader else {
|
||||
return Err(MetricsError::RuntimeSnapshotUnavailable);
|
||||
};
|
||||
let mut snapshot = ResourceMetrics::default();
|
||||
reader
|
||||
.collect(&mut snapshot)
|
||||
.map_err(|source| MetricsError::RuntimeSnapshotCollect { source })?;
|
||||
Ok(snapshot)
|
||||
}
|
||||
|
||||
/// Flush metrics and stop the underlying OTEL meter provider.
|
||||
pub fn shutdown(&self) -> Result<()> {
|
||||
self.0.shutdown()
|
||||
@@ -219,6 +288,7 @@ fn build_provider<E>(
|
||||
resource: Resource,
|
||||
exporter: E,
|
||||
interval: Option<Duration>,
|
||||
runtime_reader: Option<Arc<ManualReader>>,
|
||||
) -> (SdkMeterProvider, Meter)
|
||||
where
|
||||
E: opentelemetry_sdk::metrics::exporter::PushMetricExporter + 'static,
|
||||
@@ -228,10 +298,11 @@ where
|
||||
reader_builder = reader_builder.with_interval(interval);
|
||||
}
|
||||
let reader = reader_builder.build();
|
||||
let provider = SdkMeterProvider::builder()
|
||||
.with_resource(resource)
|
||||
.with_reader(reader)
|
||||
.build();
|
||||
let mut provider_builder = SdkMeterProvider::builder().with_resource(resource);
|
||||
if let Some(reader) = runtime_reader {
|
||||
provider_builder = provider_builder.with_reader(SharedManualReader::new(reader));
|
||||
}
|
||||
let provider = provider_builder.with_reader(reader).build();
|
||||
let meter = provider.meter(METER_NAME);
|
||||
(provider, meter)
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@ pub struct MetricsConfig {
|
||||
pub(crate) service_version: String,
|
||||
pub(crate) exporter: MetricsExporter,
|
||||
pub(crate) export_interval: Option<Duration>,
|
||||
pub(crate) runtime_reader: bool,
|
||||
pub(crate) default_tags: BTreeMap<String, String>,
|
||||
}
|
||||
|
||||
@@ -35,6 +36,7 @@ impl MetricsConfig {
|
||||
service_version: service_version.into(),
|
||||
exporter: MetricsExporter::Otlp(exporter),
|
||||
export_interval: None,
|
||||
runtime_reader: false,
|
||||
default_tags: BTreeMap::new(),
|
||||
}
|
||||
}
|
||||
@@ -52,6 +54,7 @@ impl MetricsConfig {
|
||||
service_version: service_version.into(),
|
||||
exporter: MetricsExporter::InMemory(exporter),
|
||||
export_interval: None,
|
||||
runtime_reader: false,
|
||||
default_tags: BTreeMap::new(),
|
||||
}
|
||||
}
|
||||
@@ -62,6 +65,12 @@ impl MetricsConfig {
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable a manual reader for on-demand runtime snapshots.
|
||||
pub fn with_runtime_reader(mut self) -> Self {
|
||||
self.runtime_reader = true;
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a default tag that will be sent with every metric.
|
||||
pub fn with_tag(mut self, key: impl Into<String>, value: impl Into<String>) -> Result<Self> {
|
||||
let key = key.into();
|
||||
|
||||
@@ -34,4 +34,13 @@ pub enum MetricsError {
|
||||
#[source]
|
||||
source: opentelemetry_sdk::error::OTelSdkError,
|
||||
},
|
||||
|
||||
#[error("runtime metrics snapshot reader is not enabled")]
|
||||
RuntimeSnapshotUnavailable,
|
||||
|
||||
#[error("failed to collect runtime metrics snapshot from metrics reader")]
|
||||
RuntimeSnapshotCollect {
|
||||
#[source]
|
||||
source: opentelemetry_sdk::error::OTelSdkError,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
mod client;
|
||||
mod config;
|
||||
mod error;
|
||||
pub(crate) mod names;
|
||||
pub(crate) mod runtime_metrics;
|
||||
pub(crate) mod timer;
|
||||
pub(crate) mod validation;
|
||||
|
||||
|
||||
6
codex-rs/otel/src/metrics/names.rs
Normal file
6
codex-rs/otel/src/metrics/names.rs
Normal file
@@ -0,0 +1,6 @@
|
||||
/// Metric name under which tool calls are counted.
pub(crate) const TOOL_CALL_COUNT_METRIC: &str = "codex.tool.call";
|
||||
/// Metric name for the tool-call duration histogram (milliseconds).
pub(crate) const TOOL_CALL_DURATION_METRIC: &str = "codex.tool.call.duration_ms";
|
||||
/// Metric name under which API requests are counted.
pub(crate) const API_CALL_COUNT_METRIC: &str = "codex.api_request";
|
||||
/// Metric name for the API-request duration histogram (milliseconds).
pub(crate) const API_CALL_DURATION_METRIC: &str = "codex.api_request.duration_ms";
|
||||
/// Metric name under which SSE (streaming) events are counted.
pub(crate) const SSE_EVENT_COUNT_METRIC: &str = "codex.sse_event";
|
||||
/// Metric name for the SSE-event duration histogram (milliseconds).
pub(crate) const SSE_EVENT_DURATION_METRIC: &str = "codex.sse_event.duration_ms";
|
||||
101
codex-rs/otel/src/metrics/runtime_metrics.rs
Normal file
101
codex-rs/otel/src/metrics/runtime_metrics.rs
Normal file
@@ -0,0 +1,101 @@
|
||||
use crate::metrics::names::API_CALL_COUNT_METRIC;
|
||||
use crate::metrics::names::API_CALL_DURATION_METRIC;
|
||||
use crate::metrics::names::SSE_EVENT_COUNT_METRIC;
|
||||
use crate::metrics::names::SSE_EVENT_DURATION_METRIC;
|
||||
use crate::metrics::names::TOOL_CALL_COUNT_METRIC;
|
||||
use crate::metrics::names::TOOL_CALL_DURATION_METRIC;
|
||||
use opentelemetry_sdk::metrics::data::AggregatedMetrics;
|
||||
use opentelemetry_sdk::metrics::data::Metric;
|
||||
use opentelemetry_sdk::metrics::data::MetricData;
|
||||
use opentelemetry_sdk::metrics::data::ResourceMetrics;
|
||||
|
||||
/// Event count and summed duration for one category of runtime activity.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct RuntimeMetricTotals {
    /// Number of recorded events.
    pub count: u64,
    /// Total duration of those events, in milliseconds.
    pub duration_ms: u64,
}

impl RuntimeMetricTotals {
    /// Returns `true` only when both the count and the duration are zero.
    pub fn is_empty(self) -> bool {
        matches!(
            self,
            Self {
                count: 0,
                duration_ms: 0,
            }
        )
    }
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
|
||||
pub struct RuntimeMetricsSummary {
|
||||
pub tool_calls: RuntimeMetricTotals,
|
||||
pub api_calls: RuntimeMetricTotals,
|
||||
pub streaming_events: RuntimeMetricTotals,
|
||||
}
|
||||
|
||||
impl RuntimeMetricsSummary {
|
||||
pub fn is_empty(self) -> bool {
|
||||
self.tool_calls.is_empty() && self.api_calls.is_empty() && self.streaming_events.is_empty()
|
||||
}
|
||||
|
||||
pub(crate) fn from_snapshot(snapshot: &ResourceMetrics) -> Self {
|
||||
let tool_calls = RuntimeMetricTotals {
|
||||
count: sum_counter(snapshot, TOOL_CALL_COUNT_METRIC),
|
||||
duration_ms: sum_histogram_ms(snapshot, TOOL_CALL_DURATION_METRIC),
|
||||
};
|
||||
let api_calls = RuntimeMetricTotals {
|
||||
count: sum_counter(snapshot, API_CALL_COUNT_METRIC),
|
||||
duration_ms: sum_histogram_ms(snapshot, API_CALL_DURATION_METRIC),
|
||||
};
|
||||
let streaming_events = RuntimeMetricTotals {
|
||||
count: sum_counter(snapshot, SSE_EVENT_COUNT_METRIC),
|
||||
duration_ms: sum_histogram_ms(snapshot, SSE_EVENT_DURATION_METRIC),
|
||||
};
|
||||
Self {
|
||||
tool_calls,
|
||||
api_calls,
|
||||
streaming_events,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn sum_counter(snapshot: &ResourceMetrics, name: &str) -> u64 {
|
||||
snapshot
|
||||
.scope_metrics()
|
||||
.flat_map(opentelemetry_sdk::metrics::data::ScopeMetrics::metrics)
|
||||
.filter(|metric| metric.name() == name)
|
||||
.map(sum_counter_metric)
|
||||
.sum()
|
||||
}
|
||||
|
||||
fn sum_counter_metric(metric: &Metric) -> u64 {
|
||||
match metric.data() {
|
||||
AggregatedMetrics::U64(MetricData::Sum(sum)) => sum
|
||||
.data_points()
|
||||
.map(opentelemetry_sdk::metrics::data::SumDataPoint::value)
|
||||
.sum(),
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn sum_histogram_ms(snapshot: &ResourceMetrics, name: &str) -> u64 {
|
||||
snapshot
|
||||
.scope_metrics()
|
||||
.flat_map(opentelemetry_sdk::metrics::data::ScopeMetrics::metrics)
|
||||
.filter(|metric| metric.name() == name)
|
||||
.map(sum_histogram_metric_ms)
|
||||
.sum()
|
||||
}
|
||||
|
||||
fn sum_histogram_metric_ms(metric: &Metric) -> u64 {
|
||||
match metric.data() {
|
||||
AggregatedMetrics::F64(MetricData::Histogram(histogram)) => histogram
|
||||
.data_points()
|
||||
.map(|point| f64_to_u64(point.sum()))
|
||||
.sum(),
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a floating-point value to `u64`, rounding to the nearest integer.
///
/// NaN, infinities, zero and negative values all map to `0`; values beyond the
/// `u64` range saturate at `u64::MAX`.
fn f64_to_u64(value: f64) -> u64 {
    if value.is_finite() && value > 0.0 {
        let upper_bound = u64::MAX as f64;
        let bounded = if value < upper_bound {
            value
        } else {
            upper_bound
        };
        bounded.round() as u64
    } else {
        0
    }
}
|
||||
Reference in New Issue
Block a user