feat: expose detailed metrics to runtime metrics (#10699)

This commit is contained in:
Anton Panasenko
2026-02-05 18:22:30 -08:00
committed by GitHub
parent d74fa8edd1
commit 4ee039744e
8 changed files with 279 additions and 11 deletions

View File

@@ -12,3 +12,11 @@ pub(crate) const RESPONSES_API_OVERHEAD_DURATION_METRIC: &str =
"codex.responses_api_overhead.duration_ms";
/// Metric name for the Responses API inference-time duration, in milliseconds.
pub(crate) const RESPONSES_API_INFERENCE_TIME_DURATION_METRIC: &str =
"codex.responses_api_inference_time.duration_ms";
/// Metric name for the Responses API engine IAPI TTFT duration, in milliseconds.
/// NOTE(review): TTFT presumably stands for time-to-first-token — confirm.
pub(crate) const RESPONSES_API_ENGINE_IAPI_TTFT_DURATION_METRIC: &str =
"codex.responses_api_engine_iapi_ttft.duration_ms";
/// Metric name for the Responses API engine service TTFT duration, in milliseconds.
/// NOTE(review): TTFT presumably stands for time-to-first-token — confirm.
pub(crate) const RESPONSES_API_ENGINE_SERVICE_TTFT_DURATION_METRIC: &str =
"codex.responses_api_engine_service_ttft.duration_ms";
/// Metric name for the Responses API engine IAPI TBT duration, in milliseconds.
/// NOTE(review): TBT presumably stands for time-between-tokens — confirm.
pub(crate) const RESPONSES_API_ENGINE_IAPI_TBT_DURATION_METRIC: &str =
"codex.responses_api_engine_iapi_tbt.duration_ms";
/// Metric name for the Responses API engine service TBT duration, in milliseconds.
/// NOTE(review): TBT presumably stands for time-between-tokens — confirm.
pub(crate) const RESPONSES_API_ENGINE_SERVICE_TBT_DURATION_METRIC: &str =
"codex.responses_api_engine_service_tbt.duration_ms";

View File

@@ -1,5 +1,9 @@
use crate::metrics::names::API_CALL_COUNT_METRIC;
use crate::metrics::names::API_CALL_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_ENGINE_IAPI_TBT_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_ENGINE_IAPI_TTFT_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_ENGINE_SERVICE_TBT_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_ENGINE_SERVICE_TTFT_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_INFERENCE_TIME_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_OVERHEAD_DURATION_METRIC;
use crate::metrics::names::SSE_EVENT_COUNT_METRIC;
@@ -25,6 +29,11 @@ impl RuntimeMetricTotals {
/// Reports whether these totals are still in their zero state.
///
/// Returns `true` only when both `count` and `duration_ms` are zero.
pub fn is_empty(self) -> bool {
    matches!((self.count, self.duration_ms), (0, 0))
}
/// Folds `other` into `self`, field by field.
///
/// `count` and `duration_ms` are each combined with saturating addition, so a
/// sum that would overflow clamps at the maximum value instead of wrapping.
pub fn merge(&mut self, other: Self) {
    let combined_count = self.count.saturating_add(other.count);
    let combined_duration_ms = self.duration_ms.saturating_add(other.duration_ms);
    self.count = combined_count;
    self.duration_ms = combined_duration_ms;
}
}
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
@@ -36,6 +45,10 @@ pub struct RuntimeMetricsSummary {
pub websocket_events: RuntimeMetricTotals,
pub responses_api_overhead_ms: u64,
pub responses_api_inference_time_ms: u64,
pub responses_api_engine_iapi_ttft_ms: u64,
pub responses_api_engine_service_ttft_ms: u64,
pub responses_api_engine_iapi_tbt_ms: u64,
pub responses_api_engine_service_tbt_ms: u64,
}
impl RuntimeMetricsSummary {
@@ -47,6 +60,48 @@ impl RuntimeMetricsSummary {
&& self.websocket_events.is_empty()
&& self.responses_api_overhead_ms == 0
&& self.responses_api_inference_time_ms == 0
&& self.responses_api_engine_iapi_ttft_ms == 0
&& self.responses_api_engine_service_ttft_ms == 0
&& self.responses_api_engine_iapi_tbt_ms == 0
&& self.responses_api_engine_service_tbt_ms == 0
}
/// Folds `other` into `self`.
///
/// The call/event totals (`tool_calls`, `api_calls`, `streaming_events`,
/// `websocket_calls`, `websocket_events`) are merged through their own
/// `merge` method. The `responses_api_*_ms` fields are not accumulated: a
/// non-zero value in `other` replaces the value held by `self`, while a zero
/// value in `other` leaves `self` untouched.
pub fn merge(&mut self, other: Self) {
    /// Overwrites `slot` with `incoming` unless `incoming` is zero.
    fn replace_if_nonzero(slot: &mut u64, incoming: u64) {
        if incoming > 0 {
            *slot = incoming;
        }
    }

    self.tool_calls.merge(other.tool_calls);
    self.api_calls.merge(other.api_calls);
    self.streaming_events.merge(other.streaming_events);
    self.websocket_calls.merge(other.websocket_calls);
    self.websocket_events.merge(other.websocket_events);

    replace_if_nonzero(
        &mut self.responses_api_overhead_ms,
        other.responses_api_overhead_ms,
    );
    replace_if_nonzero(
        &mut self.responses_api_inference_time_ms,
        other.responses_api_inference_time_ms,
    );
    replace_if_nonzero(
        &mut self.responses_api_engine_iapi_ttft_ms,
        other.responses_api_engine_iapi_ttft_ms,
    );
    replace_if_nonzero(
        &mut self.responses_api_engine_service_ttft_ms,
        other.responses_api_engine_service_ttft_ms,
    );
    replace_if_nonzero(
        &mut self.responses_api_engine_iapi_tbt_ms,
        other.responses_api_engine_iapi_tbt_ms,
    );
    replace_if_nonzero(
        &mut self.responses_api_engine_service_tbt_ms,
        other.responses_api_engine_service_tbt_ms,
    );
}
pub fn responses_api_summary(&self) -> RuntimeMetricsSummary {
Self {
responses_api_overhead_ms: self.responses_api_overhead_ms,
responses_api_inference_time_ms: self.responses_api_inference_time_ms,
responses_api_engine_iapi_ttft_ms: self.responses_api_engine_iapi_ttft_ms,
responses_api_engine_service_ttft_ms: self.responses_api_engine_service_ttft_ms,
responses_api_engine_iapi_tbt_ms: self.responses_api_engine_iapi_tbt_ms,
responses_api_engine_service_tbt_ms: self.responses_api_engine_service_tbt_ms,
..RuntimeMetricsSummary::default()
}
}
pub(crate) fn from_snapshot(snapshot: &ResourceMetrics) -> Self {
@@ -74,6 +129,14 @@ impl RuntimeMetricsSummary {
sum_histogram_ms(snapshot, RESPONSES_API_OVERHEAD_DURATION_METRIC);
let responses_api_inference_time_ms =
sum_histogram_ms(snapshot, RESPONSES_API_INFERENCE_TIME_DURATION_METRIC);
let responses_api_engine_iapi_ttft_ms =
sum_histogram_ms(snapshot, RESPONSES_API_ENGINE_IAPI_TTFT_DURATION_METRIC);
let responses_api_engine_service_ttft_ms =
sum_histogram_ms(snapshot, RESPONSES_API_ENGINE_SERVICE_TTFT_DURATION_METRIC);
let responses_api_engine_iapi_tbt_ms =
sum_histogram_ms(snapshot, RESPONSES_API_ENGINE_IAPI_TBT_DURATION_METRIC);
let responses_api_engine_service_tbt_ms =
sum_histogram_ms(snapshot, RESPONSES_API_ENGINE_SERVICE_TBT_DURATION_METRIC);
Self {
tool_calls,
api_calls,
@@ -82,6 +145,10 @@ impl RuntimeMetricsSummary {
websocket_events,
responses_api_overhead_ms,
responses_api_inference_time_ms,
responses_api_engine_iapi_ttft_ms,
responses_api_engine_service_ttft_ms,
responses_api_engine_iapi_tbt_ms,
responses_api_engine_service_tbt_ms,
}
}
}

View File

@@ -1,6 +1,10 @@
use crate::TelemetryAuthMode;
use crate::metrics::names::API_CALL_COUNT_METRIC;
use crate::metrics::names::API_CALL_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_ENGINE_IAPI_TBT_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_ENGINE_IAPI_TTFT_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_ENGINE_SERVICE_TBT_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_ENGINE_SERVICE_TTFT_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_INFERENCE_TIME_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_OVERHEAD_DURATION_METRIC;
use crate::metrics::names::SSE_EVENT_COUNT_METRIC;
@@ -48,6 +52,10 @@ const RESPONSES_WEBSOCKET_TIMING_KIND: &str = "responsesapi.websocket_timing";
/// NOTE(review): presumably the key under which the timing payload is nested
/// in a websocket timing event — confirm against the parsing code.
const RESPONSES_WEBSOCKET_TIMING_METRICS_FIELD: &str = "timing_metrics";
/// NOTE(review): presumably the timing-payload key for the Responses API
/// overhead value in milliseconds — confirm against the parsing code.
const RESPONSES_API_OVERHEAD_FIELD: &str = "responses_duration_excl_engine_and_client_tool_time_ms";
/// NOTE(review): presumably the timing-payload key for the inference-time
/// value in milliseconds — confirm against the parsing code.
const RESPONSES_API_INFERENCE_FIELD: &str = "engine_service_total_ms";
/// Timing-payload key whose value, when it parses as a duration, is recorded
/// under `RESPONSES_API_ENGINE_IAPI_TTFT_DURATION_METRIC`.
const RESPONSES_API_ENGINE_IAPI_TTFT_FIELD: &str = "engine_iapi_ttft_total_ms";
/// Timing-payload key whose value, when it parses as a duration, is recorded
/// under `RESPONSES_API_ENGINE_SERVICE_TTFT_DURATION_METRIC`.
const RESPONSES_API_ENGINE_SERVICE_TTFT_FIELD: &str = "engine_service_ttft_total_ms";
/// Timing-payload key whose value, when it parses as a duration, is recorded
/// under `RESPONSES_API_ENGINE_IAPI_TBT_DURATION_METRIC`.
const RESPONSES_API_ENGINE_IAPI_TBT_FIELD: &str = "engine_iapi_tbt_across_engine_calls_ms";
/// Timing-payload key whose value, when it parses as a duration, is recorded
/// under `RESPONSES_API_ENGINE_SERVICE_TBT_DURATION_METRIC`.
const RESPONSES_API_ENGINE_SERVICE_TBT_FIELD: &str = "engine_service_tbt_across_engine_calls_ms";
impl OtelManager {
#[allow(clippy::too_many_arguments)]
@@ -674,6 +682,42 @@ impl OtelManager {
if let Some(duration) = duration_from_ms_value(inference_value) {
self.record_duration(RESPONSES_API_INFERENCE_TIME_DURATION_METRIC, duration, &[]);
}
let engine_iapi_ttft_value =
timing_metrics.and_then(|value| value.get(RESPONSES_API_ENGINE_IAPI_TTFT_FIELD));
if let Some(duration) = duration_from_ms_value(engine_iapi_ttft_value) {
self.record_duration(
RESPONSES_API_ENGINE_IAPI_TTFT_DURATION_METRIC,
duration,
&[],
);
}
let engine_service_ttft_value =
timing_metrics.and_then(|value| value.get(RESPONSES_API_ENGINE_SERVICE_TTFT_FIELD));
if let Some(duration) = duration_from_ms_value(engine_service_ttft_value) {
self.record_duration(
RESPONSES_API_ENGINE_SERVICE_TTFT_DURATION_METRIC,
duration,
&[],
);
}
let engine_iapi_tbt_value =
timing_metrics.and_then(|value| value.get(RESPONSES_API_ENGINE_IAPI_TBT_FIELD));
if let Some(duration) = duration_from_ms_value(engine_iapi_tbt_value) {
self.record_duration(RESPONSES_API_ENGINE_IAPI_TBT_DURATION_METRIC, duration, &[]);
}
let engine_service_tbt_value =
timing_metrics.and_then(|value| value.get(RESPONSES_API_ENGINE_SERVICE_TBT_FIELD));
if let Some(duration) = duration_from_ms_value(engine_service_tbt_value) {
self.record_duration(
RESPONSES_API_ENGINE_SERVICE_TBT_DURATION_METRIC,
duration,
&[],
);
}
}
fn responses_type(event: &ResponseEvent) -> String {