mirror of
https://github.com/openai/codex.git
synced 2026-05-05 05:42:33 +03:00
Add auth env observability (#14905)
CXC-410 Emit Env Var Status with `/feedback` report Add more observability on top of #14611 [Unset](https://openai.sentry.io/issues/7340419168/?project=4510195390611458&query=019cfa8d-c1ba-7002-96fa-e35fc340551d&referrer=issue-stream) [Set](https://openai.sentry.io/issues/7340426331/?project=4510195390611458&query=019cfa91-aba1-7823-ab7e-762edfbc0ed4&referrer=issue-stream) <img width="1063" height="610" alt="image" src="https://github.com/user-attachments/assets/937ab026-1c2d-4757-81d5-5f31b853113e" /> ###### Summary - Adds auth-env telemetry that records whether key auth-related env overrides were present on session start and request paths. - Threads those auth-env fields through `/responses`, websocket, and `/models` telemetry and feedback metadata. - Buckets custom provider `env_key` configuration to a safe `"configured"` value instead of emitting raw config text. - Keeps the slice observability-only: no raw token values or raw URLs are emitted. ###### Rationale (from spec findings) - 401 and auth-path debugging needs a way to distinguish env-driven auth paths from sessions with no auth env override. - Startup and model-refresh failures need the same auth-env diagnostics as normal request failures. - Feedback and Sentry tags need the same auth-env signal as OTel events so reports can be triaged consistently. - Custom provider config is user-controlled text, so the telemetry contract must stay presence-only / bucketed. ###### Scope - Adds a small `AuthEnvTelemetry` bundle for env presence collection and threads it through the main request/session telemetry paths. - Does not add endpoint/base-url/provider-header/geo routing attribution or broader telemetry API redesign. ###### Trade-offs - `provider_env_key_name` is bucketed to `"configured"` instead of preserving the literal configured env var name. - `/models` is included because startup/model-refresh auth failures need the same diagnostics, but broader parity work remains out of scope. - This slice keeps the existing telemetry APIs and layers auth-env fields onto them rather than redesigning the metadata model. ###### Client follow-up - Add the separate endpoint/base-url attribution slice if routing-source diagnosis is still needed. - Add provider-header or residency attribution only if auth-env presence proves insufficient in real reports. - Revisit whether any additional auth-related env inputs need safe bucketing after more 401 triage data. ###### Testing - `cargo test -p codex-core emit_feedback_request_tags -- --nocapture` - `cargo test -p codex-core collect_auth_env_telemetry_buckets_provider_env_key_name -- --nocapture` - `cargo test -p codex-core models_request_telemetry_emits_auth_env_feedback_tags_on_failure -- --nocapture` - `cargo test -p codex-otel otel_export_routing_policy_routes_api_request_auth_observability -- --nocapture` - `cargo test -p codex-otel otel_export_routing_policy_routes_websocket_connect_auth_observability -- --nocapture` - `cargo test -p codex-otel otel_export_routing_policy_routes_websocket_request_transport_observability -- --nocapture` - `cargo test -p codex-core --no-run --message-format short` - `cargo test -p codex-otel --no-run --message-format short` --------- Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
@@ -34,6 +34,8 @@ use crate::api_bridge::CoreAuthProvider;
|
||||
use crate::api_bridge::auth_provider_from_auth;
|
||||
use crate::api_bridge::map_api_error;
|
||||
use crate::auth::UnauthorizedRecovery;
|
||||
use crate::auth_env_telemetry::AuthEnvTelemetry;
|
||||
use crate::auth_env_telemetry::collect_auth_env_telemetry;
|
||||
use codex_api::CompactClient as ApiCompactClient;
|
||||
use codex_api::CompactionInput as ApiCompactionInput;
|
||||
use codex_api::MemoriesClient as ApiMemoriesClient;
|
||||
@@ -106,7 +108,7 @@ use crate::response_debug_context::telemetry_transport_error_message;
|
||||
use crate::tools::spec::create_tools_json_for_responses_api;
|
||||
use crate::util::FeedbackRequestTags;
|
||||
use crate::util::emit_feedback_auth_recovery_tags;
|
||||
use crate::util::emit_feedback_request_tags;
|
||||
use crate::util::emit_feedback_request_tags_with_auth_env;
|
||||
|
||||
pub const OPENAI_BETA_HEADER: &str = "OpenAI-Beta";
|
||||
pub const X_CODEX_TURN_STATE_HEADER: &str = "x-codex-turn-state";
|
||||
@@ -138,6 +140,7 @@ struct ModelClientState {
|
||||
auth_manager: Option<Arc<AuthManager>>,
|
||||
conversation_id: ThreadId,
|
||||
provider: ModelProviderInfo,
|
||||
auth_env_telemetry: AuthEnvTelemetry,
|
||||
session_source: SessionSource,
|
||||
model_verbosity: Option<VerbosityConfig>,
|
||||
responses_websockets_enabled_by_feature: bool,
|
||||
@@ -267,11 +270,16 @@ impl ModelClient {
|
||||
include_timing_metrics: bool,
|
||||
beta_features_header: Option<String>,
|
||||
) -> Self {
|
||||
let codex_api_key_env_enabled = auth_manager
|
||||
.as_ref()
|
||||
.is_some_and(|manager| manager.codex_api_key_env_enabled());
|
||||
let auth_env_telemetry = collect_auth_env_telemetry(&provider, codex_api_key_env_enabled);
|
||||
Self {
|
||||
state: Arc::new(ModelClientState {
|
||||
auth_manager,
|
||||
conversation_id,
|
||||
provider,
|
||||
auth_env_telemetry,
|
||||
session_source,
|
||||
model_verbosity,
|
||||
responses_websockets_enabled_by_feature,
|
||||
@@ -362,6 +370,7 @@ impl ModelClient {
|
||||
PendingUnauthorizedRetry::default(),
|
||||
),
|
||||
RequestRouteTelemetry::for_endpoint(RESPONSES_COMPACT_ENDPOINT),
|
||||
self.state.auth_env_telemetry.clone(),
|
||||
);
|
||||
let client =
|
||||
ApiCompactClient::new(transport, client_setup.api_provider, client_setup.api_auth)
|
||||
@@ -430,6 +439,7 @@ impl ModelClient {
|
||||
PendingUnauthorizedRetry::default(),
|
||||
),
|
||||
RequestRouteTelemetry::for_endpoint(MEMORIES_SUMMARIZE_ENDPOINT),
|
||||
self.state.auth_env_telemetry.clone(),
|
||||
);
|
||||
let client =
|
||||
ApiMemoriesClient::new(transport, client_setup.api_provider, client_setup.api_auth)
|
||||
@@ -474,11 +484,13 @@ impl ModelClient {
|
||||
session_telemetry: &SessionTelemetry,
|
||||
auth_context: AuthRequestTelemetryContext,
|
||||
request_route_telemetry: RequestRouteTelemetry,
|
||||
auth_env_telemetry: AuthEnvTelemetry,
|
||||
) -> Arc<dyn RequestTelemetry> {
|
||||
let telemetry = Arc::new(ApiTelemetry::new(
|
||||
session_telemetry.clone(),
|
||||
auth_context,
|
||||
request_route_telemetry,
|
||||
auth_env_telemetry,
|
||||
));
|
||||
let request_telemetry: Arc<dyn RequestTelemetry> = telemetry;
|
||||
request_telemetry
|
||||
@@ -561,6 +573,7 @@ impl ModelClient {
|
||||
session_telemetry,
|
||||
auth_context,
|
||||
request_route_telemetry,
|
||||
self.state.auth_env_telemetry.clone(),
|
||||
);
|
||||
let websocket_connect_timeout = self.state.provider.websocket_connect_timeout();
|
||||
let start = Instant::now();
|
||||
@@ -601,27 +614,30 @@ impl ModelClient {
|
||||
response_debug.auth_error.as_deref(),
|
||||
response_debug.auth_error_code.as_deref(),
|
||||
);
|
||||
emit_feedback_request_tags(&FeedbackRequestTags {
|
||||
endpoint: request_route_telemetry.endpoint,
|
||||
auth_header_attached: auth_context.auth_header_attached,
|
||||
auth_header_name: auth_context.auth_header_name,
|
||||
auth_mode: auth_context.auth_mode,
|
||||
auth_retry_after_unauthorized: Some(auth_context.retry_after_unauthorized),
|
||||
auth_recovery_mode: auth_context.recovery_mode,
|
||||
auth_recovery_phase: auth_context.recovery_phase,
|
||||
auth_connection_reused: Some(false),
|
||||
auth_request_id: response_debug.request_id.as_deref(),
|
||||
auth_cf_ray: response_debug.cf_ray.as_deref(),
|
||||
auth_error: response_debug.auth_error.as_deref(),
|
||||
auth_error_code: response_debug.auth_error_code.as_deref(),
|
||||
auth_recovery_followup_success: auth_context
|
||||
.retry_after_unauthorized
|
||||
.then_some(result.is_ok()),
|
||||
auth_recovery_followup_status: auth_context
|
||||
.retry_after_unauthorized
|
||||
.then_some(status)
|
||||
.flatten(),
|
||||
});
|
||||
emit_feedback_request_tags_with_auth_env(
|
||||
&FeedbackRequestTags {
|
||||
endpoint: request_route_telemetry.endpoint,
|
||||
auth_header_attached: auth_context.auth_header_attached,
|
||||
auth_header_name: auth_context.auth_header_name,
|
||||
auth_mode: auth_context.auth_mode,
|
||||
auth_retry_after_unauthorized: Some(auth_context.retry_after_unauthorized),
|
||||
auth_recovery_mode: auth_context.recovery_mode,
|
||||
auth_recovery_phase: auth_context.recovery_phase,
|
||||
auth_connection_reused: Some(false),
|
||||
auth_request_id: response_debug.request_id.as_deref(),
|
||||
auth_cf_ray: response_debug.cf_ray.as_deref(),
|
||||
auth_error: response_debug.auth_error.as_deref(),
|
||||
auth_error_code: response_debug.auth_error_code.as_deref(),
|
||||
auth_recovery_followup_success: auth_context
|
||||
.retry_after_unauthorized
|
||||
.then_some(result.is_ok()),
|
||||
auth_recovery_followup_status: auth_context
|
||||
.retry_after_unauthorized
|
||||
.then_some(status)
|
||||
.flatten(),
|
||||
},
|
||||
&self.state.auth_env_telemetry,
|
||||
);
|
||||
result
|
||||
}
|
||||
|
||||
@@ -1030,6 +1046,7 @@ impl ModelClientSession {
|
||||
session_telemetry,
|
||||
request_auth_context,
|
||||
RequestRouteTelemetry::for_endpoint(RESPONSES_ENDPOINT),
|
||||
self.client.state.auth_env_telemetry.clone(),
|
||||
);
|
||||
let compression = self.responses_request_compression(client_setup.auth.as_ref());
|
||||
let options = self.build_responses_options(turn_metadata_header, compression);
|
||||
@@ -1190,11 +1207,13 @@ impl ModelClientSession {
|
||||
session_telemetry: &SessionTelemetry,
|
||||
auth_context: AuthRequestTelemetryContext,
|
||||
request_route_telemetry: RequestRouteTelemetry,
|
||||
auth_env_telemetry: AuthEnvTelemetry,
|
||||
) -> (Arc<dyn RequestTelemetry>, Arc<dyn SseTelemetry>) {
|
||||
let telemetry = Arc::new(ApiTelemetry::new(
|
||||
session_telemetry.clone(),
|
||||
auth_context,
|
||||
request_route_telemetry,
|
||||
auth_env_telemetry,
|
||||
));
|
||||
let request_telemetry: Arc<dyn RequestTelemetry> = telemetry.clone();
|
||||
let sse_telemetry: Arc<dyn SseTelemetry> = telemetry;
|
||||
@@ -1206,11 +1225,13 @@ impl ModelClientSession {
|
||||
session_telemetry: &SessionTelemetry,
|
||||
auth_context: AuthRequestTelemetryContext,
|
||||
request_route_telemetry: RequestRouteTelemetry,
|
||||
auth_env_telemetry: AuthEnvTelemetry,
|
||||
) -> Arc<dyn WebsocketTelemetry> {
|
||||
let telemetry = Arc::new(ApiTelemetry::new(
|
||||
session_telemetry.clone(),
|
||||
auth_context,
|
||||
request_route_telemetry,
|
||||
auth_env_telemetry,
|
||||
));
|
||||
let websocket_telemetry: Arc<dyn WebsocketTelemetry> = telemetry;
|
||||
websocket_telemetry
|
||||
@@ -1663,6 +1684,7 @@ struct ApiTelemetry {
|
||||
session_telemetry: SessionTelemetry,
|
||||
auth_context: AuthRequestTelemetryContext,
|
||||
request_route_telemetry: RequestRouteTelemetry,
|
||||
auth_env_telemetry: AuthEnvTelemetry,
|
||||
}
|
||||
|
||||
impl ApiTelemetry {
|
||||
@@ -1670,11 +1692,13 @@ impl ApiTelemetry {
|
||||
session_telemetry: SessionTelemetry,
|
||||
auth_context: AuthRequestTelemetryContext,
|
||||
request_route_telemetry: RequestRouteTelemetry,
|
||||
auth_env_telemetry: AuthEnvTelemetry,
|
||||
) -> Self {
|
||||
Self {
|
||||
session_telemetry,
|
||||
auth_context,
|
||||
request_route_telemetry,
|
||||
auth_env_telemetry,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1708,29 +1732,32 @@ impl RequestTelemetry for ApiTelemetry {
|
||||
debug.auth_error.as_deref(),
|
||||
debug.auth_error_code.as_deref(),
|
||||
);
|
||||
emit_feedback_request_tags(&FeedbackRequestTags {
|
||||
endpoint: self.request_route_telemetry.endpoint,
|
||||
auth_header_attached: self.auth_context.auth_header_attached,
|
||||
auth_header_name: self.auth_context.auth_header_name,
|
||||
auth_mode: self.auth_context.auth_mode,
|
||||
auth_retry_after_unauthorized: Some(self.auth_context.retry_after_unauthorized),
|
||||
auth_recovery_mode: self.auth_context.recovery_mode,
|
||||
auth_recovery_phase: self.auth_context.recovery_phase,
|
||||
auth_connection_reused: None,
|
||||
auth_request_id: debug.request_id.as_deref(),
|
||||
auth_cf_ray: debug.cf_ray.as_deref(),
|
||||
auth_error: debug.auth_error.as_deref(),
|
||||
auth_error_code: debug.auth_error_code.as_deref(),
|
||||
auth_recovery_followup_success: self
|
||||
.auth_context
|
||||
.retry_after_unauthorized
|
||||
.then_some(error.is_none()),
|
||||
auth_recovery_followup_status: self
|
||||
.auth_context
|
||||
.retry_after_unauthorized
|
||||
.then_some(status)
|
||||
.flatten(),
|
||||
});
|
||||
emit_feedback_request_tags_with_auth_env(
|
||||
&FeedbackRequestTags {
|
||||
endpoint: self.request_route_telemetry.endpoint,
|
||||
auth_header_attached: self.auth_context.auth_header_attached,
|
||||
auth_header_name: self.auth_context.auth_header_name,
|
||||
auth_mode: self.auth_context.auth_mode,
|
||||
auth_retry_after_unauthorized: Some(self.auth_context.retry_after_unauthorized),
|
||||
auth_recovery_mode: self.auth_context.recovery_mode,
|
||||
auth_recovery_phase: self.auth_context.recovery_phase,
|
||||
auth_connection_reused: None,
|
||||
auth_request_id: debug.request_id.as_deref(),
|
||||
auth_cf_ray: debug.cf_ray.as_deref(),
|
||||
auth_error: debug.auth_error.as_deref(),
|
||||
auth_error_code: debug.auth_error_code.as_deref(),
|
||||
auth_recovery_followup_success: self
|
||||
.auth_context
|
||||
.retry_after_unauthorized
|
||||
.then_some(error.is_none()),
|
||||
auth_recovery_followup_status: self
|
||||
.auth_context
|
||||
.retry_after_unauthorized
|
||||
.then_some(status)
|
||||
.flatten(),
|
||||
},
|
||||
&self.auth_env_telemetry,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1759,29 +1786,32 @@ impl WebsocketTelemetry for ApiTelemetry {
|
||||
error_message.as_deref(),
|
||||
connection_reused,
|
||||
);
|
||||
emit_feedback_request_tags(&FeedbackRequestTags {
|
||||
endpoint: self.request_route_telemetry.endpoint,
|
||||
auth_header_attached: self.auth_context.auth_header_attached,
|
||||
auth_header_name: self.auth_context.auth_header_name,
|
||||
auth_mode: self.auth_context.auth_mode,
|
||||
auth_retry_after_unauthorized: Some(self.auth_context.retry_after_unauthorized),
|
||||
auth_recovery_mode: self.auth_context.recovery_mode,
|
||||
auth_recovery_phase: self.auth_context.recovery_phase,
|
||||
auth_connection_reused: Some(connection_reused),
|
||||
auth_request_id: debug.request_id.as_deref(),
|
||||
auth_cf_ray: debug.cf_ray.as_deref(),
|
||||
auth_error: debug.auth_error.as_deref(),
|
||||
auth_error_code: debug.auth_error_code.as_deref(),
|
||||
auth_recovery_followup_success: self
|
||||
.auth_context
|
||||
.retry_after_unauthorized
|
||||
.then_some(error.is_none()),
|
||||
auth_recovery_followup_status: self
|
||||
.auth_context
|
||||
.retry_after_unauthorized
|
||||
.then_some(status)
|
||||
.flatten(),
|
||||
});
|
||||
emit_feedback_request_tags_with_auth_env(
|
||||
&FeedbackRequestTags {
|
||||
endpoint: self.request_route_telemetry.endpoint,
|
||||
auth_header_attached: self.auth_context.auth_header_attached,
|
||||
auth_header_name: self.auth_context.auth_header_name,
|
||||
auth_mode: self.auth_context.auth_mode,
|
||||
auth_retry_after_unauthorized: Some(self.auth_context.retry_after_unauthorized),
|
||||
auth_recovery_mode: self.auth_context.recovery_mode,
|
||||
auth_recovery_phase: self.auth_context.recovery_phase,
|
||||
auth_connection_reused: Some(connection_reused),
|
||||
auth_request_id: debug.request_id.as_deref(),
|
||||
auth_cf_ray: debug.cf_ray.as_deref(),
|
||||
auth_error: debug.auth_error.as_deref(),
|
||||
auth_error_code: debug.auth_error_code.as_deref(),
|
||||
auth_recovery_followup_success: self
|
||||
.auth_context
|
||||
.retry_after_unauthorized
|
||||
.then_some(error.is_none()),
|
||||
auth_recovery_followup_status: self
|
||||
.auth_context
|
||||
.retry_after_unauthorized
|
||||
.then_some(status)
|
||||
.flatten(),
|
||||
},
|
||||
&self.auth_env_telemetry,
|
||||
);
|
||||
}
|
||||
|
||||
fn on_ws_event(
|
||||
|
||||
Reference in New Issue
Block a user