mirror of
https://github.com/openai/codex.git
synced 2026-05-05 22:01:37 +03:00
Add auth env observability (#14905)
CXC-410 Emit Env Var Status with `/feedback` report

Add more observability on top of #14611

[Unset](https://openai.sentry.io/issues/7340419168/?project=4510195390611458&query=019cfa8d-c1ba-7002-96fa-e35fc340551d&referrer=issue-stream)
[Set](https://openai.sentry.io/issues/7340426331/?project=4510195390611458&query=019cfa91-aba1-7823-ab7e-762edfbc0ed4&referrer=issue-stream)

<img width="1063" height="610" alt="image" src="https://github.com/user-attachments/assets/937ab026-1c2d-4757-81d5-5f31b853113e" />

###### Summary
- Adds auth-env telemetry that records whether key auth-related env overrides were present on session start and request paths.
- Threads those auth-env fields through `/responses`, websocket, and `/models` telemetry and feedback metadata.
- Buckets custom provider `env_key` configuration to a safe `"configured"` value instead of emitting raw config text.
- Keeps the slice observability-only: no raw token values or raw URLs are emitted.

###### Rationale (from spec findings)
- 401 and auth-path debugging needs a way to distinguish env-driven auth paths from sessions with no auth env override.
- Startup and model-refresh failures need the same auth-env diagnostics as normal request failures.
- Feedback and Sentry tags need the same auth-env signal as OTel events so reports can be triaged consistently.
- Custom provider config is user-controlled text, so the telemetry contract must stay presence-only / bucketed.

###### Scope
- Adds a small `AuthEnvTelemetry` bundle for env presence collection and threads it through the main request/session telemetry paths.
- Does not add endpoint/base-url/provider-header/geo routing attribution or broader telemetry API redesign.

###### Trade-offs
- `provider_env_key_name` is bucketed to `"configured"` instead of preserving the literal configured env var name.
- `/models` is included because startup/model-refresh auth failures need the same diagnostics, but broader parity work remains out of scope.
- This slice keeps the existing telemetry APIs and layers auth-env fields onto them rather than redesigning the metadata model.

###### Client follow-up
- Add the separate endpoint/base-url attribution slice if routing-source diagnosis is still needed.
- Add provider-header or residency attribution only if auth-env presence proves insufficient in real reports.
- Revisit whether any additional auth-related env inputs need safe bucketing after more 401 triage data.

###### Testing
- `cargo test -p codex-core emit_feedback_request_tags -- --nocapture`
- `cargo test -p codex-core collect_auth_env_telemetry_buckets_provider_env_key_name -- --nocapture`
- `cargo test -p codex-core models_request_telemetry_emits_auth_env_feedback_tags_on_failure -- --nocapture`
- `cargo test -p codex-otel otel_export_routing_policy_routes_api_request_auth_observability -- --nocapture`
- `cargo test -p codex-otel otel_export_routing_policy_routes_websocket_connect_auth_observability -- --nocapture`
- `cargo test -p codex-otel otel_export_routing_policy_routes_websocket_request_transport_observability -- --nocapture`
- `cargo test -p codex-core --no-run --message-format short`
- `cargo test -p codex-otel --no-run --message-format short`

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
@@ -7,6 +7,7 @@ use rand::Rng;
|
||||
use tracing::debug;
|
||||
use tracing::error;
|
||||
|
||||
use crate::auth_env_telemetry::AuthEnvTelemetry;
|
||||
use crate::parse_command::shlex_join;
|
||||
|
||||
// Initial delay, in milliseconds (unit taken from the `_MS` suffix).
// NOTE(review): presumably the starting delay of a retry/backoff helper that
// lives outside this view — confirm against its call sites.
const INITIAL_DELAY_MS: u64 = 200;
|
||||
@@ -54,6 +55,23 @@ pub(crate) struct FeedbackRequestTags<'a> {
|
||||
pub auth_recovery_followup_status: Option<u16>,
|
||||
}
|
||||
|
||||
/// Flattened, ready-to-emit view of a [`FeedbackRequestTags`] value.
///
/// `FeedbackRequestSnapshot::from_tags` fills this in: optional string tags
/// collapse to `""`, and optional non-string tags are rendered into an owned
/// `String` that is empty when the tag is absent.
struct FeedbackRequestSnapshot<'a> {
    /// Copied from `FeedbackRequestTags::endpoint` unchanged.
    endpoint: &'a str,
    /// Copied from `FeedbackRequestTags::auth_header_attached` unchanged.
    auth_header_attached: bool,
    /// Borrowed tag text, or `""` when the tag is absent.
    auth_header_name: &'a str,
    /// Borrowed tag text, or `""` when the tag is absent.
    auth_mode: &'a str,
    /// Stringified tag value, or an empty string when the tag is absent.
    auth_retry_after_unauthorized: String,
    /// Borrowed tag text, or `""` when the tag is absent.
    auth_recovery_mode: &'a str,
    /// Borrowed tag text, or `""` when the tag is absent.
    auth_recovery_phase: &'a str,
    /// Stringified tag value, or an empty string when the tag is absent.
    auth_connection_reused: String,
    /// Borrowed tag text, or `""` when the tag is absent.
    auth_request_id: &'a str,
    /// Borrowed tag text, or `""` when the tag is absent.
    auth_cf_ray: &'a str,
    /// Borrowed tag text, or `""` when the tag is absent.
    auth_error: &'a str,
    /// Borrowed tag text, or `""` when the tag is absent.
    auth_error_code: &'a str,
    /// Stringified tag value, or an empty string when the tag is absent.
    auth_recovery_followup_success: String,
    /// Stringified tag value, or an empty string when the tag is absent.
    auth_recovery_followup_status: String,
}
|
||||
|
||||
struct Auth401FeedbackSnapshot<'a> {
|
||||
request_id: &'a str,
|
||||
cf_ray: &'a str,
|
||||
@@ -77,42 +95,84 @@ impl<'a> Auth401FeedbackSnapshot<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> FeedbackRequestSnapshot<'a> {
|
||||
fn from_tags(tags: &'a FeedbackRequestTags<'a>) -> Self {
|
||||
Self {
|
||||
endpoint: tags.endpoint,
|
||||
auth_header_attached: tags.auth_header_attached,
|
||||
auth_header_name: tags.auth_header_name.unwrap_or(""),
|
||||
auth_mode: tags.auth_mode.unwrap_or(""),
|
||||
auth_retry_after_unauthorized: tags
|
||||
.auth_retry_after_unauthorized
|
||||
.map_or_else(String::new, |value| value.to_string()),
|
||||
auth_recovery_mode: tags.auth_recovery_mode.unwrap_or(""),
|
||||
auth_recovery_phase: tags.auth_recovery_phase.unwrap_or(""),
|
||||
auth_connection_reused: tags
|
||||
.auth_connection_reused
|
||||
.map_or_else(String::new, |value| value.to_string()),
|
||||
auth_request_id: tags.auth_request_id.unwrap_or(""),
|
||||
auth_cf_ray: tags.auth_cf_ray.unwrap_or(""),
|
||||
auth_error: tags.auth_error.unwrap_or(""),
|
||||
auth_error_code: tags.auth_error_code.unwrap_or(""),
|
||||
auth_recovery_followup_success: tags
|
||||
.auth_recovery_followup_success
|
||||
.map_or_else(String::new, |value| value.to_string()),
|
||||
auth_recovery_followup_status: tags
|
||||
.auth_recovery_followup_status
|
||||
.map_or_else(String::new, |value| value.to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Emits the per-request auth feedback tags, without any auth-env fields.
///
/// Only compiled for tests (`#[cfg(test)]`). All values are read from a
/// `FeedbackRequestSnapshot` built from `tags`, so an absent optional tag is
/// emitted as an empty string rather than being omitted.
#[cfg(test)]
pub(crate) fn emit_feedback_request_tags(tags: &FeedbackRequestTags<'_>) {
    // The snapshot is the single place where optional tags are normalised;
    // no per-field locals are needed here.
    let snapshot = FeedbackRequestSnapshot::from_tags(tags);
    feedback_tags!(
        endpoint = snapshot.endpoint,
        auth_header_attached = snapshot.auth_header_attached,
        auth_header_name = snapshot.auth_header_name,
        auth_mode = snapshot.auth_mode,
        auth_retry_after_unauthorized = snapshot.auth_retry_after_unauthorized,
        auth_recovery_mode = snapshot.auth_recovery_mode,
        auth_recovery_phase = snapshot.auth_recovery_phase,
        auth_connection_reused = snapshot.auth_connection_reused,
        auth_request_id = snapshot.auth_request_id,
        auth_cf_ray = snapshot.auth_cf_ray,
        auth_error = snapshot.auth_error,
        auth_error_code = snapshot.auth_error_code,
        auth_recovery_followup_success = snapshot.auth_recovery_followup_success,
        auth_recovery_followup_status = snapshot.auth_recovery_followup_status
    );
}
|
||||
|
||||
pub(crate) fn emit_feedback_request_tags_with_auth_env(
|
||||
tags: &FeedbackRequestTags<'_>,
|
||||
auth_env: &AuthEnvTelemetry,
|
||||
) {
|
||||
let snapshot = FeedbackRequestSnapshot::from_tags(tags);
|
||||
feedback_tags!(
|
||||
endpoint = snapshot.endpoint,
|
||||
auth_header_attached = snapshot.auth_header_attached,
|
||||
auth_header_name = snapshot.auth_header_name,
|
||||
auth_mode = snapshot.auth_mode,
|
||||
auth_retry_after_unauthorized = snapshot.auth_retry_after_unauthorized,
|
||||
auth_recovery_mode = snapshot.auth_recovery_mode,
|
||||
auth_recovery_phase = snapshot.auth_recovery_phase,
|
||||
auth_connection_reused = snapshot.auth_connection_reused,
|
||||
auth_request_id = snapshot.auth_request_id,
|
||||
auth_cf_ray = snapshot.auth_cf_ray,
|
||||
auth_error = snapshot.auth_error,
|
||||
auth_error_code = snapshot.auth_error_code,
|
||||
auth_recovery_followup_success = snapshot.auth_recovery_followup_success,
|
||||
auth_recovery_followup_status = snapshot.auth_recovery_followup_status,
|
||||
auth_env_openai_api_key_present = auth_env.openai_api_key_env_present,
|
||||
auth_env_codex_api_key_present = auth_env.codex_api_key_env_present,
|
||||
auth_env_codex_api_key_enabled = auth_env.codex_api_key_env_enabled,
|
||||
auth_env_provider_key_name = auth_env.provider_env_key_name.as_deref().unwrap_or(""),
|
||||
auth_env_provider_key_present = auth_env
|
||||
.provider_env_key_present
|
||||
.map_or_else(String::new, |value| value.to_string()),
|
||||
auth_env_refresh_token_url_override_present = auth_env.refresh_token_url_override_present
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user