diff --git a/codex-rs/core/src/codex_delegate.rs b/codex-rs/core/src/codex_delegate.rs index 55b3619e11..d7911ff349 100644 --- a/codex-rs/core/src/codex_delegate.rs +++ b/codex-rs/core/src/codex_delegate.rs @@ -3,6 +3,7 @@ use std::sync::Arc; use async_channel::Receiver; use async_channel::Sender; +use codex_analytics::GuardianApprovalRequestSource; use codex_async_utils::OrCancelExt; use codex_exec_server::EnvironmentManager; use codex_protocol::protocol::ApplyPatchApprovalRequestEvent; @@ -40,7 +41,7 @@ use crate::codex::emit_subagent_session_started; use crate::config::Config; use crate::guardian::GuardianApprovalRequest; use crate::guardian::new_guardian_review_id; -use crate::guardian::review_approval_request_with_cancel; +use crate::guardian::review_approval_request_with_source_and_cancel; use crate::guardian::routes_approval_to_guardian; use crate::mcp_tool_call::MCP_TOOL_APPROVAL_ACCEPT; use crate::mcp_tool_call::MCP_TOOL_APPROVAL_ACCEPT_FOR_SESSION; @@ -747,12 +748,13 @@ fn spawn_guardian_review( let _ = tx.send(ReviewDecision::Denied); return; }; - let decision = runtime.block_on(review_approval_request_with_cancel( + let decision = runtime.block_on(review_approval_request_with_source_and_cancel( &session, &turn, review_id, request, retry_reason, + GuardianApprovalRequestSource::DelegatedSubagent, cancel_token, )); let _ = tx.send(decision); diff --git a/codex-rs/core/src/guardian/mod.rs b/codex-rs/core/src/guardian/mod.rs index 67e9a828ee..f5926a0db5 100644 --- a/codex-rs/core/src/guardian/mod.rs +++ b/codex-rs/core/src/guardian/mod.rs @@ -19,6 +19,7 @@ mod review_session; use std::time::Duration; use codex_protocol::protocol::GuardianAssessmentDecisionSource; +use codex_protocol::protocol::GuardianAssessmentOutcome; use serde::Deserialize; use serde::Serialize; @@ -30,7 +31,9 @@ pub(crate) use review::guardian_timeout_message; pub(crate) use review::is_guardian_reviewer_source; pub(crate) use review::new_guardian_review_id; pub(crate) use review::review_approval_request; +#[cfg(test)] pub(crate) use review::review_approval_request_with_cancel; +pub(crate) use review::review_approval_request_with_source_and_cancel; pub(crate) use review::routes_approval_to_guardian; pub(crate) use review_session::GuardianReviewSessionManager; @@ -45,14 +48,6 @@ const GUARDIAN_MAX_ACTION_STRING_TOKENS: usize = 16_000; const GUARDIAN_RECENT_ENTRY_LIMIT: usize = 40; const TRUNCATION_TAG: &str = "truncated"; -/// Final allow/deny outcome returned by the guardian reviewer. -#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq)] -#[serde(rename_all = "lowercase")] -pub(crate) enum GuardianAssessmentOutcome { - Allow, - Deny, -} - /// Structured output contract that the guardian reviewer must satisfy. #[derive(Debug, Clone, Deserialize, Serialize)] pub(crate) struct GuardianAssessment { diff --git a/codex-rs/core/src/guardian/review.rs b/codex-rs/core/src/guardian/review.rs index 861d3576cd..495fba06a1 100644 --- a/codex-rs/core/src/guardian/review.rs +++ b/codex-rs/core/src/guardian/review.rs @@ -1,5 +1,14 @@ use std::sync::Arc; +use std::time::Instant; +use codex_analytics::GuardianApprovalRequestSource; +use codex_analytics::GuardianReviewDecision; +use codex_analytics::GuardianReviewFailureReason; +use codex_analytics::GuardianReviewSessionKind; +use codex_analytics::GuardianReviewTerminalStatus; +use codex_analytics::GuardianReviewedAction; +use codex_analytics::now_unix_seconds; +use codex_features::Feature; use codex_protocol::config_types::ApprovalsReviewer; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::EventMsg; @@ -10,12 +19,14 @@ use codex_protocol::protocol::GuardianRiskLevel; use codex_protocol::protocol::GuardianUserAuthorization; use codex_protocol::protocol::ReviewDecision; use codex_protocol::protocol::SubAgentSource; +use codex_protocol::protocol::TokenUsage; use codex_protocol::protocol::WarningEvent; use tokio_util::sync::CancellationToken; use crate::codex::Session; use crate::codex::TurnContext; +use super::GUARDIAN_REVIEW_TIMEOUT; use super::GUARDIAN_REVIEWER_NAME; use super::GuardianApprovalRequest; use super::GuardianAssessment; @@ -76,10 +87,34 @@ pub(crate) fn guardian_timeout_message() -> String { #[derive(Debug)] pub(super) enum GuardianReviewOutcome { Completed(anyhow::Result), + Failed(GuardianReviewFailure), TimedOut, Aborted, } +#[derive(Debug)] +pub(super) enum GuardianReviewFailure { + PromptBuild(anyhow::Error), + Session(anyhow::Error), + Parse(anyhow::Error), +} + +impl GuardianReviewFailure { + fn reason(&self) -> GuardianReviewFailureReason { + match self { + Self::PromptBuild(_) => GuardianReviewFailureReason::PromptBuildError, + Self::Session(_) => GuardianReviewFailureReason::SessionError, + Self::Parse(_) => GuardianReviewFailureReason::ParseError, + } + } + + fn error(&self) -> &anyhow::Error { + match self { + Self::PromptBuild(err) | Self::Session(err) | Self::Parse(err) => err, + } + } +} + fn guardian_risk_level_str(level: GuardianRiskLevel) -> &'static str { match level { GuardianRiskLevel::Low => "low", @@ -89,6 +124,202 @@ fn guardian_risk_level_str(level: GuardianRiskLevel) -> &'static str { } } +fn guardian_reviewed_action(request: &GuardianApprovalRequest) -> GuardianReviewedAction { + match request { + GuardianApprovalRequest::Shell { + command, + cwd, + sandbox_permissions, + additional_permissions, + justification, + .. + } => GuardianReviewedAction::Shell { + command: command.clone(), + command_display: codex_shell_command::parse_command::shlex_join(command), + cwd: cwd.to_string_lossy().into_owned(), + sandbox_permissions: *sandbox_permissions, + additional_permissions: additional_permissions.clone(), + justification: justification.clone(), + }, + GuardianApprovalRequest::ExecCommand { + command, + cwd, + sandbox_permissions, + additional_permissions, + justification, + tty, + .. + } => GuardianReviewedAction::UnifiedExec { + command: command.clone(), + command_display: codex_shell_command::parse_command::shlex_join(command), + cwd: cwd.to_string_lossy().into_owned(), + sandbox_permissions: *sandbox_permissions, + additional_permissions: additional_permissions.clone(), + justification: justification.clone(), + tty: *tty, + }, + #[cfg(unix)] + GuardianApprovalRequest::Execve { + source, + program, + argv, + cwd, + additional_permissions, + .. + } => GuardianReviewedAction::Execve { + source: *source, + program: program.clone(), + argv: argv.clone(), + cwd: cwd.to_string_lossy().into_owned(), + additional_permissions: additional_permissions.clone(), + }, + GuardianApprovalRequest::ApplyPatch { cwd, files, .. } => { + GuardianReviewedAction::ApplyPatch { + cwd: cwd.to_string_lossy().into_owned(), + files: files + .iter() + .map(|file| file.to_string_lossy().into_owned()) + .collect(), + } + } + GuardianApprovalRequest::NetworkAccess { + target, + host, + protocol, + port, + .. + } => GuardianReviewedAction::NetworkAccess { + target: target.clone(), + host: host.clone(), + protocol: *protocol, + port: *port, + }, + GuardianApprovalRequest::McpToolCall { + server, + tool_name, + connector_id, + connector_name, + tool_title, + .. + } => GuardianReviewedAction::McpToolCall { + server: server.clone(), + tool_name: tool_name.clone(), + connector_id: connector_id.clone(), + connector_name: connector_name.clone(), + tool_title: tool_title.clone(), + }, + } +} + +struct GuardianReviewAnalyticsContext { + thread_id: String, + turn_id: String, + review_id: String, + target_item_id: Option, + retry_reason: Option, + approval_request_source: GuardianApprovalRequestSource, + reviewed_action: GuardianReviewedAction, + started_at: u64, + started_instant: Instant, +} + +struct GuardianReviewAnalyticsTerminal { + decision: GuardianReviewDecision, + terminal_status: GuardianReviewTerminalStatus, + failure_reason: Option, + risk_level: Option, + user_authorization: Option, + outcome: Option, + rationale: Option, + guardian_thread_id: Option, + guardian_session_kind: Option, + guardian_model: Option, + guardian_reasoning_effort: Option, + had_prior_review_context: Option, + reviewed_action_truncated: bool, + token_usage: Option, + time_to_first_token_ms: Option, + completed_at: u64, +} + +#[derive(Default)] +struct GuardianReviewMetadataFields { + guardian_thread_id: Option, + guardian_session_kind: Option, + guardian_model: Option, + guardian_reasoning_effort: Option, + had_prior_review_context: Option, + reviewed_action_truncated: bool, + token_usage: Option, + time_to_first_token_ms: Option, +} + +impl GuardianReviewAnalyticsContext { + fn track( + &self, + session: &Session, + turn: &TurnContext, + terminal: GuardianReviewAnalyticsTerminal, + ) { + if !turn.config.features.enabled(Feature::GeneralAnalytics) { + return; + } + let completion_latency_ms = self.started_instant.elapsed().as_millis() as u64; + session + .services + .analytics_events_client + .track_guardian_review(codex_analytics::GuardianReviewEventParams { + thread_id: self.thread_id.clone(), + turn_id: self.turn_id.clone(), + review_id: self.review_id.clone(), + target_item_id: self.target_item_id.clone(), + retry_reason: self.retry_reason.clone(), + approval_request_source: self.approval_request_source, + reviewed_action: self.reviewed_action.clone(), + reviewed_action_truncated: terminal.reviewed_action_truncated, + decision: terminal.decision, + terminal_status: terminal.terminal_status, + failure_reason: terminal.failure_reason, + risk_level: terminal.risk_level, + user_authorization: terminal.user_authorization, + outcome: terminal.outcome, + rationale: terminal.rationale, + guardian_thread_id: terminal.guardian_thread_id, + guardian_session_kind: terminal.guardian_session_kind, + guardian_model: terminal.guardian_model, + guardian_reasoning_effort: terminal.guardian_reasoning_effort, + had_prior_review_context: terminal.had_prior_review_context, + review_timeout_ms: GUARDIAN_REVIEW_TIMEOUT.as_millis() as u64, + // TODO(rhan-oai): plumb nested Guardian review session tool-call counts. + tool_call_count: None, + time_to_first_token_ms: terminal.time_to_first_token_ms, + completion_latency_ms: Some(completion_latency_ms), + started_at: self.started_at, + completed_at: Some(terminal.completed_at), + input_tokens: terminal + .token_usage + .as_ref() + .map(|usage| usage.input_tokens), + cached_input_tokens: terminal + .token_usage + .as_ref() + .map(|usage| usage.cached_input_tokens), + output_tokens: terminal + .token_usage + .as_ref() + .map(|usage| usage.output_tokens), + reasoning_output_tokens: terminal + .token_usage + .as_ref() + .map(|usage| usage.reasoning_output_tokens), + total_tokens: terminal + .token_usage + .as_ref() + .map(|usage| usage.total_tokens), + }); + } +} + /// Whether this turn should route `on-request` approval prompts through the /// guardian reviewer instead of surfacing them to the user. ARC may still /// block actions earlier in the flow. @@ -116,11 +347,25 @@ async fn run_guardian_review( review_id: String, request: GuardianApprovalRequest, retry_reason: Option, + approval_request_source: GuardianApprovalRequestSource, external_cancel: Option, ) -> ReviewDecision { + let started_at = now_unix_seconds(); + let started_instant = Instant::now(); let target_item_id = guardian_request_target_item_id(&request).map(str::to_string); let assessment_turn_id = guardian_request_turn_id(&request, &turn.sub_id).to_string(); let action_summary = guardian_assessment_action(&request); + let analytics_context = GuardianReviewAnalyticsContext { + thread_id: session.conversation_id.to_string(), + turn_id: assessment_turn_id.clone(), + review_id: review_id.clone(), + target_item_id: target_item_id.clone(), + retry_reason: retry_reason.clone(), + approval_request_source, + reviewed_action: guardian_reviewed_action(&request), + started_at, + started_instant, + }; session .send_event( turn.as_ref(), @@ -142,6 +387,28 @@ async fn run_guardian_review( .as_ref() .is_some_and(CancellationToken::is_cancelled) { + analytics_context.track( + session.as_ref(), + turn.as_ref(), + GuardianReviewAnalyticsTerminal { + decision: GuardianReviewDecision::Aborted, + terminal_status: GuardianReviewTerminalStatus::Aborted, + failure_reason: Some(GuardianReviewFailureReason::Cancelled), + risk_level: None, + user_authorization: None, + outcome: None, + rationale: None, + guardian_thread_id: None, + guardian_session_kind: None, + guardian_model: None, + guardian_reasoning_effort: None, + had_prior_review_context: None, + reviewed_action_truncated: false, + token_usage: None, + time_to_first_token_ms: None, + completed_at: now_unix_seconds(), + }, + ); session .send_event( turn.as_ref(), @@ -167,24 +434,142 @@ async fn run_guardian_review( session.clone(), turn.clone(), request, - retry_reason, + retry_reason.clone(), schema, external_cancel, ) .await; + let completed_at = now_unix_seconds(); let assessment = match outcome { - GuardianReviewOutcome::Completed(Ok(assessment)) => assessment, - GuardianReviewOutcome::Completed(Err(err)) => GuardianAssessment { - risk_level: GuardianRiskLevel::High, - user_authorization: GuardianUserAuthorization::Unknown, - outcome: GuardianAssessmentOutcome::Deny, - rationale: format!("Automatic approval review failed: {err}"), - }, + GuardianReviewOutcome::Completed(Ok(assessment)) => { + let metadata = GuardianReviewMetadataFields::default(); + analytics_context.track( + session.as_ref(), + turn.as_ref(), + GuardianReviewAnalyticsTerminal { + decision: if matches!(assessment.outcome, GuardianAssessmentOutcome::Allow) { + GuardianReviewDecision::Approved + } else { + GuardianReviewDecision::Denied + }, + terminal_status: if matches!( + assessment.outcome, + GuardianAssessmentOutcome::Allow + ) { + GuardianReviewTerminalStatus::Approved + } else { + GuardianReviewTerminalStatus::Denied + }, + failure_reason: None, + risk_level: Some(assessment.risk_level), + user_authorization: Some(assessment.user_authorization), + outcome: Some(assessment.outcome), + rationale: Some(assessment.rationale.clone()), + guardian_thread_id: metadata.guardian_thread_id, + guardian_session_kind: metadata.guardian_session_kind, + guardian_model: metadata.guardian_model, + guardian_reasoning_effort: metadata.guardian_reasoning_effort, + had_prior_review_context: metadata.had_prior_review_context, + reviewed_action_truncated: metadata.reviewed_action_truncated, + token_usage: metadata.token_usage, + time_to_first_token_ms: metadata.time_to_first_token_ms, + completed_at, + }, + ); + assessment + } + GuardianReviewOutcome::Completed(Err(err)) => { + let metadata = GuardianReviewMetadataFields::default(); + let rationale = format!("Automatic approval review failed: {err}"); + analytics_context.track( + session.as_ref(), + turn.as_ref(), + GuardianReviewAnalyticsTerminal { + decision: GuardianReviewDecision::Denied, + terminal_status: GuardianReviewTerminalStatus::FailedClosed, + failure_reason: Some(GuardianReviewFailureReason::SessionError), + risk_level: None, + user_authorization: None, + outcome: None, + rationale: None, + guardian_thread_id: metadata.guardian_thread_id, + guardian_session_kind: metadata.guardian_session_kind, + guardian_model: metadata.guardian_model, + guardian_reasoning_effort: metadata.guardian_reasoning_effort, + had_prior_review_context: metadata.had_prior_review_context, + reviewed_action_truncated: metadata.reviewed_action_truncated, + token_usage: metadata.token_usage, + time_to_first_token_ms: metadata.time_to_first_token_ms, + completed_at, + }, + ); + GuardianAssessment { + risk_level: GuardianRiskLevel::High, + user_authorization: GuardianUserAuthorization::Unknown, + outcome: GuardianAssessmentOutcome::Deny, + rationale, + } + } + GuardianReviewOutcome::Failed(failure) => { + let metadata = GuardianReviewMetadataFields::default(); + let rationale = format!("Automatic approval review failed: {}", failure.error()); + analytics_context.track( + session.as_ref(), + turn.as_ref(), + GuardianReviewAnalyticsTerminal { + decision: GuardianReviewDecision::Denied, + terminal_status: GuardianReviewTerminalStatus::FailedClosed, + failure_reason: Some(failure.reason()), + risk_level: None, + user_authorization: None, + outcome: None, + rationale: None, + guardian_thread_id: metadata.guardian_thread_id, + guardian_session_kind: metadata.guardian_session_kind, + guardian_model: metadata.guardian_model, + guardian_reasoning_effort: metadata.guardian_reasoning_effort, + had_prior_review_context: metadata.had_prior_review_context, + reviewed_action_truncated: metadata.reviewed_action_truncated, + token_usage: metadata.token_usage, + time_to_first_token_ms: metadata.time_to_first_token_ms, + completed_at, + }, + ); + GuardianAssessment { + risk_level: GuardianRiskLevel::High, + user_authorization: GuardianUserAuthorization::Unknown, + outcome: GuardianAssessmentOutcome::Deny, + rationale, + } + } GuardianReviewOutcome::TimedOut => { + let metadata = GuardianReviewMetadataFields::default(); let rationale = "Automatic approval review timed out while evaluating the requested approval." .to_string(); + analytics_context.track( + session.as_ref(), + turn.as_ref(), + GuardianReviewAnalyticsTerminal { + decision: GuardianReviewDecision::Denied, + terminal_status: GuardianReviewTerminalStatus::TimedOut, + failure_reason: Some(GuardianReviewFailureReason::Timeout), + risk_level: None, + user_authorization: None, + outcome: None, + rationale: None, + guardian_thread_id: metadata.guardian_thread_id, + guardian_session_kind: metadata.guardian_session_kind, + guardian_model: metadata.guardian_model, + guardian_reasoning_effort: metadata.guardian_reasoning_effort, + had_prior_review_context: metadata.had_prior_review_context, + reviewed_action_truncated: metadata.reviewed_action_truncated, + token_usage: metadata.token_usage, + time_to_first_token_ms: metadata.time_to_first_token_ms, + completed_at, + }, + ); session .send_event( turn.as_ref(), @@ -212,6 +597,29 @@ async fn run_guardian_review( return ReviewDecision::TimedOut; } GuardianReviewOutcome::Aborted => { + let metadata = GuardianReviewMetadataFields::default(); + analytics_context.track( + session.as_ref(), + turn.as_ref(), + GuardianReviewAnalyticsTerminal { + decision: GuardianReviewDecision::Aborted, + terminal_status: GuardianReviewTerminalStatus::Aborted, + failure_reason: Some(GuardianReviewFailureReason::Cancelled), + risk_level: None, + user_authorization: None, + outcome: None, + rationale: None, + guardian_thread_id: metadata.guardian_thread_id, + guardian_session_kind: metadata.guardian_session_kind, + guardian_model: metadata.guardian_model, + guardian_reasoning_effort: metadata.guardian_reasoning_effort, + had_prior_review_context: metadata.had_prior_review_context, + reviewed_action_truncated: metadata.reviewed_action_truncated, + token_usage: metadata.token_usage, + time_to_first_token_ms: metadata.time_to_first_token_ms, + completed_at, + }, + ); session .send_event( turn.as_ref(), @@ -309,11 +717,34 @@ pub(crate) async fn review_approval_request( review_id, request, retry_reason, + GuardianApprovalRequestSource::MainTurn, /*external_cancel*/ None, ) .await } +pub(crate) async fn review_approval_request_with_source_and_cancel( + session: &Arc, + turn: &Arc, + review_id: String, + request: GuardianApprovalRequest, + retry_reason: Option, + approval_request_source: GuardianApprovalRequestSource, + cancel_token: CancellationToken, +) -> ReviewDecision { + run_guardian_review( + Arc::clone(session), + Arc::clone(turn), + review_id, + request, + retry_reason, + approval_request_source, + Some(cancel_token), + ) + .await +} + +#[cfg(test)] pub(crate) async fn review_approval_request_with_cancel( session: &Arc, turn: &Arc, @@ -328,6 +759,7 @@ pub(crate) async fn review_approval_request_with_cancel( review_id, request, retry_reason, + GuardianApprovalRequestSource::MainTurn, Some(cancel_token), ) .await @@ -358,7 +790,9 @@ pub(super) async fn run_guardian_review_session( let live_network_config = match session.services.network_proxy.as_ref() { Some(network_proxy) => match network_proxy.proxy().current_cfg().await { Ok(config) => Some(config), - Err(err) => return GuardianReviewOutcome::Completed(Err(err)), + Err(err) => { + return GuardianReviewOutcome::Failed(GuardianReviewFailure::PromptBuild(err)); + } }, None => None, }; @@ -408,7 +842,7 @@ pub(super) async fn run_guardian_review_session( ); let guardian_config = match guardian_config { Ok(config) => config, - Err(err) => return GuardianReviewOutcome::Completed(Err(err)), + Err(err) => return GuardianReviewOutcome::Failed(GuardianReviewFailure::PromptBuild(err)), }; match session @@ -428,15 +862,49 @@ pub(super) async fn run_guardian_review_session( }) .await { - GuardianReviewSessionOutcome::Completed(Ok(last_agent_message)) => { - GuardianReviewOutcome::Completed(parse_guardian_assessment( - last_agent_message.as_deref(), - )) - } + GuardianReviewSessionOutcome::Completed(Ok(last_agent_message)) => match last_agent_message + { + Some(last_agent_message) => { + match parse_guardian_assessment(Some(&last_agent_message)) { + Ok(assessment) => GuardianReviewOutcome::Completed(Ok(assessment)), + Err(err) => GuardianReviewOutcome::Failed(GuardianReviewFailure::Parse(err)), + } + } + None => GuardianReviewOutcome::Failed(GuardianReviewFailure::Session(anyhow::anyhow!( + "guardian review completed without an assessment payload" + ))), + }, GuardianReviewSessionOutcome::Completed(Err(err)) => { - GuardianReviewOutcome::Completed(Err(err)) + GuardianReviewOutcome::Failed(GuardianReviewFailure::Session(err)) } GuardianReviewSessionOutcome::TimedOut => GuardianReviewOutcome::TimedOut, GuardianReviewSessionOutcome::Aborted => GuardianReviewOutcome::Aborted, } } + +#[cfg(test)] +mod review_tests { + use super::*; + + #[test] + fn guardian_review_failure_reason_distinguishes_failure_kinds() { + let parse_failure = GuardianReviewFailure::Parse(anyhow::anyhow!("bad guardian JSON")); + let prompt_failure = + GuardianReviewFailure::PromptBuild(anyhow::anyhow!("bad prompt/config")); + let session_failure = + GuardianReviewFailure::Session(anyhow::anyhow!("guardian runtime failed")); + + assert!(matches!( + parse_failure.reason(), + GuardianReviewFailureReason::ParseError + )); + assert!(matches!( + prompt_failure.reason(), + GuardianReviewFailureReason::PromptBuildError + )); + assert!(matches!( + session_failure.reason(), + GuardianReviewFailureReason::SessionError + )); + } +}