Compare commits

...

2 Commits

Author SHA1 Message Date
rhan-oai
bb4e510fa7 [codex-analytics] guardian review analytics events emission 2026-04-13 14:25:25 -07:00
rhan-oai
d99dd9be4d [codex-analytics] guardian review analytics schema polishing 2026-04-13 13:59:59 -07:00
9 changed files with 659 additions and 82 deletions

View File

@@ -7,6 +7,12 @@ use crate::events::CodexCompactionEventRequest;
use crate::events::CodexPluginEventRequest;
use crate::events::CodexPluginUsedEventRequest;
use crate::events::CodexRuntimeMetadata;
use crate::events::GuardianApprovalRequestSource;
use crate::events::GuardianReviewDecision;
use crate::events::GuardianReviewEventParams;
use crate::events::GuardianReviewFailureReason;
use crate::events::GuardianReviewTerminalStatus;
use crate::events::GuardianReviewedAction;
use crate::events::ThreadInitializationMode;
use crate::events::ThreadInitializedEvent;
use crate::events::ThreadInitializedEventParams;
@@ -57,6 +63,7 @@ use codex_plugin::AppConnectorId;
use codex_plugin::PluginCapabilitySummary;
use codex_plugin::PluginId;
use codex_plugin::PluginTelemetryMetadata;
use codex_protocol::approvals::NetworkApprovalProtocol;
use codex_protocol::protocol::SubAgentSource;
use pretty_assertions::assert_eq;
use serde_json::json;
@@ -685,6 +692,126 @@ async fn compaction_event_ingests_custom_fact() {
assert_eq!(payload[0]["event_params"]["status"], "failed");
}
#[tokio::test]
async fn guardian_review_event_ingests_custom_fact_with_optional_target_item() {
let mut reducer = AnalyticsReducer::default();
let mut events = Vec::new();
reducer
.ingest(
AnalyticsFact::Initialize {
connection_id: 7,
params: InitializeParams {
client_info: ClientInfo {
name: "codex-tui".to_string(),
title: None,
version: "1.0.0".to_string(),
},
capabilities: Some(InitializeCapabilities {
experimental_api: false,
opt_out_notification_methods: None,
}),
},
product_client_id: DEFAULT_ORIGINATOR.to_string(),
runtime: sample_runtime_metadata(),
rpc_transport: AppServerRpcTransport::Websocket,
},
&mut events,
)
.await;
reducer
.ingest(
AnalyticsFact::Response {
connection_id: 7,
response: Box::new(sample_thread_start_response(
"thread-guardian",
/*ephemeral*/ false,
"gpt-5",
)),
},
&mut events,
)
.await;
events.clear();
reducer
.ingest(
AnalyticsFact::Custom(CustomAnalyticsFact::GuardianReview(Box::new(
GuardianReviewEventParams {
thread_id: "thread-guardian".to_string(),
turn_id: "turn-guardian".to_string(),
review_id: "review-guardian".to_string(),
target_item_id: None,
retry_reason: Some("network retry".to_string()),
approval_request_source: GuardianApprovalRequestSource::DelegatedSubagent,
reviewed_action: GuardianReviewedAction::NetworkAccess {
target: "https://example.com".to_string(),
host: "example.com".to_string(),
protocol: NetworkApprovalProtocol::Https,
port: 443,
},
reviewed_action_truncated: false,
decision: GuardianReviewDecision::Denied,
terminal_status: GuardianReviewTerminalStatus::TimedOut,
failure_reason: Some(GuardianReviewFailureReason::Timeout),
risk_level: None,
user_authorization: None,
outcome: None,
rationale: None,
guardian_thread_id: None,
guardian_session_kind: None,
guardian_model: None,
guardian_reasoning_effort: None,
had_prior_review_context: None,
review_timeout_ms: 90_000,
tool_call_count: None,
time_to_first_token_ms: None,
completion_latency_ms: Some(90_000),
started_at: 100,
completed_at: Some(190),
input_tokens: None,
cached_input_tokens: None,
output_tokens: None,
reasoning_output_tokens: None,
total_tokens: None,
},
))),
&mut events,
)
.await;
let payload = serde_json::to_value(&events).expect("serialize events");
assert_eq!(payload.as_array().expect("events array").len(), 1);
assert_eq!(payload[0]["event_type"], "codex_guardian_review");
assert_eq!(payload[0]["event_params"]["thread_id"], "thread-guardian");
assert_eq!(payload[0]["event_params"]["turn_id"], "turn-guardian");
assert_eq!(payload[0]["event_params"]["review_id"], "review-guardian");
assert_eq!(payload[0]["event_params"]["target_item_id"], json!(null));
assert_eq!(
payload[0]["event_params"]["approval_request_source"],
"delegated_subagent"
);
assert_eq!(
payload[0]["event_params"]["app_server_client"]["product_client_id"],
DEFAULT_ORIGINATOR
);
assert_eq!(
payload[0]["event_params"]["runtime"]["codex_rs_version"],
"0.1.0"
);
assert_eq!(
payload[0]["event_params"]["reviewed_action"]["type"],
"network_access"
);
assert_eq!(
payload[0]["event_params"]["reviewed_action"]["host"],
"example.com"
);
assert_eq!(payload[0]["event_params"]["terminal_status"], "timed_out");
assert_eq!(payload[0]["event_params"]["failure_reason"], "timeout");
assert_eq!(payload[0]["event_params"]["review_timeout_ms"], 90_000);
}
#[test]
fn subagent_thread_started_review_serializes_expected_shape() {
let event = TrackEventRequest::ThreadInitialized(subagent_thread_started_event_request(

View File

@@ -1,5 +1,11 @@
use crate::facts::AppInvocation;
use crate::facts::CodexCompactionEvent;
use crate::facts::CompactionImplementation;
use crate::facts::CompactionPhase;
use crate::facts::CompactionReason;
use crate::facts::CompactionStatus;
use crate::facts::CompactionStrategy;
use crate::facts::CompactionTrigger;
use crate::facts::InvocationType;
use crate::facts::PluginState;
use crate::facts::SubAgentThreadStartedInput;
@@ -9,6 +15,10 @@ use codex_plugin::PluginTelemetryMetadata;
use codex_protocol::approvals::NetworkApprovalProtocol;
use codex_protocol::models::PermissionProfile;
use codex_protocol::models::SandboxPermissions;
use codex_protocol::protocol::GuardianAssessmentOutcome;
use codex_protocol::protocol::GuardianCommandSource;
use codex_protocol::protocol::GuardianRiskLevel;
use codex_protocol::protocol::GuardianUserAuthorization;
use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::SubAgentSource;
use serde::Serialize;
@@ -147,31 +157,6 @@ pub enum GuardianReviewSessionKind {
EphemeralForked,
}
#[derive(Clone, Copy, Debug, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum GuardianReviewRiskLevel {
Low,
Medium,
High,
Critical,
}
#[derive(Clone, Copy, Debug, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum GuardianReviewUserAuthorization {
Unknown,
Low,
Medium,
High,
}
#[derive(Clone, Copy, Debug, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum GuardianReviewOutcome {
Allow,
Deny,
}
#[derive(Clone, Copy, Debug, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum GuardianApprovalRequestSource {
@@ -228,19 +213,12 @@ pub enum GuardianReviewedAction {
},
}
#[derive(Clone, Copy, Debug, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum GuardianCommandSource {
Shell,
UnifiedExec,
}
#[derive(Clone, Serialize)]
pub struct GuardianReviewEventParams {
pub thread_id: String,
pub turn_id: String,
pub review_id: String,
pub target_item_id: String,
pub target_item_id: Option<String>,
pub retry_reason: Option<String>,
pub approval_request_source: GuardianApprovalRequestSource,
pub reviewed_action: GuardianReviewedAction,
@@ -248,9 +226,9 @@ pub struct GuardianReviewEventParams {
pub decision: GuardianReviewDecision,
pub terminal_status: GuardianReviewTerminalStatus,
pub failure_reason: Option<GuardianReviewFailureReason>,
pub risk_level: Option<GuardianReviewRiskLevel>,
pub user_authorization: Option<GuardianReviewUserAuthorization>,
pub outcome: Option<GuardianReviewOutcome>,
pub risk_level: Option<GuardianRiskLevel>,
pub user_authorization: Option<GuardianUserAuthorization>,
pub outcome: Option<GuardianAssessmentOutcome>,
pub rationale: Option<String>,
pub guardian_thread_id: Option<String>,
pub guardian_session_kind: Option<GuardianReviewSessionKind>,
@@ -258,7 +236,7 @@ pub struct GuardianReviewEventParams {
pub guardian_reasoning_effort: Option<String>,
pub had_prior_review_context: Option<bool>,
pub review_timeout_ms: u64,
pub tool_call_count: u64,
pub tool_call_count: Option<u64>,
pub time_to_first_token_ms: Option<u64>,
pub completion_latency_ms: Option<u64>,
pub started_at: u64,
@@ -310,12 +288,12 @@ pub(crate) struct CodexCompactionEventParams {
pub(crate) thread_source: Option<&'static str>,
pub(crate) subagent_source: Option<String>,
pub(crate) parent_thread_id: Option<String>,
pub(crate) trigger: crate::facts::CompactionTrigger,
pub(crate) reason: crate::facts::CompactionReason,
pub(crate) implementation: crate::facts::CompactionImplementation,
pub(crate) phase: crate::facts::CompactionPhase,
pub(crate) strategy: crate::facts::CompactionStrategy,
pub(crate) status: crate::facts::CompactionStatus,
pub(crate) trigger: CompactionTrigger,
pub(crate) reason: CompactionReason,
pub(crate) implementation: CompactionImplementation,
pub(crate) phase: CompactionPhase,
pub(crate) strategy: CompactionStrategy,
pub(crate) status: CompactionStatus,
pub(crate) error: Option<String>,
pub(crate) active_context_tokens_before: i64,
pub(crate) active_context_tokens_after: i64,

View File

@@ -3,18 +3,17 @@ mod events;
mod facts;
mod reducer;
use std::time::SystemTime;
use std::time::UNIX_EPOCH;
pub use client::AnalyticsEventsClient;
pub use events::AppServerRpcTransport;
pub use events::GuardianApprovalRequestSource;
pub use events::GuardianCommandSource;
pub use events::GuardianReviewDecision;
pub use events::GuardianReviewEventParams;
pub use events::GuardianReviewFailureReason;
pub use events::GuardianReviewOutcome;
pub use events::GuardianReviewRiskLevel;
pub use events::GuardianReviewSessionKind;
pub use events::GuardianReviewTerminalStatus;
pub use events::GuardianReviewUserAuthorization;
pub use events::GuardianReviewedAction;
pub use facts::AppInvocation;
pub use facts::CodexCompactionEvent;
@@ -32,3 +31,10 @@ pub use facts::build_track_events_context;
#[cfg(test)]
mod analytics_client_tests;
pub fn now_unix_seconds() -> u64 {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_secs()
}

View File

@@ -3,6 +3,7 @@ use std::sync::Arc;
use async_channel::Receiver;
use async_channel::Sender;
use codex_analytics::GuardianApprovalRequestSource;
use codex_async_utils::OrCancelExt;
use codex_exec_server::EnvironmentManager;
use codex_protocol::protocol::ApplyPatchApprovalRequestEvent;
@@ -40,7 +41,7 @@ use crate::codex::emit_subagent_session_started;
use crate::config::Config;
use crate::guardian::GuardianApprovalRequest;
use crate::guardian::new_guardian_review_id;
use crate::guardian::review_approval_request_with_cancel;
use crate::guardian::review_approval_request_with_source_and_cancel;
use crate::guardian::routes_approval_to_guardian;
use crate::mcp_tool_call::MCP_TOOL_APPROVAL_ACCEPT;
use crate::mcp_tool_call::MCP_TOOL_APPROVAL_ACCEPT_FOR_SESSION;
@@ -747,12 +748,13 @@ fn spawn_guardian_review(
let _ = tx.send(ReviewDecision::Denied);
return;
};
let decision = runtime.block_on(review_approval_request_with_cancel(
let decision = runtime.block_on(review_approval_request_with_source_and_cancel(
&session,
&turn,
review_id,
request,
retry_reason,
GuardianApprovalRequestSource::DelegatedSubagent,
cancel_token,
));
let _ = tx.send(decision);

View File

@@ -1,7 +1,5 @@
use std::sync::Arc;
use std::time::Instant;
use std::time::SystemTime;
use std::time::UNIX_EPOCH;
use crate::Prompt;
use crate::client::ModelClientSession;
@@ -19,6 +17,7 @@ use codex_analytics::CompactionReason;
use codex_analytics::CompactionStatus;
use codex_analytics::CompactionStrategy;
use codex_analytics::CompactionTrigger;
use codex_analytics::now_unix_seconds;
use codex_features::Feature;
use codex_model_provider_info::ModelProviderInfo;
use codex_protocol::error::CodexErr;
@@ -372,13 +371,6 @@ pub(crate) fn compaction_status_from_result<T>(result: &CodexResult<T>) -> Compa
}
}
fn now_unix_seconds() -> u64 {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.map(|duration| duration.as_secs())
.unwrap_or_default()
}
pub fn content_items_to_text(content: &[ContentItem]) -> Option<String> {
let mut pieces = Vec::new();
for item in content {

View File

@@ -19,6 +19,7 @@ mod review_session;
use std::time::Duration;
use codex_protocol::protocol::GuardianAssessmentDecisionSource;
use codex_protocol::protocol::GuardianAssessmentOutcome;
use serde::Deserialize;
use serde::Serialize;
@@ -30,7 +31,9 @@ pub(crate) use review::guardian_timeout_message;
pub(crate) use review::is_guardian_reviewer_source;
pub(crate) use review::new_guardian_review_id;
pub(crate) use review::review_approval_request;
#[cfg(test)]
pub(crate) use review::review_approval_request_with_cancel;
pub(crate) use review::review_approval_request_with_source_and_cancel;
pub(crate) use review::routes_approval_to_guardian;
pub(crate) use review_session::GuardianReviewSessionManager;
@@ -45,14 +48,6 @@ const GUARDIAN_MAX_ACTION_STRING_TOKENS: usize = 16_000;
const GUARDIAN_RECENT_ENTRY_LIMIT: usize = 40;
const TRUNCATION_TAG: &str = "truncated";
/// Final allow/deny outcome returned by the guardian reviewer.
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub(crate) enum GuardianAssessmentOutcome {
Allow,
Deny,
}
/// Structured output contract that the guardian reviewer must satisfy.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub(crate) struct GuardianAssessment {

View File

@@ -1,5 +1,14 @@
use std::sync::Arc;
use std::time::Instant;
use codex_analytics::GuardianApprovalRequestSource;
use codex_analytics::GuardianReviewDecision;
use codex_analytics::GuardianReviewFailureReason;
use codex_analytics::GuardianReviewSessionKind;
use codex_analytics::GuardianReviewTerminalStatus;
use codex_analytics::GuardianReviewedAction;
use codex_analytics::now_unix_seconds;
use codex_features::Feature;
use codex_protocol::config_types::ApprovalsReviewer;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::EventMsg;
@@ -10,12 +19,14 @@ use codex_protocol::protocol::GuardianRiskLevel;
use codex_protocol::protocol::GuardianUserAuthorization;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::protocol::SubAgentSource;
use codex_protocol::protocol::TokenUsage;
use codex_protocol::protocol::WarningEvent;
use tokio_util::sync::CancellationToken;
use crate::codex::Session;
use crate::codex::TurnContext;
use super::GUARDIAN_REVIEW_TIMEOUT;
use super::GUARDIAN_REVIEWER_NAME;
use super::GuardianApprovalRequest;
use super::GuardianAssessment;
@@ -76,10 +87,34 @@ pub(crate) fn guardian_timeout_message() -> String {
#[derive(Debug)]
pub(super) enum GuardianReviewOutcome {
Completed(anyhow::Result<GuardianAssessment>),
Failed(GuardianReviewFailure),
TimedOut,
Aborted,
}
#[derive(Debug)]
pub(super) enum GuardianReviewFailure {
PromptBuild(anyhow::Error),
Session(anyhow::Error),
Parse(anyhow::Error),
}
impl GuardianReviewFailure {
fn reason(&self) -> GuardianReviewFailureReason {
match self {
Self::PromptBuild(_) => GuardianReviewFailureReason::PromptBuildError,
Self::Session(_) => GuardianReviewFailureReason::SessionError,
Self::Parse(_) => GuardianReviewFailureReason::ParseError,
}
}
fn error(&self) -> &anyhow::Error {
match self {
Self::PromptBuild(err) | Self::Session(err) | Self::Parse(err) => err,
}
}
}
fn guardian_risk_level_str(level: GuardianRiskLevel) -> &'static str {
match level {
GuardianRiskLevel::Low => "low",
@@ -89,6 +124,202 @@ fn guardian_risk_level_str(level: GuardianRiskLevel) -> &'static str {
}
}
fn guardian_reviewed_action(request: &GuardianApprovalRequest) -> GuardianReviewedAction {
match request {
GuardianApprovalRequest::Shell {
command,
cwd,
sandbox_permissions,
additional_permissions,
justification,
..
} => GuardianReviewedAction::Shell {
command: command.clone(),
command_display: codex_shell_command::parse_command::shlex_join(command),
cwd: cwd.to_string_lossy().into_owned(),
sandbox_permissions: *sandbox_permissions,
additional_permissions: additional_permissions.clone(),
justification: justification.clone(),
},
GuardianApprovalRequest::ExecCommand {
command,
cwd,
sandbox_permissions,
additional_permissions,
justification,
tty,
..
} => GuardianReviewedAction::UnifiedExec {
command: command.clone(),
command_display: codex_shell_command::parse_command::shlex_join(command),
cwd: cwd.to_string_lossy().into_owned(),
sandbox_permissions: *sandbox_permissions,
additional_permissions: additional_permissions.clone(),
justification: justification.clone(),
tty: *tty,
},
#[cfg(unix)]
GuardianApprovalRequest::Execve {
source,
program,
argv,
cwd,
additional_permissions,
..
} => GuardianReviewedAction::Execve {
source: *source,
program: program.clone(),
argv: argv.clone(),
cwd: cwd.to_string_lossy().into_owned(),
additional_permissions: additional_permissions.clone(),
},
GuardianApprovalRequest::ApplyPatch { cwd, files, .. } => {
GuardianReviewedAction::ApplyPatch {
cwd: cwd.to_string_lossy().into_owned(),
files: files
.iter()
.map(|file| file.to_string_lossy().into_owned())
.collect(),
}
}
GuardianApprovalRequest::NetworkAccess {
target,
host,
protocol,
port,
..
} => GuardianReviewedAction::NetworkAccess {
target: target.clone(),
host: host.clone(),
protocol: *protocol,
port: *port,
},
GuardianApprovalRequest::McpToolCall {
server,
tool_name,
connector_id,
connector_name,
tool_title,
..
} => GuardianReviewedAction::McpToolCall {
server: server.clone(),
tool_name: tool_name.clone(),
connector_id: connector_id.clone(),
connector_name: connector_name.clone(),
tool_title: tool_title.clone(),
},
}
}
struct GuardianReviewAnalyticsContext {
thread_id: String,
turn_id: String,
review_id: String,
target_item_id: Option<String>,
retry_reason: Option<String>,
approval_request_source: GuardianApprovalRequestSource,
reviewed_action: GuardianReviewedAction,
started_at: u64,
started_instant: Instant,
}
struct GuardianReviewAnalyticsTerminal {
decision: GuardianReviewDecision,
terminal_status: GuardianReviewTerminalStatus,
failure_reason: Option<GuardianReviewFailureReason>,
risk_level: Option<GuardianRiskLevel>,
user_authorization: Option<GuardianUserAuthorization>,
outcome: Option<GuardianAssessmentOutcome>,
rationale: Option<String>,
guardian_thread_id: Option<String>,
guardian_session_kind: Option<GuardianReviewSessionKind>,
guardian_model: Option<String>,
guardian_reasoning_effort: Option<String>,
had_prior_review_context: Option<bool>,
reviewed_action_truncated: bool,
token_usage: Option<TokenUsage>,
time_to_first_token_ms: Option<u64>,
completed_at: u64,
}
#[derive(Default)]
struct GuardianReviewMetadataFields {
guardian_thread_id: Option<String>,
guardian_session_kind: Option<GuardianReviewSessionKind>,
guardian_model: Option<String>,
guardian_reasoning_effort: Option<String>,
had_prior_review_context: Option<bool>,
reviewed_action_truncated: bool,
token_usage: Option<TokenUsage>,
time_to_first_token_ms: Option<u64>,
}
impl GuardianReviewAnalyticsContext {
fn track(
&self,
session: &Session,
turn: &TurnContext,
terminal: GuardianReviewAnalyticsTerminal,
) {
if !turn.config.features.enabled(Feature::GeneralAnalytics) {
return;
}
let completion_latency_ms = self.started_instant.elapsed().as_millis() as u64;
session
.services
.analytics_events_client
.track_guardian_review(codex_analytics::GuardianReviewEventParams {
thread_id: self.thread_id.clone(),
turn_id: self.turn_id.clone(),
review_id: self.review_id.clone(),
target_item_id: self.target_item_id.clone(),
retry_reason: self.retry_reason.clone(),
approval_request_source: self.approval_request_source,
reviewed_action: self.reviewed_action.clone(),
reviewed_action_truncated: terminal.reviewed_action_truncated,
decision: terminal.decision,
terminal_status: terminal.terminal_status,
failure_reason: terminal.failure_reason,
risk_level: terminal.risk_level,
user_authorization: terminal.user_authorization,
outcome: terminal.outcome,
rationale: terminal.rationale,
guardian_thread_id: terminal.guardian_thread_id,
guardian_session_kind: terminal.guardian_session_kind,
guardian_model: terminal.guardian_model,
guardian_reasoning_effort: terminal.guardian_reasoning_effort,
had_prior_review_context: terminal.had_prior_review_context,
review_timeout_ms: GUARDIAN_REVIEW_TIMEOUT.as_millis() as u64,
// TODO(rhan-oai): plumb nested Guardian review session tool-call counts.
tool_call_count: None,
time_to_first_token_ms: terminal.time_to_first_token_ms,
completion_latency_ms: Some(completion_latency_ms),
started_at: self.started_at,
completed_at: Some(terminal.completed_at),
input_tokens: terminal
.token_usage
.as_ref()
.map(|usage| usage.input_tokens),
cached_input_tokens: terminal
.token_usage
.as_ref()
.map(|usage| usage.cached_input_tokens),
output_tokens: terminal
.token_usage
.as_ref()
.map(|usage| usage.output_tokens),
reasoning_output_tokens: terminal
.token_usage
.as_ref()
.map(|usage| usage.reasoning_output_tokens),
total_tokens: terminal
.token_usage
.as_ref()
.map(|usage| usage.total_tokens),
});
}
}
/// Whether this turn should route `on-request` approval prompts through the
/// guardian reviewer instead of surfacing them to the user. ARC may still
/// block actions earlier in the flow.
@@ -116,11 +347,25 @@ async fn run_guardian_review(
review_id: String,
request: GuardianApprovalRequest,
retry_reason: Option<String>,
approval_request_source: GuardianApprovalRequestSource,
external_cancel: Option<CancellationToken>,
) -> ReviewDecision {
let started_at = now_unix_seconds();
let started_instant = Instant::now();
let target_item_id = guardian_request_target_item_id(&request).map(str::to_string);
let assessment_turn_id = guardian_request_turn_id(&request, &turn.sub_id).to_string();
let action_summary = guardian_assessment_action(&request);
let analytics_context = GuardianReviewAnalyticsContext {
thread_id: session.conversation_id.to_string(),
turn_id: assessment_turn_id.clone(),
review_id: review_id.clone(),
target_item_id: target_item_id.clone(),
retry_reason: retry_reason.clone(),
approval_request_source,
reviewed_action: guardian_reviewed_action(&request),
started_at,
started_instant,
};
session
.send_event(
turn.as_ref(),
@@ -142,6 +387,28 @@ async fn run_guardian_review(
.as_ref()
.is_some_and(CancellationToken::is_cancelled)
{
analytics_context.track(
session.as_ref(),
turn.as_ref(),
GuardianReviewAnalyticsTerminal {
decision: GuardianReviewDecision::Aborted,
terminal_status: GuardianReviewTerminalStatus::Aborted,
failure_reason: Some(GuardianReviewFailureReason::Cancelled),
risk_level: None,
user_authorization: None,
outcome: None,
rationale: None,
guardian_thread_id: None,
guardian_session_kind: None,
guardian_model: None,
guardian_reasoning_effort: None,
had_prior_review_context: None,
reviewed_action_truncated: false,
token_usage: None,
time_to_first_token_ms: None,
completed_at: now_unix_seconds(),
},
);
session
.send_event(
turn.as_ref(),
@@ -167,24 +434,142 @@ async fn run_guardian_review(
session.clone(),
turn.clone(),
request,
retry_reason,
retry_reason.clone(),
schema,
external_cancel,
)
.await;
let completed_at = now_unix_seconds();
let assessment = match outcome {
GuardianReviewOutcome::Completed(Ok(assessment)) => assessment,
GuardianReviewOutcome::Completed(Err(err)) => GuardianAssessment {
risk_level: GuardianRiskLevel::High,
user_authorization: GuardianUserAuthorization::Unknown,
outcome: GuardianAssessmentOutcome::Deny,
rationale: format!("Automatic approval review failed: {err}"),
},
GuardianReviewOutcome::Completed(Ok(assessment)) => {
let metadata = GuardianReviewMetadataFields::default();
analytics_context.track(
session.as_ref(),
turn.as_ref(),
GuardianReviewAnalyticsTerminal {
decision: if matches!(assessment.outcome, GuardianAssessmentOutcome::Allow) {
GuardianReviewDecision::Approved
} else {
GuardianReviewDecision::Denied
},
terminal_status: if matches!(
assessment.outcome,
GuardianAssessmentOutcome::Allow
) {
GuardianReviewTerminalStatus::Approved
} else {
GuardianReviewTerminalStatus::Denied
},
failure_reason: None,
risk_level: Some(assessment.risk_level),
user_authorization: Some(assessment.user_authorization),
outcome: Some(assessment.outcome),
rationale: Some(assessment.rationale.clone()),
guardian_thread_id: metadata.guardian_thread_id,
guardian_session_kind: metadata.guardian_session_kind,
guardian_model: metadata.guardian_model,
guardian_reasoning_effort: metadata.guardian_reasoning_effort,
had_prior_review_context: metadata.had_prior_review_context,
reviewed_action_truncated: metadata.reviewed_action_truncated,
token_usage: metadata.token_usage,
time_to_first_token_ms: metadata.time_to_first_token_ms,
completed_at,
},
);
assessment
}
GuardianReviewOutcome::Completed(Err(err)) => {
let metadata = GuardianReviewMetadataFields::default();
let rationale = format!("Automatic approval review failed: {err}");
analytics_context.track(
session.as_ref(),
turn.as_ref(),
GuardianReviewAnalyticsTerminal {
decision: GuardianReviewDecision::Denied,
terminal_status: GuardianReviewTerminalStatus::FailedClosed,
failure_reason: Some(GuardianReviewFailureReason::SessionError),
risk_level: None,
user_authorization: None,
outcome: None,
rationale: None,
guardian_thread_id: metadata.guardian_thread_id,
guardian_session_kind: metadata.guardian_session_kind,
guardian_model: metadata.guardian_model,
guardian_reasoning_effort: metadata.guardian_reasoning_effort,
had_prior_review_context: metadata.had_prior_review_context,
reviewed_action_truncated: metadata.reviewed_action_truncated,
token_usage: metadata.token_usage,
time_to_first_token_ms: metadata.time_to_first_token_ms,
completed_at,
},
);
GuardianAssessment {
risk_level: GuardianRiskLevel::High,
user_authorization: GuardianUserAuthorization::Unknown,
outcome: GuardianAssessmentOutcome::Deny,
rationale,
}
}
GuardianReviewOutcome::Failed(failure) => {
let metadata = GuardianReviewMetadataFields::default();
let rationale = format!("Automatic approval review failed: {}", failure.error());
analytics_context.track(
session.as_ref(),
turn.as_ref(),
GuardianReviewAnalyticsTerminal {
decision: GuardianReviewDecision::Denied,
terminal_status: GuardianReviewTerminalStatus::FailedClosed,
failure_reason: Some(failure.reason()),
risk_level: None,
user_authorization: None,
outcome: None,
rationale: None,
guardian_thread_id: metadata.guardian_thread_id,
guardian_session_kind: metadata.guardian_session_kind,
guardian_model: metadata.guardian_model,
guardian_reasoning_effort: metadata.guardian_reasoning_effort,
had_prior_review_context: metadata.had_prior_review_context,
reviewed_action_truncated: metadata.reviewed_action_truncated,
token_usage: metadata.token_usage,
time_to_first_token_ms: metadata.time_to_first_token_ms,
completed_at,
},
);
GuardianAssessment {
risk_level: GuardianRiskLevel::High,
user_authorization: GuardianUserAuthorization::Unknown,
outcome: GuardianAssessmentOutcome::Deny,
rationale,
}
}
GuardianReviewOutcome::TimedOut => {
let metadata = GuardianReviewMetadataFields::default();
let rationale =
"Automatic approval review timed out while evaluating the requested approval."
.to_string();
analytics_context.track(
session.as_ref(),
turn.as_ref(),
GuardianReviewAnalyticsTerminal {
decision: GuardianReviewDecision::Denied,
terminal_status: GuardianReviewTerminalStatus::TimedOut,
failure_reason: Some(GuardianReviewFailureReason::Timeout),
risk_level: None,
user_authorization: None,
outcome: None,
rationale: None,
guardian_thread_id: metadata.guardian_thread_id,
guardian_session_kind: metadata.guardian_session_kind,
guardian_model: metadata.guardian_model,
guardian_reasoning_effort: metadata.guardian_reasoning_effort,
had_prior_review_context: metadata.had_prior_review_context,
reviewed_action_truncated: metadata.reviewed_action_truncated,
token_usage: metadata.token_usage,
time_to_first_token_ms: metadata.time_to_first_token_ms,
completed_at,
},
);
session
.send_event(
turn.as_ref(),
@@ -212,6 +597,29 @@ async fn run_guardian_review(
return ReviewDecision::TimedOut;
}
GuardianReviewOutcome::Aborted => {
let metadata = GuardianReviewMetadataFields::default();
analytics_context.track(
session.as_ref(),
turn.as_ref(),
GuardianReviewAnalyticsTerminal {
decision: GuardianReviewDecision::Aborted,
terminal_status: GuardianReviewTerminalStatus::Aborted,
failure_reason: Some(GuardianReviewFailureReason::Cancelled),
risk_level: None,
user_authorization: None,
outcome: None,
rationale: None,
guardian_thread_id: metadata.guardian_thread_id,
guardian_session_kind: metadata.guardian_session_kind,
guardian_model: metadata.guardian_model,
guardian_reasoning_effort: metadata.guardian_reasoning_effort,
had_prior_review_context: metadata.had_prior_review_context,
reviewed_action_truncated: metadata.reviewed_action_truncated,
token_usage: metadata.token_usage,
time_to_first_token_ms: metadata.time_to_first_token_ms,
completed_at,
},
);
session
.send_event(
turn.as_ref(),
@@ -309,11 +717,34 @@ pub(crate) async fn review_approval_request(
review_id,
request,
retry_reason,
GuardianApprovalRequestSource::MainTurn,
/*external_cancel*/ None,
)
.await
}
pub(crate) async fn review_approval_request_with_source_and_cancel(
session: &Arc<Session>,
turn: &Arc<TurnContext>,
review_id: String,
request: GuardianApprovalRequest,
retry_reason: Option<String>,
approval_request_source: GuardianApprovalRequestSource,
cancel_token: CancellationToken,
) -> ReviewDecision {
run_guardian_review(
Arc::clone(session),
Arc::clone(turn),
review_id,
request,
retry_reason,
approval_request_source,
Some(cancel_token),
)
.await
}
#[cfg(test)]
pub(crate) async fn review_approval_request_with_cancel(
session: &Arc<Session>,
turn: &Arc<TurnContext>,
@@ -328,6 +759,7 @@ pub(crate) async fn review_approval_request_with_cancel(
review_id,
request,
retry_reason,
GuardianApprovalRequestSource::MainTurn,
Some(cancel_token),
)
.await
@@ -358,7 +790,9 @@ pub(super) async fn run_guardian_review_session(
let live_network_config = match session.services.network_proxy.as_ref() {
Some(network_proxy) => match network_proxy.proxy().current_cfg().await {
Ok(config) => Some(config),
Err(err) => return GuardianReviewOutcome::Completed(Err(err)),
Err(err) => {
return GuardianReviewOutcome::Failed(GuardianReviewFailure::PromptBuild(err));
}
},
None => None,
};
@@ -408,7 +842,7 @@ pub(super) async fn run_guardian_review_session(
);
let guardian_config = match guardian_config {
Ok(config) => config,
Err(err) => return GuardianReviewOutcome::Completed(Err(err)),
Err(err) => return GuardianReviewOutcome::Failed(GuardianReviewFailure::PromptBuild(err)),
};
match session
@@ -428,15 +862,49 @@ pub(super) async fn run_guardian_review_session(
})
.await
{
GuardianReviewSessionOutcome::Completed(Ok(last_agent_message)) => {
GuardianReviewOutcome::Completed(parse_guardian_assessment(
last_agent_message.as_deref(),
))
}
GuardianReviewSessionOutcome::Completed(Ok(last_agent_message)) => match last_agent_message
{
Some(last_agent_message) => {
match parse_guardian_assessment(Some(&last_agent_message)) {
Ok(assessment) => GuardianReviewOutcome::Completed(Ok(assessment)),
Err(err) => GuardianReviewOutcome::Failed(GuardianReviewFailure::Parse(err)),
}
}
None => GuardianReviewOutcome::Failed(GuardianReviewFailure::Session(anyhow::anyhow!(
"guardian review completed without an assessment payload"
))),
},
GuardianReviewSessionOutcome::Completed(Err(err)) => {
GuardianReviewOutcome::Completed(Err(err))
GuardianReviewOutcome::Failed(GuardianReviewFailure::Session(err))
}
GuardianReviewSessionOutcome::TimedOut => GuardianReviewOutcome::TimedOut,
GuardianReviewSessionOutcome::Aborted => GuardianReviewOutcome::Aborted,
}
}
#[cfg(test)]
mod review_tests {
use super::*;
#[test]
fn guardian_review_failure_reason_distinguishes_failure_kinds() {
let parse_failure = GuardianReviewFailure::Parse(anyhow::anyhow!("bad guardian JSON"));
let prompt_failure =
GuardianReviewFailure::PromptBuild(anyhow::anyhow!("bad prompt/config"));
let session_failure =
GuardianReviewFailure::Session(anyhow::anyhow!("guardian runtime failed"));
assert!(matches!(
parse_failure.reason(),
GuardianReviewFailureReason::ParseError
));
assert!(matches!(
prompt_failure.reason(),
GuardianReviewFailureReason::PromptBuildError
));
assert!(matches!(
session_failure.reason(),
GuardianReviewFailureReason::SessionError
));
}
}

View File

@@ -99,6 +99,14 @@ pub enum GuardianUserAuthorization {
High,
}
/// Final allow/deny outcome returned by the guardian reviewer.
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)]
#[serde(rename_all = "lowercase")]
pub enum GuardianAssessmentOutcome {
Allow,
Deny,
}
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
pub enum GuardianAssessmentStatus {

View File

@@ -67,6 +67,7 @@ pub use crate::approvals::ExecPolicyAmendment;
pub use crate::approvals::GuardianAssessmentAction;
pub use crate::approvals::GuardianAssessmentDecisionSource;
pub use crate::approvals::GuardianAssessmentEvent;
pub use crate::approvals::GuardianAssessmentOutcome;
pub use crate::approvals::GuardianAssessmentStatus;
pub use crate::approvals::GuardianCommandSource;
pub use crate::approvals::GuardianRiskLevel;