mirror of
https://github.com/openai/codex.git
synced 2026-04-14 19:41:45 +03:00
Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
810bd9d037 | ||
|
|
bb4e510fa7 | ||
|
|
d99dd9be4d |
@@ -7,6 +7,12 @@ use crate::events::CodexCompactionEventRequest;
|
||||
use crate::events::CodexPluginEventRequest;
|
||||
use crate::events::CodexPluginUsedEventRequest;
|
||||
use crate::events::CodexRuntimeMetadata;
|
||||
use crate::events::GuardianApprovalRequestSource;
|
||||
use crate::events::GuardianReviewDecision;
|
||||
use crate::events::GuardianReviewEventParams;
|
||||
use crate::events::GuardianReviewFailureReason;
|
||||
use crate::events::GuardianReviewTerminalStatus;
|
||||
use crate::events::GuardianReviewedAction;
|
||||
use crate::events::ThreadInitializationMode;
|
||||
use crate::events::ThreadInitializedEvent;
|
||||
use crate::events::ThreadInitializedEventParams;
|
||||
@@ -57,6 +63,7 @@ use codex_plugin::AppConnectorId;
|
||||
use codex_plugin::PluginCapabilitySummary;
|
||||
use codex_plugin::PluginId;
|
||||
use codex_plugin::PluginTelemetryMetadata;
|
||||
use codex_protocol::approvals::NetworkApprovalProtocol;
|
||||
use codex_protocol::protocol::SubAgentSource;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
@@ -685,6 +692,126 @@ async fn compaction_event_ingests_custom_fact() {
|
||||
assert_eq!(payload[0]["event_params"]["status"], "failed");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn guardian_review_event_ingests_custom_fact_with_optional_target_item() {
|
||||
let mut reducer = AnalyticsReducer::default();
|
||||
let mut events = Vec::new();
|
||||
|
||||
reducer
|
||||
.ingest(
|
||||
AnalyticsFact::Initialize {
|
||||
connection_id: 7,
|
||||
params: InitializeParams {
|
||||
client_info: ClientInfo {
|
||||
name: "codex-tui".to_string(),
|
||||
title: None,
|
||||
version: "1.0.0".to_string(),
|
||||
},
|
||||
capabilities: Some(InitializeCapabilities {
|
||||
experimental_api: false,
|
||||
opt_out_notification_methods: None,
|
||||
}),
|
||||
},
|
||||
product_client_id: DEFAULT_ORIGINATOR.to_string(),
|
||||
runtime: sample_runtime_metadata(),
|
||||
rpc_transport: AppServerRpcTransport::Websocket,
|
||||
},
|
||||
&mut events,
|
||||
)
|
||||
.await;
|
||||
reducer
|
||||
.ingest(
|
||||
AnalyticsFact::Response {
|
||||
connection_id: 7,
|
||||
response: Box::new(sample_thread_start_response(
|
||||
"thread-guardian",
|
||||
/*ephemeral*/ false,
|
||||
"gpt-5",
|
||||
)),
|
||||
},
|
||||
&mut events,
|
||||
)
|
||||
.await;
|
||||
events.clear();
|
||||
|
||||
reducer
|
||||
.ingest(
|
||||
AnalyticsFact::Custom(CustomAnalyticsFact::GuardianReview(Box::new(
|
||||
GuardianReviewEventParams {
|
||||
thread_id: "thread-guardian".to_string(),
|
||||
turn_id: "turn-guardian".to_string(),
|
||||
review_id: "review-guardian".to_string(),
|
||||
target_item_id: None,
|
||||
retry_reason: Some("network retry".to_string()),
|
||||
approval_request_source: GuardianApprovalRequestSource::DelegatedSubagent,
|
||||
reviewed_action: GuardianReviewedAction::NetworkAccess {
|
||||
target: "https://example.com".to_string(),
|
||||
host: "example.com".to_string(),
|
||||
protocol: NetworkApprovalProtocol::Https,
|
||||
port: 443,
|
||||
},
|
||||
reviewed_action_truncated: false,
|
||||
decision: GuardianReviewDecision::Denied,
|
||||
terminal_status: GuardianReviewTerminalStatus::TimedOut,
|
||||
failure_reason: Some(GuardianReviewFailureReason::Timeout),
|
||||
risk_level: None,
|
||||
user_authorization: None,
|
||||
outcome: None,
|
||||
rationale: None,
|
||||
guardian_thread_id: None,
|
||||
guardian_session_kind: None,
|
||||
guardian_model: None,
|
||||
guardian_reasoning_effort: None,
|
||||
had_prior_review_context: None,
|
||||
review_timeout_ms: 90_000,
|
||||
tool_call_count: None,
|
||||
time_to_first_token_ms: None,
|
||||
completion_latency_ms: Some(90_000),
|
||||
started_at: 100,
|
||||
completed_at: Some(190),
|
||||
input_tokens: None,
|
||||
cached_input_tokens: None,
|
||||
output_tokens: None,
|
||||
reasoning_output_tokens: None,
|
||||
total_tokens: None,
|
||||
},
|
||||
))),
|
||||
&mut events,
|
||||
)
|
||||
.await;
|
||||
|
||||
let payload = serde_json::to_value(&events).expect("serialize events");
|
||||
assert_eq!(payload.as_array().expect("events array").len(), 1);
|
||||
assert_eq!(payload[0]["event_type"], "codex_guardian_review");
|
||||
assert_eq!(payload[0]["event_params"]["thread_id"], "thread-guardian");
|
||||
assert_eq!(payload[0]["event_params"]["turn_id"], "turn-guardian");
|
||||
assert_eq!(payload[0]["event_params"]["review_id"], "review-guardian");
|
||||
assert_eq!(payload[0]["event_params"]["target_item_id"], json!(null));
|
||||
assert_eq!(
|
||||
payload[0]["event_params"]["approval_request_source"],
|
||||
"delegated_subagent"
|
||||
);
|
||||
assert_eq!(
|
||||
payload[0]["event_params"]["app_server_client"]["product_client_id"],
|
||||
DEFAULT_ORIGINATOR
|
||||
);
|
||||
assert_eq!(
|
||||
payload[0]["event_params"]["runtime"]["codex_rs_version"],
|
||||
"0.1.0"
|
||||
);
|
||||
assert_eq!(
|
||||
payload[0]["event_params"]["reviewed_action"]["type"],
|
||||
"network_access"
|
||||
);
|
||||
assert_eq!(
|
||||
payload[0]["event_params"]["reviewed_action"]["host"],
|
||||
"example.com"
|
||||
);
|
||||
assert_eq!(payload[0]["event_params"]["terminal_status"], "timed_out");
|
||||
assert_eq!(payload[0]["event_params"]["failure_reason"], "timeout");
|
||||
assert_eq!(payload[0]["event_params"]["review_timeout_ms"], 90_000);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn subagent_thread_started_review_serializes_expected_shape() {
|
||||
let event = TrackEventRequest::ThreadInitialized(subagent_thread_started_event_request(
|
||||
|
||||
@@ -1,5 +1,11 @@
|
||||
use crate::facts::AppInvocation;
|
||||
use crate::facts::CodexCompactionEvent;
|
||||
use crate::facts::CompactionImplementation;
|
||||
use crate::facts::CompactionPhase;
|
||||
use crate::facts::CompactionReason;
|
||||
use crate::facts::CompactionStatus;
|
||||
use crate::facts::CompactionStrategy;
|
||||
use crate::facts::CompactionTrigger;
|
||||
use crate::facts::InvocationType;
|
||||
use crate::facts::PluginState;
|
||||
use crate::facts::SubAgentThreadStartedInput;
|
||||
@@ -9,6 +15,10 @@ use codex_plugin::PluginTelemetryMetadata;
|
||||
use codex_protocol::approvals::NetworkApprovalProtocol;
|
||||
use codex_protocol::models::PermissionProfile;
|
||||
use codex_protocol::models::SandboxPermissions;
|
||||
use codex_protocol::protocol::GuardianAssessmentOutcome;
|
||||
use codex_protocol::protocol::GuardianCommandSource;
|
||||
use codex_protocol::protocol::GuardianRiskLevel;
|
||||
use codex_protocol::protocol::GuardianUserAuthorization;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use codex_protocol::protocol::SubAgentSource;
|
||||
use serde::Serialize;
|
||||
@@ -147,31 +157,6 @@ pub enum GuardianReviewSessionKind {
|
||||
EphemeralForked,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Serialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum GuardianReviewRiskLevel {
|
||||
Low,
|
||||
Medium,
|
||||
High,
|
||||
Critical,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Serialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum GuardianReviewUserAuthorization {
|
||||
Unknown,
|
||||
Low,
|
||||
Medium,
|
||||
High,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Serialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum GuardianReviewOutcome {
|
||||
Allow,
|
||||
Deny,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Serialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum GuardianApprovalRequestSource {
|
||||
@@ -228,19 +213,12 @@ pub enum GuardianReviewedAction {
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Serialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum GuardianCommandSource {
|
||||
Shell,
|
||||
UnifiedExec,
|
||||
}
|
||||
|
||||
#[derive(Clone, Serialize)]
|
||||
pub struct GuardianReviewEventParams {
|
||||
pub thread_id: String,
|
||||
pub turn_id: String,
|
||||
pub review_id: String,
|
||||
pub target_item_id: String,
|
||||
pub target_item_id: Option<String>,
|
||||
pub retry_reason: Option<String>,
|
||||
pub approval_request_source: GuardianApprovalRequestSource,
|
||||
pub reviewed_action: GuardianReviewedAction,
|
||||
@@ -248,9 +226,9 @@ pub struct GuardianReviewEventParams {
|
||||
pub decision: GuardianReviewDecision,
|
||||
pub terminal_status: GuardianReviewTerminalStatus,
|
||||
pub failure_reason: Option<GuardianReviewFailureReason>,
|
||||
pub risk_level: Option<GuardianReviewRiskLevel>,
|
||||
pub user_authorization: Option<GuardianReviewUserAuthorization>,
|
||||
pub outcome: Option<GuardianReviewOutcome>,
|
||||
pub risk_level: Option<GuardianRiskLevel>,
|
||||
pub user_authorization: Option<GuardianUserAuthorization>,
|
||||
pub outcome: Option<GuardianAssessmentOutcome>,
|
||||
pub rationale: Option<String>,
|
||||
pub guardian_thread_id: Option<String>,
|
||||
pub guardian_session_kind: Option<GuardianReviewSessionKind>,
|
||||
@@ -258,7 +236,7 @@ pub struct GuardianReviewEventParams {
|
||||
pub guardian_reasoning_effort: Option<String>,
|
||||
pub had_prior_review_context: Option<bool>,
|
||||
pub review_timeout_ms: u64,
|
||||
pub tool_call_count: u64,
|
||||
pub tool_call_count: Option<u64>,
|
||||
pub time_to_first_token_ms: Option<u64>,
|
||||
pub completion_latency_ms: Option<u64>,
|
||||
pub started_at: u64,
|
||||
@@ -310,12 +288,12 @@ pub(crate) struct CodexCompactionEventParams {
|
||||
pub(crate) thread_source: Option<&'static str>,
|
||||
pub(crate) subagent_source: Option<String>,
|
||||
pub(crate) parent_thread_id: Option<String>,
|
||||
pub(crate) trigger: crate::facts::CompactionTrigger,
|
||||
pub(crate) reason: crate::facts::CompactionReason,
|
||||
pub(crate) implementation: crate::facts::CompactionImplementation,
|
||||
pub(crate) phase: crate::facts::CompactionPhase,
|
||||
pub(crate) strategy: crate::facts::CompactionStrategy,
|
||||
pub(crate) status: crate::facts::CompactionStatus,
|
||||
pub(crate) trigger: CompactionTrigger,
|
||||
pub(crate) reason: CompactionReason,
|
||||
pub(crate) implementation: CompactionImplementation,
|
||||
pub(crate) phase: CompactionPhase,
|
||||
pub(crate) strategy: CompactionStrategy,
|
||||
pub(crate) status: CompactionStatus,
|
||||
pub(crate) error: Option<String>,
|
||||
pub(crate) active_context_tokens_before: i64,
|
||||
pub(crate) active_context_tokens_after: i64,
|
||||
|
||||
@@ -3,18 +3,17 @@ mod events;
|
||||
mod facts;
|
||||
mod reducer;
|
||||
|
||||
use std::time::SystemTime;
|
||||
use std::time::UNIX_EPOCH;
|
||||
|
||||
pub use client::AnalyticsEventsClient;
|
||||
pub use events::AppServerRpcTransport;
|
||||
pub use events::GuardianApprovalRequestSource;
|
||||
pub use events::GuardianCommandSource;
|
||||
pub use events::GuardianReviewDecision;
|
||||
pub use events::GuardianReviewEventParams;
|
||||
pub use events::GuardianReviewFailureReason;
|
||||
pub use events::GuardianReviewOutcome;
|
||||
pub use events::GuardianReviewRiskLevel;
|
||||
pub use events::GuardianReviewSessionKind;
|
||||
pub use events::GuardianReviewTerminalStatus;
|
||||
pub use events::GuardianReviewUserAuthorization;
|
||||
pub use events::GuardianReviewedAction;
|
||||
pub use facts::AppInvocation;
|
||||
pub use facts::CodexCompactionEvent;
|
||||
@@ -32,3 +31,10 @@ pub use facts::build_track_events_context;
|
||||
|
||||
#[cfg(test)]
|
||||
mod analytics_client_tests;
|
||||
|
||||
pub fn now_unix_seconds() -> u64 {
|
||||
SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_secs()
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ use std::sync::Arc;
|
||||
|
||||
use async_channel::Receiver;
|
||||
use async_channel::Sender;
|
||||
use codex_analytics::GuardianApprovalRequestSource;
|
||||
use codex_async_utils::OrCancelExt;
|
||||
use codex_exec_server::EnvironmentManager;
|
||||
use codex_protocol::protocol::ApplyPatchApprovalRequestEvent;
|
||||
@@ -40,7 +41,7 @@ use crate::codex::emit_subagent_session_started;
|
||||
use crate::config::Config;
|
||||
use crate::guardian::GuardianApprovalRequest;
|
||||
use crate::guardian::new_guardian_review_id;
|
||||
use crate::guardian::review_approval_request_with_cancel;
|
||||
use crate::guardian::review_approval_request_with_source_and_cancel;
|
||||
use crate::guardian::routes_approval_to_guardian;
|
||||
use crate::mcp_tool_call::MCP_TOOL_APPROVAL_ACCEPT;
|
||||
use crate::mcp_tool_call::MCP_TOOL_APPROVAL_ACCEPT_FOR_SESSION;
|
||||
@@ -747,12 +748,13 @@ fn spawn_guardian_review(
|
||||
let _ = tx.send(ReviewDecision::Denied);
|
||||
return;
|
||||
};
|
||||
let decision = runtime.block_on(review_approval_request_with_cancel(
|
||||
let decision = runtime.block_on(review_approval_request_with_source_and_cancel(
|
||||
&session,
|
||||
&turn,
|
||||
review_id,
|
||||
request,
|
||||
retry_reason,
|
||||
GuardianApprovalRequestSource::DelegatedSubagent,
|
||||
cancel_token,
|
||||
));
|
||||
let _ = tx.send(decision);
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
use std::time::SystemTime;
|
||||
use std::time::UNIX_EPOCH;
|
||||
|
||||
use crate::Prompt;
|
||||
use crate::client::ModelClientSession;
|
||||
@@ -19,6 +17,7 @@ use codex_analytics::CompactionReason;
|
||||
use codex_analytics::CompactionStatus;
|
||||
use codex_analytics::CompactionStrategy;
|
||||
use codex_analytics::CompactionTrigger;
|
||||
use codex_analytics::now_unix_seconds;
|
||||
use codex_features::Feature;
|
||||
use codex_model_provider_info::ModelProviderInfo;
|
||||
use codex_protocol::error::CodexErr;
|
||||
@@ -372,13 +371,6 @@ pub(crate) fn compaction_status_from_result<T>(result: &CodexResult<T>) -> Compa
|
||||
}
|
||||
}
|
||||
|
||||
fn now_unix_seconds() -> u64 {
|
||||
SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.map(|duration| duration.as_secs())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
pub fn content_items_to_text(content: &[ContentItem]) -> Option<String> {
|
||||
let mut pieces = Vec::new();
|
||||
for item in content {
|
||||
|
||||
@@ -19,6 +19,7 @@ mod review_session;
|
||||
use std::time::Duration;
|
||||
|
||||
use codex_protocol::protocol::GuardianAssessmentDecisionSource;
|
||||
use codex_protocol::protocol::GuardianAssessmentOutcome;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
@@ -30,7 +31,9 @@ pub(crate) use review::guardian_timeout_message;
|
||||
pub(crate) use review::is_guardian_reviewer_source;
|
||||
pub(crate) use review::new_guardian_review_id;
|
||||
pub(crate) use review::review_approval_request;
|
||||
#[cfg(test)]
|
||||
pub(crate) use review::review_approval_request_with_cancel;
|
||||
pub(crate) use review::review_approval_request_with_source_and_cancel;
|
||||
pub(crate) use review::routes_approval_to_guardian;
|
||||
pub(crate) use review_session::GuardianReviewSessionManager;
|
||||
|
||||
@@ -45,14 +48,6 @@ const GUARDIAN_MAX_ACTION_STRING_TOKENS: usize = 16_000;
|
||||
const GUARDIAN_RECENT_ENTRY_LIMIT: usize = 40;
|
||||
const TRUNCATION_TAG: &str = "truncated";
|
||||
|
||||
/// Final allow/deny outcome returned by the guardian reviewer.
|
||||
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub(crate) enum GuardianAssessmentOutcome {
|
||||
Allow,
|
||||
Deny,
|
||||
}
|
||||
|
||||
/// Structured output contract that the guardian reviewer must satisfy.
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub(crate) struct GuardianAssessment {
|
||||
|
||||
@@ -1,5 +1,14 @@
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
use codex_analytics::GuardianApprovalRequestSource;
|
||||
use codex_analytics::GuardianReviewDecision;
|
||||
use codex_analytics::GuardianReviewFailureReason;
|
||||
use codex_analytics::GuardianReviewSessionKind;
|
||||
use codex_analytics::GuardianReviewTerminalStatus;
|
||||
use codex_analytics::GuardianReviewedAction;
|
||||
use codex_analytics::now_unix_seconds;
|
||||
use codex_features::Feature;
|
||||
use codex_protocol::config_types::ApprovalsReviewer;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
@@ -10,12 +19,14 @@ use codex_protocol::protocol::GuardianRiskLevel;
|
||||
use codex_protocol::protocol::GuardianUserAuthorization;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use codex_protocol::protocol::SubAgentSource;
|
||||
use codex_protocol::protocol::TokenUsage;
|
||||
use codex_protocol::protocol::WarningEvent;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use crate::codex::Session;
|
||||
use crate::codex::TurnContext;
|
||||
|
||||
use super::GUARDIAN_REVIEW_TIMEOUT;
|
||||
use super::GUARDIAN_REVIEWER_NAME;
|
||||
use super::GuardianApprovalRequest;
|
||||
use super::GuardianAssessment;
|
||||
@@ -26,6 +37,7 @@ use super::approval_request::guardian_request_target_item_id;
|
||||
use super::approval_request::guardian_request_turn_id;
|
||||
use super::prompt::guardian_output_schema;
|
||||
use super::prompt::parse_guardian_assessment;
|
||||
use super::review_session::GuardianReviewSessionMetadata;
|
||||
use super::review_session::GuardianReviewSessionOutcome;
|
||||
use super::review_session::GuardianReviewSessionParams;
|
||||
use super::review_session::build_guardian_review_session_config;
|
||||
@@ -75,9 +87,36 @@ pub(crate) fn guardian_timeout_message() -> String {
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(super) enum GuardianReviewOutcome {
|
||||
Completed(anyhow::Result<GuardianAssessment>),
|
||||
TimedOut,
|
||||
Aborted,
|
||||
Completed(
|
||||
anyhow::Result<GuardianAssessment>,
|
||||
Option<GuardianReviewSessionMetadata>,
|
||||
),
|
||||
Failed(GuardianReviewFailure, Option<GuardianReviewSessionMetadata>),
|
||||
TimedOut(Option<GuardianReviewSessionMetadata>),
|
||||
Aborted(Option<GuardianReviewSessionMetadata>),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(super) enum GuardianReviewFailure {
|
||||
PromptBuild(anyhow::Error),
|
||||
Session(anyhow::Error),
|
||||
Parse(anyhow::Error),
|
||||
}
|
||||
|
||||
impl GuardianReviewFailure {
|
||||
fn reason(&self) -> GuardianReviewFailureReason {
|
||||
match self {
|
||||
Self::PromptBuild(_) => GuardianReviewFailureReason::PromptBuildError,
|
||||
Self::Session(_) => GuardianReviewFailureReason::SessionError,
|
||||
Self::Parse(_) => GuardianReviewFailureReason::ParseError,
|
||||
}
|
||||
}
|
||||
|
||||
fn error(&self) -> &anyhow::Error {
|
||||
match self {
|
||||
Self::PromptBuild(err) | Self::Session(err) | Self::Parse(err) => err,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn guardian_risk_level_str(level: GuardianRiskLevel) -> &'static str {
|
||||
@@ -89,6 +128,220 @@ fn guardian_risk_level_str(level: GuardianRiskLevel) -> &'static str {
|
||||
}
|
||||
}
|
||||
|
||||
fn guardian_reviewed_action(request: &GuardianApprovalRequest) -> GuardianReviewedAction {
|
||||
match request {
|
||||
GuardianApprovalRequest::Shell {
|
||||
command,
|
||||
cwd,
|
||||
sandbox_permissions,
|
||||
additional_permissions,
|
||||
justification,
|
||||
..
|
||||
} => GuardianReviewedAction::Shell {
|
||||
command: command.clone(),
|
||||
command_display: codex_shell_command::parse_command::shlex_join(command),
|
||||
cwd: cwd.to_string_lossy().into_owned(),
|
||||
sandbox_permissions: *sandbox_permissions,
|
||||
additional_permissions: additional_permissions.clone(),
|
||||
justification: justification.clone(),
|
||||
},
|
||||
GuardianApprovalRequest::ExecCommand {
|
||||
command,
|
||||
cwd,
|
||||
sandbox_permissions,
|
||||
additional_permissions,
|
||||
justification,
|
||||
tty,
|
||||
..
|
||||
} => GuardianReviewedAction::UnifiedExec {
|
||||
command: command.clone(),
|
||||
command_display: codex_shell_command::parse_command::shlex_join(command),
|
||||
cwd: cwd.to_string_lossy().into_owned(),
|
||||
sandbox_permissions: *sandbox_permissions,
|
||||
additional_permissions: additional_permissions.clone(),
|
||||
justification: justification.clone(),
|
||||
tty: *tty,
|
||||
},
|
||||
#[cfg(unix)]
|
||||
GuardianApprovalRequest::Execve {
|
||||
source,
|
||||
program,
|
||||
argv,
|
||||
cwd,
|
||||
additional_permissions,
|
||||
..
|
||||
} => GuardianReviewedAction::Execve {
|
||||
source: *source,
|
||||
program: program.clone(),
|
||||
argv: argv.clone(),
|
||||
cwd: cwd.to_string_lossy().into_owned(),
|
||||
additional_permissions: additional_permissions.clone(),
|
||||
},
|
||||
GuardianApprovalRequest::ApplyPatch { cwd, files, .. } => {
|
||||
GuardianReviewedAction::ApplyPatch {
|
||||
cwd: cwd.to_string_lossy().into_owned(),
|
||||
files: files
|
||||
.iter()
|
||||
.map(|file| file.to_string_lossy().into_owned())
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
GuardianApprovalRequest::NetworkAccess {
|
||||
target,
|
||||
host,
|
||||
protocol,
|
||||
port,
|
||||
..
|
||||
} => GuardianReviewedAction::NetworkAccess {
|
||||
target: target.clone(),
|
||||
host: host.clone(),
|
||||
protocol: *protocol,
|
||||
port: *port,
|
||||
},
|
||||
GuardianApprovalRequest::McpToolCall {
|
||||
server,
|
||||
tool_name,
|
||||
connector_id,
|
||||
connector_name,
|
||||
tool_title,
|
||||
..
|
||||
} => GuardianReviewedAction::McpToolCall {
|
||||
server: server.clone(),
|
||||
tool_name: tool_name.clone(),
|
||||
connector_id: connector_id.clone(),
|
||||
connector_name: connector_name.clone(),
|
||||
tool_title: tool_title.clone(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
struct GuardianReviewAnalyticsContext {
|
||||
thread_id: String,
|
||||
turn_id: String,
|
||||
review_id: String,
|
||||
target_item_id: Option<String>,
|
||||
retry_reason: Option<String>,
|
||||
approval_request_source: GuardianApprovalRequestSource,
|
||||
reviewed_action: GuardianReviewedAction,
|
||||
started_at: u64,
|
||||
started_instant: Instant,
|
||||
}
|
||||
|
||||
struct GuardianReviewAnalyticsTerminal {
|
||||
decision: GuardianReviewDecision,
|
||||
terminal_status: GuardianReviewTerminalStatus,
|
||||
failure_reason: Option<GuardianReviewFailureReason>,
|
||||
risk_level: Option<GuardianRiskLevel>,
|
||||
user_authorization: Option<GuardianUserAuthorization>,
|
||||
outcome: Option<GuardianAssessmentOutcome>,
|
||||
rationale: Option<String>,
|
||||
guardian_thread_id: Option<String>,
|
||||
guardian_session_kind: Option<GuardianReviewSessionKind>,
|
||||
guardian_model: Option<String>,
|
||||
guardian_reasoning_effort: Option<String>,
|
||||
had_prior_review_context: Option<bool>,
|
||||
reviewed_action_truncated: bool,
|
||||
token_usage: Option<TokenUsage>,
|
||||
time_to_first_token_ms: Option<u64>,
|
||||
completed_at: u64,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
struct GuardianReviewMetadataFields {
|
||||
guardian_thread_id: Option<String>,
|
||||
guardian_session_kind: Option<GuardianReviewSessionKind>,
|
||||
guardian_model: Option<String>,
|
||||
guardian_reasoning_effort: Option<String>,
|
||||
had_prior_review_context: Option<bool>,
|
||||
reviewed_action_truncated: bool,
|
||||
token_usage: Option<TokenUsage>,
|
||||
time_to_first_token_ms: Option<u64>,
|
||||
}
|
||||
|
||||
fn guardian_review_metadata_fields(
|
||||
metadata: Option<GuardianReviewSessionMetadata>,
|
||||
) -> GuardianReviewMetadataFields {
|
||||
match metadata {
|
||||
Some(metadata) => GuardianReviewMetadataFields {
|
||||
guardian_thread_id: Some(metadata.guardian_thread_id),
|
||||
guardian_session_kind: Some(metadata.guardian_session_kind),
|
||||
guardian_model: Some(metadata.guardian_model),
|
||||
guardian_reasoning_effort: metadata.guardian_reasoning_effort,
|
||||
had_prior_review_context: Some(metadata.had_prior_review_context),
|
||||
reviewed_action_truncated: false,
|
||||
token_usage: metadata.token_usage,
|
||||
time_to_first_token_ms: None,
|
||||
},
|
||||
None => GuardianReviewMetadataFields::default(),
|
||||
}
|
||||
}
|
||||
|
||||
impl GuardianReviewAnalyticsContext {
|
||||
fn track(
|
||||
&self,
|
||||
session: &Session,
|
||||
turn: &TurnContext,
|
||||
terminal: GuardianReviewAnalyticsTerminal,
|
||||
) {
|
||||
if !turn.config.features.enabled(Feature::GeneralAnalytics) {
|
||||
return;
|
||||
}
|
||||
let completion_latency_ms = self.started_instant.elapsed().as_millis() as u64;
|
||||
session
|
||||
.services
|
||||
.analytics_events_client
|
||||
.track_guardian_review(codex_analytics::GuardianReviewEventParams {
|
||||
thread_id: self.thread_id.clone(),
|
||||
turn_id: self.turn_id.clone(),
|
||||
review_id: self.review_id.clone(),
|
||||
target_item_id: self.target_item_id.clone(),
|
||||
retry_reason: self.retry_reason.clone(),
|
||||
approval_request_source: self.approval_request_source,
|
||||
reviewed_action: self.reviewed_action.clone(),
|
||||
reviewed_action_truncated: terminal.reviewed_action_truncated,
|
||||
decision: terminal.decision,
|
||||
terminal_status: terminal.terminal_status,
|
||||
failure_reason: terminal.failure_reason,
|
||||
risk_level: terminal.risk_level,
|
||||
user_authorization: terminal.user_authorization,
|
||||
outcome: terminal.outcome,
|
||||
rationale: terminal.rationale,
|
||||
guardian_thread_id: terminal.guardian_thread_id,
|
||||
guardian_session_kind: terminal.guardian_session_kind,
|
||||
guardian_model: terminal.guardian_model,
|
||||
guardian_reasoning_effort: terminal.guardian_reasoning_effort,
|
||||
had_prior_review_context: terminal.had_prior_review_context,
|
||||
review_timeout_ms: GUARDIAN_REVIEW_TIMEOUT.as_millis() as u64,
|
||||
// TODO(rhan-oai): plumb nested Guardian review session tool-call counts.
|
||||
tool_call_count: None,
|
||||
time_to_first_token_ms: terminal.time_to_first_token_ms,
|
||||
completion_latency_ms: Some(completion_latency_ms),
|
||||
started_at: self.started_at,
|
||||
completed_at: Some(terminal.completed_at),
|
||||
input_tokens: terminal
|
||||
.token_usage
|
||||
.as_ref()
|
||||
.map(|usage| usage.input_tokens),
|
||||
cached_input_tokens: terminal
|
||||
.token_usage
|
||||
.as_ref()
|
||||
.map(|usage| usage.cached_input_tokens),
|
||||
output_tokens: terminal
|
||||
.token_usage
|
||||
.as_ref()
|
||||
.map(|usage| usage.output_tokens),
|
||||
reasoning_output_tokens: terminal
|
||||
.token_usage
|
||||
.as_ref()
|
||||
.map(|usage| usage.reasoning_output_tokens),
|
||||
total_tokens: terminal
|
||||
.token_usage
|
||||
.as_ref()
|
||||
.map(|usage| usage.total_tokens),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether this turn should route `on-request` approval prompts through the
|
||||
/// guardian reviewer instead of surfacing them to the user. ARC may still
|
||||
/// block actions earlier in the flow.
|
||||
@@ -116,11 +369,25 @@ async fn run_guardian_review(
|
||||
review_id: String,
|
||||
request: GuardianApprovalRequest,
|
||||
retry_reason: Option<String>,
|
||||
approval_request_source: GuardianApprovalRequestSource,
|
||||
external_cancel: Option<CancellationToken>,
|
||||
) -> ReviewDecision {
|
||||
let started_at = now_unix_seconds();
|
||||
let started_instant = Instant::now();
|
||||
let target_item_id = guardian_request_target_item_id(&request).map(str::to_string);
|
||||
let assessment_turn_id = guardian_request_turn_id(&request, &turn.sub_id).to_string();
|
||||
let action_summary = guardian_assessment_action(&request);
|
||||
let analytics_context = GuardianReviewAnalyticsContext {
|
||||
thread_id: session.conversation_id.to_string(),
|
||||
turn_id: assessment_turn_id.clone(),
|
||||
review_id: review_id.clone(),
|
||||
target_item_id: target_item_id.clone(),
|
||||
retry_reason: retry_reason.clone(),
|
||||
approval_request_source,
|
||||
reviewed_action: guardian_reviewed_action(&request),
|
||||
started_at,
|
||||
started_instant,
|
||||
};
|
||||
session
|
||||
.send_event(
|
||||
turn.as_ref(),
|
||||
@@ -142,6 +409,28 @@ async fn run_guardian_review(
|
||||
.as_ref()
|
||||
.is_some_and(CancellationToken::is_cancelled)
|
||||
{
|
||||
analytics_context.track(
|
||||
session.as_ref(),
|
||||
turn.as_ref(),
|
||||
GuardianReviewAnalyticsTerminal {
|
||||
decision: GuardianReviewDecision::Aborted,
|
||||
terminal_status: GuardianReviewTerminalStatus::Aborted,
|
||||
failure_reason: Some(GuardianReviewFailureReason::Cancelled),
|
||||
risk_level: None,
|
||||
user_authorization: None,
|
||||
outcome: None,
|
||||
rationale: None,
|
||||
guardian_thread_id: None,
|
||||
guardian_session_kind: None,
|
||||
guardian_model: None,
|
||||
guardian_reasoning_effort: None,
|
||||
had_prior_review_context: None,
|
||||
reviewed_action_truncated: false,
|
||||
token_usage: None,
|
||||
time_to_first_token_ms: None,
|
||||
completed_at: now_unix_seconds(),
|
||||
},
|
||||
);
|
||||
session
|
||||
.send_event(
|
||||
turn.as_ref(),
|
||||
@@ -167,24 +456,142 @@ async fn run_guardian_review(
|
||||
session.clone(),
|
||||
turn.clone(),
|
||||
request,
|
||||
retry_reason,
|
||||
retry_reason.clone(),
|
||||
schema,
|
||||
external_cancel,
|
||||
)
|
||||
.await;
|
||||
|
||||
let completed_at = now_unix_seconds();
|
||||
let assessment = match outcome {
|
||||
GuardianReviewOutcome::Completed(Ok(assessment)) => assessment,
|
||||
GuardianReviewOutcome::Completed(Err(err)) => GuardianAssessment {
|
||||
risk_level: GuardianRiskLevel::High,
|
||||
user_authorization: GuardianUserAuthorization::Unknown,
|
||||
outcome: GuardianAssessmentOutcome::Deny,
|
||||
rationale: format!("Automatic approval review failed: {err}"),
|
||||
},
|
||||
GuardianReviewOutcome::TimedOut => {
|
||||
GuardianReviewOutcome::Completed(Ok(assessment), metadata) => {
|
||||
let metadata = guardian_review_metadata_fields(metadata);
|
||||
analytics_context.track(
|
||||
session.as_ref(),
|
||||
turn.as_ref(),
|
||||
GuardianReviewAnalyticsTerminal {
|
||||
decision: if matches!(assessment.outcome, GuardianAssessmentOutcome::Allow) {
|
||||
GuardianReviewDecision::Approved
|
||||
} else {
|
||||
GuardianReviewDecision::Denied
|
||||
},
|
||||
terminal_status: if matches!(
|
||||
assessment.outcome,
|
||||
GuardianAssessmentOutcome::Allow
|
||||
) {
|
||||
GuardianReviewTerminalStatus::Approved
|
||||
} else {
|
||||
GuardianReviewTerminalStatus::Denied
|
||||
},
|
||||
failure_reason: None,
|
||||
risk_level: Some(assessment.risk_level),
|
||||
user_authorization: Some(assessment.user_authorization),
|
||||
outcome: Some(assessment.outcome),
|
||||
rationale: Some(assessment.rationale.clone()),
|
||||
guardian_thread_id: metadata.guardian_thread_id,
|
||||
guardian_session_kind: metadata.guardian_session_kind,
|
||||
guardian_model: metadata.guardian_model,
|
||||
guardian_reasoning_effort: metadata.guardian_reasoning_effort,
|
||||
had_prior_review_context: metadata.had_prior_review_context,
|
||||
reviewed_action_truncated: metadata.reviewed_action_truncated,
|
||||
token_usage: metadata.token_usage,
|
||||
time_to_first_token_ms: metadata.time_to_first_token_ms,
|
||||
completed_at,
|
||||
},
|
||||
);
|
||||
assessment
|
||||
}
|
||||
GuardianReviewOutcome::Completed(Err(err), metadata) => {
|
||||
let metadata = guardian_review_metadata_fields(metadata);
|
||||
let rationale = format!("Automatic approval review failed: {err}");
|
||||
analytics_context.track(
|
||||
session.as_ref(),
|
||||
turn.as_ref(),
|
||||
GuardianReviewAnalyticsTerminal {
|
||||
decision: GuardianReviewDecision::Denied,
|
||||
terminal_status: GuardianReviewTerminalStatus::FailedClosed,
|
||||
failure_reason: Some(GuardianReviewFailureReason::SessionError),
|
||||
risk_level: None,
|
||||
user_authorization: None,
|
||||
outcome: None,
|
||||
rationale: None,
|
||||
guardian_thread_id: metadata.guardian_thread_id,
|
||||
guardian_session_kind: metadata.guardian_session_kind,
|
||||
guardian_model: metadata.guardian_model,
|
||||
guardian_reasoning_effort: metadata.guardian_reasoning_effort,
|
||||
had_prior_review_context: metadata.had_prior_review_context,
|
||||
reviewed_action_truncated: metadata.reviewed_action_truncated,
|
||||
token_usage: metadata.token_usage,
|
||||
time_to_first_token_ms: metadata.time_to_first_token_ms,
|
||||
completed_at,
|
||||
},
|
||||
);
|
||||
GuardianAssessment {
|
||||
risk_level: GuardianRiskLevel::High,
|
||||
user_authorization: GuardianUserAuthorization::Unknown,
|
||||
outcome: GuardianAssessmentOutcome::Deny,
|
||||
rationale,
|
||||
}
|
||||
}
|
||||
GuardianReviewOutcome::Failed(failure, metadata) => {
|
||||
let metadata = guardian_review_metadata_fields(metadata);
|
||||
let rationale = format!("Automatic approval review failed: {}", failure.error());
|
||||
analytics_context.track(
|
||||
session.as_ref(),
|
||||
turn.as_ref(),
|
||||
GuardianReviewAnalyticsTerminal {
|
||||
decision: GuardianReviewDecision::Denied,
|
||||
terminal_status: GuardianReviewTerminalStatus::FailedClosed,
|
||||
failure_reason: Some(failure.reason()),
|
||||
risk_level: None,
|
||||
user_authorization: None,
|
||||
outcome: None,
|
||||
rationale: None,
|
||||
guardian_thread_id: metadata.guardian_thread_id,
|
||||
guardian_session_kind: metadata.guardian_session_kind,
|
||||
guardian_model: metadata.guardian_model,
|
||||
guardian_reasoning_effort: metadata.guardian_reasoning_effort,
|
||||
had_prior_review_context: metadata.had_prior_review_context,
|
||||
reviewed_action_truncated: metadata.reviewed_action_truncated,
|
||||
token_usage: metadata.token_usage,
|
||||
time_to_first_token_ms: metadata.time_to_first_token_ms,
|
||||
completed_at,
|
||||
},
|
||||
);
|
||||
GuardianAssessment {
|
||||
risk_level: GuardianRiskLevel::High,
|
||||
user_authorization: GuardianUserAuthorization::Unknown,
|
||||
outcome: GuardianAssessmentOutcome::Deny,
|
||||
rationale,
|
||||
}
|
||||
}
|
||||
GuardianReviewOutcome::TimedOut(metadata) => {
|
||||
let metadata = guardian_review_metadata_fields(metadata);
|
||||
let rationale =
|
||||
"Automatic approval review timed out while evaluating the requested approval."
|
||||
.to_string();
|
||||
analytics_context.track(
|
||||
session.as_ref(),
|
||||
turn.as_ref(),
|
||||
GuardianReviewAnalyticsTerminal {
|
||||
decision: GuardianReviewDecision::Denied,
|
||||
terminal_status: GuardianReviewTerminalStatus::TimedOut,
|
||||
failure_reason: Some(GuardianReviewFailureReason::Timeout),
|
||||
risk_level: None,
|
||||
user_authorization: None,
|
||||
outcome: None,
|
||||
rationale: None,
|
||||
guardian_thread_id: metadata.guardian_thread_id,
|
||||
guardian_session_kind: metadata.guardian_session_kind,
|
||||
guardian_model: metadata.guardian_model,
|
||||
guardian_reasoning_effort: metadata.guardian_reasoning_effort,
|
||||
had_prior_review_context: metadata.had_prior_review_context,
|
||||
reviewed_action_truncated: metadata.reviewed_action_truncated,
|
||||
token_usage: metadata.token_usage,
|
||||
time_to_first_token_ms: metadata.time_to_first_token_ms,
|
||||
completed_at,
|
||||
},
|
||||
);
|
||||
session
|
||||
.send_event(
|
||||
turn.as_ref(),
|
||||
@@ -211,7 +618,30 @@ async fn run_guardian_review(
|
||||
.await;
|
||||
return ReviewDecision::TimedOut;
|
||||
}
|
||||
GuardianReviewOutcome::Aborted => {
|
||||
GuardianReviewOutcome::Aborted(metadata) => {
|
||||
let metadata = guardian_review_metadata_fields(metadata);
|
||||
analytics_context.track(
|
||||
session.as_ref(),
|
||||
turn.as_ref(),
|
||||
GuardianReviewAnalyticsTerminal {
|
||||
decision: GuardianReviewDecision::Aborted,
|
||||
terminal_status: GuardianReviewTerminalStatus::Aborted,
|
||||
failure_reason: Some(GuardianReviewFailureReason::Cancelled),
|
||||
risk_level: None,
|
||||
user_authorization: None,
|
||||
outcome: None,
|
||||
rationale: None,
|
||||
guardian_thread_id: metadata.guardian_thread_id,
|
||||
guardian_session_kind: metadata.guardian_session_kind,
|
||||
guardian_model: metadata.guardian_model,
|
||||
guardian_reasoning_effort: metadata.guardian_reasoning_effort,
|
||||
had_prior_review_context: metadata.had_prior_review_context,
|
||||
reviewed_action_truncated: metadata.reviewed_action_truncated,
|
||||
token_usage: metadata.token_usage,
|
||||
time_to_first_token_ms: metadata.time_to_first_token_ms,
|
||||
completed_at,
|
||||
},
|
||||
);
|
||||
session
|
||||
.send_event(
|
||||
turn.as_ref(),
|
||||
@@ -309,11 +739,34 @@ pub(crate) async fn review_approval_request(
|
||||
review_id,
|
||||
request,
|
||||
retry_reason,
|
||||
GuardianApprovalRequestSource::MainTurn,
|
||||
/*external_cancel*/ None,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
pub(crate) async fn review_approval_request_with_source_and_cancel(
|
||||
session: &Arc<Session>,
|
||||
turn: &Arc<TurnContext>,
|
||||
review_id: String,
|
||||
request: GuardianApprovalRequest,
|
||||
retry_reason: Option<String>,
|
||||
approval_request_source: GuardianApprovalRequestSource,
|
||||
cancel_token: CancellationToken,
|
||||
) -> ReviewDecision {
|
||||
run_guardian_review(
|
||||
Arc::clone(session),
|
||||
Arc::clone(turn),
|
||||
review_id,
|
||||
request,
|
||||
retry_reason,
|
||||
approval_request_source,
|
||||
Some(cancel_token),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) async fn review_approval_request_with_cancel(
|
||||
session: &Arc<Session>,
|
||||
turn: &Arc<TurnContext>,
|
||||
@@ -328,6 +781,7 @@ pub(crate) async fn review_approval_request_with_cancel(
|
||||
review_id,
|
||||
request,
|
||||
retry_reason,
|
||||
GuardianApprovalRequestSource::MainTurn,
|
||||
Some(cancel_token),
|
||||
)
|
||||
.await
|
||||
@@ -358,7 +812,12 @@ pub(super) async fn run_guardian_review_session(
|
||||
let live_network_config = match session.services.network_proxy.as_ref() {
|
||||
Some(network_proxy) => match network_proxy.proxy().current_cfg().await {
|
||||
Ok(config) => Some(config),
|
||||
Err(err) => return GuardianReviewOutcome::Completed(Err(err)),
|
||||
Err(err) => {
|
||||
return GuardianReviewOutcome::Failed(
|
||||
GuardianReviewFailure::PromptBuild(err),
|
||||
None,
|
||||
);
|
||||
}
|
||||
},
|
||||
None => None,
|
||||
};
|
||||
@@ -408,10 +867,12 @@ pub(super) async fn run_guardian_review_session(
|
||||
);
|
||||
let guardian_config = match guardian_config {
|
||||
Ok(config) => config,
|
||||
Err(err) => return GuardianReviewOutcome::Completed(Err(err)),
|
||||
Err(err) => {
|
||||
return GuardianReviewOutcome::Failed(GuardianReviewFailure::PromptBuild(err), None);
|
||||
}
|
||||
};
|
||||
|
||||
match session
|
||||
let (session_outcome, session_metadata) = session
|
||||
.guardian_review_session
|
||||
.run_review(GuardianReviewSessionParams {
|
||||
parent_session: Arc::clone(&session),
|
||||
@@ -426,17 +887,63 @@ pub(super) async fn run_guardian_review_session(
|
||||
personality: turn.personality,
|
||||
external_cancel,
|
||||
})
|
||||
.await
|
||||
{
|
||||
GuardianReviewSessionOutcome::Completed(Ok(last_agent_message)) => {
|
||||
GuardianReviewOutcome::Completed(parse_guardian_assessment(
|
||||
last_agent_message.as_deref(),
|
||||
))
|
||||
}
|
||||
.await;
|
||||
|
||||
match session_outcome {
|
||||
GuardianReviewSessionOutcome::Completed(Ok(last_agent_message)) => match last_agent_message
|
||||
{
|
||||
Some(last_agent_message) => {
|
||||
match parse_guardian_assessment(Some(&last_agent_message)) {
|
||||
Ok(assessment) => {
|
||||
GuardianReviewOutcome::Completed(Ok(assessment), session_metadata)
|
||||
}
|
||||
Err(err) => GuardianReviewOutcome::Failed(
|
||||
GuardianReviewFailure::Parse(err),
|
||||
session_metadata,
|
||||
),
|
||||
}
|
||||
}
|
||||
None => GuardianReviewOutcome::Failed(
|
||||
GuardianReviewFailure::Session(anyhow::anyhow!(
|
||||
"guardian review completed without an assessment payload"
|
||||
)),
|
||||
session_metadata,
|
||||
),
|
||||
},
|
||||
GuardianReviewSessionOutcome::Completed(Err(err)) => {
|
||||
GuardianReviewOutcome::Completed(Err(err))
|
||||
GuardianReviewOutcome::Failed(GuardianReviewFailure::Session(err), session_metadata)
|
||||
}
|
||||
GuardianReviewSessionOutcome::TimedOut => GuardianReviewOutcome::TimedOut,
|
||||
GuardianReviewSessionOutcome::Aborted => GuardianReviewOutcome::Aborted,
|
||||
GuardianReviewSessionOutcome::PromptBuildFailed(err) => {
|
||||
GuardianReviewOutcome::Failed(GuardianReviewFailure::PromptBuild(err), session_metadata)
|
||||
}
|
||||
GuardianReviewSessionOutcome::TimedOut => GuardianReviewOutcome::TimedOut(session_metadata),
|
||||
GuardianReviewSessionOutcome::Aborted => GuardianReviewOutcome::Aborted(session_metadata),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod review_tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn guardian_review_failure_reason_distinguishes_failure_kinds() {
|
||||
let parse_failure = GuardianReviewFailure::Parse(anyhow::anyhow!("bad guardian JSON"));
|
||||
let prompt_failure =
|
||||
GuardianReviewFailure::PromptBuild(anyhow::anyhow!("bad prompt/config"));
|
||||
let session_failure =
|
||||
GuardianReviewFailure::Session(anyhow::anyhow!("guardian runtime failed"));
|
||||
|
||||
assert!(matches!(
|
||||
parse_failure.reason(),
|
||||
GuardianReviewFailureReason::ParseError
|
||||
));
|
||||
assert!(matches!(
|
||||
prompt_failure.reason(),
|
||||
GuardianReviewFailureReason::PromptBuildError
|
||||
));
|
||||
assert!(matches!(
|
||||
session_failure.reason(),
|
||||
GuardianReviewFailureReason::SessionError
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::anyhow;
|
||||
use codex_analytics::GuardianReviewSessionKind;
|
||||
use codex_protocol::config_types::Personality;
|
||||
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
|
||||
use codex_protocol::models::DeveloperInstructions;
|
||||
@@ -17,6 +18,7 @@ use codex_protocol::protocol::Op;
|
||||
use codex_protocol::protocol::RolloutItem;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_protocol::protocol::SubAgentSource;
|
||||
use codex_protocol::protocol::TokenUsage;
|
||||
use serde_json::Value;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
@@ -57,10 +59,21 @@ const GUARDIAN_FOLLOWUP_REVIEW_REMINDER: &str = concat!(
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum GuardianReviewSessionOutcome {
|
||||
Completed(anyhow::Result<Option<String>>),
|
||||
PromptBuildFailed(anyhow::Error),
|
||||
TimedOut,
|
||||
Aborted,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct GuardianReviewSessionMetadata {
|
||||
pub(crate) guardian_thread_id: String,
|
||||
pub(crate) guardian_session_kind: GuardianReviewSessionKind,
|
||||
pub(crate) guardian_model: String,
|
||||
pub(crate) guardian_reasoning_effort: Option<String>,
|
||||
pub(crate) had_prior_review_context: bool,
|
||||
pub(crate) token_usage: Option<TokenUsage>,
|
||||
}
|
||||
|
||||
pub(crate) struct GuardianReviewSessionParams {
|
||||
pub(crate) parent_session: Arc<Session>,
|
||||
pub(crate) parent_turn: Arc<TurnContext>,
|
||||
@@ -100,6 +113,21 @@ struct GuardianReviewState {
|
||||
last_committed_fork_snapshot: Option<GuardianReviewForkSnapshot>,
|
||||
}
|
||||
|
||||
fn had_prior_review_context(prompt_mode: &GuardianPromptMode) -> bool {
|
||||
matches!(prompt_mode, GuardianPromptMode::Delta { .. })
|
||||
}
|
||||
|
||||
fn token_usage_delta(start: &TokenUsage, end: &TokenUsage) -> TokenUsage {
|
||||
TokenUsage {
|
||||
input_tokens: (end.input_tokens - start.input_tokens).max(0),
|
||||
cached_input_tokens: (end.cached_input_tokens - start.cached_input_tokens).max(0),
|
||||
output_tokens: (end.output_tokens - start.output_tokens).max(0),
|
||||
reasoning_output_tokens: (end.reasoning_output_tokens - start.reasoning_output_tokens)
|
||||
.max(0),
|
||||
total_tokens: (end.total_tokens - start.total_tokens).max(0),
|
||||
}
|
||||
}
|
||||
|
||||
struct EphemeralReviewCleanup {
|
||||
state: Arc<Mutex<GuardianReviewSessionState>>,
|
||||
review_session: Option<Arc<GuardianReviewSession>>,
|
||||
@@ -266,10 +294,14 @@ impl GuardianReviewSessionManager {
|
||||
pub(crate) async fn run_review(
|
||||
&self,
|
||||
params: GuardianReviewSessionParams,
|
||||
) -> GuardianReviewSessionOutcome {
|
||||
) -> (
|
||||
GuardianReviewSessionOutcome,
|
||||
Option<GuardianReviewSessionMetadata>,
|
||||
) {
|
||||
let deadline = tokio::time::Instant::now() + GUARDIAN_REVIEW_TIMEOUT;
|
||||
let next_reuse_key = GuardianReviewSessionReuseKey::from_spawn_config(¶ms.spawn_config);
|
||||
let mut stale_trunk_to_shutdown = None;
|
||||
let mut spawned_trunk = false;
|
||||
let trunk_candidate = match run_before_review_deadline(
|
||||
deadline,
|
||||
params.external_cancel.as_ref(),
|
||||
@@ -303,16 +335,17 @@ impl GuardianReviewSessionManager {
|
||||
{
|
||||
Ok(Ok(review_session)) => Arc::new(review_session),
|
||||
Ok(Err(err)) => {
|
||||
return GuardianReviewSessionOutcome::Completed(Err(err));
|
||||
return (GuardianReviewSessionOutcome::PromptBuildFailed(err), None);
|
||||
}
|
||||
Err(outcome) => return outcome,
|
||||
Err(outcome) => return (outcome, None),
|
||||
};
|
||||
state.trunk = Some(Arc::clone(&review_session));
|
||||
spawned_trunk = true;
|
||||
}
|
||||
|
||||
state.trunk.as_ref().cloned()
|
||||
}
|
||||
Err(outcome) => return outcome,
|
||||
Err(outcome) => return (outcome, None),
|
||||
};
|
||||
|
||||
if let Some(review_session) = stale_trunk_to_shutdown {
|
||||
@@ -320,9 +353,12 @@ impl GuardianReviewSessionManager {
|
||||
}
|
||||
|
||||
let Some(trunk) = trunk_candidate else {
|
||||
return GuardianReviewSessionOutcome::Completed(Err(anyhow!(
|
||||
"guardian review session was not available after spawn"
|
||||
)));
|
||||
return (
|
||||
GuardianReviewSessionOutcome::Completed(Err(anyhow!(
|
||||
"guardian review session was not available after spawn"
|
||||
))),
|
||||
None,
|
||||
);
|
||||
};
|
||||
|
||||
if trunk.reuse_key != next_reuse_key {
|
||||
@@ -350,20 +386,25 @@ impl GuardianReviewSessionManager {
|
||||
}
|
||||
};
|
||||
|
||||
let (outcome, keep_review_session) =
|
||||
run_review_on_session(trunk.as_ref(), ¶ms, deadline).await;
|
||||
let guardian_session_kind = if spawned_trunk {
|
||||
GuardianReviewSessionKind::TrunkNew
|
||||
} else {
|
||||
GuardianReviewSessionKind::TrunkReused
|
||||
};
|
||||
let (outcome, keep_review_session, metadata) =
|
||||
run_review_on_session(trunk.as_ref(), ¶ms, guardian_session_kind, deadline).await;
|
||||
if keep_review_session && matches!(outcome, GuardianReviewSessionOutcome::Completed(_)) {
|
||||
trunk.refresh_last_committed_fork_snapshot().await;
|
||||
}
|
||||
drop(trunk_guard);
|
||||
|
||||
if keep_review_session {
|
||||
outcome
|
||||
(outcome, Some(metadata))
|
||||
} else {
|
||||
if let Some(review_session) = self.remove_trunk_if_current(&trunk).await {
|
||||
review_session.shutdown_in_background();
|
||||
}
|
||||
outcome
|
||||
(outcome, Some(metadata))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -460,7 +501,10 @@ impl GuardianReviewSessionManager {
|
||||
reuse_key: GuardianReviewSessionReuseKey,
|
||||
deadline: tokio::time::Instant,
|
||||
fork_snapshot: Option<GuardianReviewForkSnapshot>,
|
||||
) -> GuardianReviewSessionOutcome {
|
||||
) -> (
|
||||
GuardianReviewSessionOutcome,
|
||||
Option<GuardianReviewSessionMetadata>,
|
||||
) {
|
||||
let spawn_cancel_token = CancellationToken::new();
|
||||
let mut fork_config = params.spawn_config.clone();
|
||||
fork_config.ephemeral = true;
|
||||
@@ -479,20 +523,26 @@ impl GuardianReviewSessionManager {
|
||||
.await
|
||||
{
|
||||
Ok(Ok(review_session)) => Arc::new(review_session),
|
||||
Ok(Err(err)) => return GuardianReviewSessionOutcome::Completed(Err(err)),
|
||||
Err(outcome) => return outcome,
|
||||
Ok(Err(err)) => return (GuardianReviewSessionOutcome::PromptBuildFailed(err), None),
|
||||
Err(outcome) => return (outcome, None),
|
||||
};
|
||||
self.register_active_ephemeral(Arc::clone(&review_session))
|
||||
.await;
|
||||
let mut cleanup =
|
||||
EphemeralReviewCleanup::new(Arc::clone(&self.state), Arc::clone(&review_session));
|
||||
|
||||
let (outcome, _) = run_review_on_session(review_session.as_ref(), ¶ms, deadline).await;
|
||||
let (outcome, _, metadata) = run_review_on_session(
|
||||
review_session.as_ref(),
|
||||
¶ms,
|
||||
GuardianReviewSessionKind::EphemeralForked,
|
||||
deadline,
|
||||
)
|
||||
.await;
|
||||
if let Some(review_session) = self.take_active_ephemeral(&review_session).await {
|
||||
cleanup.disarm();
|
||||
review_session.shutdown_in_background();
|
||||
}
|
||||
outcome
|
||||
(outcome, Some(metadata))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -539,8 +589,13 @@ async fn spawn_guardian_review_session(
|
||||
async fn run_review_on_session(
|
||||
review_session: &GuardianReviewSession,
|
||||
params: &GuardianReviewSessionParams,
|
||||
guardian_session_kind: GuardianReviewSessionKind,
|
||||
deadline: tokio::time::Instant,
|
||||
) -> (GuardianReviewSessionOutcome, bool) {
|
||||
) -> (
|
||||
GuardianReviewSessionOutcome,
|
||||
bool,
|
||||
GuardianReviewSessionMetadata,
|
||||
) {
|
||||
let (send_followup_reminder, prompt_mode) = {
|
||||
let state = review_session.state.lock().await;
|
||||
|
||||
@@ -555,6 +610,14 @@ async fn run_review_on_session(
|
||||
|
||||
(send_followup_reminder, prompt_mode)
|
||||
};
|
||||
let mut guardian_metadata = GuardianReviewSessionMetadata {
|
||||
guardian_thread_id: review_session.codex.session.conversation_id.to_string(),
|
||||
guardian_session_kind,
|
||||
guardian_model: params.model.clone(),
|
||||
guardian_reasoning_effort: params.reasoning_effort.map(|effort| effort.to_string()),
|
||||
had_prior_review_context: had_prior_review_context(&prompt_mode),
|
||||
token_usage: None,
|
||||
};
|
||||
if send_followup_reminder {
|
||||
append_guardian_followup_reminder(review_session).await;
|
||||
}
|
||||
@@ -579,6 +642,8 @@ async fn run_review_on_session(
|
||||
prompt_mode,
|
||||
)
|
||||
.await?;
|
||||
let token_usage_at_review_start =
|
||||
review_session.codex.session.total_token_usage().await;
|
||||
|
||||
review_session
|
||||
.codex
|
||||
@@ -598,29 +663,45 @@ async fn run_review_on_session(
|
||||
})
|
||||
.await?;
|
||||
|
||||
Ok::<GuardianTranscriptCursor, anyhow::Error>(prompt_items.transcript_cursor)
|
||||
Ok::<(GuardianTranscriptCursor, Option<TokenUsage>), anyhow::Error>((
|
||||
prompt_items.transcript_cursor,
|
||||
token_usage_at_review_start,
|
||||
))
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
let submit_result = match submit_result {
|
||||
Ok(submit_result) => submit_result,
|
||||
Err(outcome) => return (outcome, false),
|
||||
Err(outcome) => return (outcome, false, guardian_metadata),
|
||||
};
|
||||
let transcript_cursor = match submit_result {
|
||||
Ok(transcript_cursor) => transcript_cursor,
|
||||
let (transcript_cursor, token_usage_at_review_start) = match submit_result {
|
||||
Ok(submit_result) => submit_result,
|
||||
Err(err) => {
|
||||
return (GuardianReviewSessionOutcome::Completed(Err(err)), false);
|
||||
return (
|
||||
GuardianReviewSessionOutcome::PromptBuildFailed(err),
|
||||
false,
|
||||
guardian_metadata,
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
let outcome =
|
||||
wait_for_guardian_review(review_session, deadline, params.external_cancel.as_ref()).await;
|
||||
if matches!(outcome.0, GuardianReviewSessionOutcome::Completed(_)) {
|
||||
if outcome.2
|
||||
&& let Some(token_usage_at_review_start) = token_usage_at_review_start
|
||||
&& let Some(total_token_usage) = review_session.codex.session.total_token_usage().await
|
||||
{
|
||||
guardian_metadata.token_usage = Some(token_usage_delta(
|
||||
&token_usage_at_review_start,
|
||||
&total_token_usage,
|
||||
));
|
||||
}
|
||||
let mut state = review_session.state.lock().await;
|
||||
state.prior_review_count = state.prior_review_count.saturating_add(1);
|
||||
state.last_reviewed_transcript_cursor = Some(transcript_cursor);
|
||||
}
|
||||
outcome
|
||||
(outcome.0, outcome.1, guardian_metadata)
|
||||
}
|
||||
|
||||
async fn append_guardian_followup_reminder(review_session: &GuardianReviewSession) {
|
||||
@@ -649,7 +730,7 @@ async fn wait_for_guardian_review(
|
||||
review_session: &GuardianReviewSession,
|
||||
deadline: tokio::time::Instant,
|
||||
external_cancel: Option<&CancellationToken>,
|
||||
) -> (GuardianReviewSessionOutcome, bool) {
|
||||
) -> (GuardianReviewSessionOutcome, bool, bool) {
|
||||
let timeout = tokio::time::sleep_until(deadline);
|
||||
tokio::pin!(timeout);
|
||||
let mut last_error_message: Option<String> = None;
|
||||
@@ -658,7 +739,7 @@ async fn wait_for_guardian_review(
|
||||
tokio::select! {
|
||||
_ = &mut timeout => {
|
||||
let keep_review_session = interrupt_and_drain_turn(&review_session.codex).await.is_ok();
|
||||
return (GuardianReviewSessionOutcome::TimedOut, keep_review_session);
|
||||
return (GuardianReviewSessionOutcome::TimedOut, keep_review_session, false);
|
||||
}
|
||||
_ = async {
|
||||
if let Some(cancel_token) = external_cancel {
|
||||
@@ -668,7 +749,7 @@ async fn wait_for_guardian_review(
|
||||
}
|
||||
} => {
|
||||
let keep_review_session = interrupt_and_drain_turn(&review_session.codex).await.is_ok();
|
||||
return (GuardianReviewSessionOutcome::Aborted, keep_review_session);
|
||||
return (GuardianReviewSessionOutcome::Aborted, keep_review_session, false);
|
||||
}
|
||||
event = review_session.codex.next_event() => {
|
||||
match event {
|
||||
@@ -680,18 +761,20 @@ async fn wait_for_guardian_review(
|
||||
return (
|
||||
GuardianReviewSessionOutcome::Completed(Err(anyhow!(error_message))),
|
||||
true,
|
||||
true,
|
||||
);
|
||||
}
|
||||
return (
|
||||
GuardianReviewSessionOutcome::Completed(Ok(turn_complete.last_agent_message)),
|
||||
true,
|
||||
true,
|
||||
);
|
||||
}
|
||||
EventMsg::Error(error) => {
|
||||
last_error_message = Some(error.message);
|
||||
}
|
||||
EventMsg::TurnAborted(_) => {
|
||||
return (GuardianReviewSessionOutcome::Aborted, true);
|
||||
return (GuardianReviewSessionOutcome::Aborted, true, false);
|
||||
}
|
||||
_ => {}
|
||||
},
|
||||
@@ -699,6 +782,7 @@ async fn wait_for_guardian_review(
|
||||
return (
|
||||
GuardianReviewSessionOutcome::Completed(Err(err.into())),
|
||||
false,
|
||||
false,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -950,4 +1034,44 @@ mod tests {
|
||||
assert_eq!(outcome.unwrap(), 42);
|
||||
assert!(!cancel_token.is_cancelled());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn had_prior_review_context_tracks_prompt_mode() {
|
||||
assert!(!had_prior_review_context(&GuardianPromptMode::Full));
|
||||
assert!(had_prior_review_context(&GuardianPromptMode::Delta {
|
||||
cursor: GuardianTranscriptCursor {
|
||||
parent_history_version: 7,
|
||||
transcript_entry_count: 42,
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn token_usage_delta_never_reports_negative_usage() {
|
||||
let start = TokenUsage {
|
||||
input_tokens: 10,
|
||||
cached_input_tokens: 8,
|
||||
output_tokens: 6,
|
||||
reasoning_output_tokens: 4,
|
||||
total_tokens: 28,
|
||||
};
|
||||
let end = TokenUsage {
|
||||
input_tokens: 15,
|
||||
cached_input_tokens: 7,
|
||||
output_tokens: 10,
|
||||
reasoning_output_tokens: 2,
|
||||
total_tokens: 34,
|
||||
};
|
||||
|
||||
assert_eq!(
|
||||
token_usage_delta(&start, &end),
|
||||
TokenUsage {
|
||||
input_tokens: 5,
|
||||
cached_input_tokens: 0,
|
||||
output_tokens: 4,
|
||||
reasoning_output_tokens: 0,
|
||||
total_tokens: 6,
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -99,6 +99,14 @@ pub enum GuardianUserAuthorization {
|
||||
High,
|
||||
}
|
||||
|
||||
/// Final allow/deny outcome returned by the guardian reviewer.
|
||||
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum GuardianAssessmentOutcome {
|
||||
Allow,
|
||||
Deny,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum GuardianAssessmentStatus {
|
||||
|
||||
@@ -67,6 +67,7 @@ pub use crate::approvals::ExecPolicyAmendment;
|
||||
pub use crate::approvals::GuardianAssessmentAction;
|
||||
pub use crate::approvals::GuardianAssessmentDecisionSource;
|
||||
pub use crate::approvals::GuardianAssessmentEvent;
|
||||
pub use crate::approvals::GuardianAssessmentOutcome;
|
||||
pub use crate::approvals::GuardianAssessmentStatus;
|
||||
pub use crate::approvals::GuardianCommandSource;
|
||||
pub use crate::approvals::GuardianRiskLevel;
|
||||
|
||||
Reference in New Issue
Block a user