Compare commits

...

2 Commits

Author SHA1 Message Date
Won Park
3895ddd6b1 Clarify guardian timeout guidance (#17521)
## Summary
- update the guardian timeout guidance to say permission approval review
timed out
- simplify the retry guidance to say retry once or ask the user for
guidance or explicit approval

## Testing
- cargo test -p codex-core
guardian_timeout_message_distinguishes_timeout_from_policy_denial
- cargo test -p codex-core
guardian_review_decision_maps_to_mcp_tool_decision
2026-04-12 02:03:53 -07:00
Won Park
ba839c23f3 changing decision semantics after guardian timeout (#17486)
**Summary**

This PR treats Guardian timeouts as distinct from explicit denials in
the core approval paths.
Timeouts now return timeout-specific guidance instead of Guardian
policy-rejection messaging.
It updates the command, shell, network, and MCP approval flows and adds
focused test coverage.
2026-04-12 00:00:50 -07:00
8 changed files with 96 additions and 16 deletions

View File

@@ -26,6 +26,7 @@ pub(crate) use approval_request::GuardianApprovalRequest;
pub(crate) use approval_request::GuardianMcpAnnotations;
pub(crate) use approval_request::guardian_approval_request_to_json;
pub(crate) use review::guardian_rejection_message;
pub(crate) use review::guardian_timeout_message;
pub(crate) use review::is_guardian_reviewer_source;
pub(crate) use review::new_guardian_review_id;
pub(crate) use review::review_approval_request;

View File

@@ -38,6 +38,12 @@ const GUARDIAN_REJECTION_INSTRUCTIONS: &str = concat!(
"Otherwise, stop and request user input.",
);
const GUARDIAN_TIMEOUT_INSTRUCTIONS: &str = concat!(
"The automatic permission approval review did not finish before its deadline. ",
"Do not assume the action is unsafe based on the timeout alone. ",
"You may retry once, or ask the user for guidance or explicit approval.",
);
pub(crate) fn new_guardian_review_id() -> String {
uuid::Uuid::new_v4().to_string()
}
@@ -63,6 +69,10 @@ pub(crate) async fn guardian_rejection_message(session: &Session, review_id: &st
}
}
pub(crate) fn guardian_timeout_message() -> String {
GUARDIAN_TIMEOUT_INSTRUCTIONS.to_string()
}
#[derive(Debug)]
pub(super) enum GuardianReviewOutcome {
Completed(anyhow::Result<GuardianAssessment>),
@@ -97,8 +107,9 @@ pub(crate) fn is_guardian_reviewer_source(
)
}
/// This function always fails closed: any timeout, review-session failure, or
/// parse failure is treated as a high-risk denial.
/// This function always fails closed: timeouts, review-session failures, and
/// parse failures all block execution, but timeouts are still surfaced to the
/// caller as distinct from explicit guardian denials.
async fn run_guardian_review(
session: Arc<Session>,
turn: Arc<TurnContext>,
@@ -170,14 +181,36 @@ async fn run_guardian_review(
outcome: GuardianAssessmentOutcome::Deny,
rationale: format!("Automatic approval review failed: {err}"),
},
GuardianReviewOutcome::TimedOut => GuardianAssessment {
risk_level: GuardianRiskLevel::High,
user_authorization: GuardianUserAuthorization::Unknown,
outcome: GuardianAssessmentOutcome::Deny,
rationale:
GuardianReviewOutcome::TimedOut => {
let rationale =
"Automatic approval review timed out while evaluating the requested approval."
.to_string(),
},
.to_string();
session
.send_event(
turn.as_ref(),
EventMsg::Warning(WarningEvent {
message: rationale.clone(),
}),
)
.await;
session
.send_event(
turn.as_ref(),
EventMsg::GuardianAssessment(GuardianAssessmentEvent {
id: review_id,
target_item_id,
turn_id: assessment_turn_id,
status: GuardianAssessmentStatus::TimedOut,
risk_level: None,
user_authorization: None,
rationale: Some(rationale),
decision_source: Some(GuardianAssessmentDecisionSource::Agent),
action: terminal_action,
}),
)
.await;
return ReviewDecision::TimedOut;
}
GuardianReviewOutcome::Aborted => {
session
.send_event(

View File

@@ -718,6 +718,14 @@ async fn cancelled_guardian_review_emits_terminal_abort_without_warning() {
assert!(warnings.is_empty());
}
#[test]
fn guardian_timeout_message_distinguishes_timeout_from_policy_denial() {
let message = guardian_timeout_message();
assert!(message.contains("did not finish before its deadline"));
assert!(message.contains("retry once"));
assert!(!message.contains("unacceptable risk"));
}
#[tokio::test]
async fn routes_approval_to_guardian_requires_auto_only_review_policy() {
let (_session, mut turn) = crate::codex::make_session_and_context().await;

View File

@@ -24,6 +24,7 @@ use crate::guardian::GuardianApprovalRequest;
use crate::guardian::GuardianMcpAnnotations;
use crate::guardian::guardian_approval_request_to_json;
use crate::guardian::guardian_rejection_message;
use crate::guardian::guardian_timeout_message;
use crate::guardian::new_guardian_review_id;
use crate::guardian::review_approval_request;
use crate::guardian::routes_approval_to_guardian;
@@ -980,9 +981,12 @@ async fn mcp_tool_approval_decision_from_guardian(
| ReviewDecision::ApprovedExecpolicyAmendment { .. }
| ReviewDecision::NetworkPolicyAmendment { .. } => McpToolApprovalDecision::Accept,
ReviewDecision::ApprovedForSession => McpToolApprovalDecision::AcceptForSession,
ReviewDecision::Denied | ReviewDecision::TimedOut => McpToolApprovalDecision::Decline {
ReviewDecision::Denied => McpToolApprovalDecision::Decline {
message: Some(guardian_rejection_message(sess, review_id).await),
},
ReviewDecision::TimedOut => McpToolApprovalDecision::Decline {
message: Some(guardian_timeout_message()),
},
ReviewDecision::Abort => McpToolApprovalDecision::Decline { message: None },
}
}

View File

@@ -843,6 +843,20 @@ async fn guardian_review_decision_maps_to_mcp_tool_decision() {
};
assert!(message.contains("Reason: too risky"));
assert!(message.contains("The agent must not attempt to achieve the same outcome"));
let timeout = mcp_tool_approval_decision_from_guardian(
session.as_ref(),
"review-id",
ReviewDecision::TimedOut,
)
.await;
let McpToolApprovalDecision::Decline {
message: Some(message),
} = timeout
else {
panic!("guardian timeout should carry a timeout message");
};
assert!(message.contains("did not finish before its deadline"));
assert!(!message.contains("unacceptable risk"));
assert_eq!(
mcp_tool_approval_decision_from_guardian(
session.as_ref(),

View File

@@ -1,6 +1,7 @@
use crate::codex::Session;
use crate::guardian::GuardianApprovalRequest;
use crate::guardian::guardian_rejection_message;
use crate::guardian::guardian_timeout_message;
use crate::guardian::new_guardian_review_id;
use crate::guardian::review_approval_request;
use crate::guardian::routes_approval_to_guardian;
@@ -488,7 +489,7 @@ impl NetworkApprovalService {
PendingApprovalDecision::Deny
}
},
ReviewDecision::Denied | ReviewDecision::TimedOut | ReviewDecision::Abort => {
ReviewDecision::Denied | ReviewDecision::Abort => {
if let Some(review_id) = guardian_review_id.as_deref() {
if let Some(owner_call) = owner_call.as_ref() {
let message = guardian_rejection_message(session.as_ref(), review_id).await;
@@ -507,6 +508,16 @@ impl NetworkApprovalService {
}
PendingApprovalDecision::Deny
}
ReviewDecision::TimedOut => {
if let Some(owner_call) = owner_call.as_ref() {
self.record_call_outcome(
&owner_call.registration_id,
NetworkApprovalOutcome::DeniedByPolicy(guardian_timeout_message()),
)
.await;
}
PendingApprovalDecision::Deny
}
};
if matches!(resolved, PendingApprovalDecision::AllowForSession) {

View File

@@ -7,6 +7,7 @@ retry with an escalated sandbox strategy on denial (no reapproval thanks to
caching).
*/
use crate::guardian::guardian_rejection_message;
use crate::guardian::guardian_timeout_message;
use crate::guardian::new_guardian_review_id;
use crate::guardian::routes_approval_to_guardian;
use crate::network_policy_decision::network_approval_context_from_payload;
@@ -151,7 +152,7 @@ impl ToolOrchestrator {
otel.tool_decision(otel_tn, otel_ci, &decision, otel_source);
match decision {
ReviewDecision::Denied | ReviewDecision::TimedOut | ReviewDecision::Abort => {
ReviewDecision::Denied | ReviewDecision::Abort => {
let reason = if let Some(review_id) = guardian_review_id.as_deref() {
guardian_rejection_message(tool_ctx.session.as_ref(), review_id).await
} else {
@@ -159,6 +160,9 @@ impl ToolOrchestrator {
};
return Err(ToolError::Rejected(reason));
}
ReviewDecision::TimedOut => {
return Err(ToolError::Rejected(guardian_timeout_message()));
}
ReviewDecision::Approved
| ReviewDecision::ApprovedExecpolicyAmendment { .. }
| ReviewDecision::ApprovedForSession => {}
@@ -306,9 +310,7 @@ impl ToolOrchestrator {
otel.tool_decision(otel_tn, otel_ci, &decision, otel_source);
match decision {
ReviewDecision::Denied
| ReviewDecision::TimedOut
| ReviewDecision::Abort => {
ReviewDecision::Denied | ReviewDecision::Abort => {
let reason = if let Some(review_id) = guardian_review_id.as_deref() {
guardian_rejection_message(tool_ctx.session.as_ref(), review_id)
.await
@@ -317,6 +319,9 @@ impl ToolOrchestrator {
};
return Err(ToolError::Rejected(reason));
}
ReviewDecision::TimedOut => {
return Err(ToolError::Rejected(guardian_timeout_message()));
}
ReviewDecision::Approved
| ReviewDecision::ApprovedExecpolicyAmendment { .. }
| ReviewDecision::ApprovedForSession => {}

View File

@@ -4,6 +4,7 @@ use crate::exec::ExecExpiration;
use crate::exec::is_likely_sandbox_denied;
use crate::guardian::GuardianApprovalRequest;
use crate::guardian::guardian_rejection_message;
use crate::guardian::guardian_timeout_message;
use crate::guardian::new_guardian_review_id;
use crate::guardian::review_approval_request;
use crate::guardian::routes_approval_to_guardian;
@@ -485,7 +486,7 @@ impl CoreShellActionProvider {
EscalationDecision::deny(Some("User denied execution".to_string()))
}
},
ReviewDecision::Denied | ReviewDecision::TimedOut => {
ReviewDecision::Denied => {
let message = if let Some(review_id) =
prompt_decision.guardian_review_id.as_deref()
{
@@ -495,6 +496,9 @@ impl CoreShellActionProvider {
};
EscalationDecision::deny(Some(message))
}
ReviewDecision::TimedOut => {
EscalationDecision::deny(Some(guardian_timeout_message()))
}
ReviewDecision::Abort => {
EscalationDecision::deny(Some("User cancelled execution".to_string()))
}