mirror of
https://github.com/openai/codex.git
synced 2026-05-04 13:21:54 +03:00
Add guardian approval MVP (#13692)
## Summary

- add the guardian reviewer flow for `on-request` approvals in command, patch, sandbox-retry, and managed-network approval paths
- keep guardian behind `features.guardian_approval` instead of exposing a public `approval_policy = guardian` mode
- route ordinary `OnRequest` approvals to the guardian subagent when the feature is enabled, without changing the public approval-mode surface

## Public model

- public approval modes stay unchanged
- guardian is enabled via `features.guardian_approval`
- when that feature is on, `approval_policy = on-request` keeps the same approval boundaries but sends those approval requests to the guardian reviewer instead of the user
- `/experimental` only persists the feature flag; it does not rewrite `approval_policy`
- CLI and app-server no longer expose a separate `guardian` approval mode in this PR

## Guardian reviewer

- the reviewer runs as a normal subagent and reuses the existing subagent/thread machinery
- it is locked to a read-only sandbox and `approval_policy = never`
- it does not inherit user/project exec-policy rules
- it prefers `gpt-5.4` when the current provider exposes it, otherwise falls back to the parent turn's active model
- it fail-closes on timeout, startup failure, malformed output, or any other review error
- it currently auto-approves only when `risk_score < 80`

## Review context and policy

- guardian mirrors `OnRequest` approval semantics rather than introducing a separate approval policy
- explicit `require_escalated` requests follow the same approval surface as `OnRequest`; the difference is only who reviews them
- managed-network allowlist misses that enter the approval flow are also reviewed by guardian
- the review prompt includes bounded recent transcript history plus recent tool call/result evidence
- transcript entries and planned-action strings are truncated with explicit `<guardian_truncated ... />` markers so large payloads stay bounded
- apply-patch reviews include the full patch content (without duplicating the structured `changes` payload)
- the guardian request layout is snapshot-tested using the same model-visible Responses request formatter used elsewhere in core

## Guardian network behavior

- the guardian subagent inherits the parent session's managed-network allowlist when one exists, so it can use the same approved network surface while reviewing
- exact session-scoped network approvals are copied into the guardian session with protocol/port scope preserved
- those copied approvals are now seeded before the guardian's first turn is submitted, so inherited approvals are available during any immediate review-time checks

## Out of scope / follow-ups

- the sandbox-permission validation split was pulled into a separate PR and is not part of this diff
- a future follow-up can enable `serde_json` preserve-order in `codex-core` and then simplify the guardian action rendering further

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
committed by
GitHub
parent
cf143bf71e
commit
e84ee33cc0
@@ -62,6 +62,9 @@ impl Stage {
|
||||
|
||||
pub fn experimental_announcement(self) -> Option<&'static str> {
|
||||
match self {
|
||||
Stage::Experimental {
|
||||
announcement: "", ..
|
||||
} => None,
|
||||
Stage::Experimental { announcement, .. } => Some(announcement),
|
||||
_ => None,
|
||||
}
|
||||
@@ -144,6 +147,8 @@ pub enum Feature {
|
||||
Steer,
|
||||
/// Allow request_user_input in Default collaboration mode.
|
||||
DefaultModeRequestUserInput,
|
||||
/// Enable guardian subagent approvals.
|
||||
GuardianApproval,
|
||||
/// Enable collaboration modes (Plan, Default).
|
||||
/// Kept for config backward compatibility; behavior is always collaboration-modes-enabled.
|
||||
CollaborationModes,
|
||||
@@ -693,6 +698,16 @@ pub const FEATURES: &[FeatureSpec] = &[
|
||||
stage: Stage::UnderDevelopment,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::GuardianApproval,
|
||||
key: "guardian_approval",
|
||||
stage: Stage::Experimental {
|
||||
name: "Guardian approvals",
|
||||
menu_description: "Let a guardian subagent review `on-request` approval prompts instead of showing them to you, including sandbox escapes and blocked network access.",
|
||||
announcement: "",
|
||||
},
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::CollaborationModes,
|
||||
key: "collaboration_modes",
|
||||
@@ -888,6 +903,23 @@ mod tests {
|
||||
assert_eq!(Feature::JsRepl.default_enabled(), false);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn guardian_approval_is_experimental_and_user_toggleable() {
|
||||
let spec = Feature::GuardianApproval.info();
|
||||
let stage = spec.stage;
|
||||
|
||||
assert!(matches!(stage, Stage::Experimental { .. }));
|
||||
assert_eq!(stage.experimental_menu_name(), Some("Guardian approvals"));
|
||||
assert_eq!(
|
||||
stage.experimental_menu_description().map(str::to_owned),
|
||||
Some(
|
||||
"Let a guardian subagent review `on-request` approval prompts instead of showing them to you, including sandbox escapes and blocked network access.".to_string()
|
||||
)
|
||||
);
|
||||
assert_eq!(stage.experimental_announcement(), None);
|
||||
assert_eq!(Feature::GuardianApproval.default_enabled(), false);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn image_generation_is_under_development() {
|
||||
assert_eq!(Feature::ImageGeneration.stage(), Stage::UnderDevelopment);
|
||||
|
||||
Reference in New Issue
Block a user