mirror of
https://github.com/openai/codex.git
synced 2026-04-29 02:41:12 +03:00
Add guardian approval MVP (#13692)
## Summary

- add the guardian reviewer flow for `on-request` approvals in command, patch, sandbox-retry, and managed-network approval paths
- keep guardian behind `features.guardian_approval` instead of exposing a public `approval_policy = guardian` mode
- route ordinary `OnRequest` approvals to the guardian subagent when the feature is enabled, without changing the public approval-mode surface

## Public model

- public approval modes stay unchanged
- guardian is enabled via `features.guardian_approval`
- when that feature is on, `approval_policy = on-request` keeps the same approval boundaries but sends those approval requests to the guardian reviewer instead of the user
- `/experimental` only persists the feature flag; it does not rewrite `approval_policy`
- CLI and app-server no longer expose a separate `guardian` approval mode in this PR

## Guardian reviewer

- the reviewer runs as a normal subagent and reuses the existing subagent/thread machinery
- it is locked to a read-only sandbox and `approval_policy = never`
- it does not inherit user/project exec-policy rules
- it prefers `gpt-5.4` when the current provider exposes it, otherwise falls back to the parent turn's active model
- it fails closed on timeout, startup failure, malformed output, or any other review error
- it currently auto-approves only when `risk_score < 80`

## Review context and policy

- guardian mirrors `OnRequest` approval semantics rather than introducing a separate approval policy
- explicit `require_escalated` requests follow the same approval surface as `OnRequest`; the difference is only who reviews them
- managed-network allowlist misses that enter the approval flow are also reviewed by guardian
- the review prompt includes bounded recent transcript history plus recent tool call/result evidence
- transcript entries and planned-action strings are truncated with explicit `<guardian_truncated ... />` markers so large payloads stay bounded
- apply-patch reviews include the full patch content (without duplicating the structured `changes` payload)
- the guardian request layout is snapshot-tested using the same model-visible Responses request formatter used elsewhere in core

## Guardian network behavior

- the guardian subagent inherits the parent session's managed-network allowlist when one exists, so it can use the same approved network surface while reviewing
- exact session-scoped network approvals are copied into the guardian session with protocol/port scope preserved
- those copied approvals are now seeded before the guardian's first turn is submitted, so inherited approvals are available during any immediate review-time checks

## Out of scope / follow-ups

- the sandbox-permission validation split was pulled into a separate PR and is not part of this diff
- a future follow-up can enable `serde_json` preserve-order in `codex-core` and then simplify the guardian action rendering further

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
committed by
GitHub
parent
cf143bf71e
commit
e84ee33cc0
@@ -16,6 +16,7 @@ use codex_protocol::protocol::Submission;
|
||||
use codex_protocol::request_user_input::RequestUserInputArgs;
|
||||
use codex_protocol::request_user_input::RequestUserInputResponse;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use serde_json::Value;
|
||||
use std::time::Duration;
|
||||
use tokio::time::timeout;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
@@ -36,6 +37,7 @@ use codex_protocol::protocol::InitialHistory;
|
||||
/// The returned `events_rx` yields non-approval events emitted by the sub-agent.
|
||||
/// Approval requests are handled via `parent_session` and are not surfaced.
|
||||
/// The returned `ops_tx` allows the caller to submit additional `Op`s to the sub-agent.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) async fn run_codex_thread_interactive(
|
||||
config: Config,
|
||||
auth_manager: Arc<AuthManager>,
|
||||
@@ -43,6 +45,7 @@ pub(crate) async fn run_codex_thread_interactive(
|
||||
parent_session: Arc<Session>,
|
||||
parent_ctx: Arc<TurnContext>,
|
||||
cancel_token: CancellationToken,
|
||||
subagent_source: SubAgentSource,
|
||||
initial_history: Option<InitialHistory>,
|
||||
) -> Result<Codex, CodexErr> {
|
||||
let (tx_sub, rx_sub) = async_channel::bounded(SUBMISSION_CHANNEL_CAPACITY);
|
||||
@@ -57,7 +60,7 @@ pub(crate) async fn run_codex_thread_interactive(
|
||||
Arc::clone(&parent_session.services.mcp_manager),
|
||||
Arc::clone(&parent_session.services.file_watcher),
|
||||
initial_history.unwrap_or(InitialHistory::New),
|
||||
SessionSource::SubAgent(SubAgentSource::Review),
|
||||
SessionSource::SubAgent(subagent_source),
|
||||
parent_session.services.agent_control.clone(),
|
||||
Vec::new(),
|
||||
false,
|
||||
@@ -113,6 +116,8 @@ pub(crate) async fn run_codex_thread_one_shot(
|
||||
parent_session: Arc<Session>,
|
||||
parent_ctx: Arc<TurnContext>,
|
||||
cancel_token: CancellationToken,
|
||||
subagent_source: SubAgentSource,
|
||||
final_output_json_schema: Option<Value>,
|
||||
initial_history: Option<InitialHistory>,
|
||||
) -> Result<Codex, CodexErr> {
|
||||
// Use a child token so we can stop the delegate after completion without
|
||||
@@ -125,6 +130,7 @@ pub(crate) async fn run_codex_thread_one_shot(
|
||||
parent_session,
|
||||
parent_ctx,
|
||||
child_cancel.clone(),
|
||||
subagent_source,
|
||||
initial_history,
|
||||
)
|
||||
.await?;
|
||||
@@ -132,7 +138,7 @@ pub(crate) async fn run_codex_thread_one_shot(
|
||||
// Send the initial input to kick off the one-shot turn.
|
||||
io.submit(Op::UserInput {
|
||||
items: input,
|
||||
final_output_json_schema: None,
|
||||
final_output_json_schema,
|
||||
})
|
||||
.await?;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user