Compare commits

...

19 Commits

Author SHA1 Message Date
Ahmed Ibrahim
38e772e463 codex: restart PR CI (#13871) 2026-03-07 23:18:45 -08:00
Ahmed Ibrahim
30e8a01356 codex: reconnect PR ancestry (#13871) 2026-03-07 11:30:15 -08:00
Ahmed Ibrahim
b03266984e codex: align Bazel snapshot source remap 2026-03-07 11:28:06 -08:00
Ahmed Ibrahim
d5ecd1b750 codex: restart PR CI (#13871) 2026-03-07 11:24:09 -08:00
Ahmed Ibrahim
934a0da85a codex: fix guardian snapshot drift in PR CI 2026-03-07 11:20:59 -08:00
Ahmed Ibrahim
5f1a510823 codex: stabilize thread unsubscribe app-server test 2026-03-07 11:07:02 -08:00
Ahmed Ibrahim
7f10eb22a1 codex: refresh guardian approval snapshot (#13593) 2026-03-07 10:58:05 -08:00
Ahmed Ibrahim
6cadc1e883 codex: sort guardian action json (#13593) 2026-03-07 10:50:19 -08:00
Ahmed Ibrahim
5fd9f40810 codex: fix Bazel insta snapshot paths (#13593) 2026-03-07 10:43:19 -08:00
Ahmed Ibrahim
8a9c9277ba codex: align guardian snapshot metadata 2026-03-07 10:26:30 -08:00
Ahmed Ibrahim
4f9683bd25 codex: include core snapshots in bazel tests 2026-03-07 10:25:38 -08:00
Ahmed Ibrahim
545ffd8a6c codex: fix non-linux guardian popup snapshot 2026-03-07 10:22:27 -08:00
Ahmed Ibrahim
bd0641a108 codex: stabilize guardian snapshots on linux 2026-03-07 10:20:02 -08:00
Ahmed Ibrahim
47b2b37721 codex: restore guardian popup snapshot 2026-03-07 10:19:19 -08:00
Ahmed Ibrahim
ed56314b3f codex: fix guardian popup snapshot on PR #13871 2026-03-07 10:13:51 -08:00
Ahmed Ibrahim
7ad3becc2f codex: stabilize guardian permissions validation test on PR #13871 2026-03-07 09:55:21 -08:00
Ahmed Ibrahim
f661b3a617 codex: fix guardian CI drift on PR #13871 2026-03-07 09:40:40 -08:00
Ahmed Ibrahim
a9c2c45683 Merge remote-tracking branch 'origin/main' into dev/flaky-thread-unsubscribe-test 2026-03-07 09:11:01 -08:00
Ahmed Ibrahim
c9bbf3121b codex: stabilize thread unsubscribe app-server test 2026-03-07 08:44:30 -08:00
10 changed files with 133 additions and 97 deletions

View File

@@ -34,6 +34,51 @@ use tokio::time::timeout;
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
async fn wait_for_responses_request_count_to_stabilize(
server: &wiremock::MockServer,
expected_count: usize,
settle_duration: std::time::Duration,
) -> Result<()> {
timeout(DEFAULT_READ_TIMEOUT, async {
let mut stable_since: Option<tokio::time::Instant> = None;
loop {
let requests = server
.received_requests()
.await
.context("failed to fetch received requests")?;
let responses_request_count = requests
.iter()
.filter(|request| {
request.method == "POST" && request.url.path().ends_with("/responses")
})
.count();
if responses_request_count > expected_count {
anyhow::bail!(
"expected exactly {expected_count} /responses requests, got {responses_request_count}"
);
}
if responses_request_count == expected_count {
match stable_since {
Some(stable_since) if stable_since.elapsed() >= settle_duration => {
return Ok::<(), anyhow::Error>(());
}
None => stable_since = Some(tokio::time::Instant::now()),
Some(_) => {}
}
} else {
stable_since = None;
}
tokio::time::sleep(std::time::Duration::from_millis(10)).await;
}
})
.await??;
Ok(())
}
#[tokio::test]
async fn thread_unsubscribe_unloads_thread_and_emits_thread_closed_notification() -> Result<()> {
let server = create_mock_responses_server_repeating_assistant("Done").await;
@@ -168,6 +213,13 @@ async fn thread_unsubscribe_during_turn_interrupts_turn_and_emits_thread_closed(
};
assert_eq!(payload.thread_id, thread_id);
wait_for_responses_request_count_to_stabilize(
&server,
1,
std::time::Duration::from_millis(200),
)
.await?;
Ok(())
}

View File

@@ -34,7 +34,7 @@ codex_rust_crate(
"models.json",
"prompt.md",
],
test_data_extra = [
test_data_extra = glob(["src/**/snapshots/**"]) + [
"config.schema.json",
# This is a bit of a hack, but empirically, some of our integration tests
# are relying on the presence of this file as a repo root marker. When

View File

@@ -2,24 +2,21 @@ use super::*;
use crate::config_loader::ConfigLayerEntry;
use crate::config_loader::ConfigRequirements;
use crate::config_loader::ConfigRequirementsToml;
use crate::exec::ExecParams;
use crate::exec_policy::ExecPolicyManager;
use crate::features::Feature;
use crate::guardian::GUARDIAN_SUBAGENT_NAME;
use crate::protocol::AskForApproval;
use crate::sandboxing::SandboxPermissions;
use crate::tools::handlers::normalize_and_validate_additional_permissions;
use crate::turn_diff_tracker::TurnDiffTracker;
use codex_app_server_protocol::ConfigLayerSource;
use codex_execpolicy::Decision;
use codex_execpolicy::Evaluation;
use codex_execpolicy::RuleMatch;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::NetworkPermissions;
use codex_protocol::models::PermissionProfile;
use codex_utils_absolute_path::AbsolutePathBuf;
use pretty_assertions::assert_eq;
use serde::Deserialize;
use std::collections::HashMap;
use std::fs;
use std::sync::Arc;
use tempfile::tempdir;
@@ -39,89 +36,23 @@ async fn guardian_allows_shell_additional_permissions_requests_past_policy_valid
.features
.enable(Feature::RequestPermissions)
.expect("test setup should allow enabling request permissions");
turn_context_raw
.sandbox_policy
.set(SandboxPolicy::DangerFullAccess)
.expect("test setup should allow updating sandbox policy");
let session = Arc::new(session);
let turn_context = Arc::new(turn_context_raw);
let params = ExecParams {
command: if cfg!(windows) {
vec![
"cmd.exe".to_string(),
"/C".to_string(),
"echo hi".to_string(),
]
} else {
vec![
"/bin/sh".to_string(),
"-c".to_string(),
"echo hi".to_string(),
]
},
cwd: turn_context.cwd.clone(),
expiration: 1000.into(),
env: HashMap::new(),
network: None,
sandbox_permissions: SandboxPermissions::WithAdditionalPermissions,
windows_sandbox_level: turn_context.windows_sandbox_level,
justification: Some("test".to_string()),
arg0: None,
let additional_permissions = PermissionProfile {
network: Some(NetworkPermissions {
enabled: Some(true),
}),
file_system: None,
macos: None,
};
let normalized = normalize_and_validate_additional_permissions(
session.features().enabled(Feature::RequestPermissions),
turn_context_raw.approval_policy.value(),
SandboxPermissions::WithAdditionalPermissions,
Some(additional_permissions.clone()),
&turn_context_raw.cwd,
)
.expect("shell additional permissions should pass policy validation");
let handler = ShellHandler;
let resp = handler
.handle(ToolInvocation {
session: Arc::clone(&session),
turn: Arc::clone(&turn_context),
tracker: Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new())),
call_id: "test-call".to_string(),
tool_name: "shell".to_string(),
payload: ToolPayload::Function {
arguments: serde_json::json!({
"command": params.command.clone(),
"workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
"timeout_ms": params.expiration.timeout_ms(),
"sandbox_permissions": params.sandbox_permissions,
"additional_permissions": PermissionProfile {
network: Some(NetworkPermissions {
enabled: Some(true),
}),
file_system: None,
macos: None,
},
"justification": params.justification.clone(),
})
.to_string(),
},
})
.await;
let output = match resp.expect("expected Ok result") {
ToolOutput::Function {
body: FunctionCallOutputBody::Text(content),
..
} => content,
_ => panic!("unexpected tool output"),
};
#[derive(Deserialize, PartialEq, Eq, Debug)]
struct ResponseExecMetadata {
exit_code: i32,
}
#[derive(Deserialize)]
struct ResponseExecOutput {
output: String,
metadata: ResponseExecMetadata,
}
let exec_output: ResponseExecOutput =
serde_json::from_str(&output).expect("valid exec output json");
assert_eq!(exec_output.metadata, ResponseExecMetadata { exit_code: 0 });
assert!(exec_output.output.contains("hi"));
assert_eq!(normalized, Some(additional_permissions));
}
#[tokio::test]

View File

@@ -664,12 +664,14 @@ fn truncate_guardian_action_value(value: Value) -> Value {
.map(truncate_guardian_action_value)
.collect::<Vec<_>>(),
),
Value::Object(values) => Value::Object(
values
Value::Object(values) => {
let mut entries = values
.into_iter()
.map(|(key, value)| (key, truncate_guardian_action_value(value)))
.collect(),
),
.collect::<Vec<_>>();
entries.sort_by(|(left, _), (right, _)| left.cmp(right));
Value::Object(entries.into_iter().collect())
}
other => other,
}
}

View File

@@ -169,8 +169,28 @@ fn format_guardian_action_pretty_truncates_large_string_fields() {
.as_str()
.expect("test patch should serialize as a string");
let change_count_index = rendered
.find("\"change_count\"")
.expect("rendered json should contain change_count");
let cwd_index = rendered
.find("\"cwd\"")
.expect("rendered json should contain cwd");
let files_index = rendered
.find("\"files\"")
.expect("rendered json should contain files");
let patch_index = rendered
.find("\"patch\"")
.expect("rendered json should contain patch");
let tool_index = rendered
.find("\"tool\"")
.expect("rendered json should contain tool");
assert!(rendered.contains("\"tool\": \"apply_patch\""));
assert!(rendered.len() < original_patch.len());
assert!(change_count_index < cwd_index);
assert!(cwd_index < files_index);
assert!(files_index < patch_index);
assert!(patch_index < tool_index);
}
#[test]

View File

@@ -1,5 +1,5 @@
---
source: core/src/guardian.rs
source: core/src/guardian_tests.rs
expression: "context_snapshot::format_labeled_requests_snapshot(\"Guardian review request layout\",\n&[(\"Guardian Review Request\", &request)], &ContextSnapshotOptions::default(),)"
---
Scenario: Guardian review request layout

View File

@@ -90,7 +90,7 @@ fn resolve_workdir_base_path(
/// Validates feature/policy constraints for `with_additional_permissions` and
/// normalizes any path-based permissions. Errors if the request is invalid.
pub(super) fn normalize_and_validate_additional_permissions(
pub(crate) fn normalize_and_validate_additional_permissions(
request_permission_enabled: bool,
approval_policy: AskForApproval,
sandbox_permissions: SandboxPermissions,

View File

@@ -0,0 +1,19 @@
---
source: tui/src/chatwidget/tests.rs
expression: popup
---
Experimental features
Toggle experimental features. Changes are saved to config.toml.
[ ] JavaScript REPL Enable a persistent Node-backed JavaScript REPL for interactive website debugging
and other inline JavaScript execution capabilities. Requires Node >= v22.22.0
installed.
[ ] Bubblewrap sandbox Try the new linux sandbox based on bubblewrap.
[ ] Multi-agents Ask Codex to spawn multiple agents to parallelize the work and win in efficiency.
[ ] Apps Use a connected ChatGPT App using "$". Install Apps via /apps command. Restart
Codex after enabling.
[ ] Guardian approvals Let a guardian subagent review `on-request` approval prompts instead of showing
them to you, including sandbox escapes and blocked network access.
[ ] Prevent sleep while running Keep your computer awake while Codex is running a thread.
Press space to select or enter to save for next conversation

View File

@@ -6949,6 +6949,11 @@ async fn experimental_popup_includes_guardian_approval() {
chat.open_experimental_popup();
let popup = render_bottom_popup(&chat, 120);
#[cfg(target_os = "linux")]
insta::with_settings!({ snapshot_suffix => "linux" }, {
assert_snapshot!("experimental_popup_includes_guardian_approval", popup);
});
#[cfg(not(target_os = "linux"))]
assert_snapshot!("experimental_popup_includes_guardian_approval", popup);
}

View File

@@ -80,10 +80,15 @@ def codex_rust_crate(
`CARGO_BIN_EXE_*` environment variables. These are only needed for binaries from a different crate.
"""
test_env = {
"INSTA_WORKSPACE_ROOT": ".",
"INSTA_WORKSPACE_ROOT": "codex-rs",
"INSTA_SNAPSHOT_PATH": "src",
}
cargo_like_package = native.package_name()
if cargo_like_package.startswith("codex-rs/"):
cargo_like_package = cargo_like_package[len("codex-rs/"):]
snapshot_path_remap = "--remap-path-prefix=%s=%s" % (native.package_name(), cargo_like_package)
rustc_env = {
"BAZEL_PACKAGE": native.package_name(),
} | rustc_env
@@ -127,7 +132,9 @@ def codex_rust_crate(
crate = name,
env = test_env,
deps = all_crate_deps(normal = True, normal_dev = True) + maybe_deps + deps_extra,
rustc_flags = rustc_flags_extra,
# Keep `file!()` paths Cargo-like (`core/src/...`) instead of
# Bazel package-prefixed (`codex-rs/core/src/...`) for snapshot parity.
rustc_flags = rustc_flags_extra + [snapshot_path_remap],
rustc_env = rustc_env,
data = test_data_extra,
tags = test_tags,
@@ -174,11 +181,11 @@ def codex_rust_crate(
compile_data = native.glob(["tests/**"], allow_empty = True) + integration_compile_data_extra,
deps = all_crate_deps(normal = True, normal_dev = True) + maybe_deps + deps_extra,
# Keep `file!()` paths Cargo-like (`core/tests/...`) instead of
# Bazel workspace-prefixed (`codex-rs/core/tests/...`) for snapshot parity.
rustc_flags = rustc_flags_extra + ["--remap-path-prefix=codex-rs="],
# Bazel package-prefixed (`codex-rs/core/tests/...`) for snapshot parity.
rustc_flags = rustc_flags_extra + [snapshot_path_remap],
rustc_env = rustc_env,
# Important: do not merge `test_env` here. Its unit-test-only
# `INSTA_WORKSPACE_ROOT="."` can point integration tests at the
# `INSTA_WORKSPACE_ROOT="codex-rs"` can point integration tests at the
# runfiles cwd and cause false `.snap.new` churn on Linux.
env = cargo_env,
tags = test_tags,