mirror of
https://github.com/openai/codex.git
synced 2026-03-16 10:56:29 +03:00
Compare commits
19 Commits
keyz/codeg
...
dev/flaky-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
38e772e463 | ||
|
|
30e8a01356 | ||
|
|
b03266984e | ||
|
|
d5ecd1b750 | ||
|
|
934a0da85a | ||
|
|
5f1a510823 | ||
|
|
7f10eb22a1 | ||
|
|
6cadc1e883 | ||
|
|
5fd9f40810 | ||
|
|
8a9c9277ba | ||
|
|
4f9683bd25 | ||
|
|
545ffd8a6c | ||
|
|
bd0641a108 | ||
|
|
47b2b37721 | ||
|
|
ed56314b3f | ||
|
|
7ad3becc2f | ||
|
|
f661b3a617 | ||
|
|
a9c2c45683 | ||
|
|
c9bbf3121b |
@@ -34,6 +34,51 @@ use tokio::time::timeout;
|
||||
|
||||
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
|
||||
|
||||
async fn wait_for_responses_request_count_to_stabilize(
|
||||
server: &wiremock::MockServer,
|
||||
expected_count: usize,
|
||||
settle_duration: std::time::Duration,
|
||||
) -> Result<()> {
|
||||
timeout(DEFAULT_READ_TIMEOUT, async {
|
||||
let mut stable_since: Option<tokio::time::Instant> = None;
|
||||
loop {
|
||||
let requests = server
|
||||
.received_requests()
|
||||
.await
|
||||
.context("failed to fetch received requests")?;
|
||||
let responses_request_count = requests
|
||||
.iter()
|
||||
.filter(|request| {
|
||||
request.method == "POST" && request.url.path().ends_with("/responses")
|
||||
})
|
||||
.count();
|
||||
|
||||
if responses_request_count > expected_count {
|
||||
anyhow::bail!(
|
||||
"expected exactly {expected_count} /responses requests, got {responses_request_count}"
|
||||
);
|
||||
}
|
||||
|
||||
if responses_request_count == expected_count {
|
||||
match stable_since {
|
||||
Some(stable_since) if stable_since.elapsed() >= settle_duration => {
|
||||
return Ok::<(), anyhow::Error>(());
|
||||
}
|
||||
None => stable_since = Some(tokio::time::Instant::now()),
|
||||
Some(_) => {}
|
||||
}
|
||||
} else {
|
||||
stable_since = None;
|
||||
}
|
||||
|
||||
tokio::time::sleep(std::time::Duration::from_millis(10)).await;
|
||||
}
|
||||
})
|
||||
.await??;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn thread_unsubscribe_unloads_thread_and_emits_thread_closed_notification() -> Result<()> {
|
||||
let server = create_mock_responses_server_repeating_assistant("Done").await;
|
||||
@@ -168,6 +213,13 @@ async fn thread_unsubscribe_during_turn_interrupts_turn_and_emits_thread_closed(
|
||||
};
|
||||
assert_eq!(payload.thread_id, thread_id);
|
||||
|
||||
wait_for_responses_request_count_to_stabilize(
|
||||
&server,
|
||||
1,
|
||||
std::time::Duration::from_millis(200),
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ codex_rust_crate(
|
||||
"models.json",
|
||||
"prompt.md",
|
||||
],
|
||||
test_data_extra = [
|
||||
test_data_extra = glob(["src/**/snapshots/**"]) + [
|
||||
"config.schema.json",
|
||||
# This is a bit of a hack, but empirically, some of our integration tests
|
||||
# are relying on the presence of this file as a repo root marker. When
|
||||
|
||||
@@ -2,24 +2,21 @@ use super::*;
|
||||
use crate::config_loader::ConfigLayerEntry;
|
||||
use crate::config_loader::ConfigRequirements;
|
||||
use crate::config_loader::ConfigRequirementsToml;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::exec_policy::ExecPolicyManager;
|
||||
use crate::features::Feature;
|
||||
use crate::guardian::GUARDIAN_SUBAGENT_NAME;
|
||||
use crate::protocol::AskForApproval;
|
||||
use crate::sandboxing::SandboxPermissions;
|
||||
use crate::tools::handlers::normalize_and_validate_additional_permissions;
|
||||
use crate::turn_diff_tracker::TurnDiffTracker;
|
||||
use codex_app_server_protocol::ConfigLayerSource;
|
||||
use codex_execpolicy::Decision;
|
||||
use codex_execpolicy::Evaluation;
|
||||
use codex_execpolicy::RuleMatch;
|
||||
use codex_protocol::models::FunctionCallOutputBody;
|
||||
use codex_protocol::models::NetworkPermissions;
|
||||
use codex_protocol::models::PermissionProfile;
|
||||
use codex_utils_absolute_path::AbsolutePathBuf;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde::Deserialize;
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::sync::Arc;
|
||||
use tempfile::tempdir;
|
||||
@@ -39,89 +36,23 @@ async fn guardian_allows_shell_additional_permissions_requests_past_policy_valid
|
||||
.features
|
||||
.enable(Feature::RequestPermissions)
|
||||
.expect("test setup should allow enabling request permissions");
|
||||
turn_context_raw
|
||||
.sandbox_policy
|
||||
.set(SandboxPolicy::DangerFullAccess)
|
||||
.expect("test setup should allow updating sandbox policy");
|
||||
let session = Arc::new(session);
|
||||
let turn_context = Arc::new(turn_context_raw);
|
||||
|
||||
let params = ExecParams {
|
||||
command: if cfg!(windows) {
|
||||
vec![
|
||||
"cmd.exe".to_string(),
|
||||
"/C".to_string(),
|
||||
"echo hi".to_string(),
|
||||
]
|
||||
} else {
|
||||
vec![
|
||||
"/bin/sh".to_string(),
|
||||
"-c".to_string(),
|
||||
"echo hi".to_string(),
|
||||
]
|
||||
},
|
||||
cwd: turn_context.cwd.clone(),
|
||||
expiration: 1000.into(),
|
||||
env: HashMap::new(),
|
||||
network: None,
|
||||
sandbox_permissions: SandboxPermissions::WithAdditionalPermissions,
|
||||
windows_sandbox_level: turn_context.windows_sandbox_level,
|
||||
justification: Some("test".to_string()),
|
||||
arg0: None,
|
||||
let additional_permissions = PermissionProfile {
|
||||
network: Some(NetworkPermissions {
|
||||
enabled: Some(true),
|
||||
}),
|
||||
file_system: None,
|
||||
macos: None,
|
||||
};
|
||||
let normalized = normalize_and_validate_additional_permissions(
|
||||
session.features().enabled(Feature::RequestPermissions),
|
||||
turn_context_raw.approval_policy.value(),
|
||||
SandboxPermissions::WithAdditionalPermissions,
|
||||
Some(additional_permissions.clone()),
|
||||
&turn_context_raw.cwd,
|
||||
)
|
||||
.expect("shell additional permissions should pass policy validation");
|
||||
|
||||
let handler = ShellHandler;
|
||||
let resp = handler
|
||||
.handle(ToolInvocation {
|
||||
session: Arc::clone(&session),
|
||||
turn: Arc::clone(&turn_context),
|
||||
tracker: Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new())),
|
||||
call_id: "test-call".to_string(),
|
||||
tool_name: "shell".to_string(),
|
||||
payload: ToolPayload::Function {
|
||||
arguments: serde_json::json!({
|
||||
"command": params.command.clone(),
|
||||
"workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
|
||||
"timeout_ms": params.expiration.timeout_ms(),
|
||||
"sandbox_permissions": params.sandbox_permissions,
|
||||
"additional_permissions": PermissionProfile {
|
||||
network: Some(NetworkPermissions {
|
||||
enabled: Some(true),
|
||||
}),
|
||||
file_system: None,
|
||||
macos: None,
|
||||
},
|
||||
"justification": params.justification.clone(),
|
||||
})
|
||||
.to_string(),
|
||||
},
|
||||
})
|
||||
.await;
|
||||
|
||||
let output = match resp.expect("expected Ok result") {
|
||||
ToolOutput::Function {
|
||||
body: FunctionCallOutputBody::Text(content),
|
||||
..
|
||||
} => content,
|
||||
_ => panic!("unexpected tool output"),
|
||||
};
|
||||
|
||||
#[derive(Deserialize, PartialEq, Eq, Debug)]
|
||||
struct ResponseExecMetadata {
|
||||
exit_code: i32,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct ResponseExecOutput {
|
||||
output: String,
|
||||
metadata: ResponseExecMetadata,
|
||||
}
|
||||
|
||||
let exec_output: ResponseExecOutput =
|
||||
serde_json::from_str(&output).expect("valid exec output json");
|
||||
|
||||
assert_eq!(exec_output.metadata, ResponseExecMetadata { exit_code: 0 });
|
||||
assert!(exec_output.output.contains("hi"));
|
||||
assert_eq!(normalized, Some(additional_permissions));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
@@ -664,12 +664,14 @@ fn truncate_guardian_action_value(value: Value) -> Value {
|
||||
.map(truncate_guardian_action_value)
|
||||
.collect::<Vec<_>>(),
|
||||
),
|
||||
Value::Object(values) => Value::Object(
|
||||
values
|
||||
Value::Object(values) => {
|
||||
let mut entries = values
|
||||
.into_iter()
|
||||
.map(|(key, value)| (key, truncate_guardian_action_value(value)))
|
||||
.collect(),
|
||||
),
|
||||
.collect::<Vec<_>>();
|
||||
entries.sort_by(|(left, _), (right, _)| left.cmp(right));
|
||||
Value::Object(entries.into_iter().collect())
|
||||
}
|
||||
other => other,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -169,8 +169,28 @@ fn format_guardian_action_pretty_truncates_large_string_fields() {
|
||||
.as_str()
|
||||
.expect("test patch should serialize as a string");
|
||||
|
||||
let change_count_index = rendered
|
||||
.find("\"change_count\"")
|
||||
.expect("rendered json should contain change_count");
|
||||
let cwd_index = rendered
|
||||
.find("\"cwd\"")
|
||||
.expect("rendered json should contain cwd");
|
||||
let files_index = rendered
|
||||
.find("\"files\"")
|
||||
.expect("rendered json should contain files");
|
||||
let patch_index = rendered
|
||||
.find("\"patch\"")
|
||||
.expect("rendered json should contain patch");
|
||||
let tool_index = rendered
|
||||
.find("\"tool\"")
|
||||
.expect("rendered json should contain tool");
|
||||
|
||||
assert!(rendered.contains("\"tool\": \"apply_patch\""));
|
||||
assert!(rendered.len() < original_patch.len());
|
||||
assert!(change_count_index < cwd_index);
|
||||
assert!(cwd_index < files_index);
|
||||
assert!(files_index < patch_index);
|
||||
assert!(patch_index < tool_index);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
source: core/src/guardian.rs
|
||||
source: core/src/guardian_tests.rs
|
||||
expression: "context_snapshot::format_labeled_requests_snapshot(\"Guardian review request layout\",\n&[(\"Guardian Review Request\", &request)], &ContextSnapshotOptions::default(),)"
|
||||
---
|
||||
Scenario: Guardian review request layout
|
||||
|
||||
@@ -90,7 +90,7 @@ fn resolve_workdir_base_path(
|
||||
|
||||
/// Validates feature/policy constraints for `with_additional_permissions` and
|
||||
/// normalizes any path-based permissions. Errors if the request is invalid.
|
||||
pub(super) fn normalize_and_validate_additional_permissions(
|
||||
pub(crate) fn normalize_and_validate_additional_permissions(
|
||||
request_permission_enabled: bool,
|
||||
approval_policy: AskForApproval,
|
||||
sandbox_permissions: SandboxPermissions,
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
---
|
||||
source: tui/src/chatwidget/tests.rs
|
||||
expression: popup
|
||||
---
|
||||
Experimental features
|
||||
Toggle experimental features. Changes are saved to config.toml.
|
||||
|
||||
› [ ] JavaScript REPL Enable a persistent Node-backed JavaScript REPL for interactive website debugging
|
||||
and other inline JavaScript execution capabilities. Requires Node >= v22.22.0
|
||||
installed.
|
||||
[ ] Bubblewrap sandbox Try the new linux sandbox based on bubblewrap.
|
||||
[ ] Multi-agents Ask Codex to spawn multiple agents to parallelize the work and win in efficiency.
|
||||
[ ] Apps Use a connected ChatGPT App using "$". Install Apps via /apps command. Restart
|
||||
Codex after enabling.
|
||||
[ ] Guardian approvals Let a guardian subagent review `on-request` approval prompts instead of showing
|
||||
them to you, including sandbox escapes and blocked network access.
|
||||
[ ] Prevent sleep while running Keep your computer awake while Codex is running a thread.
|
||||
|
||||
Press space to select or enter to save for next conversation
|
||||
@@ -6949,6 +6949,11 @@ async fn experimental_popup_includes_guardian_approval() {
|
||||
chat.open_experimental_popup();
|
||||
|
||||
let popup = render_bottom_popup(&chat, 120);
|
||||
#[cfg(target_os = "linux")]
|
||||
insta::with_settings!({ snapshot_suffix => "linux" }, {
|
||||
assert_snapshot!("experimental_popup_includes_guardian_approval", popup);
|
||||
});
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
assert_snapshot!("experimental_popup_includes_guardian_approval", popup);
|
||||
}
|
||||
|
||||
|
||||
17
defs.bzl
17
defs.bzl
@@ -80,10 +80,15 @@ def codex_rust_crate(
|
||||
`CARGO_BIN_EXE_*` environment variables. These are only needed for binaries from a different crate.
|
||||
"""
|
||||
test_env = {
|
||||
"INSTA_WORKSPACE_ROOT": ".",
|
||||
"INSTA_WORKSPACE_ROOT": "codex-rs",
|
||||
"INSTA_SNAPSHOT_PATH": "src",
|
||||
}
|
||||
|
||||
cargo_like_package = native.package_name()
|
||||
if cargo_like_package.startswith("codex-rs/"):
|
||||
cargo_like_package = cargo_like_package[len("codex-rs/"):]
|
||||
snapshot_path_remap = "--remap-path-prefix=%s=%s" % (native.package_name(), cargo_like_package)
|
||||
|
||||
rustc_env = {
|
||||
"BAZEL_PACKAGE": native.package_name(),
|
||||
} | rustc_env
|
||||
@@ -127,7 +132,9 @@ def codex_rust_crate(
|
||||
crate = name,
|
||||
env = test_env,
|
||||
deps = all_crate_deps(normal = True, normal_dev = True) + maybe_deps + deps_extra,
|
||||
rustc_flags = rustc_flags_extra,
|
||||
# Keep `file!()` paths Cargo-like (`core/src/...`) instead of
|
||||
# Bazel package-prefixed (`codex-rs/core/src/...`) for snapshot parity.
|
||||
rustc_flags = rustc_flags_extra + [snapshot_path_remap],
|
||||
rustc_env = rustc_env,
|
||||
data = test_data_extra,
|
||||
tags = test_tags,
|
||||
@@ -174,11 +181,11 @@ def codex_rust_crate(
|
||||
compile_data = native.glob(["tests/**"], allow_empty = True) + integration_compile_data_extra,
|
||||
deps = all_crate_deps(normal = True, normal_dev = True) + maybe_deps + deps_extra,
|
||||
# Keep `file!()` paths Cargo-like (`core/tests/...`) instead of
|
||||
# Bazel workspace-prefixed (`codex-rs/core/tests/...`) for snapshot parity.
|
||||
rustc_flags = rustc_flags_extra + ["--remap-path-prefix=codex-rs="],
|
||||
# Bazel package-prefixed (`codex-rs/core/tests/...`) for snapshot parity.
|
||||
rustc_flags = rustc_flags_extra + [snapshot_path_remap],
|
||||
rustc_env = rustc_env,
|
||||
# Important: do not merge `test_env` here. Its unit-test-only
|
||||
# `INSTA_WORKSPACE_ROOT="."` can point integration tests at the
|
||||
# `INSTA_WORKSPACE_ROOT="codex-rs"` can point integration tests at the
|
||||
# runfiles cwd and cause false `.snap.new` churn on Linux.
|
||||
env = cargo_env,
|
||||
tags = test_tags,
|
||||
|
||||
Reference in New Issue
Block a user