Compare commits

...

5 Commits

Author SHA1 Message Date
Charles Cunningham
7464ec0906 core: stabilize fork snapshot across platforms
Co-authored-by: Codex <noreply@openai.com>
2026-03-22 11:46:27 -07:00
Charles Cunningham
bea065b40d core: snapshot full first forked request
Co-authored-by: Codex <noreply@openai.com>
2026-03-22 11:46:27 -07:00
Charles Cunningham
20bb569628 core: fix fork snapshot helper borrow
Co-authored-by: Codex <noreply@openai.com>
2026-03-22 11:46:27 -07:00
Charles Cunningham
c42549daab core: narrow fork snapshot fixture
Co-authored-by: Codex <noreply@openai.com>
2026-03-22 11:46:27 -07:00
Charles Cunningham
e3e3f29032 core: snapshot fork startup context injection
Co-authored-by: Codex <noreply@openai.com>
2026-03-22 11:46:27 -07:00
2 changed files with 157 additions and 0 deletions

View File

@@ -64,17 +64,31 @@ use codex_execpolicy::NetworkRuleProtocol;
use codex_execpolicy::Policy;
use codex_network_proxy::NetworkProxyConfig;
use codex_otel::TelemetryAuthMode;
use codex_protocol::config_types::CollaborationMode;
use codex_protocol::config_types::ModeKind;
use codex_protocol::config_types::Settings;
use codex_protocol::models::BaseInstructions;
use codex_protocol::models::ContentItem;
use codex_protocol::models::DeveloperInstructions;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::openai_models::ModelsResponse;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ConversationAudioParams;
use codex_protocol::protocol::RealtimeAudioFrame;
use codex_protocol::protocol::Submission;
use codex_protocol::protocol::W3cTraceContext;
use core_test_support::context_snapshot;
use core_test_support::context_snapshot::ContextSnapshotOptions;
use core_test_support::context_snapshot::ContextSnapshotRenderMode;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::test_codex::test_codex;
use core_test_support::tracing::install_test_tracing;
use core_test_support::wait_for_event;
use opentelemetry::trace::TraceContextExt;
use opentelemetry::trace::TraceId;
use std::path::Path;
@@ -1115,6 +1129,132 @@ async fn record_initial_history_reconstructs_forked_transcript() {
assert_eq!(expected, history.raw_items());
}
/// Snapshots the first model request sent by a forked thread.
///
/// Scenario exercised here:
/// 1. An initial thread is built with `AskForApproval::OnRequest` and runs one
///    turn ("fork seed") so there is a rollout to fork from.
/// 2. The thread is forked with a config whose approval policy is
///    `AskForApproval::UnlessTrusted`.
/// 3. Before the first forked turn, `Op::OverrideTurnContext` switches the
///    approval policy to `AskForApproval::Never` and selects `ModeKind::Plan`
///    with custom developer instructions.
/// 4. The first forked user turn ("after fork") is submitted and the request
///    captured by the mock server is rendered and compared against the stored
///    `insta` snapshot.
///
/// Returns an error if building the test harness, submitting an op, or forking
/// the thread fails; panics if the rollout path is missing or the snapshot
/// does not match.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn fork_startup_context_then_first_turn_diff_snapshot() -> anyhow::Result<()> {
    /// Replaces the text between `<tag>` and `</tag>` with a fixed placeholder
    /// for each tag listed below, so the snapshot does not change with the
    /// contents of those tags.
    ///
    /// Only the first `<tag>…</tag>` pair per tag is rewritten on a line. The
    /// closing tag is searched for *after* the opening tag, so a stray
    /// `</tag>` earlier on the line can never produce an inverted range.
    fn redact_environment_tags(line: &str) -> String {
        const REDACTIONS: [(&str, &str); 4] = [
            ("cwd", "<CWD>"),
            ("shell", "<SHELL>"),
            ("current_date", "<CURRENT_DATE>"),
            ("timezone", "<TIMEZONE>"),
        ];

        let mut line = line.to_string();
        for (tag, replacement) in REDACTIONS {
            let open_tag = format!("<{tag}>");
            let close_tag = format!("</{tag}>");
            if let Some(open_at) = line.find(&open_tag) {
                let content_start = open_at + open_tag.len();
                // Offset of the closing tag relative to `content_start`, i.e.
                // the byte length of the content being redacted.
                if let Some(content_len) = line[content_start..].find(&close_tag) {
                    line.replace_range(content_start..content_start + content_len, replacement);
                }
            }
        }
        line
    }

    let server = start_mock_server().await;

    // Responses are mounted in the order they are expected to be consumed: the
    // seed turn on the initial thread first, then the first turn on the fork.
    let _initial_request = mount_sse_once(
        &server,
        sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]),
    )
    .await;
    let first_forked_request = mount_sse_once(
        &server,
        sse(vec![ev_response_created("resp-2"), ev_completed("resp-2")]),
    )
    .await;

    let mut builder = test_codex().with_config(|config| {
        config.permissions.approval_policy =
            codex_config::Constrained::allow_any(AskForApproval::OnRequest);
    });
    let initial = builder.build(&server).await?;
    let rollout_path = initial
        .session_configured
        .rollout_path
        .clone()
        .expect("rollout path");

    // Run one complete turn on the initial thread before forking.
    initial
        .codex
        .submit(Op::UserInput {
            items: vec![UserInput::Text {
                text: "fork seed".into(),
                text_elements: Vec::new(),
            }],
            final_output_json_schema: None,
        })
        .await?;
    wait_for_event(&initial.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

    // Fork with a different approval policy than the initial thread used.
    let mut fork_config = initial.config.clone();
    fork_config.permissions.approval_policy =
        codex_config::Constrained::allow_any(AskForApproval::UnlessTrusted);
    // NOTE(review): `usize::MAX` presumably means "keep the whole history" —
    // confirm against `fork_thread`'s documentation.
    let forked = initial
        .thread_manager
        .fork_thread(usize::MAX, fork_config, rollout_path, false, None)
        .await?;

    // Change the approval policy again and enter plan mode before the first
    // forked turn.
    let collaboration_mode = CollaborationMode {
        mode: ModeKind::Plan,
        settings: Settings {
            model: forked.session_configured.model.clone(),
            reasoning_effort: None,
            developer_instructions: Some("Fork turn collaboration instructions.".to_string()),
        },
    };
    forked
        .thread
        .submit(Op::OverrideTurnContext {
            cwd: None,
            approval_policy: Some(AskForApproval::Never),
            approvals_reviewer: None,
            sandbox_policy: None,
            windows_sandbox_level: None,
            model: None,
            effort: None,
            summary: None,
            service_tier: None,
            collaboration_mode: Some(collaboration_mode),
            personality: None,
        })
        .await?;

    forked
        .thread
        .submit(Op::UserInput {
            items: vec![UserInput::Text {
                text: "after fork".into(),
                text_elements: Vec::new(),
            }],
            final_output_json_schema: None,
        })
        .await?;
    wait_for_event(&forked.thread, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

    let request = first_forked_request.single_request();
    let snapshot = context_snapshot::format_labeled_requests_snapshot(
        "First request after fork when fork startup changes approval policy and the first forked turn changes approval policy again and enters plan mode.",
        &[("First Forked Turn Request", &request)],
        &ContextSnapshotOptions::default()
            .render_mode(ContextSnapshotRenderMode::FullText)
            .strip_capability_instructions()
            .strip_agents_md_user_context(),
    );

    // Redact the tag contents listed in `redact_environment_tags` before
    // comparing against the stored snapshot.
    let snapshot = snapshot
        .lines()
        .map(redact_environment_tags)
        .collect::<Vec<_>>()
        .join("\n");

    let mut settings = insta::Settings::clone_current();
    settings.set_snapshot_path("snapshots");
    settings.set_prepend_module_to_snapshot(false);
    settings.bind(|| {
        insta::assert_snapshot!(
            "codex_core__codex_tests__fork_startup_context_then_first_turn_diff",
            snapshot
        );
    });

    Ok(())
}
#[tokio::test]
async fn record_initial_history_forked_hydrates_previous_turn_settings() {
let (session, turn_context) = make_session_and_context().await;

View File

@@ -0,0 +1,17 @@
---
source: core/src/codex_tests.rs
assertion_line: 1282
expression: snapshot
---
Scenario: First request after fork when fork startup changes approval policy and the first forked turn changes approval policy again and enters plan mode.
## First Forked Turn Request
00:message/developer:<permissions instructions>\nFilesystem sandboxing defines which files can be read or written. `sandbox_mode` is `read-only`: The sandbox only permits reading files. Network access is restricted.\n# Escalation Requests\n\nCommands are run outside the sandbox if they are approved by the user, or match an existing rule that allows it to run unrestricted. The command string is split into independent command segments at shell control operators, including but not limited to:\n\n- Pipes: |\n- Logical operators: &&, ||\n- Command separators: ;\n- Subshell boundaries: (...), $(...)\n\nEach resulting segment is evaluated independently for sandbox restrictions and approval requirements.\n\nExample:\n\ngit pull | tee output.txt\n\nThis is treated as two command segments:\n\n["git", "pull"]\n\n["tee", "output.txt"]\n\nCommands that use more advanced shell features like redirection (>, >>, <), substitutions ($(...), ...), environment variables (FOO=bar), or wildcard patterns (*, ?) will not be evaluated against rules, to limit the scope of what an approved rule allows.\n\n## How to request escalation\n\nIMPORTANT: To request approval to execute a command that will require escalated privileges:\n\n- Provide the `sandbox_permissions` parameter with the value `"require_escalated"`\n- Include a short question asking the user if they want to allow the action in `justification` parameter. e.g. "Do you want to download and install dependencies for this project?"\n- Optionally suggest a `prefix_rule` - this will be shown to the user with an option to persist the rule approval for future sessions.\n\nIf you run a command that is important to solving the user's query, but it fails because of sandboxing or with a likely sandbox-related network error (for example DNS/host resolution, registry/index access, or dependency download failure), rerun the command with "require_escalated". 
ALWAYS proceed to use the `justification` parameter - do not message the user before requesting approval for the command.\n\n## When to request escalation\n\nWhile commands are running inside the sandbox, here are some scenarios that will require escalation outside the sandbox:\n\n- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var)\n- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files.\n- If you run a command that is important to solving the user's query, but it fails because of sandboxing or with a likely sandbox-related network error (for example DNS/host resolution, registry/index access, or dependency download failure), rerun the command with `require_escalated`. ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters. do not message the user before requesting approval for the command.\n- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for.\n- Be judicious with escalating, but if completing the user's request requires it, you should do so - don't try and circumvent approvals by using other tools.\n\n## prefix_rule guidance\n\nWhen choosing a `prefix_rule`, request one that will allow you to fulfill similar requests from the user in the future without re-requesting escalation. It should be categorical and reasonably scoped to similar capabilities. You should rarely pass the entire command into `prefix_rule`.\n\n### Banned prefix_rules \nAvoid requesting overly broad prefixes that the user would be ill-advised to approve. For example, do not request ["python3"], ["python", "-"], or other similar prefixes that would allow arbitrary scripting.\nNEVER provide a prefix_rule argument for destructive commands like rm.\nNEVER provide a prefix_rule if your command uses a heredoc or herestring. 
\n\n### Examples\nGood examples of prefixes:\n- ["npm", "run", "dev"]\n- ["gh", "pr", "check"]\n- ["cargo", "test"]\n</permissions instructions>
01:message/user:<environment_context>\n <cwd><CWD></cwd>\n <shell><SHELL></shell>\n <current_date><CURRENT_DATE></current_date>\n <timezone><TIMEZONE></timezone>\n</environment_context>
02:message/user:fork seed
03:message/developer:<permissions instructions>\nFilesystem sandboxing defines which files can be read or written. `sandbox_mode` is `read-only`: The sandbox only permits reading files. Network access is restricted.\n Approvals are your mechanism to get user consent to run shell commands without the sandbox. `approval_policy` is `unless-trusted`: The harness will escalate most commands for user approval, apart from a limited allowlist of safe "read" commands.\n</permissions instructions>
04:message/user:<environment_context>\n <cwd><CWD></cwd>\n <shell><SHELL></shell>\n <current_date><CURRENT_DATE></current_date>\n <timezone><TIMEZONE></timezone>\n</environment_context>
05:message/developer[2]:
[01] <permissions instructions>\nFilesystem sandboxing defines which files can be read or written. `sandbox_mode` is `read-only`: The sandbox only permits reading files. Network access is restricted.\nApproval policy is currently never. Do not provide the `sandbox_permissions` for any reason, commands will be rejected.\n</permissions instructions>
[02] <collaboration_mode>Fork turn collaboration instructions.</collaboration_mode>
06:message/user:after fork