mirror of
https://github.com/openai/codex.git
synced 2026-04-30 11:21:34 +03:00
[hooks] add non-streaming (non-stdin style) shell-only PreToolUse support (#15211)
- add `PreToolUse` hook for bash-like tool execution only at first
- block shell execution before dispatch with deny-only hook behavior
- introduce a `common.rs` matcher framework for deciding when hooks are run

Example run:

```
› run three parallel echo commands, and the second one should echo "[block-pre-tool-use]" as a test

• Running the three echo commands in parallel now and I’ll report the output directly.

• Running PreToolUse hook: name for demo pre tool use hook
• Running PreToolUse hook: name for demo pre tool use hook
• Running PreToolUse hook: name for demo pre tool use hook

PreToolUse hook (completed)
  warning: wizard-tower PreToolUse demo inspected Bash: echo "first parallel echo"

PreToolUse hook (blocked)
  warning: wizard-tower PreToolUse demo blocked a Bash command on purpose.
  feedback: PreToolUse demo blocked the command. Remove [block-pre-tool-use] to continue.

PreToolUse hook (completed)
  warning: wizard-tower PreToolUse demo inspected Bash: echo "third parallel echo"

• Ran echo "first parallel echo"
  └ first parallel echo

• Ran echo "third parallel echo"
  └ third parallel echo

• Three little waves went out in parallel.

  1. printed first parallel echo
  2. was blocked before execution because it contained the exact test string [block-pre-tool-use]
  3. printed third parallel echo

  There was also an unrelated macOS defaults warning around the successful commands, but the echoes themselves worked fine. If you want, I can rerun the second one with a slightly modified string so it passes cleanly.
```
This commit is contained in:
@@ -174,6 +174,69 @@ if payload.get("prompt") == {blocked_prompt_json}:
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_pre_tool_use_hook(
|
||||
home: &Path,
|
||||
matcher: Option<&str>,
|
||||
mode: &str,
|
||||
reason: &str,
|
||||
) -> Result<()> {
|
||||
let script_path = home.join("pre_tool_use_hook.py");
|
||||
let log_path = home.join("pre_tool_use_hook_log.jsonl");
|
||||
let mode_json = serde_json::to_string(mode).context("serialize pre tool use mode")?;
|
||||
let reason_json = serde_json::to_string(reason).context("serialize pre tool use reason")?;
|
||||
let script = format!(
|
||||
r#"import json
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
log_path = Path(r"{log_path}")
|
||||
mode = {mode_json}
|
||||
reason = {reason_json}
|
||||
|
||||
payload = json.load(sys.stdin)
|
||||
|
||||
with log_path.open("a", encoding="utf-8") as handle:
|
||||
handle.write(json.dumps(payload) + "\n")
|
||||
|
||||
if mode == "json_deny":
|
||||
print(json.dumps({{
|
||||
"hookSpecificOutput": {{
|
||||
"hookEventName": "PreToolUse",
|
||||
"permissionDecision": "deny",
|
||||
"permissionDecisionReason": reason
|
||||
}}
|
||||
}}))
|
||||
elif mode == "exit_2":
|
||||
sys.stderr.write(reason + "\n")
|
||||
raise SystemExit(2)
|
||||
"#,
|
||||
log_path = log_path.display(),
|
||||
mode_json = mode_json,
|
||||
reason_json = reason_json,
|
||||
);
|
||||
|
||||
let mut group = serde_json::json!({
|
||||
"hooks": [{
|
||||
"type": "command",
|
||||
"command": format!("python3 {}", script_path.display()),
|
||||
"statusMessage": "running pre tool use hook",
|
||||
}]
|
||||
});
|
||||
if let Some(matcher) = matcher {
|
||||
group["matcher"] = Value::String(matcher.to_string());
|
||||
}
|
||||
|
||||
let hooks = serde_json::json!({
|
||||
"hooks": {
|
||||
"PreToolUse": [group]
|
||||
}
|
||||
});
|
||||
|
||||
fs::write(&script_path, script).context("write pre tool use hook script")?;
|
||||
fs::write(home.join("hooks.json"), hooks.to_string()).context("write hooks.json")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_session_start_hook_recording_transcript(home: &Path) -> Result<()> {
|
||||
let script_path = home.join("session_start_hook.py");
|
||||
let log_path = home.join("session_start_hook_log.jsonl");
|
||||
@@ -253,6 +316,15 @@ fn read_stop_hook_inputs(home: &Path) -> Result<Vec<serde_json::Value>> {
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn read_pre_tool_use_hook_inputs(home: &Path) -> Result<Vec<serde_json::Value>> {
|
||||
fs::read_to_string(home.join("pre_tool_use_hook_log.jsonl"))
|
||||
.context("read pre tool use hook log")?
|
||||
.lines()
|
||||
.filter(|line| !line.trim().is_empty())
|
||||
.map(|line| serde_json::from_str(line).context("parse pre tool use hook log line"))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn read_session_start_hook_inputs(home: &Path) -> Result<Vec<serde_json::Value>> {
|
||||
fs::read_to_string(home.join("session_start_hook_log.jsonl"))
|
||||
.context("read session start hook log")?
|
||||
@@ -849,3 +921,357 @@ async fn blocked_queued_prompt_does_not_strand_earlier_accepted_prompt() -> Resu
|
||||
server.shutdown().await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn pre_tool_use_blocks_shell_command_before_execution() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let call_id = "pretooluse-shell-command";
|
||||
let marker = std::env::temp_dir().join("pretooluse-shell-command-marker");
|
||||
let command = format!("printf blocked > {}", marker.display());
|
||||
let args = serde_json::json!({ "command": command });
|
||||
let responses = mount_sse_sequence(
|
||||
&server,
|
||||
vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
core_test_support::responses::ev_function_call(
|
||||
call_id,
|
||||
"shell_command",
|
||||
&serde_json::to_string(&args)?,
|
||||
),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-2"),
|
||||
ev_assistant_message("msg-1", "hook blocked it"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
|
||||
let mut builder = test_codex()
|
||||
.with_pre_build_hook(|home| {
|
||||
if let Err(error) =
|
||||
write_pre_tool_use_hook(home, Some("^Bash$"), "json_deny", "blocked by pre hook")
|
||||
{
|
||||
panic!("failed to write pre tool use hook test fixture: {error}");
|
||||
}
|
||||
})
|
||||
.with_config(|config| {
|
||||
config
|
||||
.features
|
||||
.enable(Feature::CodexHooks)
|
||||
.expect("test config should allow feature update");
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
if marker.exists() {
|
||||
fs::remove_file(&marker).context("remove leftover pre tool use marker")?;
|
||||
}
|
||||
|
||||
test.submit_turn_with_policy(
|
||||
"run the blocked shell command",
|
||||
codex_protocol::protocol::SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let requests = responses.requests();
|
||||
assert_eq!(requests.len(), 2);
|
||||
let output_item = requests[1].function_call_output(call_id);
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("shell command output string");
|
||||
assert!(
|
||||
output.contains("Bash command blocked by hook: blocked by pre hook"),
|
||||
"blocked tool output should surface the hook reason",
|
||||
);
|
||||
assert!(
|
||||
output.contains(&format!("Command: {command}")),
|
||||
"blocked tool output should surface the blocked command",
|
||||
);
|
||||
assert!(
|
||||
!marker.exists(),
|
||||
"blocked command should not create marker file"
|
||||
);
|
||||
|
||||
let hook_inputs = read_pre_tool_use_hook_inputs(test.codex_home_path())?;
|
||||
assert_eq!(hook_inputs.len(), 1);
|
||||
assert_eq!(hook_inputs[0]["hook_event_name"], "PreToolUse");
|
||||
assert_eq!(hook_inputs[0]["tool_name"], "Bash");
|
||||
assert_eq!(hook_inputs[0]["tool_use_id"], call_id);
|
||||
assert_eq!(hook_inputs[0]["tool_input"]["command"], command);
|
||||
let transcript_path = hook_inputs[0]["transcript_path"]
|
||||
.as_str()
|
||||
.expect("pre tool use hook transcript_path");
|
||||
assert!(
|
||||
!transcript_path.is_empty(),
|
||||
"pre tool use hook should receive a non-empty transcript_path",
|
||||
);
|
||||
assert!(
|
||||
Path::new(transcript_path).exists(),
|
||||
"pre tool use hook transcript_path should be materialized on disk",
|
||||
);
|
||||
assert!(
|
||||
hook_inputs[0]["turn_id"]
|
||||
.as_str()
|
||||
.is_some_and(|turn_id| !turn_id.is_empty())
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn pre_tool_use_blocks_local_shell_before_execution() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let call_id = "pretooluse-local-shell";
|
||||
let marker = std::env::temp_dir().join("pretooluse-local-shell-marker");
|
||||
let command = vec![
|
||||
"/bin/sh".to_string(),
|
||||
"-c".to_string(),
|
||||
format!("printf blocked > {}", marker.display()),
|
||||
];
|
||||
let responses = mount_sse_sequence(
|
||||
&server,
|
||||
vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
core_test_support::responses::ev_local_shell_call(
|
||||
call_id,
|
||||
"completed",
|
||||
command.iter().map(String::as_str).collect(),
|
||||
),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-2"),
|
||||
ev_assistant_message("msg-1", "local shell blocked"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
|
||||
let mut builder = test_codex()
|
||||
.with_pre_build_hook(|home| {
|
||||
if let Err(error) =
|
||||
write_pre_tool_use_hook(home, Some("^Bash$"), "json_deny", "blocked local shell")
|
||||
{
|
||||
panic!("failed to write pre tool use hook test fixture: {error}");
|
||||
}
|
||||
})
|
||||
.with_config(|config| {
|
||||
config
|
||||
.features
|
||||
.enable(Feature::CodexHooks)
|
||||
.expect("test config should allow feature update");
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
if marker.exists() {
|
||||
fs::remove_file(&marker).context("remove leftover local shell marker")?;
|
||||
}
|
||||
|
||||
test.submit_turn("run the blocked local shell command")
|
||||
.await?;
|
||||
|
||||
let requests = responses.requests();
|
||||
assert_eq!(requests.len(), 2);
|
||||
let output_item = requests[1].function_call_output(call_id);
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("local shell output string");
|
||||
assert!(
|
||||
output.contains("Bash command blocked by hook: blocked local shell"),
|
||||
"blocked local shell output should surface the hook reason",
|
||||
);
|
||||
assert!(
|
||||
output.contains(&format!(
|
||||
"Command: {}",
|
||||
codex_shell_command::parse_command::shlex_join(&command)
|
||||
)),
|
||||
"blocked local shell output should surface the blocked command",
|
||||
);
|
||||
assert!(
|
||||
!marker.exists(),
|
||||
"blocked local shell command should not execute"
|
||||
);
|
||||
|
||||
let hook_inputs = read_pre_tool_use_hook_inputs(test.codex_home_path())?;
|
||||
assert_eq!(hook_inputs.len(), 1);
|
||||
assert_eq!(
|
||||
hook_inputs[0]["tool_input"]["command"],
|
||||
codex_shell_command::parse_command::shlex_join(&command),
|
||||
);
|
||||
assert!(
|
||||
hook_inputs[0]["turn_id"]
|
||||
.as_str()
|
||||
.is_some_and(|turn_id| !turn_id.is_empty())
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn pre_tool_use_blocks_exec_command_before_execution() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let call_id = "pretooluse-exec-command";
|
||||
let marker = std::env::temp_dir().join("pretooluse-exec-command-marker");
|
||||
let command = format!("printf blocked > {}", marker.display());
|
||||
let args = serde_json::json!({ "cmd": command });
|
||||
let responses = mount_sse_sequence(
|
||||
&server,
|
||||
vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
core_test_support::responses::ev_function_call(
|
||||
call_id,
|
||||
"exec_command",
|
||||
&serde_json::to_string(&args)?,
|
||||
),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-2"),
|
||||
ev_assistant_message("msg-1", "exec command blocked"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
|
||||
let mut builder = test_codex()
|
||||
.with_pre_build_hook(|home| {
|
||||
if let Err(error) =
|
||||
write_pre_tool_use_hook(home, Some("^Bash$"), "exit_2", "blocked exec command")
|
||||
{
|
||||
panic!("failed to write pre tool use hook test fixture: {error}");
|
||||
}
|
||||
})
|
||||
.with_config(|config| {
|
||||
config.use_experimental_unified_exec_tool = true;
|
||||
config
|
||||
.features
|
||||
.enable(Feature::CodexHooks)
|
||||
.expect("test config should allow feature update");
|
||||
config
|
||||
.features
|
||||
.enable(Feature::UnifiedExec)
|
||||
.expect("test config should allow feature update");
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
if marker.exists() {
|
||||
fs::remove_file(&marker).context("remove leftover exec marker")?;
|
||||
}
|
||||
|
||||
test.submit_turn("run the blocked exec command").await?;
|
||||
|
||||
let requests = responses.requests();
|
||||
assert_eq!(requests.len(), 2);
|
||||
let output_item = requests[1].function_call_output(call_id);
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("exec command output string");
|
||||
assert!(
|
||||
output.contains("Bash command blocked by hook: blocked exec command"),
|
||||
"blocked exec command output should surface the hook reason",
|
||||
);
|
||||
assert!(
|
||||
output.contains(&format!("Command: {command}")),
|
||||
"blocked exec command output should surface the blocked command",
|
||||
);
|
||||
assert!(!marker.exists(), "blocked exec command should not execute");
|
||||
|
||||
let hook_inputs = read_pre_tool_use_hook_inputs(test.codex_home_path())?;
|
||||
assert_eq!(hook_inputs.len(), 1);
|
||||
assert_eq!(hook_inputs[0]["tool_use_id"], call_id);
|
||||
assert_eq!(hook_inputs[0]["tool_input"]["command"], command);
|
||||
assert!(
|
||||
hook_inputs[0]["turn_id"]
|
||||
.as_str()
|
||||
.is_some_and(|turn_id| !turn_id.is_empty())
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn pre_tool_use_does_not_fire_for_non_shell_tools() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let call_id = "pretooluse-update-plan";
|
||||
let args = serde_json::json!({
|
||||
"plan": [{
|
||||
"step": "watch the tide",
|
||||
"status": "pending",
|
||||
}]
|
||||
});
|
||||
let responses = mount_sse_sequence(
|
||||
&server,
|
||||
vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
core_test_support::responses::ev_function_call(
|
||||
call_id,
|
||||
"update_plan",
|
||||
&serde_json::to_string(&args)?,
|
||||
),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-2"),
|
||||
ev_assistant_message("msg-1", "plan updated"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
|
||||
let mut builder = test_codex()
|
||||
.with_pre_build_hook(|home| {
|
||||
if let Err(error) = write_pre_tool_use_hook(home, None, "json_deny", "should not fire")
|
||||
{
|
||||
panic!("failed to write pre tool use hook test fixture: {error}");
|
||||
}
|
||||
})
|
||||
.with_config(|config| {
|
||||
config
|
||||
.features
|
||||
.enable(Feature::CodexHooks)
|
||||
.expect("test config should allow feature update");
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
test.submit_turn("update the plan").await?;
|
||||
|
||||
let requests = responses.requests();
|
||||
assert_eq!(requests.len(), 2);
|
||||
let output_item = requests[1].function_call_output(call_id);
|
||||
let output = output_item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.expect("update plan output string");
|
||||
assert!(
|
||||
!output.contains("should not fire"),
|
||||
"non-shell tool output should not be blocked by PreToolUse",
|
||||
);
|
||||
|
||||
let hook_log_path = test.codex_home_path().join("pre_tool_use_hook_log.jsonl");
|
||||
assert!(
|
||||
!hook_log_path.exists(),
|
||||
"non-shell tools should not trigger pre tool use hooks",
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -973,6 +973,7 @@ async fn handle_response_item_records_tool_result_for_local_shell_call() {
|
||||
.features
|
||||
.disable(Feature::GhostCommit)
|
||||
.expect("test config should allow feature update");
|
||||
config.permissions.approval_policy = Constrained::allow_any(AskForApproval::Never);
|
||||
})
|
||||
.build(&server)
|
||||
.await
|
||||
@@ -989,7 +990,7 @@ async fn handle_response_item_records_tool_result_for_local_shell_call() {
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TokenCount(_))).await;
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
logs_assert(|lines: &[&str]| {
|
||||
let line = lines
|
||||
|
||||
Reference in New Issue
Block a user