Compare commits

...

2 Commits

Author SHA1 Message Date
Abhishek Bhardwaj
933de8595a Test passing 2025-11-02 01:09:06 -07:00
Abhishek Bhardwaj
4c08be1d7a feature: Add !cmd outputs to model conversation history
Feature: Persist both the LocalShellCall metadata and the FunctionCallOutput from !cmd executions so the model sees each command invocation and its stdout in the conversation history.

Flow: UserShellCommandTask builds a shell ToolCall, lets the runtime run it, then records a LocalShellCall followed by the resulting FunctionCallOutput via Session::record_conversation_items so both items land in the session history.

Abort/Ctrl-C: If the runtime returns "aborted" (the Ctrl-C path) or handle_tool_call errors, we mark the LocalShellCall as Incomplete and emit a FunctionCallOutput with success=false so downstream consumers treat it as a failed or interrupted command.],workdir:/Users/abhishek/code/codex}
2025-11-02 00:58:03 -07:00
2 changed files with 139 additions and 3 deletions

View File

@@ -2834,6 +2834,68 @@ mod tests {
}
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn user_shell_command_records_history_pair() {
use codex_protocol::models::ResponseItem;
use std::time::Duration;
let (sess, _tc, rx) = make_session_and_context_with_rx();
// Kick off a simple shell command via the helper.
let mut previous_context: Option<Arc<TurnContext>> = None;
crate::codex::handlers::run_user_shell_command(
&sess,
"sub-user-shell".to_string(),
"echo hello-codex".to_string(),
&mut previous_context,
)
.await;
// Wait until we observe ExecCommandEnd to ensure command finished and history appended.
let mut saw_exec_end = false;
for _ in 0..50 {
match tokio::time::timeout(Duration::from_millis(200), rx.recv()).await {
Ok(Ok(event)) => match event.msg {
EventMsg::ExecCommandEnd(_) => {
saw_exec_end = true;
break;
}
_ => {}
},
_ => {}
}
}
assert!(saw_exec_end, "did not observe ExecCommandEnd event");
// Inspect conversation history for the paired items (LocalShellCall -> FunctionCallOutput).
// Recording happens after ExecCommandEnd, so allow a brief window for the history write.
let mut found_pair = false;
let mut last_history_snapshot = Vec::new();
for _ in 0..50 {
let history = sess.clone_history().await.get_history();
last_history_snapshot = history.clone();
let mut seen = false;
for idx in (0..history.len()).rev() {
if let ResponseItem::LocalShellCall { call_id: Some(cid), .. } = &history[idx] {
if let Some(next) = history.get(idx + 1) {
if let ResponseItem::FunctionCallOutput { call_id, output } = next {
if call_id == cid && output.content.contains("hello-codex") {
seen = true;
break;
}
}
}
}
}
if seen {
found_pair = true;
break;
}
tokio::time::sleep(std::time::Duration::from_millis(20)).await;
}
assert!(found_pair, "expected LocalShellCall followed by FunctionCallOutput with matching call_id and output containing 'hello-codex'\nFull history: {:?}", last_history_snapshot);
}
fn sample_rollout(
session: &Session,
turn_context: &TurnContext,

View File

@@ -1,6 +1,12 @@
use std::sync::Arc;
use async_trait::async_trait;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::LocalShellAction;
use codex_protocol::models::LocalShellExecAction;
use codex_protocol::models::LocalShellStatus;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::models::ShellToolCallParams;
use codex_protocol::user_input::UserInput;
use tokio::sync::Mutex;
@@ -79,13 +85,14 @@ impl SessionTask for UserShellCommandTask {
};
let params = ShellToolCallParams {
command: shell_invocation,
command: shell_invocation.clone(),
workdir: None,
timeout_ms: None,
with_escalated_permissions: None,
justification: None,
};
let params_timeout_ms = params.timeout_ms.clone();
let tool_call = ToolCall {
tool_name: USER_SHELL_TOOL_NAME.to_string(),
call_id: Uuid::new_v4().to_string(),
@@ -101,11 +108,78 @@ impl SessionTask for UserShellCommandTask {
Arc::clone(&tracker),
);
if let Err(err) = runtime
let call_id = tool_call.call_id.clone();
match runtime
.handle_tool_call(tool_call, cancellation_token)
.await
{
error!("user shell command failed: {err:?}");
Ok(resp) => {
let mut status = LocalShellStatus::Completed;
// Special-case 'aborted' commands to mark failure.
let mut output_item: ResponseItem = resp.clone().into();
if let ResponseInputItem::FunctionCallOutput { output, .. } = &resp {
if output.content == "aborted" {
status = LocalShellStatus::Incomplete;
output_item = ResponseItem::FunctionCallOutput {
call_id: call_id.clone(),
output: FunctionCallOutputPayload {
content: "aborted".to_string(),
success: Some(false),
..Default::default()
},
};
}
}
let local_call = ResponseItem::LocalShellCall {
id: None,
call_id: Some(call_id.clone()),
status,
action: LocalShellAction::Exec(LocalShellExecAction {
command: shell_invocation.clone(),
timeout_ms: params_timeout_ms,
working_directory: Some(turn_context.cwd.to_string_lossy().into_owned()),
// The Responses API expects `env` to be an object with string keys/values.
// Sending `null` here yields an `invalid_type` error, so we send `{}`.
env: Some(std::collections::HashMap::new()),
user: None,
}),
};
session
.record_conversation_items(turn_context.as_ref(), &[local_call, output_item])
.await;
}
Err(err) => {
error!("user shell command failed: {err:?}");
let local_call = ResponseItem::LocalShellCall {
id: None,
call_id: Some(call_id.clone()),
status: LocalShellStatus::Incomplete,
action: LocalShellAction::Exec(LocalShellExecAction {
// Clone because the original invocation was moved into `params` above.
command: shell_invocation.clone(),
timeout_ms: params_timeout_ms,
working_directory: Some(turn_context.cwd.to_string_lossy().into_owned()),
// Match success path: ensure an empty object instead of `null`.
env: Some(std::collections::HashMap::new()),
user: None,
}),
};
let output_item = ResponseItem::FunctionCallOutput {
call_id: call_id.clone(),
output: FunctionCallOutputPayload {
content: err.to_string(),
success: Some(false),
..Default::default()
},
};
session
.record_conversation_items(turn_context.as_ref(), &[local_call, output_item])
.await;
}
}
None
}