flaky

shell
2026-03-25 01:26:34 +03:00 · 2025-11-20 13:13:16 -08:00 · 2025-11-19 16:44:45 -08:00
22 changed files with 244 additions and 47 deletions
--- a/codex-rs/app-server/src/codex_message_processor.rs
+++ b/codex-rs/app-server/src/codex_message_processor.rs
@@ -1173,6 +1173,8 @@ impl CodexMessageProcessor {
            with_escalated_permissions: None,
            justification: None,
            arg0: None,
+            max_output_tokens: None,
+            max_output_chars: None,
        };

        let effective_policy = params
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -3058,6 +3058,8 @@ mod tests {
            with_escalated_permissions: Some(true),
            justification: Some("test".to_string()),
            arg0: None,
+            max_output_tokens: None,
+            max_output_chars: None,
        };

        let params2 = ExecParams {
--- a/codex-rs/core/src/context_manager/history.rs
+++ b/codex-rs/core/src/context_manager/history.rs
@@ -5,6 +5,7 @@ use crate::truncate::truncate_function_output_items_with_policy;
 use crate::truncate::truncate_text;
 use codex_protocol::models::FunctionCallOutputPayload;
 use codex_protocol::models::ResponseItem;
+use codex_protocol::models::ShellToolCallParams;
 use codex_protocol::protocol::TokenUsage;
 use codex_protocol::protocol::TokenUsageInfo;
 use codex_utils_tokenizer::Tokenizer;
@@ -135,6 +136,47 @@ impl ContextManager {
        normalize::remove_orphan_outputs(&mut self.items);
    }

+    /// Derives a per-call truncation override for the output of `call_id`.
+    ///
+    /// Yields `None` when no function call with that id is in the transcript,
+    /// when its arguments do not deserialize as `ShellToolCallParams`, or when
+    /// neither `max_output_tokens` nor `max_output_chars` was supplied.
+    fn get_shell_truncation_policy(&self, call_id: &str) -> Option<TruncationPolicy> {
+        let recorded_call = self.get_call_for_call_id(call_id)?;
+        if let ResponseItem::FunctionCall { arguments, .. } = recorded_call {
+            let params: ShellToolCallParams = serde_json::from_str(&arguments).ok()?;
+            Self::create_truncation_policy(params.max_output_tokens, params.max_output_chars)
+        } else {
+            None
+        }
+    }
+
+    /// Builds a truncation policy from optional limits: a token limit, when
+    /// present, wins over the character limit; `None` when neither is set.
+    fn create_truncation_policy(
+        max_output_tokens: Option<usize>,
+        max_output_chars: Option<usize>,
+    ) -> Option<TruncationPolicy> {
+        max_output_tokens
+            .map(TruncationPolicy::Tokens)
+            .or_else(|| max_output_chars.map(TruncationPolicy::Bytes))
+    }
+
+    /// Returns a clone of the first `ResponseItem::FunctionCall` in the
+    /// transcript whose `call_id` equals the given id, or `None` if no function
+    /// call with that id has been recorded.
+    fn get_call_for_call_id(&self, call_id: &str) -> Option<ResponseItem> {
+        self.items
+            .iter()
+            .find(|item| {
+                matches!(
+                    item,
+                    ResponseItem::FunctionCall { call_id: existing, .. } if existing == call_id
+                )
+            })
+            .cloned()
+    }
+
    /// Returns a clone of the contents in the transcript.
    fn contents(&self) -> Vec<ResponseItem> {
        self.items.clone()
@@ -148,13 +190,12 @@ impl ContextManager {
        let policy_with_serialization_budget = policy.mul(1.2);
        match item {
            ResponseItem::FunctionCallOutput { call_id, output } => {
-                let truncated =
-                    truncate_text(output.content.as_str(), policy_with_serialization_budget);
+                let truncation_policy_override = self.get_shell_truncation_policy(call_id);
+                let truncation_policy =
+                    truncation_policy_override.unwrap_or(policy_with_serialization_budget);
+                let truncated = truncate_text(output.content.as_str(), truncation_policy);
                let truncated_items = output.content_items.as_ref().map(|items| {
-                    truncate_function_output_items_with_policy(
-                        items,
-                        policy_with_serialization_budget,
-                    )
+                    truncate_function_output_items_with_policy(items, truncation_policy)
                });
                ResponseItem::FunctionCallOutput {
                    call_id: call_id.clone(),
--- a/codex-rs/core/src/exec.rs
+++ b/codex-rs/core/src/exec.rs
@@ -55,6 +55,8 @@ pub struct ExecParams {
    pub with_escalated_permissions: Option<bool>,
    pub justification: Option<String>,
    pub arg0: Option<String>,
+    pub max_output_tokens: Option<usize>,
+    pub max_output_chars: Option<usize>,
 }

 impl ExecParams {
@@ -100,6 +102,8 @@ pub async fn process_exec_tool_call(
        with_escalated_permissions,
        justification,
        arg0: _,
+        max_output_tokens,
+        max_output_chars,
    } = params;

    let (program, args) = command.split_first().ok_or_else(|| {
@@ -117,6 +121,8 @@ pub async fn process_exec_tool_call(
        timeout_ms,
        with_escalated_permissions,
        justification,
+        max_output_tokens,
+        max_output_chars,
    };

    let manager = SandboxManager::new();
@@ -148,6 +154,8 @@ pub(crate) async fn execute_exec_env(
        with_escalated_permissions,
        justification,
        arg0,
+        max_output_tokens,
+        max_output_chars,
    } = env;

    let params = ExecParams {
@@ -158,6 +166,8 @@ pub(crate) async fn execute_exec_env(
        with_escalated_permissions,
        justification,
        arg0,
+        max_output_tokens,
+        max_output_chars,
    };

    let start = Instant::now();
@@ -794,6 +804,8 @@ mod tests {
            with_escalated_permissions: None,
            justification: None,
            arg0: None,
+            max_output_tokens: None,
+            max_output_chars: None,
        };

        let output = exec(params, SandboxType::None, &SandboxPolicy::ReadOnly, None).await?;
--- a/codex-rs/core/src/model_family.rs
+++ b/codex-rs/core/src/model_family.rs
@@ -76,6 +76,7 @@ macro_rules! model_family {
    (
        $slug:expr, $family:expr $(, $key:ident : $value:expr )* $(,)?
    ) => {{
+        let truncation_policy = TruncationPolicy::Bytes(10_000);
        // defaults
        #[allow(unused_mut)]
        let mut mf = ModelFamily {
@@ -90,10 +91,10 @@ macro_rules! model_family {
            experimental_supported_tools: Vec::new(),
            effective_context_window_percent: 95,
            support_verbosity: false,
-            shell_type: ConfigShellToolType::Default,
+            shell_type: ConfigShellToolType::Default(truncation_policy),
            default_verbosity: None,
            default_reasoning_effort: None,
-            truncation_policy: TruncationPolicy::Bytes(10_000),
+            truncation_policy,
        };

        // apply overrides
@@ -138,6 +139,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
    } else if slug.starts_with("gpt-3.5") {
        model_family!(slug, "gpt-3.5", needs_special_apply_patch_instructions: true)
    } else if slug.starts_with("test-gpt-5") {
+        let truncation_policy = TruncationPolicy::Tokens(10_000);
        model_family!(
            slug, slug,
            supports_reasoning_summaries: true,
@@ -150,13 +152,13 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
                "test_sync_tool".to_string(),
            ],
            supports_parallel_tool_calls: true,
-            shell_type: ConfigShellToolType::ShellCommand,
+            shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
            support_verbosity: true,
-            truncation_policy: TruncationPolicy::Tokens(10_000),
        )

    // Internal models.
    } else if slug.starts_with("codex-exp-") {
+        let truncation_policy = TruncationPolicy::Tokens(10_000);
        model_family!(
            slug, slug,
            supports_reasoning_summaries: true,
@@ -168,41 +170,44 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
                "list_dir".to_string(),
                "read_file".to_string(),
            ],
-            shell_type: ConfigShellToolType::ShellCommand,
+            shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
            supports_parallel_tool_calls: true,
            support_verbosity: true,
-            truncation_policy: TruncationPolicy::Tokens(10_000),
+            truncation_policy: truncation_policy,
        )

    // Production models.
    } else if slug.starts_with("gpt-5.1-codex-max") {
+        let truncation_policy = TruncationPolicy::Tokens(10_000);
        model_family!(
            slug, slug,
            supports_reasoning_summaries: true,
            reasoning_summary_format: ReasoningSummaryFormat::Experimental,
            base_instructions: GPT_5_1_CODEX_MAX_INSTRUCTIONS.to_string(),
            apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
-            shell_type: ConfigShellToolType::ShellCommand,
+            shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
            supports_parallel_tool_calls: true,
            support_verbosity: false,
-            truncation_policy: TruncationPolicy::Tokens(10_000),
+            truncation_policy: truncation_policy,
        )
    } else if slug.starts_with("gpt-5-codex")
        || slug.starts_with("gpt-5.1-codex")
        || slug.starts_with("codex-")
    {
+        let truncation_policy = TruncationPolicy::Tokens(10_000);
        model_family!(
            slug, slug,
            supports_reasoning_summaries: true,
            reasoning_summary_format: ReasoningSummaryFormat::Experimental,
            base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(),
            apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
-            shell_type: ConfigShellToolType::ShellCommand,
+            shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
            supports_parallel_tool_calls: true,
            support_verbosity: false,
-            truncation_policy: TruncationPolicy::Tokens(10_000),
+            truncation_policy: truncation_policy,
        )
    } else if slug.starts_with("gpt-5.1") {
+        let truncation_policy = TruncationPolicy::Tokens(10_000);
        model_family!(
            slug, "gpt-5.1",
            supports_reasoning_summaries: true,
@@ -212,7 +217,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
            base_instructions: GPT_5_1_INSTRUCTIONS.to_string(),
            default_reasoning_effort: Some(ReasoningEffort::Medium),
            truncation_policy: TruncationPolicy::Bytes(10_000),
-            shell_type: ConfigShellToolType::ShellCommand,
+            shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
            supports_parallel_tool_calls: true,
        )
    } else if slug.starts_with("gpt-5") {
@@ -220,7 +225,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
            slug, "gpt-5",
            supports_reasoning_summaries: true,
            needs_special_apply_patch_instructions: true,
-            shell_type: ConfigShellToolType::Default,
+            shell_type: ConfigShellToolType::Default(TruncationPolicy::Bytes(10_000)),
            support_verbosity: true,
            truncation_policy: TruncationPolicy::Bytes(10_000),
        )
@@ -230,6 +235,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
 }

 pub fn derive_default_model_family(model: &str) -> ModelFamily {
+    let truncation_policy = TruncationPolicy::Bytes(10_000);
    ModelFamily {
        slug: model.to_string(),
        family: model.to_string(),
@@ -242,9 +248,9 @@ pub fn derive_default_model_family(model: &str) -> ModelFamily {
        experimental_supported_tools: Vec::new(),
        effective_context_window_percent: 95,
        support_verbosity: false,
-        shell_type: ConfigShellToolType::Default,
+        shell_type: ConfigShellToolType::Default(truncation_policy),
        default_verbosity: None,
        default_reasoning_effort: None,
-        truncation_policy: TruncationPolicy::Bytes(10_000),
+        truncation_policy,
    }
 }
--- a/codex-rs/core/src/sandboxing/mod.rs
+++ b/codex-rs/core/src/sandboxing/mod.rs
@@ -35,6 +35,8 @@ pub struct CommandSpec {
    pub timeout_ms: Option<u64>,
    pub with_escalated_permissions: Option<bool>,
    pub justification: Option<String>,
+    pub max_output_tokens: Option<usize>,
+    pub max_output_chars: Option<usize>,
 }

 #[derive(Clone, Debug)]
@@ -47,6 +49,8 @@ pub struct ExecEnv {
    pub with_escalated_permissions: Option<bool>,
    pub justification: Option<String>,
    pub arg0: Option<String>,
+    pub max_output_tokens: Option<usize>,
+    pub max_output_chars: Option<usize>,
 }

 pub enum SandboxPreference {
@@ -161,6 +165,8 @@ impl SandboxManager {
            with_escalated_permissions: spec.with_escalated_permissions,
            justification: spec.justification.clone(),
            arg0: arg0_override,
+            max_output_tokens: spec.max_output_tokens,
+            max_output_chars: spec.max_output_chars,
        })
    }

--- a/codex-rs/core/src/tasks/user_shell.rs
+++ b/codex-rs/core/src/tasks/user_shell.rs
@@ -98,6 +98,8 @@ impl SessionTask for UserShellCommandTask {
            with_escalated_permissions: None,
            justification: None,
            arg0: None,
+            max_output_tokens: None,
+            max_output_chars: None,
        };

        let stdout_stream = Some(StdoutStream {
--- a/codex-rs/core/src/tools/events.rs
+++ b/codex-rs/core/src/tools/events.rs
@@ -15,6 +15,8 @@ use crate::protocol::PatchApplyEndEvent;
 use crate::protocol::TurnDiffEvent;
 use crate::tools::context::SharedTurnDiffTracker;
 use crate::tools::sandboxing::ToolError;
+use crate::truncate::TruncationPolicy;
+use crate::truncate::formatted_truncate_text;
 use codex_protocol::parse_command::ParsedCommand;
 use std::collections::HashMap;
 use std::path::Path;
@@ -29,6 +31,7 @@ pub(crate) struct ToolEventCtx<'a> {
    pub turn: &'a TurnContext,
    pub call_id: &'a str,
    pub turn_diff_tracker: Option<&'a SharedTurnDiffTracker>,
+    pub override_truncation_policy: Option<&'a TruncationPolicy>,
 }

 impl<'a> ToolEventCtx<'a> {
@@ -37,12 +40,14 @@ impl<'a> ToolEventCtx<'a> {
        turn: &'a TurnContext,
        call_id: &'a str,
        turn_diff_tracker: Option<&'a SharedTurnDiffTracker>,
+        override_truncation_policy: Option<&'a TruncationPolicy>,
    ) -> Self {
        Self {
            session,
            turn,
            call_id,
            turn_diff_tracker,
+            override_truncation_policy,
        }
    }
 }
@@ -245,13 +250,13 @@ impl ToolEmitter {
    fn format_exec_output_for_model(
        &self,
        output: &ExecToolCallOutput,
-        ctx: ToolEventCtx<'_>,
+        truncation_policy: &TruncationPolicy,
    ) -> String {
        match self {
            Self::Shell { freeform: true, .. } => {
-                super::format_exec_output_for_model_freeform(output, ctx.turn.truncation_policy)
+                super::format_exec_output_for_model_freeform(output, *truncation_policy)
            }
-            _ => super::format_exec_output_for_model_structured(output, ctx.turn.truncation_policy),
+            _ => super::format_exec_output_for_model_structured(output, *truncation_policy),
        }
    }

@@ -260,9 +265,12 @@ impl ToolEmitter {
        ctx: ToolEventCtx<'_>,
        out: Result<ExecToolCallOutput, ToolError>,
    ) -> Result<String, FunctionCallError> {
+        let truncation_policy = ctx
+            .override_truncation_policy
+            .unwrap_or(&ctx.turn.truncation_policy);
        let (event, result) = match out {
            Ok(output) => {
-                let content = self.format_exec_output_for_model(&output, ctx);
+                let content = self.format_exec_output_for_model(&output, truncation_policy);
                let exit_code = output.exit_code;
                let event = ToolEventStage::Success(output);
                let result = if exit_code == 0 {
@@ -274,24 +282,26 @@ impl ToolEmitter {
            }
            Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output })))
            | Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
-                let response = self.format_exec_output_for_model(&output, ctx);
+                let response = self.format_exec_output_for_model(&output, truncation_policy);
                let event = ToolEventStage::Failure(ToolEventFailure::Output(*output));
                let result = Err(FunctionCallError::RespondToModel(response));
                (event, result)
            }
            Err(ToolError::Codex(err)) => {
-                let message = format!("execution error: {err:?}");
-                let event = ToolEventStage::Failure(ToolEventFailure::Message(message.clone()));
-                let result = Err(FunctionCallError::RespondToModel(message));
+                let formatted_error = formatted_truncate_text(&err.to_string(), *truncation_policy);
+                let message = format!("execution error: {formatted_error}");
+                let event = ToolEventStage::Failure(ToolEventFailure::Message(message));
+                let result = Err(FunctionCallError::RespondToModel(formatted_error));
                (event, result)
            }
            Err(ToolError::Rejected(msg)) => {
+                let formatted_msg = formatted_truncate_text(&msg, *truncation_policy);
                // Normalize common rejection messages for exec tools so tests and
                // users see a clear, consistent phrase.
-                let normalized = if msg == "rejected by user" {
+                let normalized = if formatted_msg == "rejected by user" {
                    "exec command rejected by user".to_string()
                } else {
-                    msg
+                    formatted_msg
                };
                let event = ToolEventStage::Failure(ToolEventFailure::Message(normalized.clone()));
                let result = Err(FunctionCallError::RespondToModel(normalized));
--- a/codex-rs/core/src/tools/handlers/apply_patch.rs
+++ b/codex-rs/core/src/tools/handlers/apply_patch.rs
@@ -100,6 +100,7 @@ impl ToolHandler for ApplyPatchHandler {
                            turn.as_ref(),
                            &call_id,
                            Some(&tracker),
+                            None,
                        );
                        emitter.begin(event_ctx).await;

@@ -127,6 +128,7 @@ impl ToolHandler for ApplyPatchHandler {
                            turn.as_ref(),
                            &call_id,
                            Some(&tracker),
+                            None,
                        );
                        let content = emitter.finish(event_ctx, out).await?;
                        Ok(ToolOutput::Function {
--- a/codex-rs/core/src/tools/handlers/shell.rs
+++ b/codex-rs/core/src/tools/handlers/shell.rs
@@ -25,6 +25,7 @@ use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
 use crate::tools::runtimes::shell::ShellRequest;
 use crate::tools::runtimes::shell::ShellRuntime;
 use crate::tools::sandboxing::ToolCtx;
+use crate::truncate::TruncationPolicy;

 pub struct ShellHandler;

@@ -40,6 +41,8 @@ impl ShellHandler {
            with_escalated_permissions: params.with_escalated_permissions,
            justification: params.justification,
            arg0: None,
+            max_output_tokens: params.max_output_tokens,
+            max_output_chars: params.max_output_chars,
        }
    }
 }
@@ -62,6 +65,8 @@ impl ShellCommandHandler {
            with_escalated_permissions: params.with_escalated_permissions,
            justification: params.justification,
            arg0: None,
+            max_output_tokens: params.max_output_tokens,
+            max_output_chars: params.max_output_chars,
        }
    }
 }
@@ -207,6 +212,9 @@ impl ShellHandler {
            )));
        }

+        let override_truncation_policy =
+            create_truncation_policy(exec_params.max_output_tokens, exec_params.max_output_chars);
+
        // Intercept apply_patch if present.
        match codex_apply_patch::maybe_parse_apply_patch_verified(
            &exec_params.command,
@@ -235,6 +243,7 @@ impl ShellHandler {
                            turn.as_ref(),
                            &call_id,
                            Some(&tracker),
+                            override_truncation_policy.as_ref(),
                        );
                        emitter.begin(event_ctx).await;

@@ -261,6 +270,7 @@ impl ShellHandler {
                            turn.as_ref(),
                            &call_id,
                            Some(&tracker),
+                            override_truncation_policy.as_ref(),
                        );
                        let content = emitter.finish(event_ctx, out).await?;
                        return Ok(ToolOutput::Function {
@@ -292,7 +302,13 @@ impl ShellHandler {
            source,
            freeform,
        );
-        let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
+        let event_ctx = ToolEventCtx::new(
+            session.as_ref(),
+            turn.as_ref(),
+            &call_id,
+            None,
+            override_truncation_policy.as_ref(),
+        );
        emitter.begin(event_ctx).await;

        let req = ShellRequest {
@@ -302,6 +318,8 @@ impl ShellHandler {
            env: exec_params.env.clone(),
            with_escalated_permissions: exec_params.with_escalated_permissions,
            justification: exec_params.justification.clone(),
+            max_output_tokens: exec_params.max_output_tokens,
+            max_output_chars: exec_params.max_output_chars,
        };
        let mut orchestrator = ToolOrchestrator::new();
        let mut runtime = ShellRuntime::new();
@@ -314,7 +332,13 @@ impl ShellHandler {
        let out = orchestrator
            .run(&mut runtime, &req, &tool_ctx, &turn, turn.approval_policy)
            .await;
-        let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
+        let event_ctx = ToolEventCtx::new(
+            session.as_ref(),
+            turn.as_ref(),
+            &call_id,
+            None,
+            override_truncation_policy.as_ref(),
+        );
        let content = emitter.finish(event_ctx, out).await?;
        Ok(ToolOutput::Function {
            content,
@@ -324,6 +348,16 @@ impl ShellHandler {
    }
 }

+/// Builds the per-call truncation override from the shell tool arguments.
+/// `max_output_tokens` takes precedence; otherwise `max_output_chars` becomes
+/// a `TruncationPolicy::Bytes` limit. Returns `None` when neither is given.
+fn create_truncation_policy(
+    max_output_tokens: Option<usize>,
+    max_output_chars: Option<usize>,
+) -> Option<TruncationPolicy> {
+    max_output_tokens
+        .map(TruncationPolicy::Tokens)
+        .or_else(|| max_output_chars.map(TruncationPolicy::Bytes))
+}
 #[cfg(test)]
 mod tests {
    use std::path::PathBuf;
--- a/codex-rs/core/src/tools/handlers/unified_exec.rs
+++ b/codex-rs/core/src/tools/handlers/unified_exec.rs
@@ -162,6 +162,7 @@ impl ToolHandler for UnifiedExecHandler {
                    context.turn.as_ref(),
                    &context.call_id,
                    None,
+                    None,
                );
                let emitter = ToolEmitter::unified_exec(
                    &command,
--- a/codex-rs/core/src/tools/router.rs
+++ b/codex-rs/core/src/tools/router.rs
@@ -116,6 +116,8 @@ impl ToolRouter {
                            timeout_ms: exec.timeout_ms,
                            with_escalated_permissions: None,
                            justification: None,
+                            max_output_tokens: None,
+                            max_output_chars: None,
                        };
                        Ok(Some(ToolCall {
                            tool_name: "local_shell".to_string(),
--- a/codex-rs/core/src/tools/runtimes/apply_patch.rs
+++ b/codex-rs/core/src/tools/runtimes/apply_patch.rs
@@ -72,6 +72,8 @@ impl ApplyPatchRuntime {
            env: HashMap::new(),
            with_escalated_permissions: None,
            justification: None,
+            max_output_tokens: None,
+            max_output_chars: None,
        })
    }

--- a/codex-rs/core/src/tools/runtimes/mod.rs
+++ b/codex-rs/core/src/tools/runtimes/mod.rs
@@ -15,6 +15,7 @@ pub mod unified_exec;

 /// Shared helper to construct a CommandSpec from a tokenized command line.
 /// Validates that at least a program is present.
+#[allow(clippy::too_many_arguments)]
 pub(crate) fn build_command_spec(
    command: &[String],
    cwd: &Path,
@@ -22,6 +23,8 @@ pub(crate) fn build_command_spec(
    timeout_ms: Option<u64>,
    with_escalated_permissions: Option<bool>,
    justification: Option<String>,
+    max_output_tokens: Option<usize>,
+    max_output_chars: Option<usize>,
 ) -> Result<CommandSpec, ToolError> {
    let (program, args) = command
        .split_first()
@@ -34,5 +37,7 @@ pub(crate) fn build_command_spec(
        timeout_ms,
        with_escalated_permissions,
        justification,
+        max_output_tokens,
+        max_output_chars,
    })
 }
--- a/codex-rs/core/src/tools/runtimes/shell.rs
+++ b/codex-rs/core/src/tools/runtimes/shell.rs
@@ -33,6 +33,8 @@ pub struct ShellRequest {
    pub env: std::collections::HashMap<String, String>,
    pub with_escalated_permissions: Option<bool>,
    pub justification: Option<String>,
+    pub max_output_tokens: Option<usize>,
+    pub max_output_chars: Option<usize>,
 }

 impl ProvidesSandboxRetryData for ShellRequest {
@@ -147,6 +149,8 @@ impl ToolRuntime<ShellRequest, ExecToolCallOutput> for ShellRuntime {
            req.timeout_ms,
            req.with_escalated_permissions,
            req.justification.clone(),
+            req.max_output_tokens,
+            req.max_output_chars,
        )?;
        let env = attempt
            .env_for(&spec)
--- a/codex-rs/core/src/tools/runtimes/unified_exec.rs
+++ b/codex-rs/core/src/tools/runtimes/unified_exec.rs
@@ -36,6 +36,8 @@ pub struct UnifiedExecRequest {
    pub env: HashMap<String, String>,
    pub with_escalated_permissions: Option<bool>,
    pub justification: Option<String>,
+    pub max_output_tokens: Option<usize>,
+    pub max_output_chars: Option<usize>,
 }

 impl ProvidesSandboxRetryData for UnifiedExecRequest {
@@ -72,6 +74,8 @@ impl UnifiedExecRequest {
            env,
            with_escalated_permissions,
            justification,
+            max_output_tokens: None,
+            max_output_chars: None,
        }
    }
 }
@@ -162,6 +166,8 @@ impl<'a> ToolRuntime<UnifiedExecRequest, UnifiedExecSession> for UnifiedExecRunt
            None,
            req.with_escalated_permissions,
            req.justification.clone(),
+            req.max_output_tokens,
+            req.max_output_chars,
        )
        .map_err(|_| ToolError::Rejected("missing command line for PTY".to_string()))?;
        let exec_env = attempt
--- a/codex-rs/core/src/tools/spec.rs
+++ b/codex-rs/core/src/tools/spec.rs
@@ -8,6 +8,7 @@ use crate::tools::handlers::apply_patch::ApplyPatchToolType;
 use crate::tools::handlers::apply_patch::create_apply_patch_freeform_tool;
 use crate::tools::handlers::apply_patch::create_apply_patch_json_tool;
 use crate::tools::registry::ToolRegistryBuilder;
+use crate::truncate::TruncationPolicy;
 use serde::Deserialize;
 use serde::Serialize;
 use serde_json::Value as JsonValue;
@@ -17,7 +18,7 @@ use std::collections::HashMap;

 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum ConfigShellToolType {
-    Default,
+    Default(TruncationPolicy),
    Local,
    UnifiedExec,
    /// Do not include a shell tool by default. Useful when using Codex
@@ -26,7 +27,7 @@ pub enum ConfigShellToolType {
    /// to customize agent behavior.
    Disabled,
    /// Takes a command as a single string to be run in the user's default shell.
-    ShellCommand,
+    ShellCommand(TruncationPolicy),
 }

 #[derive(Debug, Clone)]
@@ -58,7 +59,7 @@ impl ToolsConfig {
        } else if features.enabled(Feature::UnifiedExec) {
            ConfigShellToolType::UnifiedExec
        } else if features.enabled(Feature::ShellCommandTool) {
-            ConfigShellToolType::ShellCommand
+            ConfigShellToolType::ShellCommand(model_family.truncation_policy)
        } else {
            model_family.shell_type.clone()
        };
@@ -266,7 +267,7 @@ fn create_write_stdin_tool() -> ToolSpec {
    })
 }

-fn create_shell_tool() -> ToolSpec {
+fn create_shell_tool(truncation_policy: TruncationPolicy) -> ToolSpec {
    let mut properties = BTreeMap::new();
    properties.insert(
        "command".to_string(),
@@ -300,6 +301,24 @@ fn create_shell_tool() -> ToolSpec {
            description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()),
        },
    );
+    match truncation_policy {
+        TruncationPolicy::Tokens(_) => {
+            properties.insert(
+                "max_output_tokens".to_string(),
+                JsonSchema::Number {
+                    description: Some("Maximum number of tokens to return from stdout/stderr. Excess tokens will be truncated".to_string()),
+                },
+            );
+        }
+        TruncationPolicy::Bytes(_) => {
+            properties.insert(
+                "max_output_chars".to_string(),
+                JsonSchema::Number {
+                    description: Some("Maximum number of characters to return from stdout/stderr. Excess characters will be truncated".to_string()),
+                },
+            );
+        }
+    }

    let description  = if cfg!(windows) {
        r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"].
@@ -330,7 +349,7 @@ Examples of valid command strings:
    })
 }

-fn create_shell_command_tool() -> ToolSpec {
+fn create_shell_command_tool(truncation_policy: TruncationPolicy) -> ToolSpec {
    let mut properties = BTreeMap::new();
    properties.insert(
        "command".to_string(),
@@ -364,6 +383,30 @@ fn create_shell_command_tool() -> ToolSpec {
            description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()),
        },
    );
+    match truncation_policy {
+        TruncationPolicy::Tokens(_) => {
+            properties.insert(
+                "max_output_tokens".to_string(),
+                JsonSchema::Number {
+                    description: Some(
+                        "Maximum number of tokens to return. Excess output will be truncated."
+                            .to_string(),
+                    ),
+                },
+            );
+        }
+        TruncationPolicy::Bytes(_) => {
+            properties.insert(
+                "max_output_chars".to_string(),
+                JsonSchema::Number {
+                    description: Some(
+                        "Maximum number of characters to return. Excess output will be truncated."
+                            .to_string(),
+                    ),
+                },
+            );
+        }
+    }

    let description = if cfg!(windows) {
        r#"Runs a Powershell command (Windows) and returns its output.
@@ -1001,8 +1044,8 @@ pub(crate) fn build_specs(
    let shell_command_handler = Arc::new(ShellCommandHandler);

    match &config.shell_type {
-        ConfigShellToolType::Default => {
-            builder.push_spec(create_shell_tool());
+        ConfigShellToolType::Default(truncation_policy) => {
+            builder.push_spec(create_shell_tool(*truncation_policy));
        }
        ConfigShellToolType::Local => {
            builder.push_spec(ToolSpec::LocalShell {});
@@ -1016,8 +1059,8 @@ pub(crate) fn build_specs(
        ConfigShellToolType::Disabled => {
            // Do nothing.
        }
-        ConfigShellToolType::ShellCommand => {
-            builder.push_spec(create_shell_command_tool());
+        ConfigShellToolType::ShellCommand(truncation_policy) => {
+            builder.push_spec(create_shell_command_tool(*truncation_policy));
        }
    }

@@ -1160,11 +1203,11 @@ mod tests {

    fn shell_tool_name(config: &ToolsConfig) -> Option<&'static str> {
        match config.shell_type {
-            ConfigShellToolType::Default => Some("shell"),
+            ConfigShellToolType::Default(_) => Some("shell"),
            ConfigShellToolType::Local => Some("local_shell"),
            ConfigShellToolType::UnifiedExec => None,
            ConfigShellToolType::Disabled => None,
-            ConfigShellToolType::ShellCommand => Some("shell_command"),
+            ConfigShellToolType::ShellCommand(_) => Some("shell_command"),
        }
    }

@@ -1926,7 +1969,7 @@ mod tests {

    #[test]
    fn test_shell_tool() {
-        let tool = super::create_shell_tool();
+        let tool = super::create_shell_tool(TruncationPolicy::Bytes(10_000));
        let ToolSpec::Function(ResponsesApiTool {
            description, name, ..
        }) = &tool
@@ -1956,7 +1999,7 @@ Examples of valid command strings:

    #[test]
    fn test_shell_command_tool() {
-        let tool = super::create_shell_command_tool();
+        let tool = super::create_shell_command_tool(TruncationPolicy::Tokens(10_000));
        let ToolSpec::Function(ResponsesApiTool {
            description, name, ..
        }) = &tool
--- a/codex-rs/core/src/unified_exec/session_manager.rs
+++ b/codex-rs/core/src/unified_exec/session_manager.rs
@@ -149,6 +149,7 @@ impl UnifiedExecSessionManager {
                turn_ref.as_ref(),
                request.call_id,
                None,
+                None,
            )
        };
        interaction_emitter
@@ -356,6 +357,7 @@ impl UnifiedExecSessionManager {
            entry.turn_ref.as_ref(),
            &entry.call_id,
            None,
+            None,
        );
        let emitter = ToolEmitter::unified_exec(
            &entry.command,
@@ -389,6 +391,7 @@ impl UnifiedExecSessionManager {
            context.turn.as_ref(),
            &context.call_id,
            None,
+            None,
        );
        let emitter =
            ToolEmitter::unified_exec(command, cwd, ExecCommandSource::UnifiedExecStartup, None);
--- a/codex-rs/core/tests/suite/exec.rs
+++ b/codex-rs/core/tests/suite/exec.rs
@@ -37,6 +37,8 @@ async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result<ExecToolCallOutput
        with_escalated_permissions: None,
        justification: None,
        arg0: None,
+        max_output_tokens: None,
+        max_output_chars: None,
    };

    let policy = SandboxPolicy::new_read_only_policy();
--- a/codex-rs/core/tests/suite/unified_exec.rs
+++ b/codex-rs/core/tests/suite/unified_exec.rs
@@ -1530,8 +1530,8 @@ async fn unified_exec_formats_large_output_summary() -> Result<()> {
    } = builder.build(&server).await?;

    let script = r#"python3 - <<'PY'
-for i in range(10000):
-    print("token token ")
+import sys
+sys.stdout.write("token token \n" * 500)
 PY
 "#;

--- a/codex-rs/exec-server/src/posix/escalate_server.rs
+++ b/codex-rs/exec-server/src/posix/escalate_server.rs
@@ -76,6 +76,8 @@ impl EscalateServer {
                with_escalated_permissions: None,
                justification: None,
                arg0: None,
+                max_output_tokens: None,
+                max_output_chars: None,
            },
            get_platform_sandbox().unwrap_or(SandboxType::None),
            // TODO: use the sandbox policy and cwd from the calling client
--- a/codex-rs/protocol/src/models.rs
+++ b/codex-rs/protocol/src/models.rs
@@ -322,6 +322,10 @@ pub struct ShellToolCallParams {
    pub with_escalated_permissions: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub justification: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_output_tokens: Option<usize>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_output_chars: Option<usize>,
 }

 /// If the `name` of a `ResponseItem::FunctionCall` is `shell_command`, the
@@ -338,6 +342,10 @@ pub struct ShellCommandToolCallParams {
    pub with_escalated_permissions: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub justification: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_output_tokens: Option<usize>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_output_chars: Option<usize>,
 }

 /// Responses API compatible content items that can be returned by a tool call.
@@ -650,6 +658,8 @@ mod tests {
                timeout_ms: Some(1000),
                with_escalated_permissions: None,
                justification: None,
+                max_output_tokens: None,
+                max_output_chars: None,
            },
            params
        );
Author	SHA1	Message	Date
Ahmed Ibrahim	02ecb4d159	flaky	2025-11-20 13:13:16 -08:00
Ahmed Ibrahim	2e44082a30	shell	2025-11-19 16:44:45 -08:00