Add the utility to truncate by tokens (#6746)

- This PR lays the groundwork for truncating by tokens. This path will initially be used by unified exec and the context manager (mainly responsible for MCP calls). - We are exposing a new config, `calls_output_max_tokens`. - Use `tokens` as the main budget unit, but truncate based on the model family by introducing `TruncationPolicy`. - Introduce `truncate_text` as a router for truncation based on the mode. In the next PRs: - Remove `truncate_with_line_bytes_budget`. - Add the ability for the model to override the token budget.
2026-04-30 03:12:20 +03:00 · 2025-11-18 11:36:23 -08:00
parent b035c604b0
commit 3de8790714
21 changed files with 770 additions and 549 deletions
--- a/codex-rs/core/tests/suite/truncation.rs
+++ b/codex-rs/core/tests/suite/truncation.rs
@@ -27,7 +27,6 @@ use core_test_support::skip_if_no_network;
 use core_test_support::test_codex::test_codex;
 use core_test_support::wait_for_event;
 use escargot::CargoBuild;
-use regex_lite::Regex;
 use serde_json::Value;
 use serde_json::json;
 use std::collections::HashMap;
@@ -48,7 +47,7 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
    let test = builder.build(&server).await?;

    // Construct a very long, non-existent path to force a RespondToModel error with a large message
-    let long_path = "a".repeat(20_000);
+    let long_path = "long path text should trigger truncation".repeat(8_000);
    let call_id = "grep-huge-error";
    let args = json!({
        "pattern": "alpha",
@@ -80,12 +79,16 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> {

    tracing::debug!(output = %output, "truncated function error output");

-    // Expect plaintext with byte-truncation marker and no omitted-lines marker
+    // Expect plaintext with token-based truncation marker and no omitted-lines marker
    assert!(
        serde_json::from_str::<serde_json::Value>(&output).is_err(),
        "expected error output to be plain text",
    );
-    let truncated_pattern = r#"(?s)^Total output lines: 1\s+.*\[\.\.\. output truncated to fit 11264 bytes \.\.\.\]\s*$"#;
+    assert!(
+        !output.contains("Total output lines:"),
+        "error output should not include line-based truncation header: {output}",
+    );
+    let truncated_pattern = r"(?s)^unable to access `.*tokens truncated.*$";
    assert_regex_match(truncated_pattern, &output);
    assert!(
        !output.contains("omitted"),
@@ -269,7 +272,7 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()>
    let tool_name = format!("mcp__{server_name}__echo");

    // Build a very large message to exceed 10KiB once serialized.
-    let large_msg = "long-message-with-newlines-".repeat(600);
+    let large_msg = "long-message-with-newlines-".repeat(6000);
    let args_json = serde_json::json!({ "message": large_msg });

    mount_sse_once(
@@ -334,22 +337,19 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()>
        .function_call_output_text(call_id)
        .context("function_call_output present for rmcp call")?;

-    // Expect plain text with byte-based truncation marker.
+    // Expect plain text with token-based truncation marker; the original JSON body
+    // is truncated in the middle of the echo string.
    assert!(
        serde_json::from_str::<Value>(&output).is_err(),
        "expected truncated MCP output to be plain text"
    );
    assert!(
-        output.starts_with("Total output lines: 1\n\n{"),
-        "expected total line header and JSON head, got: {output}"
+        !output.contains("Total output lines:"),
+        "MCP output should not include line-based truncation header: {output}"
    );

-    let byte_marker = Regex::new(r"\[\.\.\. output truncated to fit 11264 bytes \.\.\.\]")
-        .expect("compile regex");
-    assert!(
-        byte_marker.is_match(&output),
-        "expected byte truncation marker, got: {output}"
-    );
+    let truncated_pattern = r#"(?s)^\{"echo":\s*"ECHOING: long-message-with-newlines-.*tokens truncated.*long-message-with-newlines-.*$"#;
+    assert_regex_match(truncated_pattern, &output);

    Ok(())
 }
@@ -453,3 +453,164 @@ async fn mcp_image_output_preserves_image_and_no_text_summary() -> Result<()> {

    Ok(())
 }
+
+// Token-based policy should report token counts even when truncation is byte-estimated.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn token_policy_marker_reports_tokens() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+    let mut builder = test_codex().with_config(|config| {
+        config.model = "gpt-5.1-codex".to_string(); // token policy
+        config.model_family =
+            find_family_for_model("gpt-5.1-codex").expect("model family for gpt-5.1-codex");
+        config.tool_output_token_limit = Some(50); // small budget to force truncation
+    });
+    let fixture = builder.build(&server).await?;
+
+    let call_id = "shell-token-marker";
+    let args = json!({
+        "command": ["/bin/sh", "-c", "seq 1 150"],
+        "timeout_ms": 5_000,
+    });
+
+    mount_sse_once(
+        &server,
+        sse(vec![
+            ev_response_created("resp-1"),
+            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            ev_completed("resp-1"),
+        ]),
+    )
+    .await;
+    let done_mock = mount_sse_once(
+        &server,
+        sse(vec![
+            ev_assistant_message("msg-1", "done"),
+            ev_completed("resp-2"),
+        ]),
+    )
+    .await;
+
+    fixture
+        .submit_turn_with_policy("run the shell tool", SandboxPolicy::DangerFullAccess)
+        .await?;
+
+    let output = done_mock
+        .single_request()
+        .function_call_output_text(call_id)
+        .context("shell output present")?;
+
+    assert_regex_match(r"\[\u{2026}127 tokens truncated\u{2026}]", &output);
+
+    Ok(())
+}
+
+// Byte-based policy should report bytes removed.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn byte_policy_marker_reports_bytes() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+    let mut builder = test_codex().with_config(|config| {
+        config.model = "gpt-5.1".to_string(); // byte policy
+        config.model_family = find_family_for_model("gpt-5.1").expect("model family for gpt-5.1");
+        config.tool_output_token_limit = Some(50); // ~200 byte cap
+    });
+    let fixture = builder.build(&server).await?;
+
+    let call_id = "shell-byte-marker";
+    let args = json!({
+        "command": ["/bin/sh", "-c", "seq 1 150"],
+        "timeout_ms": 5_000,
+    });
+
+    mount_sse_once(
+        &server,
+        sse(vec![
+            ev_response_created("resp-1"),
+            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            ev_completed("resp-1"),
+        ]),
+    )
+    .await;
+    let done_mock = mount_sse_once(
+        &server,
+        sse(vec![
+            ev_assistant_message("msg-1", "done"),
+            ev_completed("resp-2"),
+        ]),
+    )
+    .await;
+
+    fixture
+        .submit_turn_with_policy("run the shell tool", SandboxPolicy::DangerFullAccess)
+        .await?;
+
+    let output = done_mock
+        .single_request()
+        .function_call_output_text(call_id)
+        .context("shell output present")?;
+
+    assert_regex_match(r"\[\u{2026}505 bytes truncated\u{2026}]", &output);
+
+    Ok(())
+}
+
+// Overriding config with a large token budget should avoid truncation.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn large_budget_avoids_truncation() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+    let mut builder = test_codex().with_config(|config| {
+        config.model = "gpt-5.1-codex".to_string();
+        config.model_family =
+            find_family_for_model("gpt-5.1-codex").expect("model family for gpt-5.1-codex");
+        config.tool_output_token_limit = Some(50_000); // ample budget
+    });
+    let fixture = builder.build(&server).await?;
+
+    let call_id = "shell-no-trunc";
+    let args = json!({
+        "command": ["/bin/sh", "-c", "seq 1 1000"],
+        "timeout_ms": 5_000,
+    });
+
+    mount_sse_once(
+        &server,
+        sse(vec![
+            ev_response_created("resp-1"),
+            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            ev_completed("resp-1"),
+        ]),
+    )
+    .await;
+    let done_mock = mount_sse_once(
+        &server,
+        sse(vec![
+            ev_assistant_message("msg-1", "done"),
+            ev_completed("resp-2"),
+        ]),
+    )
+    .await;
+
+    fixture
+        .submit_turn_with_policy(
+            "run big output without truncation",
+            SandboxPolicy::DangerFullAccess,
+        )
+        .await?;
+
+    let output = done_mock
+        .single_request()
+        .function_call_output_text(call_id)
+        .context("shell output present")?;
+
+    assert!(
+        !output.contains("truncated"),
+        "output should remain untruncated with ample budget"
+    );
+
+    Ok(())
+}