Add the utility to truncate by tokens (#6746)

- This PR lays the groundwork for truncating by tokens. This path will
initially be used by unified exec and the context manager (mainly
responsible for MCP calls).
- We are exposing a new config, `calls_output_max_tokens`.
- Use `tokens` as the main budget unit, but truncate based on the model
family by introducing `TruncationPolicy`.
- Introduce `truncate_text` as a router for truncation based on the
mode.

In the next PRs:
- Remove `truncate_with_line_bytes_budget`.
- Add the ability for the model to override the token budget.
This commit is contained in:
Ahmed Ibrahim
2025-11-18 11:36:23 -08:00
committed by GitHub
parent b035c604b0
commit 3de8790714
21 changed files with 770 additions and 549 deletions

View File

@@ -27,7 +27,6 @@ use core_test_support::skip_if_no_network;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use escargot::CargoBuild;
use regex_lite::Regex;
use serde_json::Value;
use serde_json::json;
use std::collections::HashMap;
@@ -48,7 +47,7 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
let test = builder.build(&server).await?;
// Construct a very long, non-existent path to force a RespondToModel error with a large message
let long_path = "a".repeat(20_000);
let long_path = "long path text should trigger truncation".repeat(8_000);
let call_id = "grep-huge-error";
let args = json!({
"pattern": "alpha",
@@ -80,12 +79,16 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
tracing::debug!(output = %output, "truncated function error output");
// Expect plaintext with byte-truncation marker and no omitted-lines marker
// Expect plaintext with token-based truncation marker and no omitted-lines marker
assert!(
serde_json::from_str::<serde_json::Value>(&output).is_err(),
"expected error output to be plain text",
);
let truncated_pattern = r#"(?s)^Total output lines: 1\s+.*\[\.\.\. output truncated to fit 11264 bytes \.\.\.\]\s*$"#;
assert!(
!output.contains("Total output lines:"),
"error output should not include line-based truncation header: {output}",
);
let truncated_pattern = r"(?s)^unable to access `.*tokens truncated.*$";
assert_regex_match(truncated_pattern, &output);
assert!(
!output.contains("omitted"),
@@ -269,7 +272,7 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()>
let tool_name = format!("mcp__{server_name}__echo");
// Build a very large message to exceed 10KiB once serialized.
let large_msg = "long-message-with-newlines-".repeat(600);
let large_msg = "long-message-with-newlines-".repeat(6000);
let args_json = serde_json::json!({ "message": large_msg });
mount_sse_once(
@@ -334,22 +337,19 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()>
.function_call_output_text(call_id)
.context("function_call_output present for rmcp call")?;
// Expect plain text with byte-based truncation marker.
// Expect plain text with token-based truncation marker; the original JSON body
// is truncated in the middle of the echo string.
assert!(
serde_json::from_str::<Value>(&output).is_err(),
"expected truncated MCP output to be plain text"
);
assert!(
output.starts_with("Total output lines: 1\n\n{"),
"expected total line header and JSON head, got: {output}"
!output.contains("Total output lines:"),
"MCP output should not include line-based truncation header: {output}"
);
let byte_marker = Regex::new(r"\[\.\.\. output truncated to fit 11264 bytes \.\.\.\]")
.expect("compile regex");
assert!(
byte_marker.is_match(&output),
"expected byte truncation marker, got: {output}"
);
let truncated_pattern = r#"(?s)^\{"echo":\s*"ECHOING: long-message-with-newlines-.*tokens truncated.*long-message-with-newlines-.*$"#;
assert_regex_match(truncated_pattern, &output);
Ok(())
}
@@ -453,3 +453,164 @@ async fn mcp_image_output_preserves_image_and_no_text_summary() -> Result<()> {
Ok(())
}
// A model on the token policy must emit a truncation marker that counts tokens,
// even though the truncation itself is byte-estimated.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn token_policy_marker_reports_tokens() -> Result<()> {
    skip_if_no_network!(Ok(()));

    const MODEL: &str = "gpt-5.1-codex"; // token policy
    const CALL_ID: &str = "shell-token-marker";

    let server = start_mock_server().await;
    let mut builder = test_codex().with_config(|cfg| {
        cfg.model = MODEL.to_string();
        cfg.model_family = find_family_for_model(MODEL).expect("model family for gpt-5.1-codex");
        // Keep the budget tiny so the shell output is guaranteed to be truncated.
        cfg.tool_output_token_limit = Some(50);
    });
    let harness = builder.build(&server).await?;

    let shell_args = serde_json::to_string(&json!({
        "command": ["/bin/sh", "-c", "seq 1 150"],
        "timeout_ms": 5_000,
    }))?;

    // First mocked response: a `shell` function call.
    let tool_call_stream = sse(vec![
        ev_response_created("resp-1"),
        ev_function_call(CALL_ID, "shell", &shell_args),
        ev_completed("resp-1"),
    ]);
    mount_sse_once(&server, tool_call_stream).await;

    // Second mocked response: a final assistant message. The request captured
    // by this mock is the one inspected for the tool output below.
    let final_stream = sse(vec![
        ev_assistant_message("msg-1", "done"),
        ev_completed("resp-2"),
    ]);
    let followup = mount_sse_once(&server, final_stream).await;

    harness
        .submit_turn_with_policy("run the shell tool", SandboxPolicy::DangerFullAccess)
        .await?;

    let tool_output = followup
        .single_request()
        .function_call_output_text(CALL_ID)
        .context("shell output present")?;

    assert_regex_match(r"\[\u{2026}127 tokens truncated\u{2026}]", &tool_output);
    Ok(())
}
// A model on the byte policy must emit a truncation marker that counts the
// bytes removed.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn byte_policy_marker_reports_bytes() -> Result<()> {
    skip_if_no_network!(Ok(()));

    const MODEL: &str = "gpt-5.1"; // byte policy
    const CALL_ID: &str = "shell-byte-marker";

    let server = start_mock_server().await;
    let mut builder = test_codex().with_config(|cfg| {
        cfg.model = MODEL.to_string();
        cfg.model_family = find_family_for_model(MODEL).expect("model family for gpt-5.1");
        // ~200 byte cap
        cfg.tool_output_token_limit = Some(50);
    });
    let harness = builder.build(&server).await?;

    let shell_args = serde_json::to_string(&json!({
        "command": ["/bin/sh", "-c", "seq 1 150"],
        "timeout_ms": 5_000,
    }))?;

    // First mocked response: a `shell` function call.
    let tool_call_stream = sse(vec![
        ev_response_created("resp-1"),
        ev_function_call(CALL_ID, "shell", &shell_args),
        ev_completed("resp-1"),
    ]);
    mount_sse_once(&server, tool_call_stream).await;

    // Second mocked response: a final assistant message. The request captured
    // by this mock is the one inspected for the tool output below.
    let final_stream = sse(vec![
        ev_assistant_message("msg-1", "done"),
        ev_completed("resp-2"),
    ]);
    let followup = mount_sse_once(&server, final_stream).await;

    harness
        .submit_turn_with_policy("run the shell tool", SandboxPolicy::DangerFullAccess)
        .await?;

    let tool_output = followup
        .single_request()
        .function_call_output_text(CALL_ID)
        .context("shell output present")?;

    assert_regex_match(r"\[\u{2026}505 bytes truncated\u{2026}]", &tool_output);
    Ok(())
}
// When the configured token budget is far larger than the tool output, the
// output must not carry any truncation marker.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn large_budget_avoids_truncation() -> Result<()> {
    skip_if_no_network!(Ok(()));

    const MODEL: &str = "gpt-5.1-codex";
    const CALL_ID: &str = "shell-no-trunc";

    let server = start_mock_server().await;
    let mut builder = test_codex().with_config(|cfg| {
        cfg.model = MODEL.to_string();
        cfg.model_family = find_family_for_model(MODEL).expect("model family for gpt-5.1-codex");
        // Ample budget: the whole command output should fit.
        cfg.tool_output_token_limit = Some(50_000);
    });
    let harness = builder.build(&server).await?;

    let shell_args = serde_json::to_string(&json!({
        "command": ["/bin/sh", "-c", "seq 1 1000"],
        "timeout_ms": 5_000,
    }))?;

    // First mocked response: a `shell` function call.
    let tool_call_stream = sse(vec![
        ev_response_created("resp-1"),
        ev_function_call(CALL_ID, "shell", &shell_args),
        ev_completed("resp-1"),
    ]);
    mount_sse_once(&server, tool_call_stream).await;

    // Second mocked response: a final assistant message. The request captured
    // by this mock is the one inspected for the tool output below.
    let final_stream = sse(vec![
        ev_assistant_message("msg-1", "done"),
        ev_completed("resp-2"),
    ]);
    let followup = mount_sse_once(&server, final_stream).await;

    harness
        .submit_turn_with_policy(
            "run big output without truncation",
            SandboxPolicy::DangerFullAccess,
        )
        .await?;

    let tool_output = followup
        .single_request()
        .function_call_output_text(CALL_ID)
        .context("shell output present")?;

    let was_truncated = tool_output.contains("truncated");
    assert!(
        !was_truncated,
        "output should remain untruncated with ample budget"
    );
    Ok(())
}

View File

@@ -26,9 +26,11 @@ use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use core_test_support::wait_for_event_match;
use core_test_support::wait_for_event_with_timeout;
use regex_lite::Regex;
use serde_json::Value;
use serde_json::json;
use tokio::time::Duration;
fn extract_output_text(item: &Value) -> Option<&str> {
item.get("output").and_then(|value| match value {
@@ -814,7 +816,7 @@ async fn exec_command_reports_chunk_and_exit_metadata() -> Result<()> {
let call_id = "uexec-metadata";
let args = serde_json::json!({
"cmd": "printf 'abcdefghijklmnopqrstuvwxyz'",
"cmd": "printf 'token one token two token three token four token five token six token seven'",
"yield_time_ms": 500,
"max_output_tokens": 6,
});
@@ -1295,7 +1297,7 @@ async fn unified_exec_streams_after_lagged_output() -> Result<()> {
import sys
import time
chunk = b'x' * (1 << 20)
chunk = b'long content here to trigger truncation' * (1 << 10)
for _ in range(4):
sys.stdout.buffer.write(chunk)
sys.stdout.flush()
@@ -1365,8 +1367,13 @@ PY
summary: ReasoningSummary::Auto,
})
.await?;
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
// This is a worst case scenario for the truncate logic.
wait_for_event_with_timeout(
&codex,
|event| matches!(event, EventMsg::TaskComplete(_)),
Duration::from_secs(10),
)
.await;
let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
@@ -1523,14 +1530,15 @@ async fn unified_exec_formats_large_output_summary() -> Result<()> {
} = builder.build(&server).await?;
let script = r#"python3 - <<'PY'
for i in range(300):
print(f"line-{i}")
for i in range(10000):
print("token token ")
PY
"#;
let call_id = "uexec-large-output";
let args = serde_json::json!({
"cmd": script,
"max_output_tokens": 100,
"yield_time_ms": 500,
});
@@ -1577,15 +1585,14 @@ PY
let outputs = collect_tool_outputs(&bodies)?;
let large_output = outputs.get(call_id).expect("missing large output summary");
assert_regex_match(
concat!(
r"(?s)",
r"line-0.*?",
r"\[\.{3} omitted \d+ of \d+ lines \.{3}\].*?",
r"line-299",
),
&large_output.output,
);
let output_text = large_output.output.replace("\r\n", "\n");
let truncated_pattern = r#"(?s)^(token token \n){5,}.*\[\u{2026}\d+ tokens truncated\u{2026}]\n(token token \n){5,}$"#;
assert_regex_match(truncated_pattern, &output_text);
let original_tokens = large_output
.original_token_count
.expect("missing original_token_count for large output summary");
assert!(original_tokens > 0);
Ok(())
}

View File

@@ -272,7 +272,7 @@ async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> {
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
});
let fixture = builder.build(&server).await?;