Add the utility to truncate by tokens (#6746)

- This PR lays the groundwork for truncating by tokens. This path will
initially be used by unified exec and the context manager (mainly
responsible for MCP calls).
- We are exposing a new config option, `calls_output_max_tokens`.
- Use `tokens` as the main budget unit but truncate based on the model
family by introducing `TruncationPolicy`.
- Introduce `truncate_text` as a router for truncation based on the
mode.

In upcoming PRs:
- Remove `truncate_with_line_bytes_budget`.
- Add the ability for the model to override the token budget.
This commit is contained in:
Ahmed Ibrahim
2025-11-18 11:36:23 -08:00
committed by GitHub
parent b035c604b0
commit 3de8790714
21 changed files with 770 additions and 549 deletions

View File

@@ -9,9 +9,7 @@ pub mod runtimes;
pub mod sandboxing;
pub mod spec;
use crate::context_manager::MODEL_FORMAT_MAX_BYTES;
use crate::context_manager::MODEL_FORMAT_MAX_LINES;
use crate::context_manager::format_output_for_model_body;
use crate::context_manager::truncate_with_line_bytes_budget;
use crate::exec::ExecToolCallOutput;
pub use router::ToolRouter;
use serde::Serialize;
@@ -22,6 +20,9 @@ pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines
pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str =
"[... telemetry preview truncated ...]";
// TODO(aibrahim): migrate shell tool to use truncate text and respect config value
const SHELL_OUTPUT_MAX_BYTES: usize = 10_000;
/// Format the combined exec output for sending back to the model.
/// Includes exit code and duration metadata; truncates large bodies safely.
pub fn format_exec_output_for_model(exec_output: &ExecToolCallOutput) -> String {
@@ -77,5 +78,5 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
};
// Truncate for model consumption before serialization.
format_output_for_model_body(&body, MODEL_FORMAT_MAX_BYTES, MODEL_FORMAT_MAX_LINES)
truncate_with_line_bytes_budget(&body, SHELL_OUTPUT_MAX_BYTES)
}