Trim compaction input (#10374)

Two fixes:

1. Include trailing tool output in the total context size calculation.
Otherwise, when checking whether compaction should run, we ignore newly
added outputs.
2. Trim trailing tool output/tool calls until we can fit the request
into the model context size. Otherwise, the compaction endpoint will fail
to compact. We only trim items that can be reproduced again by the model
(tool calls, tool call outputs).
This commit is contained in:
pakrym-oai
2026-02-02 19:03:11 -08:00
committed by GitHub
parent 7e07ec8f73
commit cbfd2a37cc
6 changed files with 354 additions and 40 deletions

View File

@@ -93,24 +93,7 @@ impl ContextManager {
let base_tokens = i64::try_from(approx_token_count(&base_instructions)).unwrap_or(i64::MAX);
let items_tokens = self.items.iter().fold(0i64, |acc, item| {
acc + match item {
ResponseItem::GhostSnapshot { .. } => 0,
ResponseItem::Reasoning {
encrypted_content: Some(content),
..
}
| ResponseItem::Compaction {
encrypted_content: content,
} => {
let reasoning_bytes = estimate_reasoning_length(content.len());
i64::try_from(approx_tokens_from_byte_count(reasoning_bytes))
.unwrap_or(i64::MAX)
}
item => {
let serialized = serde_json::to_string(item).unwrap_or_default();
i64::try_from(approx_token_count(&serialized)).unwrap_or(i64::MAX)
}
}
acc.saturating_add(estimate_item_token_count(item))
});
Some(base_tokens.saturating_add(items_tokens))
@@ -128,6 +111,15 @@ impl ContextManager {
}
}
/// Pops the most recent item from the context, then asks `normalize` to
/// drop whatever sibling item corresponds to it (e.g. the call paired
/// with a removed output), keeping the history internally consistent.
///
/// Returns `true` when an item was removed, `false` if the context was
/// already empty.
pub(crate) fn remove_last_item(&mut self) -> bool {
    match self.items.pop() {
        Some(removed) => {
            normalize::remove_corresponding_for(&mut self.items, &removed);
            true
        }
        None => false,
    }
}
/// Replaces the entire tracked item list with `items`, discarding the
/// previous contents wholesale.
pub(crate) fn replace(&mut self, items: Vec<ResponseItem>) {
    self.items = items;
}
@@ -207,36 +199,42 @@ impl ContextManager {
);
}
fn get_non_last_reasoning_items_tokens(&self) -> usize {
// get reasoning items excluding all the ones after the last user message
fn get_non_last_reasoning_items_tokens(&self) -> i64 {
// Get reasoning items excluding all the ones after the last user message.
let Some(last_user_index) = self
.items
.iter()
.rposition(|item| matches!(item, ResponseItem::Message { role, .. } if role == "user"))
else {
return 0usize;
return 0;
};
let total_reasoning_bytes = self
.items
self.items
.iter()
.take(last_user_index)
.filter_map(|item| {
if let ResponseItem::Reasoning {
encrypted_content: Some(content),
..
} = item
{
Some(content.len())
} else {
None
}
.filter(|item| {
matches!(
item,
ResponseItem::Reasoning {
encrypted_content: Some(_),
..
}
)
})
.map(estimate_reasoning_length)
.fold(0usize, usize::saturating_add);
.fold(0i64, |acc, item| {
acc.saturating_add(estimate_item_token_count(item))
})
}
let token_estimate = approx_tokens_from_byte_count(total_reasoning_bytes);
token_estimate as usize
/// Estimated token cost of the contiguous run of codex-generated items
/// (per `is_codex_generated_item`) at the tail of the context.
///
/// Walks the items in reverse and stops at the first item the model
/// cannot regenerate on its own; everything before that boundary is
/// excluded from the estimate.
fn get_trailing_codex_generated_items_tokens(&self) -> i64 {
    self.items
        .iter()
        .rev()
        .take_while(|&item| is_codex_generated_item(item))
        .fold(0i64, |acc, item| {
            acc.saturating_add(estimate_item_token_count(item))
        })
}
/// When true, the server already accounted for past reasoning tokens and
@@ -247,10 +245,13 @@ impl ContextManager {
.as_ref()
.map(|info| info.last_token_usage.total_tokens)
.unwrap_or(0);
let trailing_codex_generated_tokens = self.get_trailing_codex_generated_items_tokens();
if server_reasoning_included {
last_tokens
last_tokens.saturating_add(trailing_codex_generated_tokens)
} else {
last_tokens.saturating_add(self.get_non_last_reasoning_items_tokens() as i64)
last_tokens
.saturating_add(self.get_non_last_reasoning_items_tokens())
.saturating_add(trailing_codex_generated_tokens)
}
}
@@ -332,6 +333,33 @@ fn estimate_reasoning_length(encoded_len: usize) -> usize {
.saturating_sub(650)
}
/// Approximates how many tokens a single response item contributes to
/// the model context.
///
/// - Ghost snapshots are free: they are stripped before sending.
/// - Reasoning items with encrypted content and compaction items are
///   estimated from the encrypted payload's byte length via
///   `estimate_reasoning_length`, since the ciphertext is not what the
///   model ultimately consumes.
/// - Everything else is serialized to JSON and counted directly.
fn estimate_item_token_count(item: &ResponseItem) -> i64 {
    // Saturate rather than panic if a count exceeds i64.
    let clamp = |tokens: usize| i64::try_from(tokens).unwrap_or(i64::MAX);
    match item {
        ResponseItem::GhostSnapshot { .. } => 0,
        ResponseItem::Reasoning {
            encrypted_content: Some(content),
            ..
        }
        | ResponseItem::Compaction {
            encrypted_content: content,
        } => {
            let estimated_bytes = estimate_reasoning_length(content.len());
            clamp(approx_tokens_from_byte_count(estimated_bytes))
        }
        other => {
            let json = serde_json::to_string(other).unwrap_or_default();
            clamp(approx_token_count(&json))
        }
    }
}
/// Whether this item can be regenerated by the model itself (tool call
/// outputs and developer messages), making it safe to trim from the tail
/// of the context when space is needed.
pub(crate) fn is_codex_generated_item(item: &ResponseItem) -> bool {
    match item {
        ResponseItem::FunctionCallOutput { .. } | ResponseItem::CustomToolCallOutput { .. } => {
            true
        }
        ResponseItem::Message { role, .. } => role == "developer",
        _ => false,
    }
}
pub(crate) fn is_user_turn_boundary(item: &ResponseItem) -> bool {
let ResponseItem::Message { role, content, .. } = item else {
return false;

View File

@@ -60,6 +60,23 @@ fn user_input_text_msg(text: &str) -> ResponseItem {
}
}
/// Test helper: builds a `FunctionCallOutput` item with the given call id
/// and textual content; all other payload fields take their defaults.
fn function_call_output(call_id: &str, content: &str) -> ResponseItem {
    let output = FunctionCallOutputPayload {
        content: content.to_owned(),
        ..Default::default()
    };
    ResponseItem::FunctionCallOutput {
        call_id: call_id.to_owned(),
        output,
    }
}
/// Test helper: builds a `CustomToolCallOutput` item from a call id and
/// its raw output string.
fn custom_tool_call_output(call_id: &str, output: &str) -> ResponseItem {
    ResponseItem::CustomToolCallOutput {
        call_id: call_id.to_owned(),
        output: output.to_owned(),
    }
}
fn reasoning_msg(text: &str) -> ResponseItem {
ResponseItem::Reasoning {
id: String::new(),
@@ -168,6 +185,63 @@ fn non_last_reasoning_tokens_ignore_entries_after_last_user() {
assert_eq!(history.get_non_last_reasoning_items_tokens(), 32);
}
#[test]
fn trailing_codex_generated_tokens_stop_at_first_non_generated_item() {
    // A tool output that appears before a user message is not part of the
    // trailing run and must not be counted.
    let tail_function = function_call_output("call-tail-1", "tail function output");
    let tail_custom = custom_tool_call_output("call-tail-2", "tail custom output");
    let history = create_history_with_items(vec![
        function_call_output("call-earlier", "earlier output"),
        user_msg("boundary item"),
        tail_function.clone(),
        tail_custom.clone(),
    ]);
    let expected = estimate_item_token_count(&tail_function)
        .saturating_add(estimate_item_token_count(&tail_custom));
    assert_eq!(history.get_trailing_codex_generated_items_tokens(), expected);
}
#[test]
fn trailing_codex_generated_tokens_exclude_function_call_tail() {
    // A trailing FunctionCall (the call itself, not its output) is not a
    // codex-generated item, so the trailing estimate should be zero.
    let trailing_call = ResponseItem::FunctionCall {
        id: None,
        name: "not-generated".to_string(),
        arguments: "{}".to_string(),
        call_id: "call-tail".to_string(),
    };
    let history = create_history_with_items(vec![trailing_call]);
    assert_eq!(history.get_trailing_codex_generated_items_tokens(), 0);
}
#[test]
fn total_token_usage_includes_only_trailing_codex_generated_items() {
    // Only the tool output after the last assistant message counts as
    // trailing; the output before the user boundary does not.
    let tail_output = custom_tool_call_output("tool-tail", "trailing output");
    let mut history = create_history_with_items(vec![
        function_call_output("call-before-message", "not trailing"),
        user_msg("boundary"),
        assistant_msg("assistant boundary"),
        tail_output.clone(),
    ]);
    let usage = TokenUsage {
        total_tokens: 100,
        ..Default::default()
    };
    history.update_token_info(&usage, None);
    assert_eq!(
        history.get_total_token_usage(true),
        100 + estimate_item_token_count(&tail_output)
    );
}
#[test]
fn get_history_for_prompt_drops_ghost_commits() {
let items = vec![ResponseItem::GhostSnapshot {
@@ -222,6 +296,30 @@ fn remove_first_item_removes_matching_call_for_output() {
assert_eq!(h.raw_items(), vec![]);
}
#[test]
fn remove_last_item_removes_matching_call_for_output() {
    // Popping a FunctionCallOutput should also drop the FunctionCall that
    // shares its call_id, leaving only the preceding user message.
    let call = ResponseItem::FunctionCall {
        id: None,
        name: "do_it".to_string(),
        arguments: "{}".to_string(),
        call_id: "call-delete-last".to_string(),
    };
    let output = ResponseItem::FunctionCallOutput {
        call_id: "call-delete-last".to_string(),
        output: FunctionCallOutputPayload {
            content: "ok".to_string(),
            ..Default::default()
        },
    };
    let mut h = create_history_with_items(vec![user_msg("before tool call"), call, output]);
    assert!(h.remove_last_item());
    assert_eq!(h.raw_items(), vec![user_msg("before tool call")]);
}
#[test]
fn replace_last_turn_images_replaces_tool_output_images() {
let items = vec![

View File

@@ -2,4 +2,5 @@ mod history;
mod normalize;
pub(crate) use history::ContextManager;
pub(crate) use history::is_codex_generated_item;
pub(crate) use history::is_user_turn_boundary;