mirror of
https://github.com/openai/codex.git
synced 2026-05-02 20:32:04 +03:00
Trim compaction input (#10374)
Two fixes:
1. Include trailing tool output in the total context size calculation. Otherwise, newly added outputs are ignored when checking whether compaction should run.
2. Trim trailing tool outputs/tool calls until the request fits into the model context size. Otherwise, the compaction endpoint will fail to compact. We only trim items that the model can reproduce (tool calls, tool call outputs).
This commit is contained in:
@@ -93,24 +93,7 @@ impl ContextManager {
|
||||
let base_tokens = i64::try_from(approx_token_count(&base_instructions)).unwrap_or(i64::MAX);
|
||||
|
||||
let items_tokens = self.items.iter().fold(0i64, |acc, item| {
|
||||
acc + match item {
|
||||
ResponseItem::GhostSnapshot { .. } => 0,
|
||||
ResponseItem::Reasoning {
|
||||
encrypted_content: Some(content),
|
||||
..
|
||||
}
|
||||
| ResponseItem::Compaction {
|
||||
encrypted_content: content,
|
||||
} => {
|
||||
let reasoning_bytes = estimate_reasoning_length(content.len());
|
||||
i64::try_from(approx_tokens_from_byte_count(reasoning_bytes))
|
||||
.unwrap_or(i64::MAX)
|
||||
}
|
||||
item => {
|
||||
let serialized = serde_json::to_string(item).unwrap_or_default();
|
||||
i64::try_from(approx_token_count(&serialized)).unwrap_or(i64::MAX)
|
||||
}
|
||||
}
|
||||
acc.saturating_add(estimate_item_token_count(item))
|
||||
});
|
||||
|
||||
Some(base_tokens.saturating_add(items_tokens))
|
||||
@@ -128,6 +111,15 @@ impl ContextManager {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn remove_last_item(&mut self) -> bool {
|
||||
if let Some(removed) = self.items.pop() {
|
||||
normalize::remove_corresponding_for(&mut self.items, &removed);
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Replaces the stored item list with `items`; the previous list is dropped.
pub(crate) fn replace(&mut self, items: Vec<ResponseItem>) {
|
||||
self.items = items;
|
||||
}
|
||||
@@ -207,36 +199,42 @@ impl ContextManager {
|
||||
);
|
||||
}
|
||||
|
||||
fn get_non_last_reasoning_items_tokens(&self) -> usize {
|
||||
// get reasoning items excluding all the ones after the last user message
|
||||
fn get_non_last_reasoning_items_tokens(&self) -> i64 {
|
||||
// Get reasoning items excluding all the ones after the last user message.
|
||||
let Some(last_user_index) = self
|
||||
.items
|
||||
.iter()
|
||||
.rposition(|item| matches!(item, ResponseItem::Message { role, .. } if role == "user"))
|
||||
else {
|
||||
return 0usize;
|
||||
return 0;
|
||||
};
|
||||
|
||||
let total_reasoning_bytes = self
|
||||
.items
|
||||
self.items
|
||||
.iter()
|
||||
.take(last_user_index)
|
||||
.filter_map(|item| {
|
||||
if let ResponseItem::Reasoning {
|
||||
encrypted_content: Some(content),
|
||||
..
|
||||
} = item
|
||||
{
|
||||
Some(content.len())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
.filter(|item| {
|
||||
matches!(
|
||||
item,
|
||||
ResponseItem::Reasoning {
|
||||
encrypted_content: Some(_),
|
||||
..
|
||||
}
|
||||
)
|
||||
})
|
||||
.map(estimate_reasoning_length)
|
||||
.fold(0usize, usize::saturating_add);
|
||||
.fold(0i64, |acc, item| {
|
||||
acc.saturating_add(estimate_item_token_count(item))
|
||||
})
|
||||
}
|
||||
|
||||
let token_estimate = approx_tokens_from_byte_count(total_reasoning_bytes);
|
||||
token_estimate as usize
|
||||
fn get_trailing_codex_generated_items_tokens(&self) -> i64 {
|
||||
let mut total = 0i64;
|
||||
for item in self.items.iter().rev() {
|
||||
if !is_codex_generated_item(item) {
|
||||
break;
|
||||
}
|
||||
total = total.saturating_add(estimate_item_token_count(item));
|
||||
}
|
||||
total
|
||||
}
|
||||
|
||||
/// When true, the server already accounted for past reasoning tokens and
|
||||
@@ -247,10 +245,13 @@ impl ContextManager {
|
||||
.as_ref()
|
||||
.map(|info| info.last_token_usage.total_tokens)
|
||||
.unwrap_or(0);
|
||||
let trailing_codex_generated_tokens = self.get_trailing_codex_generated_items_tokens();
|
||||
if server_reasoning_included {
|
||||
last_tokens
|
||||
last_tokens.saturating_add(trailing_codex_generated_tokens)
|
||||
} else {
|
||||
last_tokens.saturating_add(self.get_non_last_reasoning_items_tokens() as i64)
|
||||
last_tokens
|
||||
.saturating_add(self.get_non_last_reasoning_items_tokens())
|
||||
.saturating_add(trailing_codex_generated_tokens)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -332,6 +333,33 @@ fn estimate_reasoning_length(encoded_len: usize) -> usize {
|
||||
.saturating_sub(650)
|
||||
}
|
||||
|
||||
fn estimate_item_token_count(item: &ResponseItem) -> i64 {
|
||||
match item {
|
||||
ResponseItem::GhostSnapshot { .. } => 0,
|
||||
ResponseItem::Reasoning {
|
||||
encrypted_content: Some(content),
|
||||
..
|
||||
}
|
||||
| ResponseItem::Compaction {
|
||||
encrypted_content: content,
|
||||
} => {
|
||||
let reasoning_bytes = estimate_reasoning_length(content.len());
|
||||
i64::try_from(approx_tokens_from_byte_count(reasoning_bytes)).unwrap_or(i64::MAX)
|
||||
}
|
||||
item => {
|
||||
let serialized = serde_json::to_string(item).unwrap_or_default();
|
||||
i64::try_from(approx_token_count(&serialized)).unwrap_or(i64::MAX)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn is_codex_generated_item(item: &ResponseItem) -> bool {
|
||||
matches!(
|
||||
item,
|
||||
ResponseItem::FunctionCallOutput { .. } | ResponseItem::CustomToolCallOutput { .. }
|
||||
) || matches!(item, ResponseItem::Message { role, .. } if role == "developer")
|
||||
}
|
||||
|
||||
pub(crate) fn is_user_turn_boundary(item: &ResponseItem) -> bool {
|
||||
let ResponseItem::Message { role, content, .. } = item else {
|
||||
return false;
|
||||
|
||||
@@ -60,6 +60,23 @@ fn user_input_text_msg(text: &str) -> ResponseItem {
|
||||
}
|
||||
}
|
||||
|
||||
fn function_call_output(call_id: &str, content: &str) -> ResponseItem {
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: content.to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn custom_tool_call_output(call_id: &str, output: &str) -> ResponseItem {
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: call_id.to_string(),
|
||||
output: output.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn reasoning_msg(text: &str) -> ResponseItem {
|
||||
ResponseItem::Reasoning {
|
||||
id: String::new(),
|
||||
@@ -168,6 +185,63 @@ fn non_last_reasoning_tokens_ignore_entries_after_last_user() {
|
||||
assert_eq!(history.get_non_last_reasoning_items_tokens(), 32);
|
||||
}
|
||||
|
||||
/// Only the tool outputs after the last non-generated item (the user message)
/// contribute to the trailing estimate; the earlier output is ignored.
#[test]
fn trailing_codex_generated_tokens_stop_at_first_non_generated_item() {
    let tail = [
        function_call_output("call-tail-1", "tail function output"),
        custom_tool_call_output("call-tail-2", "tail custom output"),
    ];
    // Expected value: saturating sum of the estimates of the two tail items.
    let expected_tokens = tail
        .iter()
        .map(estimate_item_token_count)
        .fold(0i64, i64::saturating_add);

    let mut items = vec![
        function_call_output("call-earlier", "earlier output"),
        user_msg("boundary item"),
    ];
    items.extend(tail);
    let history = create_history_with_items(items);

    let actual_tokens = history.get_trailing_codex_generated_items_tokens();
    assert_eq!(actual_tokens, expected_tokens);
}
|
||||
|
||||
/// A trailing `FunctionCall` is not a codex-generated item, so the trailing
/// estimate for a history ending in one is zero.
#[test]
fn trailing_codex_generated_tokens_exclude_function_call_tail() {
    let tail_call = ResponseItem::FunctionCall {
        id: None,
        name: "not-generated".to_string(),
        arguments: "{}".to_string(),
        call_id: "call-tail".to_string(),
    };
    let history = create_history_with_items(vec![tail_call]);

    let trailing_tokens = history.get_trailing_codex_generated_items_tokens();
    assert_eq!(trailing_tokens, 0);
}
|
||||
|
||||
/// With server-side reasoning accounting enabled, the total is the last
/// reported usage plus the estimate for the tool output that follows the
/// assistant message; the output before the user message is not added.
#[test]
fn total_token_usage_includes_only_trailing_codex_generated_items() {
    let trailing_output = custom_tool_call_output("tool-tail", "trailing output");
    let expected_total = 100 + estimate_item_token_count(&trailing_output);

    let mut history = create_history_with_items(vec![
        function_call_output("call-before-message", "not trailing"),
        user_msg("boundary"),
        assistant_msg("assistant boundary"),
        trailing_output,
    ]);

    let reported_usage = TokenUsage {
        total_tokens: 100,
        ..Default::default()
    };
    history.update_token_info(&reported_usage, None);

    assert_eq!(history.get_total_token_usage(true), expected_total);
}
|
||||
|
||||
#[test]
|
||||
fn get_history_for_prompt_drops_ghost_commits() {
|
||||
let items = vec![ResponseItem::GhostSnapshot {
|
||||
@@ -222,6 +296,30 @@ fn remove_first_item_removes_matching_call_for_output() {
|
||||
assert_eq!(h.raw_items(), vec![]);
|
||||
}
|
||||
|
||||
/// Removing a trailing function-call output must also remove the function
/// call with the same `call_id`, leaving only the preceding user message.
#[test]
fn remove_last_item_removes_matching_call_for_output() {
    let leading_user = user_msg("before tool call");
    let call = ResponseItem::FunctionCall {
        id: None,
        name: "do_it".to_string(),
        arguments: "{}".to_string(),
        call_id: "call-delete-last".to_string(),
    };
    let output = function_call_output("call-delete-last", "ok");
    let mut h = create_history_with_items(vec![leading_user.clone(), call, output]);

    let removed = h.remove_last_item();

    assert!(removed);
    assert_eq!(h.raw_items(), vec![leading_user]);
}
|
||||
|
||||
#[test]
|
||||
fn replace_last_turn_images_replaces_tool_output_images() {
|
||||
let items = vec![
|
||||
|
||||
@@ -2,4 +2,5 @@ mod history;
|
||||
mod normalize;
|
||||
|
||||
pub(crate) use history::ContextManager;
|
||||
pub(crate) use history::is_codex_generated_item;
|
||||
pub(crate) use history::is_user_turn_boundary;
|
||||
|
||||
Reference in New Issue
Block a user