mirror of
https://github.com/openai/codex.git
synced 2026-05-03 04:42:20 +03:00
core: add focused diagnostics for remote compaction overflows (#11133)
## Summary

- add targeted remote-compaction failure diagnostics in compact_remote logging
- log the specific values needed to explain overflow timing:
  - last_api_response_total_tokens
  - estimated_tokens_of_items_added_since_last_successful_api_response
  - estimated_bytes_of_items_added_since_last_successful_api_response
  - failing_compaction_request_body_bytes
- simplify breakdown naming and remove last_api_response_total_bytes_estimate (it was an approximation and not useful for debugging)

## Why

When compaction fails with context_length_exceeded, we need concrete, low-ambiguity numbers that map directly to:
1) what the API most recently reported, and
2) what local history added since then.

This keeps the failure logs actionable without adding broad, noisy metrics.

## Testing

- just fmt
- cargo test -p codex-core
This commit is contained in:
committed by
GitHub
parent
f88667042e
commit
9450cd9ce5
@@ -5,7 +5,7 @@ use crate::instructions::UserInstructions;
|
||||
use crate::session_prefix::is_session_prefix;
|
||||
use crate::truncate::TruncationPolicy;
|
||||
use crate::truncate::approx_token_count;
|
||||
use crate::truncate::approx_tokens_from_byte_count;
|
||||
use crate::truncate::approx_tokens_from_byte_count_i64;
|
||||
use crate::truncate::truncate_function_output_items_with_policy;
|
||||
use crate::truncate::truncate_text;
|
||||
use crate::user_shell_command::is_user_shell_command_text;
|
||||
@@ -27,6 +27,14 @@ pub(crate) struct ContextManager {
|
||||
token_info: Option<TokenUsageInfo>,
|
||||
}
|
||||
|
||||
/// Diagnostic snapshot of estimated token/byte accounting, logged when a
/// remote compaction request overflows (e.g. context_length_exceeded).
///
/// All sums are saturating estimates; see the corresponding accessors on
/// `ContextManager` for how each field is computed.
#[derive(Debug, Clone, Copy, Default)]
pub(crate) struct TotalTokenUsageBreakdown {
    // Total tokens reported by the most recent successful API response
    // (0 when no response has been recorded yet).
    pub last_api_response_total_tokens: i64,
    // Estimated model-visible bytes summed over every item in history.
    pub all_history_items_model_visible_bytes: i64,
    // Estimated tokens of items appended locally after the last
    // model-generated item (i.e. since the last successful API response).
    pub estimated_tokens_of_items_added_since_last_successful_api_response: i64,
    // Estimated model-visible bytes of those same locally-appended items.
    pub estimated_bytes_of_items_added_since_last_successful_api_response: i64,
}
|
||||
|
||||
impl ContextManager {
|
||||
pub(crate) fn new() -> Self {
|
||||
Self {
|
||||
@@ -102,9 +110,11 @@ impl ContextManager {
|
||||
let base_tokens =
|
||||
i64::try_from(approx_token_count(&base_instructions.text)).unwrap_or(i64::MAX);
|
||||
|
||||
let items_tokens = self.items.iter().fold(0i64, |acc, item| {
|
||||
acc.saturating_add(estimate_item_token_count(item))
|
||||
});
|
||||
let items_tokens = self
|
||||
.items
|
||||
.iter()
|
||||
.map(estimate_item_token_count)
|
||||
.fold(0i64, i64::saturating_add);
|
||||
|
||||
Some(base_tokens.saturating_add(items_tokens))
|
||||
}
|
||||
@@ -231,9 +241,8 @@ impl ContextManager {
|
||||
}
|
||||
)
|
||||
})
|
||||
.fold(0i64, |acc, item| {
|
||||
acc.saturating_add(estimate_item_token_count(item))
|
||||
})
|
||||
.map(estimate_item_token_count)
|
||||
.fold(0i64, i64::saturating_add)
|
||||
}
|
||||
|
||||
// These are local items added after the most recent model-emitted item.
|
||||
@@ -247,14 +256,6 @@ impl ContextManager {
|
||||
&self.items[start..]
|
||||
}
|
||||
|
||||
fn get_items_after_last_model_generated_tokens(&self) -> i64 {
|
||||
self.items_after_last_model_generated_item()
|
||||
.iter()
|
||||
.fold(0i64, |acc, item| {
|
||||
acc.saturating_add(estimate_item_token_count(item))
|
||||
})
|
||||
}
|
||||
|
||||
/// When true, the server already accounted for past reasoning tokens and
|
||||
/// the client should not re-estimate them.
|
||||
pub(crate) fn get_total_token_usage(&self, server_reasoning_included: bool) -> i64 {
|
||||
@@ -263,8 +264,11 @@ impl ContextManager {
|
||||
.as_ref()
|
||||
.map(|info| info.last_token_usage.total_tokens)
|
||||
.unwrap_or(0);
|
||||
let items_after_last_model_generated_tokens =
|
||||
self.get_items_after_last_model_generated_tokens();
|
||||
let items_after_last_model_generated_tokens = self
|
||||
.items_after_last_model_generated_item()
|
||||
.iter()
|
||||
.map(estimate_item_token_count)
|
||||
.fold(0i64, i64::saturating_add);
|
||||
if server_reasoning_included {
|
||||
last_tokens.saturating_add(items_after_last_model_generated_tokens)
|
||||
} else {
|
||||
@@ -274,6 +278,34 @@ impl ContextManager {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn get_total_token_usage_breakdown(&self) -> TotalTokenUsageBreakdown {
|
||||
let last_usage = self
|
||||
.token_info
|
||||
.as_ref()
|
||||
.map(|info| info.last_token_usage.clone())
|
||||
.unwrap_or_default();
|
||||
let items_after_last_model_generated = self.items_after_last_model_generated_item();
|
||||
|
||||
TotalTokenUsageBreakdown {
|
||||
last_api_response_total_tokens: last_usage.total_tokens,
|
||||
all_history_items_model_visible_bytes: self
|
||||
.items
|
||||
.iter()
|
||||
.map(estimate_response_item_model_visible_bytes)
|
||||
.fold(0i64, i64::saturating_add),
|
||||
estimated_tokens_of_items_added_since_last_successful_api_response:
|
||||
items_after_last_model_generated
|
||||
.iter()
|
||||
.map(estimate_item_token_count)
|
||||
.fold(0i64, i64::saturating_add),
|
||||
estimated_bytes_of_items_added_since_last_successful_api_response:
|
||||
items_after_last_model_generated
|
||||
.iter()
|
||||
.map(estimate_response_item_model_visible_bytes)
|
||||
.fold(0i64, i64::saturating_add),
|
||||
}
|
||||
}
|
||||
|
||||
/// This function enforces a couple of invariants on the in-memory history:
|
||||
/// 1. every call (function/custom) has a corresponding output entry
|
||||
/// 2. every output has a corresponding call entry
|
||||
@@ -357,6 +389,11 @@ fn estimate_reasoning_length(encoded_len: usize) -> usize {
|
||||
}
|
||||
|
||||
fn estimate_item_token_count(item: &ResponseItem) -> i64 {
|
||||
let model_visible_bytes = estimate_response_item_model_visible_bytes(item);
|
||||
approx_tokens_from_byte_count_i64(model_visible_bytes)
|
||||
}
|
||||
|
||||
pub(crate) fn estimate_response_item_model_visible_bytes(item: &ResponseItem) -> i64 {
|
||||
match item {
|
||||
ResponseItem::GhostSnapshot { .. } => 0,
|
||||
ResponseItem::Reasoning {
|
||||
@@ -365,14 +402,10 @@ fn estimate_item_token_count(item: &ResponseItem) -> i64 {
|
||||
}
|
||||
| ResponseItem::Compaction {
|
||||
encrypted_content: content,
|
||||
} => {
|
||||
let reasoning_bytes = estimate_reasoning_length(content.len());
|
||||
i64::try_from(approx_tokens_from_byte_count(reasoning_bytes)).unwrap_or(i64::MAX)
|
||||
}
|
||||
item => {
|
||||
let serialized = serde_json::to_string(item).unwrap_or_default();
|
||||
i64::try_from(approx_token_count(&serialized)).unwrap_or(i64::MAX)
|
||||
}
|
||||
} => i64::try_from(estimate_reasoning_length(content.len())).unwrap_or(i64::MAX),
|
||||
item => serde_json::to_string(item)
|
||||
.map(|serialized| i64::try_from(serialized.len()).unwrap_or(i64::MAX))
|
||||
.unwrap_or_default(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -193,7 +193,11 @@ fn items_after_last_model_generated_tokens_include_user_and_tool_output() {
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
history.get_items_after_last_model_generated_tokens(),
|
||||
history
|
||||
.items_after_last_model_generated_item()
|
||||
.iter()
|
||||
.map(estimate_item_token_count)
|
||||
.fold(0i64, i64::saturating_add),
|
||||
expected_tokens
|
||||
);
|
||||
}
|
||||
@@ -202,7 +206,14 @@ fn items_after_last_model_generated_tokens_include_user_and_tool_output() {
|
||||
fn items_after_last_model_generated_tokens_are_zero_without_model_generated_items() {
|
||||
let history = create_history_with_items(vec![user_msg("no model output yet")]);
|
||||
|
||||
assert_eq!(history.get_items_after_last_model_generated_tokens(), 0);
|
||||
assert_eq!(
|
||||
history
|
||||
.items_after_last_model_generated_item()
|
||||
.iter()
|
||||
.map(estimate_item_token_count)
|
||||
.fold(0i64, i64::saturating_add),
|
||||
0
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -2,5 +2,7 @@ mod history;
|
||||
mod normalize;
|
||||
|
||||
pub(crate) use history::ContextManager;
|
||||
pub(crate) use history::TotalTokenUsageBreakdown;
|
||||
pub(crate) use history::estimate_response_item_model_visible_bytes;
|
||||
pub(crate) use history::is_codex_generated_item;
|
||||
pub(crate) use history::is_user_turn_boundary;
|
||||
|
||||
Reference in New Issue
Block a user