Mirror of https://github.com/openai/codex.git
Fix small estimator/payload instruction mismatch in remote compaction (#10692)
## Summary

This PR fixes a deterministic mismatch in remote compaction where pre-trim estimation and the `/v1/responses/compact` payload could use different base instructions.

Before this change:

- pre-trim estimation used model-derived instructions (`model_info.get_model_instructions(...)`)
- the compact payload used session base instructions (`sess.get_base_instructions()`)

After this change:

- remote pre-trim estimation and the compact payload both use the same `BaseInstructions` instance from session state.

## Changes

- Added a shared estimator entry point in `ContextManager`:
  - `estimate_token_count_with_base_instructions(&self, base_instructions: &BaseInstructions) -> Option<i64>`
- Kept `estimate_token_count(&TurnContext)` as a thin wrapper that resolves model/personality instructions and delegates to the new helper.
- Updated the remote compaction flow to fetch the base instructions once and reuse them for both:
  - trim preflight estimation
  - compact request payload construction
- Added regression coverage for parity and behavior:
  - a unit test verifying explicit-base estimator behavior
  - an integration test proving remote compaction uses session override instructions and trims accordingly

## Why this matters

This removes a deterministic source of divergence where the pre-trim estimate could conclude the request fits while the actual compact request exceeded the context window because its instructions were longer or different.

## Scope

In scope:

- estimator/payload base-instructions parity in remote compaction

Out of scope:

- retry-on-`context_length_exceeded`
- compaction threshold/headroom policy changes
- broader trimming policy changes

## Codex

author: `codex fork 019c2b24-c2df-7b31-a482-fb8cf7a28559`
committed by GitHub
parent cd5f49a619 · commit dc7007beaa
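Before the diff itself, a minimal self-contained sketch of the call pattern the summary describes: fetch the base instructions once, then thread the same value through both the trim preflight estimate and the compact payload. Everything below is a stand-in except the `BaseInstructions` shape and the estimator name, which come straight from the diff; `run_remote_compaction`, the toy `ContextManager` internals, and the token window are invented for illustration.

```rust
// Toy stand-ins; only BaseInstructions and the estimator name mirror the PR.
struct BaseInstructions {
    text: String,
}

struct ContextManager {
    item_tokens: i64, // stand-in for the summed per-item estimates
}

impl ContextManager {
    // Same ~4-bytes-per-token heuristic the diff uses for base instructions.
    fn estimate_token_count_with_base_instructions(&self, base: &BaseInstructions) -> Option<i64> {
        let base_tokens =
            i64::try_from(base.text.len().saturating_add(3) / 4).unwrap_or(i64::MAX);
        Some(base_tokens.saturating_add(self.item_tokens))
    }
}

// Stand-in for the remote compaction flow after the fix.
fn run_remote_compaction(base: &BaseInstructions, ctx: &ContextManager, window: i64) {
    // 1. Trim preflight: estimate with the session's base instructions.
    let estimate = ctx
        .estimate_token_count_with_base_instructions(base)
        .unwrap_or(i64::MAX);
    if estimate > window {
        println!("would trim ~{} tokens before compacting", estimate - window);
    }
    // 2. Payload: the same base.text goes into /v1/responses/compact, so the
    //    estimate and the request can no longer disagree on instruction length.
    println!("compact payload instructions: {} bytes", base.text.len());
}

fn main() {
    let base = BaseInstructions {
        text: "session override instructions".to_string(),
    };
    let ctx = ContextManager { item_tokens: 100 };
    run_remote_compaction(&base, &ctx, 64);
}
```

Before this change, step 1 would have re-derived instructions from the model info while step 2 read them from the session, which is exactly the divergence the PR removes.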
```diff
@@ -9,6 +9,7 @@ use crate::truncate::approx_tokens_from_byte_count;
 use crate::truncate::truncate_function_output_items_with_policy;
 use crate::truncate::truncate_text;
 use crate::user_shell_command::is_user_shell_command_text;
+use codex_protocol::models::BaseInstructions;
 use codex_protocol::models::ContentItem;
 use codex_protocol::models::FunctionCallOutputBody;
 use codex_protocol::models::FunctionCallOutputContentItem;
```
```diff
@@ -88,8 +89,18 @@ impl ContextManager {
     pub(crate) fn estimate_token_count(&self, turn_context: &TurnContext) -> Option<i64> {
         let model_info = &turn_context.model_info;
         let personality = turn_context.personality.or(turn_context.config.personality);
-        let base_instructions = model_info.get_model_instructions(personality);
-        let base_tokens = i64::try_from(approx_token_count(&base_instructions)).unwrap_or(i64::MAX);
+        let base_instructions = BaseInstructions {
+            text: model_info.get_model_instructions(personality),
+        };
+        self.estimate_token_count_with_base_instructions(&base_instructions)
+    }
+
+    pub(crate) fn estimate_token_count_with_base_instructions(
+        &self,
+        base_instructions: &BaseInstructions,
+    ) -> Option<i64> {
+        let base_tokens =
+            i64::try_from(approx_token_count(&base_instructions.text)).unwrap_or(i64::MAX);
 
         let items_tokens = self.items.iter().fold(0i64, |acc, item| {
             acc.saturating_add(estimate_item_token_count(item))
```
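The wrapper/helper split keeps existing call sites untouched while exposing the explicit-base entry point. A hedged sketch of the invariant it establishes, reusing the names from the hunk above (`manager` and an in-scope `turn_context` are assumed): for any turn context, the wrapper returns exactly what the explicit-base estimator returns on the derived instructions.

```rust
// Sketch only: assumes the TurnContext fields shown in the hunk above.
let personality = turn_context.personality.or(turn_context.config.personality);
let derived = BaseInstructions {
    text: turn_context.model_info.get_model_instructions(personality),
};
assert_eq!(
    manager.estimate_token_count(&turn_context),
    manager.estimate_token_count_with_base_instructions(&derived),
);
```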
```diff
@@ -2,6 +2,7 @@ use super::*;
 use crate::truncate;
 use crate::truncate::TruncationPolicy;
 use codex_git::GhostCommit;
+use codex_protocol::models::BaseInstructions;
 use codex_protocol::models::ContentItem;
 use codex_protocol::models::FunctionCallOutputBody;
 use codex_protocol::models::FunctionCallOutputContentItem;
```
```diff
@@ -103,6 +104,10 @@ fn truncate_exec_output(content: &str) -> String {
     truncate::truncate_text(content, TruncationPolicy::Tokens(EXEC_FORMAT_MAX_TOKENS))
 }
 
+fn approx_token_count_for_text(text: &str) -> i64 {
+    i64::try_from(text.len().saturating_add(3) / 4).unwrap_or(i64::MAX)
+}
+
 #[test]
 fn filters_non_api_messages() {
     let mut h = ContextManager::default();
```
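This helper mirrors the estimator's ~4-bytes-per-token approximation (the delta assertion in the next hunk only holds because the two agree), which makes the expected values easy to check by hand: the 1,000-byte long base yields (1000 + 3) / 4 = 250 tokens, the 5-byte `"short"` base yields (5 + 3) / 4 = 2, so the test below expects the two estimates to differ by exactly 248.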
```diff
@@ -250,6 +255,28 @@ fn get_history_for_prompt_drops_ghost_commits() {
     assert_eq!(filtered, vec![]);
 }
 
+#[test]
+fn estimate_token_count_with_base_instructions_uses_provided_text() {
+    let history = create_history_with_items(vec![assistant_msg("hello from history")]);
+    let short_base = BaseInstructions {
+        text: "short".to_string(),
+    };
+    let long_base = BaseInstructions {
+        text: "x".repeat(1_000),
+    };
+
+    let short_estimate = history
+        .estimate_token_count_with_base_instructions(&short_base)
+        .expect("token estimate");
+    let long_estimate = history
+        .estimate_token_count_with_base_instructions(&long_base)
+        .expect("token estimate");
+
+    let expected_delta = approx_token_count_for_text(&long_base.text)
+        - approx_token_count_for_text(&short_base.text);
+    assert_eq!(long_estimate - short_estimate, expected_delta);
+}
+
 #[test]
 fn remove_first_item_removes_matching_output_for_function_call() {
     let items = vec![
```