Fix remote compaction estimator/payload instruction small mismatch (#10692)

## Summary
This PR fixes a deterministic mismatch in remote compaction where
pre-trim estimation and the `/v1/responses/compact` payload could use
different base instructions.

Before this change:
- pre-trim estimation used model-derived instructions
(`model_info.get_model_instructions(...)`)
- compact payload used session base instructions
(`sess.get_base_instructions()`)

After this change:
- remote pre-trim estimation and compact payload both use the same
`BaseInstructions` instance from session state.

## Changes
- Added a shared estimator entry point in `ContextManager`:
  - `estimate_token_count_with_base_instructions(&self, base_instructions:
    &BaseInstructions) -> Option<i64>`
- Kept `estimate_token_count(&TurnContext)` as a thin wrapper that
resolves model/personality instructions and delegates to the new helper.
- Updated remote compaction flow to fetch base instructions once and
reuse it for both:
  - trim preflight estimation
  - compact request payload construction
- Added regression coverage for parity and behavior:
  - unit test verifying explicit-base estimator behavior
  - integration test proving remote compaction uses session override
    instructions and trims accordingly

## Why this matters
This removes a deterministic divergence source where pre-trim could
think the request fits while the actual compact request exceeded context
because its instructions were longer/different.

## Scope
In scope:
- estimator/payload base-instructions parity in remote compaction

Out of scope:
- retry-on-`context_length_exceeded`
- compaction threshold/headroom policy changes
- broader trimming policy changes

## Codex author:
`codex fork 019c2b24-c2df-7b31-a482-fb8cf7a28559`
This commit is contained in:
Charley Cunningham
2026-02-04 23:24:06 -08:00
committed by GitHub
parent cd5f49a619
commit dc7007beaa
5 changed files with 314 additions and 10 deletions

View File

@@ -9,6 +9,7 @@ use crate::truncate::approx_tokens_from_byte_count;
use crate::truncate::truncate_function_output_items_with_policy;
use crate::truncate::truncate_text;
use crate::user_shell_command::is_user_shell_command_text;
use codex_protocol::models::BaseInstructions;
use codex_protocol::models::ContentItem;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputContentItem;
@@ -88,8 +89,18 @@ impl ContextManager {
/// Estimates the token footprint of the current context for `turn_context`:
/// the model/personality-derived base instructions plus all retained items.
///
/// Thin wrapper that resolves the instruction text and delegates to
/// `estimate_token_count_with_base_instructions`, so remote-compaction
/// preflight and payload construction share a single estimator.
pub(crate) fn estimate_token_count(&self, turn_context: &TurnContext) -> Option<i64> {
    let model_info = &turn_context.model_info;
    // Turn-level personality takes precedence over the config default.
    let personality = turn_context.personality.or(turn_context.config.personality);
    // NOTE(review): the rendered diff left the pre-refactor lines in place
    // (a shadowed `base_instructions` and an unused `base_tokens` that called
    // `get_model_instructions` twice); only the delegating form is kept here.
    let base_instructions = BaseInstructions {
        text: model_info.get_model_instructions(personality),
    };
    self.estimate_token_count_with_base_instructions(&base_instructions)
}
pub(crate) fn estimate_token_count_with_base_instructions(
&self,
base_instructions: &BaseInstructions,
) -> Option<i64> {
let base_tokens =
i64::try_from(approx_token_count(&base_instructions.text)).unwrap_or(i64::MAX);
let items_tokens = self.items.iter().fold(0i64, |acc, item| {
acc.saturating_add(estimate_item_token_count(item))

View File

@@ -2,6 +2,7 @@ use super::*;
use crate::truncate;
use crate::truncate::TruncationPolicy;
use codex_git::GhostCommit;
use codex_protocol::models::BaseInstructions;
use codex_protocol::models::ContentItem;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputContentItem;
@@ -103,6 +104,10 @@ fn truncate_exec_output(content: &str) -> String {
truncate::truncate_text(content, TruncationPolicy::Tokens(EXEC_FORMAT_MAX_TOKENS))
}
/// Rough token estimate for `text`: about one token per four bytes,
/// rounded up, saturating at `i64::MAX` if the count does not fit.
fn approx_token_count_for_text(text: &str) -> i64 {
    let approx_tokens = text.len().saturating_add(3) / 4;
    i64::try_from(approx_tokens).unwrap_or(i64::MAX)
}
#[test]
fn filters_non_api_messages() {
let mut h = ContextManager::default();
@@ -250,6 +255,28 @@ fn get_history_for_prompt_drops_ghost_commits() {
assert_eq!(filtered, vec![]);
}
#[test]
fn estimate_token_count_with_base_instructions_uses_provided_text() {
    // Two estimates over the same history must differ by exactly the
    // approximate token cost of the caller-provided base-instruction text.
    let history = create_history_with_items(vec![assistant_msg("hello from history")]);

    let small = BaseInstructions {
        text: "short".to_string(),
    };
    let large = BaseInstructions {
        text: "x".repeat(1_000),
    };

    let estimate_for = |base: &BaseInstructions| {
        history
            .estimate_token_count_with_base_instructions(base)
            .expect("token estimate")
    };
    let small_estimate = estimate_for(&small);
    let large_estimate = estimate_for(&large);

    // History contribution cancels out; only the base text differs.
    let base_delta =
        approx_token_count_for_text(&large.text) - approx_token_count_for_text(&small.text);
    assert_eq!(large_estimate - small_estimate, base_delta);
}
#[test]
fn remove_first_item_removes_matching_output_for_function_call() {
let items = vec![