Fix remote compaction estimator/payload instruction small mismatch (#10692)

## Summary
This PR fixes a deterministic mismatch in remote compaction where
pre-trim estimation and the `/v1/responses/compact` payload could use
different base instructions.

Before this change:
- pre-trim estimation used model-derived instructions
(`model_info.get_model_instructions(...)`)
- compact payload used session base instructions
(`sess.get_base_instructions()`)

After this change:
- remote pre-trim estimation and compact payload both use the same
`BaseInstructions` instance from session state.

## Changes
- Added a shared estimator entry point in `ContextManager`:
  - `estimate_token_count_with_base_instructions(&self, base_instructions:
    &BaseInstructions) -> Option<i64>`
- Kept `estimate_token_count(&TurnContext)` as a thin wrapper that
resolves model/personality instructions and delegates to the new helper.
- Updated remote compaction flow to fetch base instructions once and
reuse it for both:
  - trim preflight estimation
  - compact request payload construction
- Added regression coverage for parity and behavior:
  - unit test verifying explicit-base estimator behavior
  - integration test proving remote compaction uses session override
    instructions and trims accordingly

## Why this matters
This removes a deterministic divergence source where pre-trim could
think the request fits while the actual compact request exceeded context
because its instructions were longer/different.

## Scope
In scope:
- estimator/payload base-instructions parity in remote compaction

Out of scope:
- retry-on-`context_length_exceeded`
- compaction threshold/headroom policy changes
- broader trimming policy changes

## Codex author:
`codex fork 019c2b24-c2df-7b31-a482-fb8cf7a28559`
This commit is contained in:
Charley Cunningham
2026-02-04 23:24:06 -08:00
committed by GitHub
parent cd5f49a619
commit dc7007beaa
5 changed files with 314 additions and 10 deletions

View File

@@ -9,6 +9,7 @@ use crate::truncate::approx_tokens_from_byte_count;
use crate::truncate::truncate_function_output_items_with_policy;
use crate::truncate::truncate_text;
use crate::user_shell_command::is_user_shell_command_text;
use codex_protocol::models::BaseInstructions;
use codex_protocol::models::ContentItem;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputContentItem;
@@ -88,8 +89,18 @@ impl ContextManager {
/// Estimates the token footprint of the current context for `turn_context`:
/// the model/personality-derived base instructions plus all retained items.
///
/// Thin wrapper that resolves the instruction text and delegates to
/// `estimate_token_count_with_base_instructions`, so remote-compaction
/// preflight and payload construction share a single estimator.
pub(crate) fn estimate_token_count(&self, turn_context: &TurnContext) -> Option<i64> {
    let model_info = &turn_context.model_info;
    // Turn-level personality takes precedence over the config default.
    let personality = turn_context.personality.or(turn_context.config.personality);
    // NOTE(review): the rendered diff left the pre-refactor lines in place
    // (a shadowed `base_instructions` and an unused `base_tokens` that called
    // `get_model_instructions` twice); only the delegating form is kept here.
    let base_instructions = BaseInstructions {
        text: model_info.get_model_instructions(personality),
    };
    self.estimate_token_count_with_base_instructions(&base_instructions)
}
pub(crate) fn estimate_token_count_with_base_instructions(
&self,
base_instructions: &BaseInstructions,
) -> Option<i64> {
let base_tokens =
i64::try_from(approx_token_count(&base_instructions.text)).unwrap_or(i64::MAX);
let items_tokens = self.items.iter().fold(0i64, |acc, item| {
acc.saturating_add(estimate_item_token_count(item))

View File

@@ -2,6 +2,7 @@ use super::*;
use crate::truncate;
use crate::truncate::TruncationPolicy;
use codex_git::GhostCommit;
use codex_protocol::models::BaseInstructions;
use codex_protocol::models::ContentItem;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputContentItem;
@@ -103,6 +104,10 @@ fn truncate_exec_output(content: &str) -> String {
truncate::truncate_text(content, TruncationPolicy::Tokens(EXEC_FORMAT_MAX_TOKENS))
}
/// Rough token estimate for `text`: about one token per four bytes,
/// rounded up, saturating at `i64::MAX` if the count does not fit.
fn approx_token_count_for_text(text: &str) -> i64 {
    let approx_tokens = text.len().saturating_add(3) / 4;
    i64::try_from(approx_tokens).unwrap_or(i64::MAX)
}
#[test]
fn filters_non_api_messages() {
let mut h = ContextManager::default();
@@ -250,6 +255,28 @@ fn get_history_for_prompt_drops_ghost_commits() {
assert_eq!(filtered, vec![]);
}
#[test]
fn estimate_token_count_with_base_instructions_uses_provided_text() {
    // Two estimates over the same history must differ by exactly the
    // approximate token cost of the caller-provided base-instruction text.
    let history = create_history_with_items(vec![assistant_msg("hello from history")]);

    let small = BaseInstructions {
        text: "short".to_string(),
    };
    let large = BaseInstructions {
        text: "x".repeat(1_000),
    };

    let estimate_for = |base: &BaseInstructions| {
        history
            .estimate_token_count_with_base_instructions(base)
            .expect("token estimate")
    };
    let small_estimate = estimate_for(&small);
    let large_estimate = estimate_for(&large);

    // History contribution cancels out; only the base text differs.
    let base_delta =
        approx_token_count_for_text(&large.text) - approx_token_count_for_text(&small.text);
    assert_eq!(large_estimate - small_estimate, base_delta);
}
#[test]
fn remove_first_item_removes_matching_output_for_function_call() {
let items = vec![