Compare commits

...

4 Commits

Author    SHA1        Message                                             Date
jif-oai   72ea30d900  Update compact.rs                                   2025-12-19 17:28:54 +01:00
jif-oai   574cf183c7  fix tests                                           2025-12-19 17:03:54 +01:00
jif-oai   d51284d37a  Add a test                                          2025-12-19 13:01:36 +01:00
jif-oai   90a9597afa  feat: max token usage with estimate for compaction  2025-12-19 12:50:30 +01:00
2 changed files with 137 additions and 17 deletions
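
In short, per the "feat: max token usage with estimate for compaction" commit: the auto-compaction trigger now compares the limit against the maximum of the provider-reported token usage and a local estimate computed from the conversation history, so a provider that reports zero or stale usage can no longer keep compaction from firing. Below is a minimal, self-contained sketch of that pattern; the estimate_tokens helper and its ~4-chars-per-token heuristic are illustrative assumptions, not the actual estimate_token_count implementation.

    // Illustrative sketch only; assumes ~4 chars per token, which is not
    // necessarily how the real estimator works.
    fn estimate_tokens(history_text: &str) -> i64 {
        (history_text.len() as i64 + 3) / 4
    }

    fn should_auto_compact(reported_usage: i64, history_text: &str, limit: i64) -> bool {
        // Trust whichever signal is larger: a provider that reports zero
        // usage still trips the limit once the estimated history size
        // crosses it.
        reported_usage.max(estimate_tokens(history_text)) >= limit
    }

    fn main() {
        let history = "x".repeat(40_000); // mirrors the large message in the new test
        // Reported usage is zero, but the ~10_000-token estimate exceeds the limit.
        assert!(should_auto_compact(0, &history, 9_000));
        // Reported usage alone still triggers as before.
        assert!(should_auto_compact(15_000, "short history", 9_000));
    }

In the diff itself the two signals are get_total_token_usage() and the history's estimate_token_count(turn_context), combined in the new get_total_token_usage_with_estimate helper.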

View File

@@ -764,6 +764,16 @@ impl Session {
         state.get_total_token_usage()
     }
 
+    async fn get_total_token_usage_with_estimate(&self, turn_context: &TurnContext) -> i64 {
+        let reported = self.get_total_token_usage().await;
+        let estimated = self
+            .clone_history()
+            .await
+            .estimate_token_count(turn_context)
+            .unwrap_or(0);
+        reported.max(estimated)
+    }
+
     async fn record_initial_history(&self, conversation_history: InitialHistory) {
         let turn_context = self.new_default_turn().await;
         match conversation_history {
@@ -2233,7 +2243,9 @@ pub(crate) async fn run_task(
         .get_model_family()
         .auto_compact_token_limit()
         .unwrap_or(i64::MAX);
-    let total_usage_tokens = sess.get_total_token_usage().await;
+    let total_usage_tokens = sess
+        .get_total_token_usage_with_estimate(&turn_context)
+        .await;
     if total_usage_tokens >= auto_compact_limit {
         run_auto_compact(&sess, &turn_context).await;
     }
@@ -2315,7 +2327,9 @@ pub(crate) async fn run_task(
             needs_follow_up,
             last_agent_message: turn_last_agent_message,
         } = turn_output;
-        let total_usage_tokens = sess.get_total_token_usage().await;
+        let total_usage_tokens = sess
+            .get_total_token_usage_with_estimate(&turn_context)
+            .await;
         let token_limit_reached = total_usage_tokens >= auto_compact_limit;
         // as long as compaction works well in getting us way below the token limit, we shouldn't worry about being in an infinite loop.

View File

@@ -1228,6 +1228,106 @@ async fn auto_compact_runs_after_token_limit_hit() {
     );
 }
 
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn auto_compact_uses_estimate_when_usage_is_zero() {
+    skip_if_no_network!();
+
+    let server = start_mock_server().await;
+    let large_user_msg = "x".repeat(40_000);
+    let sse1 = sse(vec![
+        ev_assistant_message("m1", FIRST_REPLY),
+        ev_completed("r1"),
+    ]);
+    let sse2 = sse(vec![
+        ev_assistant_message("m2", AUTO_SUMMARY_TEXT),
+        ev_completed("r2"),
+    ]);
+    let sse3 = sse(vec![
+        ev_assistant_message("m3", FINAL_REPLY),
+        ev_completed("r3"),
+    ]);
+
+    let follow_up_msg = POST_AUTO_USER_MSG;
+    let large_user_msg_fragment = large_user_msg.clone();
+    let first_matcher = move |req: &wiremock::Request| {
+        let body = std::str::from_utf8(&req.body).unwrap_or("");
+        body.contains(&large_user_msg_fragment)
+            && !body_contains_text(body, follow_up_msg)
+            && !body_contains_text(body, SUMMARIZATION_PROMPT)
+    };
+    mount_sse_once_match(&server, first_matcher, sse1).await;
+
+    let auto_compact_matcher = |req: &wiremock::Request| {
+        let body = std::str::from_utf8(&req.body).unwrap_or("");
+        body_contains_text(body, SUMMARIZATION_PROMPT)
+    };
+    mount_sse_once_match(&server, auto_compact_matcher, sse2).await;
+
+    let follow_up_matcher = |req: &wiremock::Request| {
+        let body = std::str::from_utf8(&req.body).unwrap_or("");
+        body_contains_text(body, follow_up_msg) && !body_contains_text(body, SUMMARIZATION_PROMPT)
+    };
+    mount_sse_once_match(&server, follow_up_matcher, sse3).await;
+
+    let model_provider = non_openai_model_provider(&server);
+    let home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&home).await;
+    config.model_provider = model_provider;
+    set_test_compact_prompt(&mut config);
+    config.model_auto_compact_token_limit = Some(15_000);
+
+    let conversation_manager = ConversationManager::with_models_provider(
+        CodexAuth::from_api_key("dummy"),
+        config.model_provider.clone(),
+    );
+    let codex = conversation_manager
+        .new_conversation(config)
+        .await
+        .unwrap()
+        .conversation;
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![UserInput::Text {
+                text: large_user_msg.clone(),
+            }],
+        })
+        .await
+        .unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![UserInput::Text {
+                text: follow_up_msg.to_string(),
+            }],
+        })
+        .await
+        .unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    let requests = get_responses_requests(&server).await;
+    assert_eq!(
+        requests.len(),
+        3,
+        "expected user turn, auto compact, and follow-up request"
+    );
+    let auto_compact_index = requests
+        .iter()
+        .enumerate()
+        .find_map(|(idx, req)| {
+            let body = std::str::from_utf8(&req.body).unwrap_or("");
+            body_contains_text(body, SUMMARIZATION_PROMPT).then_some(idx)
+        })
+        .expect("auto compact request missing");
+    assert_eq!(
+        auto_compact_index, 1,
+        "auto compact should run before the follow-up turn when usage is zero",
+    );
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn auto_compact_persists_rollout_entries() {
     skip_if_no_network!();
@@ -1701,29 +1801,29 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
     let sse1 = sse(vec![
         ev_assistant_message("m1", FIRST_REPLY),
-        ev_completed_with_tokens("r1", 500),
+        ev_completed_with_tokens("r1", 5000),
     ]);
     let first_summary_payload = auto_summary(FIRST_AUTO_SUMMARY);
     let sse2 = sse(vec![
         ev_assistant_message("m2", &first_summary_payload),
-        ev_completed_with_tokens("r2", 50),
+        ev_completed_with_tokens("r2", 500),
     ]);
     let sse3 = sse(vec![
         ev_function_call(DUMMY_CALL_ID, DUMMY_FUNCTION_NAME, "{}"),
-        ev_completed_with_tokens("r3", 150),
+        ev_completed_with_tokens("r3", 1500),
     ]);
     let sse4 = sse(vec![
         ev_assistant_message("m4", SECOND_LARGE_REPLY),
-        ev_completed_with_tokens("r4", 450),
+        ev_completed_with_tokens("r4", 4500),
     ]);
     let second_summary_payload = auto_summary(SECOND_AUTO_SUMMARY);
     let sse5 = sse(vec![
         ev_assistant_message("m5", &second_summary_payload),
-        ev_completed_with_tokens("r5", 60),
+        ev_completed_with_tokens("r5", 600),
     ]);
     let sse6 = sse(vec![
         ev_assistant_message("m6", FINAL_REPLY),
-        ev_completed_with_tokens("r6", 120),
+        ev_completed_with_tokens("r6", 1200),
     ]);
 
     let follow_up_user = "FOLLOW_UP_AUTO_COMPACT";
     let final_user = "FINAL_AUTO_COMPACT";
@@ -1736,7 +1836,9 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
     let mut config = load_default_config_for_test(&home).await;
     config.model_provider = model_provider;
     set_test_compact_prompt(&mut config);
-    config.model_auto_compact_token_limit = Some(200);
+    // Keep base instructions empty so token estimates align with mocked usage.
+    config.base_instructions = Some(String::new());
+    config.model_auto_compact_token_limit = Some(2000);
     let conversation_manager = ConversationManager::with_models_provider(
         CodexAuth::from_api_key("dummy"),
         config.model_provider.clone(),
@@ -1814,14 +1916,14 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {
     let server = start_mock_server().await;
-    let context_window = 100;
+    let context_window = 1000;
     let limit = context_window * 90 / 100;
     let over_limit_tokens = context_window * 95 / 100 + 1;
     let follow_up_user = "FOLLOW_UP_AFTER_LIMIT";
     let first_turn = sse(vec![
         ev_function_call(DUMMY_CALL_ID, DUMMY_FUNCTION_NAME, "{}"),
-        ev_completed_with_tokens("r1", 50),
+        ev_completed_with_tokens("r1", 500),
     ]);
     let function_call_follow_up = sse(vec![
         ev_assistant_message("m2", FINAL_REPLY),
@@ -1830,9 +1932,9 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {
     let auto_summary_payload = auto_summary(AUTO_SUMMARY_TEXT);
     let auto_compact_turn = sse(vec![
         ev_assistant_message("m3", &auto_summary_payload),
-        ev_completed_with_tokens("r3", 10),
+        ev_completed_with_tokens("r3", 100),
     ]);
-    let post_auto_compact_turn = sse(vec![ev_completed_with_tokens("r4", 10)]);
+    let post_auto_compact_turn = sse(vec![ev_completed_with_tokens("r4", 100)]);
 
     // Mount responses in order and keep mocks only for the ones we assert on.
     let first_turn_mock = mount_sse_once(&server, first_turn).await;
@@ -1847,6 +1949,8 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {
     let mut config = load_default_config_for_test(&home).await;
    config.model_provider = model_provider;
     set_test_compact_prompt(&mut config);
+    // Keep base instructions empty so token estimates align with mocked usage.
+    config.base_instructions = Some(String::new());
     config.model_context_window = Some(context_window);
     config.model_auto_compact_token_limit = Some(limit);
@@ -1926,16 +2030,16 @@ async fn auto_compact_counts_encrypted_reasoning_before_last_user() {
     let second_user = "TRIGGER_COMPACT_AT_LIMIT";
     let third_user = "AFTER_REMOTE_COMPACT";
-    let pre_last_reasoning_content = "a".repeat(2_400);
-    let post_last_reasoning_content = "b".repeat(4_000);
+    let pre_last_reasoning_content = "a".repeat(24_000);
+    let post_last_reasoning_content = "b".repeat(40_000);
 
     let first_turn = sse(vec![
         ev_reasoning_item("pre-reasoning", &["pre"], &[&pre_last_reasoning_content]),
-        ev_completed_with_tokens("r1", 10),
+        ev_completed_with_tokens("r1", 100),
     ]);
     let second_turn = sse(vec![
         ev_reasoning_item("post-reasoning", &["post"], &[&post_last_reasoning_content]),
-        ev_completed_with_tokens("r2", 80),
+        ev_completed_with_tokens("r2", 800),
     ]);
     let third_turn = sse(vec![
         ev_assistant_message("m4", FINAL_REPLY),
@@ -1974,6 +2078,8 @@ async fn auto_compact_counts_encrypted_reasoning_before_last_user() {
         .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
         .with_config(|config| {
             set_test_compact_prompt(config);
+            // Keep base instructions empty so token estimates align with mocked usage.
+            config.base_instructions = Some(String::new());
             config.model_auto_compact_token_limit = Some(300);
             config.features.enable(Feature::RemoteCompaction);
         })
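
A note on the fixture changes above: the mocked token counts and limits are scaled up by roughly 10x, and base_instructions is blanked, because the trigger now takes the max of reported and estimated usage. Against a tiny limit like Some(200), a character-based estimate of the default base instructions alone could exceed the limit and fire compaction on every turn, drowning out the mocked usage numbers. A toy version of that arithmetic, again assuming a hypothetical ~4-chars-per-token estimate rather than the real estimator:

    fn main() {
        // Hypothetical chars/4 estimator, purely for illustration.
        let estimate = |chars: usize| (chars as i64 + 3) / 4;

        // Old fixtures: limit 200. If the default base instructions ran to a
        // couple of thousand characters, their estimate alone (~500 tokens)
        // would trip max(reported, estimated) before any mocked turn.
        assert!(estimate(2_000) >= 200);

        // New fixtures: limit 2_000 with empty base instructions. A short
        // mocked exchange estimates well under the limit, so only the mocked
        // usage numbers (500, 1500, 4500, ...) decide when compaction runs.
        assert!(estimate(800) < 2_000);
    }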