Compare commits

...

4 Commits

Author    SHA1        Message                                             Date
jif-oai   72ea30d900  Update compact.rs                                   2025-12-19 17:28:54 +01:00
jif-oai   574cf183c7  fix tests                                           2025-12-19 17:03:54 +01:00
jif-oai   d51284d37a  Add a test                                          2025-12-19 13:01:36 +01:00
jif-oai   90a9597afa  feat: max token usage with estimate for compaction  2025-12-19 12:50:30 +01:00
2 changed files with 137 additions and 17 deletions
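
In short, per the "feat: max token usage with estimate for compaction" commit: the auto-compaction trigger now compares the limit against the maximum of the provider-reported token usage and a local estimate computed from the conversation history, so a provider that reports zero or stale usage can no longer keep compaction from firing. Below is a minimal, self-contained sketch of that pattern; the estimate_tokens helper and its ~4-chars-per-token heuristic are illustrative assumptions, not the actual estimate_token_count implementation.

    // Illustrative sketch only; assumes ~4 chars per token, which is not
    // necessarily how the real estimator works.
    fn estimate_tokens(history_text: &str) -> i64 {
        (history_text.len() as i64 + 3) / 4
    }

    fn should_auto_compact(reported_usage: i64, history_text: &str, limit: i64) -> bool {
        // Trust whichever signal is larger: a provider that reports zero
        // usage still trips the limit once the estimated history size
        // crosses it.
        reported_usage.max(estimate_tokens(history_text)) >= limit
    }

    fn main() {
        let history = "x".repeat(40_000); // mirrors the large message in the new test
        // Reported usage is zero, but the ~10_000-token estimate exceeds the limit.
        assert!(should_auto_compact(0, &history, 9_000));
        // Reported usage alone still triggers as before.
        assert!(should_auto_compact(15_000, "short history", 9_000));
    }

In the diff itself the two signals are get_total_token_usage() and the history's estimate_token_count(turn_context), combined in the new get_total_token_usage_with_estimate helper.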

View File

@@ -764,6 +764,16 @@ impl Session {
         state.get_total_token_usage()
     }
 
+    async fn get_total_token_usage_with_estimate(&self, turn_context: &TurnContext) -> i64 {
+        let reported = self.get_total_token_usage().await;
+        let estimated = self
+            .clone_history()
+            .await
+            .estimate_token_count(turn_context)
+            .unwrap_or(0);
+        reported.max(estimated)
+    }
+
     async fn record_initial_history(&self, conversation_history: InitialHistory) {
         let turn_context = self.new_default_turn().await;
         match conversation_history {
@@ -2233,7 +2243,9 @@ pub(crate) async fn run_task(
         .get_model_family()
         .auto_compact_token_limit()
         .unwrap_or(i64::MAX);
-    let total_usage_tokens = sess.get_total_token_usage().await;
+    let total_usage_tokens = sess
+        .get_total_token_usage_with_estimate(&turn_context)
+        .await;
     if total_usage_tokens >= auto_compact_limit {
         run_auto_compact(&sess, &turn_context).await;
     }
@@ -2315,7 +2327,9 @@ pub(crate) async fn run_task(
             needs_follow_up,
             last_agent_message: turn_last_agent_message,
         } = turn_output;
-        let total_usage_tokens = sess.get_total_token_usage().await;
+        let total_usage_tokens = sess
+            .get_total_token_usage_with_estimate(&turn_context)
+            .await;
         let token_limit_reached = total_usage_tokens >= auto_compact_limit;
         // as long as compaction works well in getting us way below the token limit, we shouldn't worry about being in an infinite loop.

View File

@@ -1228,6 +1228,106 @@ async fn auto_compact_runs_after_token_limit_hit() {
     );
 }
 
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn auto_compact_uses_estimate_when_usage_is_zero() {
+    skip_if_no_network!();
+
+    let server = start_mock_server().await;
+    let large_user_msg = "x".repeat(40_000);
+    let sse1 = sse(vec![
+        ev_assistant_message("m1", FIRST_REPLY),
+        ev_completed("r1"),
+    ]);
+    let sse2 = sse(vec![
+        ev_assistant_message("m2", AUTO_SUMMARY_TEXT),
+        ev_completed("r2"),
+    ]);
+    let sse3 = sse(vec![
+        ev_assistant_message("m3", FINAL_REPLY),
+        ev_completed("r3"),
+    ]);
+
+    let follow_up_msg = POST_AUTO_USER_MSG;
+    let large_user_msg_fragment = large_user_msg.clone();
+    let first_matcher = move |req: &wiremock::Request| {
+        let body = std::str::from_utf8(&req.body).unwrap_or("");
+        body.contains(&large_user_msg_fragment)
+            && !body_contains_text(body, follow_up_msg)
+            && !body_contains_text(body, SUMMARIZATION_PROMPT)
+    };
+    mount_sse_once_match(&server, first_matcher, sse1).await;
+
+    let auto_compact_matcher = |req: &wiremock::Request| {
+        let body = std::str::from_utf8(&req.body).unwrap_or("");
+        body_contains_text(body, SUMMARIZATION_PROMPT)
+    };
+    mount_sse_once_match(&server, auto_compact_matcher, sse2).await;
+
+    let follow_up_matcher = |req: &wiremock::Request| {
+        let body = std::str::from_utf8(&req.body).unwrap_or("");
+        body_contains_text(body, follow_up_msg) && !body_contains_text(body, SUMMARIZATION_PROMPT)
+    };
+    mount_sse_once_match(&server, follow_up_matcher, sse3).await;
+
+    let model_provider = non_openai_model_provider(&server);
+    let home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&home).await;
+    config.model_provider = model_provider;
+    set_test_compact_prompt(&mut config);
+    config.model_auto_compact_token_limit = Some(15_000);
+
+    let conversation_manager = ConversationManager::with_models_provider(
+        CodexAuth::from_api_key("dummy"),
+        config.model_provider.clone(),
+    );
+    let codex = conversation_manager
+        .new_conversation(config)
+        .await
+        .unwrap()
+        .conversation;
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![UserInput::Text {
+                text: large_user_msg.clone(),
+            }],
+        })
+        .await
+        .unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![UserInput::Text {
+                text: follow_up_msg.to_string(),
+            }],
+        })
+        .await
+        .unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    let requests = get_responses_requests(&server).await;
+    assert_eq!(
+        requests.len(),
+        3,
+        "expected user turn, auto compact, and follow-up request"
+    );
+    let auto_compact_index = requests
+        .iter()
+        .enumerate()
+        .find_map(|(idx, req)| {
+            let body = std::str::from_utf8(&req.body).unwrap_or("");
+            body_contains_text(body, SUMMARIZATION_PROMPT).then_some(idx)
+        })
+        .expect("auto compact request missing");
+    assert_eq!(
+        auto_compact_index, 1,
+        "auto compact should run before the follow-up turn when usage is zero",
+    );
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn auto_compact_persists_rollout_entries() {
     skip_if_no_network!();
@@ -1701,29 +1801,29 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
     let sse1 = sse(vec![
         ev_assistant_message("m1", FIRST_REPLY),
-        ev_completed_with_tokens("r1", 500),
+        ev_completed_with_tokens("r1", 5000),
     ]);
     let first_summary_payload = auto_summary(FIRST_AUTO_SUMMARY);
     let sse2 = sse(vec![
         ev_assistant_message("m2", &first_summary_payload),
-        ev_completed_with_tokens("r2", 50),
+        ev_completed_with_tokens("r2", 500),
     ]);
     let sse3 = sse(vec![
         ev_function_call(DUMMY_CALL_ID, DUMMY_FUNCTION_NAME, "{}"),
-        ev_completed_with_tokens("r3", 150),
+        ev_completed_with_tokens("r3", 1500),
     ]);
     let sse4 = sse(vec![
         ev_assistant_message("m4", SECOND_LARGE_REPLY),
-        ev_completed_with_tokens("r4", 450),
+        ev_completed_with_tokens("r4", 4500),
     ]);
     let second_summary_payload = auto_summary(SECOND_AUTO_SUMMARY);
     let sse5 = sse(vec![
         ev_assistant_message("m5", &second_summary_payload),
-        ev_completed_with_tokens("r5", 60),
+        ev_completed_with_tokens("r5", 600),
     ]);
     let sse6 = sse(vec![
         ev_assistant_message("m6", FINAL_REPLY),
-        ev_completed_with_tokens("r6", 120),
+        ev_completed_with_tokens("r6", 1200),
     ]);
 
     let follow_up_user = "FOLLOW_UP_AUTO_COMPACT";
     let final_user = "FINAL_AUTO_COMPACT";
@@ -1736,7 +1836,9 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
     let mut config = load_default_config_for_test(&home).await;
     config.model_provider = model_provider;
     set_test_compact_prompt(&mut config);
-    config.model_auto_compact_token_limit = Some(200);
+    // Keep base instructions empty so token estimates align with mocked usage.
+    config.base_instructions = Some(String::new());
+    config.model_auto_compact_token_limit = Some(2000);
     let conversation_manager = ConversationManager::with_models_provider(
         CodexAuth::from_api_key("dummy"),
         config.model_provider.clone(),
@@ -1814,14 +1916,14 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {
     let server = start_mock_server().await;
-    let context_window = 100;
+    let context_window = 1000;
     let limit = context_window * 90 / 100;
     let over_limit_tokens = context_window * 95 / 100 + 1;
     let follow_up_user = "FOLLOW_UP_AFTER_LIMIT";
     let first_turn = sse(vec![
         ev_function_call(DUMMY_CALL_ID, DUMMY_FUNCTION_NAME, "{}"),
-        ev_completed_with_tokens("r1", 50),
+        ev_completed_with_tokens("r1", 500),
     ]);
     let function_call_follow_up = sse(vec![
         ev_assistant_message("m2", FINAL_REPLY),
@@ -1830,9 +1932,9 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {
     let auto_summary_payload = auto_summary(AUTO_SUMMARY_TEXT);
     let auto_compact_turn = sse(vec![
         ev_assistant_message("m3", &auto_summary_payload),
-        ev_completed_with_tokens("r3", 10),
+        ev_completed_with_tokens("r3", 100),
     ]);
-    let post_auto_compact_turn = sse(vec![ev_completed_with_tokens("r4", 10)]);
+    let post_auto_compact_turn = sse(vec![ev_completed_with_tokens("r4", 100)]);
 
     // Mount responses in order and keep mocks only for the ones we assert on.
     let first_turn_mock = mount_sse_once(&server, first_turn).await;
@@ -1847,6 +1949,8 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {
     let mut config = load_default_config_for_test(&home).await;
    config.model_provider = model_provider;
     set_test_compact_prompt(&mut config);
+    // Keep base instructions empty so token estimates align with mocked usage.
+    config.base_instructions = Some(String::new());
     config.model_context_window = Some(context_window);
     config.model_auto_compact_token_limit = Some(limit);
@@ -1926,16 +2030,16 @@ async fn auto_compact_counts_encrypted_reasoning_before_last_user() {
     let second_user = "TRIGGER_COMPACT_AT_LIMIT";
     let third_user = "AFTER_REMOTE_COMPACT";
-    let pre_last_reasoning_content = "a".repeat(2_400);
-    let post_last_reasoning_content = "b".repeat(4_000);
+    let pre_last_reasoning_content = "a".repeat(24_000);
+    let post_last_reasoning_content = "b".repeat(40_000);
 
     let first_turn = sse(vec![
         ev_reasoning_item("pre-reasoning", &["pre"], &[&pre_last_reasoning_content]),
-        ev_completed_with_tokens("r1", 10),
+        ev_completed_with_tokens("r1", 100),
     ]);
     let second_turn = sse(vec![
         ev_reasoning_item("post-reasoning", &["post"], &[&post_last_reasoning_content]),
-        ev_completed_with_tokens("r2", 80),
+        ev_completed_with_tokens("r2", 800),
     ]);
     let third_turn = sse(vec![
         ev_assistant_message("m4", FINAL_REPLY),
@@ -1974,6 +2078,8 @@ async fn auto_compact_counts_encrypted_reasoning_before_last_user() {
         .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
         .with_config(|config| {
             set_test_compact_prompt(config);
+            // Keep base instructions empty so token estimates align with mocked usage.
+            config.base_instructions = Some(String::new());
             config.model_auto_compact_token_limit = Some(300);
             config.features.enable(Feature::RemoteCompaction);
         })
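
A note on the fixture changes above: the mocked token counts and limits are scaled up by roughly 10x, and base_instructions is blanked, because the trigger now takes the max of reported and estimated usage. Against a tiny limit like Some(200), a character-based estimate of the default base instructions alone could exceed the limit and fire compaction on every turn, drowning out the mocked usage numbers. A toy version of that arithmetic, again assuming a hypothetical ~4-chars-per-token estimate rather than the real estimator:

    fn main() {
        // Hypothetical chars/4 estimator, purely for illustration.
        let estimate = |chars: usize| (chars as i64 + 3) / 4;

        // Old fixtures: limit 200. If the default base instructions ran to a
        // couple of thousand characters, their estimate alone (~500 tokens)
        // would trip max(reported, estimated) before any mocked turn.
        assert!(estimate(2_000) >= 200);

        // New fixtures: limit 2_000 with empty base instructions. A short
        // mocked exchange estimates well under the limit, so only the mocked
        // usage numbers (500, 1500, 4500, ...) decide when compaction runs.
        assert!(estimate(800) < 2_000);
    }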