This commit is contained in:
Ahmed Ibrahim
2025-10-16 17:05:34 -07:00
parent d4e59dedd8
commit f2444893ca
2 changed files with 358 additions and 322 deletions

View File

@@ -77,8 +77,13 @@ async fn run_compact_task_inner(
let mut truncated_count = 0usize;
let mut trimmed_tails: Vec<Vec<ResponseItem>> = Vec::new();
let max_retries = turn_context.client.get_provider().stream_max_retries();
let mut retries = 0;
let max_retries_u64 = turn_context
.client
.get_provider()
.stream_max_retries();
let max_retries = max_retries_u64 as usize;
let mut retries: u64 = 0;
let mut context_retries = 0usize;
let rollout_item = RolloutItem::TurnContext(TurnContextItem {
cwd: turn_context.cwd.clone(),
@@ -124,6 +129,19 @@ async fn run_compact_task_inner(
// Something was trimmed from the prompt: add it to the running count and keep the
// removed chunk in `trimmed_tails`.
if !trimmed.is_empty() {
truncated_count += trimmed.len();
trimmed_tails.push(trimmed);
// Bound the trim-and-retry path: once `context_retries` has reached `max_retries`,
// report the failure and return instead of trimming again.
if context_retries >= max_retries {
// NOTE(review): presumably marks the session's token usage as full — confirm against
// `set_total_tokens_full`, which is defined outside this view.
sess.set_total_tokens_full(&sub_id, turn_context.as_ref())
.await;
// `e` is bound outside this hunk; its text is forwarded to the client as an error event.
let event = Event {
id: sub_id.clone(),
msg: EventMsg::Error(ErrorEvent {
message: e.to_string(),
}),
};
sess.send_event(event).await;
return;
}
// Count this trim and zero the separate `retries` counter before the next iteration.
context_retries += 1;
retries = 0;
continue;
}
@@ -140,12 +158,12 @@ async fn run_compact_task_inner(
return;
}
Err(e) => {
if retries < max_retries {
if retries < max_retries_u64 {
retries += 1;
let delay = backoff(retries);
sess.notify_stream_error(
&sub_id,
format!("Re-connecting... {retries}/{max_retries}"),
format!("Re-connecting... {retries}/{max_retries_u64}"),
)
.await;
tokio::time::sleep(delay).await;

View File

@@ -698,6 +698,16 @@ async fn manual_compact_retries_after_context_window_error() {
);
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: THIRD_USER_MSG.into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
let requests = request_log.requests();
assert_eq!(
requests.len(),
@@ -771,6 +781,7 @@ async fn manual_compact_retries_after_context_window_error() {
async fn manual_compact_trims_last_user_turn_with_function_calls_on_context_error() {
skip_if_no_network!();
// Scenario 1: ensure the retry trims the most recent turn when function calls are involved.
const FIRST_USER_MSG: &str = "first user turn";
const SECOND_USER_MSG: &str = "second user turn";
const FIRST_CALL_A: &str = "call-first-a";
@@ -778,344 +789,351 @@ async fn manual_compact_trims_last_user_turn_with_function_calls_on_context_erro
const SECOND_CALL_A: &str = "call-second-a";
const SECOND_CALL_B: &str = "call-second-b";
let server = start_mock_server().await;
{
let server = start_mock_server().await;
let first_turn_initial = sse(vec![ev_function_call(FIRST_CALL_A, "tool.first.a", "{}")]);
let first_turn_second_call = sse(vec![
ev_function_call_output(FIRST_CALL_A, "first-call-a output"),
ev_function_call(FIRST_CALL_B, "tool.first.b", "{}"),
]);
let first_turn_complete = sse(vec![
ev_function_call_output(FIRST_CALL_B, "first-call-b output"),
ev_assistant_message("assistant-first", "first turn complete"),
ev_completed("resp-first"),
]);
let second_turn_initial = sse(vec![ev_function_call(SECOND_CALL_A, "tool.second.a", "{}")]);
let second_turn_second_call = sse(vec![
ev_function_call_output(SECOND_CALL_A, "second-call-a output"),
ev_function_call(SECOND_CALL_B, "tool.second.b", "{}"),
]);
let second_turn_complete = sse(vec![
ev_function_call_output(SECOND_CALL_B, "second-call-b output"),
ev_assistant_message("assistant-second", "second turn complete"),
ev_completed("resp-second"),
]);
let compact_failed = sse_failed(
"resp-fail",
"context_length_exceeded",
CONTEXT_LIMIT_MESSAGE,
);
let compact_retry = sse(vec![
ev_assistant_message("assistant-summary", SUMMARY_TEXT),
ev_completed("resp-summary"),
]);
let first_turn_initial = sse(vec![ev_function_call(FIRST_CALL_A, "tool.first.a", "{}")]);
let first_turn_second_call = sse(vec![
ev_function_call_output(FIRST_CALL_A, "first-call-a output"),
ev_function_call(FIRST_CALL_B, "tool.first.b", "{}"),
]);
let first_turn_complete = sse(vec![
ev_function_call_output(FIRST_CALL_B, "first-call-b output"),
ev_assistant_message("assistant-first", "first turn complete"),
ev_completed("resp-first"),
]);
let second_turn_initial = sse(vec![ev_function_call(SECOND_CALL_A, "tool.second.a", "{}")]);
let second_turn_second_call = sse(vec![
ev_function_call_output(SECOND_CALL_A, "second-call-a output"),
ev_function_call(SECOND_CALL_B, "tool.second.b", "{}"),
]);
let second_turn_complete = sse(vec![
ev_function_call_output(SECOND_CALL_B, "second-call-b output"),
ev_assistant_message("assistant-second", "second turn complete"),
ev_completed("resp-second"),
]);
let compact_failed = sse_failed(
"resp-fail",
"context_length_exceeded",
CONTEXT_LIMIT_MESSAGE,
);
let compact_retry = sse(vec![
ev_assistant_message("assistant-summary", SUMMARY_TEXT),
ev_completed("resp-summary"),
]);
let request_log = mount_sse_sequence(
&server,
vec![
first_turn_initial,
first_turn_second_call,
first_turn_complete,
second_turn_initial,
second_turn_second_call,
second_turn_complete,
compact_failed,
compact_retry,
],
)
.await;
let request_log = mount_sse_sequence(
&server,
vec![
first_turn_initial,
first_turn_second_call,
first_turn_complete,
second_turn_initial,
second_turn_second_call,
second_turn_complete,
compact_failed,
compact_retry,
],
)
.await;
let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
..built_in_model_providers()["openai"].clone()
};
let home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&home);
config.model_provider = model_provider;
config.model_auto_compact_token_limit = Some(200_000);
let codex = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"))
.new_conversation(config)
.await
.unwrap()
.conversation;
let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
..built_in_model_providers()["openai"].clone()
};
let home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&home);
config.model_provider = model_provider;
config.model_auto_compact_token_limit = Some(200_000);
let codex = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"))
.new_conversation(config)
.await
.unwrap()
.conversation;
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: FIRST_USER_MSG.into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: FIRST_USER_MSG.into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: SECOND_USER_MSG.into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: SECOND_USER_MSG.into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
codex.submit(Op::Compact).await.unwrap();
let EventMsg::BackgroundEvent(event) =
wait_for_event(&codex, |ev| matches!(ev, EventMsg::BackgroundEvent(_))).await
else {
panic!("expected background event after compact retry");
};
assert!(
event
.message
.contains("Trimmed 2 older conversation item(s)"),
"background event should report trimming chunked user turn: {}",
event.message
);
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
codex.submit(Op::Compact).await.unwrap();
let EventMsg::BackgroundEvent(event) =
wait_for_event(&codex, |ev| matches!(ev, EventMsg::BackgroundEvent(_))).await
else {
panic!("expected background event after compact retry");
};
assert!(
event
.message
.contains("Trimmed 2 older conversation item(s)"),
"background event should report trimming chunked user turn: {}",
event.message
);
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
let requests = request_log.requests();
assert_eq!(
requests.len(),
8,
"expected two user turns (with tool call round-trips) followed by compact attempt + retry"
);
let requests = request_log.requests();
assert_eq!(
requests.len(),
8,
"expected two user turns (with tool call round-trips) followed by compact attempt + retry"
);
let compact_attempt = requests[6].body_json();
let retry_attempt = requests[7].body_json();
let compact_attempt = requests[6].body_json();
let retry_attempt = requests[7].body_json();
let compact_input = compact_attempt["input"]
.as_array()
.expect("compact attempt input array");
let retry_input = retry_attempt["input"]
.as_array()
.expect("retry attempt input array");
fn extract_text(item: &Value) -> Option<String> {
item.get("content")
.and_then(Value::as_array)
.and_then(|items| items.first())
.and_then(|entry| entry.get("text"))
.and_then(Value::as_str)
.map(str::to_string)
}
let contains_text = |items: &[Value], needle: &str| {
items
.iter()
.any(|item| extract_text(item).is_some_and(|text| text == needle))
};
assert!(
contains_text(compact_input, SECOND_USER_MSG),
"initial compact attempt should include most recent user message",
);
assert!(
!contains_text(retry_input, SECOND_USER_MSG),
"retry should drop the most recent user message",
);
assert!(
contains_text(compact_input, "second turn complete"),
"initial compact attempt should include assistant reply for most recent turn",
);
assert!(
!contains_text(retry_input, "second turn complete"),
"retry should drop assistant reply for most recent turn",
);
assert_eq!(
compact_input.len().saturating_sub(retry_input.len()),
2,
"retry should drop the most recent user turn from the prompt",
);
let retry_call_ids: std::collections::HashSet<_> = retry_input
.iter()
.filter_map(|item| item.get("call_id").and_then(|v| v.as_str()))
.collect();
assert!(
!retry_call_ids.contains(SECOND_CALL_A),
"retry should remove function call {SECOND_CALL_A}"
);
assert!(
!retry_call_ids.contains(SECOND_CALL_B),
"retry should remove function call {SECOND_CALL_B}"
);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn manual_compact_restores_trimmed_tail_after_retry() {
skip_if_no_network!();
const FIRST_USER_MSG: &str = "first user turn";
const FIRST_ASSISTANT_MSG: &str = "first assistant reply";
const SECOND_USER_MSG: &str = "second user turn";
const SECOND_ASSISTANT_MSG: &str = "second assistant reply";
const THIRD_USER_MSG: &str = "post compact user";
const THIRD_ASSISTANT_MSG: &str = "post compact assistant";
let server = start_mock_server().await;
let first_turn = sse(vec![
ev_assistant_message("assistant-first", FIRST_ASSISTANT_MSG),
ev_completed("resp-first"),
]);
let second_turn = sse(vec![
ev_assistant_message("assistant-second", SECOND_ASSISTANT_MSG),
ev_completed("resp-second"),
]);
let compact_failed = sse_failed(
"resp-fail",
"context_length_exceeded",
CONTEXT_LIMIT_MESSAGE,
);
let compact_retry = sse(vec![
ev_assistant_message("assistant-summary", SUMMARY_TEXT),
ev_completed("resp-summary"),
]);
let third_turn = sse(vec![
ev_assistant_message("assistant-third", THIRD_ASSISTANT_MSG),
ev_completed("resp-third"),
]);
let request_log = mount_sse_sequence(
&server,
vec![
first_turn,
second_turn,
compact_failed,
compact_retry,
third_turn,
],
)
.await;
let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
..built_in_model_providers()["openai"].clone()
};
let home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&home);
config.model_provider = model_provider;
config.model_auto_compact_token_limit = Some(200_000);
let codex = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"))
.new_conversation(config)
.await
.unwrap()
.conversation;
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: FIRST_USER_MSG.into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: SECOND_USER_MSG.into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
codex.submit(Op::Compact).await.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: THIRD_USER_MSG.into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
let requests = request_log.requests();
assert_eq!(
requests.len(),
5,
"expected two user turns, two compact attempts, and a post-compact turn",
);
let retry_request = &requests[3];
let retry_body = retry_request.body_json();
let retry_input = retry_body
.get("input")
.and_then(Value::as_array)
.unwrap_or_else(|| panic!("retry request missing input array: {retry_body}"));
assert!(
retry_input.iter().all(|item| {
fn extract_text(item: &Value) -> Option<String> {
item.get("content")
.and_then(Value::as_array)
.and_then(|entries| entries.first())
.and_then(|items| items.first())
.and_then(|entry| entry.get("text"))
.and_then(Value::as_str)
.map(|text| text != SECOND_USER_MSG && text != SECOND_ASSISTANT_MSG)
.unwrap_or(true)
}),
"retry compact input should omit trimmed second turn",
);
.map(str::to_string)
}
let final_request = &requests[4];
let body = final_request.body_json();
let input_items = body
.get("input")
.and_then(Value::as_array)
.unwrap_or_else(|| panic!("final request missing input array: {body}"));
let contains_text = |items: &[Value], needle: &str| {
items
.iter()
.any(|item| extract_text(item).is_some_and(|text| text == needle))
};
fn message_index(items: &[Value], needle: &str) -> Option<usize> {
items.iter().position(|item| {
item.get("type").and_then(Value::as_str) == Some("message")
&& item
.get("content")
assert!(
contains_text(
compact_attempt["input"].as_array().unwrap(),
SECOND_USER_MSG
),
"initial compact attempt should include most recent user message",
);
assert!(
!contains_text(retry_attempt["input"].as_array().unwrap(), SECOND_USER_MSG),
"retry should drop the most recent user message",
);
assert!(
contains_text(
compact_attempt["input"].as_array().unwrap(),
"second turn complete"
),
"initial compact attempt should include assistant reply for most recent turn",
);
assert!(
!contains_text(
retry_attempt["input"].as_array().unwrap(),
"second turn complete"
),
"retry should drop assistant reply for most recent turn",
);
assert_eq!(
compact_attempt["input"]
.as_array()
.unwrap()
.len()
.saturating_sub(retry_attempt["input"].as_array().unwrap().len()),
2,
"retry should drop the most recent user turn from the prompt",
);
let retry_call_ids: std::collections::HashSet<_> = retry_attempt["input"]
.as_array()
.unwrap()
.iter()
.filter_map(|item| item.get("call_id").and_then(|v| v.as_str()))
.collect();
assert!(
!retry_call_ids.contains(SECOND_CALL_A),
"retry should remove function call {SECOND_CALL_A}"
);
assert!(
!retry_call_ids.contains(SECOND_CALL_B),
"retry should remove function call {SECOND_CALL_B}"
);
}
// Scenario 2: after a retry succeeds, the trimmed turn is restored to history for the next user input.
{
const SIMPLE_FIRST_USER_MSG: &str = "first user turn";
const SIMPLE_FIRST_ASSISTANT_MSG: &str = "first assistant reply";
const SIMPLE_SECOND_USER_MSG: &str = "second user turn";
const SIMPLE_SECOND_ASSISTANT_MSG: &str = "second assistant reply";
const SIMPLE_THIRD_USER_MSG: &str = "post compact user";
const SIMPLE_THIRD_ASSISTANT_MSG: &str = "post compact assistant";
let server = start_mock_server().await;
let first_turn = sse(vec![
ev_assistant_message("assistant-first", SIMPLE_FIRST_ASSISTANT_MSG),
ev_completed("resp-first"),
]);
let second_turn = sse(vec![
ev_assistant_message("assistant-second", SIMPLE_SECOND_ASSISTANT_MSG),
ev_completed("resp-second"),
]);
let compact_failed = sse_failed(
"resp-fail",
"context_length_exceeded",
CONTEXT_LIMIT_MESSAGE,
);
let compact_retry = sse(vec![
ev_assistant_message("assistant-summary", SUMMARY_TEXT),
ev_completed("resp-summary"),
]);
let third_turn = sse(vec![
ev_assistant_message("assistant-third", SIMPLE_THIRD_ASSISTANT_MSG),
ev_completed("resp-third"),
]);
let request_log = mount_sse_sequence(
&server,
vec![
first_turn,
second_turn,
compact_failed,
compact_retry,
third_turn,
],
)
.await;
let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
..built_in_model_providers()["openai"].clone()
};
let home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&home);
config.model_provider = model_provider;
config.model_auto_compact_token_limit = Some(200_000);
let codex = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"))
.new_conversation(config)
.await
.unwrap()
.conversation;
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: SIMPLE_FIRST_USER_MSG.into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: SIMPLE_SECOND_USER_MSG.into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
codex.submit(Op::Compact).await.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: SIMPLE_THIRD_USER_MSG.into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
let requests = request_log.requests();
assert_eq!(
requests.len(),
5,
"expected two user turns, two compact attempts, and a post-compact turn",
);
let retry_request = &requests[3];
let retry_body = retry_request.body_json();
let retry_input = retry_body
.get("input")
.and_then(Value::as_array)
.expect("retry request missing input array");
assert!(
retry_input.iter().all(|item| {
item.get("content")
.and_then(Value::as_array)
.and_then(|entries| entries.first())
.and_then(|entry| entry.get("text"))
.and_then(Value::as_str)
.is_some_and(|text| text == needle)
})
.map(|text| {
text != SIMPLE_SECOND_USER_MSG && text != SIMPLE_SECOND_ASSISTANT_MSG
})
.unwrap_or(true)
}),
"retry compact input should omit trimmed second turn",
);
let final_request = &requests[4];
let body = final_request.body_json();
let input_items = body
.get("input")
.and_then(Value::as_array)
.expect("final request missing input array");
fn message_index(items: &[Value], needle: &str) -> Option<usize> {
items.iter().position(|item| {
item.get("type").and_then(Value::as_str) == Some("message")
&& item
.get("content")
.and_then(Value::as_array)
.and_then(|entries| entries.first())
.and_then(|entry| entry.get("text"))
.and_then(Value::as_str)
.is_some_and(|text| text == needle)
})
}
let summary_index = input_items
.iter()
.position(|item| {
item.get("content")
.and_then(Value::as_array)
.and_then(|entries| entries.first())
.and_then(|entry| entry.get("text"))
.and_then(Value::as_str)
.map(|text| text.contains(SUMMARY_TEXT))
.unwrap_or(false)
})
.expect("final request should include summary bridge");
let second_user_index = message_index(input_items, SIMPLE_SECOND_USER_MSG)
.expect("trimmed second user message should remain in history");
let second_assistant_index = message_index(input_items, SIMPLE_SECOND_ASSISTANT_MSG)
.expect("trimmed assistant reply should remain in history");
let third_user_index = message_index(input_items, SIMPLE_THIRD_USER_MSG)
.expect("post-compact user turn should be present");
assert!(
summary_index < second_user_index,
"summary bridge should precede restored user message"
);
assert!(
second_user_index < second_assistant_index,
"restored user message should precede assistant reply"
);
assert!(
second_assistant_index < third_user_index,
"restored assistant reply should precede new user turn"
);
}
let summary_index = input_items
.iter()
.position(|item| {
item.get("content")
.and_then(Value::as_array)
.and_then(|entries| entries.first())
.and_then(|entry| entry.get("text"))
.and_then(Value::as_str)
.map(|text| text.contains(SUMMARY_TEXT))
.unwrap_or(false)
})
.expect("final request should include summary bridge");
let second_user_index = message_index(input_items, SECOND_USER_MSG)
.expect("trimmed second user message should remain in history");
let second_assistant_index = message_index(input_items, SECOND_ASSISTANT_MSG)
.expect("trimmed assistant reply should remain in history");
let third_user_index = message_index(input_items, THIRD_USER_MSG)
.expect("post-compact user turn should be present");
assert!(
summary_index < second_user_index,
"summary bridge should precede restored user message"
);
assert!(
second_user_index < second_assistant_index,
"restored user message should precede assistant reply"
);
assert!(
second_assistant_index < third_user_index,
"restored assistant reply should precede new user turn"
);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]