diff --git a/codex-rs/core/src/realtime_context.rs b/codex-rs/core/src/realtime_context.rs index 1b845db1e9..fbe48a3310 100644 --- a/codex-rs/core/src/realtime_context.rs +++ b/codex-rs/core/src/realtime_context.rs @@ -15,6 +15,7 @@ use std::collections::HashSet; use std::ffi::OsStr; use std::fs::DirEntry; use std::io; +use std::mem::take; use std::path::Path; use std::path::PathBuf; use tracing::debug; @@ -26,7 +27,7 @@ const CURRENT_THREAD_SECTION_TOKEN_BUDGET: usize = 1_200; const RECENT_WORK_SECTION_TOKEN_BUDGET: usize = 2_200; const WORKSPACE_SECTION_TOKEN_BUDGET: usize = 1_600; const NOTES_SECTION_TOKEN_BUDGET: usize = 300; -const MAX_CURRENT_THREAD_TURNS: usize = 2; +const CURRENT_THREAD_TURN_TOKEN_BUDGET: usize = 300; const MAX_RECENT_THREADS: usize = 40; const MAX_RECENT_WORK_GROUPS: usize = 8; const MAX_CURRENT_CWD_ASKS: usize = 8; @@ -204,10 +205,7 @@ fn build_current_thread_section(items: &[ResponseItem]) -> Option { continue; }; if !current_user.is_empty() || !current_assistant.is_empty() { - turns.push(( - std::mem::take(&mut current_user), - std::mem::take(&mut current_assistant), - )); + turns.push((take(&mut current_user), take(&mut current_assistant))); } current_user.push(text); } @@ -231,43 +229,75 @@ fn build_current_thread_section(items: &[ResponseItem]) -> Option { turns.push((current_user, current_assistant)); } - let retained_turns = turns - .into_iter() - .rev() - .take(MAX_CURRENT_THREAD_TURNS) - .collect::>() - .into_iter() - .rev() - .collect::>(); - if retained_turns.is_empty() { + if turns.is_empty() { return None; } let mut lines = vec![ "Most recent user/assistant turns from this exact thread. Use them for continuity when responding.".to_string(), ]; + let mut remaining_budget = + CURRENT_THREAD_SECTION_TOKEN_BUDGET.saturating_sub(approx_token_count(&lines.join("\n"))); + let mut retained_turn_count = 0; - let retained_turn_count = retained_turns.len(); - for (index, (user_messages, assistant_messages)) in retained_turns.into_iter().enumerate() { - lines.push(String::new()); - if retained_turn_count == 1 || index + 1 == retained_turn_count { - lines.push("### Latest turn".to_string()); + for (index, (user_messages, assistant_messages)) in turns.into_iter().rev().enumerate() { + if remaining_budget == 0 { + break; + } + + let mut turn_lines = Vec::new(); + if index == 0 { + turn_lines.push("### Latest turn".to_string()); } else { - lines.push(format!("### Prior turn {}", index + 1)); + turn_lines.push(format!("### Previous turn {index}")); } if !user_messages.is_empty() { - lines.push("User:".to_string()); - lines.push(user_messages.join("\n\n")); + turn_lines.push("User:".to_string()); + turn_lines.push(user_messages.join("\n\n")); } if !assistant_messages.is_empty() { - lines.push(String::new()); - lines.push("Assistant:".to_string()); - lines.push(assistant_messages.join("\n\n")); + turn_lines.push(String::new()); + turn_lines.push("Assistant:".to_string()); + turn_lines.push(assistant_messages.join("\n\n")); } + + let turn_budget = CURRENT_THREAD_TURN_TOKEN_BUDGET.min(remaining_budget); + let turn_text = turn_lines.join("\n"); + let mut truncation_budget = turn_budget; + let turn_text = loop { + let candidate = truncate_text(&turn_text, TruncationPolicy::Tokens(truncation_budget)); + let candidate_tokens = approx_token_count(&candidate); + if candidate_tokens <= turn_budget { + break candidate; + } + + // The shared truncator adds its marker after choosing preserved + // content, so tighten the content budget until the rendered turn + // itself fits the per-turn cap. + let excess_tokens = candidate_tokens.saturating_sub(turn_budget); + let next_budget = truncation_budget.saturating_sub(excess_tokens.max(1)); + if next_budget == 0 { + let candidate = truncate_text(&turn_text, TruncationPolicy::Tokens(0)); + if approx_token_count(&candidate) <= turn_budget { + break candidate; + } + break String::new(); + } + truncation_budget = next_budget; + }; + let turn_tokens = approx_token_count(&turn_text); + if turn_tokens == 0 { + continue; + } + + lines.push(String::new()); + lines.push(turn_text); + remaining_budget = remaining_budget.saturating_sub(turn_tokens); + retained_turn_count += 1; } - Some(lines.join("\n")) + (retained_turn_count > 0).then(|| lines.join("\n")) } fn build_workspace_section_with_user_root( diff --git a/codex-rs/core/src/realtime_context_tests.rs b/codex-rs/core/src/realtime_context_tests.rs index 709bf31765..7495161d46 100644 --- a/codex-rs/core/src/realtime_context_tests.rs +++ b/codex-rs/core/src/realtime_context_tests.rs @@ -1,8 +1,11 @@ +use super::build_current_thread_section; use super::build_recent_work_section; use super::build_workspace_section_with_user_root; use chrono::TimeZone; use chrono::Utc; use codex_protocol::ThreadId; +use codex_protocol::models::ContentItem; +use codex_protocol::models::ResponseItem; use codex_state::ThreadMetadata; use pretty_assertions::assert_eq; use std::fs; @@ -43,6 +46,120 @@ fn thread_metadata(cwd: &str, title: &str, first_user_message: &str) -> ThreadMe } } +fn message(role: &str, content: ContentItem) -> ResponseItem { + ResponseItem::Message { + id: None, + role: role.to_string(), + content: vec![content], + end_turn: None, + phase: None, + } +} + +fn user_message(text: impl Into) -> ResponseItem { + message("user", ContentItem::InputText { text: text.into() }) +} + +fn assistant_message(text: impl Into) -> ResponseItem { + message("assistant", ContentItem::OutputText { text: text.into() }) +} + +fn long_turn_text(index: usize) -> String { + format!( + "turn-{index}-start {} turn-{index}-middle {} turn-{index}-end", + "head filler ".repeat(160), + "tail filler ".repeat(240), + ) +} + +#[test] +fn current_thread_section_includes_short_turns_newest_first_until_budget() { + let items = vec![ + user_message("user turn 1"), + assistant_message("assistant turn 1"), + user_message("user turn 2"), + assistant_message("assistant turn 2"), + user_message("user turn 3"), + assistant_message("assistant turn 3"), + user_message("user turn 4"), + assistant_message("assistant turn 4"), + ]; + + assert_eq!( + build_current_thread_section(&items), + Some( + r#"Most recent user/assistant turns from this exact thread. Use them for continuity when responding. + +### Latest turn +User: +user turn 4 + +Assistant: +assistant turn 4 + +### Previous turn 1 +User: +user turn 3 + +Assistant: +assistant turn 3 + +### Previous turn 2 +User: +user turn 2 + +Assistant: +assistant turn 2 + +### Previous turn 3 +User: +user turn 1 + +Assistant: +assistant turn 1"# + .to_string() + ) + ); +} + +#[test] +fn current_thread_turn_truncation_preserves_start_and_end() { + let items = vec![user_message(long_turn_text(/*index*/ 0))]; + let section = build_current_thread_section(&items).expect("current thread section"); + + assert_eq!( + ( + section.contains("turn-0-start"), + section.contains("turn-0-middle"), + section.contains("turn-0-end"), + section.contains("tokens truncated"), + ), + (true, false, true, true), + ); +} + +#[test] +fn current_thread_section_keeps_latest_turns_when_history_exceeds_budget() { + let mut items = Vec::new(); + for index in 1..=8 { + items.push(user_message(long_turn_text(index))); + items.push(assistant_message(format!("assistant turn {index}"))); + } + + let section = build_current_thread_section(&items).expect("current thread section"); + + assert_eq!( + ( + section.contains("turn-8-start"), + section.contains("turn-8-end"), + section.contains("### Previous turn 2"), + section.contains("turn-1-start"), + section.contains("turn-1-end"), + ), + (true, true, true, false, false), + ); +} + #[test] fn workspace_section_requires_meaningful_structure() { let cwd = TempDir::new().expect("tempdir"); diff --git a/codex-rs/core/tests/suite/realtime_conversation.rs b/codex-rs/core/tests/suite/realtime_conversation.rs index a8f7f10fe6..4a7230867e 100644 --- a/codex-rs/core/tests/suite/realtime_conversation.rs +++ b/codex-rs/core/tests/suite/realtime_conversation.rs @@ -2,9 +2,12 @@ use anyhow::Context; use anyhow::Result; use chrono::Utc; use codex_config::config_toml::RealtimeWsVersion; +use codex_core::test_support::auth_manager_from_auth; use codex_login::CodexAuth; use codex_login::OPENAI_API_KEY_ENV_VAR; use codex_protocol::ThreadId; +use codex_protocol::models::ContentItem; +use codex_protocol::models::ResponseItem; use codex_protocol::protocol::CodexErrorInfo; use codex_protocol::protocol::ConversationAudioParams; use codex_protocol::protocol::ConversationStartParams; @@ -12,12 +15,14 @@ use codex_protocol::protocol::ConversationStartTransport; use codex_protocol::protocol::ConversationTextParams; use codex_protocol::protocol::ErrorEvent; use codex_protocol::protocol::EventMsg; +use codex_protocol::protocol::InitialHistory; use codex_protocol::protocol::Op; use codex_protocol::protocol::RealtimeAudioFrame; use codex_protocol::protocol::RealtimeConversationRealtimeEvent; use codex_protocol::protocol::RealtimeConversationVersion; use codex_protocol::protocol::RealtimeEvent; use codex_protocol::protocol::RealtimeVoice; +use codex_protocol::protocol::RolloutItem; use codex_protocol::protocol::SessionSource; use codex_protocol::user_input::UserInput; use core_test_support::responses; @@ -1508,6 +1513,165 @@ async fn conversation_start_injects_startup_context_from_thread_history() -> Res Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn conversation_startup_context_current_thread_selects_many_turns_by_budget() -> Result<()> { + skip_if_no_network!(Ok(())); + + let api_server = start_mock_server().await; + let realtime_server = start_websocket_server(vec![vec![vec![json!({ + "type": "session.updated", + "session": { "id": "sess_current_thread_budget", "instructions": "backend prompt" } + })]]]) + .await; + + let latest_long_user_turn = format!( + "latest-long-start {} latest-long-middle {} latest-long-end", + "head detail ".repeat(120), + "tail detail ".repeat(170), + ); + let mut user_turns = (1..=7) + .map(|index| { + format!( + "short-turn-{index}-start {} short-turn-{index}-end", + "detail ".repeat(86) + ) + }) + .collect::>(); + user_turns.push(latest_long_user_turn.clone()); + + let mut builder = test_codex().with_config({ + let realtime_base_url = realtime_server.uri().to_string(); + move |config| { + config.experimental_realtime_ws_base_url = Some(realtime_base_url); + config.realtime.version = RealtimeWsVersion::V1; + } + }); + let test = builder.build(&api_server).await?; + + // Seed completed turns through a resumed thread so this remains an + // end-to-end startup-context test without paying for a model turn per + // fixture entry in platform CI. + let history = user_turns + .into_iter() + .enumerate() + .flat_map(|(index, user_turn)| { + let turn_number = index + 1; + let assistant_turn = format!("assistant turn {turn_number}"); + [ + RolloutItem::ResponseItem(ResponseItem::Message { + id: None, + role: "user".to_string(), + content: vec![ContentItem::InputText { text: user_turn }], + end_turn: None, + phase: None, + }), + RolloutItem::ResponseItem(ResponseItem::Message { + id: None, + role: "assistant".to_string(), + content: vec![ContentItem::OutputText { + text: assistant_turn, + }], + end_turn: None, + phase: None, + }), + ] + }) + .collect::>(); + test.codex.shutdown_and_wait().await?; + let resumed_thread = test + .thread_manager + .resume_thread_with_history( + test.config.clone(), + InitialHistory::Forked(history), + auth_manager_from_auth(CodexAuth::from_api_key("dummy")), + /*persist_extended_history*/ false, + /*parent_trace*/ None, + ) + .await?; + let codex = resumed_thread.thread; + + codex + .submit(Op::RealtimeConversationStart(ConversationStartParams { + prompt: Some(Some("backend prompt".to_string())), + session_id: None, + transport: None, + voice: None, + })) + .await?; + + let startup_context_request = wait_for_matching_websocket_request( + &realtime_server, + "current thread budget startup context request with instructions", + |request| websocket_request_instructions(request).is_some(), + ) + .await; + let startup_context = websocket_request_instructions(&startup_context_request) + .expect("startup context request should contain instructions"); + + // Isolate only the Current Thread section; the startup prompt may also include + // workspace and notes sections after it. + let current_thread_start = startup_context + .find("## Current Thread") + .expect("startup context should include current thread section"); + let current_thread_and_rest = &startup_context[current_thread_start..]; + let current_thread_end = [ + "\n## Recent Work", + "\n## Machine / Workspace Map", + "\n## Notes", + ] + .iter() + .filter_map(|marker| current_thread_and_rest.find(marker)) + .min() + .unwrap_or(current_thread_and_rest.len()); + let current_thread = ¤t_thread_and_rest[..current_thread_end]; + + let rendered_turns = current_thread + .split("\n### ") + .skip(1) + .map(|turn| format!("### {turn}")) + .collect::>(); + let over_budget_turns = rendered_turns + .iter() + .filter_map(|turn| { + let token_count = turn.len().div_ceil(4); + (token_count > 300).then(|| { + ( + turn.lines().next().unwrap_or_default().to_string(), + token_count, + ) + }) + }) + .collect::>(); + let latest_rendered_source = + format!("### Latest turn\nUser:\n{latest_long_user_turn}\n\nAssistant:\nassistant turn 8"); + + // Snapshot the actual section so turn order, oldest-first omission, and + // start/end truncation behavior are reviewed together. + let snapshot = format!( + "latest_source_tokens: {}\nrendered_turn_count: {}\nover_budget_turns: {over_budget_turns:?}\n\n{current_thread}", + latest_rendered_source.len().div_ceil(4), + rendered_turns.len(), + ); + insta::assert_snapshot!( + "conversation_startup_context_current_thread_selects_many_turns_by_budget", + snapshot + ); + + // The input includes a turn over 300 approximate tokens, and every rendered + // turn still fits the per-turn cap after labels and truncation markers. + assert_eq!( + ( + latest_rendered_source.len().div_ceil(4) > 300, + over_budget_turns, + ), + (true, Vec::<(String, usize)>::new()), + ); + + codex.shutdown_and_wait().await?; + realtime_server.shutdown().await; + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn conversation_startup_context_falls_back_to_workspace_map() -> Result<()> { skip_if_no_network!(Ok(())); diff --git a/codex-rs/core/tests/suite/snapshots/all__suite__realtime_conversation__conversation_startup_context_current_thread_selects_many_turns_by_budget.snap b/codex-rs/core/tests/suite/snapshots/all__suite__realtime_conversation__conversation_startup_context_current_thread_selects_many_turns_by_budget.snap new file mode 100644 index 0000000000..12ae5b9aca --- /dev/null +++ b/codex-rs/core/tests/suite/snapshots/all__suite__realtime_conversation__conversation_startup_context_current_thread_selects_many_turns_by_budget.snap @@ -0,0 +1,52 @@ +--- +source: core/tests/suite/realtime_conversation.rs +expression: snapshot +--- +latest_source_tokens: 897 +rendered_turn_count: 6 +over_budget_turns: [] + +## Current Thread +Most recent user/assistant turns from this exact thread. Use them for continuity when responding. + +### Latest turn +User: +latest-long-start head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head detail head d…604 tokens truncated… tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail tail detail latest-long-end + +Assistant: +assistant turn 8 + +### Previous turn 1 +User: +short-turn-7-start detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail short-turn-7-end + +Assistant: +assistant turn 7 + +### Previous turn 2 +User: +short-turn-6-start detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail short-turn-6-end + +Assistant: +assistant turn 6 + +### Previous turn 3 +User: +short-turn-5-start detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail short-turn-5-end + +Assistant: +assistant turn 5 + +### Previous turn 4 +User: +short-turn-4-start detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail short-turn-4-end + +Assistant: +assistant turn 4 + +### Previous turn 5 +User: +short-turn-3-start detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail detail short-turn-3-end + +Assistant: +assistant turn 3