Mirror of https://github.com/openai/codex.git (synced 2026-04-14 11:31:42 +03:00)

Compare commits: dev/shaqay...jif/change (8 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | 3b65c12031 |  |
|  | e39e0c4332 |  |
|  | cc71c6f9de |  |
|  | 1259a810b4 |  |
|  | 99b78c9bb2 |  |
|  | 08b2afe7a1 |  |
|  | 253b7b8639 |  |
|  | edd46c6347 |  |
codex-rs/Cargo.lock (generated, 7 changed lines)
@@ -5664,6 +5664,12 @@ dependencies = [
"digest",
]

[[package]]
name = "sha1_smol"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbfa15b3dddfee50a0fff136974b3e1bde555604ba463834a7eb7deb6417705d"

[[package]]
name = "sha2"
version = "0.10.9"

@@ -6851,6 +6857,7 @@ dependencies = [
"getrandom 0.3.3",
"js-sys",
"serde",
"sha1_smol",
"wasm-bindgen",
]

@@ -80,7 +80,7 @@ toml_edit = { workspace = true }
tracing = { workspace = true, features = ["log"] }
tree-sitter = { workspace = true }
tree-sitter-bash = { workspace = true }
uuid = { workspace = true, features = ["serde", "v4"] }
uuid = { workspace = true, features = ["serde", "v4", "v5"] }
which = { workspace = true }
wildmatch = { workspace = true }

@@ -212,6 +212,7 @@ impl ModelClient {
};

let input_with_instructions = prompt.get_formatted_input();
tracing::warn!("Inputs: {input_with_instructions:#?}");

let verbosity = if self.config.model_family.support_verbosity {
self.config.model_verbosity

@@ -15,7 +15,6 @@ use crate::protocol::TaskStartedEvent;
use crate::protocol::TurnContextItem;
use crate::truncate::truncate_middle;
use crate::util::backoff;
use askama::Template;
use codex_protocol::items::TurnItem;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseInputItem;
@@ -23,18 +22,16 @@ use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::RolloutItem;
use codex_protocol::user_input::UserInput;
use futures::prelude::*;
use serde::Deserialize;
use serde_json::Value as JsonValue;
use serde_json::json;
use tracing::error;
use tracing::warn;
use uuid::Uuid;

pub const SUMMARIZATION_PROMPT: &str = include_str!("../../templates/compact/prompt.md");
const COMPACT_USER_MESSAGE_MAX_TOKENS: usize = 20_000;

#[derive(Template)]
#[template(path = "compact/history_bridge.md", escape = "none")]
struct HistoryBridgeTemplate<'a> {
user_messages_text: &'a str,
summary_text: &'a str,
}

pub(crate) async fn run_inline_auto_compact_task(
sess: Arc<Session>,
turn_context: Arc<TurnContext>,
@@ -63,6 +60,7 @@ async fn run_compact_task_inner(
input: Vec<UserInput>,
) {
let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
let output_schema = structured_summary::schema();

let mut history = sess.clone_history().await;
history.record_items(&[initial_input_for_turn.into()]);
@@ -86,6 +84,7 @@ async fn run_compact_task_inner(
let turn_input = history.get_history_for_prompt();
let prompt = Prompt {
input: turn_input.clone(),
output_schema: Some(output_schema.clone()),
..Default::default()
};
let attempt_result = drain_to_completed(&sess, turn_context.as_ref(), &prompt).await;
@@ -147,8 +146,22 @@ async fn run_compact_task_inner(
}

let history_snapshot = sess.clone_history().await.get_history();
let summary_text = get_last_assistant_message_from_turn(&history_snapshot).unwrap_or_default();
let user_messages = collect_user_messages(&history_snapshot);
let structured_summary = structured_summary::parse(&history_snapshot);
let (summary_text, user_messages) = match structured_summary {
Some(structured) => {
let structured_summary::Summary {
intent_user_message,
summary,
} = structured;
(summary, vec![intent_user_message])
}
None => {
let summary =
get_last_assistant_message_from_turn(&history_snapshot).unwrap_or_default();
let users = collect_user_messages(&history_snapshot);
(summary, users)
}
};
let initial_context = sess.build_initial_context(turn_context.as_ref());
let mut new_history = build_compacted_history(initial_context, &user_messages, &summary_text);
let ghost_snapshots: Vec<ResponseItem> = history_snapshot
@@ -218,36 +231,70 @@ fn build_compacted_history_with_limit(
summary_text: &str,
max_bytes: usize,
) -> Vec<ResponseItem> {
let mut user_messages_text = if user_messages.is_empty() {
"(none)".to_string()
} else {
user_messages.join("\n\n")
};
// Truncate the concatenated prior user messages so the bridge message
// stays well under the context window (approx. 4 bytes/token).
if user_messages_text.len() > max_bytes {
user_messages_text = truncate_middle(&user_messages_text, max_bytes).0;
let mut selected_messages: Vec<String> = Vec::new();
if max_bytes > 0 {
let mut remaining = max_bytes;
for message in user_messages.iter().rev() {
if remaining == 0 {
break;
}
if message.len() <= remaining {
selected_messages.push(message.clone());
remaining = remaining.saturating_sub(message.len());
} else {
let (truncated, _) = truncate_middle(message, remaining);
selected_messages.push(truncated);
break;
}
}
selected_messages.reverse();
}

for message in &selected_messages {
history.push(ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: message.clone(),
}],
});
}

let summary_text = if summary_text.is_empty() {
"(no summary available)".to_string()
} else {
summary_text.to_string()
};
let Ok(bridge) = HistoryBridgeTemplate {
user_messages_text: &user_messages_text,
summary_text: &summary_text,
}
.render() else {
return vec![];
};
history.push(ResponseItem::Message {

let call_id = deterministic_compact_call_id(&selected_messages, &summary_text);
history.push(ResponseItem::CustomToolCall {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText { text: bridge }],
status: Some("completed".to_string()),
call_id: call_id.clone(),
name: "compactor".to_string(),
input: String::new(),
});
history.push(ResponseItem::CustomToolCallOutput {
call_id,
output: summary_text,
});

history
}
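
Note: build_compacted_history_with_limit now keeps whole user messages newest-first under a byte budget instead of middle-truncating one concatenated blob. A minimal self-contained sketch of that selection follows; select_newest_first and truncate_middle_sketch are illustrative stand-ins (the crate's truncate_middle also returns the number of dropped tokens), and the example is ASCII-only so byte slicing stays on char boundaries.

// Sketch: keep whole messages from newest to oldest while they fit the
// budget; middle-truncate the first message that does not fit, then stop.
fn truncate_middle_sketch(s: &str, max_bytes: usize) -> String {
    if s.len() <= max_bytes {
        return s.to_string();
    }
    let keep = max_bytes.saturating_sub(3) / 2;
    format!("{}...{}", &s[..keep], &s[s.len() - keep..])
}

fn select_newest_first(messages: &[String], max_bytes: usize) -> Vec<String> {
    let mut selected = Vec::new();
    let mut remaining = max_bytes;
    for message in messages.iter().rev() {
        if remaining == 0 {
            break;
        }
        if message.len() <= remaining {
            selected.push(message.clone());
            remaining -= message.len();
        } else {
            selected.push(truncate_middle_sketch(message, remaining));
            break;
        }
    }
    selected.reverse(); // restore chronological order
    selected
}

fn main() {
    let messages = vec!["old".repeat(10), "recent".to_string()];
    // A 16-byte budget keeps "recent" (6 bytes) whole and squeezes the
    // 30-byte older message into the remaining 10 bytes.
    println!("{:?}", select_newest_first(&messages, 16)); // ["old...old", "recent"]
}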

fn deterministic_compact_call_id(user_messages: &[String], summary_text: &str) -> String {
// A deterministic call ID is required for testing purposes.
let payload = serde_json::json!({
"summary": summary_text,
"messages": user_messages,
});
if let Ok(bytes) = serde_json::to_vec(&payload) {
Uuid::new_v5(&Uuid::NAMESPACE_OID, &bytes).to_string()
} else {
Uuid::new_v4().to_string()
}
}
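
Note: Uuid::new_v5 derives a name-based (SHA-1) UUID from a namespace plus arbitrary bytes, so the same summary and messages always hash to the same call ID, while the Uuid::new_v4 fallback is random. A small sketch of that property, assuming the uuid crate with the v4/v5 features (as enabled in Cargo.toml above) and serde_json:

use uuid::Uuid;

fn main() {
    // Serialize a canonical payload; serde_json emits map keys in a
    // stable order, so equal inputs yield equal bytes.
    let payload = serde_json::json!({
        "summary": "summary text",
        "messages": ["first user message"],
    });
    let bytes = serde_json::to_vec(&payload).expect("serializable payload");
    let a = Uuid::new_v5(&Uuid::NAMESPACE_OID, &bytes);
    let b = Uuid::new_v5(&Uuid::NAMESPACE_OID, &bytes);
    assert_eq!(a, b); // deterministic, unlike Uuid::new_v4()
    println!("{a}");
}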

async fn drain_to_completed(
sess: &Session,
turn_context: &TurnContext,
@@ -280,6 +327,56 @@ async fn drain_to_completed(
}
}

mod structured_summary {
use super::*;

#[derive(Debug, Deserialize)]
pub struct Summary {
pub(crate) intent_user_message: String,
pub(crate) summary: String,
}

pub fn schema() -> JsonValue {
json!({
"type": "object",
"properties": {
"intent_user_message": {
"type": "string",
"description": "One consolidated user message capturing the user's current goal or request."
},
"summary": {
"type": "string",
"description": "A concise status summary describing progress and next steps."
}
},
"required": ["intent_user_message", "summary"],
"additionalProperties": false
})
}
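
Note: when this schema is attached to a Prompt as output_schema, the request to the Responses API carries it under text.format as a strict json_schema (the expected request bodies later in this diff show the name codex_output_schema). A hedged sketch of that wrapping; wrap_output_schema is illustrative, not the crate's actual API:

use serde_json::json;

// Build the `text.format` value the tests below assert on.
fn wrap_output_schema(schema: serde_json::Value) -> serde_json::Value {
    json!({
        "format": {
            "type": "json_schema",
            "strict": true,
            "name": "codex_output_schema",
            "schema": schema,
        }
    })
}

fn main() {
    let schema = json!({ "type": "object" }); // stand-in for structured_summary::schema()
    println!("{}", wrap_output_schema(schema));
}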

pub fn parse(responses: &[ResponseItem]) -> Option<Summary> {
let text = get_last_assistant_message_from_turn(responses)?;
let parsed: Summary = match serde_json::from_str(text.trim()) {
Ok(parsed) => parsed,
Err(err) => {
warn!(?err, "Failed to parse structured compact summary");
return None;
}
};

let intent = parsed.intent_user_message.trim();
if intent.is_empty() {
warn!("Structured compact summary missing intent_user_message");
return None;
}

Some(Summary {
intent_user_message: intent.to_string(),
summary: parsed.summary.trim().to_string(),
})
}
}

#[cfg(test)]
mod tests {
use super::*;
@@ -371,6 +468,41 @@ mod tests {
assert_eq!(vec!["real user message".to_string()], collected);
}

#[test]
fn parse_auto_compact_summary_extracts_trimmed_fields() {
let payload = r#"
{
"intent_user_message": " intent summary ",
"summary": " status note "
}
"#;
let responses = vec![ResponseItem::Message {
id: None,
role: "assistant".to_string(),
content: vec![ContentItem::OutputText {
text: payload.to_string(),
}],
}];

let parsed = structured_summary::parse(&responses).expect("structured summary expected");
assert_eq!(parsed.intent_user_message, "intent summary");
assert_eq!(parsed.summary, "status note");
}

#[test]
fn parse_auto_compact_summary_requires_intent() {
let payload = r#"{"intent_user_message":" ","summary":"just text"}"#;
let responses = vec![ResponseItem::Message {
id: None,
role: "assistant".to_string(),
content: vec![ContentItem::OutputText {
text: payload.to_string(),
}],
}];

assert!(structured_summary::parse(&responses).is_none());
}

#[test]
fn build_compacted_history_truncates_overlong_user_messages() {
// Use a small truncation limit so the test remains fast while still validating
@@ -383,30 +515,79 @@ mod tests {
"SUMMARY",
max_bytes,
);
assert_eq!(history.len(), 3);

// Expect exactly one bridge message added to history (plus any initial context we provided, which is none).
assert_eq!(history.len(), 1);
let truncated_message = &history[0];
let tool_call = &history[1];
let tool_output = &history[2];

// Extract the text content of the bridge message.
let bridge_text = match &history[0] {
let truncated_text = match truncated_message {
ResponseItem::Message { role, content, .. } if role == "user" => {
content_items_to_text(content).unwrap_or_default()
}
other => panic!("unexpected item in history: {other:?}"),
};

// The bridge should contain the truncation marker and not the full original payload.
assert!(
bridge_text.contains("tokens truncated"),
"expected truncation marker in bridge message"
truncated_text.contains("tokens truncated"),
"expected truncation marker in truncated user message"
);
assert!(
!bridge_text.contains(&big),
"bridge should not include the full oversized user text"
);
assert!(
bridge_text.contains("SUMMARY"),
"bridge should include the provided summary text"
!truncated_text.contains(&big),
"truncated user message should not include the full oversized user text"
);

match tool_call {
ResponseItem::CustomToolCall { name, .. } => {
assert_eq!(name, "compactor");
}
other => panic!("expected CustomToolCall, got {other:?}"),
}

match tool_output {
ResponseItem::CustomToolCallOutput { output, .. } => {
assert_eq!(output, "SUMMARY");
}
other => panic!("expected CustomToolCallOutput, got {other:?}"),
}
}

#[test]
fn build_compacted_history_produces_stable_call_id() {
let initial_context: Vec<ResponseItem> = Vec::new();
let user_messages = vec!["first user message".to_string()];
let summary_text = "summary text";

let history1 =
build_compacted_history(initial_context.clone(), &user_messages, summary_text);
let history2 = build_compacted_history(initial_context, &user_messages, summary_text);

let call_id1 = extract_compactor_call_id(&history1);
let call_id2 = extract_compactor_call_id(&history2);

assert_eq!(call_id1, call_id2);
}

fn extract_compactor_call_id(history: &[ResponseItem]) -> String {
let call_id = history
.iter()
.find_map(|item| match item {
ResponseItem::CustomToolCall { name, call_id, .. } if name == "compactor" => {
Some(call_id.clone())
}
_ => None,
})
.expect("compactor call id missing");

let output_call_id = history
.iter()
.find_map(|item| match item {
ResponseItem::CustomToolCallOutput { call_id, .. } => Some(call_id.clone()),
_ => None,
})
.expect("compactor call output missing");

assert_eq!(call_id, output_call_id);
call_id
}
}

@@ -1,7 +0,0 @@
You were originally given instructions from a user over one or more turns. Here were the user messages:

{{ user_messages_text }}

Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:

{{ summary_text }}
@@ -1,5 +1,72 @@
You have exceeded the maximum number of tokens, please stop coding and instead write a short memento message for the next agent. Your note should:
- Summarize what you finished and what still needs work. If there was a recent update_plan call, repeat its steps verbatim.
- List outstanding TODOs with file paths / line numbers so they're easy to find.
- Flag code that needs more tests (edge cases, performance, integration, etc.).
- Record any open bugs, quirks, or setup steps that will make it easier for the next agent to pick up where you left off.
You are performing a CONTEXT CHECKPOINT COMPACTION for a tool. You must output a single valid JSON object with the following schema:

{
"type": "object",
"properties": {
"intent_user_message": { "type": "string" },
"summary": { "type": "string" }
},
"required": ["intent_user_message", "summary"],
"additionalProperties": false
}

STRICT OUTPUT RULES
- Output a single valid JSON object only (no Markdown, no fences, no commentary).
- Must parse with serde_json::from_str.
- UTF-8 only.
- Up to ~4000 tokens total.

GOAL
Reconstruct the SINGLE ACTIVE TASK THREAD and all critical short-term context so the conversation can continue seamlessly after reset.

ACTIVE TASK SELECTION
- Identify the task thread with in-progress work (code edits, plans, tests, IDs, env state).
- Do not switch tasks due to incidental questions.
- If uncertainty exists, include the candidate context and mark unknowns.

OUTPUT FIELDS

"intent_user_message"
A consolidated user-side directive for re-injection after reset, containing:

1) Minimal machine-only context necessary to interpret the task
(paths, APIs, configs, constraints, design notes, etc.)

2) The ORIGINAL user request that defined the active task, verbatim:
<VERBATIM_REQUEST_START>
...original user text...
<VERBATIM_REQUEST_END>

3) The most recent user messages relevant to continuing execution,
**including clarifications, corrections, follow-ups, parameter overrides, and sub-tasks**.

Use this delimiter block for that (raw text, no quotes or escapes):
<RECENT_USER_CONTEXT_START>
...verbatim recent user messages (~2–6 turns or whatever is needed)...
<RECENT_USER_CONTEXT_END>

Rules for both blocks:
- EXACT text.
- Put the start/end tags on their own lines without indentation.

"summary"
A machine continuation context containing:

- current status/phase
- authoritative excerpts (files, functions, diffs, snippets)
- internal state and intermediate values
- pending steps and design notes
- environment context (paths, branches, configs, run IDs)
- test state or execution buffer if relevant
- explicit unknowns as "UNKNOWN"
- brief note on non-active threads only if helpful
- final cursor line:
RESUME_AT: <next precise action>

SELF-CHECK BEFORE EMITTING (do not output)
- Valid JSON object only?
- intent_user_message includes both blocks:
<VERBATIM_REQUEST_START> … <VERBATIM_REQUEST_END>
<RECENT_USER_CONTEXT_START> … <RECENT_USER_CONTEXT_END>
- Enough granular state to continue without prior messages?
- summary ends with a concrete RESUME_AT?
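
Note (illustrative, not part of the template): an output that passes this self-check and deserializes into the structured_summary::Summary shape could look like the payload below; all field contents are invented for the example, and the Summary struct here simply mirrors the crate's.

use serde::Deserialize; // assumes serde with the "derive" feature

#[derive(Debug, Deserialize)]
struct Summary {
    intent_user_message: String,
    summary: String,
}

fn main() {
    // Hypothetical model output: one JSON object, both delimiter blocks
    // embedded in intent_user_message, summary ending with RESUME_AT.
    let payload = r#"{
      "intent_user_message": "Context: repo codex-rs, branch jif/change.\n<VERBATIM_REQUEST_START>\nMake /compact deterministic.\n<VERBATIM_REQUEST_END>\n<RECENT_USER_CONTEXT_START>\nAlso keep the tests green.\n<RECENT_USER_CONTEXT_END>",
      "summary": "Status: output schema wired into the compact prompt. Pending: rollout tests.\nRESUME_AT: run the compact test suite"
    }"#;
    let parsed: Summary = serde_json::from_str(payload).expect("valid JSON object");
    assert!(parsed.intent_user_message.contains("<VERBATIM_REQUEST_START>"));
    assert!(parsed.summary.contains("RESUME_AT:"));
    println!("{parsed:?}");
}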
@@ -12,6 +12,8 @@ use codex_protocol::user_input::UserInput;
use core_test_support::load_default_config_for_test;
use core_test_support::skip_if_no_network;
use core_test_support::wait_for_event;
use std::collections::HashSet;
use std::collections::VecDeque;
use tempfile::TempDir;

use codex_core::codex::compact::SUMMARIZATION_PROMPT;
@@ -26,12 +28,14 @@ use core_test_support::responses::sse;
use core_test_support::responses::sse_failed;
use core_test_support::responses::start_mock_server;
use pretty_assertions::assert_eq;
use serde_json::json;
// --- Test helpers -----------------------------------------------------------

pub(super) const FIRST_REPLY: &str = "FIRST_REPLY";
pub(super) const SUMMARY_TEXT: &str = "SUMMARY_ONLY_CONTEXT";
const THIRD_USER_MSG: &str = "next turn";
const AUTO_SUMMARY_TEXT: &str = "AUTO_SUMMARY";
const AUTO_INTENT_TEXT: &str = "AUTO_INTENT";
const FIRST_AUTO_MSG: &str = "token limit start";
const SECOND_AUTO_MSG: &str = "token limit push";
const STILL_TOO_BIG_REPLY: &str = "STILL_TOO_BIG";
@@ -39,12 +43,43 @@ const MULTI_AUTO_MSG: &str = "multi auto";
const SECOND_LARGE_REPLY: &str = "SECOND_LARGE_REPLY";
const FIRST_AUTO_SUMMARY: &str = "FIRST_AUTO_SUMMARY";
const SECOND_AUTO_SUMMARY: &str = "SECOND_AUTO_SUMMARY";
const FIRST_AUTO_INTENT: &str = "FIRST_AUTO_INTENT";
const SECOND_AUTO_INTENT: &str = "SECOND_AUTO_INTENT";
const SUMMARY_INTENT: &str = "SUMMARY_INTENT";
const FINAL_REPLY: &str = "FINAL_REPLY";
const CONTEXT_LIMIT_MESSAGE: &str =
"Your input exceeds the context window of this model. Please adjust your input and try again.";
const DUMMY_FUNCTION_NAME: &str = "unsupported_tool";
const DUMMY_CALL_ID: &str = "call-multi-auto";
const FUNCTION_CALL_LIMIT_MSG: &str = "function call limit push";
const POST_AUTO_USER_MSG: &str = "post auto follow-up";
const COMPACT_PROMPT_MARKER: &str =
"You are performing a CONTEXT CHECKPOINT COMPACTION for a tool.";

fn structured_auto_summary(intent: &str, summary: &str) -> String {
json!({
"intent_user_message": intent,
"summary": summary,
})
.to_string()
}

fn drop_call_id(value: &mut serde_json::Value) {
match value {
serde_json::Value::Object(obj) => {
obj.retain(|k, _| k != "call_id");
for v in obj.values_mut() {
drop_call_id(v);
}
}
serde_json::Value::Array(arr) => {
for v in arr {
drop_call_id(v);
}
}
_ => {}
}
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn summarize_context_three_requests_and_instructions() {
@@ -71,14 +106,13 @@ async fn summarize_context_three_requests_and_instructions() {
// Mount three expectations, one per request, matched by body content.
let first_matcher = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains("\"text\":\"hello world\"")
&& !body.contains("You have exceeded the maximum number of tokens")
body.contains("\"text\":\"hello world\"") && !body.contains(COMPACT_PROMPT_MARKER)
};
mount_sse_once_match(&server, first_matcher, sse1).await;

let second_matcher = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains("You have exceeded the maximum number of tokens")
body.contains(COMPACT_PROMPT_MARKER)
};
mount_sse_once_match(&server, second_matcher, sse2).await;

@@ -163,7 +197,7 @@ async fn summarize_context_three_requests_and_instructions() {
"expected summarize trigger, got `{text2}`"
);

// Third request must contain the refreshed instructions, bridge summary message and new user msg.
// Third request must contain the refreshed instructions, compacted user history, and new user message.
let input3 = body3.get("input").and_then(|v| v.as_array()).unwrap();

assert!(
@@ -171,16 +205,58 @@ async fn summarize_context_three_requests_and_instructions() {
"expected refreshed context and new user message in third request"
);

// Collect all (role, text) message tuples.
let mut messages: Vec<(String, String)> = Vec::new();
let mut compactor_call_ids: HashSet<String> = HashSet::new();
let mut compactor_output: Option<String> = None;

for item in input3 {
if item["type"].as_str() == Some("message") {
let role = item["role"].as_str().unwrap_or_default().to_string();
let text = item["content"][0]["text"]
.as_str()
.unwrap_or_default()
.to_string();
messages.push((role, text));
match item.get("type").and_then(|v| v.as_str()) {
Some("message") => {
let role = item
.get("role")
.and_then(|v| v.as_str())
.unwrap_or_default()
.to_string();
let text = item
.get("content")
.and_then(|v| v.as_array())
.and_then(|arr| arr.first())
.and_then(|entry| entry.get("text"))
.and_then(|v| v.as_str())
.unwrap_or_default()
.to_string();
messages.push((role, text));
}
Some("custom_tool_call") => {
let name = item
.get("name")
.and_then(|v| v.as_str())
.unwrap_or_default();
if name == "compactor" {
let call_id = item
.get("call_id")
.and_then(|v| v.as_str())
.unwrap_or_default()
.to_string();
compactor_call_ids.insert(call_id);
}
}
Some("custom_tool_call_output") => {
let output = item
.get("output")
.and_then(|v| v.as_str())
.unwrap_or_default()
.to_string();
let call_id = item
.get("call_id")
.and_then(|v| v.as_str())
.unwrap_or_default()
.to_string();
if compactor_call_ids.contains(&call_id) && compactor_output.is_none() {
compactor_output = Some(output);
}
}
_ => {}
}
}

@@ -193,22 +269,18 @@ async fn summarize_context_three_requests_and_instructions() {
.any(|(r, t)| r == "user" && t == THIRD_USER_MSG),
"third request should include the new user message"
);
let Some((_, bridge_text)) = messages.iter().find(|(role, text)| {
role == "user"
&& (text.contains("Here were the user messages")
|| text.contains("Here are all the user messages"))
&& text.contains(SUMMARY_TEXT)
}) else {
panic!("expected a bridge message containing the summary");
};
assert!(
bridge_text.contains("hello world"),
"bridge should capture earlier user messages"
messages
.iter()
.any(|(r, t)| r == "user" && t == "hello world"),
"third request should include the original user message"
);
assert!(
!bridge_text.contains(SUMMARIZATION_PROMPT),
"bridge text should not echo the summarize trigger"
!compactor_call_ids.is_empty(),
"expected a compactor tool call"
);
let compactor_output = compactor_output.expect("expected a compactor tool call output");
assert_eq!(compactor_output, SUMMARY_TEXT);
assert!(
!messages
.iter()
@@ -338,16 +410,22 @@ async fn auto_compact_runs_after_token_limit_hit() {
ev_completed_with_tokens("r2", 330_000),
]);

let auto_summary_payload = structured_auto_summary(AUTO_INTENT_TEXT, AUTO_SUMMARY_TEXT);
let sse3 = sse(vec![
ev_assistant_message("m3", AUTO_SUMMARY_TEXT),
ev_assistant_message("m3", &auto_summary_payload),
ev_completed_with_tokens("r3", 200),
]);
let sse_resume = sse(vec![ev_completed("r3-resume")]);
let sse4 = sse(vec![
ev_assistant_message("m4", FINAL_REPLY),
ev_completed_with_tokens("r4", 120),
]);

let first_matcher = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains(FIRST_AUTO_MSG)
&& !body.contains(SECOND_AUTO_MSG)
&& !body.contains("You have exceeded the maximum number of tokens")
&& !body.contains(COMPACT_PROMPT_MARKER)
};
mount_sse_once_match(&server, first_matcher, sse1).await;

@@ -355,16 +433,30 @@ async fn auto_compact_runs_after_token_limit_hit() {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains(SECOND_AUTO_MSG)
&& body.contains(FIRST_AUTO_MSG)
&& !body.contains("You have exceeded the maximum number of tokens")
&& !body.contains(COMPACT_PROMPT_MARKER)
};
mount_sse_once_match(&server, second_matcher, sse2).await;

let third_matcher = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains("You have exceeded the maximum number of tokens")
body.contains(COMPACT_PROMPT_MARKER)
};
mount_sse_once_match(&server, third_matcher, sse3).await;

let resume_matcher = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains(AUTO_INTENT_TEXT)
&& !body.contains(COMPACT_PROMPT_MARKER)
&& !body.contains(POST_AUTO_USER_MSG)
};
mount_sse_once_match(&server, resume_matcher, sse_resume).await;

let fourth_matcher = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains(POST_AUTO_USER_MSG) && !body.contains(COMPACT_PROMPT_MARKER)
};
mount_sse_once_match(&server, fourth_matcher, sse4).await;

let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
..built_in_model_providers()["openai"].clone()
@@ -402,18 +494,29 @@ async fn auto_compact_runs_after_token_limit_hit() {
.unwrap();

wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
// wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: POST_AUTO_USER_MSG.into(),
}],
})
.await
.unwrap();

wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

let requests = server.received_requests().await.unwrap();
assert!(
requests.len() >= 3,
"auto compact should add at least a third request, got {}",
assert_eq!(
requests.len(),
5,
"expected user turns, a compaction request, a resumed turn, and the follow-up turn; got {}",
requests.len()
);
let is_auto_compact = |req: &wiremock::Request| {
std::str::from_utf8(&req.body)
.unwrap_or("")
.contains("You have exceeded the maximum number of tokens")
.contains(COMPACT_PROMPT_MARKER)
};
let auto_compact_count = requests.iter().filter(|req| is_auto_compact(req)).count();
assert_eq!(
@@ -430,11 +533,41 @@ async fn auto_compact_runs_after_token_limit_hit() {
"auto compact should add a third request"
);

let resume_index = requests
.iter()
.enumerate()
.find_map(|(idx, req)| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
(body.contains(AUTO_INTENT_TEXT)
&& !body.contains(COMPACT_PROMPT_MARKER)
&& !body.contains(POST_AUTO_USER_MSG))
.then_some(idx)
})
.expect("resume request missing after compaction");

let follow_up_index = requests
.iter()
.enumerate()
.rev()
.find_map(|(idx, req)| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
(body.contains(POST_AUTO_USER_MSG) && !body.contains(COMPACT_PROMPT_MARKER))
.then_some(idx)
})
.expect("follow-up request missing");
assert_eq!(follow_up_index, 4, "follow-up request should be last");

let body_first = requests[0].body_json::<serde_json::Value>().unwrap();
let body3 = requests[auto_compact_index]
let body_auto = requests[auto_compact_index]
.body_json::<serde_json::Value>()
.unwrap();
let instructions = body3
let body_resume = requests[resume_index]
.body_json::<serde_json::Value>()
.unwrap();
let body_follow_up = requests[follow_up_index]
.body_json::<serde_json::Value>()
.unwrap();
let instructions = body_auto
.get("instructions")
.and_then(|v| v.as_str())
.unwrap_or_default();
@@ -448,13 +581,16 @@ async fn auto_compact_runs_after_token_limit_hit() {
"auto compact should keep the standard developer instructions",
);

let input3 = body3.get("input").and_then(|v| v.as_array()).unwrap();
let last3 = input3
let input_auto = body_auto.get("input").and_then(|v| v.as_array()).unwrap();
let last_auto = input_auto
.last()
.expect("auto compact request should append a user message");
assert_eq!(last3.get("type").and_then(|v| v.as_str()), Some("message"));
assert_eq!(last3.get("role").and_then(|v| v.as_str()), Some("user"));
let last_text = last3
assert_eq!(
last_auto.get("type").and_then(|v| v.as_str()),
Some("message")
);
assert_eq!(last_auto.get("role").and_then(|v| v.as_str()), Some("user"));
let last_text = last_auto
.get("content")
.and_then(|v| v.as_array())
.and_then(|items| items.first())
@@ -465,6 +601,57 @@ async fn auto_compact_runs_after_token_limit_hit() {
last_text, SUMMARIZATION_PROMPT,
"auto compact should send the summarization prompt as a user message",
);

let input_resume = body_resume.get("input").and_then(|v| v.as_array()).unwrap();
assert!(
input_resume.iter().any(
|item| item.get("type").and_then(|v| v.as_str()) == Some("custom_tool_call_output")
),
"resume request should include compacted history"
);

let input_follow_up = body_follow_up
.get("input")
.and_then(|v| v.as_array())
.unwrap();
let user_texts: Vec<String> = input_follow_up
.iter()
.filter(|item| item.get("type").and_then(|v| v.as_str()) == Some("message"))
.filter(|item| item.get("role").and_then(|v| v.as_str()) == Some("user"))
.filter_map(|item| {
item.get("content")
.and_then(|v| v.as_array())
.and_then(|arr| arr.first())
.and_then(|entry| entry.get("text"))
.and_then(|v| v.as_str())
.map(std::string::ToString::to_string)
})
.collect();
assert!(
user_texts.iter().any(|text| text == AUTO_INTENT_TEXT),
"auto compact follow-up request should include the intent user message"
);
assert!(
user_texts.iter().any(|text| text == POST_AUTO_USER_MSG),
"auto compact follow-up request should include the new user message"
);
assert!(
!user_texts
.iter()
.any(|text| text == FIRST_AUTO_MSG || text == SECOND_AUTO_MSG),
"original user messages should be replaced by the intent message after compaction"
);
let compactor_output = input_follow_up
.iter()
.find(|item| item.get("type").and_then(|v| v.as_str()) == Some("custom_tool_call_output"))
.and_then(|item| item.get("output"))
.and_then(|v| v.as_str())
.unwrap_or_default()
.to_string();
assert_eq!(
compactor_output, AUTO_SUMMARY_TEXT,
"compactor tool output should contain the structured summary field"
);
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -483,8 +670,9 @@ async fn auto_compact_persists_rollout_entries() {
ev_completed_with_tokens("r2", 330_000),
]);

let auto_summary_payload = structured_auto_summary(AUTO_INTENT_TEXT, AUTO_SUMMARY_TEXT);
let sse3 = sse(vec![
ev_assistant_message("m3", AUTO_SUMMARY_TEXT),
ev_assistant_message("m3", &auto_summary_payload),
ev_completed_with_tokens("r3", 200),
]);

@@ -492,7 +680,7 @@ async fn auto_compact_persists_rollout_entries() {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains(FIRST_AUTO_MSG)
&& !body.contains(SECOND_AUTO_MSG)
&& !body.contains("You have exceeded the maximum number of tokens")
&& !body.contains(COMPACT_PROMPT_MARKER)
};
mount_sse_once_match(&server, first_matcher, sse1).await;

@@ -500,13 +688,13 @@ async fn auto_compact_persists_rollout_entries() {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains(SECOND_AUTO_MSG)
&& body.contains(FIRST_AUTO_MSG)
&& !body.contains("You have exceeded the maximum number of tokens")
&& !body.contains(COMPACT_PROMPT_MARKER)
};
mount_sse_once_match(&server, second_matcher, sse2).await;

let third_matcher = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains("You have exceeded the maximum number of tokens")
body.contains(COMPACT_PROMPT_MARKER)
};
mount_sse_once_match(&server, third_matcher, sse3).await;

@@ -591,8 +779,9 @@ async fn auto_compact_stops_after_failed_attempt() {
ev_completed_with_tokens("r1", 500),
]);

let summary_payload = structured_auto_summary(SUMMARY_INTENT, SUMMARY_TEXT);
let sse2 = sse(vec![
ev_assistant_message("m2", SUMMARY_TEXT),
ev_assistant_message("m2", &summary_payload),
ev_completed_with_tokens("r2", 50),
]);

@@ -603,21 +792,19 @@ async fn auto_compact_stops_after_failed_attempt() {

let first_matcher = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains(FIRST_AUTO_MSG)
&& !body.contains("You have exceeded the maximum number of tokens")
body.contains(FIRST_AUTO_MSG) && !body.contains(COMPACT_PROMPT_MARKER)
};
mount_sse_once_match(&server, first_matcher, sse1.clone()).await;

let second_matcher = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains("You have exceeded the maximum number of tokens")
body.contains(COMPACT_PROMPT_MARKER)
};
mount_sse_once_match(&server, second_matcher, sse2.clone()).await;

let third_matcher = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
!body.contains("You have exceeded the maximum number of tokens")
&& body.contains(SUMMARY_TEXT)
!body.contains(COMPACT_PROMPT_MARKER) && body.contains(SUMMARY_TEXT)
};
mount_sse_once_match(&server, third_matcher, sse3.clone()).await;

@@ -810,6 +997,220 @@ async fn manual_compact_retries_after_context_window_error() {
}
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn manual_compact_twice_replaces_history_with_latest_intent() {
skip_if_no_network!();

let first_user_message = "first manual turn";
let second_user_message = "second manual turn";
let final_user_message = "post compact follow-up";
let first_intent = "FIRST_MANUAL_INTENT";
let second_intent = "SECOND_MANUAL_INTENT";
let first_summary = "FIRST_MANUAL_SUMMARY";
let second_summary = "SECOND_MANUAL_SUMMARY";

let server = start_mock_server().await;

let first_turn = sse(vec![
ev_assistant_message("m1", FIRST_REPLY),
ev_completed("r1"),
]);
let first_compact = sse(vec![
ev_assistant_message("m2", &structured_auto_summary(first_intent, first_summary)),
ev_completed("r2"),
]);
let second_turn = sse(vec![
ev_assistant_message("m3", SECOND_LARGE_REPLY),
ev_completed("r3"),
]);
let second_compact = sse(vec![
ev_assistant_message(
"m4",
&structured_auto_summary(second_intent, second_summary),
),
ev_completed("r4"),
]);
let final_turn = sse(vec![
ev_assistant_message("m5", FINAL_REPLY),
ev_completed("r5"),
]);

let responses_mock = mount_sse_sequence(
&server,
vec![
first_turn,
first_compact,
second_turn,
second_compact,
final_turn,
],
)
.await;

let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
..built_in_model_providers()["openai"].clone()
};

let home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&home);
config.model_provider = model_provider;
let codex = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"))
.new_conversation(config)
.await
.unwrap()
.conversation;

codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: first_user_message.into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

codex.submit(Op::Compact).await.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: second_user_message.into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

codex.submit(Op::Compact).await.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: final_user_message.into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

let requests = responses_mock.requests();
assert_eq!(
requests.len(),
5,
"expected exactly 5 requests (user turn, compact, user turn, compact, final turn)"
);
let contains_user_text = |input: &[serde_json::Value], expected: &str| -> bool {
input.iter().any(|item| {
item.get("type").and_then(|v| v.as_str()) == Some("message")
&& item.get("role").and_then(|v| v.as_str()) == Some("user")
&& item
.get("content")
.and_then(|v| v.as_array())
.map(|arr| {
arr.iter().any(|entry| {
entry.get("text").and_then(|v| v.as_str()) == Some(expected)
})
})
.unwrap_or(false)
})
};

let first_turn_input = requests[0].input();
assert!(
contains_user_text(&first_turn_input, first_user_message),
"first turn request missing first user message"
);
assert!(
!contains_user_text(&first_turn_input, SUMMARIZATION_PROMPT),
"first turn request should not include summarization prompt"
);

let first_compact_input = requests[1].input();
assert!(
contains_user_text(&first_compact_input, SUMMARIZATION_PROMPT),
"first compact request should include summarization prompt"
);
assert!(
contains_user_text(&first_compact_input, first_user_message),
"first compact request should include history before compaction"
);

let second_turn_input = requests[2].input();
assert!(
contains_user_text(&second_turn_input, second_user_message),
"second turn request missing second user message"
);
assert!(
contains_user_text(&second_turn_input, first_intent),
"second turn request should include the compacted intent"
);

let second_compact_input = requests[3].input();
assert!(
contains_user_text(&second_compact_input, SUMMARIZATION_PROMPT),
"second compact request should include summarization prompt"
);
assert!(
contains_user_text(&second_compact_input, second_user_message),
"second compact request should include latest history"
);

let mut final_output = requests
.last()
.unwrap_or_else(|| panic!("final turn request missing for {final_user_message}"))
.input()
.into_iter()
.collect::<VecDeque<_>>();

// System prompt
final_output.pop_front();
// Developer instructions
final_output.pop_front();

let _ = final_output
.iter_mut()
.map(drop_call_id)
.collect::<Vec<_>>();

let expected = vec![
json!({
"content": vec![
json!({
"text": "SECOND_MANUAL_INTENT",
"type": "input_text",
}),
],
"role": "user",
"type": "message",
}),
json!({
"input": "",
"name": "compactor",
"status": "completed",
"type": "custom_tool_call",
}),
json!({
"output": "SECOND_MANUAL_SUMMARY",
"type": "custom_tool_call_output",
}),
json!({
"content": vec![
json!({
"text": "post compact follow-up",
"type": "input_text",
}),
],
"role": "user",
"type": "message",
}),
];
assert_eq!(final_output, expected);
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_events() {
skip_if_no_network!();
@@ -820,8 +1221,9 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
ev_assistant_message("m1", FIRST_REPLY),
ev_completed_with_tokens("r1", 500),
]);
let first_summary_payload = structured_auto_summary(FIRST_AUTO_INTENT, FIRST_AUTO_SUMMARY);
let sse2 = sse(vec![
ev_assistant_message("m2", FIRST_AUTO_SUMMARY),
ev_assistant_message("m2", &first_summary_payload),
ev_completed_with_tokens("r2", 50),
]);
let sse3 = sse(vec![
@@ -832,8 +1234,9 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
ev_assistant_message("m4", SECOND_LARGE_REPLY),
ev_completed_with_tokens("r4", 450),
]);
let second_summary_payload = structured_auto_summary(SECOND_AUTO_INTENT, SECOND_AUTO_SUMMARY);
let sse5 = sse(vec![
ev_assistant_message("m5", SECOND_AUTO_SUMMARY),
ev_assistant_message("m5", &second_summary_payload),
ev_completed_with_tokens("r5", 60),
]);
let sse6 = sse(vec![
@@ -909,7 +1312,7 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
"first request should contain the user input"
);
assert!(
request_bodies[1].contains("You have exceeded the maximum number of tokens"),
request_bodies[1].contains(COMPACT_PROMPT_MARKER),
"first auto compact request should include the summarization prompt"
);
assert!(
@@ -917,7 +1320,7 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
"function call output should be sent before the second auto compact"
);
assert!(
request_bodies[4].contains("You have exceeded the maximum number of tokens"),
request_bodies[4].contains(COMPACT_PROMPT_MARKER),
"second auto compact request should include the summarization prompt"
);
}
@@ -940,8 +1343,9 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {
ev_assistant_message("m2", FINAL_REPLY),
ev_completed_with_tokens("r2", over_limit_tokens),
]);
let auto_summary_payload = structured_auto_summary(AUTO_INTENT_TEXT, AUTO_SUMMARY_TEXT);
let auto_compact_turn = sse(vec![
ev_assistant_message("m3", AUTO_SUMMARY_TEXT),
ev_assistant_message("m3", &auto_summary_payload),
ev_completed_with_tokens("r3", 10),
]);
let post_auto_compact_turn = sse(vec![ev_completed_with_tokens("r4", 10)]);
@@ -1011,7 +1415,7 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {

let auto_compact_body = auto_compact_mock.single_request().body_json().to_string();
assert!(
auto_compact_body.contains("You have exceeded the maximum number of tokens"),
auto_compact_body.contains(COMPACT_PROMPT_MARKER),
"auto compact request should include the summarization prompt after exceeding 95% (limit {limit})"
);
}

@@ -36,6 +36,8 @@ use tempfile::TempDir;
use wiremock::MockServer;

const AFTER_SECOND_RESUME: &str = "AFTER_SECOND_RESUME";
const COMPACT_PROMPT_MARKER: &str =
"You are performing a CONTEXT CHECKPOINT COMPACTION for a tool.";

fn network_disabled() -> bool {
std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok()
@@ -155,6 +157,34 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
.unwrap_or_default()
.to_string();
let expected_model = OPENAI_DEFAULT_MODEL;
let extract_compactor_items = |request: &Value| -> (Value, Value) {
let mut call = None;
let mut output = None;
if let Some(input) = request.get("input").and_then(|v| v.as_array()) {
for item in input {
match item.get("type").and_then(|v| v.as_str()) {
Some("custom_tool_call")
if item.get("name").and_then(|v| v.as_str()) == Some("compactor") =>
{
call = Some(item.clone());
}
Some("custom_tool_call_output") => {
output = Some(item.clone());
}
_ => {}
}
}
}
let call = call.expect("expected compactor call");
let output = output.expect("expected compactor output");
(call, output)
};
let (compactor_call_after_compact, compactor_output_after_compact) =
extract_compactor_items(&requests[2]);
let (compactor_call_after_resume, compactor_output_after_resume) =
extract_compactor_items(&requests[3]);
let (compactor_call_after_fork, compactor_output_after_fork) =
extract_compactor_items(&requests[4]);
let user_turn_1 = json!(
{
"model": expected_model,
@@ -271,7 +301,29 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
"include": [
"reasoning.encrypted_content"
],
"prompt_cache_key": prompt_cache_key
"prompt_cache_key": prompt_cache_key,
"text": {
"format": {
"type": "json_schema",
"strict": true,
"name": "codex_output_schema",
"schema": {
"type": "object",
"properties": {
"intent_user_message": {
"type": "string",
"description": "One consolidated user message capturing the user's current goal or request."
},
"summary": {
"type": "string",
"description": "A concise status summary describing progress and next steps."
}
},
"required": ["intent_user_message", "summary"],
"additionalProperties": false
}
}
}
});
let user_turn_2_after_compact = json!(
{
@@ -304,16 +356,12 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
"content": [
{
"type": "input_text",
"text": "You were originally given instructions from a user over one or more turns. Here were the user messages:

hello world

Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:

SUMMARY_ONLY_CONTEXT"
"text": "hello world"
}
]
},
compactor_call_after_compact,
compactor_output_after_compact,
{
"type": "message",
"role": "user",
@@ -369,16 +417,12 @@ SUMMARY_ONLY_CONTEXT"
"content": [
{
"type": "input_text",
"text": "You were originally given instructions from a user over one or more turns. Here were the user messages:

hello world

Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:

SUMMARY_ONLY_CONTEXT"
"text": "hello world"
}
]
},
compactor_call_after_resume,
compactor_output_after_resume,
{
"type": "message",
"role": "user",
@@ -454,16 +498,12 @@ SUMMARY_ONLY_CONTEXT"
"content": [
{
"type": "input_text",
"text": "You were originally given instructions from a user over one or more turns. Here were the user messages:

hello world

Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:

SUMMARY_ONLY_CONTEXT"
"text": "hello world"
}
]
},
compactor_call_after_fork,
compactor_output_after_fork,
{
"type": "message",
"role": "user",
@@ -603,6 +643,34 @@ async fn compact_resume_after_second_compaction_preserves_history() {
.unwrap_or_default()
.to_string();

// Build expected final request input: initial context + forked user message +
// compacted call/output + post-compact user message + resumed user message.
let extract_compactor_items = |request: &Value| -> (Value, Value) {
let mut call = None;
let mut output = None;
if let Some(input) = request.get("input").and_then(|v| v.as_array()) {
for item in input {
match item.get("type").and_then(|v| v.as_str()) {
Some("custom_tool_call")
if item.get("name").and_then(|v| v.as_str()) == Some("compactor") =>
{
call = Some(item.clone());
}
Some("custom_tool_call_output") => {
output = Some(item.clone());
}
_ => {}
}
}
}
(
call.expect("compactor call"),
output.expect("compactor output"),
)
};
let (call_after_second_compact, output_after_second_compact) =
extract_compactor_items(&requests[requests.len() - 3]);

let mut expected = json!([
{
"instructions": prompt,
@@ -633,10 +701,12 @@ async fn compact_resume_after_second_compaction_preserves_history() {
"content": [
{
"type": "input_text",
"text": "You were originally given instructions from a user over one or more turns. Here were the user messages:\n\nAFTER_FORK\n\nAnother language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\n\nSUMMARY_ONLY_CONTEXT"
"text": "AFTER_FORK"
}
]
},
call_after_second_compact,
output_after_second_compact,
{
"type": "message",
"role": "user",
@@ -722,7 +792,7 @@ async fn mount_initial_flow(server: &MockServer) {
let match_first = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains("\"text\":\"hello world\"")
&& !body.contains("You have exceeded the maximum number of tokens")
&& !body.contains(COMPACT_PROMPT_MARKER)
&& !body.contains(&format!("\"text\":\"{SUMMARY_TEXT}\""))
&& !body.contains("\"text\":\"AFTER_COMPACT\"")
&& !body.contains("\"text\":\"AFTER_RESUME\"")
@@ -732,7 +802,7 @@ async fn mount_initial_flow(server: &MockServer) {

let match_compact = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains("You have exceeded the maximum number of tokens")
body.contains(COMPACT_PROMPT_MARKER)
};
mount_sse_once_match(server, match_compact, sse2).await;

@@ -766,8 +836,7 @@ async fn mount_second_compact_flow(server: &MockServer) {

let match_second_compact = |req: &wiremock::Request| {
let body = std::str::from_utf8(&req.body).unwrap_or("");
body.contains("You have exceeded the maximum number of tokens")
&& body.contains("AFTER_FORK")
body.contains(COMPACT_PROMPT_MARKER) && body.contains("AFTER_FORK")
};
mount_sse_once_match(server, match_second_compact, sse6).await;