Align compaction tests with always-skip post-layout

This commit is contained in:
Charles Cunningham
2026-02-18 00:08:39 -08:00
parent d33a1bedd4
commit 714e0b78ba
5 changed files with 49 additions and 391 deletions

View File

@@ -45,7 +45,6 @@ use core_test_support::responses::sse_failed;
use core_test_support::responses::sse_response;
use core_test_support::responses::start_mock_server;
use pretty_assertions::assert_eq;
use serde_json::json;
use wiremock::MockServer;
// --- Test helpers -----------------------------------------------------------
@@ -143,8 +142,8 @@ fn assert_pre_sampling_switch_compaction_requests(
"follow-up request after successful model-switch compaction should include model-switch update item"
);
assert!(
body_contains_text(&follow_up_body, "<environment_context>"),
"follow-up request should preserve canonical environment context after pre-sampling compaction"
!body_contains_text(&follow_up_body, "<environment_context>"),
"follow-up request should not reinsert canonical environment context after pre-sampling compaction"
);
}
@@ -680,10 +679,6 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
// mock responses from the model
let reasoning_response_1 = ev_reasoning_item("m1", &["I will create a react app"], &[]);
let encrypted_content_1 = reasoning_response_1["item"]["encrypted_content"]
.as_str()
.unwrap();
// first chunk of work
let model_reasoning_response_1_sse = sse(vec![
reasoning_response_1.clone(),
@@ -698,10 +693,6 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
]);
let reasoning_response_2 = ev_reasoning_item("m3", &["I will create a node app"], &[]);
let encrypted_content_2 = reasoning_response_2["item"]["encrypted_content"]
.as_str()
.unwrap();
// second chunk of work
let model_reasoning_response_2_sse = sse(vec![
reasoning_response_2.clone(),
@@ -716,13 +707,9 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
]);
let reasoning_response_3 = ev_reasoning_item("m6", &["I will create a python app"], &[]);
let encrypted_content_3 = reasoning_response_3["item"]["encrypted_content"]
.as_str()
.unwrap();
// third chunk of work
let model_reasoning_response_3_sse = sse(vec![
ev_reasoning_item("m6", &["I will create a python app"], &[]),
reasoning_response_3.clone(),
ev_local_shell_call("r6-shell", "completed", vec!["echo", "make-python"]),
ev_completed_with_tokens("r6", token_count_used),
]);
@@ -806,9 +793,21 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
}
let initial_input = normalize_inputs(input);
let environment_message = initial_input[0]["content"][0]["text"].as_str().unwrap();
assert!(
initial_input.iter().any(|value| {
value
.get("content")
.and_then(|content| content.as_array())
.and_then(|content| content.first())
.and_then(|item| item.get("text"))
.and_then(|text| text.as_str())
.is_some_and(|text| text.contains("<environment_context>"))
}),
"first request should include canonical environment context"
);
// test 1: after compaction, we should have one environment message, one user message, and one user message with summary prefix
// test 1: after each compaction, the next model request should include
// only the latest user message and the newest summary.
let compaction_indices = [2, 4, 6];
let expected_summaries = [
prefixed_first_summary.as_str(),
@@ -819,11 +818,9 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
let body = requests_payloads.clone()[i].body_json();
let input = body.get("input").and_then(|v| v.as_array()).unwrap();
let input = normalize_inputs(input);
assert_eq!(input.len(), 3);
let environment_message = input[0]["content"][0]["text"].as_str().unwrap();
let user_message_received = input[1]["content"][0]["text"].as_str().unwrap();
let summary_message = input[2]["content"][0]["text"].as_str().unwrap();
assert_eq!(environment_message, environment_message);
assert_eq!(input.len(), 2);
let user_message_received = input[0]["content"][0]["text"].as_str().unwrap();
let summary_message = input[1]["content"][0]["text"].as_str().unwrap();
assert_eq!(user_message_received, user_message);
assert_eq!(
summary_message, expected_summary,
@@ -831,358 +828,22 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
);
}
// test 2: the expected requests inputs should be as follows:
let expected_requests_inputs = json!([
[
// 0: first request of the user message.
{
"content": [
{
"text": environment_message,
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": [
{
"text": "create an app",
"type": "input_text"
}
],
"role": "user",
"type": "message"
}
]
,
[
// 1: first automatic compaction request.
{
"content": [
{
"text": environment_message,
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": [
{
"text": "create an app",
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": null,
"encrypted_content": encrypted_content_1,
"summary": [
{
"text": "I will create a react app",
"type": "summary_text"
}
],
"type": "reasoning"
},
{
"action": {
"command": [
"echo",
"make-react"
],
"env": null,
"timeout_ms": null,
"type": "exec",
"user": null,
"working_directory": null
},
"call_id": "r1-shell",
"status": "completed",
"type": "local_shell_call"
},
{
"call_id": "r1-shell",
"output": "execution error: Io(Os { code: 2, kind: NotFound, message: \"No such file or directory\" })",
"type": "function_call_output"
},
{
"content": [
{
"text": SUMMARIZATION_PROMPT,
"type": "input_text"
}
],
"role": "user",
"type": "message"
}
]
,
[
// 2: request after first automatic compaction.
{
"content": [
{
"text": environment_message,
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": [
{
"text": "create an app",
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": [
{
"text": prefixed_first_summary.clone(),
"type": "input_text"
}
],
"role": "user",
"type": "message"
}
]
,
[
// 3: request for second automatic compaction.
{
"content": [
{
"text": environment_message,
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": [
{
"text": "create an app",
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": [
{
"text": prefixed_first_summary.clone(),
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": null,
"encrypted_content": encrypted_content_2,
"summary": [
{
"text": "I will create a node app",
"type": "summary_text"
}
],
"type": "reasoning"
},
{
"action": {
"command": [
"echo",
"make-node"
],
"env": null,
"timeout_ms": null,
"type": "exec",
"user": null,
"working_directory": null
},
"call_id": "r3-shell",
"status": "completed",
"type": "local_shell_call"
},
{
"call_id": "r3-shell",
"output": "execution error: Io(Os { code: 2, kind: NotFound, message: \"No such file or directory\" })",
"type": "function_call_output"
},
{
"content": [
{
"text": SUMMARIZATION_PROMPT,
"type": "input_text"
}
],
"role": "user",
"type": "message"
}
]
,
// 4: request after second automatic compaction.
[
{
"content": [
{
"text": environment_message,
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": [
{
"text": "create an app",
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": [
{
"text": prefixed_second_summary.clone(),
"type": "input_text"
}
],
"role": "user",
"type": "message"
}
]
,
[
// 5: request for third automatic compaction.
{
"content": [
{
"text": environment_message,
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": [
{
"text": "create an app",
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": [
{
"text": prefixed_second_summary.clone(),
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": null,
"encrypted_content": encrypted_content_3,
"summary": [
{
"text": "I will create a python app",
"type": "summary_text"
}
],
"type": "reasoning"
},
{
"action": {
"command": [
"echo",
"make-python"
],
"env": null,
"timeout_ms": null,
"type": "exec",
"user": null,
"working_directory": null
},
"call_id": "r6-shell",
"status": "completed",
"type": "local_shell_call"
},
{
"call_id": "r6-shell",
"output": "execution error: Io(Os { code: 2, kind: NotFound, message: \"No such file or directory\" })",
"type": "function_call_output"
},
{
"content": [
{
"text": SUMMARIZATION_PROMPT,
"type": "input_text"
}
],
"role": "user",
"type": "message"
}
]
,
[
{
// 6: request after third automatic compaction.
"content": [
{
"text": environment_message,
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": [
{
"text": "create an app",
"type": "input_text"
}
],
"role": "user",
"type": "message"
},
{
"content": [
{
"text": prefixed_third_summary.clone(),
"type": "input_text"
}
],
"role": "user",
"type": "message"
}
]
]);
for (i, request) in requests_payloads.iter().enumerate() {
let body = request.body_json();
// test 2: each auto-compaction request should include the summarization prompt.
for i in [1, 3, 5] {
let body = requests_payloads[i].body_json();
let input = body.get("input").and_then(|v| v.as_array()).unwrap();
let expected_input = expected_requests_inputs[i].as_array().unwrap();
assert_eq!(normalize_inputs(input), normalize_inputs(expected_input));
assert!(
input.iter().any(|value| {
value
.get("content")
.and_then(|content| content.as_array())
.and_then(|content| content.first())
.and_then(|item| item.get("text"))
.and_then(|text| text.as_str())
.is_some_and(|text| text == SUMMARIZATION_PROMPT)
}),
"compaction request {i} should include summarization prompt"
);
}
// test 3: the number of requests should be 7

View File

@@ -1723,8 +1723,7 @@ async fn snapshot_request_shape_remote_mid_turn_continuation_compaction() -> Res
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn snapshot_request_shape_remote_mid_turn_compaction_summary_only_reinjects_context()
-> Result<()> {
async fn snapshot_request_shape_remote_mid_turn_compaction_summary_only_layout() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = TestCodexHarness::with_builder(
@@ -1789,9 +1788,9 @@ async fn snapshot_request_shape_remote_mid_turn_compaction_summary_only_reinject
let compact_request = compact_mock.single_request();
let post_compact_turn_request = post_compact_turn_request_mock.single_request();
insta::assert_snapshot!(
"remote_mid_turn_compaction_summary_only_reinjects_context_shapes",
"remote_mid_turn_compaction_summary_only_shapes",
format_labeled_requests_snapshot(
"Remote mid-turn compaction where compact output has only summary user content: continuation layout reinjects canonical context before that summary.",
"Remote mid-turn compaction where compact output has only summary user content: continuation layout keeps the summary-only compact output without inserting extra context items.",
&[
("Remote Compaction Request", &compact_request),
(
@@ -1806,8 +1805,7 @@ async fn snapshot_request_shape_remote_mid_turn_compaction_summary_only_reinject
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn snapshot_request_shape_remote_mid_turn_compaction_multi_summary_reinjects_above_last_summary()
-> Result<()> {
async fn snapshot_request_shape_remote_mid_turn_compaction_multi_summary_layout() -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = TestCodexHarness::with_builder(
@@ -1896,7 +1894,7 @@ async fn snapshot_request_shape_remote_mid_turn_compaction_multi_summary_reinjec
"older summary should round-trip from conversation history into the next compact request"
);
insta::assert_snapshot!(
"remote_mid_turn_compaction_multi_summary_reinjects_above_last_summary_shapes",
"remote_mid_turn_compaction_multi_summary_shapes",
format_labeled_requests_snapshot(
"Remote mid-turn compaction after an earlier summary compaction: the older summary remains in model-visible history and round-trips into the next compact request.",
&[

View File

@@ -1,6 +1,6 @@
---
source: core/tests/suite/compact.rs
assertion_line: 1773
assertion_line: 1460
expression: "format_labeled_requests_snapshot(\"Pre-sampling compaction on model switch to a smaller context window: current behavior compacts using prior-turn history only (incoming user message excluded), and the follow-up request carries compacted history plus the new user message.\",\n&[(\"Initial Request (Previous Model)\", &requests[0]),\n(\"Pre-sampling Compaction Request\", &requests[1]),\n(\"Post-Compaction Follow-up Request (Next Model)\", &requests[2]),])"
---
Scenario: Pre-sampling compaction on model switch to a smaller context window: current behavior compacts using prior-turn history only (incoming user message excluded), and the follow-up request carries compacted history plus the new user message.
@@ -22,10 +22,7 @@ Scenario: Pre-sampling compaction on model switch to a smaller context window: c
06:message/user:<SUMMARIZATION_PROMPT>
## Post-Compaction Follow-up Request (Next Model)
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user:<AGENTS_MD>
02:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
03:message/user:before switch
04:message/user:<COMPACTION_SUMMARY>\nPRE_SAMPLING_SUMMARY
05:message/developer:<model_switch>\nThe user was previously using a different model....
06:message/user:after switch
00:message/user:before switch
01:message/user:<COMPACTION_SUMMARY>\nPRE_SAMPLING_SUMMARY
02:message/developer:<model_switch>\nThe user was previously using a different model....
03:message/user:after switch

View File

@@ -1,5 +1,6 @@
---
source: core/tests/suite/compact_remote.rs
assertion_line: 1896
expression: "format_labeled_requests_snapshot(\"Remote mid-turn compaction after an earlier summary compaction: the older summary remains in model-visible history and round-trips into the next compact request.\",\n&[(\"Second Turn Request (Before Mid-Turn Compaction)\", &second_turn_request),\n(\"Remote Compaction Request\", &compact_request),])"
---
Scenario: Remote mid-turn compaction after an earlier summary compaction: the older summary remains in model-visible history and round-trips into the next compact request.

View File

@@ -1,8 +1,9 @@
---
source: core/tests/suite/compact_remote.rs
expression: "format_labeled_requests_snapshot(\"Remote mid-turn compaction where compact output has only summary user content: continuation layout reinjects canonical context before that summary.\",\n&[(\"Remote Compaction Request\", &compact_request),\n(\"Remote Post-Compaction History Layout\", &post_compact_turn_request),])"
assertion_line: 1790
expression: "format_labeled_requests_snapshot(\"Remote mid-turn compaction where compact output has only summary user content: continuation layout keeps the summary-only compact output without inserting extra context items.\",\n&[(\"Remote Compaction Request\", &compact_request),\n(\"Remote Post-Compaction History Layout\", &post_compact_turn_request),])"
---
Scenario: Remote mid-turn compaction where compact output has only summary user content: continuation layout reinjects canonical context before that summary.
Scenario: Remote mid-turn compaction where compact output has only summary user content: continuation layout keeps the summary-only compact output without inserting extra context items.
## Remote Compaction Request
00:message/developer:<PERMISSIONS_INSTRUCTIONS>