feat: detached review (#7292)

This commit is contained in:
jif-oai
2025-11-28 11:34:57 +00:00
committed by GitHub
parent cbd7d0d543
commit aaec8abf58
15 changed files with 529 additions and 228 deletions

View File

@@ -70,7 +70,6 @@ async fn codex_delegate_forwards_exec_approval_and_proceeds_on_approval() {
review_request: ReviewRequest {
prompt: "Please review".to_string(),
user_facing_hint: "review".to_string(),
append_to_original_thread: true,
},
})
.await
@@ -146,7 +145,6 @@ async fn codex_delegate_forwards_patch_approval_and_proceeds_on_decision() {
review_request: ReviewRequest {
prompt: "Please review".to_string(),
user_facing_hint: "review".to_string(),
append_to_original_thread: true,
},
})
.await
@@ -201,7 +199,6 @@ async fn codex_delegate_ignores_legacy_deltas() {
review_request: ReviewRequest {
prompt: "Please review".to_string(),
user_facing_hint: "review".to_string(),
append_to_original_thread: true,
},
})
.await

View File

@@ -18,6 +18,7 @@ use codex_core::protocol::ReviewOutputEvent;
use codex_core::protocol::ReviewRequest;
use codex_core::protocol::RolloutItem;
use codex_core::protocol::RolloutLine;
use codex_core::review_format::render_review_output_text;
use codex_protocol::user_input::UserInput;
use core_test_support::load_default_config_for_test;
use core_test_support::load_sse_fixture_with_id_from_str;
@@ -82,7 +83,6 @@ async fn review_op_emits_lifecycle_and_review_output() {
review_request: ReviewRequest {
prompt: "Please review my changes".to_string(),
user_facing_hint: "my changes".to_string(),
append_to_original_thread: true,
},
})
.await
@@ -124,22 +124,36 @@ async fn review_op_emits_lifecycle_and_review_output() {
let mut saw_header = false;
let mut saw_finding_line = false;
let expected_assistant_text = render_review_output_text(&expected);
let mut saw_assistant_plain = false;
let mut saw_assistant_xml = false;
for line in text.lines() {
if line.trim().is_empty() {
continue;
}
let v: serde_json::Value = serde_json::from_str(line).expect("jsonl line");
let rl: RolloutLine = serde_json::from_value(v).expect("rollout line");
if let RolloutItem::ResponseItem(ResponseItem::Message { role, content, .. }) = rl.item
&& role == "user"
{
for c in content {
if let ContentItem::InputText { text } = c {
if text.contains("full review output from reviewer model") {
saw_header = true;
if let RolloutItem::ResponseItem(ResponseItem::Message { role, content, .. }) = rl.item {
if role == "user" {
for c in content {
if let ContentItem::InputText { text } = c {
if text.contains("full review output from reviewer model") {
saw_header = true;
}
if text.contains("- Prefer Stylize helpers — /tmp/file.rs:10-20") {
saw_finding_line = true;
}
}
if text.contains("- Prefer Stylize helpers — /tmp/file.rs:10-20") {
saw_finding_line = true;
}
} else if role == "assistant" {
for c in content {
if let ContentItem::OutputText { text } = c {
if text.contains("<user_action>") {
saw_assistant_xml = true;
}
if text == expected_assistant_text {
saw_assistant_plain = true;
}
}
}
}
@@ -150,6 +164,14 @@ async fn review_op_emits_lifecycle_and_review_output() {
saw_finding_line,
"formatted finding line missing from rollout"
);
assert!(
saw_assistant_plain,
"assistant review output missing from rollout"
);
assert!(
!saw_assistant_xml,
"assistant review output contains user_action markup"
);
server.verify().await;
}
@@ -179,7 +201,6 @@ async fn review_op_with_plain_text_emits_review_fallback() {
review_request: ReviewRequest {
prompt: "Plain text review".to_string(),
user_facing_hint: "plain text review".to_string(),
append_to_original_thread: true,
},
})
.await
@@ -238,7 +259,6 @@ async fn review_filters_agent_message_related_events() {
review_request: ReviewRequest {
prompt: "Filter streaming events".to_string(),
user_facing_hint: "Filter streaming events".to_string(),
append_to_original_thread: true,
},
})
.await
@@ -247,7 +267,7 @@ async fn review_filters_agent_message_related_events() {
let mut saw_entered = false;
let mut saw_exited = false;
// Drain until TaskComplete; assert filtered events never surface.
// Drain until TaskComplete; assert streaming-related events never surface.
wait_for_event(&codex, |event| match event {
EventMsg::TaskComplete(_) => true,
EventMsg::EnteredReviewMode(_) => {
@@ -265,12 +285,6 @@ async fn review_filters_agent_message_related_events() {
EventMsg::AgentMessageDelta(_) => {
panic!("unexpected AgentMessageDelta surfaced during review")
}
EventMsg::ItemCompleted(ev) => match &ev.item {
codex_protocol::items::TurnItem::AgentMessage(_) => {
panic!("unexpected ItemCompleted for TurnItem::AgentMessage surfaced during review")
}
_ => false,
},
_ => false,
})
.await;
@@ -279,8 +293,9 @@ async fn review_filters_agent_message_related_events() {
server.verify().await;
}
/// When the model returns structured JSON in a review, ensure no AgentMessage
/// is emitted; the UI consumes the structured result via ExitedReviewMode.
/// When the model returns structured JSON in a review, ensure only a single
/// non-streaming AgentMessage is emitted; the UI consumes the structured
/// result via ExitedReviewMode plus a final assistant message.
// Windows CI only: bump to 4 workers to prevent SSE/event starvation and test timeouts.
#[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))]
#[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))]
@@ -323,19 +338,21 @@ async fn review_does_not_emit_agent_message_on_structured_output() {
review_request: ReviewRequest {
prompt: "check structured".to_string(),
user_facing_hint: "check structured".to_string(),
append_to_original_thread: true,
},
})
.await
.unwrap();
// Drain events until TaskComplete; ensure none are AgentMessage.
// Drain events until TaskComplete; ensure we only see a final
// AgentMessage (no streaming assistant messages).
let mut saw_entered = false;
let mut saw_exited = false;
let mut agent_messages = 0;
wait_for_event(&codex, |event| match event {
EventMsg::TaskComplete(_) => true,
EventMsg::AgentMessage(_) => {
panic!("unexpected AgentMessage during review with structured output")
agent_messages += 1;
false
}
EventMsg::EnteredReviewMode(_) => {
saw_entered = true;
@@ -348,6 +365,7 @@ async fn review_does_not_emit_agent_message_on_structured_output() {
_ => false,
})
.await;
assert_eq!(1, agent_messages, "expected exactly one AgentMessage event");
assert!(saw_entered && saw_exited, "missing review lifecycle events");
server.verify().await;
@@ -377,7 +395,6 @@ async fn review_uses_custom_review_model_from_config() {
review_request: ReviewRequest {
prompt: "use custom model".to_string(),
user_facing_hint: "use custom model".to_string(),
append_to_original_thread: true,
},
})
.await
@@ -495,7 +512,6 @@ async fn review_input_isolated_from_parent_history() {
review_request: ReviewRequest {
prompt: review_prompt.clone(),
user_facing_hint: review_prompt.clone(),
append_to_original_thread: true,
},
})
.await
@@ -608,7 +624,6 @@ async fn review_history_does_not_leak_into_parent_session() {
review_request: ReviewRequest {
prompt: "Start a review".to_string(),
user_facing_hint: "Start a review".to_string(),
append_to_original_thread: true,
},
})
.await