Compare full request for websockets incrementality (#11343)

Tools can now change dynamically mid-turn, so comparing only the input items is no longer
enough. Compare the full request before reusing an incremental websocket connection.
This commit is contained in:
pakrym-oai
2026-02-10 11:14:36 -08:00
committed by GitHub
parent 548afa5749
commit 0639c33892
3 changed files with 124 additions and 36 deletions

View File

@@ -80,7 +80,7 @@ pub enum ResponseEvent {
ModelsEtag(String),
}
#[derive(Debug, Serialize, Clone)]
#[derive(Debug, Serialize, Clone, PartialEq)]
pub struct Reasoning {
#[serde(skip_serializing_if = "Option::is_none")]
pub effort: Option<ReasoningEffortConfig>,
@@ -88,14 +88,14 @@ pub struct Reasoning {
pub summary: Option<ReasoningSummaryConfig>,
}
#[derive(Debug, Serialize, Default, Clone)]
#[derive(Debug, Serialize, Default, Clone, PartialEq)]
/// Discriminates the kind of output format carried in the `type` field of `TextFormat`.
///
/// Serialized in snake_case, so `JsonSchema` is emitted as `json_schema`.
#[serde(rename_all = "snake_case")]
pub enum TextFormatType {
/// JSON Schema output formatting; this is the default variant.
#[default]
JsonSchema,
}
#[derive(Debug, Serialize, Default, Clone)]
#[derive(Debug, Serialize, Default, Clone, PartialEq)]
pub struct TextFormat {
/// Format type used by the OpenAI text controls.
pub r#type: TextFormatType,
@@ -109,7 +109,7 @@ pub struct TextFormat {
/// Controls the `text` field for the Responses API, combining verbosity and
/// optional JSON schema output formatting.
#[derive(Debug, Serialize, Default, Clone)]
#[derive(Debug, Serialize, Default, Clone, PartialEq)]
pub struct TextControls {
#[serde(skip_serializing_if = "Option::is_none")]
pub verbosity: Option<OpenAiVerbosity>,
@@ -117,7 +117,7 @@ pub struct TextControls {
pub format: Option<TextFormat>,
}
#[derive(Debug, Serialize, Default, Clone)]
#[derive(Debug, Serialize, Default, Clone, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum OpenAiVerbosity {
Low,
@@ -136,7 +136,7 @@ impl From<VerbosityConfig> for OpenAiVerbosity {
}
}
#[derive(Debug, Serialize, Clone)]
#[derive(Debug, Serialize, Clone, PartialEq)]
pub struct ResponsesApiRequest {
pub model: String,
pub instructions: String,

View File

@@ -155,8 +155,8 @@ pub struct ModelClient {
/// The session establishes a Responses WebSocket connection lazily and reuses it across multiple
/// requests within the turn. It also caches per-turn state:
///
/// - The last request's input items, so subsequent calls can use `response.append` when the input
/// is an incremental extension of the previous request.
/// - The last full request, so subsequent calls can use `response.append` only when the current
/// request is an incremental extension of the previous one.
/// - The `x-codex-turn-state` sticky-routing token, which must be replayed for all requests within
/// the same turn.
///
@@ -166,7 +166,7 @@ pub struct ModelClient {
pub struct ModelClientSession {
client: ModelClient,
connection: Option<ApiWebSocketConnection>,
websocket_last_items: Vec<ResponseItem>,
websocket_last_request: Option<ResponsesApiRequest>,
websocket_last_response_id: Option<String>,
websocket_last_response_id_rx: Option<oneshot::Receiver<String>>,
/// Turn state for sticky routing.
@@ -230,7 +230,7 @@ impl ModelClient {
ModelClientSession {
client: self.clone(),
connection: None,
websocket_last_items: Vec::new(),
websocket_last_request: None,
websocket_last_response_id: None,
websocket_last_response_id_rx: None,
turn_state: Arc::new(OnceLock::new()),
@@ -530,16 +530,25 @@ impl ModelClientSession {
}
}
fn get_incremental_items(&self, input_items: &[ResponseItem]) -> Option<Vec<ResponseItem>> {
// Checks whether the current request input is an incremental append to the previous request.
// If items in the new request contain all the items from the previous request we build
// a response.append request otherwise we start with a fresh response.create request.
let previous_len = self.websocket_last_items.len();
let can_append = previous_len > 0
&& input_items.starts_with(&self.websocket_last_items)
&& previous_len < input_items.len();
if can_append {
Some(input_items[previous_len..].to_vec())
fn get_incremental_items(&self, request: &ResponsesApiRequest) -> Option<Vec<ResponseItem>> {
// Checks whether the current request is an incremental append to the previous request.
// We only append when non-input request fields are unchanged and `input` is a strict
// extension of the previous input.
let previous_request = self.websocket_last_request.as_ref()?;
let mut previous_without_input = previous_request.clone();
previous_without_input.input.clear();
let mut request_without_input = request.clone();
request_without_input.input.clear();
if previous_without_input != request_without_input {
return None;
}
let previous_len = previous_request.input.len();
if previous_len > 0
&& request.input.starts_with(&previous_request.input)
&& previous_len < request.input.len()
{
Some(request.input[previous_len..].to_vec())
} else {
None
}
@@ -571,10 +580,10 @@ impl ModelClientSession {
fn prepare_websocket_request(
&mut self,
payload: ResponseCreateWsRequest,
) -> (ResponsesWsRequest, Vec<ResponseItem>) {
let full_input = payload.input.clone();
request: &ResponsesApiRequest,
) -> ResponsesWsRequest {
let responses_websockets_v2_enabled = self.client.responses_websockets_v2_enabled();
let incremental_items = self.get_incremental_items(&full_input);
let incremental_items = self.get_incremental_items(request);
if let Some(append_items) = incremental_items {
if responses_websockets_v2_enabled
&& let Some(previous_response_id) = self.websocket_previous_response_id()
@@ -584,20 +593,17 @@ impl ModelClientSession {
input: append_items,
..payload
};
return (ResponsesWsRequest::ResponseCreate(payload), full_input);
return ResponsesWsRequest::ResponseCreate(payload);
}
if !responses_websockets_v2_enabled {
return (
ResponsesWsRequest::ResponseAppend(ResponseAppendWsRequest {
input: append_items,
}),
full_input,
);
return ResponsesWsRequest::ResponseAppend(ResponseAppendWsRequest {
input: append_items,
});
}
}
(ResponsesWsRequest::ResponseCreate(payload), full_input)
ResponsesWsRequest::ResponseCreate(payload)
}
/// Opportunistically warms a websocket for this turn-scoped client session.
@@ -650,7 +656,7 @@ impl ModelClientSession {
};
if needs_new {
self.websocket_last_items.clear();
self.websocket_last_request = None;
self.websocket_last_response_id = None;
self.websocket_last_response_id_rx = None;
let turn_state = options
@@ -806,7 +812,7 @@ impl ModelClientSession {
Err(err) => return Err(map_api_error(err)),
}
let (request, request_input) = self.prepare_websocket_request(ws_payload);
let ws_request = self.prepare_websocket_request(ws_payload, &request);
let stream_result = self
.connection
@@ -816,10 +822,10 @@ impl ModelClientSession {
"websocket connection is unavailable".to_string(),
))
})?
.stream_request(request)
.stream_request(ws_request)
.await
.map_err(map_api_error)?;
self.websocket_last_items = request_input;
self.websocket_last_request = Some(request);
let (last_response_id_sender, last_response_id_receiver) = oneshot::channel();
self.websocket_last_response_id_rx = Some(last_response_id_receiver);
let mut last_response_id_sender = Some(last_response_id_sender);
@@ -928,7 +934,7 @@ impl ModelClientSession {
);
self.connection = None;
self.websocket_last_items.clear();
self.websocket_last_request = None;
}
activated
}

View File

@@ -22,6 +22,7 @@ use codex_otel::metrics::MetricsConfig;
use codex_protocol::ThreadId;
use codex_protocol::account::PlanType;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::models::BaseInstructions;
use codex_protocol::openai_models::ModelInfo;
use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig;
use codex_protocol::user_input::UserInput;
@@ -603,6 +604,42 @@ async fn responses_websocket_creates_on_non_prefix() {
server.shutdown().await;
}
/// Two requests whose inputs form a prefix chain but whose base instructions differ must
/// not be sent incrementally: the second websocket message is expected to be a full
/// `response.create` carrying the entire input and no `previous_response_id`.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_creates_when_non_input_request_fields_change() {
    skip_if_no_network!();

    // Scripted server events: one `created`/`completed` pair per expected request.
    let first_reply = vec![ev_response_created("resp-1"), ev_completed("resp-1")];
    let second_reply = vec![ev_response_created("resp-2"), ev_completed("resp-2")];
    let server = start_websocket_server(vec![vec![first_reply, second_reply]]).await;

    let harness = websocket_harness(&server).await;
    let mut client_session = harness.client.new_session();

    // The second input extends the first, but the instructions are different.
    let prompt_one =
        prompt_with_input_and_instructions(vec![message_item("hello")], "base instructions one");
    let second_input = vec![message_item("hello"), message_item("second")];
    let prompt_two = prompt_with_input_and_instructions(second_input, "base instructions two");

    stream_until_complete(&mut client_session, &harness, &prompt_one).await;
    stream_until_complete(&mut client_session, &harness, &prompt_two).await;

    // Both requests must have been recorded on the single connection.
    let recorded = server.single_connection();
    assert_eq!(recorded.len(), 2);

    let second_body = recorded.get(1).expect("missing request").body_json();
    assert_eq!(second_body["type"].as_str(), Some("response.create"));
    assert_eq!(second_body.get("previous_response_id"), None);

    // The full input must be resent, not just the newly appended item.
    let expected_input = serde_json::to_value(&prompt_two.input).expect("serialize full input");
    assert_eq!(second_body["input"], expected_input);

    server.shutdown().await;
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_v2_creates_with_previous_response_id_on_prefix() {
skip_if_no_network!();
@@ -637,6 +674,43 @@ async fn responses_websocket_v2_creates_with_previous_response_id_on_prefix() {
server.shutdown().await;
}
/// Same scenario as the non-v2 test, but using the harness built by
/// `websocket_harness_with_v2(&server, true)`: when the base instructions differ between
/// two requests, the second message must be a full `response.create` with the entire
/// input and without a `previous_response_id`.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_v2_creates_without_previous_response_id_when_non_input_fields_change()
{
    skip_if_no_network!();

    // Scripted server events: one `created`/`completed` pair per expected request.
    let first_reply = vec![ev_response_created("resp-1"), ev_completed("resp-1")];
    let second_reply = vec![ev_response_created("resp-2"), ev_completed("resp-2")];
    let server = start_websocket_server(vec![vec![first_reply, second_reply]]).await;

    let harness = websocket_harness_with_v2(&server, true).await;
    let mut session = harness.client.new_session();

    // The second input extends the first, but the instructions are different.
    let prompt_one =
        prompt_with_input_and_instructions(vec![message_item("hello")], "base instructions one");
    let second_input = vec![message_item("hello"), message_item("second")];
    let prompt_two = prompt_with_input_and_instructions(second_input, "base instructions two");

    stream_until_complete(&mut session, &harness, &prompt_one).await;
    stream_until_complete(&mut session, &harness, &prompt_two).await;

    // Both requests must have been recorded on the single connection.
    let recorded = server.single_connection();
    assert_eq!(recorded.len(), 2);

    let second_body = recorded.get(1).expect("missing request").body_json();
    assert_eq!(second_body["type"].as_str(), Some("response.create"));
    assert_eq!(second_body.get("previous_response_id"), None);

    // The full input must be resent, not just the newly appended item.
    let expected_input = serde_json::to_value(&prompt_two.input).expect("serialize full input");
    assert_eq!(second_body["input"], expected_input);

    server.shutdown().await;
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_websocket_v2_after_error_uses_full_create_without_previous_response_id() {
skip_if_no_network!();
@@ -778,6 +852,14 @@ fn prompt_with_input(input: Vec<ResponseItem>) -> Prompt {
prompt
}
/// Builds a test `Prompt` via `prompt_with_input` and then overrides its base
/// instructions with `instructions`.
fn prompt_with_input_and_instructions(input: Vec<ResponseItem>, instructions: &str) -> Prompt {
    let base_instructions = BaseInstructions {
        text: instructions.to_owned(),
    };
    let mut prompt = prompt_with_input(input);
    prompt.base_instructions = base_instructions;
    prompt
}
fn websocket_provider(server: &WebSocketTestServer) -> ModelProviderInfo {
ModelProviderInfo {
name: "mock-ws".into(),