Compare full request for websockets incrementality (#11343)

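Tools can now change dynamically mid-turn, so comparing only the input items is no longer enough to decide whether a websocket request can be sent incrementally. We now compare the full request: an incremental request is sent only when every non-input field matches the previous request and `input` is a strict extension of the previous input; otherwise we fall back to a full `response.create`.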
2026-03-05 21:45:28 +03:00 · 2026-02-10 11:14:36 -08:00
parent 548afa5749
commit 0639c33892
3 changed files with 124 additions and 36 deletions
--- a/codex-rs/codex-api/src/common.rs
+++ b/codex-rs/codex-api/src/common.rs
@@ -80,7 +80,7 @@ pub enum ResponseEvent {
    ModelsEtag(String),
 }

-#[derive(Debug, Serialize, Clone)]
+#[derive(Debug, Serialize, Clone, PartialEq)]
 pub struct Reasoning {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub effort: Option<ReasoningEffortConfig>,
@@ -88,14 +88,14 @@ pub struct Reasoning {
    pub summary: Option<ReasoningSummaryConfig>,
 }

-#[derive(Debug, Serialize, Default, Clone)]
+#[derive(Debug, Serialize, Default, Clone, PartialEq)]
 #[serde(rename_all = "snake_case")]
 pub enum TextFormatType {
    #[default]
    JsonSchema,
 }

-#[derive(Debug, Serialize, Default, Clone)]
+#[derive(Debug, Serialize, Default, Clone, PartialEq)]
 pub struct TextFormat {
    /// Format type used by the OpenAI text controls.
    pub r#type: TextFormatType,
@@ -109,7 +109,7 @@ pub struct TextFormat {

 /// Controls the `text` field for the Responses API, combining verbosity and
 /// optional JSON schema output formatting.
-#[derive(Debug, Serialize, Default, Clone)]
+#[derive(Debug, Serialize, Default, Clone, PartialEq)]
 pub struct TextControls {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub verbosity: Option<OpenAiVerbosity>,
@@ -117,7 +117,7 @@ pub struct TextControls {
    pub format: Option<TextFormat>,
 }

-#[derive(Debug, Serialize, Default, Clone)]
+#[derive(Debug, Serialize, Default, Clone, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum OpenAiVerbosity {
    Low,
@@ -136,7 +136,7 @@ impl From<VerbosityConfig> for OpenAiVerbosity {
    }
 }

-#[derive(Debug, Serialize, Clone)]
+#[derive(Debug, Serialize, Clone, PartialEq)]
 pub struct ResponsesApiRequest {
    pub model: String,
    pub instructions: String,
--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -155,8 +155,8 @@ pub struct ModelClient {
 /// The session establishes a Responses WebSocket connection lazily and reuses it across multiple
 /// requests within the turn. It also caches per-turn state:
 ///
-/// - The last request's input items, so subsequent calls can use `response.append` when the input
-///   is an incremental extension of the previous request.
+/// - The last full request, so subsequent calls can use `response.append` only when the current
+///   request is an incremental extension of the previous one.
 /// - The `x-codex-turn-state` sticky-routing token, which must be replayed for all requests within
 ///   the same turn.
 ///
@@ -166,7 +166,7 @@ pub struct ModelClient {
 pub struct ModelClientSession {
    client: ModelClient,
    connection: Option<ApiWebSocketConnection>,
-    websocket_last_items: Vec<ResponseItem>,
+    websocket_last_request: Option<ResponsesApiRequest>,
    websocket_last_response_id: Option<String>,
    websocket_last_response_id_rx: Option<oneshot::Receiver<String>>,
    /// Turn state for sticky routing.
@@ -230,7 +230,7 @@ impl ModelClient {
        ModelClientSession {
            client: self.clone(),
            connection: None,
-            websocket_last_items: Vec::new(),
+            websocket_last_request: None,
            websocket_last_response_id: None,
            websocket_last_response_id_rx: None,
            turn_state: Arc::new(OnceLock::new()),
@@ -530,16 +530,25 @@ impl ModelClientSession {
        }
    }

-    fn get_incremental_items(&self, input_items: &[ResponseItem]) -> Option<Vec<ResponseItem>> {
-        // Checks whether the current request input is an incremental append to the previous request.
-        // If items in the new request contain all the items from the previous request we build
-        // a response.append request otherwise we start with a fresh response.create request.
-        let previous_len = self.websocket_last_items.len();
-        let can_append = previous_len > 0
-            && input_items.starts_with(&self.websocket_last_items)
-            && previous_len < input_items.len();
-        if can_append {
-            Some(input_items[previous_len..].to_vec())
+    fn get_incremental_items(&self, request: &ResponsesApiRequest) -> Option<Vec<ResponseItem>> {
+        // Checks whether the current request is an incremental append to the previous request.
+        // We only append when non-input request fields are unchanged and `input` is a strict
+        // extension of the previous input.
+        let previous_request = self.websocket_last_request.as_ref()?;
+        let mut previous_without_input = previous_request.clone();
+        previous_without_input.input.clear();
+        let mut request_without_input = request.clone();
+        request_without_input.input.clear();
+        if previous_without_input != request_without_input {
+            return None;
+        }
+
+        let previous_len = previous_request.input.len();
+        if previous_len > 0
+            && request.input.starts_with(&previous_request.input)
+            && previous_len < request.input.len()
+        {
+            Some(request.input[previous_len..].to_vec())
        } else {
            None
        }
@@ -571,10 +580,10 @@ impl ModelClientSession {
    fn prepare_websocket_request(
        &mut self,
        payload: ResponseCreateWsRequest,
-    ) -> (ResponsesWsRequest, Vec<ResponseItem>) {
-        let full_input = payload.input.clone();
+        request: &ResponsesApiRequest,
+    ) -> ResponsesWsRequest {
        let responses_websockets_v2_enabled = self.client.responses_websockets_v2_enabled();
-        let incremental_items = self.get_incremental_items(&full_input);
+        let incremental_items = self.get_incremental_items(request);
        if let Some(append_items) = incremental_items {
            if responses_websockets_v2_enabled
                && let Some(previous_response_id) = self.websocket_previous_response_id()
@@ -584,20 +593,17 @@ impl ModelClientSession {
                    input: append_items,
                    ..payload
                };
-                return (ResponsesWsRequest::ResponseCreate(payload), full_input);
+                return ResponsesWsRequest::ResponseCreate(payload);
            }

            if !responses_websockets_v2_enabled {
-                return (
-                    ResponsesWsRequest::ResponseAppend(ResponseAppendWsRequest {
-                        input: append_items,
-                    }),
-                    full_input,
-                );
+                return ResponsesWsRequest::ResponseAppend(ResponseAppendWsRequest {
+                    input: append_items,
+                });
            }
        }

-        (ResponsesWsRequest::ResponseCreate(payload), full_input)
+        ResponsesWsRequest::ResponseCreate(payload)
    }

    /// Opportunistically warms a websocket for this turn-scoped client session.
@@ -650,7 +656,7 @@ impl ModelClientSession {
        };

        if needs_new {
-            self.websocket_last_items.clear();
+            self.websocket_last_request = None;
            self.websocket_last_response_id = None;
            self.websocket_last_response_id_rx = None;
            let turn_state = options
@@ -806,7 +812,7 @@ impl ModelClientSession {
                Err(err) => return Err(map_api_error(err)),
            }

-            let (request, request_input) = self.prepare_websocket_request(ws_payload);
+            let ws_request = self.prepare_websocket_request(ws_payload, &request);

            let stream_result = self
                .connection
@@ -816,10 +822,10 @@ impl ModelClientSession {
                        "websocket connection is unavailable".to_string(),
                    ))
                })?
-                .stream_request(request)
+                .stream_request(ws_request)
                .await
                .map_err(map_api_error)?;
-            self.websocket_last_items = request_input;
+            self.websocket_last_request = Some(request);
            let (last_response_id_sender, last_response_id_receiver) = oneshot::channel();
            self.websocket_last_response_id_rx = Some(last_response_id_receiver);
            let mut last_response_id_sender = Some(last_response_id_sender);
@@ -928,7 +934,7 @@ impl ModelClientSession {
            );

            self.connection = None;
-            self.websocket_last_items.clear();
+            self.websocket_last_request = None;
        }
        activated
    }
--- a/codex-rs/core/tests/suite/client_websockets.rs
+++ b/codex-rs/core/tests/suite/client_websockets.rs
@@ -22,6 +22,7 @@ use codex_otel::metrics::MetricsConfig;
 use codex_protocol::ThreadId;
 use codex_protocol::account::PlanType;
 use codex_protocol::config_types::ReasoningSummary;
+use codex_protocol::models::BaseInstructions;
 use codex_protocol::openai_models::ModelInfo;
 use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig;
 use codex_protocol::user_input::UserInput;
@@ -603,6 +604,42 @@ async fn responses_websocket_creates_on_non_prefix() {
    server.shutdown().await;
 }

+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn responses_websocket_creates_when_non_input_request_fields_change() {
+    skip_if_no_network!();
+
+    let server = start_websocket_server(vec![vec![
+        vec![ev_response_created("resp-1"), ev_completed("resp-1")],
+        vec![ev_response_created("resp-2"), ev_completed("resp-2")],
+    ]])
+    .await;
+
+    let harness = websocket_harness(&server).await;
+    let mut client_session = harness.client.new_session();
+    let prompt_one =
+        prompt_with_input_and_instructions(vec![message_item("hello")], "base instructions one");
+    let prompt_two = prompt_with_input_and_instructions(
+        vec![message_item("hello"), message_item("second")],
+        "base instructions two",
+    );
+
+    stream_until_complete(&mut client_session, &harness, &prompt_one).await;
+    stream_until_complete(&mut client_session, &harness, &prompt_two).await;
+
+    let connection = server.single_connection();
+    assert_eq!(connection.len(), 2);
+    let second = connection.get(1).expect("missing request").body_json();
+
+    assert_eq!(second["type"].as_str(), Some("response.create"));
+    assert_eq!(second.get("previous_response_id"), None);
+    assert_eq!(
+        second["input"],
+        serde_json::to_value(&prompt_two.input).expect("serialize full input")
+    );
+
+    server.shutdown().await;
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn responses_websocket_v2_creates_with_previous_response_id_on_prefix() {
    skip_if_no_network!();
@@ -637,6 +674,43 @@ async fn responses_websocket_v2_creates_with_previous_response_id_on_prefix() {
    server.shutdown().await;
 }

+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn responses_websocket_v2_creates_without_previous_response_id_when_non_input_fields_change()
+{
+    skip_if_no_network!();
+
+    let server = start_websocket_server(vec![vec![
+        vec![ev_response_created("resp-1"), ev_completed("resp-1")],
+        vec![ev_response_created("resp-2"), ev_completed("resp-2")],
+    ]])
+    .await;
+
+    let harness = websocket_harness_with_v2(&server, true).await;
+    let mut session = harness.client.new_session();
+    let prompt_one =
+        prompt_with_input_and_instructions(vec![message_item("hello")], "base instructions one");
+    let prompt_two = prompt_with_input_and_instructions(
+        vec![message_item("hello"), message_item("second")],
+        "base instructions two",
+    );
+
+    stream_until_complete(&mut session, &harness, &prompt_one).await;
+    stream_until_complete(&mut session, &harness, &prompt_two).await;
+
+    let connection = server.single_connection();
+    assert_eq!(connection.len(), 2);
+    let second = connection.get(1).expect("missing request").body_json();
+
+    assert_eq!(second["type"].as_str(), Some("response.create"));
+    assert_eq!(second.get("previous_response_id"), None);
+    assert_eq!(
+        second["input"],
+        serde_json::to_value(&prompt_two.input).expect("serialize full input")
+    );
+
+    server.shutdown().await;
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn responses_websocket_v2_after_error_uses_full_create_without_previous_response_id() {
    skip_if_no_network!();
@@ -778,6 +852,14 @@ fn prompt_with_input(input: Vec<ResponseItem>) -> Prompt {
    prompt
 }

+fn prompt_with_input_and_instructions(input: Vec<ResponseItem>, instructions: &str) -> Prompt {
+    let mut prompt = prompt_with_input(input);
+    prompt.base_instructions = BaseInstructions {
+        text: instructions.to_string(),
+    };
+    prompt
+}
+
 fn websocket_provider(server: &WebSocketTestServer) -> ModelProviderInfo {
    ModelProviderInfo {
        name: "mock-ws".into(),