app-server: Add back pressure and batching to command/exec (#15547)

* Add `OutgoingMessageSender::send_server_notification_to_connection_and_wait` which returns only once message is written to websocket (or failed to do so) * Use this mechanism to apply back pressure to stdout/stderr streams of processes spawned by `command/exec`, to limit them to at most one message in-memory at a time * Use back pressure signal to also batch smaller chunks into ≈64KiB ones This should make commands execution more robust over high-latency/low-throughput networks
2026-04-30 03:12:20 +03:00 · 2026-03-24 11:35:51 -07:00
parent daf5e584c2
commit d61c03ca08
7 changed files with 273 additions and 51 deletions
--- a/codex-rs/app-server/src/outgoing_message.rs
+++ b/codex-rs/app-server/src/outgoing_message.rs
@@ -81,17 +81,33 @@ impl RequestContext {
    }
 }

-#[derive(Debug, Clone)]
+#[derive(Debug)]
 pub(crate) enum OutgoingEnvelope {
    ToConnection {
        connection_id: ConnectionId,
        message: OutgoingMessage,
+        write_complete_tx: Option<oneshot::Sender<()>>,
    },
    Broadcast {
        message: OutgoingMessage,
    },
 }

+#[derive(Debug)]
+pub(crate) struct QueuedOutgoingMessage {
+    pub(crate) message: OutgoingMessage,
+    pub(crate) write_complete_tx: Option<oneshot::Sender<()>>,
+}
+
+impl QueuedOutgoingMessage {
+    pub(crate) fn new(message: OutgoingMessage) -> Self {
+        Self {
+            message,
+            write_complete_tx: None,
+        }
+    }
+}
+
 /// Sends messages to the client and manages request callbacks.
 pub(crate) struct OutgoingMessageSender {
    next_server_request_id: AtomicI64,
@@ -299,6 +315,7 @@ impl OutgoingMessageSender {
                        .send(OutgoingEnvelope::ToConnection {
                            connection_id: *connection_id,
                            message: outgoing_message.clone(),
+                            write_complete_tx: None,
                        })
                        .await
                    {
@@ -333,6 +350,7 @@ impl OutgoingMessageSender {
                .send(OutgoingEnvelope::ToConnection {
                    connection_id,
                    message: OutgoingMessage::Request(request),
+                    write_complete_tx: None,
                })
                .await
            {
@@ -519,6 +537,7 @@ impl OutgoingMessageSender {
                .send(OutgoingEnvelope::ToConnection {
                    connection_id: *connection_id,
                    message: outgoing_message.clone(),
+                    write_complete_tx: None,
                })
                .await
            {
@@ -527,6 +546,28 @@ impl OutgoingMessageSender {
        }
    }

+    pub(crate) async fn send_server_notification_to_connection_and_wait(
+        &self,
+        connection_id: ConnectionId,
+        notification: ServerNotification,
+    ) {
+        tracing::trace!("app-server event: {notification}");
+        let outgoing_message = OutgoingMessage::AppServerNotification(notification);
+        let (write_complete_tx, write_complete_rx) = oneshot::channel();
+        if let Err(err) = self
+            .sender
+            .send(OutgoingEnvelope::ToConnection {
+                connection_id,
+                message: outgoing_message,
+                write_complete_tx: Some(write_complete_tx),
+            })
+            .await
+        {
+            warn!("failed to send server notification to client: {err:?}");
+        }
+        let _ = write_complete_rx.await;
+    }
+
    pub(crate) async fn send_error(
        &self,
        request_id: ConnectionRequestId,
@@ -566,6 +607,7 @@ impl OutgoingMessageSender {
        let send_fut = self.sender.send(OutgoingEnvelope::ToConnection {
            connection_id,
            message,
+            write_complete_tx: None,
        });
        let send_result = if let Some(request_context) = request_context {
            send_fut.instrument(request_context.span()).await
@@ -818,6 +860,7 @@ mod tests {
            OutgoingEnvelope::ToConnection {
                connection_id,
                message,
+                ..
            } => {
                assert_eq!(connection_id, ConnectionId(42));
                let OutgoingMessage::Response(response) = message else {
@@ -880,6 +923,7 @@ mod tests {
            OutgoingEnvelope::ToConnection {
                connection_id,
                message,
+                ..
            } => {
                assert_eq!(connection_id, ConnectionId(9));
                let OutgoingMessage::Error(outgoing_error) = message else {
@@ -892,6 +936,50 @@ mod tests {
        }
    }

+    #[tokio::test]
+    async fn send_server_notification_to_connection_and_wait_tracks_write_completion() {
+        let (tx, mut rx) = mpsc::channel::<OutgoingEnvelope>(4);
+        let outgoing = OutgoingMessageSender::new(tx);
+        let send_task = tokio::spawn(async move {
+            outgoing
+                .send_server_notification_to_connection_and_wait(
+                    ConnectionId(42),
+                    ServerNotification::ModelRerouted(ModelReroutedNotification {
+                        thread_id: "thread-1".to_string(),
+                        turn_id: "turn-1".to_string(),
+                        from_model: "gpt-5.3-codex".to_string(),
+                        to_model: "gpt-5.2".to_string(),
+                        reason: ModelRerouteReason::HighRiskCyberActivity,
+                    }),
+                )
+                .await
+        });
+
+        let envelope = timeout(Duration::from_secs(1), rx.recv())
+            .await
+            .expect("should receive envelope before timeout")
+            .expect("channel should contain one message");
+        let OutgoingEnvelope::ToConnection {
+            connection_id,
+            message,
+            write_complete_tx,
+        } = envelope
+        else {
+            panic!("expected targeted server notification envelope");
+        };
+        assert_eq!(connection_id, ConnectionId(42));
+        assert!(matches!(message, OutgoingMessage::AppServerNotification(_)));
+        write_complete_tx
+            .expect("write completion sender should be attached")
+            .send(())
+            .expect("receiver should still be waiting");
+
+        timeout(Duration::from_secs(1), send_task)
+            .await
+            .expect("send task should finish after write completion is signaled")
+            .expect("send task should not panic");
+    }
+
    #[tokio::test]
    async fn connection_closed_clears_registered_request_contexts() {
        let (tx, _rx) = mpsc::channel::<OutgoingEnvelope>(4);