feat: move exec-server ownership (#16344)

This introduces session-scoped ownership for exec-server so ws
disconnects no longer immediately kill running remote exec processes,
and it prepares the protocol for reconnect-based resume.
- add session_id / resume_session_id to the exec-server initialize
handshake
  - move process ownership under a shared session registry
- detach sessions on websocket disconnect and expire them after a TTL
instead of killing processes immediately (we will resume based on this)
- allow a new connection to resume an existing session and take over
notifications/ownership
- I use UUID to make them not predictable as we don't have auth for now
- make detached-session expiry authoritative at resume time so teardown
wins at the TTL boundary
- reject long-poll process/read calls that get resumed out from under an
older attachment

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
jif-oai
2026-04-10 14:11:47 +01:00
committed by GitHub
parent 7bbe3b6011
commit 085ffb4456
21 changed files with 1203 additions and 172 deletions

View File

@@ -14,14 +14,33 @@ use crate::rpc::invalid_request;
use crate::rpc::method_not_found;
use crate::server::ExecServerHandler;
use crate::server::registry::build_router;
use crate::server::session_registry::SessionRegistry;
pub(crate) async fn run_connection(connection: JsonRpcConnection) {
#[derive(Clone)]
pub(crate) struct ConnectionProcessor {
session_registry: Arc<SessionRegistry>,
}
impl ConnectionProcessor {
pub(crate) fn new() -> Self {
Self {
session_registry: SessionRegistry::new(),
}
}
pub(crate) async fn run_connection(&self, connection: JsonRpcConnection) {
run_connection(connection, Arc::clone(&self.session_registry)).await;
}
}
async fn run_connection(connection: JsonRpcConnection, session_registry: Arc<SessionRegistry>) {
let router = Arc::new(build_router());
let (json_outgoing_tx, mut incoming_rx, connection_tasks) = connection.into_parts();
let (json_outgoing_tx, mut incoming_rx, mut disconnected_rx, connection_tasks) =
connection.into_parts();
let (outgoing_tx, mut outgoing_rx) =
mpsc::channel::<RpcServerOutboundMessage>(CHANNEL_CAPACITY);
let notifications = RpcNotificationSender::new(outgoing_tx.clone());
let handler = Arc::new(ExecServerHandler::new(notifications));
let handler = Arc::new(ExecServerHandler::new(session_registry, notifications));
let outbound_task = tokio::spawn(async move {
while let Some(message) = outgoing_rx.recv().await {
@@ -40,6 +59,10 @@ pub(crate) async fn run_connection(connection: JsonRpcConnection) {
// Process inbound events sequentially to preserve initialize/initialized ordering.
while let Some(event) = incoming_rx.recv().await {
if !handler.is_session_attached() {
debug!("exec-server connection evicted after session resume");
break;
}
match event {
JsonRpcConnectionEvent::MalformedMessage { reason } => {
warn!("ignoring malformed exec-server message: {reason}");
@@ -57,7 +80,13 @@ pub(crate) async fn run_connection(connection: JsonRpcConnection) {
JsonRpcConnectionEvent::Message(message) => match message {
codex_app_server_protocol::JSONRPCMessage::Request(request) => {
if let Some(route) = router.request_route(request.method.as_str()) {
let message = route(handler.clone(), request).await;
let message = tokio::select! {
message = route(Arc::clone(&handler), request) => message,
_ = disconnected_rx.changed() => {
debug!("exec-server transport disconnected while handling request");
break;
}
};
if outgoing_tx.send(message).await.is_err() {
break;
}
@@ -84,7 +113,16 @@ pub(crate) async fn run_connection(connection: JsonRpcConnection) {
);
break;
};
if let Err(err) = route(handler.clone(), notification).await {
let result = tokio::select! {
result = route(Arc::clone(&handler), notification) => result,
_ = disconnected_rx.changed() => {
debug!(
"exec-server transport disconnected while handling notification"
);
break;
}
};
if let Err(err) = result {
warn!("closing exec-server connection after protocol error: {err}");
break;
}
@@ -114,6 +152,7 @@ pub(crate) async fn run_connection(connection: JsonRpcConnection) {
}
handler.shutdown().await;
drop(handler);
drop(outgoing_tx);
for task in connection_tasks {
task.abort();
@@ -121,3 +160,230 @@ pub(crate) async fn run_connection(connection: JsonRpcConnection) {
}
let _ = outbound_task.await;
}
#[cfg(test)]
mod tests {
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
use codex_app_server_protocol::JSONRPCMessage;
use codex_app_server_protocol::JSONRPCNotification;
use codex_app_server_protocol::JSONRPCRequest;
use codex_app_server_protocol::JSONRPCResponse;
use codex_app_server_protocol::RequestId;
use serde::Serialize;
use serde::de::DeserializeOwned;
use tokio::io::AsyncBufReadExt;
use tokio::io::AsyncWriteExt;
use tokio::io::BufReader;
use tokio::io::DuplexStream;
use tokio::io::Lines;
use tokio::io::duplex;
use tokio::task::JoinHandle;
use tokio::time::timeout;
use super::run_connection;
use crate::ProcessId;
use crate::connection::JsonRpcConnection;
use crate::protocol::EXEC_METHOD;
use crate::protocol::EXEC_READ_METHOD;
use crate::protocol::EXEC_TERMINATE_METHOD;
use crate::protocol::ExecParams;
use crate::protocol::ExecResponse;
use crate::protocol::INITIALIZE_METHOD;
use crate::protocol::INITIALIZED_METHOD;
use crate::protocol::InitializeParams;
use crate::protocol::InitializeResponse;
use crate::protocol::ReadParams;
use crate::protocol::TerminateParams;
use crate::protocol::TerminateResponse;
use crate::server::session_registry::SessionRegistry;
#[tokio::test]
async fn transport_disconnect_detaches_session_during_in_flight_read() {
let registry = SessionRegistry::new();
let (mut first_writer, mut first_lines, first_task) =
spawn_test_connection(Arc::clone(&registry), "first");
send_request(
&mut first_writer,
/*id*/ 1,
INITIALIZE_METHOD,
&InitializeParams {
client_name: "exec-server-test".to_string(),
resume_session_id: None,
},
)
.await;
let initialize_response: InitializeResponse =
read_response(&mut first_lines, /*expected_id*/ 1).await;
send_notification(&mut first_writer, INITIALIZED_METHOD, &()).await;
let process_id = ProcessId::from("proc-long-poll");
send_request(
&mut first_writer,
/*id*/ 2,
EXEC_METHOD,
&exec_params(process_id.clone()),
)
.await;
let _: ExecResponse = read_response(&mut first_lines, /*expected_id*/ 2).await;
send_request(
&mut first_writer,
/*id*/ 3,
EXEC_READ_METHOD,
&ReadParams {
process_id: process_id.clone(),
after_seq: None,
max_bytes: None,
wait_ms: Some(5_000),
},
)
.await;
drop(first_writer);
tokio::time::sleep(Duration::from_millis(25)).await;
let (mut second_writer, mut second_lines, second_task) =
spawn_test_connection(Arc::clone(&registry), "second");
send_request(
&mut second_writer,
/*id*/ 1,
INITIALIZE_METHOD,
&InitializeParams {
client_name: "exec-server-test".to_string(),
resume_session_id: Some(initialize_response.session_id.clone()),
},
)
.await;
let second_initialize_response = timeout(
Duration::from_secs(1),
read_response::<InitializeResponse>(&mut second_lines, /*expected_id*/ 1),
)
.await
.expect("resume initialize should not wait for the old read to finish");
assert_eq!(
second_initialize_response.session_id,
initialize_response.session_id
);
timeout(Duration::from_secs(1), first_task)
.await
.expect("first processor should exit")
.expect("first processor should join");
send_notification(&mut second_writer, INITIALIZED_METHOD, &()).await;
send_request(
&mut second_writer,
/*id*/ 2,
EXEC_TERMINATE_METHOD,
&TerminateParams { process_id },
)
.await;
let _: TerminateResponse = read_response(&mut second_lines, /*expected_id*/ 2).await;
drop(second_writer);
drop(second_lines);
timeout(Duration::from_secs(1), second_task)
.await
.expect("second processor should exit")
.expect("second processor should join");
}
fn spawn_test_connection(
registry: Arc<SessionRegistry>,
label: &str,
) -> (DuplexStream, Lines<BufReader<DuplexStream>>, JoinHandle<()>) {
let (client_writer, server_reader) = duplex(1 << 20);
let (server_writer, client_reader) = duplex(1 << 20);
let connection =
JsonRpcConnection::from_stdio(server_reader, server_writer, label.to_string());
let task = tokio::spawn(run_connection(connection, registry));
(client_writer, BufReader::new(client_reader).lines(), task)
}
async fn send_request<P: Serialize>(
writer: &mut DuplexStream,
id: i64,
method: &str,
params: &P,
) {
write_message(
writer,
&JSONRPCMessage::Request(JSONRPCRequest {
id: RequestId::Integer(id),
method: method.to_string(),
params: Some(serde_json::to_value(params).expect("serialize params")),
trace: None,
}),
)
.await;
}
async fn send_notification<P: Serialize>(writer: &mut DuplexStream, method: &str, params: &P) {
write_message(
writer,
&JSONRPCMessage::Notification(JSONRPCNotification {
method: method.to_string(),
params: Some(serde_json::to_value(params).expect("serialize params")),
}),
)
.await;
}
async fn write_message(writer: &mut DuplexStream, message: &JSONRPCMessage) {
let encoded = serde_json::to_vec(message).expect("serialize JSON-RPC message");
writer.write_all(&encoded).await.expect("write request");
writer.write_all(b"\n").await.expect("write newline");
}
async fn read_response<T: DeserializeOwned>(
lines: &mut Lines<BufReader<DuplexStream>>,
expected_id: i64,
) -> T {
let line = lines
.next_line()
.await
.expect("read response")
.expect("response line");
match serde_json::from_str::<JSONRPCMessage>(&line).expect("decode JSON-RPC response") {
JSONRPCMessage::Response(JSONRPCResponse { id, result }) => {
assert_eq!(id, RequestId::Integer(expected_id));
serde_json::from_value(result).expect("decode response result")
}
JSONRPCMessage::Error(error) => panic!("unexpected JSON-RPC error: {error:?}"),
other => panic!("expected JSON-RPC response, got {other:?}"),
}
}
fn exec_params(process_id: ProcessId) -> ExecParams {
let mut env = HashMap::new();
if let Some(path) = std::env::var_os("PATH") {
env.insert("PATH".to_string(), path.to_string_lossy().into_owned());
}
ExecParams {
process_id,
argv: sleep_then_print_argv(),
cwd: std::env::current_dir().expect("cwd"),
env,
tty: false,
arg0: None,
}
}
fn sleep_then_print_argv() -> Vec<String> {
if cfg!(windows) {
vec![
std::env::var("COMSPEC").unwrap_or_else(|_| "cmd.exe".to_string()),
"/C".to_string(),
"ping -n 3 127.0.0.1 >NUL && echo late".to_string(),
]
} else {
vec![
"/bin/sh".to_string(),
"-c".to_string(),
"sleep 1; printf late".to_string(),
]
}
}
}