mirror of
https://github.com/openai/codex.git
synced 2026-05-04 05:11:37 +03:00
feat: exec-server prep for unified exec (#15691)
This PR partially rebase `unified_exec` on the `exec-server` and adapt the `exec-server` accordingly. ## What changed in `exec-server` 1. Replaced the old "broadcast-driven; process-global" event model with process-scoped session events. The goal is to be able to have dedicated handler for each process. 2. Add to protocol contract to support explicit lifecycle status and stream ordering: - `WriteResponse` now returns `WriteStatus` (Accepted, UnknownProcess, StdinClosed, Starting) instead of a bool. - Added seq fields to output/exited notifications. - Added terminal process/closed notification. 3. Demultiplexed remote notifications into per-process channels. Same as for the event sys 4. Local and remote backends now both implement ExecBackend. 5. Local backend wraps internal process ID/operations into per-process ExecProcess objects. 6. Remote backend registers a session channel before launch and unregisters on failed launch. ## What changed in `unified_exec` 1. Added unified process-state model and backend-neutral process wrapper. This will probably disappear in the future, but it makes it easier to keep the work flowing on both side. - `UnifiedExecProcess` now handles both local PTY sessions and remote exec-server processes through a shared `ProcessHandle`. - Added `ProcessState` to track has_exited, exit_code, and terminal failure message consistently across backends. 2. Routed write and lifecycle handling through process-level methods. ## Some rationals 1. The change centralizes execution transport in exec-server while preserving policy and orchestration ownership in core, avoiding duplicated launch approval logic. This comes from internal discussion. 2. Session-scoped events remove coupling/cross-talk between processes and make stream ordering and terminal state explicit (seq, closed, failed). 3. The failure-path surfacing (remote launch failures, write failures, transport disconnects) makes command tool output and cleanup behavior deterministic ## Follow-ups: * Unify the concept of thread ID behind an obfuscated struct * FD handling * Full zsh-fork compatibility * Full network sandboxing compatibility * Handle ws disconnection
This commit is contained in:
@@ -11,13 +11,16 @@ use codex_utils_pty::ExecCommandSession;
|
||||
use codex_utils_pty::TerminalSize;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::sync::Notify;
|
||||
use tokio::sync::broadcast;
|
||||
use tokio::sync::mpsc;
|
||||
use tracing::warn;
|
||||
use tokio::sync::watch;
|
||||
|
||||
use crate::ExecBackend;
|
||||
use crate::ExecProcess;
|
||||
use crate::ExecServerError;
|
||||
use crate::ExecServerEvent;
|
||||
use crate::ProcessId;
|
||||
use crate::StartedExecProcess;
|
||||
use crate::protocol::EXEC_CLOSED_METHOD;
|
||||
use crate::protocol::ExecClosedNotification;
|
||||
use crate::protocol::ExecExitedNotification;
|
||||
use crate::protocol::ExecOutputDeltaNotification;
|
||||
use crate::protocol::ExecOutputStream;
|
||||
@@ -31,6 +34,7 @@ use crate::protocol::TerminateParams;
|
||||
use crate::protocol::TerminateResponse;
|
||||
use crate::protocol::WriteParams;
|
||||
use crate::protocol::WriteResponse;
|
||||
use crate::protocol::WriteStatus;
|
||||
use crate::rpc::RpcNotificationSender;
|
||||
use crate::rpc::RpcServerOutboundMessage;
|
||||
use crate::rpc::internal_error;
|
||||
@@ -38,7 +42,6 @@ use crate::rpc::invalid_params;
|
||||
use crate::rpc::invalid_request;
|
||||
|
||||
const RETAINED_OUTPUT_BYTES_PER_PROCESS: usize = 1024 * 1024;
|
||||
const EVENT_CHANNEL_CAPACITY: usize = 256;
|
||||
const NOTIFICATION_CHANNEL_CAPACITY: usize = 256;
|
||||
#[cfg(test)]
|
||||
const EXITED_PROCESS_RETENTION: Duration = Duration::from_millis(25);
|
||||
@@ -59,7 +62,10 @@ struct RunningProcess {
|
||||
retained_bytes: usize,
|
||||
next_seq: u64,
|
||||
exit_code: Option<i32>,
|
||||
wake_tx: watch::Sender<u64>,
|
||||
output_notify: Arc<Notify>,
|
||||
open_streams: usize,
|
||||
closed: bool,
|
||||
}
|
||||
|
||||
enum ProcessEntry {
|
||||
@@ -69,7 +75,6 @@ enum ProcessEntry {
|
||||
|
||||
struct Inner {
|
||||
notifications: RpcNotificationSender,
|
||||
events_tx: broadcast::Sender<ExecServerEvent>,
|
||||
processes: Mutex<HashMap<String, ProcessEntry>>,
|
||||
initialize_requested: AtomicBool,
|
||||
initialized: AtomicBool,
|
||||
@@ -80,6 +85,12 @@ pub(crate) struct LocalProcess {
|
||||
inner: Arc<Inner>,
|
||||
}
|
||||
|
||||
struct LocalExecProcess {
|
||||
process_id: ProcessId,
|
||||
backend: LocalProcess,
|
||||
wake_tx: watch::Sender<u64>,
|
||||
}
|
||||
|
||||
impl Default for LocalProcess {
|
||||
fn default() -> Self {
|
||||
let (outgoing_tx, mut outgoing_rx) =
|
||||
@@ -94,7 +105,6 @@ impl LocalProcess {
|
||||
Self {
|
||||
inner: Arc::new(Inner {
|
||||
notifications,
|
||||
events_tx: broadcast::channel(EVENT_CHANNEL_CAPACITY).0,
|
||||
processes: Mutex::new(HashMap::new()),
|
||||
initialize_requested: AtomicBool::new(false),
|
||||
initialized: AtomicBool::new(false),
|
||||
@@ -152,10 +162,12 @@ impl LocalProcess {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn exec(&self, params: ExecParams) -> Result<ExecResponse, JSONRPCErrorError> {
|
||||
async fn start_process(
|
||||
&self,
|
||||
params: ExecParams,
|
||||
) -> Result<(ExecResponse, watch::Sender<u64>), JSONRPCErrorError> {
|
||||
self.require_initialized_for("exec")?;
|
||||
let process_id = params.process_id.clone();
|
||||
|
||||
let (program, args) = params
|
||||
.argv
|
||||
.split_first()
|
||||
@@ -203,6 +215,7 @@ impl LocalProcess {
|
||||
};
|
||||
|
||||
let output_notify = Arc::new(Notify::new());
|
||||
let (wake_tx, _wake_rx) = watch::channel(0);
|
||||
{
|
||||
let mut process_map = self.inner.processes.lock().await;
|
||||
process_map.insert(
|
||||
@@ -214,7 +227,10 @@ impl LocalProcess {
|
||||
retained_bytes: 0,
|
||||
next_seq: 1,
|
||||
exit_code: None,
|
||||
wake_tx: wake_tx.clone(),
|
||||
output_notify: Arc::clone(&output_notify),
|
||||
open_streams: 2,
|
||||
closed: false,
|
||||
})),
|
||||
);
|
||||
}
|
||||
@@ -248,7 +264,13 @@ impl LocalProcess {
|
||||
output_notify,
|
||||
));
|
||||
|
||||
Ok(ExecResponse { process_id })
|
||||
Ok((ExecResponse { process_id }, wake_tx))
|
||||
}
|
||||
|
||||
pub(crate) async fn exec(&self, params: ExecParams) -> Result<ExecResponse, JSONRPCErrorError> {
|
||||
self.start_process(params)
|
||||
.await
|
||||
.map(|(response, _)| response)
|
||||
}
|
||||
|
||||
pub(crate) async fn exec_read(
|
||||
@@ -256,6 +278,7 @@ impl LocalProcess {
|
||||
params: ReadParams,
|
||||
) -> Result<ReadResponse, JSONRPCErrorError> {
|
||||
self.require_initialized_for("exec")?;
|
||||
let _process_id = params.process_id.clone();
|
||||
let after_seq = params.after_seq.unwrap_or(0);
|
||||
let max_bytes = params.max_bytes.unwrap_or(usize::MAX);
|
||||
let wait = Duration::from_millis(params.wait_ms.unwrap_or(0));
|
||||
@@ -300,6 +323,8 @@ impl LocalProcess {
|
||||
next_seq,
|
||||
exited: process.exit_code.is_some(),
|
||||
exit_code: process.exit_code,
|
||||
closed: process.closed,
|
||||
failure: None,
|
||||
},
|
||||
Arc::clone(&process.output_notify),
|
||||
)
|
||||
@@ -309,6 +334,11 @@ impl LocalProcess {
|
||||
|| response.exited
|
||||
|| tokio::time::Instant::now() >= deadline
|
||||
{
|
||||
let _total_bytes: usize = response
|
||||
.chunks
|
||||
.iter()
|
||||
.map(|chunk| chunk.chunk.0.len())
|
||||
.sum();
|
||||
return Ok(response);
|
||||
}
|
||||
|
||||
@@ -325,22 +355,24 @@ impl LocalProcess {
|
||||
params: WriteParams,
|
||||
) -> Result<WriteResponse, JSONRPCErrorError> {
|
||||
self.require_initialized_for("exec")?;
|
||||
let _process_id = params.process_id.clone();
|
||||
let _input_bytes = params.chunk.0.len();
|
||||
let writer_tx = {
|
||||
let process_map = self.inner.processes.lock().await;
|
||||
let process = process_map.get(¶ms.process_id).ok_or_else(|| {
|
||||
invalid_request(format!("unknown process id {}", params.process_id))
|
||||
})?;
|
||||
let Some(process) = process_map.get(¶ms.process_id) else {
|
||||
return Ok(WriteResponse {
|
||||
status: WriteStatus::UnknownProcess,
|
||||
});
|
||||
};
|
||||
let ProcessEntry::Running(process) = process else {
|
||||
return Err(invalid_request(format!(
|
||||
"process id {} is starting",
|
||||
params.process_id
|
||||
)));
|
||||
return Ok(WriteResponse {
|
||||
status: WriteStatus::Starting,
|
||||
});
|
||||
};
|
||||
if !process.tty {
|
||||
return Err(invalid_request(format!(
|
||||
"stdin is closed for process {}",
|
||||
params.process_id
|
||||
)));
|
||||
return Ok(WriteResponse {
|
||||
status: WriteStatus::StdinClosed,
|
||||
});
|
||||
}
|
||||
process.session.writer_sender()
|
||||
};
|
||||
@@ -350,7 +382,9 @@ impl LocalProcess {
|
||||
.await
|
||||
.map_err(|_| internal_error("failed to write to process stdin".to_string()))?;
|
||||
|
||||
Ok(WriteResponse { accepted: true })
|
||||
Ok(WriteResponse {
|
||||
status: WriteStatus::Accepted,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) async fn terminate_process(
|
||||
@@ -358,6 +392,7 @@ impl LocalProcess {
|
||||
params: TerminateParams,
|
||||
) -> Result<TerminateResponse, JSONRPCErrorError> {
|
||||
self.require_initialized_for("exec")?;
|
||||
let _process_id = params.process_id.clone();
|
||||
let running = {
|
||||
let process_map = self.inner.processes.lock().await;
|
||||
match process_map.get(¶ms.process_id) {
|
||||
@@ -377,13 +412,68 @@ impl LocalProcess {
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ExecProcess for LocalProcess {
|
||||
async fn start(&self, params: ExecParams) -> Result<ExecResponse, ExecServerError> {
|
||||
self.exec(params).await.map_err(map_handler_error)
|
||||
impl ExecBackend for LocalProcess {
|
||||
async fn start(&self, params: ExecParams) -> Result<StartedExecProcess, ExecServerError> {
|
||||
let (response, wake_tx) = self
|
||||
.start_process(params)
|
||||
.await
|
||||
.map_err(map_handler_error)?;
|
||||
Ok(StartedExecProcess {
|
||||
process: Arc::new(LocalExecProcess {
|
||||
process_id: response.process_id.into(),
|
||||
backend: self.clone(),
|
||||
wake_tx,
|
||||
}),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ExecProcess for LocalExecProcess {
|
||||
fn process_id(&self) -> &ProcessId {
|
||||
&self.process_id
|
||||
}
|
||||
|
||||
async fn read(&self, params: ReadParams) -> Result<ReadResponse, ExecServerError> {
|
||||
self.exec_read(params).await.map_err(map_handler_error)
|
||||
fn subscribe_wake(&self) -> watch::Receiver<u64> {
|
||||
self.wake_tx.subscribe()
|
||||
}
|
||||
|
||||
async fn read(
|
||||
&self,
|
||||
after_seq: Option<u64>,
|
||||
max_bytes: Option<usize>,
|
||||
wait_ms: Option<u64>,
|
||||
) -> Result<ReadResponse, ExecServerError> {
|
||||
self.backend
|
||||
.read(&self.process_id, after_seq, max_bytes, wait_ms)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn write(&self, chunk: Vec<u8>) -> Result<WriteResponse, ExecServerError> {
|
||||
self.backend.write(&self.process_id, chunk).await
|
||||
}
|
||||
|
||||
async fn terminate(&self) -> Result<(), ExecServerError> {
|
||||
self.backend.terminate(&self.process_id).await
|
||||
}
|
||||
}
|
||||
|
||||
impl LocalProcess {
|
||||
async fn read(
|
||||
&self,
|
||||
process_id: &str,
|
||||
after_seq: Option<u64>,
|
||||
max_bytes: Option<usize>,
|
||||
wait_ms: Option<u64>,
|
||||
) -> Result<ReadResponse, ExecServerError> {
|
||||
self.exec_read(ReadParams {
|
||||
process_id: process_id.to_string(),
|
||||
after_seq,
|
||||
max_bytes,
|
||||
wait_ms,
|
||||
})
|
||||
.await
|
||||
.map_err(map_handler_error)
|
||||
}
|
||||
|
||||
async fn write(
|
||||
@@ -399,16 +489,13 @@ impl ExecProcess for LocalProcess {
|
||||
.map_err(map_handler_error)
|
||||
}
|
||||
|
||||
async fn terminate(&self, process_id: &str) -> Result<TerminateResponse, ExecServerError> {
|
||||
async fn terminate(&self, process_id: &str) -> Result<(), ExecServerError> {
|
||||
self.terminate_process(TerminateParams {
|
||||
process_id: process_id.to_string(),
|
||||
})
|
||||
.await
|
||||
.map_err(map_handler_error)
|
||||
}
|
||||
|
||||
fn subscribe_events(&self) -> broadcast::Receiver<ExecServerEvent> {
|
||||
self.inner.events_tx.subscribe()
|
||||
.map_err(map_handler_error)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -427,6 +514,7 @@ async fn stream_output(
|
||||
output_notify: Arc<Notify>,
|
||||
) {
|
||||
while let Some(chunk) = receiver.recv().await {
|
||||
let _chunk_len = chunk.len();
|
||||
let notification = {
|
||||
let mut processes = inner.processes.lock().await;
|
||||
let Some(entry) = processes.get_mut(&process_id) else {
|
||||
@@ -448,21 +536,16 @@ async fn stream_output(
|
||||
break;
|
||||
};
|
||||
process.retained_bytes = process.retained_bytes.saturating_sub(evicted.chunk.len());
|
||||
warn!(
|
||||
"retained output cap exceeded for process {process_id}; dropping oldest output"
|
||||
);
|
||||
}
|
||||
let _ = process.wake_tx.send(seq);
|
||||
ExecOutputDeltaNotification {
|
||||
process_id: process_id.clone(),
|
||||
seq,
|
||||
stream,
|
||||
chunk: chunk.into(),
|
||||
}
|
||||
};
|
||||
output_notify.notify_waiters();
|
||||
let _ = inner
|
||||
.events_tx
|
||||
.send(ExecServerEvent::OutputDelta(notification.clone()));
|
||||
|
||||
if inner
|
||||
.notifications
|
||||
.notify(crate::protocol::EXEC_OUTPUT_DELTA_METHOD, ¬ification)
|
||||
@@ -472,6 +555,8 @@ async fn stream_output(
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
finish_output_stream(process_id, inner).await;
|
||||
}
|
||||
|
||||
async fn watch_exit(
|
||||
@@ -481,29 +566,35 @@ async fn watch_exit(
|
||||
output_notify: Arc<Notify>,
|
||||
) {
|
||||
let exit_code = exit_rx.await.unwrap_or(-1);
|
||||
{
|
||||
let notification = {
|
||||
let mut processes = inner.processes.lock().await;
|
||||
if let Some(ProcessEntry::Running(process)) = processes.get_mut(&process_id) {
|
||||
let seq = process.next_seq;
|
||||
process.next_seq += 1;
|
||||
process.exit_code = Some(exit_code);
|
||||
let _ = process.wake_tx.send(seq);
|
||||
Some(ExecExitedNotification {
|
||||
process_id: process_id.clone(),
|
||||
seq,
|
||||
exit_code,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
output_notify.notify_waiters();
|
||||
let notification = ExecExitedNotification {
|
||||
process_id: process_id.clone(),
|
||||
exit_code,
|
||||
};
|
||||
let _ = inner
|
||||
.events_tx
|
||||
.send(ExecServerEvent::Exited(notification.clone()));
|
||||
if inner
|
||||
.notifications
|
||||
.notify(crate::protocol::EXEC_EXITED_METHOD, ¬ification)
|
||||
.await
|
||||
.is_err()
|
||||
output_notify.notify_waiters();
|
||||
if let Some(notification) = notification
|
||||
&& inner
|
||||
.notifications
|
||||
.notify(crate::protocol::EXEC_EXITED_METHOD, ¬ification)
|
||||
.await
|
||||
.is_err()
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
maybe_emit_closed(process_id.clone(), Arc::clone(&inner)).await;
|
||||
|
||||
tokio::time::sleep(EXITED_PROCESS_RETENTION).await;
|
||||
let mut processes = inner.processes.lock().await;
|
||||
if matches!(
|
||||
@@ -513,3 +604,51 @@ async fn watch_exit(
|
||||
processes.remove(&process_id);
|
||||
}
|
||||
}
|
||||
|
||||
async fn finish_output_stream(process_id: String, inner: Arc<Inner>) {
|
||||
{
|
||||
let mut processes = inner.processes.lock().await;
|
||||
let Some(ProcessEntry::Running(process)) = processes.get_mut(&process_id) else {
|
||||
return;
|
||||
};
|
||||
|
||||
if process.open_streams > 0 {
|
||||
process.open_streams -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
maybe_emit_closed(process_id, inner).await;
|
||||
}
|
||||
|
||||
async fn maybe_emit_closed(process_id: String, inner: Arc<Inner>) {
|
||||
let notification = {
|
||||
let mut processes = inner.processes.lock().await;
|
||||
let Some(ProcessEntry::Running(process)) = processes.get_mut(&process_id) else {
|
||||
return;
|
||||
};
|
||||
|
||||
if process.closed || process.open_streams != 0 || process.exit_code.is_none() {
|
||||
return;
|
||||
}
|
||||
|
||||
process.closed = true;
|
||||
let seq = process.next_seq;
|
||||
process.next_seq += 1;
|
||||
let _ = process.wake_tx.send(seq);
|
||||
Some(ExecClosedNotification {
|
||||
process_id: process_id.clone(),
|
||||
seq,
|
||||
})
|
||||
};
|
||||
|
||||
let Some(notification) = notification else {
|
||||
return;
|
||||
};
|
||||
|
||||
if inner
|
||||
.notifications
|
||||
.notify(EXEC_CLOSED_METHOD, ¬ification)
|
||||
.await
|
||||
.is_err()
|
||||
{}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user