mirror of
https://github.com/openai/codex.git
synced 2026-04-30 03:12:20 +03:00
Act on reasoning-included per turn (#9402)
- Reset reasoning-included flag each turn and update compaction test
This commit is contained in:
@@ -42,6 +42,10 @@ pub enum ResponseEvent {
|
||||
Created,
|
||||
OutputItemDone(ResponseItem),
|
||||
OutputItemAdded(ResponseItem),
|
||||
/// Emitted when `X-Reasoning-Included: true` is present on the response,
|
||||
/// meaning the server already accounted for past reasoning tokens and the
|
||||
/// client should not re-estimate them.
|
||||
ServerReasoningIncluded(bool),
|
||||
Completed {
|
||||
response_id: String,
|
||||
token_usage: Option<TokenUsage>,
|
||||
|
||||
@@ -157,6 +157,9 @@ impl Stream for AggregatedStream {
|
||||
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item))));
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::ServerReasoningIncluded(included)))) => {
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::ServerReasoningIncluded(included))));
|
||||
}
|
||||
Poll::Ready(Some(Ok(ResponseEvent::RateLimits(snapshot)))) => {
|
||||
return Poll::Ready(Some(Ok(ResponseEvent::RateLimits(snapshot))));
|
||||
}
|
||||
|
||||
@@ -29,18 +29,21 @@ use url::Url;
|
||||
|
||||
type WsStream = WebSocketStream<MaybeTlsStream<TcpStream>>;
|
||||
const X_CODEX_TURN_STATE_HEADER: &str = "x-codex-turn-state";
|
||||
const X_REASONING_INCLUDED_HEADER: &str = "x-reasoning-included";
|
||||
|
||||
pub struct ResponsesWebsocketConnection {
|
||||
stream: Arc<Mutex<Option<WsStream>>>,
|
||||
// TODO (pakrym): is this the right place for timeout?
|
||||
idle_timeout: Duration,
|
||||
server_reasoning_included: bool,
|
||||
}
|
||||
|
||||
impl ResponsesWebsocketConnection {
|
||||
fn new(stream: WsStream, idle_timeout: Duration) -> Self {
|
||||
fn new(stream: WsStream, idle_timeout: Duration, server_reasoning_included: bool) -> Self {
|
||||
Self {
|
||||
stream: Arc::new(Mutex::new(Some(stream))),
|
||||
idle_timeout,
|
||||
server_reasoning_included,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -56,11 +59,17 @@ impl ResponsesWebsocketConnection {
|
||||
mpsc::channel::<std::result::Result<ResponseEvent, ApiError>>(1600);
|
||||
let stream = Arc::clone(&self.stream);
|
||||
let idle_timeout = self.idle_timeout;
|
||||
let server_reasoning_included = self.server_reasoning_included;
|
||||
let request_body = serde_json::to_value(&request).map_err(|err| {
|
||||
ApiError::Stream(format!("failed to encode websocket request: {err}"))
|
||||
})?;
|
||||
|
||||
tokio::spawn(async move {
|
||||
if server_reasoning_included {
|
||||
let _ = tx_event
|
||||
.send(Ok(ResponseEvent::ServerReasoningIncluded(true)))
|
||||
.await;
|
||||
}
|
||||
let mut guard = stream.lock().await;
|
||||
let Some(ws_stream) = guard.as_mut() else {
|
||||
let _ = tx_event
|
||||
@@ -111,10 +120,12 @@ impl<A: AuthProvider> ResponsesWebsocketClient<A> {
|
||||
headers.extend(extra_headers);
|
||||
apply_auth_headers(&mut headers, &self.auth);
|
||||
|
||||
let stream = connect_websocket(ws_url, headers, turn_state).await?;
|
||||
let (stream, server_reasoning_included) =
|
||||
connect_websocket(ws_url, headers, turn_state).await?;
|
||||
Ok(ResponsesWebsocketConnection::new(
|
||||
stream,
|
||||
self.provider.stream_idle_timeout,
|
||||
server_reasoning_included,
|
||||
))
|
||||
}
|
||||
}
|
||||
@@ -137,7 +148,7 @@ async fn connect_websocket(
|
||||
url: Url,
|
||||
headers: HeaderMap,
|
||||
turn_state: Option<Arc<OnceLock<String>>>,
|
||||
) -> Result<WsStream, ApiError> {
|
||||
) -> Result<(WsStream, bool), ApiError> {
|
||||
let mut request = url
|
||||
.clone()
|
||||
.into_client_request()
|
||||
@@ -147,6 +158,7 @@ async fn connect_websocket(
|
||||
let (stream, response) = tokio_tungstenite::connect_async(request)
|
||||
.await
|
||||
.map_err(|err| map_ws_error(err, &url))?;
|
||||
let reasoning_included = response.headers().contains_key(X_REASONING_INCLUDED_HEADER);
|
||||
if let Some(turn_state) = turn_state
|
||||
&& let Some(header_value) = response
|
||||
.headers()
|
||||
@@ -155,7 +167,7 @@ async fn connect_websocket(
|
||||
{
|
||||
let _ = turn_state.set(header_value.to_string());
|
||||
}
|
||||
Ok(stream)
|
||||
Ok((stream, reasoning_included))
|
||||
}
|
||||
|
||||
fn map_ws_error(err: WsError, url: &Url) -> ApiError {
|
||||
|
||||
@@ -25,6 +25,8 @@ use tokio_util::io::ReaderStream;
|
||||
use tracing::debug;
|
||||
use tracing::trace;
|
||||
|
||||
const X_REASONING_INCLUDED_HEADER: &str = "x-reasoning-included";
|
||||
|
||||
/// Streams SSE events from an on-disk fixture for tests.
|
||||
pub fn stream_from_fixture(
|
||||
path: impl AsRef<Path>,
|
||||
@@ -58,6 +60,10 @@ pub fn spawn_response_stream(
|
||||
.get("X-Models-Etag")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.map(ToString::to_string);
|
||||
let reasoning_included = stream_response
|
||||
.headers
|
||||
.get(X_REASONING_INCLUDED_HEADER)
|
||||
.is_some();
|
||||
if let Some(turn_state) = turn_state.as_ref()
|
||||
&& let Some(header_value) = stream_response
|
||||
.headers
|
||||
@@ -74,6 +80,11 @@ pub fn spawn_response_stream(
|
||||
if let Some(etag) = models_etag {
|
||||
let _ = tx_event.send(Ok(ResponseEvent::ModelsEtag(etag))).await;
|
||||
}
|
||||
if reasoning_included {
|
||||
let _ = tx_event
|
||||
.send(Ok(ResponseEvent::ServerReasoningIncluded(true)))
|
||||
.await;
|
||||
}
|
||||
process_sse(stream_response.bytes, tx_event, idle_timeout, telemetry).await;
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user