Act on reasoning-included per turn (#9402)

- Reset reasoning-included flag each turn and update compaction test
This commit is contained in:
Ahmed Ibrahim
2026-01-19 11:23:25 -08:00
committed by GitHub
parent 57ec3a8277
commit b11e96fb04
12 changed files with 192 additions and 11 deletions

View File

@@ -42,6 +42,10 @@ pub enum ResponseEvent {
Created,
OutputItemDone(ResponseItem),
OutputItemAdded(ResponseItem),
/// Emitted when the response carries the `X-Reasoning-Included` header
/// (sent as `true` by the server), meaning the server already accounted for
/// past reasoning tokens and the client should not re-estimate them.
ServerReasoningIncluded(bool),
Completed {
response_id: String,
token_usage: Option<TokenUsage>,

View File

@@ -157,6 +157,9 @@ impl Stream for AggregatedStream {
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item))));
}
Poll::Ready(Some(Ok(ResponseEvent::ServerReasoningIncluded(included)))) => {
return Poll::Ready(Some(Ok(ResponseEvent::ServerReasoningIncluded(included))));
}
Poll::Ready(Some(Ok(ResponseEvent::RateLimits(snapshot)))) => {
return Poll::Ready(Some(Ok(ResponseEvent::RateLimits(snapshot))));
}

View File

@@ -29,18 +29,21 @@ use url::Url;
type WsStream = WebSocketStream<MaybeTlsStream<TcpStream>>;
/// Name of the turn-state header.
/// NOTE(review): presumably the handshake response header whose value
/// `connect_websocket` stores into `turn_state` — the lookup itself is not
/// fully visible here; confirm.
const X_CODEX_TURN_STATE_HEADER: &str = "x-codex-turn-state";
/// Handshake response header looked up in `connect_websocket`. Only its
/// presence is tested (`contains_key`); the header value is not inspected.
const X_REASONING_INCLUDED_HEADER: &str = "x-reasoning-included";
/// Connection handle built by `ResponsesWebsocketClient` after a successful
/// `connect_websocket` call. Bundles the shared websocket stream with the
/// per-connection settings that each streaming task copies out.
pub struct ResponsesWebsocketConnection {
/// Shared slot holding the websocket. The spawned streaming task locks it
/// and takes its `let ... else` fallback path when the slot is `None`.
stream: Arc<Mutex<Option<WsStream>>>,
// TODO (pakrym): is this the right place for timeout?
/// Idle timeout for the stream; the client passes
/// `provider.stream_idle_timeout` here, and the value is copied into each
/// spawned streaming task.
idle_timeout: Duration,
/// Whether the websocket handshake response contained the
/// `x-reasoning-included` header. When `true`, every spawned streaming task
/// first sends `ResponseEvent::ServerReasoningIncluded(true)` before it
/// locks the stream.
/// NOTE(review): the flag comes from a presence-only check
/// (`contains_key`), so a header value of `false` would still set it —
/// confirm the server never sends that.
server_reasoning_included: bool,
}
impl ResponsesWebsocketConnection {
fn new(stream: WsStream, idle_timeout: Duration) -> Self {
/// Builds a connection around an already-established websocket stream.
///
/// The stream is placed in a shared `Arc<Mutex<Option<_>>>` slot so spawned
/// tasks can lock it; `idle_timeout` and `server_reasoning_included` are
/// stored unchanged for later use by the streaming path.
fn new(stream: WsStream, idle_timeout: Duration, server_reasoning_included: bool) -> Self {
    let shared_stream = Arc::new(Mutex::new(Some(stream)));
    Self {
        stream: shared_stream,
        idle_timeout,
        server_reasoning_included,
    }
}
@@ -56,11 +59,17 @@ impl ResponsesWebsocketConnection {
mpsc::channel::<std::result::Result<ResponseEvent, ApiError>>(1600);
let stream = Arc::clone(&self.stream);
let idle_timeout = self.idle_timeout;
let server_reasoning_included = self.server_reasoning_included;
let request_body = serde_json::to_value(&request).map_err(|err| {
ApiError::Stream(format!("failed to encode websocket request: {err}"))
})?;
tokio::spawn(async move {
if server_reasoning_included {
let _ = tx_event
.send(Ok(ResponseEvent::ServerReasoningIncluded(true)))
.await;
}
let mut guard = stream.lock().await;
let Some(ws_stream) = guard.as_mut() else {
let _ = tx_event
@@ -111,10 +120,12 @@ impl<A: AuthProvider> ResponsesWebsocketClient<A> {
headers.extend(extra_headers);
apply_auth_headers(&mut headers, &self.auth);
let stream = connect_websocket(ws_url, headers, turn_state).await?;
let (stream, server_reasoning_included) =
connect_websocket(ws_url, headers, turn_state).await?;
Ok(ResponsesWebsocketConnection::new(
stream,
self.provider.stream_idle_timeout,
server_reasoning_included,
))
}
}
@@ -137,7 +148,7 @@ async fn connect_websocket(
url: Url,
headers: HeaderMap,
turn_state: Option<Arc<OnceLock<String>>>,
) -> Result<WsStream, ApiError> {
) -> Result<(WsStream, bool), ApiError> {
let mut request = url
.clone()
.into_client_request()
@@ -147,6 +158,7 @@ async fn connect_websocket(
let (stream, response) = tokio_tungstenite::connect_async(request)
.await
.map_err(|err| map_ws_error(err, &url))?;
let reasoning_included = response.headers().contains_key(X_REASONING_INCLUDED_HEADER);
if let Some(turn_state) = turn_state
&& let Some(header_value) = response
.headers()
@@ -155,7 +167,7 @@ async fn connect_websocket(
{
let _ = turn_state.set(header_value.to_string());
}
Ok(stream)
Ok((stream, reasoning_included))
}
fn map_ws_error(err: WsError, url: &Url) -> ApiError {

View File

@@ -25,6 +25,8 @@ use tokio_util::io::ReaderStream;
use tracing::debug;
use tracing::trace;
/// Response header looked up by `spawn_response_stream`. When it is present
/// (the value is not inspected), the stream emits
/// `ResponseEvent::ServerReasoningIncluded(true)` before SSE processing starts.
const X_REASONING_INCLUDED_HEADER: &str = "x-reasoning-included";
/// Streams SSE events from an on-disk fixture for tests.
pub fn stream_from_fixture(
path: impl AsRef<Path>,
@@ -58,6 +60,10 @@ pub fn spawn_response_stream(
.get("X-Models-Etag")
.and_then(|v| v.to_str().ok())
.map(ToString::to_string);
let reasoning_included = stream_response
.headers
.get(X_REASONING_INCLUDED_HEADER)
.is_some();
if let Some(turn_state) = turn_state.as_ref()
&& let Some(header_value) = stream_response
.headers
@@ -74,6 +80,11 @@ pub fn spawn_response_stream(
if let Some(etag) = models_etag {
let _ = tx_event.send(Ok(ResponseEvent::ModelsEtag(etag))).await;
}
if reasoning_included {
let _ = tx_event
.send(Ok(ResponseEvent::ServerReasoningIncluded(true)))
.await;
}
process_sse(stream_response.bytes, tx_event, idle_timeout, telemetry).await;
});