Act on reasoning-included per turn (#9402)

- Reset reasoning-included flag each turn and update compaction test
This commit is contained in:
Ahmed Ibrahim
2026-01-19 11:23:25 -08:00
committed by GitHub
parent 57ec3a8277
commit b11e96fb04
12 changed files with 192 additions and 11 deletions

View File

@@ -809,7 +809,7 @@ impl Session {
/// Returns the total token usage reported by the session state, read while
/// holding the state lock.
///
/// NOTE(review): this diff hunk shows two call forms back to back. The
/// argument-less call looks like the pre-change line and the call that passes
/// `state.server_reasoning_included()` looks like its replacement; only one of
/// them can be the tail expression — confirm against the applied commit.
async fn get_total_token_usage(&self) -> i64 {
let state = self.state.lock().await;
state.get_total_token_usage()
state.get_total_token_usage(state.server_reasoning_included())
}
async fn record_initial_history(&self, conversation_history: InitialHistory) {
@@ -1618,6 +1618,11 @@ impl Session {
self.send_token_count_event(turn_context).await;
}
/// Stores the `included` flag in the session state.
///
/// The state lock is acquired only for the duration of the forwarded setter
/// call; the guard is a temporary and is released when the statement ends.
pub(crate) async fn set_server_reasoning_included(&self, included: bool) {
    self.state
        .lock()
        .await
        .set_server_reasoning_included(included);
}
async fn send_token_count_event(&self, turn_context: &TurnContext) {
let (info, rate_limits) = {
let state = self.state.lock().await;
@@ -3149,6 +3154,9 @@ async fn try_run_sampling_request(
active_item = Some(tracked_item);
}
}
ResponseEvent::ServerReasoningIncluded(included) => {
sess.set_server_reasoning_included(included).await;
}
ResponseEvent::RateLimits(snapshot) => {
// Update internal state with latest rate limits, but defer sending until
// token usage is available to avoid duplicate TokenCount events.