Fix compaction context reinjection and model baselines (#12252)

## Summary
- move regular-turn context diff/full-context persistence into
`run_turn` so pre-turn compaction runs before incoming context updates
are recorded
- after successful pre-turn compaction, rely on a cleared
`reference_context_item` to trigger full context reinjection on the
follow-up regular turn (manual `/compact` keeps replacement history
summary-only and also clears the baseline)
- preserve `<model_switch>` when full context is reinjected, and inject
it *before* the rest of the full-context items
- scope `reference_context_item` and `previous_model` to regular user
turns only so standalone tasks (`/compact`, shell, review, undo) cannot
suppress future reinjection or `<model_switch>` behavior
- make context-diff persistence + `reference_context_item` updates
explicit in the regular-turn path, with clearer docs/comments around the
invariant
- stop persisting local `/compact` `RolloutItem::TurnContext` snapshots
(only regular turns persist `TurnContextItem` now)
- simplify how resume/fork hydrates the previous model and the reference
baseline: look up the last surviving turn context from rollout lifecycle
events, including rollback and compaction-crossing handling
- remove the legacy fallback that guessed from bare `TurnContext`
rollouts without lifecycle events
- update the compaction, remote-compaction, and model-visible snapshots
and the compact test assertions (including the shape of the remote
compaction mock response)

## Why
We were persisting incoming context items before spawning the regular
turn task, which let pre-turn compaction requests accidentally include
incoming context diffs without the new user message. Fixing that exposed
follow-on baseline issues around `/compact`, resume/fork, and standalone
tasks that could cause duplicate context injection or suppress
`<model_switch>` instructions.

This PR re-centers the invariants around regular turns:
- regular turns persist model-visible context diffs/full reinjection and
update the `reference_context_item`
- standalone tasks do not advance those regular-turn baselines
- compaction clears the baseline when replacement history may have
stripped the referenced context diffs

## Follow-ups (TODOs left in code)
- `TODO(ccunningham)`: fix rollback/backtracking baseline handling more
comprehensively
- `TODO(ccunningham)`: include pending incoming context items in
pre-turn compaction threshold estimation
- `TODO(ccunningham)`: inject updated personality spec alongside
`<model_switch>` so some model-switch paths can avoid forced full
reinjection
- `TODO(ccunningham)`: review task turn lifecycle
(`TurnStarted`/`TurnComplete`) behavior and emit task-start context
diffs for task types that should have them (excluding `/compact`)

## Validation
- `just fmt`
- CI should cover the updated compaction/resume/model-visible snapshot
expectations and rollout-hydration behavior
- I did **not** rerun the full local test suite after the latest
resume-lookup / rollout-persistence simplifications
This commit is contained in:
Charley Cunningham
2026-02-20 23:13:08 -08:00
committed by GitHub
parent 264fc444b6
commit bb0ac5be70
31 changed files with 1289 additions and 1206 deletions

View File

@@ -25,22 +25,21 @@ impl SessionTask for CompactTask {
_cancellation_token: CancellationToken,
) -> Option<String> {
let session = session.clone_session();
if crate::compact::should_use_remote_compact_task(&ctx.provider) {
let _ = if crate::compact::should_use_remote_compact_task(&ctx.provider) {
let _ = session.services.otel_manager.counter(
"codex.task.compact",
1,
&[("type", "remote")],
);
let _ = crate::compact_remote::run_remote_compact_task(session, ctx).await;
crate::compact_remote::run_remote_compact_task(session.clone(), ctx).await
} else {
let _ = session.services.otel_manager.counter(
"codex.task.compact",
1,
&[("type", "local")],
);
let _ = crate::compact::run_compact_task(session, ctx, input).await;
}
crate::compact::run_compact_task(session.clone(), ctx, input).await
};
None
}
}

View File

@@ -121,8 +121,6 @@ impl Session {
) {
self.abort_all_tasks(TurnAbortReason::Replaced).await;
self.clear_connector_selection().await;
self.seed_initial_context_if_needed(turn_context.as_ref())
.await;
let task: Arc<dyn SessionTask> = Arc::new(task);
let task_kind = task.kind();
@@ -140,7 +138,6 @@ impl Session {
tokio::spawn(
async move {
let ctx_for_finish = Arc::clone(&ctx);
let model_slug = ctx_for_finish.model_info.slug.clone();
let last_agent_message = task_for_run
.run(
Arc::clone(&session_ctx),
@@ -151,9 +148,6 @@ impl Session {
.await;
let sess = session_ctx.clone_session();
sess.flush_rollout().await;
// Update previous model before TurnComplete is emitted so
// immediately following turns observe the correct switch state.
sess.set_previous_model(Some(model_slug)).await;
if !task_cancellation_token.is_cancelled() {
// Emit completion uniformly from spawn site so all tasks share the same lifecycle.
sess.on_task_finished(Arc::clone(&ctx_for_finish), last_agent_message)
@@ -278,10 +272,6 @@ impl Session {
task.handle.abort();
// Set previous model even when interrupted so model-switch handling stays correct.
self.set_previous_model(Some(task.turn_context.model_info.slug.clone()))
.await;
let session_ctx = Arc::new(SessionTaskContext::new(Arc::clone(self)));
session_task
.abort(session_ctx, Arc::clone(&task.turn_context))

View File

@@ -101,7 +101,8 @@ impl SessionTask for UndoTask {
match restore_result {
Ok(Ok(())) => {
items.remove(idx);
sess.replace_history(items).await;
let reference_context_item = sess.reference_context_item().await;
sess.replace_history(items, reference_context_item).await;
let short_id: String = commit_id.chars().take(7).collect();
info!(commit_id = commit_id, "Undo restored ghost snapshot");
completed.success = true;

View File

@@ -101,6 +101,10 @@ pub(crate) async fn execute_user_shell_command(
// Auxiliary mode runs within an existing active turn. That turn already
// emitted TurnStarted, so emitting another TurnStarted here would create
// duplicate turn lifecycle events and confuse clients.
// TODO(ccunningham): After TurnStarted, emit model-visible turn context diffs for
// standalone lifecycle tasks (for example /shell, and review once it emits TurnStarted).
// `/compact` is an intentional exception because compaction requests should not include
// freshly reinjected context before the summary/replacement history is applied.
let event = EventMsg::TurnStarted(TurnStartedEvent {
turn_id: turn_context.sub_id.clone(),
model_context_window: turn_context.model_context_window(),