Fix compaction context reinjection and model baselines (#12252)

## Summary

- move regular-turn context diff/full-context persistence into `run_turn` so pre-turn compaction runs before incoming context updates are recorded
- after successful pre-turn compaction, rely on a cleared `reference_context_item` to trigger full context reinjection on the follow-up regular turn (manual `/compact` keeps replacement history summary-only and also clears the baseline)
- preserve `<model_switch>` when full context is reinjected, and inject it *before* the rest of the full-context items
- scope `reference_context_item` and `previous_model` to regular user turns only so standalone tasks (`/compact`, shell, review, undo) cannot suppress future reinjection or `<model_switch>` behavior
- make context-diff persistence + `reference_context_item` updates explicit in the regular-turn path, with clearer docs/comments around the invariant
- stop persisting local `/compact` `RolloutItem::TurnContext` snapshots (only regular turns persist `TurnContextItem` now)
- simplify resume/fork previous-model/reference-baseline hydration by looking up the last surviving turn context from rollout lifecycle events, including rollback and compaction-crossing handling
- remove the legacy fallback that guessed from bare `TurnContext` rollouts without lifecycle events
- update compaction/remote-compaction/model-visible snapshots and compact test assertions (including remote compaction mock response shape)

## Why

We were persisting incoming context items before spawning the regular turn task, which let pre-turn compaction requests accidentally include incoming context diffs without the new user message. Fixing that exposed follow-on baseline issues around `/compact`, resume/fork, and standalone tasks that could cause duplicate context injection or suppress `<model_switch>` instructions.
This PR re-centers the invariants around regular turns:

- regular turns persist model-visible context diffs/full reinjection and update the `reference_context_item`
- standalone tasks do not advance those regular-turn baselines
- compaction clears the baseline when replacement history may have stripped the referenced context diffs

## Follow-ups (TODOs left in code)

- `TODO(ccunningham)`: fix rollback/backtracking baseline handling more comprehensively
- `TODO(ccunningham)`: include pending incoming context items in pre-turn compaction threshold estimation
- `TODO(ccunningham)`: inject updated personality spec alongside `<model_switch>` so some model-switch paths can avoid forced full reinjection
- `TODO(ccunningham)`: review task turn lifecycle (`TurnStarted`/`TurnComplete`) behavior and emit task-start context diffs for task types that should have them (excluding `/compact`)

## Validation

- `just fmt`
- CI should cover the updated compaction/resume/model-visible snapshot expectations and rollout-hydration behavior
- I did **not** rerun the full local test suite after the latest resume-lookup / rollout-persistence simplifications
2026-05-05 22:01:37 +03:00 · 2026-02-20 23:13:08 -08:00
parent 264fc444b6
commit bb0ac5be70
31 changed files with 1289 additions and 1206 deletions
--- a/codex-rs/core/src/tasks/compact.rs
+++ b/codex-rs/core/src/tasks/compact.rs
@@ -25,22 +25,21 @@ impl SessionTask for CompactTask {
        _cancellation_token: CancellationToken,
    ) -> Option<String> {
        let session = session.clone_session();
-        if crate::compact::should_use_remote_compact_task(&ctx.provider) {
+        let _ = if crate::compact::should_use_remote_compact_task(&ctx.provider) {
            let _ = session.services.otel_manager.counter(
                "codex.task.compact",
                1,
                &[("type", "remote")],
            );
-            let _ = crate::compact_remote::run_remote_compact_task(session, ctx).await;
+            crate::compact_remote::run_remote_compact_task(session.clone(), ctx).await
        } else {
            let _ = session.services.otel_manager.counter(
                "codex.task.compact",
                1,
                &[("type", "local")],
            );
-            let _ = crate::compact::run_compact_task(session, ctx, input).await;
-        }
-
+            crate::compact::run_compact_task(session.clone(), ctx, input).await
+        };
        None
    }
 }
--- a/codex-rs/core/src/tasks/mod.rs
+++ b/codex-rs/core/src/tasks/mod.rs
@@ -121,8 +121,6 @@ impl Session {
    ) {
        self.abort_all_tasks(TurnAbortReason::Replaced).await;
        self.clear_connector_selection().await;
-        self.seed_initial_context_if_needed(turn_context.as_ref())
-            .await;

        let task: Arc<dyn SessionTask> = Arc::new(task);
        let task_kind = task.kind();
@@ -140,7 +138,6 @@ impl Session {
            tokio::spawn(
                async move {
                    let ctx_for_finish = Arc::clone(&ctx);
-                    let model_slug = ctx_for_finish.model_info.slug.clone();
                    let last_agent_message = task_for_run
                        .run(
                            Arc::clone(&session_ctx),
@@ -151,9 +148,6 @@ impl Session {
                        .await;
                    let sess = session_ctx.clone_session();
                    sess.flush_rollout().await;
-                    // Update previous model before TurnComplete is emitted so
-                    // immediately following turns observe the correct switch state.
-                    sess.set_previous_model(Some(model_slug)).await;
                    if !task_cancellation_token.is_cancelled() {
                        // Emit completion uniformly from spawn site so all tasks share the same lifecycle.
                        sess.on_task_finished(Arc::clone(&ctx_for_finish), last_agent_message)
@@ -278,10 +272,6 @@ impl Session {

        task.handle.abort();

-        // Set previous model even when interrupted so model-switch handling stays correct.
-        self.set_previous_model(Some(task.turn_context.model_info.slug.clone()))
-            .await;
-
        let session_ctx = Arc::new(SessionTaskContext::new(Arc::clone(self)));
        session_task
            .abort(session_ctx, Arc::clone(&task.turn_context))
--- a/codex-rs/core/src/tasks/undo.rs
+++ b/codex-rs/core/src/tasks/undo.rs
@@ -101,7 +101,8 @@ impl SessionTask for UndoTask {
        match restore_result {
            Ok(Ok(())) => {
                items.remove(idx);
-                sess.replace_history(items).await;
+                let reference_context_item = sess.reference_context_item().await;
+                sess.replace_history(items, reference_context_item).await;
                let short_id: String = commit_id.chars().take(7).collect();
                info!(commit_id = commit_id, "Undo restored ghost snapshot");
                completed.success = true;
--- a/codex-rs/core/src/tasks/user_shell.rs
+++ b/codex-rs/core/src/tasks/user_shell.rs
@@ -101,6 +101,10 @@ pub(crate) async fn execute_user_shell_command(
        // Auxiliary mode runs within an existing active turn. That turn already
        // emitted TurnStarted, so emitting another TurnStarted here would create
        // duplicate turn lifecycle events and confuse clients.
+        // TODO(ccunningham): After TurnStarted, emit model-visible turn context diffs for
+        // standalone lifecycle tasks (for example /shell, and review once it emits TurnStarted).
+        // `/compact` is an intentional exception because compaction requests should not include
+        // freshly reinjected context before the summary/replacement history is applied.
        let event = EventMsg::TurnStarted(TurnStartedEvent {
            turn_id: turn_context.sub_id.clone(),
            model_context_window: turn_context.model_context_window(),