feat: use memory usage for selection (#12909)

This commit is contained in:
jif-oai
2026-02-26 16:44:02 +00:00
committed by GitHub
parent 1503a8dad7
commit c528f32acb
8 changed files with 329 additions and 16 deletions

View File

@@ -59,7 +59,14 @@ Phase 2 consolidates the latest stage-1 outputs into the filesystem memory artif
What it does:
- claims a single global phase-2 job (so only one consolidation runs at a time)
- loads a bounded set of the most recent stage-1 outputs from the state DB (the per-rollout memories produced by Phase 1, used as the consolidation input set)
- loads a bounded set of stage-1 outputs from the state DB using phase-2
selection rules:
- ignores memories whose `last_usage` falls outside the configured
`max_unused_days` window
- for memories with no `last_usage`, falls back to `generated_at`, so that
  recently generated, never-used memories can still be selected
- ranks eligible memories by `usage_count` first, breaking ties by recency
  (most recent `last_usage`, falling back to `generated_at`)
- computes a completion watermark from the claimed watermark + newest input timestamps
- syncs local memory artifacts under the memories root:
- `raw_memories.md` (merged raw memories, latest first)

View File

@@ -53,6 +53,7 @@ pub(super) async fn run(session: &Arc<Session>, config: Arc<Config>) {
};
let root = memory_root(&config.codex_home);
let max_raw_memories = config.memories.max_raw_memories_for_global;
let max_unused_days = config.memories.max_unused_days;
// 1. Claim the job.
let claim = match job::claim(session, db).await {
@@ -76,7 +77,10 @@ pub(super) async fn run(session: &Arc<Session>, config: Arc<Config>) {
};
// 3. Query the memories
let selection = match db.get_phase2_input_selection(max_raw_memories).await {
let selection = match db
.get_phase2_input_selection(max_raw_memories, max_unused_days)
.await
{
Ok(selection) => selection,
Err(err) => {
tracing::error!("failed to list stage1 outputs from global: {}", err);

View File

@@ -559,7 +559,7 @@ mod phase2 {
#[tokio::test]
async fn dispatch_reclaims_stale_global_lock_and_starts_consolidation() {
let harness = DispatchHarness::new().await;
harness.seed_stage1_output(100).await;
harness.seed_stage1_output(Utc::now().timestamp()).await;
let stale_claim = harness
.state_db
@@ -573,12 +573,18 @@ mod phase2 {
phase2::run(&harness.session, Arc::clone(&harness.config)).await;
let running_claim = harness
let post_dispatch_claim = harness
.state_db
.try_claim_global_phase2_job(ThreadId::new(), 3_600)
.await
.expect("claim while running");
pretty_assertions::assert_eq!(running_claim, Phase2JobClaimOutcome::SkippedRunning);
.expect("claim after stale lock dispatch");
assert!(
matches!(
post_dispatch_claim,
Phase2JobClaimOutcome::SkippedRunning | Phase2JobClaimOutcome::SkippedNotDirty
),
"stale-lock dispatch should either keep the reclaimed job running or finish it before re-claim"
);
let user_input_ops = harness.user_input_ops_count();
pretty_assertions::assert_eq!(user_input_ops, 1);