mirror of
https://github.com/openai/codex.git
synced 2026-05-04 13:21:54 +03:00
Prefer state DB thread listings before filesystem (#10544)
Summary - add Cursor/ThreadsPage conversions so state DB listings can be mapped back into the rollout list model - make recorder list helpers query the state DB first (archived flag included) and only fall back to file traversal if needed, along with populating head bytes lazily - add extensive tests to ensure the DB path is honored for active and archived threads and that the fallback works Testing - Not run (not requested) <img width="1196" height="693" alt="Screenshot 2026-02-03 at 20 42 33" src="https://github.com/user-attachments/assets/826b3c7a-ef11-4b27-802a-3c343695794a" />
This commit is contained in:
@@ -261,6 +261,14 @@ impl<'de> serde::Deserialize<'de> for Cursor {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<codex_state::Anchor> for Cursor {
|
||||
fn from(anchor: codex_state::Anchor) -> Self {
|
||||
let ts = OffsetDateTime::from_unix_timestamp(anchor.ts.timestamp())
|
||||
.unwrap_or(OffsetDateTime::UNIX_EPOCH);
|
||||
Self::new(ts, anchor.id)
|
||||
}
|
||||
}
|
||||
|
||||
/// Retrieve recorded thread file paths with token pagination. The returned `next_cursor`
|
||||
/// can be supplied on the next call to resume after the last returned item, resilient to
|
||||
/// concurrent new sessions being appended. Ordering is stable by the requested sort key
|
||||
@@ -989,7 +997,6 @@ async fn read_head_summary(path: &Path, head_limit: usize) -> io::Result<HeadTai
|
||||
&& !UserInstructions::is_user_instructions(content.as_slice())
|
||||
&& !is_session_prefix_content(content.as_slice())
|
||||
{
|
||||
tracing::warn!("Item: {item:#?}");
|
||||
summary.saw_user_event = true;
|
||||
}
|
||||
if summary.head.len() < head_limit
|
||||
|
||||
@@ -6,6 +6,7 @@ use std::io::Error as IoError;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use chrono::SecondsFormat;
|
||||
use codex_protocol::ThreadId;
|
||||
use codex_protocol::dynamic_tools::DynamicToolSpec;
|
||||
use codex_protocol::models::BaseInstructions;
|
||||
@@ -23,12 +24,14 @@ use tracing::warn;
|
||||
use super::ARCHIVED_SESSIONS_SUBDIR;
|
||||
use super::SESSIONS_SUBDIR;
|
||||
use super::list::Cursor;
|
||||
use super::list::ThreadItem;
|
||||
use super::list::ThreadListConfig;
|
||||
use super::list::ThreadListLayout;
|
||||
use super::list::ThreadSortKey;
|
||||
use super::list::ThreadsPage;
|
||||
use super::list::get_threads;
|
||||
use super::list::get_threads_in_root;
|
||||
use super::list::read_head_for_summary;
|
||||
use super::metadata;
|
||||
use super::policy::is_persisted_response_item;
|
||||
use crate::config::Config;
|
||||
@@ -120,8 +123,7 @@ impl RolloutRecorder {
|
||||
model_providers: Option<&[String]>,
|
||||
default_provider: &str,
|
||||
) -> std::io::Result<ThreadsPage> {
|
||||
let stage = "list_threads";
|
||||
let page = get_threads(
|
||||
Self::list_threads_with_db_fallback(
|
||||
codex_home,
|
||||
page_size,
|
||||
cursor,
|
||||
@@ -129,35 +131,9 @@ impl RolloutRecorder {
|
||||
allowed_sources,
|
||||
model_providers,
|
||||
default_provider,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// TODO(jif): drop after sqlite migration phase 1
|
||||
let state_db_ctx = state_db::open_if_present(codex_home, default_provider).await;
|
||||
if let Some(db_ids) = state_db::list_thread_ids_db(
|
||||
state_db_ctx.as_deref(),
|
||||
codex_home,
|
||||
page_size,
|
||||
cursor,
|
||||
sort_key,
|
||||
allowed_sources,
|
||||
model_providers,
|
||||
false,
|
||||
stage,
|
||||
)
|
||||
.await
|
||||
{
|
||||
if page.items.len() != db_ids.len() {
|
||||
state_db::record_discrepancy(stage, "bad_len");
|
||||
return Ok(page);
|
||||
}
|
||||
for (id, item) in db_ids.iter().zip(page.items.iter()) {
|
||||
if !item.path.display().to_string().contains(&id.to_string()) {
|
||||
state_db::record_discrepancy(stage, "bad_id");
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(page)
|
||||
}
|
||||
|
||||
/// List archived threads (rollout files) under the archived sessions directory.
|
||||
@@ -170,25 +146,32 @@ impl RolloutRecorder {
|
||||
model_providers: Option<&[String]>,
|
||||
default_provider: &str,
|
||||
) -> std::io::Result<ThreadsPage> {
|
||||
let stage = "list_archived_threads";
|
||||
let root = codex_home.join(ARCHIVED_SESSIONS_SUBDIR);
|
||||
let page = get_threads_in_root(
|
||||
root,
|
||||
Self::list_threads_with_db_fallback(
|
||||
codex_home,
|
||||
page_size,
|
||||
cursor,
|
||||
sort_key,
|
||||
ThreadListConfig {
|
||||
allowed_sources,
|
||||
model_providers,
|
||||
default_provider,
|
||||
layout: ThreadListLayout::Flat,
|
||||
},
|
||||
allowed_sources,
|
||||
model_providers,
|
||||
default_provider,
|
||||
true,
|
||||
)
|
||||
.await?;
|
||||
.await
|
||||
}
|
||||
|
||||
// TODO(jif): drop after sqlite migration phase 1
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn list_threads_with_db_fallback(
|
||||
codex_home: &Path,
|
||||
page_size: usize,
|
||||
cursor: Option<&Cursor>,
|
||||
sort_key: ThreadSortKey,
|
||||
allowed_sources: &[SessionSource],
|
||||
model_providers: Option<&[String]>,
|
||||
default_provider: &str,
|
||||
archived: bool,
|
||||
) -> std::io::Result<ThreadsPage> {
|
||||
let state_db_ctx = state_db::open_if_present(codex_home, default_provider).await;
|
||||
if let Some(db_ids) = state_db::list_thread_ids_db(
|
||||
if let Some(db_page) = state_db::list_threads_db(
|
||||
state_db_ctx.as_deref(),
|
||||
codex_home,
|
||||
page_size,
|
||||
@@ -196,22 +179,42 @@ impl RolloutRecorder {
|
||||
sort_key,
|
||||
allowed_sources,
|
||||
model_providers,
|
||||
true,
|
||||
stage,
|
||||
archived,
|
||||
)
|
||||
.await
|
||||
{
|
||||
if page.items.len() != db_ids.len() {
|
||||
state_db::record_discrepancy(stage, "bad_len");
|
||||
return Ok(page);
|
||||
}
|
||||
for (id, item) in db_ids.iter().zip(page.items.iter()) {
|
||||
if !item.path.display().to_string().contains(&id.to_string()) {
|
||||
state_db::record_discrepancy(stage, "bad_id");
|
||||
}
|
||||
}
|
||||
let mut page: ThreadsPage = db_page.into();
|
||||
populate_thread_heads(page.items.as_mut_slice()).await;
|
||||
return Ok(page);
|
||||
}
|
||||
Ok(page)
|
||||
|
||||
if archived {
|
||||
let root = codex_home.join(ARCHIVED_SESSIONS_SUBDIR);
|
||||
return get_threads_in_root(
|
||||
root,
|
||||
page_size,
|
||||
cursor,
|
||||
sort_key,
|
||||
ThreadListConfig {
|
||||
allowed_sources,
|
||||
model_providers,
|
||||
default_provider,
|
||||
layout: ThreadListLayout::Flat,
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
get_threads(
|
||||
codex_home,
|
||||
page_size,
|
||||
cursor,
|
||||
sort_key,
|
||||
allowed_sources,
|
||||
model_providers,
|
||||
default_provider,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Find the newest recorded thread path, optionally filtering to a matching cwd.
|
||||
@@ -645,6 +648,41 @@ impl JsonlWriter {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<codex_state::ThreadsPage> for ThreadsPage {
|
||||
fn from(db_page: codex_state::ThreadsPage) -> Self {
|
||||
let items = db_page
|
||||
.items
|
||||
.into_iter()
|
||||
.map(|item| ThreadItem {
|
||||
path: item.rollout_path,
|
||||
head: Vec::new(),
|
||||
created_at: Some(item.created_at.to_rfc3339_opts(SecondsFormat::Secs, true)),
|
||||
updated_at: Some(item.updated_at.to_rfc3339_opts(SecondsFormat::Secs, true)),
|
||||
})
|
||||
.collect();
|
||||
Self {
|
||||
items,
|
||||
next_cursor: db_page.next_anchor.map(Into::into),
|
||||
num_scanned_files: db_page.num_scanned_rows,
|
||||
reached_scan_cap: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn populate_thread_heads(items: &mut [ThreadItem]) {
|
||||
for item in items {
|
||||
item.head = read_head_for_summary(item.path.as_path())
|
||||
.await
|
||||
.unwrap_or_else(|err| {
|
||||
warn!(
|
||||
"failed to read rollout head from state db path: {} ({err})",
|
||||
item.path.display()
|
||||
);
|
||||
Vec::new()
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
fn select_resume_path(page: &ThreadsPage, filter_cwd: Option<&Path>) -> Option<PathBuf> {
|
||||
match filter_cwd {
|
||||
Some(cwd) => page.items.iter().find_map(|item| {
|
||||
|
||||
@@ -7,6 +7,7 @@ use std::fs::{self};
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
|
||||
use chrono::TimeZone;
|
||||
use pretty_assertions::assert_eq;
|
||||
use tempfile::TempDir;
|
||||
use time::Duration;
|
||||
@@ -22,6 +23,7 @@ use crate::rollout::list::ThreadItem;
|
||||
use crate::rollout::list::ThreadSortKey;
|
||||
use crate::rollout::list::ThreadsPage;
|
||||
use crate::rollout::list::get_threads;
|
||||
use crate::rollout::recorder::RolloutRecorder;
|
||||
use crate::rollout::rollout_date_parts;
|
||||
use anyhow::Result;
|
||||
use codex_protocol::ThreadId;
|
||||
@@ -45,6 +47,191 @@ fn provider_vec(providers: &[&str]) -> Vec<String> {
|
||||
.collect()
|
||||
}
|
||||
|
||||
async fn insert_state_db_thread(
|
||||
home: &Path,
|
||||
thread_id: ThreadId,
|
||||
rollout_path: &Path,
|
||||
archived: bool,
|
||||
) {
|
||||
let runtime =
|
||||
codex_state::StateRuntime::init(home.to_path_buf(), TEST_PROVIDER.to_string(), None)
|
||||
.await
|
||||
.expect("state db should initialize");
|
||||
let created_at = chrono::Utc
|
||||
.with_ymd_and_hms(2025, 1, 3, 12, 0, 0)
|
||||
.single()
|
||||
.expect("valid datetime");
|
||||
let mut builder = codex_state::ThreadMetadataBuilder::new(
|
||||
thread_id,
|
||||
rollout_path.to_path_buf(),
|
||||
created_at,
|
||||
SessionSource::Cli,
|
||||
);
|
||||
builder.model_provider = Some(TEST_PROVIDER.to_string());
|
||||
builder.cwd = home.to_path_buf();
|
||||
if archived {
|
||||
builder.archived_at = Some(created_at);
|
||||
}
|
||||
let mut metadata = builder.build(TEST_PROVIDER);
|
||||
metadata.has_user_event = true;
|
||||
runtime
|
||||
.upsert_thread(&metadata)
|
||||
.await
|
||||
.expect("state db upsert should succeed");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_threads_prefers_state_db_when_available() {
|
||||
let temp = TempDir::new().unwrap();
|
||||
let home = temp.path();
|
||||
let fs_uuid = Uuid::from_u128(101);
|
||||
write_session_file(
|
||||
home,
|
||||
"2025-01-03T13-00-00",
|
||||
fs_uuid,
|
||||
1,
|
||||
Some(SessionSource::Cli),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let db_uuid = Uuid::from_u128(102);
|
||||
let db_thread_id = ThreadId::from_string(&db_uuid.to_string()).expect("valid thread id");
|
||||
let db_rollout_path = home.join(format!(
|
||||
"sessions/2025/01/03/rollout-2025-01-03T12-00-00-{db_uuid}.jsonl"
|
||||
));
|
||||
insert_state_db_thread(home, db_thread_id, db_rollout_path.as_path(), false).await;
|
||||
|
||||
let page = RolloutRecorder::list_threads(
|
||||
home,
|
||||
10,
|
||||
None,
|
||||
ThreadSortKey::CreatedAt,
|
||||
NO_SOURCE_FILTER,
|
||||
None,
|
||||
TEST_PROVIDER,
|
||||
)
|
||||
.await
|
||||
.expect("thread listing should succeed");
|
||||
|
||||
assert_eq!(page.items.len(), 1);
|
||||
assert_eq!(page.items[0].path, db_rollout_path);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_archived_threads_prefers_state_db_when_available() {
|
||||
let temp = TempDir::new().unwrap();
|
||||
let home = temp.path();
|
||||
let archived_root = home.join("archived_sessions");
|
||||
fs::create_dir_all(&archived_root).unwrap();
|
||||
let fs_uuid = Uuid::from_u128(201);
|
||||
let fs_path = archived_root.join(format!("rollout-2025-01-03T13-00-00-{fs_uuid}.jsonl"));
|
||||
fs::write(&fs_path, "{\"type\":\"session_meta\",\"payload\":{}}\n").unwrap();
|
||||
|
||||
let db_uuid = Uuid::from_u128(202);
|
||||
let db_thread_id = ThreadId::from_string(&db_uuid.to_string()).expect("valid thread id");
|
||||
let db_rollout_path =
|
||||
archived_root.join(format!("rollout-2025-01-03T12-00-00-{db_uuid}.jsonl"));
|
||||
insert_state_db_thread(home, db_thread_id, db_rollout_path.as_path(), true).await;
|
||||
|
||||
let page = RolloutRecorder::list_archived_threads(
|
||||
home,
|
||||
10,
|
||||
None,
|
||||
ThreadSortKey::CreatedAt,
|
||||
NO_SOURCE_FILTER,
|
||||
None,
|
||||
TEST_PROVIDER,
|
||||
)
|
||||
.await
|
||||
.expect("archived thread listing should succeed");
|
||||
|
||||
assert_eq!(page.items.len(), 1);
|
||||
assert_eq!(page.items[0].path, db_rollout_path);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_threads_db_excludes_archived_entries() {
|
||||
let temp = TempDir::new().unwrap();
|
||||
let home = temp.path();
|
||||
let sessions_root = home.join("sessions/2025/01/03");
|
||||
let archived_root = home.join("archived_sessions");
|
||||
fs::create_dir_all(&sessions_root).unwrap();
|
||||
fs::create_dir_all(&archived_root).unwrap();
|
||||
|
||||
let active_uuid = Uuid::from_u128(211);
|
||||
let active_thread_id =
|
||||
ThreadId::from_string(&active_uuid.to_string()).expect("valid active thread id");
|
||||
let active_rollout_path =
|
||||
sessions_root.join(format!("rollout-2025-01-03T12-00-00-{active_uuid}.jsonl"));
|
||||
insert_state_db_thread(home, active_thread_id, active_rollout_path.as_path(), false).await;
|
||||
|
||||
let archived_uuid = Uuid::from_u128(212);
|
||||
let archived_thread_id =
|
||||
ThreadId::from_string(&archived_uuid.to_string()).expect("valid archived thread id");
|
||||
let archived_rollout_path =
|
||||
archived_root.join(format!("rollout-2025-01-03T11-00-00-{archived_uuid}.jsonl"));
|
||||
insert_state_db_thread(
|
||||
home,
|
||||
archived_thread_id,
|
||||
archived_rollout_path.as_path(),
|
||||
true,
|
||||
)
|
||||
.await;
|
||||
|
||||
let page = RolloutRecorder::list_threads(
|
||||
home,
|
||||
10,
|
||||
None,
|
||||
ThreadSortKey::CreatedAt,
|
||||
NO_SOURCE_FILTER,
|
||||
None,
|
||||
TEST_PROVIDER,
|
||||
)
|
||||
.await
|
||||
.expect("thread listing should succeed");
|
||||
|
||||
assert_eq!(page.items.len(), 1);
|
||||
assert_eq!(page.items[0].path, active_rollout_path);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_threads_falls_back_to_files_when_state_db_is_unavailable() {
|
||||
let temp = TempDir::new().unwrap();
|
||||
let home = temp.path();
|
||||
let fs_uuid = Uuid::from_u128(301);
|
||||
write_session_file(
|
||||
home,
|
||||
"2025-01-03T13-00-00",
|
||||
fs_uuid,
|
||||
1,
|
||||
Some(SessionSource::Cli),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let page = RolloutRecorder::list_threads(
|
||||
home,
|
||||
10,
|
||||
None,
|
||||
ThreadSortKey::CreatedAt,
|
||||
NO_SOURCE_FILTER,
|
||||
None,
|
||||
TEST_PROVIDER,
|
||||
)
|
||||
.await
|
||||
.expect("thread listing should succeed");
|
||||
|
||||
assert_eq!(page.items.len(), 1);
|
||||
let file_name = page.items[0]
|
||||
.path
|
||||
.file_name()
|
||||
.and_then(|value| value.to_str())
|
||||
.expect("rollout file name should be utf8");
|
||||
assert!(
|
||||
file_name.contains(&fs_uuid.to_string()),
|
||||
"expected file path from filesystem listing, got: {file_name}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rollout_date_parts_extracts_directory_components() {
|
||||
let file_name = OsStr::new("rollout-2025-03-01T09-00-00-123.jsonl");
|
||||
|
||||
Reference in New Issue
Block a user