Move sqlite logs to a dedicated database (#13772)

## Summary
- move sqlite log reads and writes onto a dedicated `logs_1.sqlite`
database to reduce lock contention with the main state DB
- add a dedicated logs migrator and route `codex-state-logs` to the new
database path
- leave the old `logs` table in the existing state DB untouched for now

## Testing
- just fmt
- cargo test -p codex-state

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Charley Cunningham
2026-03-06 10:54:20 -08:00
committed by GitHub
parent 51fcdc760d
commit 4e6c6193a1
7 changed files with 165 additions and 31 deletions

View File

@@ -6,6 +6,8 @@ use crate::AgentJobItemStatus;
use crate::AgentJobProgress;
use crate::AgentJobStatus;
use crate::DB_ERROR_METRIC;
use crate::LOGS_DB_FILENAME;
use crate::LOGS_DB_VERSION;
use crate::LogEntry;
use crate::LogQuery;
use crate::LogRow;
@@ -17,7 +19,8 @@ use crate::ThreadMetadata;
use crate::ThreadMetadataBuilder;
use crate::ThreadsPage;
use crate::apply_rollout_item;
use crate::migrations::MIGRATOR;
use crate::migrations::LOGS_MIGRATOR;
use crate::migrations::STATE_MIGRATOR;
use crate::model::AgentJobRow;
use crate::model::ThreadRow;
use crate::model::anchor_from_item;
@@ -37,6 +40,7 @@ use sqlx::Row;
use sqlx::Sqlite;
use sqlx::SqliteConnection;
use sqlx::SqlitePool;
use sqlx::migrate::Migrator;
use sqlx::sqlite::SqliteConnectOptions;
use sqlx::sqlite::SqliteJournalMode;
use sqlx::sqlite::SqlitePoolOptions;
@@ -68,22 +72,41 @@ pub struct StateRuntime {
codex_home: PathBuf,
default_provider: String,
pool: Arc<sqlx::SqlitePool>,
logs_pool: Arc<sqlx::SqlitePool>,
}
impl StateRuntime {
/// Initialize the state runtime using the provided Codex home and default provider.
///
/// This opens (and migrates) the SQLite database at `codex_home/state.sqlite`.
/// This opens (and migrates) the SQLite databases under `codex_home`,
/// keeping logs in a dedicated file to reduce lock contention with the
/// rest of the state store.
pub async fn init(
codex_home: PathBuf,
default_provider: String,
otel: Option<OtelManager>,
) -> anyhow::Result<Arc<Self>> {
tokio::fs::create_dir_all(&codex_home).await?;
remove_legacy_state_files(&codex_home).await;
let current_state_name = state_db_filename();
let current_logs_name = logs_db_filename();
remove_legacy_db_files(
&codex_home,
current_state_name.as_str(),
STATE_DB_FILENAME,
"state",
)
.await;
remove_legacy_db_files(
&codex_home,
current_logs_name.as_str(),
LOGS_DB_FILENAME,
"logs",
)
.await;
let state_path = state_db_path(codex_home.as_path());
let logs_path = logs_db_path(codex_home.as_path());
let existed = tokio::fs::try_exists(&state_path).await.unwrap_or(false);
let pool = match open_sqlite(&state_path).await {
let pool = match open_sqlite(&state_path, &STATE_MIGRATOR).await {
Ok(db) => Arc::new(db),
Err(err) => {
warn!("failed to open state db at {}: {err}", state_path.display());
@@ -93,11 +116,22 @@ impl StateRuntime {
return Err(err);
}
};
let logs_pool = match open_sqlite(&logs_path, &LOGS_MIGRATOR).await {
Ok(db) => Arc::new(db),
Err(err) => {
warn!("failed to open logs db at {}: {err}", logs_path.display());
if let Some(otel) = otel.as_ref() {
otel.counter(METRIC_DB_INIT, 1, &[("status", "open_error")]);
}
return Err(err);
}
};
if let Some(otel) = otel.as_ref() {
otel.counter(METRIC_DB_INIT, 1, &[("status", "opened")]);
}
let runtime = Arc::new(Self {
pool,
logs_pool,
codex_home,
default_provider,
});
@@ -113,7 +147,7 @@ impl StateRuntime {
}
}
async fn open_sqlite(path: &Path) -> anyhow::Result<SqlitePool> {
async fn open_sqlite(path: &Path, migrator: &'static Migrator) -> anyhow::Result<SqlitePool> {
let options = SqliteConnectOptions::new()
.filename(path)
.create_if_missing(true)
@@ -125,26 +159,42 @@ async fn open_sqlite(path: &Path) -> anyhow::Result<SqlitePool> {
.max_connections(5)
.connect_with(options)
.await?;
MIGRATOR.run(&pool).await?;
migrator.run(&pool).await?;
Ok(pool)
}
/// Build the versioned on-disk name `<base_name>_<version>.sqlite` shared by
/// the state and logs databases.
fn db_filename(base_name: &str, version: u32) -> String {
    format!("{}_{}.sqlite", base_name, version)
}
pub fn state_db_filename() -> String {
format!("{STATE_DB_FILENAME}_{STATE_DB_VERSION}.sqlite")
db_filename(STATE_DB_FILENAME, STATE_DB_VERSION)
}
/// Absolute path of the current versioned state database inside `codex_home`.
pub fn state_db_path(codex_home: &Path) -> PathBuf {
    let mut path = codex_home.to_path_buf();
    path.push(state_db_filename());
    path
}
async fn remove_legacy_state_files(codex_home: &Path) {
let current_name = state_db_filename();
/// Current filename of the dedicated logs database, e.g. `logs_1.sqlite`
/// (versioned via `db_filename` so stale versions can be cleaned up).
pub fn logs_db_filename() -> String {
    db_filename(LOGS_DB_FILENAME, LOGS_DB_VERSION)
}
/// Absolute path of the dedicated logs database inside `codex_home`.
pub fn logs_db_path(codex_home: &Path) -> PathBuf {
    let mut path = codex_home.to_path_buf();
    path.push(logs_db_filename());
    path
}
async fn remove_legacy_db_files(
codex_home: &Path,
current_name: &str,
base_name: &str,
db_label: &str,
) {
let mut entries = match tokio::fs::read_dir(codex_home).await {
Ok(entries) => entries,
Err(err) => {
warn!(
"failed to read codex_home for state db cleanup {}: {err}",
codex_home.display()
"failed to read codex_home for {db_label} db cleanup {}: {err}",
codex_home.display(),
);
return;
}
@@ -160,37 +210,37 @@ async fn remove_legacy_state_files(codex_home: &Path) {
}
let file_name = entry.file_name();
let file_name = file_name.to_string_lossy();
if !should_remove_state_file(file_name.as_ref(), current_name.as_str()) {
if !should_remove_db_file(file_name.as_ref(), current_name, base_name) {
continue;
}
let legacy_path = entry.path();
if let Err(err) = tokio::fs::remove_file(&legacy_path).await {
warn!(
"failed to remove legacy state db file {}: {err}",
legacy_path.display()
"failed to remove legacy {db_label} db file {}: {err}",
legacy_path.display(),
);
}
}
}
fn should_remove_state_file(file_name: &str, current_name: &str) -> bool {
let mut base_name = file_name;
fn should_remove_db_file(file_name: &str, current_name: &str, base_name: &str) -> bool {
let mut normalized_name = file_name;
for suffix in ["-wal", "-shm", "-journal"] {
if let Some(stripped) = file_name.strip_suffix(suffix) {
base_name = stripped;
normalized_name = stripped;
break;
}
}
if base_name == current_name {
if normalized_name == current_name {
return false;
}
let unversioned_name = format!("{STATE_DB_FILENAME}.sqlite");
if base_name == unversioned_name {
let unversioned_name = format!("{base_name}.sqlite");
if normalized_name == unversioned_name {
return true;
}
let Some(version_with_extension) = base_name.strip_prefix(&format!("{STATE_DB_FILENAME}_"))
let Some(version_with_extension) = normalized_name.strip_prefix(&format!("{base_name}_"))
else {
return false;
};