mirror of
https://github.com/openai/codex.git
synced 2026-05-04 05:11:37 +03:00
Move sqlite logs to a dedicated database (#13772)
## Summary

- move sqlite log reads and writes onto a dedicated `logs_1.sqlite` database to reduce lock contention with the main state DB
- add a dedicated logs migrator and route `codex-state-logs` to the new database path
- leave the old `logs` table in the existing state DB untouched for now

## Testing

- `just fmt`
- `cargo test -p codex-state`

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
committed by
GitHub
parent
51fcdc760d
commit
4e6c6193a1
@@ -6,6 +6,8 @@ use crate::AgentJobItemStatus;
|
||||
use crate::AgentJobProgress;
|
||||
use crate::AgentJobStatus;
|
||||
use crate::DB_ERROR_METRIC;
|
||||
use crate::LOGS_DB_FILENAME;
|
||||
use crate::LOGS_DB_VERSION;
|
||||
use crate::LogEntry;
|
||||
use crate::LogQuery;
|
||||
use crate::LogRow;
|
||||
@@ -17,7 +19,8 @@ use crate::ThreadMetadata;
|
||||
use crate::ThreadMetadataBuilder;
|
||||
use crate::ThreadsPage;
|
||||
use crate::apply_rollout_item;
|
||||
use crate::migrations::MIGRATOR;
|
||||
use crate::migrations::LOGS_MIGRATOR;
|
||||
use crate::migrations::STATE_MIGRATOR;
|
||||
use crate::model::AgentJobRow;
|
||||
use crate::model::ThreadRow;
|
||||
use crate::model::anchor_from_item;
|
||||
@@ -37,6 +40,7 @@ use sqlx::Row;
|
||||
use sqlx::Sqlite;
|
||||
use sqlx::SqliteConnection;
|
||||
use sqlx::SqlitePool;
|
||||
use sqlx::migrate::Migrator;
|
||||
use sqlx::sqlite::SqliteConnectOptions;
|
||||
use sqlx::sqlite::SqliteJournalMode;
|
||||
use sqlx::sqlite::SqlitePoolOptions;
|
||||
@@ -68,22 +72,41 @@ pub struct StateRuntime {
|
||||
codex_home: PathBuf,
|
||||
default_provider: String,
|
||||
pool: Arc<sqlx::SqlitePool>,
|
||||
logs_pool: Arc<sqlx::SqlitePool>,
|
||||
}
|
||||
|
||||
impl StateRuntime {
|
||||
/// Initialize the state runtime using the provided Codex home and default provider.
|
||||
///
|
||||
/// This opens (and migrates) the SQLite database at `codex_home/state.sqlite`.
|
||||
/// This opens (and migrates) the SQLite databases under `codex_home`,
|
||||
/// keeping logs in a dedicated file to reduce lock contention with the
|
||||
/// rest of the state store.
|
||||
pub async fn init(
|
||||
codex_home: PathBuf,
|
||||
default_provider: String,
|
||||
otel: Option<OtelManager>,
|
||||
) -> anyhow::Result<Arc<Self>> {
|
||||
tokio::fs::create_dir_all(&codex_home).await?;
|
||||
remove_legacy_state_files(&codex_home).await;
|
||||
let current_state_name = state_db_filename();
|
||||
let current_logs_name = logs_db_filename();
|
||||
remove_legacy_db_files(
|
||||
&codex_home,
|
||||
current_state_name.as_str(),
|
||||
STATE_DB_FILENAME,
|
||||
"state",
|
||||
)
|
||||
.await;
|
||||
remove_legacy_db_files(
|
||||
&codex_home,
|
||||
current_logs_name.as_str(),
|
||||
LOGS_DB_FILENAME,
|
||||
"logs",
|
||||
)
|
||||
.await;
|
||||
let state_path = state_db_path(codex_home.as_path());
|
||||
let logs_path = logs_db_path(codex_home.as_path());
|
||||
let existed = tokio::fs::try_exists(&state_path).await.unwrap_or(false);
|
||||
let pool = match open_sqlite(&state_path).await {
|
||||
let pool = match open_sqlite(&state_path, &STATE_MIGRATOR).await {
|
||||
Ok(db) => Arc::new(db),
|
||||
Err(err) => {
|
||||
warn!("failed to open state db at {}: {err}", state_path.display());
|
||||
@@ -93,11 +116,22 @@ impl StateRuntime {
|
||||
return Err(err);
|
||||
}
|
||||
};
|
||||
let logs_pool = match open_sqlite(&logs_path, &LOGS_MIGRATOR).await {
|
||||
Ok(db) => Arc::new(db),
|
||||
Err(err) => {
|
||||
warn!("failed to open logs db at {}: {err}", logs_path.display());
|
||||
if let Some(otel) = otel.as_ref() {
|
||||
otel.counter(METRIC_DB_INIT, 1, &[("status", "open_error")]);
|
||||
}
|
||||
return Err(err);
|
||||
}
|
||||
};
|
||||
if let Some(otel) = otel.as_ref() {
|
||||
otel.counter(METRIC_DB_INIT, 1, &[("status", "opened")]);
|
||||
}
|
||||
let runtime = Arc::new(Self {
|
||||
pool,
|
||||
logs_pool,
|
||||
codex_home,
|
||||
default_provider,
|
||||
});
|
||||
@@ -113,7 +147,7 @@ impl StateRuntime {
|
||||
}
|
||||
}
|
||||
|
||||
async fn open_sqlite(path: &Path) -> anyhow::Result<SqlitePool> {
|
||||
async fn open_sqlite(path: &Path, migrator: &'static Migrator) -> anyhow::Result<SqlitePool> {
|
||||
let options = SqliteConnectOptions::new()
|
||||
.filename(path)
|
||||
.create_if_missing(true)
|
||||
@@ -125,26 +159,42 @@ async fn open_sqlite(path: &Path) -> anyhow::Result<SqlitePool> {
|
||||
.max_connections(5)
|
||||
.connect_with(options)
|
||||
.await?;
|
||||
MIGRATOR.run(&pool).await?;
|
||||
migrator.run(&pool).await?;
|
||||
Ok(pool)
|
||||
}
|
||||
|
||||
fn db_filename(base_name: &str, version: u32) -> String {
|
||||
format!("{base_name}_{version}.sqlite")
|
||||
}
|
||||
|
||||
pub fn state_db_filename() -> String {
|
||||
format!("{STATE_DB_FILENAME}_{STATE_DB_VERSION}.sqlite")
|
||||
db_filename(STATE_DB_FILENAME, STATE_DB_VERSION)
|
||||
}
|
||||
|
||||
pub fn state_db_path(codex_home: &Path) -> PathBuf {
|
||||
codex_home.join(state_db_filename())
|
||||
}
|
||||
|
||||
async fn remove_legacy_state_files(codex_home: &Path) {
|
||||
let current_name = state_db_filename();
|
||||
pub fn logs_db_filename() -> String {
|
||||
db_filename(LOGS_DB_FILENAME, LOGS_DB_VERSION)
|
||||
}
|
||||
|
||||
pub fn logs_db_path(codex_home: &Path) -> PathBuf {
|
||||
codex_home.join(logs_db_filename())
|
||||
}
|
||||
|
||||
async fn remove_legacy_db_files(
|
||||
codex_home: &Path,
|
||||
current_name: &str,
|
||||
base_name: &str,
|
||||
db_label: &str,
|
||||
) {
|
||||
let mut entries = match tokio::fs::read_dir(codex_home).await {
|
||||
Ok(entries) => entries,
|
||||
Err(err) => {
|
||||
warn!(
|
||||
"failed to read codex_home for state db cleanup {}: {err}",
|
||||
codex_home.display()
|
||||
"failed to read codex_home for {db_label} db cleanup {}: {err}",
|
||||
codex_home.display(),
|
||||
);
|
||||
return;
|
||||
}
|
||||
@@ -160,37 +210,37 @@ async fn remove_legacy_state_files(codex_home: &Path) {
|
||||
}
|
||||
let file_name = entry.file_name();
|
||||
let file_name = file_name.to_string_lossy();
|
||||
if !should_remove_state_file(file_name.as_ref(), current_name.as_str()) {
|
||||
if !should_remove_db_file(file_name.as_ref(), current_name, base_name) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let legacy_path = entry.path();
|
||||
if let Err(err) = tokio::fs::remove_file(&legacy_path).await {
|
||||
warn!(
|
||||
"failed to remove legacy state db file {}: {err}",
|
||||
legacy_path.display()
|
||||
"failed to remove legacy {db_label} db file {}: {err}",
|
||||
legacy_path.display(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn should_remove_state_file(file_name: &str, current_name: &str) -> bool {
|
||||
let mut base_name = file_name;
|
||||
fn should_remove_db_file(file_name: &str, current_name: &str, base_name: &str) -> bool {
|
||||
let mut normalized_name = file_name;
|
||||
for suffix in ["-wal", "-shm", "-journal"] {
|
||||
if let Some(stripped) = file_name.strip_suffix(suffix) {
|
||||
base_name = stripped;
|
||||
normalized_name = stripped;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if base_name == current_name {
|
||||
if normalized_name == current_name {
|
||||
return false;
|
||||
}
|
||||
let unversioned_name = format!("{STATE_DB_FILENAME}.sqlite");
|
||||
if base_name == unversioned_name {
|
||||
let unversioned_name = format!("{base_name}.sqlite");
|
||||
if normalized_name == unversioned_name {
|
||||
return true;
|
||||
}
|
||||
|
||||
let Some(version_with_extension) = base_name.strip_prefix(&format!("{STATE_DB_FILENAME}_"))
|
||||
let Some(version_with_extension) = normalized_name.strip_prefix(&format!("{base_name}_"))
|
||||
else {
|
||||
return false;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user