Files
codex/codex-rs/state/src/log_db.rs
Charley Cunningham fce4ad9cf4 Add process_uuid to sqlite logs (#11534)
## Summary
This PR is the first slice of the per-session `/feedback` logging work:
it adds a process-unique identifier to SQLite log rows.

It does **not** change `/feedback` sourcing behavior yet.

## Changes
- Add migration `0009_logs_process_id.sql` to extend `logs` with:
  - `process_uuid TEXT`
  - `idx_logs_process_uuid` index
- Extend state log models:
  - `LogEntry.process_uuid: Option<String>`
  - `LogRow.process_uuid: Option<String>`
- Stamp each log row with a stable per-process UUID in the sqlite log
layer:
  - generated once per process as `pid:<pid>:<uuid>`
- Update sqlite log insert/query paths to persist and read
`process_uuid`:
  - `INSERT INTO logs (..., process_uuid, ...)`
  - `SELECT ..., process_uuid, ... FROM logs`

## Why
App-server runs many sessions in one process. This change provides a
process-scoping primitive we need for follow-up `/feedback` work, so
threadless/process-level logs can be associated with the emitting
process without mixing across processes.

## Non-goals in this PR
- No `/feedback` transport/source changes
- No attachment size changes
- No sqlite retention/trim policy changes

## Testing
- `just fmt`
- CI will run the full checks
2026-02-14 17:27:22 -08:00

307 lines
8.8 KiB
Rust

//! Tracing log export into the state SQLite database.
//!
//! This module provides a `tracing_subscriber::Layer` that captures events and
//! inserts them into the `logs` table in `state.sqlite`. The writer runs in a
//! background task and batches inserts to keep logging overhead low.
//!
//! ## Usage
//!
//! ```no_run
//! use codex_state::log_db;
//! use tracing_subscriber::prelude::*;
//!
//! # async fn example(state_db: std::sync::Arc<codex_state::StateRuntime>) {
//! let layer = log_db::start(state_db);
//! let _ = tracing_subscriber::registry()
//! .with(layer)
//! .try_init();
//! # }
//! ```
use chrono::Duration as ChronoDuration;
use chrono::Utc;
use std::sync::OnceLock;
use std::time::Duration;
use std::time::SystemTime;
use std::time::UNIX_EPOCH;
use tokio::sync::mpsc;
use tracing::Event;
use tracing::field::Field;
use tracing::field::Visit;
use tracing::span::Attributes;
use tracing::span::Id;
use tracing::span::Record;
use tracing_subscriber::Layer;
use tracing_subscriber::registry::LookupSpan;
use uuid::Uuid;
use crate::LogEntry;
use crate::StateRuntime;
const LOG_QUEUE_CAPACITY: usize = 512;
const LOG_BATCH_SIZE: usize = 64;
const LOG_FLUSH_INTERVAL: Duration = Duration::from_millis(250);
const LOG_RETENTION_DAYS: i64 = 90;
pub struct LogDbLayer {
sender: mpsc::Sender<LogEntry>,
process_uuid: String,
}
pub fn start(state_db: std::sync::Arc<StateRuntime>) -> LogDbLayer {
let process_uuid = current_process_log_uuid().to_string();
let (sender, receiver) = mpsc::channel(LOG_QUEUE_CAPACITY);
tokio::spawn(run_inserter(std::sync::Arc::clone(&state_db), receiver));
tokio::spawn(run_retention_cleanup(state_db));
LogDbLayer {
sender,
process_uuid,
}
}
impl<S> Layer<S> for LogDbLayer
where
S: tracing::Subscriber + for<'a> LookupSpan<'a>,
{
fn on_new_span(
&self,
attrs: &Attributes<'_>,
id: &Id,
ctx: tracing_subscriber::layer::Context<'_, S>,
) {
let mut visitor = SpanFieldVisitor::default();
attrs.record(&mut visitor);
if let Some(span) = ctx.span(id) {
span.extensions_mut().insert(SpanLogContext {
thread_id: visitor.thread_id,
});
}
}
fn on_record(
&self,
id: &Id,
values: &Record<'_>,
ctx: tracing_subscriber::layer::Context<'_, S>,
) {
let mut visitor = SpanFieldVisitor::default();
values.record(&mut visitor);
if visitor.thread_id.is_none() {
return;
}
if let Some(span) = ctx.span(id) {
let mut extensions = span.extensions_mut();
if let Some(log_context) = extensions.get_mut::<SpanLogContext>() {
log_context.thread_id = visitor.thread_id;
} else {
extensions.insert(SpanLogContext {
thread_id: visitor.thread_id,
});
}
}
}
fn on_event(&self, event: &Event<'_>, ctx: tracing_subscriber::layer::Context<'_, S>) {
let metadata = event.metadata();
let mut visitor = MessageVisitor::default();
event.record(&mut visitor);
let thread_id = visitor
.thread_id
.clone()
.or_else(|| event_thread_id(event, &ctx));
let now = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_else(|_| Duration::from_secs(0));
let entry = LogEntry {
ts: now.as_secs() as i64,
ts_nanos: now.subsec_nanos() as i64,
level: metadata.level().as_str().to_string(),
target: metadata.target().to_string(),
message: visitor.message,
thread_id,
process_uuid: Some(self.process_uuid.clone()),
module_path: metadata.module_path().map(ToString::to_string),
file: metadata.file().map(ToString::to_string),
line: metadata.line().map(|line| line as i64),
};
let _ = self.sender.try_send(entry);
}
}
#[derive(Clone, Debug, Default)]
struct SpanLogContext {
thread_id: Option<String>,
}
#[derive(Default)]
struct SpanFieldVisitor {
thread_id: Option<String>,
}
impl SpanFieldVisitor {
fn record_field(&mut self, field: &Field, value: String) {
if field.name() == "thread_id" && self.thread_id.is_none() {
self.thread_id = Some(value);
}
}
}
impl Visit for SpanFieldVisitor {
fn record_i64(&mut self, field: &Field, value: i64) {
self.record_field(field, value.to_string());
}
fn record_u64(&mut self, field: &Field, value: u64) {
self.record_field(field, value.to_string());
}
fn record_bool(&mut self, field: &Field, value: bool) {
self.record_field(field, value.to_string());
}
fn record_f64(&mut self, field: &Field, value: f64) {
self.record_field(field, value.to_string());
}
fn record_str(&mut self, field: &Field, value: &str) {
self.record_field(field, value.to_string());
}
fn record_error(&mut self, field: &Field, value: &(dyn std::error::Error + 'static)) {
self.record_field(field, value.to_string());
}
fn record_debug(&mut self, field: &Field, value: &dyn std::fmt::Debug) {
self.record_field(field, format!("{value:?}"));
}
}
fn event_thread_id<S>(
event: &Event<'_>,
ctx: &tracing_subscriber::layer::Context<'_, S>,
) -> Option<String>
where
S: tracing::Subscriber + for<'a> LookupSpan<'a>,
{
let mut thread_id = None;
if let Some(scope) = ctx.event_scope(event) {
for span in scope.from_root() {
let extensions = span.extensions();
if let Some(log_context) = extensions.get::<SpanLogContext>()
&& log_context.thread_id.is_some()
{
thread_id = log_context.thread_id.clone();
}
}
}
thread_id
}
fn current_process_log_uuid() -> &'static str {
static PROCESS_LOG_UUID: OnceLock<String> = OnceLock::new();
PROCESS_LOG_UUID.get_or_init(|| {
let pid = std::process::id();
let process_uuid = Uuid::new_v4();
format!("pid:{pid}:{process_uuid}")
})
}
async fn run_inserter(
state_db: std::sync::Arc<StateRuntime>,
mut receiver: mpsc::Receiver<LogEntry>,
) {
let mut buffer = Vec::with_capacity(LOG_BATCH_SIZE);
let mut ticker = tokio::time::interval(LOG_FLUSH_INTERVAL);
loop {
tokio::select! {
maybe_entry = receiver.recv() => {
match maybe_entry {
Some(entry) => {
buffer.push(entry);
if buffer.len() >= LOG_BATCH_SIZE {
flush(&state_db, &mut buffer).await;
}
}
None => {
flush(&state_db, &mut buffer).await;
break;
}
}
}
_ = ticker.tick() => {
flush(&state_db, &mut buffer).await;
}
}
}
}
async fn flush(state_db: &std::sync::Arc<StateRuntime>, buffer: &mut Vec<LogEntry>) {
if buffer.is_empty() {
return;
}
let entries = buffer.split_off(0);
let _ = state_db.insert_logs(entries.as_slice()).await;
}
async fn run_retention_cleanup(state_db: std::sync::Arc<StateRuntime>) {
let Some(cutoff) = Utc::now().checked_sub_signed(ChronoDuration::days(LOG_RETENTION_DAYS))
else {
return;
};
let _ = state_db.delete_logs_before(cutoff.timestamp()).await;
}
#[derive(Default)]
struct MessageVisitor {
message: Option<String>,
thread_id: Option<String>,
}
impl MessageVisitor {
fn record_field(&mut self, field: &Field, value: String) {
if field.name() == "message" && self.message.is_none() {
self.message = Some(value.clone());
}
if field.name() == "thread_id" && self.thread_id.is_none() {
self.thread_id = Some(value);
}
}
}
impl Visit for MessageVisitor {
fn record_i64(&mut self, field: &Field, value: i64) {
self.record_field(field, value.to_string());
}
fn record_u64(&mut self, field: &Field, value: u64) {
self.record_field(field, value.to_string());
}
fn record_bool(&mut self, field: &Field, value: bool) {
self.record_field(field, value.to_string());
}
fn record_f64(&mut self, field: &Field, value: f64) {
self.record_field(field, value.to_string());
}
fn record_str(&mut self, field: &Field, value: &str) {
self.record_field(field, value.to_string());
}
fn record_error(&mut self, field: &Field, value: &(dyn std::error::Error + 'static)) {
self.record_field(field, value.to_string());
}
fn record_debug(&mut self, field: &Field, value: &dyn std::fmt::Debug) {
self.record_field(field, format!("{value:?}"));
}
}