Leverage state DB metadata for thread summaries (#10621)

Summary:
- read conversation summaries and cwd info from the state DB when
possible so we no longer rely on rollout files for metadata and avoid
extra I/O
- persist CLI version in thread metadata, surface it through summary
builders, and add the necessary DB migration hooks
- simplify thread listing by using enriched state DB data directly
rather than reading rollout heads

Testing:
- Not run (not requested)
This commit is contained in:
jif-oai
2026-02-05 16:39:11 +00:00
committed by GitHub
parent 68e82e5dc9
commit 9ee746afd6
14 changed files with 748 additions and 408 deletions

View File

@@ -5,9 +5,12 @@ use codex_protocol::protocol::RolloutItem;
use codex_protocol::protocol::SessionMetaLine;
use codex_protocol::protocol::TurnContextItem;
use codex_protocol::protocol::USER_MESSAGE_BEGIN;
use codex_protocol::protocol::UserMessageEvent;
use serde::Serialize;
use serde_json::Value;
const IMAGE_ONLY_USER_MESSAGE_PLACEHOLDER: &str = "[Image]";
/// Apply a rollout item to the metadata structure.
pub fn apply_rollout_item(
metadata: &mut ThreadMetadata,
@@ -37,6 +40,9 @@ fn apply_session_meta_from_item(metadata: &mut ThreadMetadata, meta_line: &Sessi
if let Some(provider) = meta_line.meta.model_provider.as_deref() {
metadata.model_provider = provider.to_string();
}
if !meta_line.meta.cli_version.is_empty() {
metadata.cli_version = meta_line.meta.cli_version.clone();
}
if !meta_line.meta.cwd.as_os_str().is_empty() {
metadata.cwd = meta_line.meta.cwd.clone();
}
@@ -61,9 +67,14 @@ fn apply_event_msg(metadata: &mut ThreadMetadata, event: &EventMsg) {
}
}
EventMsg::UserMessage(user) => {
metadata.has_user_event = true;
if metadata.first_user_message.is_none() {
metadata.first_user_message = user_message_preview(user);
}
if metadata.title.is_empty() {
metadata.title = strip_user_message_prefix(user.message.as_str()).to_string();
let title = strip_user_message_prefix(user.message.as_str());
if !title.is_empty() {
metadata.title = title.to_string();
}
}
}
_ => {}
@@ -71,7 +82,7 @@ fn apply_event_msg(metadata: &mut ThreadMetadata, event: &EventMsg) {
}
/// Applies a response item to the thread metadata; currently a deliberate no-op.
///
/// Both parameters are unused (hence the underscore prefixes): nothing is read from the item
/// and nothing is written to the metadata here.
fn apply_response_item(_metadata: &mut ThreadMetadata, _item: &ResponseItem) {
// Title and has_user_event are derived from EventMsg::UserMessage only.
// Title and first_user_message are derived from EventMsg::UserMessage only.
}
fn strip_user_message_prefix(text: &str) -> &str {
@@ -81,6 +92,22 @@ fn strip_user_message_prefix(text: &str) -> &str {
}
}
/// Builds the preview text recorded as a thread's first user message.
///
/// The message is first passed through `strip_user_message_prefix`. If the result is
/// non-empty it becomes the preview. Otherwise, when the event has a non-empty `images`
/// list or a non-empty `local_images` list, the image-only placeholder is returned.
/// With neither text nor images there is no preview and `None` is returned.
fn user_message_preview(user: &UserMessageEvent) -> Option<String> {
    let text = strip_user_message_prefix(user.message.as_str());
    if text.is_empty() {
        // `images` is optional, so an absent list and an empty list both count as "no images".
        let has_listed_images = user
            .images
            .as_ref()
            .is_some_and(|images| !images.is_empty());
        let has_any_image = has_listed_images || !user.local_images.is_empty();
        has_any_image.then(|| IMAGE_ONLY_USER_MESSAGE_PLACEHOLDER.to_string())
    } else {
        Some(text.to_string())
    }
}
pub(crate) fn enum_to_string<T: Serialize>(value: &T) -> String {
match serde_json::to_value(value) {
Ok(Value::String(s)) => s,
@@ -108,7 +135,7 @@ mod tests {
use uuid::Uuid;
#[test]
fn response_item_user_messages_do_not_set_title_or_has_user_event() {
fn response_item_user_messages_do_not_set_title_or_first_user_message() {
let mut metadata = metadata_for_test();
let item = RolloutItem::ResponseItem(ResponseItem::Message {
id: None,
@@ -122,12 +149,12 @@ mod tests {
apply_rollout_item(&mut metadata, &item, "test-provider");
assert_eq!(metadata.has_user_event, false);
assert_eq!(metadata.first_user_message, None);
assert_eq!(metadata.title, "");
}
#[test]
fn event_msg_user_messages_set_title_and_has_user_event() {
fn event_msg_user_messages_set_title_and_first_user_message() {
let mut metadata = metadata_for_test();
let item = RolloutItem::EventMsg(EventMsg::UserMessage(UserMessageEvent {
message: format!("{USER_MESSAGE_BEGIN} actual user request"),
@@ -138,10 +165,48 @@ mod tests {
apply_rollout_item(&mut metadata, &item, "test-provider");
assert_eq!(metadata.has_user_event, true);
assert_eq!(
metadata.first_user_message.as_deref(),
Some("actual user request")
);
assert_eq!(metadata.title, "actual user request");
}
/// A user message that carries only an image (empty text) should record the image
/// placeholder as the first-user-message preview and leave the title empty.
#[test]
fn event_msg_image_only_user_message_sets_image_placeholder_preview() {
    let image_only_event = UserMessageEvent {
        message: String::new(),
        images: Some(vec!["https://example.com/image.png".to_string()]),
        local_images: Vec::new(),
        text_elements: Vec::new(),
    };
    let rollout_item = RolloutItem::EventMsg(EventMsg::UserMessage(image_only_event));
    let mut thread_metadata = metadata_for_test();

    apply_rollout_item(&mut thread_metadata, &rollout_item, "test-provider");

    assert_eq!(
        thread_metadata.first_user_message.as_deref(),
        Some(super::IMAGE_ONLY_USER_MESSAGE_PLACEHOLDER)
    );
    assert_eq!(thread_metadata.title, "");
}
/// A blank user message with an empty image list and no local images should leave both
/// the first-user-message preview and the title unset.
#[test]
fn event_msg_blank_user_message_without_images_keeps_first_user_message_empty() {
    let blank_event = UserMessageEvent {
        message: " ".to_string(),
        images: Some(Vec::new()),
        local_images: Vec::new(),
        text_elements: Vec::new(),
    };
    let rollout_item = RolloutItem::EventMsg(EventMsg::UserMessage(blank_event));
    let mut thread_metadata = metadata_for_test();

    apply_rollout_item(&mut thread_metadata, &rollout_item, "test-provider");

    assert_eq!(thread_metadata.first_user_message, None);
    assert_eq!(thread_metadata.title, "");
}
fn metadata_for_test() -> ThreadMetadata {
let id = ThreadId::from_string(&Uuid::from_u128(42).to_string()).expect("thread id");
let created_at = DateTime::<Utc>::from_timestamp(1_735_689_600, 0).expect("timestamp");
@@ -153,11 +218,12 @@ mod tests {
source: "cli".to_string(),
model_provider: "openai".to_string(),
cwd: PathBuf::from("/tmp"),
cli_version: "0.0.0".to_string(),
title: String::new(),
sandbox_policy: "read-only".to_string(),
approval_mode: "on-request".to_string(),
tokens_used: 1,
has_user_event: false,
first_user_message: None,
archived_at: None,
git_sha: None,
git_branch: None,

View File

@@ -66,6 +66,8 @@ pub struct ThreadMetadata {
pub model_provider: String,
/// The working directory for the thread.
pub cwd: PathBuf,
/// Version of the CLI that created the thread.
pub cli_version: String,
/// A best-effort thread title.
pub title: String,
/// The sandbox policy (stringified enum).
@@ -74,8 +76,8 @@ pub struct ThreadMetadata {
pub approval_mode: String,
/// The last observed token usage.
pub tokens_used: i64,
/// Whether the thread has observed a user message.
pub has_user_event: bool,
/// First user message observed for this thread, if any.
pub first_user_message: Option<String>,
/// The archive timestamp, if the thread is archived.
pub archived_at: Option<DateTime<Utc>>,
/// The git commit SHA, if known.
@@ -103,6 +105,8 @@ pub struct ThreadMetadataBuilder {
pub model_provider: Option<String>,
/// The working directory for the thread.
pub cwd: PathBuf,
/// Version of the CLI that created the thread.
pub cli_version: Option<String>,
/// The sandbox policy.
pub sandbox_policy: SandboxPolicy,
/// The approval mode.
@@ -133,6 +137,7 @@ impl ThreadMetadataBuilder {
source,
model_provider: None,
cwd: PathBuf::new(),
cli_version: None,
sandbox_policy: SandboxPolicy::ReadOnly,
approval_mode: AskForApproval::OnRequest,
archived_at: None,
@@ -163,11 +168,12 @@ impl ThreadMetadataBuilder {
.clone()
.unwrap_or_else(|| default_provider.to_string()),
cwd: self.cwd.clone(),
cli_version: self.cli_version.clone().unwrap_or_default(),
title: String::new(),
sandbox_policy,
approval_mode,
tokens_used: 0,
has_user_event: false,
first_user_message: None,
archived_at: self.archived_at.map(canonicalize_datetime),
git_sha: self.git_sha.clone(),
git_branch: self.git_branch.clone(),
@@ -201,6 +207,9 @@ impl ThreadMetadata {
if self.cwd != other.cwd {
diffs.push("cwd");
}
if self.cli_version != other.cli_version {
diffs.push("cli_version");
}
if self.title != other.title {
diffs.push("title");
}
@@ -213,8 +222,8 @@ impl ThreadMetadata {
if self.tokens_used != other.tokens_used {
diffs.push("tokens_used");
}
if self.has_user_event != other.has_user_event {
diffs.push("has_user_event");
if self.first_user_message != other.first_user_message {
diffs.push("first_user_message");
}
if self.archived_at != other.archived_at {
diffs.push("archived_at");
@@ -245,11 +254,12 @@ pub(crate) struct ThreadRow {
source: String,
model_provider: String,
cwd: String,
cli_version: String,
title: String,
sandbox_policy: String,
approval_mode: String,
tokens_used: i64,
has_user_event: bool,
first_user_message: String,
archived_at: Option<i64>,
git_sha: Option<String>,
git_branch: Option<String>,
@@ -266,11 +276,12 @@ impl ThreadRow {
source: row.try_get("source")?,
model_provider: row.try_get("model_provider")?,
cwd: row.try_get("cwd")?,
cli_version: row.try_get("cli_version")?,
title: row.try_get("title")?,
sandbox_policy: row.try_get("sandbox_policy")?,
approval_mode: row.try_get("approval_mode")?,
tokens_used: row.try_get("tokens_used")?,
has_user_event: row.try_get("has_user_event")?,
first_user_message: row.try_get("first_user_message")?,
archived_at: row.try_get("archived_at")?,
git_sha: row.try_get("git_sha")?,
git_branch: row.try_get("git_branch")?,
@@ -291,11 +302,12 @@ impl TryFrom<ThreadRow> for ThreadMetadata {
source,
model_provider,
cwd,
cli_version,
title,
sandbox_policy,
approval_mode,
tokens_used,
has_user_event,
first_user_message,
archived_at,
git_sha,
git_branch,
@@ -309,11 +321,12 @@ impl TryFrom<ThreadRow> for ThreadMetadata {
source,
model_provider,
cwd: PathBuf::from(cwd),
cli_version,
title,
sandbox_policy,
approval_mode,
tokens_used,
has_user_event,
first_user_message: (!first_user_message.is_empty()).then_some(first_user_message),
archived_at: archived_at.map(epoch_seconds_to_datetime).transpose()?,
git_sha,
git_branch,

View File

@@ -177,11 +177,12 @@ SELECT
source,
model_provider,
cwd,
cli_version,
title,
sandbox_policy,
approval_mode,
tokens_used,
has_user_event,
first_user_message,
archived_at,
git_sha,
git_branch,
@@ -295,11 +296,12 @@ SELECT
source,
model_provider,
cwd,
cli_version,
title,
sandbox_policy,
approval_mode,
tokens_used,
has_user_event,
first_user_message,
archived_at,
git_sha,
git_branch,
@@ -449,17 +451,18 @@ INSERT INTO threads (
source,
model_provider,
cwd,
cli_version,
title,
sandbox_policy,
approval_mode,
tokens_used,
has_user_event,
first_user_message,
archived,
archived_at,
git_sha,
git_branch,
git_origin_url
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(id) DO UPDATE SET
rollout_path = excluded.rollout_path,
created_at = excluded.created_at,
@@ -467,11 +470,12 @@ ON CONFLICT(id) DO UPDATE SET
source = excluded.source,
model_provider = excluded.model_provider,
cwd = excluded.cwd,
cli_version = excluded.cli_version,
title = excluded.title,
sandbox_policy = excluded.sandbox_policy,
approval_mode = excluded.approval_mode,
tokens_used = excluded.tokens_used,
has_user_event = excluded.has_user_event,
first_user_message = excluded.first_user_message,
archived = excluded.archived,
archived_at = excluded.archived_at,
git_sha = excluded.git_sha,
@@ -486,11 +490,12 @@ ON CONFLICT(id) DO UPDATE SET
.bind(metadata.source.as_str())
.bind(metadata.model_provider.as_str())
.bind(metadata.cwd.display().to_string())
.bind(metadata.cli_version.as_str())
.bind(metadata.title.as_str())
.bind(metadata.sandbox_policy.as_str())
.bind(metadata.approval_mode.as_str())
.bind(metadata.tokens_used)
.bind(metadata.has_user_event)
.bind(metadata.first_user_message.as_deref().unwrap_or_default())
.bind(metadata.archived_at.is_some())
.bind(metadata.archived_at.map(datetime_to_epoch_seconds))
.bind(metadata.git_sha.as_deref())
@@ -900,7 +905,7 @@ fn push_thread_filters<'a>(
} else {
builder.push(" AND archived = 0");
}
builder.push(" AND has_user_event = 1");
builder.push(" AND first_user_message <> ''");
if !allowed_sources.is_empty() {
builder.push(" AND source IN (");
let mut separated = builder.separated(", ");
@@ -1391,11 +1396,12 @@ mod tests {
source: "cli".to_string(),
model_provider: "test-provider".to_string(),
cwd,
cli_version: "0.0.0".to_string(),
title: String::new(),
sandbox_policy: crate::extract::enum_to_string(&SandboxPolicy::ReadOnly),
approval_mode: crate::extract::enum_to_string(&AskForApproval::OnRequest),
tokens_used: 0,
has_user_event: true,
first_user_message: Some("hello".to_string()),
archived_at: None,
git_sha: None,
git_branch: None,