mirror of
https://github.com/openai/codex.git
synced 2026-05-01 03:42:05 +03:00
Leverage state DB metadata for thread summaries (#10621)
Summary:
- Read conversation summaries and cwd info from the state DB when possible, so we no longer rely on rollout files for metadata and avoid extra I/O.
- Persist the CLI version in thread metadata, surface it through summary builders, and add the necessary DB migration hooks.
- Simplify thread listing by using enriched state DB data directly rather than reading rollout heads.

Testing:
- Not run (not requested).
This commit is contained in:
@@ -5,9 +5,12 @@ use codex_protocol::protocol::RolloutItem;
|
||||
use codex_protocol::protocol::SessionMetaLine;
|
||||
use codex_protocol::protocol::TurnContextItem;
|
||||
use codex_protocol::protocol::USER_MESSAGE_BEGIN;
|
||||
use codex_protocol::protocol::UserMessageEvent;
|
||||
use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
|
||||
const IMAGE_ONLY_USER_MESSAGE_PLACEHOLDER: &str = "[Image]";
|
||||
|
||||
/// Apply a rollout item to the metadata structure.
|
||||
pub fn apply_rollout_item(
|
||||
metadata: &mut ThreadMetadata,
|
||||
@@ -37,6 +40,9 @@ fn apply_session_meta_from_item(metadata: &mut ThreadMetadata, meta_line: &Sessi
|
||||
if let Some(provider) = meta_line.meta.model_provider.as_deref() {
|
||||
metadata.model_provider = provider.to_string();
|
||||
}
|
||||
if !meta_line.meta.cli_version.is_empty() {
|
||||
metadata.cli_version = meta_line.meta.cli_version.clone();
|
||||
}
|
||||
if !meta_line.meta.cwd.as_os_str().is_empty() {
|
||||
metadata.cwd = meta_line.meta.cwd.clone();
|
||||
}
|
||||
@@ -61,9 +67,14 @@ fn apply_event_msg(metadata: &mut ThreadMetadata, event: &EventMsg) {
|
||||
}
|
||||
}
|
||||
EventMsg::UserMessage(user) => {
|
||||
metadata.has_user_event = true;
|
||||
if metadata.first_user_message.is_none() {
|
||||
metadata.first_user_message = user_message_preview(user);
|
||||
}
|
||||
if metadata.title.is_empty() {
|
||||
metadata.title = strip_user_message_prefix(user.message.as_str()).to_string();
|
||||
let title = strip_user_message_prefix(user.message.as_str());
|
||||
if !title.is_empty() {
|
||||
metadata.title = title.to_string();
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
@@ -71,7 +82,7 @@ fn apply_event_msg(metadata: &mut ThreadMetadata, event: &EventMsg) {
|
||||
}
|
||||
|
||||
/// Apply a response item to the metadata structure.
///
/// Intentionally a no-op: the title, `has_user_event`, and
/// `first_user_message` are derived from `EventMsg::UserMessage` only, so
/// response items never contribute to them.
fn apply_response_item(_metadata: &mut ThreadMetadata, _item: &ResponseItem) {}
|
||||
|
||||
fn strip_user_message_prefix(text: &str) -> &str {
|
||||
@@ -81,6 +92,22 @@ fn strip_user_message_prefix(text: &str) -> &str {
|
||||
}
|
||||
}
|
||||
|
||||
fn user_message_preview(user: &UserMessageEvent) -> Option<String> {
|
||||
let message = strip_user_message_prefix(user.message.as_str());
|
||||
if !message.is_empty() {
|
||||
return Some(message.to_string());
|
||||
}
|
||||
if user
|
||||
.images
|
||||
.as_ref()
|
||||
.is_some_and(|images| !images.is_empty())
|
||||
|| !user.local_images.is_empty()
|
||||
{
|
||||
return Some(IMAGE_ONLY_USER_MESSAGE_PLACEHOLDER.to_string());
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
pub(crate) fn enum_to_string<T: Serialize>(value: &T) -> String {
|
||||
match serde_json::to_value(value) {
|
||||
Ok(Value::String(s)) => s,
|
||||
@@ -108,7 +135,7 @@ mod tests {
|
||||
use uuid::Uuid;
|
||||
|
||||
#[test]
|
||||
fn response_item_user_messages_do_not_set_title_or_has_user_event() {
|
||||
fn response_item_user_messages_do_not_set_title_or_first_user_message() {
|
||||
let mut metadata = metadata_for_test();
|
||||
let item = RolloutItem::ResponseItem(ResponseItem::Message {
|
||||
id: None,
|
||||
@@ -122,12 +149,12 @@ mod tests {
|
||||
|
||||
apply_rollout_item(&mut metadata, &item, "test-provider");
|
||||
|
||||
assert_eq!(metadata.has_user_event, false);
|
||||
assert_eq!(metadata.first_user_message, None);
|
||||
assert_eq!(metadata.title, "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn event_msg_user_messages_set_title_and_has_user_event() {
|
||||
fn event_msg_user_messages_set_title_and_first_user_message() {
|
||||
let mut metadata = metadata_for_test();
|
||||
let item = RolloutItem::EventMsg(EventMsg::UserMessage(UserMessageEvent {
|
||||
message: format!("{USER_MESSAGE_BEGIN} actual user request"),
|
||||
@@ -138,10 +165,48 @@ mod tests {
|
||||
|
||||
apply_rollout_item(&mut metadata, &item, "test-provider");
|
||||
|
||||
assert_eq!(metadata.has_user_event, true);
|
||||
assert_eq!(
|
||||
metadata.first_user_message.as_deref(),
|
||||
Some("actual user request")
|
||||
);
|
||||
assert_eq!(metadata.title, "actual user request");
|
||||
}
|
||||
|
||||
#[test]
fn event_msg_image_only_user_message_sets_image_placeholder_preview() {
    // A user message with no text and exactly one remote image.
    let image_only_event = UserMessageEvent {
        message: String::new(),
        images: Some(vec!["https://example.com/image.png".to_string()]),
        local_images: vec![],
        text_elements: vec![],
    };
    let rollout_item = RolloutItem::EventMsg(EventMsg::UserMessage(image_only_event));
    let mut thread_metadata = metadata_for_test();

    apply_rollout_item(&mut thread_metadata, &rollout_item, "test-provider");

    // The preview falls back to the placeholder while the title stays unset.
    assert_eq!(
        thread_metadata.first_user_message.as_deref(),
        Some(super::IMAGE_ONLY_USER_MESSAGE_PLACEHOLDER)
    );
    assert_eq!(thread_metadata.title, "");
}
|
||||
|
||||
#[test]
fn event_msg_blank_user_message_without_images_keeps_first_user_message_empty() {
    // Whitespace-only text and an empty (but present) image list.
    let blank_event = UserMessageEvent {
        message: " ".to_string(),
        images: Some(vec![]),
        local_images: vec![],
        text_elements: vec![],
    };
    let rollout_item = RolloutItem::EventMsg(EventMsg::UserMessage(blank_event));
    let mut thread_metadata = metadata_for_test();

    apply_rollout_item(&mut thread_metadata, &rollout_item, "test-provider");

    // Neither a preview nor a title is recorded for such a message.
    assert_eq!(thread_metadata.first_user_message, None);
    assert_eq!(thread_metadata.title, "");
}
|
||||
|
||||
fn metadata_for_test() -> ThreadMetadata {
|
||||
let id = ThreadId::from_string(&Uuid::from_u128(42).to_string()).expect("thread id");
|
||||
let created_at = DateTime::<Utc>::from_timestamp(1_735_689_600, 0).expect("timestamp");
|
||||
@@ -153,11 +218,12 @@ mod tests {
|
||||
source: "cli".to_string(),
|
||||
model_provider: "openai".to_string(),
|
||||
cwd: PathBuf::from("/tmp"),
|
||||
cli_version: "0.0.0".to_string(),
|
||||
title: String::new(),
|
||||
sandbox_policy: "read-only".to_string(),
|
||||
approval_mode: "on-request".to_string(),
|
||||
tokens_used: 1,
|
||||
has_user_event: false,
|
||||
first_user_message: None,
|
||||
archived_at: None,
|
||||
git_sha: None,
|
||||
git_branch: None,
|
||||
|
||||
@@ -66,6 +66,8 @@ pub struct ThreadMetadata {
|
||||
pub model_provider: String,
|
||||
/// The working directory for the thread.
|
||||
pub cwd: PathBuf,
|
||||
/// Version of the CLI that created the thread.
|
||||
pub cli_version: String,
|
||||
/// A best-effort thread title.
|
||||
pub title: String,
|
||||
/// The sandbox policy (stringified enum).
|
||||
@@ -74,8 +76,8 @@ pub struct ThreadMetadata {
|
||||
pub approval_mode: String,
|
||||
/// The last observed token usage.
|
||||
pub tokens_used: i64,
|
||||
/// Whether the thread has observed a user message.
|
||||
pub has_user_event: bool,
|
||||
/// First user message observed for this thread, if any.
|
||||
pub first_user_message: Option<String>,
|
||||
/// The archive timestamp, if the thread is archived.
|
||||
pub archived_at: Option<DateTime<Utc>>,
|
||||
/// The git commit SHA, if known.
|
||||
@@ -103,6 +105,8 @@ pub struct ThreadMetadataBuilder {
|
||||
pub model_provider: Option<String>,
|
||||
/// The working directory for the thread.
|
||||
pub cwd: PathBuf,
|
||||
/// Version of the CLI that created the thread.
|
||||
pub cli_version: Option<String>,
|
||||
/// The sandbox policy.
|
||||
pub sandbox_policy: SandboxPolicy,
|
||||
/// The approval mode.
|
||||
@@ -133,6 +137,7 @@ impl ThreadMetadataBuilder {
|
||||
source,
|
||||
model_provider: None,
|
||||
cwd: PathBuf::new(),
|
||||
cli_version: None,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
approval_mode: AskForApproval::OnRequest,
|
||||
archived_at: None,
|
||||
@@ -163,11 +168,12 @@ impl ThreadMetadataBuilder {
|
||||
.clone()
|
||||
.unwrap_or_else(|| default_provider.to_string()),
|
||||
cwd: self.cwd.clone(),
|
||||
cli_version: self.cli_version.clone().unwrap_or_default(),
|
||||
title: String::new(),
|
||||
sandbox_policy,
|
||||
approval_mode,
|
||||
tokens_used: 0,
|
||||
has_user_event: false,
|
||||
first_user_message: None,
|
||||
archived_at: self.archived_at.map(canonicalize_datetime),
|
||||
git_sha: self.git_sha.clone(),
|
||||
git_branch: self.git_branch.clone(),
|
||||
@@ -201,6 +207,9 @@ impl ThreadMetadata {
|
||||
if self.cwd != other.cwd {
|
||||
diffs.push("cwd");
|
||||
}
|
||||
if self.cli_version != other.cli_version {
|
||||
diffs.push("cli_version");
|
||||
}
|
||||
if self.title != other.title {
|
||||
diffs.push("title");
|
||||
}
|
||||
@@ -213,8 +222,8 @@ impl ThreadMetadata {
|
||||
if self.tokens_used != other.tokens_used {
|
||||
diffs.push("tokens_used");
|
||||
}
|
||||
if self.has_user_event != other.has_user_event {
|
||||
diffs.push("has_user_event");
|
||||
if self.first_user_message != other.first_user_message {
|
||||
diffs.push("first_user_message");
|
||||
}
|
||||
if self.archived_at != other.archived_at {
|
||||
diffs.push("archived_at");
|
||||
@@ -245,11 +254,12 @@ pub(crate) struct ThreadRow {
|
||||
source: String,
|
||||
model_provider: String,
|
||||
cwd: String,
|
||||
cli_version: String,
|
||||
title: String,
|
||||
sandbox_policy: String,
|
||||
approval_mode: String,
|
||||
tokens_used: i64,
|
||||
has_user_event: bool,
|
||||
first_user_message: String,
|
||||
archived_at: Option<i64>,
|
||||
git_sha: Option<String>,
|
||||
git_branch: Option<String>,
|
||||
@@ -266,11 +276,12 @@ impl ThreadRow {
|
||||
source: row.try_get("source")?,
|
||||
model_provider: row.try_get("model_provider")?,
|
||||
cwd: row.try_get("cwd")?,
|
||||
cli_version: row.try_get("cli_version")?,
|
||||
title: row.try_get("title")?,
|
||||
sandbox_policy: row.try_get("sandbox_policy")?,
|
||||
approval_mode: row.try_get("approval_mode")?,
|
||||
tokens_used: row.try_get("tokens_used")?,
|
||||
has_user_event: row.try_get("has_user_event")?,
|
||||
first_user_message: row.try_get("first_user_message")?,
|
||||
archived_at: row.try_get("archived_at")?,
|
||||
git_sha: row.try_get("git_sha")?,
|
||||
git_branch: row.try_get("git_branch")?,
|
||||
@@ -291,11 +302,12 @@ impl TryFrom<ThreadRow> for ThreadMetadata {
|
||||
source,
|
||||
model_provider,
|
||||
cwd,
|
||||
cli_version,
|
||||
title,
|
||||
sandbox_policy,
|
||||
approval_mode,
|
||||
tokens_used,
|
||||
has_user_event,
|
||||
first_user_message,
|
||||
archived_at,
|
||||
git_sha,
|
||||
git_branch,
|
||||
@@ -309,11 +321,12 @@ impl TryFrom<ThreadRow> for ThreadMetadata {
|
||||
source,
|
||||
model_provider,
|
||||
cwd: PathBuf::from(cwd),
|
||||
cli_version,
|
||||
title,
|
||||
sandbox_policy,
|
||||
approval_mode,
|
||||
tokens_used,
|
||||
has_user_event,
|
||||
first_user_message: (!first_user_message.is_empty()).then_some(first_user_message),
|
||||
archived_at: archived_at.map(epoch_seconds_to_datetime).transpose()?,
|
||||
git_sha,
|
||||
git_branch,
|
||||
|
||||
@@ -177,11 +177,12 @@ SELECT
|
||||
source,
|
||||
model_provider,
|
||||
cwd,
|
||||
cli_version,
|
||||
title,
|
||||
sandbox_policy,
|
||||
approval_mode,
|
||||
tokens_used,
|
||||
has_user_event,
|
||||
first_user_message,
|
||||
archived_at,
|
||||
git_sha,
|
||||
git_branch,
|
||||
@@ -295,11 +296,12 @@ SELECT
|
||||
source,
|
||||
model_provider,
|
||||
cwd,
|
||||
cli_version,
|
||||
title,
|
||||
sandbox_policy,
|
||||
approval_mode,
|
||||
tokens_used,
|
||||
has_user_event,
|
||||
first_user_message,
|
||||
archived_at,
|
||||
git_sha,
|
||||
git_branch,
|
||||
@@ -449,17 +451,18 @@ INSERT INTO threads (
|
||||
source,
|
||||
model_provider,
|
||||
cwd,
|
||||
cli_version,
|
||||
title,
|
||||
sandbox_policy,
|
||||
approval_mode,
|
||||
tokens_used,
|
||||
has_user_event,
|
||||
first_user_message,
|
||||
archived,
|
||||
archived_at,
|
||||
git_sha,
|
||||
git_branch,
|
||||
git_origin_url
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT(id) DO UPDATE SET
|
||||
rollout_path = excluded.rollout_path,
|
||||
created_at = excluded.created_at,
|
||||
@@ -467,11 +470,12 @@ ON CONFLICT(id) DO UPDATE SET
|
||||
source = excluded.source,
|
||||
model_provider = excluded.model_provider,
|
||||
cwd = excluded.cwd,
|
||||
cli_version = excluded.cli_version,
|
||||
title = excluded.title,
|
||||
sandbox_policy = excluded.sandbox_policy,
|
||||
approval_mode = excluded.approval_mode,
|
||||
tokens_used = excluded.tokens_used,
|
||||
has_user_event = excluded.has_user_event,
|
||||
first_user_message = excluded.first_user_message,
|
||||
archived = excluded.archived,
|
||||
archived_at = excluded.archived_at,
|
||||
git_sha = excluded.git_sha,
|
||||
@@ -486,11 +490,12 @@ ON CONFLICT(id) DO UPDATE SET
|
||||
.bind(metadata.source.as_str())
|
||||
.bind(metadata.model_provider.as_str())
|
||||
.bind(metadata.cwd.display().to_string())
|
||||
.bind(metadata.cli_version.as_str())
|
||||
.bind(metadata.title.as_str())
|
||||
.bind(metadata.sandbox_policy.as_str())
|
||||
.bind(metadata.approval_mode.as_str())
|
||||
.bind(metadata.tokens_used)
|
||||
.bind(metadata.has_user_event)
|
||||
.bind(metadata.first_user_message.as_deref().unwrap_or_default())
|
||||
.bind(metadata.archived_at.is_some())
|
||||
.bind(metadata.archived_at.map(datetime_to_epoch_seconds))
|
||||
.bind(metadata.git_sha.as_deref())
|
||||
@@ -900,7 +905,7 @@ fn push_thread_filters<'a>(
|
||||
} else {
|
||||
builder.push(" AND archived = 0");
|
||||
}
|
||||
builder.push(" AND has_user_event = 1");
|
||||
builder.push(" AND first_user_message <> ''");
|
||||
if !allowed_sources.is_empty() {
|
||||
builder.push(" AND source IN (");
|
||||
let mut separated = builder.separated(", ");
|
||||
@@ -1391,11 +1396,12 @@ mod tests {
|
||||
source: "cli".to_string(),
|
||||
model_provider: "test-provider".to_string(),
|
||||
cwd,
|
||||
cli_version: "0.0.0".to_string(),
|
||||
title: String::new(),
|
||||
sandbox_policy: crate::extract::enum_to_string(&SandboxPolicy::ReadOnly),
|
||||
approval_mode: crate::extract::enum_to_string(&AskForApproval::OnRequest),
|
||||
tokens_used: 0,
|
||||
has_user_event: true,
|
||||
first_user_message: Some("hello".to_string()),
|
||||
archived_at: None,
|
||||
git_sha: None,
|
||||
git_branch: None,
|
||||
|
||||
Reference in New Issue
Block a user