Add text element metadata to protocol, app server, and core (#9331)

This is the second part of breaking up PR
https://github.com/openai/codex/pull/9116

Summary:

- Add `TextElement` / `ByteRange` to protocol user inputs and user
message events with defaults.
- Thread `text_elements` through app-server v1/v2 request handling and
history rebuild.
- Preserve UI metadata only in user input/events (not `ContentItem`)
while keeping local image attachments in user events for rehydration.

Details:

- Protocol: `UserInput::Text` carries `text_elements`;
`UserMessageEvent` carries `text_elements` + `local_images`.
Serialization includes empty vectors for backward compatibility.
- app-server-protocol: v1 defines `V1TextElement` / `V1ByteRange` in
camelCase with conversions; v2 uses its own camelCase wrapper.
- app-server: v1/v2 input mapping includes `text_elements`; thread
history rebuilds include them.
- Core: user event emission preserves UI metadata while model history
stays clean; history replay round-trips the metadata.
This commit is contained in:
charley-oai
2026-01-15 17:26:41 -08:00
committed by GitHub
parent 004a74940a
commit 1fa8350ae7
18 changed files with 416 additions and 46 deletions

View File

@@ -4,6 +4,8 @@ use crate::protocol::AgentReasoningRawContentEvent;
use crate::protocol::EventMsg;
use crate::protocol::UserMessageEvent;
use crate::protocol::WebSearchEndEvent;
use crate::user_input::ByteRange;
use crate::user_input::TextElement;
use crate::user_input::UserInput;
use schemars::JsonSchema;
use serde::Deserialize;
@@ -62,13 +64,13 @@ impl UserMessageItem {
}
/// Builds the legacy `EventMsg::UserMessage` representation of this item.
///
/// The resulting event carries the flattened message text, the image URLs,
/// the local image paths, and the text elements of this item. Each field is
/// set exactly once, from the corresponding accessor on `self`.
pub fn as_legacy_event(&self) -> EventMsg {
    // Legacy user-message events flatten only text inputs into `message` and
    // rebase text element ranges onto that concatenated text.
    EventMsg::UserMessage(UserMessageEvent {
        message: self.message(),
        images: Some(self.image_urls()),
        local_images: self.local_image_paths(),
        text_elements: self.text_elements(),
    })
}
@@ -83,6 +85,32 @@ impl UserMessageItem {
.join("")
}
/// Collects the text elements of every `UserInput::Text` entry in `content`,
/// in order, with their byte ranges shifted to index into the concatenation
/// of all text chunks rather than into the individual chunk.
///
/// Non-text inputs contribute no elements and do not advance the offset.
pub fn text_elements(&self) -> Vec<TextElement> {
    let text_chunks = self.content.iter().filter_map(|input| match input {
        UserInput::Text {
            text,
            text_elements,
        } => Some((text, text_elements)),
        _ => None,
    });

    let mut rebased = Vec::new();
    // Running byte length of all text chunks seen so far.
    let mut base = 0usize;
    for (text, elements) in text_chunks {
        // Ranges are stored relative to their own chunk; add the bytes that
        // precede this chunk so they line up with the joined message text.
        rebased.extend(elements.iter().map(|element| TextElement {
            byte_range: ByteRange {
                start: base + element.byte_range.start,
                end: base + element.byte_range.end,
            },
            placeholder: element.placeholder.clone(),
        }));
        base += text.len();
    }
    rebased
}
pub fn image_urls(&self) -> Vec<String> {
self.content
.iter()
@@ -92,6 +120,16 @@ impl UserMessageItem {
})
.collect()
}
/// Returns a clone of the path of every `UserInput::LocalImage` entry in
/// `content`, preserving the order in which the entries appear.
pub fn local_image_paths(&self) -> Vec<std::path::PathBuf> {
    let mut paths = Vec::new();
    for input in &self.content {
        // Only local-image inputs carry a filesystem path; skip the rest.
        if let UserInput::LocalImage { path } = input {
            paths.push(path.clone());
        }
    }
    paths
}
}
impl AgentMessageItem {