use std::collections::HashMap; use codex_utils_image::load_and_resize_to_fit; use mcp_types::CallToolResult; use mcp_types::ContentBlock; use serde::Deserialize; use serde::Deserializer; use serde::Serialize; use serde::ser::Serializer; use ts_rs::TS; use crate::user_input::UserInput; use codex_git::GhostCommit; use codex_utils_image::error::ImageProcessingError; use schemars::JsonSchema; /// Controls whether a command should use the session sandbox or bypass it. #[derive( Debug, Clone, Copy, Default, Eq, Hash, PartialEq, Serialize, Deserialize, JsonSchema, TS, )] #[serde(rename_all = "snake_case")] pub enum SandboxPermissions { /// Run with the configured sandbox #[default] UseDefault, /// Request to run outside the sandbox RequireEscalated, } impl SandboxPermissions { pub fn requires_escalated_permissions(self) -> bool { matches!(self, SandboxPermissions::RequireEscalated) } } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)] #[serde(tag = "type", rename_all = "snake_case")] pub enum ResponseInputItem { Message { role: String, content: Vec, }, FunctionCallOutput { call_id: String, output: FunctionCallOutputPayload, }, McpToolCallOutput { call_id: String, result: Result, }, CustomToolCallOutput { call_id: String, output: String, }, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)] #[serde(tag = "type", rename_all = "snake_case")] pub enum ContentItem { InputText { text: String }, InputImage { image_url: String }, OutputText { text: String }, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)] #[serde(tag = "type", rename_all = "snake_case")] pub enum ResponseItem { Message { #[serde(default, skip_serializing)] #[ts(skip)] id: Option, role: String, content: Vec, }, Reasoning { #[serde(default, skip_serializing)] #[ts(skip)] id: String, summary: Vec, #[serde(default, skip_serializing_if = "should_serialize_reasoning_content")] #[ts(optional)] content: Option>, encrypted_content: Option, }, 
LocalShellCall { /// Set when using the chat completions API. #[serde(default, skip_serializing)] #[ts(skip)] id: Option, /// Set when using the Responses API. call_id: Option, status: LocalShellStatus, action: LocalShellAction, }, FunctionCall { #[serde(default, skip_serializing)] #[ts(skip)] id: Option, name: String, // The Responses API returns the function call arguments as a *string* that contains // JSON, not as an already‑parsed object. We keep it as a raw string here and let // Session::handle_function_call parse it into a Value. This exactly matches the // Chat Completions + Responses API behavior. arguments: String, call_id: String, }, // NOTE: The input schema for `function_call_output` objects that clients send to the // OpenAI /v1/responses endpoint is NOT the same shape as the objects the server returns on the // SSE stream. When *sending* we must wrap the string output inside an object that includes a // required `success` boolean. To ensure we serialize exactly the expected shape we introduce // a dedicated payload struct and flatten it here. FunctionCallOutput { call_id: String, output: FunctionCallOutputPayload, }, CustomToolCall { #[serde(default, skip_serializing)] #[ts(skip)] id: Option, #[serde(default, skip_serializing_if = "Option::is_none")] #[ts(optional)] status: Option, call_id: String, name: String, input: String, }, CustomToolCallOutput { call_id: String, output: String, }, // Emitted by the Responses API when the agent triggers a web search. // Example payload (from SSE `response.output_item.done`): // { // "id":"ws_...", // "type":"web_search_call", // "status":"completed", // "action": {"type":"search","query":"weather: San Francisco, CA"} // } WebSearchCall { #[serde(default, skip_serializing)] #[ts(skip)] id: Option, #[serde(default, skip_serializing_if = "Option::is_none")] #[ts(optional)] status: Option, action: WebSearchAction, }, // Generated by the harness but considered exactly as a model response. 
GhostSnapshot { ghost_commit: GhostCommit, }, #[serde(alias = "compaction_summary")] Compaction { encrypted_content: String, }, #[serde(other)] Other, } fn should_serialize_reasoning_content(content: &Option>) -> bool { match content { Some(content) => !content .iter() .any(|c| matches!(c, ReasoningItemContent::ReasoningText { .. })), None => false, } } fn local_image_error_placeholder( path: &std::path::Path, error: impl std::fmt::Display, ) -> ContentItem { ContentItem::InputText { text: format!( "Codex could not read the local image at `{}`: {}", path.display(), error ), } } fn invalid_image_error_placeholder( path: &std::path::Path, error: impl std::fmt::Display, ) -> ContentItem { ContentItem::InputText { text: format!( "Image located at `{}` is invalid: {}", path.display(), error ), } } fn unsupported_image_error_placeholder(path: &std::path::Path, mime: &str) -> ContentItem { ContentItem::InputText { text: format!( "Codex cannot attach image at `{}`: unsupported image format `{}`.", path.display(), mime ), } } impl From for ResponseItem { fn from(item: ResponseInputItem) -> Self { match item { ResponseInputItem::Message { role, content } => Self::Message { role, content, id: None, }, ResponseInputItem::FunctionCallOutput { call_id, output } => { Self::FunctionCallOutput { call_id, output } } ResponseInputItem::McpToolCallOutput { call_id, result } => { let output = match result { Ok(result) => FunctionCallOutputPayload::from(&result), Err(tool_call_err) => FunctionCallOutputPayload { content: format!("err: {tool_call_err:?}"), success: Some(false), ..Default::default() }, }; Self::FunctionCallOutput { call_id, output } } ResponseInputItem::CustomToolCallOutput { call_id, output } => { Self::CustomToolCallOutput { call_id, output } } } } } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)] #[serde(rename_all = "snake_case")] pub enum LocalShellStatus { Completed, InProgress, Incomplete, } #[derive(Debug, Clone, Serialize, Deserialize, 
PartialEq, JsonSchema, TS)] #[serde(tag = "type", rename_all = "snake_case")] pub enum LocalShellAction { Exec(LocalShellExecAction), } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)] pub struct LocalShellExecAction { pub command: Vec, pub timeout_ms: Option, pub working_directory: Option, pub env: Option>, pub user: Option, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)] #[serde(tag = "type", rename_all = "snake_case")] pub enum WebSearchAction { Search { #[serde(default, skip_serializing_if = "Option::is_none")] #[ts(optional)] query: Option, }, OpenPage { #[serde(default, skip_serializing_if = "Option::is_none")] #[ts(optional)] url: Option, }, FindInPage { #[serde(default, skip_serializing_if = "Option::is_none")] #[ts(optional)] url: Option, #[serde(default, skip_serializing_if = "Option::is_none")] #[ts(optional)] pattern: Option, }, #[serde(other)] Other, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)] #[serde(tag = "type", rename_all = "snake_case")] pub enum ReasoningItemReasoningSummary { SummaryText { text: String }, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)] #[serde(tag = "type", rename_all = "snake_case")] pub enum ReasoningItemContent { ReasoningText { text: String }, Text { text: String }, } impl From> for ResponseInputItem { fn from(items: Vec) -> Self { Self::Message { role: "user".to_string(), content: items .into_iter() .filter_map(|c| match c { UserInput::Text { text } => Some(ContentItem::InputText { text }), UserInput::Image { image_url } => Some(ContentItem::InputImage { image_url }), UserInput::LocalImage { path } => match load_and_resize_to_fit(&path) { Ok(image) => Some(ContentItem::InputImage { image_url: image.into_data_url(), }), Err(err) => { if matches!(&err, ImageProcessingError::Read { .. 
}) { Some(local_image_error_placeholder(&path, &err)) } else if err.is_invalid_image() { Some(invalid_image_error_placeholder(&path, &err)) } else { let Some(mime_guess) = mime_guess::from_path(&path).first() else { return Some(local_image_error_placeholder( &path, "unsupported MIME type (unknown)", )); }; let mime = mime_guess.essence_str().to_owned(); if !mime.starts_with("image/") { return Some(local_image_error_placeholder( &path, format!("unsupported MIME type `{mime}`"), )); } Some(unsupported_image_error_placeholder(&path, &mime)) } } }, UserInput::Skill { .. } => None, // Skill bodies are injected later in core }) .collect::>(), } } } /// If the `name` of a `ResponseItem::FunctionCall` is either `container.exec` /// or `shell`, the `arguments` field should deserialize to this struct. #[derive(Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] pub struct ShellToolCallParams { pub command: Vec, pub workdir: Option, /// This is the maximum time in milliseconds that the command is allowed to run. #[serde(alias = "timeout")] pub timeout_ms: Option, #[serde(default, skip_serializing_if = "Option::is_none")] #[ts(optional)] pub sandbox_permissions: Option, #[serde(skip_serializing_if = "Option::is_none")] pub justification: Option, } /// If the `name` of a `ResponseItem::FunctionCall` is `shell_command`, the /// `arguments` field should deserialize to this struct. #[derive(Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] pub struct ShellCommandToolCallParams { pub command: String, pub workdir: Option, /// Whether to run the shell with login shell semantics #[serde(skip_serializing_if = "Option::is_none")] pub login: Option, /// This is the maximum time in milliseconds that the command is allowed to run. 
#[serde(alias = "timeout")] pub timeout_ms: Option, #[serde(default, skip_serializing_if = "Option::is_none")] #[ts(optional)] pub sandbox_permissions: Option, #[serde(skip_serializing_if = "Option::is_none")] pub justification: Option, } /// Responses API compatible content items that can be returned by a tool call. /// This is a subset of ContentItem with the types we support as function call outputs. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)] #[serde(tag = "type", rename_all = "snake_case")] pub enum FunctionCallOutputContentItem { // Do not rename, these are serialized and used directly in the responses API. InputText { text: String }, // Do not rename, these are serialized and used directly in the responses API. InputImage { image_url: String }, } /// The payload we send back to OpenAI when reporting a tool call result. /// /// `content` preserves the historical plain-string payload so downstream /// integrations (tests, logging, etc.) can keep treating tool output as /// `String`. When an MCP server returns richer data we additionally populate /// `content_items` with the structured form that the Responses/Chat /// Completions APIs understand. 
#[derive(Debug, Default, Clone, PartialEq, JsonSchema, TS)] pub struct FunctionCallOutputPayload { pub content: String, #[serde(skip_serializing_if = "Option::is_none")] pub content_items: Option>, pub success: Option, } #[derive(Deserialize)] #[serde(untagged)] enum FunctionCallOutputPayloadSerde { Text(String), Items(Vec), } // The Responses API expects two *different* shapes depending on success vs failure: // • success → output is a plain string (no nested object) // • failure → output is an object { content, success:false } impl Serialize for FunctionCallOutputPayload { fn serialize(&self, serializer: S) -> Result where S: Serializer, { if let Some(items) = &self.content_items { items.serialize(serializer) } else { serializer.serialize_str(&self.content) } } } impl<'de> Deserialize<'de> for FunctionCallOutputPayload { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { match FunctionCallOutputPayloadSerde::deserialize(deserializer)? { FunctionCallOutputPayloadSerde::Text(content) => Ok(FunctionCallOutputPayload { content, ..Default::default() }), FunctionCallOutputPayloadSerde::Items(items) => { let content = serde_json::to_string(&items).map_err(serde::de::Error::custom)?; Ok(FunctionCallOutputPayload { content, content_items: Some(items), success: None, }) } } } } impl From<&CallToolResult> for FunctionCallOutputPayload { fn from(call_tool_result: &CallToolResult) -> Self { let CallToolResult { content, structured_content, is_error, } = call_tool_result; let is_success = is_error != &Some(true); if let Some(structured_content) = structured_content && !structured_content.is_null() { match serde_json::to_string(structured_content) { Ok(serialized_structured_content) => { return FunctionCallOutputPayload { content: serialized_structured_content, success: Some(is_success), ..Default::default() }; } Err(err) => { return FunctionCallOutputPayload { content: err.to_string(), success: Some(false), ..Default::default() }; } } } let 
serialized_content = match serde_json::to_string(content) { Ok(serialized_content) => serialized_content, Err(err) => { return FunctionCallOutputPayload { content: err.to_string(), success: Some(false), ..Default::default() }; } }; let content_items = convert_content_blocks_to_items(content); FunctionCallOutputPayload { content: serialized_content, content_items, success: Some(is_success), } } } fn convert_content_blocks_to_items( blocks: &[ContentBlock], ) -> Option> { let mut saw_image = false; let mut items = Vec::with_capacity(blocks.len()); tracing::warn!("Blocks: {:?}", blocks); for block in blocks { match block { ContentBlock::TextContent(text) => { items.push(FunctionCallOutputContentItem::InputText { text: text.text.clone(), }); } ContentBlock::ImageContent(image) => { saw_image = true; // Just in case the content doesn't include a data URL, add it. let image_url = if image.data.starts_with("data:") { image.data.clone() } else { format!("data:{};base64,{}", image.mime_type, image.data) }; items.push(FunctionCallOutputContentItem::InputImage { image_url }); } // TODO: render audio, resource, and embedded resource content to the model. _ => return None, } } if saw_image { Some(items) } else { None } } // Implement Display so callers can treat the payload like a plain string when logging or doing // trivial substring checks in tests (existing tests call `.contains()` on the output). Display // returns the raw `content` field. impl std::fmt::Display for FunctionCallOutputPayload { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str(&self.content) } } impl std::ops::Deref for FunctionCallOutputPayload { type Target = str; fn deref(&self) -> &Self::Target { &self.content } } // (Moved event mapping logic into codex-core to avoid coupling protocol to UI-facing events.) 
#[cfg(test)]
mod tests {
    use super::*;
    use anyhow::Result;
    use mcp_types::ImageContent;
    use mcp_types::TextContent;
    use pretty_assertions::assert_eq;
    use tempfile::tempdir;

    // A payload without an explicit failure flag serializes as a bare string.
    #[test]
    fn serializes_success_as_plain_string() -> Result<()> {
        let item = ResponseInputItem::FunctionCallOutput {
            call_id: "call1".into(),
            output: FunctionCallOutputPayload {
                content: "ok".into(),
                ..Default::default()
            },
        };

        let json = serde_json::to_string(&item)?;
        let v: serde_json::Value = serde_json::from_str(&json)?;

        // Success case -> output should be a plain string
        assert_eq!(v.get("output").unwrap().as_str().unwrap(), "ok");
        Ok(())
    }

    // `success: Some(false)` does not change the wire shape: still a bare string.
    #[test]
    fn serializes_failure_as_string() -> Result<()> {
        let item = ResponseInputItem::FunctionCallOutput {
            call_id: "call1".into(),
            output: FunctionCallOutputPayload {
                content: "bad".into(),
                success: Some(false),
                ..Default::default()
            },
        };

        let json = serde_json::to_string(&item)?;
        let v: serde_json::Value = serde_json::from_str(&json)?;

        assert_eq!(v.get("output").unwrap().as_str().unwrap(), "bad");
        Ok(())
    }

    // Tool results containing an image are converted to content items and
    // serialized as an array; bare base64 data gains a `data:` URL prefix.
    #[test]
    fn serializes_image_outputs_as_array() -> Result<()> {
        let call_tool_result = CallToolResult {
            content: vec![
                ContentBlock::TextContent(TextContent {
                    annotations: None,
                    text: "caption".into(),
                    r#type: "text".into(),
                }),
                ContentBlock::ImageContent(ImageContent {
                    annotations: None,
                    data: "BASE64".into(),
                    mime_type: "image/png".into(),
                    r#type: "image".into(),
                }),
            ],
            is_error: None,
            structured_content: None,
        };

        let payload = FunctionCallOutputPayload::from(&call_tool_result);
        assert_eq!(payload.success, Some(true));
        let items = payload.content_items.clone().expect("content items");
        assert_eq!(
            items,
            vec![
                FunctionCallOutputContentItem::InputText {
                    text: "caption".into(),
                },
                FunctionCallOutputContentItem::InputImage {
                    image_url: "data:image/png;base64,BASE64".into(),
                },
            ]
        );

        let item = ResponseInputItem::FunctionCallOutput {
            call_id: "call1".into(),
            output: payload,
        };

        let json = serde_json::to_string(&item)?;
        let v: serde_json::Value = serde_json::from_str(&json)?;

        let output = v.get("output").expect("output field");
        assert!(output.is_array(), "expected array output");

        Ok(())
    }

    // An array payload populates `content_items`, and `content` holds the same
    // items re-serialized as JSON text.
    #[test]
    fn deserializes_array_payload_into_items() -> Result<()> {
        let json = r#"[ {"type": "input_text", "text": "note"}, {"type": "input_image", "image_url": "data:image/png;base64,XYZ"} ]"#;

        let payload: FunctionCallOutputPayload = serde_json::from_str(json)?;

        assert_eq!(payload.success, None);
        let expected_items = vec![
            FunctionCallOutputContentItem::InputText {
                text: "note".into(),
            },
            FunctionCallOutputContentItem::InputImage {
                image_url: "data:image/png;base64,XYZ".into(),
            },
        ];
        assert_eq!(payload.content_items, Some(expected_items.clone()));

        let expected_content = serde_json::to_string(&expected_items)?;
        assert_eq!(payload.content, expected_content);

        Ok(())
    }

    // The `compaction_summary` type tag is accepted as an alias for `compaction`.
    #[test]
    fn deserializes_compaction_alias() -> Result<()> {
        let json = r#"{"type":"compaction_summary","encrypted_content":"abc"}"#;

        let item: ResponseItem = serde_json::from_str(json)?;

        assert_eq!(
            item,
            ResponseItem::Compaction {
                encrypted_content: "abc".into(),
            }
        );
        Ok(())
    }

    // Each web search action parses to the expected variant and serializes
    // back to a JSON value equal to the input.
    #[test]
    fn roundtrips_web_search_call_actions() -> Result<()> {
        let cases = vec![
            (
                r#"{ "type": "web_search_call", "status": "completed", "action": { "type": "search", "query": "weather seattle" } }"#,
                WebSearchAction::Search {
                    query: Some("weather seattle".into()),
                },
                Some("completed".into()),
            ),
            (
                r#"{ "type": "web_search_call", "status": "open", "action": { "type": "open_page", "url": "https://example.com" } }"#,
                WebSearchAction::OpenPage {
                    url: Some("https://example.com".into()),
                },
                Some("open".into()),
            ),
            (
                r#"{ "type": "web_search_call", "status": "in_progress", "action": { "type": "find_in_page", "url": "https://example.com/docs", "pattern": "installation" } }"#,
                WebSearchAction::FindInPage {
                    url: Some("https://example.com/docs".into()),
                    pattern: Some("installation".into()),
                },
                Some("in_progress".into()),
            ),
        ];

        for (json_literal, expected_action, expected_status) in cases {
            let parsed: ResponseItem = serde_json::from_str(json_literal)?;
            // The tuple elements are owned by this iteration, so move them.
            let expected = ResponseItem::WebSearchCall {
                id: None,
                status: expected_status,
                action: expected_action,
            };
            assert_eq!(parsed, expected);

            let serialized = serde_json::to_value(&parsed)?;
            let original_value: serde_json::Value = serde_json::from_str(json_literal)?;
            assert_eq!(serialized, original_value);
        }

        Ok(())
    }

    // The `timeout` key is accepted as an alias for `timeout_ms`; absent
    // optional fields deserialize to `None`.
    #[test]
    fn deserialize_shell_tool_call_params() -> Result<()> {
        let json = r#"{ "command": ["ls", "-l"], "workdir": "/tmp", "timeout": 1000 }"#;

        let params: ShellToolCallParams = serde_json::from_str(json)?;
        assert_eq!(
            ShellToolCallParams {
                command: vec!["ls".to_string(), "-l".to_string()],
                workdir: Some("/tmp".to_string()),
                timeout_ms: Some(1000),
                sandbox_permissions: None,
                justification: None,
            },
            params
        );
        Ok(())
    }

    // A missing file yields a text placeholder mentioning the path and the read failure.
    #[test]
    fn local_image_read_error_adds_placeholder() -> Result<()> {
        let dir = tempdir()?;
        let missing_path = dir.path().join("missing-image.png");

        let item = ResponseInputItem::from(vec![UserInput::LocalImage {
            path: missing_path.clone(),
        }]);

        match item {
            ResponseInputItem::Message { content, .. } => {
                assert_eq!(content.len(), 1);
                match &content[0] {
                    ContentItem::InputText { text } => {
                        let display_path = missing_path.display().to_string();
                        assert!(
                            text.contains(&display_path),
                            "placeholder should mention missing path: {text}"
                        );
                        assert!(
                            text.contains("could not read"),
                            "placeholder should mention read issue: {text}"
                        );
                    }
                    other => panic!("expected placeholder text but found {other:?}"),
                }
            }
            other => panic!("expected message response but got {other:?}"),
        }

        Ok(())
    }

    // A readable file whose guessed MIME type is not `image/*` yields a
    // placeholder naming that MIME type.
    #[test]
    fn local_image_non_image_adds_placeholder() -> Result<()> {
        let dir = tempdir()?;
        let json_path = dir.path().join("example.json");
        std::fs::write(&json_path, br#"{"hello":"world"}"#)?;

        let item = ResponseInputItem::from(vec![UserInput::LocalImage {
            path: json_path.clone(),
        }]);

        match item {
            ResponseInputItem::Message { content, .. } => {
                assert_eq!(content.len(), 1);
                match &content[0] {
                    ContentItem::InputText { text } => {
                        assert!(
                            text.contains("unsupported MIME type `application/json`"),
                            "placeholder should mention unsupported MIME: {text}"
                        );
                        assert!(
                            text.contains(&json_path.display().to_string()),
                            "placeholder should mention path: {text}"
                        );
                    }
                    other => panic!("expected placeholder text but found {other:?}"),
                }
            }
            other => panic!("expected message response but got {other:?}"),
        }

        Ok(())
    }

    // An SVG file yields the "unsupported image format" placeholder with its MIME type.
    #[test]
    fn local_image_unsupported_image_format_adds_placeholder() -> Result<()> {
        let dir = tempdir()?;
        let svg_path = dir.path().join("example.svg");
        // Write a minimal but well-formed SVG so the fixture matches its `.svg` name.
        std::fs::write(
            &svg_path,
            br#"<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg"></svg>"#,
        )?;

        let item = ResponseInputItem::from(vec![UserInput::LocalImage {
            path: svg_path.clone(),
        }]);

        match item {
            ResponseInputItem::Message { content, .. } => {
                assert_eq!(content.len(), 1);
                let expected = format!(
                    "Codex cannot attach image at `{}`: unsupported image format `image/svg+xml`.",
                    svg_path.display()
                );
                match &content[0] {
                    ContentItem::InputText { text } => assert_eq!(text, &expected),
                    other => panic!("expected placeholder text but found {other:?}"),
                }
            }
            other => panic!("expected message response but got {other:?}"),
        }

        Ok(())
    }
}