mirror of
https://github.com/openai/codex.git
synced 2026-05-02 12:21:26 +03:00
## Summary - bridge Codex Apps tools that declare `_meta["openai/fileParams"]` through the OpenAI file upload flow - mask those file params in model-visible tool schemas so the model provides absolute local file paths instead of raw file payload objects - rewrite those local file path arguments client-side into `ProvidedFilePayload`-shaped objects before the normal MCP tool call ## Details - applies to scalar and array file params declared in `openai/fileParams` - Codex uploads local files directly to the backend and uses the uploaded file metadata to build the MCP tool arguments locally - this PR is input-only ## Verification - `just fmt` - `cargo test -p codex-core mcp_tool_call -- --nocapture` --------- Co-authored-by: Codex <noreply@openai.com>
371 lines
12 KiB
Rust
371 lines
12 KiB
Rust
use std::path::Path;
|
|
use std::path::PathBuf;
|
|
use std::time::Duration;
|
|
|
|
use crate::AuthProvider;
|
|
use codex_client::build_reqwest_client_with_custom_ca;
|
|
use reqwest::StatusCode;
|
|
use reqwest::header::CONTENT_LENGTH;
|
|
use serde::Deserialize;
|
|
use tokio::fs::File;
|
|
use tokio::time::Instant;
|
|
use tokio_util::io::ReaderStream;
|
|
|
|
pub const OPENAI_FILE_URI_PREFIX: &str = "sediment://";
|
|
pub const OPENAI_FILE_UPLOAD_LIMIT_BYTES: u64 = 512 * 1024 * 1024;
|
|
|
|
const OPENAI_FILE_REQUEST_TIMEOUT: Duration = Duration::from_secs(60);
|
|
const OPENAI_FILE_FINALIZE_TIMEOUT: Duration = Duration::from_secs(30);
|
|
const OPENAI_FILE_FINALIZE_RETRY_DELAY: Duration = Duration::from_millis(250);
|
|
const OPENAI_FILE_USE_CASE: &str = "codex";
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
pub struct UploadedOpenAiFile {
|
|
pub file_id: String,
|
|
pub uri: String,
|
|
pub download_url: String,
|
|
pub file_name: String,
|
|
pub file_size_bytes: u64,
|
|
pub mime_type: Option<String>,
|
|
pub path: PathBuf,
|
|
}
|
|
|
|
#[derive(Debug, thiserror::Error)]
|
|
pub enum OpenAiFileError {
|
|
#[error("path `{path}` does not exist")]
|
|
MissingPath { path: PathBuf },
|
|
#[error("path `{path}` is not a file")]
|
|
NotAFile { path: PathBuf },
|
|
#[error("path `{path}` cannot be read: {source}")]
|
|
ReadFile {
|
|
path: PathBuf,
|
|
#[source]
|
|
source: std::io::Error,
|
|
},
|
|
#[error(
|
|
"file `{path}` is too large: {size_bytes} bytes exceeds the limit of {limit_bytes} bytes"
|
|
)]
|
|
FileTooLarge {
|
|
path: PathBuf,
|
|
size_bytes: u64,
|
|
limit_bytes: u64,
|
|
},
|
|
#[error("failed to send OpenAI file request to {url}: {source}")]
|
|
Request {
|
|
url: String,
|
|
#[source]
|
|
source: reqwest::Error,
|
|
},
|
|
#[error("OpenAI file request to {url} failed with status {status}: {body}")]
|
|
UnexpectedStatus {
|
|
url: String,
|
|
status: StatusCode,
|
|
body: String,
|
|
},
|
|
#[error("failed to parse OpenAI file response from {url}: {source}")]
|
|
Decode {
|
|
url: String,
|
|
#[source]
|
|
source: serde_json::Error,
|
|
},
|
|
#[error("OpenAI file upload for `{file_id}` is not ready yet")]
|
|
UploadNotReady { file_id: String },
|
|
#[error("OpenAI file upload for `{file_id}` failed: {message}")]
|
|
UploadFailed { file_id: String, message: String },
|
|
}
|
|
|
|
#[derive(Deserialize)]
|
|
struct CreateFileResponse {
|
|
file_id: String,
|
|
upload_url: String,
|
|
}
|
|
|
|
#[derive(Deserialize)]
|
|
#[serde(rename_all = "snake_case")]
|
|
struct DownloadLinkResponse {
|
|
status: String,
|
|
download_url: Option<String>,
|
|
file_name: Option<String>,
|
|
mime_type: Option<String>,
|
|
error_message: Option<String>,
|
|
}
|
|
|
|
pub fn openai_file_uri(file_id: &str) -> String {
|
|
format!("{OPENAI_FILE_URI_PREFIX}{file_id}")
|
|
}
|
|
|
|
pub async fn upload_local_file(
|
|
base_url: &str,
|
|
auth: &impl AuthProvider,
|
|
path: &Path,
|
|
) -> Result<UploadedOpenAiFile, OpenAiFileError> {
|
|
let metadata = tokio::fs::metadata(path)
|
|
.await
|
|
.map_err(|source| match source.kind() {
|
|
std::io::ErrorKind::NotFound => OpenAiFileError::MissingPath {
|
|
path: path.to_path_buf(),
|
|
},
|
|
_ => OpenAiFileError::ReadFile {
|
|
path: path.to_path_buf(),
|
|
source,
|
|
},
|
|
})?;
|
|
if !metadata.is_file() {
|
|
return Err(OpenAiFileError::NotAFile {
|
|
path: path.to_path_buf(),
|
|
});
|
|
}
|
|
if metadata.len() > OPENAI_FILE_UPLOAD_LIMIT_BYTES {
|
|
return Err(OpenAiFileError::FileTooLarge {
|
|
path: path.to_path_buf(),
|
|
size_bytes: metadata.len(),
|
|
limit_bytes: OPENAI_FILE_UPLOAD_LIMIT_BYTES,
|
|
});
|
|
}
|
|
|
|
let file_name = path
|
|
.file_name()
|
|
.and_then(|value| value.to_str())
|
|
.unwrap_or("file")
|
|
.to_string();
|
|
let create_url = format!("{}/files", base_url.trim_end_matches('/'));
|
|
let create_response = authorized_request(auth, reqwest::Method::POST, &create_url)
|
|
.json(&serde_json::json!({
|
|
"file_name": file_name,
|
|
"file_size": metadata.len(),
|
|
"use_case": OPENAI_FILE_USE_CASE,
|
|
}))
|
|
.send()
|
|
.await
|
|
.map_err(|source| OpenAiFileError::Request {
|
|
url: create_url.clone(),
|
|
source,
|
|
})?;
|
|
let create_status = create_response.status();
|
|
let create_body = create_response.text().await.unwrap_or_default();
|
|
if !create_status.is_success() {
|
|
return Err(OpenAiFileError::UnexpectedStatus {
|
|
url: create_url,
|
|
status: create_status,
|
|
body: create_body,
|
|
});
|
|
}
|
|
let create_payload: CreateFileResponse =
|
|
serde_json::from_str(&create_body).map_err(|source| OpenAiFileError::Decode {
|
|
url: create_url.clone(),
|
|
source,
|
|
})?;
|
|
|
|
let upload_file = File::open(path)
|
|
.await
|
|
.map_err(|source| OpenAiFileError::ReadFile {
|
|
path: path.to_path_buf(),
|
|
source,
|
|
})?;
|
|
let upload_response = build_reqwest_client()
|
|
.put(&create_payload.upload_url)
|
|
.timeout(OPENAI_FILE_REQUEST_TIMEOUT)
|
|
.header("x-ms-blob-type", "BlockBlob")
|
|
.header(CONTENT_LENGTH, metadata.len())
|
|
.body(reqwest::Body::wrap_stream(ReaderStream::new(upload_file)))
|
|
.send()
|
|
.await
|
|
.map_err(|source| OpenAiFileError::Request {
|
|
url: create_payload.upload_url.clone(),
|
|
source,
|
|
})?;
|
|
let upload_status = upload_response.status();
|
|
let upload_body = upload_response.text().await.unwrap_or_default();
|
|
if !upload_status.is_success() {
|
|
return Err(OpenAiFileError::UnexpectedStatus {
|
|
url: create_payload.upload_url.clone(),
|
|
status: upload_status,
|
|
body: upload_body,
|
|
});
|
|
}
|
|
|
|
let finalize_url = format!(
|
|
"{}/files/{}/uploaded",
|
|
base_url.trim_end_matches('/'),
|
|
create_payload.file_id,
|
|
);
|
|
let finalize_started_at = Instant::now();
|
|
loop {
|
|
let finalize_response = authorized_request(auth, reqwest::Method::POST, &finalize_url)
|
|
.json(&serde_json::json!({}))
|
|
.send()
|
|
.await
|
|
.map_err(|source| OpenAiFileError::Request {
|
|
url: finalize_url.clone(),
|
|
source,
|
|
})?;
|
|
let finalize_status = finalize_response.status();
|
|
let finalize_body = finalize_response.text().await.unwrap_or_default();
|
|
if !finalize_status.is_success() {
|
|
return Err(OpenAiFileError::UnexpectedStatus {
|
|
url: finalize_url.clone(),
|
|
status: finalize_status,
|
|
body: finalize_body,
|
|
});
|
|
}
|
|
let finalize_payload: DownloadLinkResponse =
|
|
serde_json::from_str(&finalize_body).map_err(|source| OpenAiFileError::Decode {
|
|
url: finalize_url.clone(),
|
|
source,
|
|
})?;
|
|
|
|
match finalize_payload.status.as_str() {
|
|
"success" => {
|
|
return Ok(UploadedOpenAiFile {
|
|
file_id: create_payload.file_id.clone(),
|
|
uri: openai_file_uri(&create_payload.file_id),
|
|
download_url: finalize_payload.download_url.ok_or_else(|| {
|
|
OpenAiFileError::UploadFailed {
|
|
file_id: create_payload.file_id.clone(),
|
|
message: "missing download_url".to_string(),
|
|
}
|
|
})?,
|
|
file_name: finalize_payload.file_name.unwrap_or(file_name),
|
|
file_size_bytes: metadata.len(),
|
|
mime_type: finalize_payload.mime_type,
|
|
path: path.to_path_buf(),
|
|
});
|
|
}
|
|
"retry" => {
|
|
if finalize_started_at.elapsed() >= OPENAI_FILE_FINALIZE_TIMEOUT {
|
|
return Err(OpenAiFileError::UploadNotReady {
|
|
file_id: create_payload.file_id,
|
|
});
|
|
}
|
|
tokio::time::sleep(OPENAI_FILE_FINALIZE_RETRY_DELAY).await;
|
|
}
|
|
_ => {
|
|
return Err(OpenAiFileError::UploadFailed {
|
|
file_id: create_payload.file_id,
|
|
message: finalize_payload
|
|
.error_message
|
|
.unwrap_or_else(|| "upload finalization returned an error".to_string()),
|
|
});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn authorized_request(
|
|
auth: &impl AuthProvider,
|
|
method: reqwest::Method,
|
|
url: &str,
|
|
) -> reqwest::RequestBuilder {
|
|
let client = build_reqwest_client();
|
|
let mut request = client
|
|
.request(method, url)
|
|
.timeout(OPENAI_FILE_REQUEST_TIMEOUT);
|
|
if let Some(token) = auth.bearer_token() {
|
|
request = request.bearer_auth(token);
|
|
}
|
|
if let Some(account_id) = auth.account_id() {
|
|
request = request.header("chatgpt-account-id", account_id);
|
|
}
|
|
request
|
|
}
|
|
|
|
fn build_reqwest_client() -> reqwest::Client {
|
|
build_reqwest_client_with_custom_ca(reqwest::Client::builder()).unwrap_or_else(|error| {
|
|
tracing::warn!(error = %error, "failed to build OpenAI file upload client");
|
|
reqwest::Client::new()
|
|
})
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
use crate::CoreAuthProvider;
|
|
use pretty_assertions::assert_eq;
|
|
use std::sync::Arc;
|
|
use std::sync::atomic::AtomicUsize;
|
|
use std::sync::atomic::Ordering;
|
|
use tempfile::TempDir;
|
|
use wiremock::Mock;
|
|
use wiremock::MockServer;
|
|
use wiremock::Request;
|
|
use wiremock::ResponseTemplate;
|
|
use wiremock::matchers::body_json;
|
|
use wiremock::matchers::header;
|
|
use wiremock::matchers::method;
|
|
use wiremock::matchers::path;
|
|
|
|
fn chatgpt_auth() -> CoreAuthProvider {
|
|
CoreAuthProvider::for_test(Some("token"), Some("account_id"))
|
|
}
|
|
|
|
fn base_url_for(server: &MockServer) -> String {
|
|
format!("{}/backend-api", server.uri())
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn upload_local_file_returns_canonical_uri() {
|
|
let server = MockServer::start().await;
|
|
Mock::given(method("POST"))
|
|
.and(path("/backend-api/files"))
|
|
.and(header("chatgpt-account-id", "account_id"))
|
|
.and(body_json(serde_json::json!({
|
|
"file_name": "hello.txt",
|
|
"file_size": 5,
|
|
"use_case": "codex",
|
|
})))
|
|
.respond_with(
|
|
ResponseTemplate::new(200)
|
|
.set_body_json(serde_json::json!({"file_id": "file_123", "upload_url": format!("{}/upload/file_123", server.uri())})),
|
|
)
|
|
.mount(&server)
|
|
.await;
|
|
Mock::given(method("PUT"))
|
|
.and(path("/upload/file_123"))
|
|
.and(header("content-length", "5"))
|
|
.respond_with(ResponseTemplate::new(200))
|
|
.mount(&server)
|
|
.await;
|
|
let finalize_attempts = Arc::new(AtomicUsize::new(0));
|
|
let finalize_attempts_responder = Arc::clone(&finalize_attempts);
|
|
let download_url = format!("{}/download/file_123", server.uri());
|
|
Mock::given(method("POST"))
|
|
.and(path("/backend-api/files/file_123/uploaded"))
|
|
.respond_with(move |_request: &Request| {
|
|
if finalize_attempts_responder.fetch_add(1, Ordering::SeqCst) == 0 {
|
|
return ResponseTemplate::new(200).set_body_json(serde_json::json!({
|
|
"status": "retry"
|
|
}));
|
|
}
|
|
|
|
ResponseTemplate::new(200).set_body_json(serde_json::json!({
|
|
"status": "success",
|
|
"download_url": download_url,
|
|
"file_name": "hello.txt",
|
|
"mime_type": "text/plain",
|
|
"file_size_bytes": 5
|
|
}))
|
|
})
|
|
.mount(&server)
|
|
.await;
|
|
|
|
let base_url = base_url_for(&server);
|
|
let dir = TempDir::new().expect("temp dir");
|
|
let path = dir.path().join("hello.txt");
|
|
tokio::fs::write(&path, b"hello").await.expect("write file");
|
|
|
|
let uploaded = upload_local_file(&base_url, &chatgpt_auth(), &path)
|
|
.await
|
|
.expect("upload succeeds");
|
|
|
|
assert_eq!(uploaded.file_id, "file_123");
|
|
assert_eq!(uploaded.uri, "sediment://file_123");
|
|
assert_eq!(
|
|
uploaded.download_url,
|
|
format!("{}/download/file_123", server.uri())
|
|
);
|
|
assert_eq!(uploaded.file_name, "hello.txt");
|
|
assert_eq!(uploaded.mime_type, Some("text/plain".to_string()));
|
|
assert_eq!(finalize_attempts.load(Ordering::SeqCst), 2);
|
|
}
|
|
}
|