Compare commits

..

12 Commits

Author SHA1 Message Date
aibrahim-oai
d465d71955 Merge branch 'main' into codex/implement-cli-tool-invocation-flow-tests 2025-07-16 22:32:32 -07:00
aibrahim-oai
3d1cfe31a2 Merge branch 'main' into codex/implement-cli-tool-invocation-flow-tests 2025-07-14 15:22:46 -07:00
Ahmed Ibrahim
d6e934f7cd replace stdout with file matching 2025-07-14 11:13:09 -07:00
Ahmed Ibrahim
0b83f2965c fmt 2025-07-14 11:00:54 -07:00
Ahmed Ibrahim
d4dc3b11bc fmt 2025-07-14 10:57:47 -07:00
aibrahim-oai
bcbe02ff1d Merge branch 'main' into codex/implement-cli-tool-invocation-flow-tests 2025-07-12 17:28:25 -07:00
Ahmed Ibrahim
51257e2fd0 Adressing feedback 2025-07-12 17:15:37 -07:00
aibrahim-oai
0ece374c58 Merge branch 'main' into codex/implement-cli-tool-invocation-flow-tests 2025-07-12 17:08:03 -07:00
aibrahim-oai
f532554924 Fix clippy warnings in integration tests 2025-07-11 14:43:58 -07:00
aibrahim-oai
f9609cc9bf Format integration test imports 2025-07-11 14:29:48 -07:00
aibrahim-oai
781798b4ed Use sandbox dirs and env var constant 2025-07-11 14:13:55 -07:00
aibrahim-oai
5bafe0dc59 Update Cargo.lock for new dev dependencies 2025-07-11 14:03:15 -07:00
19 changed files with 271 additions and 474 deletions

18
.vscode/launch.json vendored
View File

@@ -1,18 +0,0 @@
{
"version": "0.2.0",
"configurations": [
{
"type": "lldb",
"request": "launch",
"name": "Cargo launch",
"cargo": {
"cwd": "${workspaceFolder}/codex-rs",
"args": [
"build",
"--bin=codex-tui"
]
},
"args": []
}
]
}

10
.vscode/settings.json vendored
View File

@@ -1,10 +0,0 @@
{
"rust-analyzer.checkOnSave": true,
"rust-analyzer.check.command": "clippy",
"rust-analyzer.check.extraArgs": ["--all-features", "--tests"],
"rust-analyzer.rustfmt.extraArgs": ["--config", "imports_granularity=Item"],
"[rust]": {
"editor.defaultFormatter": "rust-lang.rust-analyzer",
"editor.formatOnSave": true,
}
}

View File

@@ -3,7 +3,3 @@
In the codex-rs folder where the rust code lives:
- Never add or modify any code related to `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR`. You operate in a sandbox where `CODEX_SANDBOX_NETWORK_DISABLED=1` will be set whenever you use the `shell` tool. Any existing code that uses `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR` was authored with this fact in mind. It is often used to early exit out of tests that the author knew you would not be able to run given your sandbox limitations.
After making changes to the rust code run `just fmt` (in `codex-rs` directory) to format the code and `just fix` (in `codex-rs` directory) to fix any linter issues in the code.
Ensure the test suite passes by running `cargo test --all-features` in the `codex-rs` directory.

72
codex-rs/Cargo.lock generated
View File

@@ -399,15 +399,6 @@ version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6099cdc01846bc367c4e7dd630dc5966dccf36b652fae7a74e17b640411a91b2"
[[package]]
name = "block-buffer"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
dependencies = [
"generic-array",
]
[[package]]
name = "bstr"
version = "1.12.0"
@@ -626,6 +617,7 @@ name = "codex-cli"
version = "0.0.0"
dependencies = [
"anyhow",
"assert_cmd",
"clap",
"clap_complete",
"codex-chatgpt",
@@ -636,10 +628,14 @@ dependencies = [
"codex-login",
"codex-mcp-server",
"codex-tui",
"indoc",
"predicates",
"serde_json",
"tempfile",
"tokio",
"tracing",
"tracing-subscriber",
"wiremock",
]
[[package]]
@@ -680,7 +676,6 @@ dependencies = [
"seccompiler",
"serde",
"serde_json",
"sha1",
"strum_macros 0.27.1",
"tempfile",
"thiserror 2.0.12",
@@ -693,7 +688,6 @@ dependencies = [
"tree-sitter",
"tree-sitter-bash",
"uuid",
"walkdir",
"wildmatch",
"wiremock",
]
@@ -943,15 +937,6 @@ version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "cpufeatures"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
dependencies = [
"libc",
]
[[package]]
name = "crc32fast"
version = "1.4.2"
@@ -1026,16 +1011,6 @@ version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
[[package]]
name = "crypto-common"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
dependencies = [
"generic-array",
"typenum",
]
[[package]]
name = "ctor"
version = "0.1.26"
@@ -1186,16 +1161,6 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8"
[[package]]
name = "digest"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"crypto-common",
]
[[package]]
name = "dirs"
version = "6.0.0"
@@ -1685,16 +1650,6 @@ dependencies = [
"byteorder",
]
[[package]]
name = "generic-array"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "getopts"
version = "0.2.23"
@@ -3994,17 +3949,6 @@ dependencies = [
"syn 2.0.104",
]
[[package]]
name = "sha1"
version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]]
name = "sharded-slab"
version = "0.1.7"
@@ -4912,12 +4856,6 @@ dependencies = [
"unicode-width 0.2.0",
]
[[package]]
name = "typenum"
version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f"
[[package]]
name = "unicase"
version = "2.8.1"

View File

@@ -36,3 +36,11 @@ tokio = { version = "1", features = [
] }
tracing = "0.1.41"
tracing-subscriber = "0.3.19"
[dev-dependencies]
assert_cmd = "2"
predicates = "3"
tempfile = "3"
wiremock = "0.6"
tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
indoc = "2"

View File

@@ -0,0 +1,223 @@
#![allow(clippy::unwrap_used)]
//! End-to-end integration tests for the `codex` CLI.
//!
//! These spin up a local [`wiremock`][] server to stand in for the MCP server
//! and then run the real compiled `codex` binary against it. The goal is to
//! verify the high-level request/response flow rather than the details of the
//! individual async functions.
//!
//! [`wiremock`]: https://docs.rs/wiremock
use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
use predicates::prelude::*;
use std::fs;
use std::path::Path;
use tempfile::TempDir;
use wiremock::Mock;
use wiremock::MockServer;
use wiremock::ResponseTemplate;
use wiremock::matchers::method;
use wiremock::matchers::path;
// ----- tests -----
/// Sends a single simple prompt and verifies that the streamed response is
/// surfaced to the user. This exercises the most common "ask a question, get a
/// textual answer" flow.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn full_conversation_turn_integration() {
if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
println!("Skipping test because network is disabled");
return;
}
let server = MockServer::start().await;
Mock::given(method("POST"))
.and(path("/v1/responses"))
.respond_with(
ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(sse_message("Hello, world."), "text/event-stream"),
)
.expect(1)
.mount(&server)
.await;
// Disable retries — the mock server will fail hard if we make an unexpected
// request, so retries only slow the test down.
unsafe {
std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0");
}
let codex_home = TempDir::new().unwrap();
let sandbox = TempDir::new().unwrap();
write_config(codex_home.path(), &server);
// Capture the agent's final message in a file so we can assert on it precisely.
let last_message_file = sandbox.path().join("last_message.txt");
let mut cmd = assert_cmd::Command::cargo_bin("codex").unwrap();
cmd.env("CODEX_HOME", codex_home.path())
.current_dir(sandbox.path())
.arg("exec")
.arg("--skip-git-repo-check")
.arg("--output-last-message")
.arg(&last_message_file)
.arg("Hello");
cmd.assert()
.success()
.stdout(predicate::str::contains("Hello, world."));
// Assert on the captured last message file (more robust than stdout formatting).
let last = fs::read_to_string(&last_message_file).unwrap();
let expected = "Hello, world.";
assert_eq!(last.trim(), expected);
}
/// Simulates a tool invocation (`shell`) followed by a second assistant message
/// once the tool call completes.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn tool_invocation_flow() {
if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
println!("Skipping test because network is disabled");
return;
}
let server = MockServer::start().await;
// The first request returns a function-call item; the second returns the
// final assistant message. Use an atomic counter to serve them in order.
struct SeqResponder {
count: std::sync::atomic::AtomicUsize,
}
impl wiremock::Respond for SeqResponder {
fn respond(&self, _: &wiremock::Request) -> ResponseTemplate {
use std::sync::atomic::Ordering;
match self.count.fetch_add(1, Ordering::SeqCst) {
0 => ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(sse_function_call(), "text/event-stream"),
_ => ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(sse_final_after_call(), "text/event-stream"),
}
}
}
Mock::given(method("POST"))
.and(path("/v1/responses"))
.respond_with(SeqResponder {
count: std::sync::atomic::AtomicUsize::new(0),
})
.expect(2)
.mount(&server)
.await;
unsafe {
std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0");
}
let codex_home = TempDir::new().unwrap();
let sandbox = TempDir::new().unwrap();
write_config(codex_home.path(), &server);
// Capture final assistant message after tool invocation.
let last_message_file = sandbox.path().join("last_message.txt");
let mut cmd = assert_cmd::Command::cargo_bin("codex").unwrap();
cmd.env("CODEX_HOME", codex_home.path())
.current_dir(sandbox.path())
.arg("exec")
.arg("--skip-git-repo-check")
.arg("--output-last-message")
.arg(&last_message_file)
.arg("Run shell");
cmd.assert()
.success()
.stdout(predicate::str::contains("exec echo hi"))
.stdout(predicate::str::contains("hi"));
// Assert that the final assistant message (second response) was 'done'.
let last = fs::read_to_string(&last_message_file).unwrap();
let expected = "done";
assert_eq!(last.trim(), expected);
}
/// Write a minimal `config.toml` pointing the CLI at the mock server.
fn write_config(codex_home: &Path, server: &MockServer) {
fs::write(
codex_home.join("config.toml"),
format!(
r#"
model_provider = "mock"
model = "test-model"
[model_providers.mock]
name = "mock"
base_url = "{}/v1"
env_key = "PATH"
wire_api = "responses"
"#,
server.uri()
),
)
.unwrap();
}
/// Small helper to generate an SSE stream with a single assistant message.
fn sse_message(text: &str) -> String {
const TEMPLATE: &str = r#"event: response.output_item.done
data: {"type":"response.output_item.done","item":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"TEXT_PLACEHOLDER"}]}}
event: response.completed
data: {"type":"response.completed","response":{"id":"resp1","output":[]}}
"#;
TEMPLATE.replace("TEXT_PLACEHOLDER", text)
}
/// Helper to craft an SSE stream that returns a `function_call`.
fn sse_function_call() -> String {
let call = serde_json::json!({
"type": "response.output_item.done",
"item": {
"type": "function_call",
"name": "shell",
"arguments": "{\"command\":[\"echo\",\"hi\"]}",
"call_id": "call1"
}
});
let completed = serde_json::json!({
"type": "response.completed",
"response": {"id": "resp1", "output": []}
});
format!(
"event: response.output_item.done\ndata: {call}\n\n\
event: response.completed\ndata: {completed}\n\n\n"
)
}
/// SSE stream for the assistant's final message after the tool call returns.
fn sse_final_after_call() -> String {
let msg = serde_json::json!({
"type": "response.output_item.done",
"item": {"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "done"}]}
});
let completed = serde_json::json!({
"type": "response.completed",
"response": {"id": "resp2", "output": []}
});
format!(
"event: response.output_item.done\ndata: {msg}\n\n\
event: response.completed\ndata: {completed}\n\n\n"
)
}

View File

@@ -28,7 +28,6 @@ rand = "0.9"
reqwest = { version = "0.12", features = ["json", "stream"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
sha1 = "0.10.6"
strum_macros = "0.27.1"
thiserror = "2.0.12"
time = { version = "0.3", features = ["formatting", "local-offset", "macros"] }
@@ -66,5 +65,4 @@ predicates = "3"
pretty_assertions = "1.4.1"
tempfile = "3"
tokio-test = "0.4"
walkdir = "2.5.0"
wiremock = "0.6"

View File

@@ -51,6 +51,7 @@ use crate::exec::process_exec_tool_call;
use crate::exec_env::create_env;
use crate::flags::OPENAI_STREAM_MAX_RETRIES;
use crate::mcp_connection_manager::McpConnectionManager;
use crate::mcp_connection_manager::try_parse_fully_qualified_tool_name;
use crate::mcp_tool_call::handle_mcp_tool_call;
use crate::models::ContentItem;
use crate::models::FunctionCallOutputPayload;
@@ -1291,7 +1292,7 @@ async fn handle_function_call(
handle_container_exec_with_params(params, sess, sub_id, call_id).await
}
_ => {
match sess.mcp_connection_manager.parse_tool_name(&name) {
match try_parse_fully_qualified_tool_name(&name) {
Some((server, tool_name)) => {
// TODO(mbolin): Determine appropriate timeout for tool call.
let timeout = None;

View File

@@ -7,7 +7,6 @@
//! `"<server><MCP_TOOL_NAME_DELIMITER><tool>"` as the key.
use std::collections::HashMap;
use std::collections::HashSet;
use std::time::Duration;
use anyhow::Context;
@@ -17,12 +16,8 @@ use codex_mcp_client::McpClient;
use mcp_types::ClientCapabilities;
use mcp_types::Implementation;
use mcp_types::Tool;
use sha1::Digest;
use sha1::Sha1;
use tokio::task::JoinSet;
use tracing::info;
use tracing::warn;
use crate::config_types::McpServerConfig;
@@ -31,8 +26,7 @@ use crate::config_types::McpServerConfig;
///
/// OpenAI requires tool names to conform to `^[a-zA-Z0-9_-]+$`, so we must
/// choose a delimiter from this character set.
const MCP_TOOL_NAME_DELIMITER: &str = "__";
const MAX_TOOL_NAME_LENGTH: usize = 25;
const MCP_TOOL_NAME_DELIMITER: &str = "__OAI_CODEX_MCP__";
/// Timeout for the `tools/list` request.
const LIST_TOOLS_TIMEOUT: Duration = Duration::from_secs(10);
@@ -41,46 +35,16 @@ const LIST_TOOLS_TIMEOUT: Duration = Duration::from_secs(10);
/// spawned successfully.
pub type ClientStartErrors = HashMap<String, anyhow::Error>;
fn qualify_tools(tools: Vec<ToolInfo>) -> HashMap<String, ToolInfo> {
let mut used_names = HashSet::new();
let mut qualified_tools = HashMap::new();
for tool in tools {
let mut qualified_name = format!(
"{}{}{}",
tool.server_name, MCP_TOOL_NAME_DELIMITER, tool.tool_name
);
if qualified_name.len() > MAX_TOOL_NAME_LENGTH {
let mut hasher = Sha1::new();
hasher.update(qualified_name.as_bytes());
let sha1 = hasher.finalize();
let sha1_str = format!("{sha1:x}");
if MAX_TOOL_NAME_LENGTH > sha1_str.len() {
// Truncate to make room for the hash suffix
let prefix_len = MAX_TOOL_NAME_LENGTH - sha1_str.len();
qualified_name = format!("{}{}", &qualified_name[..prefix_len], sha1_str);
} else {
// Not enough space for the full hash; fall back to a truncated hash only
qualified_name = sha1_str[..MAX_TOOL_NAME_LENGTH].to_string();
}
}
if used_names.contains(&qualified_name) {
warn!("skipping duplicated tool {}", qualified_name);
continue;
}
used_names.insert(qualified_name.clone());
qualified_tools.insert(qualified_name, tool);
}
qualified_tools
fn fully_qualified_tool_name(server: &str, tool: &str) -> String {
format!("{server}{MCP_TOOL_NAME_DELIMITER}{tool}")
}
struct ToolInfo {
server_name: String,
tool_name: String,
tool: Tool,
pub(crate) fn try_parse_fully_qualified_tool_name(fq_name: &str) -> Option<(String, String)> {
let (server, tool) = fq_name.split_once(MCP_TOOL_NAME_DELIMITER)?;
if server.is_empty() || tool.is_empty() {
return None;
}
Some((server.to_string(), tool.to_string()))
}
/// A thin wrapper around a set of running [`McpClient`] instances.
@@ -93,7 +57,7 @@ pub(crate) struct McpConnectionManager {
clients: HashMap<String, std::sync::Arc<McpClient>>,
/// Fully qualified tool name -> tool instance.
tools: HashMap<String, ToolInfo>,
tools: HashMap<String, Tool>,
}
impl McpConnectionManager {
@@ -177,9 +141,7 @@ impl McpConnectionManager {
}
}
let all_tools = list_all_tools(&clients).await?;
let tools = qualify_tools(all_tools);
let tools = list_all_tools(&clients).await?;
Ok((Self { clients, tools }, errors))
}
@@ -187,10 +149,7 @@ impl McpConnectionManager {
/// Returns a single map that contains **all** tools. Each key is the
/// fully-qualified name for the tool.
pub fn list_all_tools(&self) -> HashMap<String, Tool> {
self.tools
.iter()
.map(|(name, tool)| (name.clone(), tool.tool.clone()))
.collect()
self.tools.clone()
}
/// Invoke the tool indicated by the (server, tool) pair.
@@ -212,19 +171,13 @@ impl McpConnectionManager {
.await
.with_context(|| format!("tool call failed for `{server}/{tool}`"))
}
pub fn parse_tool_name(&self, tool_name: &str) -> Option<(String, String)> {
self.tools
.get(tool_name)
.map(|tool| (tool.server_name.clone(), tool.tool_name.clone()))
}
}
/// Query every server for its available tools and return a single map that
/// contains **all** tools. Each key is the fully-qualified name for the tool.
async fn list_all_tools(
pub async fn list_all_tools(
clients: &HashMap<String, std::sync::Arc<McpClient>>,
) -> Result<Vec<ToolInfo>> {
) -> Result<HashMap<String, Tool>> {
let mut join_set = JoinSet::new();
// Spawn one task per server so we can query them concurrently. This
@@ -241,19 +194,18 @@ async fn list_all_tools(
});
}
let mut aggregated: Vec<ToolInfo> = Vec::with_capacity(join_set.len());
let mut aggregated: HashMap<String, Tool> = HashMap::with_capacity(join_set.len());
while let Some(join_res) = join_set.join_next().await {
let (server_name, list_result) = join_res?;
let list_result = list_result?;
for tool in list_result.tools {
let tool_info = ToolInfo {
server_name: server_name.clone(),
tool_name: tool.name.clone(),
tool,
};
aggregated.push(tool_info);
// TODO(mbolin): escape tool names that contain invalid characters.
let fq_name = fully_qualified_tool_name(&server_name, &tool.name);
if aggregated.insert(fq_name.clone(), tool).is_some() {
panic!("tool name collision for '{fq_name}': suspicious");
}
}
}
@@ -272,84 +224,3 @@ fn is_valid_mcp_server_name(server_name: &str) -> bool {
.chars()
.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
}
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
use super::*;
use mcp_types::ToolInputSchema;
fn create_test_tool(server_name: &str, tool_name: &str) -> ToolInfo {
ToolInfo {
server_name: server_name.to_string(),
tool_name: tool_name.to_string(),
tool: Tool {
annotations: None,
description: Some(format!("Test tool: {tool_name}")),
input_schema: ToolInputSchema {
properties: None,
required: None,
r#type: "object".to_string(),
},
name: tool_name.to_string(),
},
}
}
#[test]
fn test_qualify_tools_short_non_duplicated_names() {
let tools = vec![
create_test_tool("server1", "tool1"),
create_test_tool("server1", "tool2"),
];
let qualified_tools = qualify_tools(tools);
assert_eq!(qualified_tools.len(), 2);
assert!(qualified_tools.contains_key("server1__tool1"));
assert!(qualified_tools.contains_key("server1__tool2"));
}
#[test]
fn test_qualify_tools_duplicated_names_skipped() {
let tools = vec![
create_test_tool("server1", "duplicate_tool"),
create_test_tool("server1", "duplicate_tool"),
];
let qualified_tools = qualify_tools(tools);
// Only the first tool should remain, the second is skipped
assert_eq!(qualified_tools.len(), 1);
assert!(qualified_tools.contains_key("server1__duplicate_tool"));
}
#[test]
fn test_qualify_tools_long_names_same_server() {
let server_name = "my_server";
let tools = vec![
create_test_tool(
server_name,
"extremely_lengthy_function_name_that_absolutely_surpasses_all_reasonable_limits",
),
create_test_tool(
server_name,
"yet_another_extremely_lengthy_function_name_that_absolutely_surpasses_all_reasonable_limits",
),
];
let qualified_tools = qualify_tools(tools);
assert_eq!(qualified_tools.len(), 2);
let mut keys: Vec<_> = qualified_tools.keys().cloned().collect();
keys.sort();
assert_eq!(keys[0].len(), 25);
assert_eq!(keys[0], "1c3987bd9c50b826cbe168796");
assert_eq!(keys[1].len(), 25);
assert_eq!(keys[1], "a02e507efc5a9de88637e4366");
}
}

View File

@@ -153,16 +153,14 @@ struct LogFileInfo {
}
fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFileInfo> {
// Resolve ~/.codex/sessions/YYYY/MM/DD and create it if missing.
let timestamp = OffsetDateTime::now_local()
.map_err(|e| IoError::other(format!("failed to get local time: {e}")))?;
// Resolve ~/.codex/sessions and create it if missing.
let mut dir = config.codex_home.clone();
dir.push(SESSIONS_SUBDIR);
dir.push(timestamp.year().to_string());
dir.push(format!("{:02}", u8::from(timestamp.month())));
dir.push(format!("{:02}", timestamp.day()));
fs::create_dir_all(&dir)?;
let timestamp = OffsetDateTime::now_local()
.map_err(|e| IoError::other(format!("failed to get local time: {e}")))?;
// Custom format for YYYY-MM-DDThh-mm-ss. Use `-` instead of `:` for
// compatibility with filesystems that do not allow colons in filenames.
let format: &[FormatItem] =

View File

@@ -2,12 +2,7 @@
use assert_cmd::Command as AssertCommand;
use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
use serde_json::Value;
use std::time::Duration;
use std::time::Instant;
use tempfile::TempDir;
use uuid::Uuid;
use walkdir::WalkDir;
use wiremock::Mock;
use wiremock::MockServer;
use wiremock::ResponseTemplate;
@@ -122,154 +117,3 @@ async fn responses_api_stream_cli() {
let stdout = String::from_utf8_lossy(&output.stdout);
assert!(stdout.contains("fixture hello"));
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn integration_creates_and_checks_session_file() {
// Honor sandbox network restrictions for CI parity with the other tests.
if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
println!(
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
);
return;
}
// 1. Temp home so we read/write isolated session files.
let home = TempDir::new().unwrap();
// 2. Unique marker we'll look for in the session log.
let marker = format!("integration-test-{}", Uuid::new_v4());
let prompt = format!("echo {marker}");
// 3. Use the same offline SSE fixture as responses_api_stream_cli so the test is hermetic.
let fixture =
std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/cli_responses_fixture.sse");
// 4. Run the codex CLI through cargo (ensures the right bin is built) and invoke `exec`,
// which is what records a session.
let mut cmd = AssertCommand::new("cargo");
cmd.arg("run")
.arg("-p")
.arg("codex-cli")
.arg("--quiet")
.arg("--")
.arg("exec")
.arg("--skip-git-repo-check")
.arg("-C")
.arg(env!("CARGO_MANIFEST_DIR"))
.arg(&prompt);
cmd.env("CODEX_HOME", home.path())
.env("OPENAI_API_KEY", "dummy")
.env("CODEX_RS_SSE_FIXTURE", &fixture)
// Required for CLI arg parsing even though fixture short-circuits network usage.
.env("OPENAI_BASE_URL", "http://unused.local");
let output = cmd.output().unwrap();
assert!(
output.status.success(),
"codex-cli exec failed: {}",
String::from_utf8_lossy(&output.stderr)
);
// 5. Sessions are written asynchronously; wait briefly for the directory to appear.
let sessions_dir = home.path().join("sessions");
let start = Instant::now();
while !sessions_dir.exists() && start.elapsed() < Duration::from_secs(2) {
std::thread::sleep(Duration::from_millis(50));
}
// 6. Scan all session files and find the one that contains our marker.
let mut matching_files = vec![];
for entry in WalkDir::new(&sessions_dir) {
let entry = entry.unwrap();
if entry.file_type().is_file() && entry.file_name().to_string_lossy().ends_with(".jsonl") {
let path = entry.path();
let content = std::fs::read_to_string(path).unwrap();
let mut lines = content.lines();
// Skip SessionMeta (first line)
let _ = lines.next();
for line in lines {
let item: Value = serde_json::from_str(line).unwrap();
if let Some("message") = item.get("type").and_then(|t| t.as_str()) {
if let Some(content) = item.get("content") {
if content.to_string().contains(&marker) {
matching_files.push(path.to_owned());
break;
}
}
}
}
}
}
assert_eq!(
matching_files.len(),
1,
"Expected exactly one session file containing the marker, found {}",
matching_files.len()
);
let path = &matching_files[0];
// 7. Verify directory structure: sessions/YYYY/MM/DD/filename.jsonl
let rel = match path.strip_prefix(&sessions_dir) {
Ok(r) => r,
Err(_) => panic!("session file should live under sessions/"),
};
let comps: Vec<String> = rel
.components()
.map(|c| c.as_os_str().to_string_lossy().into_owned())
.collect();
assert_eq!(
comps.len(),
4,
"Expected sessions/YYYY/MM/DD/<file>, got {rel:?}"
);
let year = &comps[0];
let month = &comps[1];
let day = &comps[2];
assert!(
year.len() == 4 && year.chars().all(|c| c.is_ascii_digit()),
"Year dir not 4-digit numeric: {year}"
);
assert!(
month.len() == 2 && month.chars().all(|c| c.is_ascii_digit()),
"Month dir not zero-padded 2-digit numeric: {month}"
);
assert!(
day.len() == 2 && day.chars().all(|c| c.is_ascii_digit()),
"Day dir not zero-padded 2-digit numeric: {day}"
);
// Range checks (best-effort; won't fail on leading zeros)
if let Ok(m) = month.parse::<u8>() {
assert!((1..=12).contains(&m), "Month out of range: {m}");
}
if let Ok(d) = day.parse::<u8>() {
assert!((1..=31).contains(&d), "Day out of range: {d}");
}
// 8. Parse SessionMeta line and basic sanity checks.
let content = std::fs::read_to_string(path).unwrap();
let mut lines = content.lines();
let meta: Value = serde_json::from_str(lines.next().unwrap()).unwrap();
assert!(meta.get("id").is_some(), "SessionMeta missing id");
assert!(
meta.get("timestamp").is_some(),
"SessionMeta missing timestamp"
);
// 9. Confirm at least one message contains the marker.
let mut found_message = false;
for line in lines {
let item: Value = serde_json::from_str(line).unwrap();
if item.get("type").map(|t| t == "message").unwrap_or(false) {
if let Some(content) = item.get("content") {
if content.to_string().contains(&marker) {
found_message = true;
break;
}
}
}
}
assert!(
found_message,
"No message found in session file containing the marker"
);
}

View File

@@ -23,10 +23,3 @@ file-search *args:
# format code
fmt:
cargo fmt -- --config imports_granularity=Item
fix:
cargo clippy --fix --all-features --tests --allow-dirty
install:
rustup show active-toolchain
cargo fetch

View File

@@ -1,3 +0,0 @@
[toolchain]
channel = "1.88.0"
components = [ "clippy", "rustfmt", "rust-src"]

View File

@@ -18,15 +18,8 @@ use crossterm::event::KeyEvent;
use crossterm::event::MouseEvent;
use crossterm::event::MouseEventKind;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::mpsc::Receiver;
use std::sync::mpsc::channel;
use std::thread;
use std::time::Duration;
/// Time window for debouncing redraw requests.
const REDRAW_DEBOUNCE: Duration = Duration::from_millis(100);
/// Top-level application state: which full-screen view is currently active.
#[allow(clippy::large_enum_variant)]
@@ -53,9 +46,6 @@ pub(crate) struct App<'a> {
file_search: FileSearchManager,
/// True when a redraw has been scheduled but not yet executed.
pending_redraw: Arc<Mutex<bool>>,
/// Stored parameters needed to instantiate the ChatWidget later, e.g.,
/// after dismissing the Git-repo warning.
chat_args: Option<ChatWidgetArgs>,
@@ -70,7 +60,7 @@ struct ChatWidgetArgs {
initial_images: Vec<PathBuf>,
}
impl App<'_> {
impl<'a> App<'a> {
pub(crate) fn new(
config: Config,
initial_prompt: Option<String>,
@@ -80,7 +70,6 @@ impl App<'_> {
) -> Self {
let (app_event_tx, app_event_rx) = channel();
let app_event_tx = AppEventSender::new(app_event_tx);
let pending_redraw = Arc::new(Mutex::new(false));
let scroll_event_helper = ScrollEventHelper::new(app_event_tx.clone());
// Spawn a dedicated thread for reading the crossterm event loop and
@@ -94,7 +83,7 @@ impl App<'_> {
app_event_tx.send(AppEvent::KeyEvent(key_event));
}
crossterm::event::Event::Resize(_, _) => {
app_event_tx.send(AppEvent::RequestRedraw);
app_event_tx.send(AppEvent::Redraw);
}
crossterm::event::Event::Mouse(MouseEvent {
kind: MouseEventKind::ScrollUp,
@@ -163,7 +152,6 @@ impl App<'_> {
app_state,
config,
file_search,
pending_redraw,
chat_args,
}
}
@@ -174,29 +162,6 @@ impl App<'_> {
self.app_event_tx.clone()
}
/// Schedule a redraw if one is not already pending.
#[allow(clippy::unwrap_used)]
fn schedule_redraw(&self) {
{
#[allow(clippy::unwrap_used)]
let mut flag = self.pending_redraw.lock().unwrap();
if *flag {
return;
}
*flag = true;
}
let tx = self.app_event_tx.clone();
let pending_redraw = self.pending_redraw.clone();
thread::spawn(move || {
thread::sleep(REDRAW_DEBOUNCE);
tx.send(AppEvent::Redraw);
#[allow(clippy::unwrap_used)]
let mut f = pending_redraw.lock().unwrap();
*f = false;
});
}
pub(crate) fn run(
&mut self,
terminal: &mut tui::Tui,
@@ -204,13 +169,10 @@ impl App<'_> {
) -> Result<()> {
// Insert an event to trigger the first render.
let app_event_tx = self.app_event_tx.clone();
app_event_tx.send(AppEvent::RequestRedraw);
app_event_tx.send(AppEvent::Redraw);
while let Ok(event) = self.app_event_rx.recv() {
match event {
AppEvent::RequestRedraw => {
self.schedule_redraw();
}
AppEvent::Redraw => {
self.draw_next_frame(terminal)?;
}
@@ -287,7 +249,7 @@ impl App<'_> {
Vec::new(),
));
self.app_state = AppState::Chat { widget: new_widget };
self.app_event_tx.send(AppEvent::RequestRedraw);
self.app_event_tx.send(AppEvent::Redraw);
}
SlashCommand::ToggleMouseMode => {
if let Err(e) = mouse_capture.toggle() {
@@ -374,7 +336,7 @@ impl App<'_> {
args.initial_images,
));
self.app_state = AppState::Chat { widget };
self.app_event_tx.send(AppEvent::RequestRedraw);
self.app_event_tx.send(AppEvent::Redraw);
}
GitWarningOutcome::Quit => {
self.app_event_tx.send(AppEvent::ExitRequest);

View File

@@ -8,10 +8,6 @@ use crate::slash_command::SlashCommand;
pub(crate) enum AppEvent {
CodexEvent(Event),
/// Request a redraw which will be debounced by the [`App`].
RequestRedraw,
/// Actually draw the next frame.
Redraw,
KeyEvent(KeyEvent),

View File

@@ -212,7 +212,7 @@ impl BottomPane<'_> {
}
pub(crate) fn request_redraw(&self) {
self.app_event_tx.send(AppEvent::RequestRedraw)
self.app_event_tx.send(AppEvent::Redraw)
}
/// Returns true when a popup inside the composer is visible.

View File

@@ -24,7 +24,7 @@ impl StatusIndicatorView {
}
}
impl BottomPaneView<'_> for StatusIndicatorView {
impl<'a> BottomPaneView<'a> for StatusIndicatorView {
fn update_status_text(&mut self, text: String) -> ConditionalUpdate {
self.update_text(text);
ConditionalUpdate::NeedsRedraw

View File

@@ -431,7 +431,7 @@ impl ChatWidget<'_> {
}
fn request_redraw(&mut self) {
self.app_event_tx.send(AppEvent::RequestRedraw);
self.app_event_tx.send(AppEvent::Redraw);
}
pub(crate) fn add_diff_output(&mut self, diff_output: String) {

View File

@@ -65,7 +65,7 @@ impl StatusIndicatorWidget {
std::thread::sleep(Duration::from_millis(200));
counter = counter.wrapping_add(1);
frame_idx_clone.store(counter, Ordering::Relaxed);
app_event_tx_clone.send(AppEvent::RequestRedraw);
app_event_tx_clone.send(AppEvent::Redraw);
}
});
}