Merge branch 'main' into codex/implement-cli-tool-invocation-flow-tests

replace stdout with file matching
2026-04-23 07:51:51 +03:00 · 2025-07-16 22:32:32 -07:00 · 2025-07-14 15:22:46 -07:00 · 2025-07-14 11:13:09 -07:00 · 2025-07-14 11:00:54 -07:00 · 2025-07-14 10:57:47 -07:00
19 changed files with 271 additions and 474 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -1,18 +0,0 @@
-{
-    "version": "0.2.0",
-    "configurations": [
-        {
-            "type": "lldb",
-            "request": "launch",
-            "name": "Cargo launch",
-            "cargo": {
-                "cwd": "${workspaceFolder}/codex-rs",
-                "args": [
-                    "build",
-                    "--bin=codex-tui"
-                ]
-            },
-            "args": []
-        }
-    ]
-}
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,10 +0,0 @@
-{
-    "rust-analyzer.checkOnSave": true,
-    "rust-analyzer.check.command": "clippy",
-    "rust-analyzer.check.extraArgs": ["--all-features", "--tests"],
-    "rust-analyzer.rustfmt.extraArgs": ["--config", "imports_granularity=Item"],
-    "[rust]": {
-        "editor.defaultFormatter": "rust-lang.rust-analyzer",
-        "editor.formatOnSave": true,
-    }
-}
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -3,7 +3,3 @@
 In the codex-rs folder where the rust code lives:

 - Never add or modify any code related to `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR`. You operate in a sandbox where `CODEX_SANDBOX_NETWORK_DISABLED=1` will be set whenever you use the `shell` tool. Any existing code that uses `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR` was authored with this fact in mind. It is often used to early exit out of tests that the author knew you would not be able to run given your sandbox limitations.
-
-After making changes to the rust code run `just fmt` (in `codex-rs` directory) to format the code and `just fix` (in `codex-rs` directory) to fix any linter issues in the code.
-
-Ensure the test suite passes by running `cargo test --all-features` in the `codex-rs` directory.
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -399,15 +399,6 @@ version = "2.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6099cdc01846bc367c4e7dd630dc5966dccf36b652fae7a74e17b640411a91b2"

-[[package]]
-name = "block-buffer"
-version = "0.10.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
-dependencies = [
- "generic-array",
-]
-
 [[package]]
 name = "bstr"
 version = "1.12.0"
@@ -626,6 +617,7 @@ name = "codex-cli"
 version = "0.0.0"
 dependencies = [
 "anyhow",
+ "assert_cmd",
 "clap",
 "clap_complete",
 "codex-chatgpt",
@@ -636,10 +628,14 @@ dependencies = [
 "codex-login",
 "codex-mcp-server",
 "codex-tui",
+ "indoc",
+ "predicates",
 "serde_json",
+ "tempfile",
 "tokio",
 "tracing",
 "tracing-subscriber",
+ "wiremock",
 ]

 [[package]]
@@ -680,7 +676,6 @@ dependencies = [
 "seccompiler",
 "serde",
 "serde_json",
- "sha1",
 "strum_macros 0.27.1",
 "tempfile",
 "thiserror 2.0.12",
@@ -693,7 +688,6 @@ dependencies = [
 "tree-sitter",
 "tree-sitter-bash",
 "uuid",
- "walkdir",
 "wildmatch",
 "wiremock",
 ]
@@ -943,15 +937,6 @@ version = "0.8.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"

-[[package]]
-name = "cpufeatures"
-version = "0.2.17"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
-dependencies = [
- "libc",
-]
-
 [[package]]
 name = "crc32fast"
 version = "1.4.2"
@@ -1026,16 +1011,6 @@ version = "0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"

-[[package]]
-name = "crypto-common"
-version = "0.1.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
-dependencies = [
- "generic-array",
- "typenum",
-]
-
 [[package]]
 name = "ctor"
 version = "0.1.26"
@@ -1186,16 +1161,6 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8"

-[[package]]
-name = "digest"
-version = "0.10.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
-dependencies = [
- "block-buffer",
- "crypto-common",
-]
-
 [[package]]
 name = "dirs"
 version = "6.0.0"
@@ -1685,16 +1650,6 @@ dependencies = [
 "byteorder",
 ]

-[[package]]
-name = "generic-array"
-version = "0.14.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
-dependencies = [
- "typenum",
- "version_check",
-]
-
 [[package]]
 name = "getopts"
 version = "0.2.23"
@@ -3994,17 +3949,6 @@ dependencies = [
 "syn 2.0.104",
 ]

-[[package]]
-name = "sha1"
-version = "0.10.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
-dependencies = [
- "cfg-if",
- "cpufeatures",
- "digest",
-]
-
 [[package]]
 name = "sharded-slab"
 version = "0.1.7"
@@ -4912,12 +4856,6 @@ dependencies = [
 "unicode-width 0.2.0",
 ]

-[[package]]
-name = "typenum"
-version = "1.18.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f"
-
 [[package]]
 name = "unicase"
 version = "2.8.1"
--- a/codex-rs/cli/Cargo.toml
+++ b/codex-rs/cli/Cargo.toml
@@ -36,3 +36,11 @@ tokio = { version = "1", features = [
 ] }
 tracing = "0.1.41"
 tracing-subscriber = "0.3.19"
+
+[dev-dependencies]
+assert_cmd = "2"
+predicates = "3"
+tempfile = "3"
+wiremock = "0.6"
+tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
+indoc = "2"
--- a/codex-rs/cli/tests/integration.rs
+++ b/codex-rs/cli/tests/integration.rs
@@ -0,0 +1,223 @@
+#![allow(clippy::unwrap_used)]
+
+//! End-to-end integration tests for the `codex` CLI.
+//!
+//! These spin up a local [`wiremock`][] server to stand in for the MCP server
+//! and then run the real compiled `codex` binary against it. The goal is to
+//! verify the high-level request/response flow rather than the details of the
+//! individual async functions.
+//!
+//! [`wiremock`]: https://docs.rs/wiremock
+
+use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use predicates::prelude::*;
+use std::fs;
+use std::path::Path;
+use tempfile::TempDir;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+// ----- tests -----
+
+/// Sends a single simple prompt and verifies that the streamed response is
+/// surfaced to the user. This exercises the most common "ask a question, get a
+/// textual answer" flow.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn full_conversation_turn_integration() {
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!("Skipping test because network is disabled");
+        return;
+    }
+
+    let server = MockServer::start().await;
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(
+            ResponseTemplate::new(200)
+                .insert_header("content-type", "text/event-stream")
+                .set_body_raw(sse_message("Hello, world."), "text/event-stream"),
+        )
+        .expect(1)
+        .mount(&server)
+        .await;
+
+    // Disable retries — the mock server will fail hard if we make an unexpected
+    // request, so retries only slow the test down.
+    unsafe {
+        std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
+        std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0");
+    }
+
+    let codex_home = TempDir::new().unwrap();
+    let sandbox = TempDir::new().unwrap();
+    write_config(codex_home.path(), &server);
+
+    // Capture the agent's final message in a file so we can assert on it precisely.
+    let last_message_file = sandbox.path().join("last_message.txt");
+
+    let mut cmd = assert_cmd::Command::cargo_bin("codex").unwrap();
+    cmd.env("CODEX_HOME", codex_home.path())
+        .current_dir(sandbox.path())
+        .arg("exec")
+        .arg("--skip-git-repo-check")
+        .arg("--output-last-message")
+        .arg(&last_message_file)
+        .arg("Hello");
+
+    cmd.assert()
+        .success()
+        .stdout(predicate::str::contains("Hello, world."));
+
+    // Assert on the captured last message file (more robust than stdout formatting).
+    let last = fs::read_to_string(&last_message_file).unwrap();
+    let expected = "Hello, world.";
+    assert_eq!(last.trim(), expected);
+}
+
+/// Simulates a tool invocation (`shell`) followed by a second assistant message
+/// once the tool call completes.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn tool_invocation_flow() {
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!("Skipping test because network is disabled");
+        return;
+    }
+
+    let server = MockServer::start().await;
+
+    // The first request returns a function-call item; the second returns the
+    // final assistant message. Use an atomic counter to serve them in order.
+    struct SeqResponder {
+        count: std::sync::atomic::AtomicUsize,
+    }
+    impl wiremock::Respond for SeqResponder {
+        fn respond(&self, _: &wiremock::Request) -> ResponseTemplate {
+            use std::sync::atomic::Ordering;
+            match self.count.fetch_add(1, Ordering::SeqCst) {
+                0 => ResponseTemplate::new(200)
+                    .insert_header("content-type", "text/event-stream")
+                    .set_body_raw(sse_function_call(), "text/event-stream"),
+                _ => ResponseTemplate::new(200)
+                    .insert_header("content-type", "text/event-stream")
+                    .set_body_raw(sse_final_after_call(), "text/event-stream"),
+            }
+        }
+    }
+
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(SeqResponder {
+            count: std::sync::atomic::AtomicUsize::new(0),
+        })
+        .expect(2)
+        .mount(&server)
+        .await;
+
+    unsafe {
+        std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
+        std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0");
+    }
+
+    let codex_home = TempDir::new().unwrap();
+    let sandbox = TempDir::new().unwrap();
+    write_config(codex_home.path(), &server);
+
+    // Capture final assistant message after tool invocation.
+    let last_message_file = sandbox.path().join("last_message.txt");
+
+    let mut cmd = assert_cmd::Command::cargo_bin("codex").unwrap();
+    cmd.env("CODEX_HOME", codex_home.path())
+        .current_dir(sandbox.path())
+        .arg("exec")
+        .arg("--skip-git-repo-check")
+        .arg("--output-last-message")
+        .arg(&last_message_file)
+        .arg("Run shell");
+
+    cmd.assert()
+        .success()
+        .stdout(predicate::str::contains("exec echo hi"))
+        .stdout(predicate::str::contains("hi"));
+
+    // Assert that the final assistant message (second response) was 'done'.
+    let last = fs::read_to_string(&last_message_file).unwrap();
+    let expected = "done";
+    assert_eq!(last.trim(), expected);
+}
+
+/// Write a minimal `config.toml` pointing the CLI at the mock server.
+fn write_config(codex_home: &Path, server: &MockServer) {
+    fs::write(
+        codex_home.join("config.toml"),
+        format!(
+            r#"
+model_provider = "mock"
+model = "test-model"
+
+[model_providers.mock]
+name = "mock"
+base_url = "{}/v1"
+env_key = "PATH"
+wire_api = "responses"
+"#,
+            server.uri()
+        ),
+    )
+    .unwrap();
+}
+
+/// Small helper to generate an SSE stream with a single assistant message.
+fn sse_message(text: &str) -> String {
+    const TEMPLATE: &str = r#"event: response.output_item.done
+data: {"type":"response.output_item.done","item":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"TEXT_PLACEHOLDER"}]}}
+
+event: response.completed
+data: {"type":"response.completed","response":{"id":"resp1","output":[]}}
+
+
+"#;
+
+    TEMPLATE.replace("TEXT_PLACEHOLDER", text)
+}
+
+/// Helper to craft an SSE stream that returns a `function_call`.
+fn sse_function_call() -> String {
+    let call = serde_json::json!({
+        "type": "response.output_item.done",
+        "item": {
+            "type": "function_call",
+            "name": "shell",
+            "arguments": "{\"command\":[\"echo\",\"hi\"]}",
+            "call_id": "call1"
+        }
+    });
+    let completed = serde_json::json!({
+        "type": "response.completed",
+        "response": {"id": "resp1", "output": []}
+    });
+
+    format!(
+        "event: response.output_item.done\ndata: {call}\n\n\
+event: response.completed\ndata: {completed}\n\n\n"
+    )
+}
+
+/// SSE stream for the assistant's final message after the tool call returns.
+fn sse_final_after_call() -> String {
+    let msg = serde_json::json!({
+        "type": "response.output_item.done",
+        "item": {"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "done"}]}
+    });
+    let completed = serde_json::json!({
+        "type": "response.completed",
+        "response": {"id": "resp2", "output": []}
+    });
+
+    format!(
+        "event: response.output_item.done\ndata: {msg}\n\n\
+event: response.completed\ndata: {completed}\n\n\n"
+    )
+}
--- a/codex-rs/core/Cargo.toml
+++ b/codex-rs/core/Cargo.toml
@@ -28,7 +28,6 @@ rand = "0.9"
 reqwest = { version = "0.12", features = ["json", "stream"] }
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
-sha1 = "0.10.6"
 strum_macros = "0.27.1"
 thiserror = "2.0.12"
 time = { version = "0.3", features = ["formatting", "local-offset", "macros"] }
@@ -66,5 +65,4 @@ predicates = "3"
 pretty_assertions = "1.4.1"
 tempfile = "3"
 tokio-test = "0.4"
-walkdir = "2.5.0"
 wiremock = "0.6"
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -51,6 +51,7 @@ use crate::exec::process_exec_tool_call;
 use crate::exec_env::create_env;
 use crate::flags::OPENAI_STREAM_MAX_RETRIES;
 use crate::mcp_connection_manager::McpConnectionManager;
+use crate::mcp_connection_manager::try_parse_fully_qualified_tool_name;
 use crate::mcp_tool_call::handle_mcp_tool_call;
 use crate::models::ContentItem;
 use crate::models::FunctionCallOutputPayload;
@@ -1291,7 +1292,7 @@ async fn handle_function_call(
            handle_container_exec_with_params(params, sess, sub_id, call_id).await
        }
        _ => {
-            match sess.mcp_connection_manager.parse_tool_name(&name) {
+            match try_parse_fully_qualified_tool_name(&name) {
                Some((server, tool_name)) => {
                    // TODO(mbolin): Determine appropriate timeout for tool call.
                    let timeout = None;
--- a/codex-rs/core/src/mcp_connection_manager.rs
+++ b/codex-rs/core/src/mcp_connection_manager.rs
@@ -7,7 +7,6 @@
 //! `"<server><MCP_TOOL_NAME_DELIMITER><tool>"` as the key.

 use std::collections::HashMap;
-use std::collections::HashSet;
 use std::time::Duration;

 use anyhow::Context;
@@ -17,12 +16,8 @@ use codex_mcp_client::McpClient;
 use mcp_types::ClientCapabilities;
 use mcp_types::Implementation;
 use mcp_types::Tool;
-
-use sha1::Digest;
-use sha1::Sha1;
 use tokio::task::JoinSet;
 use tracing::info;
-use tracing::warn;

 use crate::config_types::McpServerConfig;

@@ -31,8 +26,7 @@ use crate::config_types::McpServerConfig;
 ///
 /// OpenAI requires tool names to conform to `^[a-zA-Z0-9_-]+$`, so we must
 /// choose a delimiter from this character set.
-const MCP_TOOL_NAME_DELIMITER: &str = "__";
-const MAX_TOOL_NAME_LENGTH: usize = 25;
+const MCP_TOOL_NAME_DELIMITER: &str = "__OAI_CODEX_MCP__";

 /// Timeout for the `tools/list` request.
 const LIST_TOOLS_TIMEOUT: Duration = Duration::from_secs(10);
@@ -41,46 +35,16 @@ const LIST_TOOLS_TIMEOUT: Duration = Duration::from_secs(10);
 /// spawned successfully.
 pub type ClientStartErrors = HashMap<String, anyhow::Error>;

-fn qualify_tools(tools: Vec<ToolInfo>) -> HashMap<String, ToolInfo> {
-    let mut used_names = HashSet::new();
-    let mut qualified_tools = HashMap::new();
-    for tool in tools {
-        let mut qualified_name = format!(
-            "{}{}{}",
-            tool.server_name, MCP_TOOL_NAME_DELIMITER, tool.tool_name
-        );
-        if qualified_name.len() > MAX_TOOL_NAME_LENGTH {
-            let mut hasher = Sha1::new();
-            hasher.update(qualified_name.as_bytes());
-            let sha1 = hasher.finalize();
-            let sha1_str = format!("{sha1:x}");
-
-            if MAX_TOOL_NAME_LENGTH > sha1_str.len() {
-                // Truncate to make room for the hash suffix
-                let prefix_len = MAX_TOOL_NAME_LENGTH - sha1_str.len();
-                qualified_name = format!("{}{}", &qualified_name[..prefix_len], sha1_str);
-            } else {
-                // Not enough space for the full hash; fall back to a truncated hash only
-                qualified_name = sha1_str[..MAX_TOOL_NAME_LENGTH].to_string();
-            }
-        }
-
-        if used_names.contains(&qualified_name) {
-            warn!("skipping duplicated tool {}", qualified_name);
-            continue;
-        }
-
-        used_names.insert(qualified_name.clone());
-        qualified_tools.insert(qualified_name, tool);
-    }
-
-    qualified_tools
+fn fully_qualified_tool_name(server: &str, tool: &str) -> String {
+    format!("{server}{MCP_TOOL_NAME_DELIMITER}{tool}")
 }

-struct ToolInfo {
-    server_name: String,
-    tool_name: String,
-    tool: Tool,
+pub(crate) fn try_parse_fully_qualified_tool_name(fq_name: &str) -> Option<(String, String)> {
+    let (server, tool) = fq_name.split_once(MCP_TOOL_NAME_DELIMITER)?;
+    if server.is_empty() || tool.is_empty() {
+        return None;
+    }
+    Some((server.to_string(), tool.to_string()))
 }

 /// A thin wrapper around a set of running [`McpClient`] instances.
@@ -93,7 +57,7 @@ pub(crate) struct McpConnectionManager {
    clients: HashMap<String, std::sync::Arc<McpClient>>,

    /// Fully qualified tool name -> tool instance.
-    tools: HashMap<String, ToolInfo>,
+    tools: HashMap<String, Tool>,
 }

 impl McpConnectionManager {
@@ -177,9 +141,7 @@ impl McpConnectionManager {
            }
        }

-        let all_tools = list_all_tools(&clients).await?;
-
-        let tools = qualify_tools(all_tools);
+        let tools = list_all_tools(&clients).await?;

        Ok((Self { clients, tools }, errors))
    }
@@ -187,10 +149,7 @@ impl McpConnectionManager {
    /// Returns a single map that contains **all** tools. Each key is the
    /// fully-qualified name for the tool.
    pub fn list_all_tools(&self) -> HashMap<String, Tool> {
-        self.tools
-            .iter()
-            .map(|(name, tool)| (name.clone(), tool.tool.clone()))
-            .collect()
+        self.tools.clone()
    }

    /// Invoke the tool indicated by the (server, tool) pair.
@@ -212,19 +171,13 @@ impl McpConnectionManager {
            .await
            .with_context(|| format!("tool call failed for `{server}/{tool}`"))
    }
-
-    pub fn parse_tool_name(&self, tool_name: &str) -> Option<(String, String)> {
-        self.tools
-            .get(tool_name)
-            .map(|tool| (tool.server_name.clone(), tool.tool_name.clone()))
-    }
 }

 /// Query every server for its available tools and return a single map that
 /// contains **all** tools. Each key is the fully-qualified name for the tool.
-async fn list_all_tools(
+pub async fn list_all_tools(
    clients: &HashMap<String, std::sync::Arc<McpClient>>,
-) -> Result<Vec<ToolInfo>> {
+) -> Result<HashMap<String, Tool>> {
    let mut join_set = JoinSet::new();

    // Spawn one task per server so we can query them concurrently. This
@@ -241,19 +194,18 @@ async fn list_all_tools(
        });
    }

-    let mut aggregated: Vec<ToolInfo> = Vec::with_capacity(join_set.len());
+    let mut aggregated: HashMap<String, Tool> = HashMap::with_capacity(join_set.len());

    while let Some(join_res) = join_set.join_next().await {
        let (server_name, list_result) = join_res?;
        let list_result = list_result?;

        for tool in list_result.tools {
-            let tool_info = ToolInfo {
-                server_name: server_name.clone(),
-                tool_name: tool.name.clone(),
-                tool,
-            };
-            aggregated.push(tool_info);
+            // TODO(mbolin): escape tool names that contain invalid characters.
+            let fq_name = fully_qualified_tool_name(&server_name, &tool.name);
+            if aggregated.insert(fq_name.clone(), tool).is_some() {
+                panic!("tool name collision for '{fq_name}': suspicious");
+            }
        }
    }

@@ -272,84 +224,3 @@ fn is_valid_mcp_server_name(server_name: &str) -> bool {
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
 }
-
-#[cfg(test)]
-#[allow(clippy::unwrap_used)]
-mod tests {
-    use super::*;
-    use mcp_types::ToolInputSchema;
-
-    fn create_test_tool(server_name: &str, tool_name: &str) -> ToolInfo {
-        ToolInfo {
-            server_name: server_name.to_string(),
-            tool_name: tool_name.to_string(),
-            tool: Tool {
-                annotations: None,
-                description: Some(format!("Test tool: {tool_name}")),
-                input_schema: ToolInputSchema {
-                    properties: None,
-                    required: None,
-                    r#type: "object".to_string(),
-                },
-                name: tool_name.to_string(),
-            },
-        }
-    }
-
-    #[test]
-    fn test_qualify_tools_short_non_duplicated_names() {
-        let tools = vec![
-            create_test_tool("server1", "tool1"),
-            create_test_tool("server1", "tool2"),
-        ];
-
-        let qualified_tools = qualify_tools(tools);
-
-        assert_eq!(qualified_tools.len(), 2);
-        assert!(qualified_tools.contains_key("server1__tool1"));
-        assert!(qualified_tools.contains_key("server1__tool2"));
-    }
-
-    #[test]
-    fn test_qualify_tools_duplicated_names_skipped() {
-        let tools = vec![
-            create_test_tool("server1", "duplicate_tool"),
-            create_test_tool("server1", "duplicate_tool"),
-        ];
-
-        let qualified_tools = qualify_tools(tools);
-
-        // Only the first tool should remain, the second is skipped
-        assert_eq!(qualified_tools.len(), 1);
-        assert!(qualified_tools.contains_key("server1__duplicate_tool"));
-    }
-
-    #[test]
-    fn test_qualify_tools_long_names_same_server() {
-        let server_name = "my_server";
-
-        let tools = vec![
-            create_test_tool(
-                server_name,
-                "extremely_lengthy_function_name_that_absolutely_surpasses_all_reasonable_limits",
-            ),
-            create_test_tool(
-                server_name,
-                "yet_another_extremely_lengthy_function_name_that_absolutely_surpasses_all_reasonable_limits",
-            ),
-        ];
-
-        let qualified_tools = qualify_tools(tools);
-
-        assert_eq!(qualified_tools.len(), 2);
-
-        let mut keys: Vec<_> = qualified_tools.keys().cloned().collect();
-        keys.sort();
-
-        assert_eq!(keys[0].len(), 25);
-        assert_eq!(keys[0], "1c3987bd9c50b826cbe168796");
-
-        assert_eq!(keys[1].len(), 25);
-        assert_eq!(keys[1], "a02e507efc5a9de88637e4366");
-    }
-}
--- a/codex-rs/core/src/rollout.rs
+++ b/codex-rs/core/src/rollout.rs
@@ -153,16 +153,14 @@ struct LogFileInfo {
 }

 fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFileInfo> {
-    // Resolve ~/.codex/sessions/YYYY/MM/DD and create it if missing.
-    let timestamp = OffsetDateTime::now_local()
-        .map_err(|e| IoError::other(format!("failed to get local time: {e}")))?;
+    // Resolve ~/.codex/sessions and create it if missing.
    let mut dir = config.codex_home.clone();
    dir.push(SESSIONS_SUBDIR);
-    dir.push(timestamp.year().to_string());
-    dir.push(format!("{:02}", u8::from(timestamp.month())));
-    dir.push(format!("{:02}", timestamp.day()));
    fs::create_dir_all(&dir)?;

+    let timestamp = OffsetDateTime::now_local()
+        .map_err(|e| IoError::other(format!("failed to get local time: {e}")))?;
+
    // Custom format for YYYY-MM-DDThh-mm-ss. Use `-` instead of `:` for
    // compatibility with filesystems that do not allow colons in filenames.
    let format: &[FormatItem] =
--- a/codex-rs/core/tests/cli_stream.rs
+++ b/codex-rs/core/tests/cli_stream.rs
@@ -2,12 +2,7 @@

 use assert_cmd::Command as AssertCommand;
 use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
-use serde_json::Value;
-use std::time::Duration;
-use std::time::Instant;
 use tempfile::TempDir;
-use uuid::Uuid;
-use walkdir::WalkDir;
 use wiremock::Mock;
 use wiremock::MockServer;
 use wiremock::ResponseTemplate;
@@ -122,154 +117,3 @@ async fn responses_api_stream_cli() {
    let stdout = String::from_utf8_lossy(&output.stdout);
    assert!(stdout.contains("fixture hello"));
 }
-
-#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn integration_creates_and_checks_session_file() {
-    // Honor sandbox network restrictions for CI parity with the other tests.
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
-
-    // 1. Temp home so we read/write isolated session files.
-    let home = TempDir::new().unwrap();
-
-    // 2. Unique marker we'll look for in the session log.
-    let marker = format!("integration-test-{}", Uuid::new_v4());
-    let prompt = format!("echo {marker}");
-
-    // 3. Use the same offline SSE fixture as responses_api_stream_cli so the test is hermetic.
-    let fixture =
-        std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/cli_responses_fixture.sse");
-
-    // 4. Run the codex CLI through cargo (ensures the right bin is built) and invoke `exec`,
-    //    which is what records a session.
-    let mut cmd = AssertCommand::new("cargo");
-    cmd.arg("run")
-        .arg("-p")
-        .arg("codex-cli")
-        .arg("--quiet")
-        .arg("--")
-        .arg("exec")
-        .arg("--skip-git-repo-check")
-        .arg("-C")
-        .arg(env!("CARGO_MANIFEST_DIR"))
-        .arg(&prompt);
-    cmd.env("CODEX_HOME", home.path())
-        .env("OPENAI_API_KEY", "dummy")
-        .env("CODEX_RS_SSE_FIXTURE", &fixture)
-        // Required for CLI arg parsing even though fixture short-circuits network usage.
-        .env("OPENAI_BASE_URL", "http://unused.local");
-
-    let output = cmd.output().unwrap();
-    assert!(
-        output.status.success(),
-        "codex-cli exec failed: {}",
-        String::from_utf8_lossy(&output.stderr)
-    );
-
-    // 5. Sessions are written asynchronously; wait briefly for the directory to appear.
-    let sessions_dir = home.path().join("sessions");
-    let start = Instant::now();
-    while !sessions_dir.exists() && start.elapsed() < Duration::from_secs(2) {
-        std::thread::sleep(Duration::from_millis(50));
-    }
-
-    // 6. Scan all session files and find the one that contains our marker.
-    let mut matching_files = vec![];
-    for entry in WalkDir::new(&sessions_dir) {
-        let entry = entry.unwrap();
-        if entry.file_type().is_file() && entry.file_name().to_string_lossy().ends_with(".jsonl") {
-            let path = entry.path();
-            let content = std::fs::read_to_string(path).unwrap();
-            let mut lines = content.lines();
-            // Skip SessionMeta (first line)
-            let _ = lines.next();
-            for line in lines {
-                let item: Value = serde_json::from_str(line).unwrap();
-                if let Some("message") = item.get("type").and_then(|t| t.as_str()) {
-                    if let Some(content) = item.get("content") {
-                        if content.to_string().contains(&marker) {
-                            matching_files.push(path.to_owned());
-                            break;
-                        }
-                    }
-                }
-            }
-        }
-    }
-    assert_eq!(
-        matching_files.len(),
-        1,
-        "Expected exactly one session file containing the marker, found {}",
-        matching_files.len()
-    );
-    let path = &matching_files[0];
-
-    // 7. Verify directory structure: sessions/YYYY/MM/DD/filename.jsonl
-    let rel = match path.strip_prefix(&sessions_dir) {
-        Ok(r) => r,
-        Err(_) => panic!("session file should live under sessions/"),
-    };
-    let comps: Vec<String> = rel
-        .components()
-        .map(|c| c.as_os_str().to_string_lossy().into_owned())
-        .collect();
-    assert_eq!(
-        comps.len(),
-        4,
-        "Expected sessions/YYYY/MM/DD/<file>, got {rel:?}"
-    );
-    let year = &comps[0];
-    let month = &comps[1];
-    let day = &comps[2];
-    assert!(
-        year.len() == 4 && year.chars().all(|c| c.is_ascii_digit()),
-        "Year dir not 4-digit numeric: {year}"
-    );
-    assert!(
-        month.len() == 2 && month.chars().all(|c| c.is_ascii_digit()),
-        "Month dir not zero-padded 2-digit numeric: {month}"
-    );
-    assert!(
-        day.len() == 2 && day.chars().all(|c| c.is_ascii_digit()),
-        "Day dir not zero-padded 2-digit numeric: {day}"
-    );
-    // Range checks (best-effort; won't fail on leading zeros)
-    if let Ok(m) = month.parse::<u8>() {
-        assert!((1..=12).contains(&m), "Month out of range: {m}");
-    }
-    if let Ok(d) = day.parse::<u8>() {
-        assert!((1..=31).contains(&d), "Day out of range: {d}");
-    }
-
-    // 8. Parse SessionMeta line and basic sanity checks.
-    let content = std::fs::read_to_string(path).unwrap();
-    let mut lines = content.lines();
-    let meta: Value = serde_json::from_str(lines.next().unwrap()).unwrap();
-    assert!(meta.get("id").is_some(), "SessionMeta missing id");
-    assert!(
-        meta.get("timestamp").is_some(),
-        "SessionMeta missing timestamp"
-    );
-
-    // 9. Confirm at least one message contains the marker.
-    let mut found_message = false;
-    for line in lines {
-        let item: Value = serde_json::from_str(line).unwrap();
-        if item.get("type").map(|t| t == "message").unwrap_or(false) {
-            if let Some(content) = item.get("content") {
-                if content.to_string().contains(&marker) {
-                    found_message = true;
-                    break;
-                }
-            }
-        }
-    }
-    assert!(
-        found_message,
-        "No message found in session file containing the marker"
-    );
-}
--- a/codex-rs/justfile
+++ b/codex-rs/justfile
@@ -23,10 +23,3 @@ file-search *args:
 # format code
 fmt:
    cargo fmt -- --config imports_granularity=Item
-
-fix:
-    cargo clippy --fix --all-features --tests --allow-dirty
-
-install:
-    rustup show active-toolchain
-    cargo fetch
--- a/codex-rs/rust-toolchain.toml
+++ b/codex-rs/rust-toolchain.toml
@@ -1,3 +0,0 @@
-[toolchain]
-channel = "1.88.0"
-components = [ "clippy", "rustfmt", "rust-src"]
--- a/codex-rs/tui/src/app.rs
+++ b/codex-rs/tui/src/app.rs
@@ -18,15 +18,8 @@ use crossterm::event::KeyEvent;
 use crossterm::event::MouseEvent;
 use crossterm::event::MouseEventKind;
 use std::path::PathBuf;
-use std::sync::Arc;
-use std::sync::Mutex;
 use std::sync::mpsc::Receiver;
 use std::sync::mpsc::channel;
-use std::thread;
-use std::time::Duration;
-
-/// Time window for debouncing redraw requests.
-const REDRAW_DEBOUNCE: Duration = Duration::from_millis(100);

 /// Top-level application state: which full-screen view is currently active.
 #[allow(clippy::large_enum_variant)]
@@ -53,9 +46,6 @@ pub(crate) struct App<'a> {

    file_search: FileSearchManager,

-    /// True when a redraw has been scheduled but not yet executed.
-    pending_redraw: Arc<Mutex<bool>>,
-
    /// Stored parameters needed to instantiate the ChatWidget later, e.g.,
    /// after dismissing the Git-repo warning.
    chat_args: Option<ChatWidgetArgs>,
@@ -70,7 +60,7 @@ struct ChatWidgetArgs {
    initial_images: Vec<PathBuf>,
 }

-impl App<'_> {
+impl<'a> App<'a> {
    pub(crate) fn new(
        config: Config,
        initial_prompt: Option<String>,
@@ -80,7 +70,6 @@ impl App<'_> {
    ) -> Self {
        let (app_event_tx, app_event_rx) = channel();
        let app_event_tx = AppEventSender::new(app_event_tx);
-        let pending_redraw = Arc::new(Mutex::new(false));
        let scroll_event_helper = ScrollEventHelper::new(app_event_tx.clone());

        // Spawn a dedicated thread for reading the crossterm event loop and
@@ -94,7 +83,7 @@ impl App<'_> {
                            app_event_tx.send(AppEvent::KeyEvent(key_event));
                        }
                        crossterm::event::Event::Resize(_, _) => {
-                            app_event_tx.send(AppEvent::RequestRedraw);
+                            app_event_tx.send(AppEvent::Redraw);
                        }
                        crossterm::event::Event::Mouse(MouseEvent {
                            kind: MouseEventKind::ScrollUp,
@@ -163,7 +152,6 @@ impl App<'_> {
            app_state,
            config,
            file_search,
-            pending_redraw,
            chat_args,
        }
    }
@@ -174,29 +162,6 @@ impl App<'_> {
        self.app_event_tx.clone()
    }

-    /// Schedule a redraw if one is not already pending.
-    #[allow(clippy::unwrap_used)]
-    fn schedule_redraw(&self) {
-        {
-            #[allow(clippy::unwrap_used)]
-            let mut flag = self.pending_redraw.lock().unwrap();
-            if *flag {
-                return;
-            }
-            *flag = true;
-        }
-
-        let tx = self.app_event_tx.clone();
-        let pending_redraw = self.pending_redraw.clone();
-        thread::spawn(move || {
-            thread::sleep(REDRAW_DEBOUNCE);
-            tx.send(AppEvent::Redraw);
-            #[allow(clippy::unwrap_used)]
-            let mut f = pending_redraw.lock().unwrap();
-            *f = false;
-        });
-    }
-
    pub(crate) fn run(
        &mut self,
        terminal: &mut tui::Tui,
@@ -204,13 +169,10 @@ impl App<'_> {
    ) -> Result<()> {
        // Insert an event to trigger the first render.
        let app_event_tx = self.app_event_tx.clone();
-        app_event_tx.send(AppEvent::RequestRedraw);
+        app_event_tx.send(AppEvent::Redraw);

        while let Ok(event) = self.app_event_rx.recv() {
            match event {
-                AppEvent::RequestRedraw => {
-                    self.schedule_redraw();
-                }
                AppEvent::Redraw => {
                    self.draw_next_frame(terminal)?;
                }
@@ -287,7 +249,7 @@ impl App<'_> {
                            Vec::new(),
                        ));
                        self.app_state = AppState::Chat { widget: new_widget };
-                        self.app_event_tx.send(AppEvent::RequestRedraw);
+                        self.app_event_tx.send(AppEvent::Redraw);
                    }
                    SlashCommand::ToggleMouseMode => {
                        if let Err(e) = mouse_capture.toggle() {
@@ -374,7 +336,7 @@ impl App<'_> {
                        args.initial_images,
                    ));
                    self.app_state = AppState::Chat { widget };
-                    self.app_event_tx.send(AppEvent::RequestRedraw);
+                    self.app_event_tx.send(AppEvent::Redraw);
                }
                GitWarningOutcome::Quit => {
                    self.app_event_tx.send(AppEvent::ExitRequest);
--- a/codex-rs/tui/src/app_event.rs
+++ b/codex-rs/tui/src/app_event.rs
@@ -8,10 +8,6 @@ use crate::slash_command::SlashCommand;
 pub(crate) enum AppEvent {
    CodexEvent(Event),

-    /// Request a redraw which will be debounced by the [`App`].
-    RequestRedraw,
-
-    /// Actually draw the next frame.
    Redraw,

    KeyEvent(KeyEvent),
--- a/codex-rs/tui/src/bottom_pane/mod.rs
+++ b/codex-rs/tui/src/bottom_pane/mod.rs
@@ -212,7 +212,7 @@ impl BottomPane<'_> {
    }

    pub(crate) fn request_redraw(&self) {
-        self.app_event_tx.send(AppEvent::RequestRedraw)
+        self.app_event_tx.send(AppEvent::Redraw)
    }

    /// Returns true when a popup inside the composer is visible.
--- a/codex-rs/tui/src/bottom_pane/status_indicator_view.rs
+++ b/codex-rs/tui/src/bottom_pane/status_indicator_view.rs
@@ -24,7 +24,7 @@ impl StatusIndicatorView {
    }
 }

-impl BottomPaneView<'_> for StatusIndicatorView {
+impl<'a> BottomPaneView<'a> for StatusIndicatorView {
    fn update_status_text(&mut self, text: String) -> ConditionalUpdate {
        self.update_text(text);
        ConditionalUpdate::NeedsRedraw
--- a/codex-rs/tui/src/chatwidget.rs
+++ b/codex-rs/tui/src/chatwidget.rs
@@ -431,7 +431,7 @@ impl ChatWidget<'_> {
    }

    fn request_redraw(&mut self) {
-        self.app_event_tx.send(AppEvent::RequestRedraw);
+        self.app_event_tx.send(AppEvent::Redraw);
    }

    pub(crate) fn add_diff_output(&mut self, diff_output: String) {
--- a/codex-rs/tui/src/status_indicator_widget.rs
+++ b/codex-rs/tui/src/status_indicator_widget.rs
@@ -65,7 +65,7 @@ impl StatusIndicatorWidget {
                    std::thread::sleep(Duration::from_millis(200));
                    counter = counter.wrapping_add(1);
                    frame_idx_clone.store(counter, Ordering::Relaxed);
-                    app_event_tx_clone.send(AppEvent::RequestRedraw);
+                    app_event_tx_clone.send(AppEvent::Redraw);
                }
            });
        }
Author	SHA1	Message	Date
aibrahim-oai	d465d71955	Merge branch 'main' into codex/implement-cli-tool-invocation-flow-tests	2025-07-16 22:32:32 -07:00
aibrahim-oai	3d1cfe31a2	Merge branch 'main' into codex/implement-cli-tool-invocation-flow-tests	2025-07-14 15:22:46 -07:00
Ahmed Ibrahim	d6e934f7cd	replace stdout with file matching	2025-07-14 11:13:09 -07:00
Ahmed Ibrahim	0b83f2965c	fmt	2025-07-14 11:00:54 -07:00
Ahmed Ibrahim	d4dc3b11bc	fmt	2025-07-14 10:57:47 -07:00
aibrahim-oai	bcbe02ff1d	Merge branch 'main' into codex/implement-cli-tool-invocation-flow-tests	2025-07-12 17:28:25 -07:00
Ahmed Ibrahim	51257e2fd0	Adressing feedback	2025-07-12 17:15:37 -07:00
aibrahim-oai	0ece374c58	Merge branch 'main' into codex/implement-cli-tool-invocation-flow-tests	2025-07-12 17:08:03 -07:00
aibrahim-oai	f532554924	Fix clippy warnings in integration tests	2025-07-11 14:43:58 -07:00
aibrahim-oai	f9609cc9bf	Format integration test imports	2025-07-11 14:29:48 -07:00
aibrahim-oai	781798b4ed	Use sandbox dirs and env var constant	2025-07-11 14:13:55 -07:00
aibrahim-oai	5bafe0dc59	Update Cargo.lock for new dev dependencies	2025-07-11 14:03:15 -07:00