update tests

This commit is contained in:
Owen Lin
2025-11-03 14:15:34 -08:00
parent d11f9cc2e4
commit 9d153ee1f3
14 changed files with 121 additions and 166 deletions

View File

@@ -1,3 +1,4 @@
mod model_list;
mod thread_archive;
mod thread_list;
mod thread_resume;

View File

@@ -0,0 +1,183 @@
use std::time::Duration;
use anyhow::Result;
use anyhow::anyhow;
use app_test_support::McpProcess;
use app_test_support::to_response;
use codex_app_server_protocol::JSONRPCError;
use codex_app_server_protocol::JSONRPCResponse;
use codex_app_server_protocol::Model;
use codex_app_server_protocol::ModelListParams;
use codex_app_server_protocol::ModelListResponse;
use codex_app_server_protocol::ReasoningEffortOption;
use codex_app_server_protocol::RequestId;
use codex_protocol::config_types::ReasoningEffort;
use pretty_assertions::assert_eq;
use tempfile::TempDir;
use tokio::time::timeout;
/// Upper bound applied via `tokio::time::timeout` to server initialization and to each
/// wait for a response or error message in the tests below.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
/// Error code the server is expected to report for a rejected request
/// (-32600 is "Invalid Request" in JSON-RPC 2.0).
const INVALID_REQUEST_ERROR_CODE: i64 = -32600;
#[tokio::test]
async fn list_models_returns_all_models_with_large_limit() -> Result<()> {
let codex_home = TempDir::new()?;
let mut mcp = McpProcess::new(codex_home.path()).await?;
timeout(DEFAULT_TIMEOUT, mcp.initialize()).await??;
let request_id = mcp
.send_list_models_request(ModelListParams {
cursor: None,
limit: Some(100),
})
.await?;
let response: JSONRPCResponse = timeout(
DEFAULT_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
)
.await??;
let ModelListResponse { data, next_cursor } = to_response::<ModelListResponse>(response)?;
let expected_models = vec![
Model {
id: "gpt-5-codex".to_string(),
model: "gpt-5-codex".to_string(),
display_name: "gpt-5-codex".to_string(),
description: "Optimized for coding tasks with many tools.".to_string(),
supported_reasoning_efforts: vec![
ReasoningEffortOption {
reasoning_effort: ReasoningEffort::Low,
description: "Fastest responses with limited reasoning".to_string(),
},
ReasoningEffortOption {
reasoning_effort: ReasoningEffort::Medium,
description: "Dynamically adjusts reasoning based on the task".to_string(),
},
ReasoningEffortOption {
reasoning_effort: ReasoningEffort::High,
description: "Maximizes reasoning depth for complex or ambiguous problems"
.to_string(),
},
],
default_reasoning_effort: ReasoningEffort::Medium,
is_default: true,
},
Model {
id: "gpt-5".to_string(),
model: "gpt-5".to_string(),
display_name: "gpt-5".to_string(),
description: "Broad world knowledge with strong general reasoning.".to_string(),
supported_reasoning_efforts: vec![
ReasoningEffortOption {
reasoning_effort: ReasoningEffort::Minimal,
description: "Fastest responses with little reasoning".to_string(),
},
ReasoningEffortOption {
reasoning_effort: ReasoningEffort::Low,
description: "Balances speed with some reasoning; useful for straightforward \
queries and short explanations"
.to_string(),
},
ReasoningEffortOption {
reasoning_effort: ReasoningEffort::Medium,
description: "Provides a solid balance of reasoning depth and latency for \
general-purpose tasks"
.to_string(),
},
ReasoningEffortOption {
reasoning_effort: ReasoningEffort::High,
description: "Maximizes reasoning depth for complex or ambiguous problems"
.to_string(),
},
],
default_reasoning_effort: ReasoningEffort::Medium,
is_default: false,
},
];
assert_eq!(data, expected_models);
assert!(next_cursor.is_none());
Ok(())
}
/// Walks the model list one item per page and checks both pages.
///
/// The first request (`limit: Some(1)`, no cursor) must return exactly `gpt-5-codex`
/// together with a cursor. Passing that cursor to a second `limit: Some(1)` request must
/// return exactly `gpt-5` and no further cursor.
#[tokio::test]
async fn list_models_pagination_works() -> Result<()> {
    let codex_home = TempDir::new()?;
    let mut mcp = McpProcess::new(codex_home.path()).await?;
    timeout(DEFAULT_TIMEOUT, mcp.initialize()).await??;

    // Page 1: no cursor, a single item.
    let first_request = mcp
        .send_list_models_request(ModelListParams {
            cursor: None,
            limit: Some(1),
        })
        .await?;
    let first_response: JSONRPCResponse = timeout(
        DEFAULT_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(first_request)),
    )
    .await??;
    let ModelListResponse {
        data: first_items,
        next_cursor: first_cursor,
    } = to_response::<ModelListResponse>(first_response)?;
    assert_eq!(first_items.len(), 1);
    assert_eq!(first_items[0].id, "gpt-5-codex");

    // The error text states what was missing so a failure here is self-explanatory.
    let next_cursor =
        first_cursor.ok_or_else(|| anyhow!("missing cursor for second page"))?;

    // Page 2: resume from the cursor. It is moved into the request because it is not
    // needed afterwards, so no clone is required.
    let second_request = mcp
        .send_list_models_request(ModelListParams {
            cursor: Some(next_cursor),
            limit: Some(1),
        })
        .await?;
    let second_response: JSONRPCResponse = timeout(
        DEFAULT_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(second_request)),
    )
    .await??;
    let ModelListResponse {
        data: second_items,
        next_cursor: second_cursor,
    } = to_response::<ModelListResponse>(second_response)?;
    assert_eq!(second_items.len(), 1);
    assert_eq!(second_items[0].id, "gpt-5");
    assert!(second_cursor.is_none());
    Ok(())
}
#[tokio::test]
async fn list_models_rejects_invalid_cursor() -> Result<()> {
let codex_home = TempDir::new()?;
let mut mcp = McpProcess::new(codex_home.path()).await?;
timeout(DEFAULT_TIMEOUT, mcp.initialize()).await??;
let request_id = mcp
.send_list_models_request(ModelListParams {
cursor: Some("invalid".to_string()),
limit: None,
})
.await?;
let error: JSONRPCError = timeout(
DEFAULT_TIMEOUT,
mcp.read_stream_until_error_message(RequestId::Integer(request_id)),
)
.await??;
assert_eq!(error.id, RequestId::Integer(request_id));
assert_eq!(error.error.code, INVALID_REQUEST_ERROR_CODE);
assert_eq!(error.error.message, "invalid cursor: invalid");
Ok(())
}

View File

@@ -15,7 +15,7 @@ use tokio::time::timeout;
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[tokio::test]
async fn thread_archive_moves_rollout_into_archived_directory() -> Result<()> {
let codex_home = TempDir::new()?;
create_config_toml(codex_home.path())?;

View File

@@ -1,5 +1,6 @@
use anyhow::Result;
use app_test_support::McpProcess;
use app_test_support::create_fake_rollout;
use app_test_support::to_response;
use codex_app_server_protocol::JSONRPCResponse;
use codex_app_server_protocol::RequestId;
@@ -12,7 +13,7 @@ use uuid::Uuid;
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[tokio::test]
async fn thread_list_basic_empty() -> Result<()> {
let codex_home = TempDir::new()?;
create_minimal_config(codex_home.path())?;
@@ -45,71 +46,14 @@ fn create_minimal_config(codex_home: &std::path::Path) -> std::io::Result<()> {
let config_toml = codex_home.join("config.toml");
std::fs::write(
config_toml,
"model = \"mock-model\"\napproval_policy = \"never\"\n",
r#"
model = "mock-model"
approval_policy = "never"
"#,
)
}
/// Writes a three-line JSONL rollout file under `codex_home/sessions/<year>/<month>/<day>/`
/// and returns the freshly generated UUID as a string.
///
/// `filename_ts` supplies both the date directory components (taken from fixed byte
/// ranges `0..4`, `5..7` and `8..10`) and the timestamp part of the file name.
/// `meta_rfc3339` is written as the `timestamp` of every record.
fn create_fake_rollout(
    codex_home: &std::path::Path,
    filename_ts: &str,
    meta_rfc3339: &str,
) -> Result<String> {
    let session_id = Uuid::new_v4();

    let (year, month, day) = (
        &filename_ts[0..4],
        &filename_ts[5..7],
        &filename_ts[8..10],
    );
    let day_dir = codex_home.join("sessions").join(year).join(month).join(day);
    std::fs::create_dir_all(&day_dir)?;
    let rollout_path = day_dir.join(format!("rollout-{filename_ts}-{session_id}.jsonl"));

    let session_meta = json!({
        "timestamp": meta_rfc3339,
        "type": "session_meta",
        "payload": {
            "id": session_id,
            "timestamp": meta_rfc3339,
            "cwd": "/",
            "originator": "codex",
            "cli_version": "0.0.0",
            "instructions": null,
            "source": "vscode",
            "model_provider": "mock_provider"
        }
    });
    let user_item = json!({
        "timestamp": meta_rfc3339,
        "type":"response_item",
        "payload": {
            "type":"message",
            "role":"user",
            "content":[{"type":"input_text","text": "Hello"}]
        }
    });
    // Add a matching user_message event so the scanner includes this rollout.
    let user_event = json!({
        "timestamp": meta_rfc3339,
        "type":"event_msg",
        "payload": {
            "type":"user_message",
            "message":"Hello",
            "kind":"plain"
        }
    });

    // One serialized record per line, each terminated by a newline.
    let mut contents = String::new();
    for record in [session_meta, user_item, user_event] {
        contents.push_str(&record.to_string());
        contents.push('\n');
    }
    std::fs::write(rollout_path, contents)?;
    Ok(session_id.to_string())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[tokio::test]
async fn thread_list_pagination_next_cursor_none_on_last_page() -> Result<()> {
let codex_home = TempDir::new()?;
create_minimal_config(codex_home.path())?;
@@ -119,16 +63,22 @@ async fn thread_list_pagination_next_cursor_none_on_last_page() -> Result<()> {
codex_home.path(),
"2025-01-02T12-00-00",
"2025-01-02T12:00:00Z",
"Hello",
Some("mock_provider"),
)?;
let _b = create_fake_rollout(
codex_home.path(),
"2025-01-01T13-00-00",
"2025-01-01T13:00:00Z",
"Hello",
Some("mock_provider"),
)?;
let _c = create_fake_rollout(
codex_home.path(),
"2025-01-01T12-00-00",
"2025-01-01T12:00:00Z",
"Hello",
Some("mock_provider"),
)?;
let mut mcp = McpProcess::new(codex_home.path()).await?;
@@ -180,7 +130,7 @@ async fn thread_list_pagination_next_cursor_none_on_last_page() -> Result<()> {
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[tokio::test]
async fn thread_list_respects_provider_filter() -> Result<()> {
let codex_home = TempDir::new()?;
create_minimal_config(codex_home.path())?;
@@ -190,6 +140,8 @@ async fn thread_list_respects_provider_filter() -> Result<()> {
codex_home.path(),
"2025-01-02T10-00-00",
"2025-01-02T10:00:00Z",
"X",
Some("mock_provider"),
)?; // mock_provider
// one with a different provider
let uuid = Uuid::new_v4();

View File

@@ -13,7 +13,7 @@ use tokio::time::timeout;
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[tokio::test]
async fn thread_resume_returns_existing_thread() -> Result<()> {
let server = create_mock_chat_completions_server(vec![]).await;
let codex_home = TempDir::new()?;
@@ -25,7 +25,7 @@ async fn thread_resume_returns_existing_thread() -> Result<()> {
// Start a thread.
let start_id = mcp
.send_thread_start_request(ThreadStartParams {
model: Some("o3".to_string()),
model: Some("gpt-5-codex".to_string()),
..Default::default()
})
.await?;
@@ -63,7 +63,7 @@ fn create_config_toml(codex_home: &std::path::Path, server_uri: &str) -> std::io
r#"
model = "mock-model"
approval_policy = "never"
sandbox_mode = "danger-full-access"
sandbox_mode = "workspace-write"
model_provider = "mock_provider"

View File

@@ -15,7 +15,7 @@ use tokio::time::timeout;
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[tokio::test]
async fn thread_start_creates_thread_and_emits_started() -> Result<()> {
// Provide a mock server and config so model wiring is valid.
let server = create_mock_chat_completions_server(vec![]).await;
@@ -55,7 +55,6 @@ async fn thread_start_creates_thread_and_emits_started() -> Result<()> {
serde_json::from_value(notif.params.expect("params must be present"))?;
assert_eq!(started.thread.id, thread.id);
drop(server);
Ok(())
}
@@ -68,7 +67,7 @@ fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()
r#"
model = "mock-model"
approval_policy = "never"
sandbox_mode = "danger-full-access"
sandbox_mode = "workspace-write"
model_provider = "mock_provider"

View File

@@ -19,7 +19,7 @@ use tokio::time::timeout;
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[tokio::test]
async fn turn_interrupt_aborts_running_turn() -> Result<()> {
// Use a portable sleep command to keep the turn running.
#[cfg(target_os = "windows")]
@@ -104,8 +104,6 @@ async fn turn_interrupt_aborts_running_turn() -> Result<()> {
let _resp: TurnInterruptResponse = to_response::<TurnInterruptResponse>(interrupt_resp)?;
// No fields to assert on; successful deserialization confirms proper response shape.
drop(server);
Ok(())
}
@@ -118,7 +116,7 @@ fn create_config_toml(codex_home: &std::path::Path, server_uri: &str) -> std::io
r#"
model = "mock-model"
approval_policy = "never"
sandbox_mode = "danger-full-access"
sandbox_mode = "workspace-write"
model_provider = "mock_provider"

View File

@@ -21,6 +21,7 @@ use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
use codex_protocol::parse_command::ParsedCommand;
use codex_protocol::protocol::Event;
use codex_protocol::protocol::EventMsg;
use core_test_support::skip_if_no_network;
use pretty_assertions::assert_eq;
use std::env;
use std::path::Path;
@@ -29,7 +30,7 @@ use tokio::time::timeout;
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[tokio::test]
async fn turn_start_emits_notifications_and_accepts_model_override() -> Result<()> {
// Provide a mock server and config so model wiring is valid.
// Three Codex turns hit the mock model (session start + two turn/start calls).
@@ -136,11 +137,10 @@ async fn turn_start_emits_notifications_and_accepts_model_override() -> Result<(
)
.await??;
drop(server);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[tokio::test]
async fn turn_start_accepts_local_image_input() -> Result<()> {
// Two Codex turns hit the mock model (session start + turn/start).
let responses = vec![
@@ -194,12 +194,10 @@ async fn turn_start_accepts_local_image_input() -> Result<()> {
assert!(!turn.id.is_empty());
// This test only validates that turn/start responds and returns a turn.
drop(server);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[tokio::test]
async fn turn_start_exec_approval_toggle_v2() -> Result<()> {
if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
println!("Skipping v2 exec approval toggle test due to sandbox network disabled.");
@@ -335,16 +333,14 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> {
)
.await??;
drop(server);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[tokio::test]
async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> {
if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
println!("Skipping v2 sandbox/cwd test due to sandbox network disabled.");
return Ok(());
}
// When returning Result from a test, pass an Ok(()) to the skip macro
// so the early return type matches. The no-arg form returns unit.
skip_if_no_network!(Ok(()));
let tmp = TempDir::new()?;
let codex_home = tmp.path().join("codex_home");
@@ -431,7 +427,7 @@ async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> {
)
.await??;
// second turn with danger-full-access and second_cwd, ensure exec begins in second_cwd
// second turn with workspace-write and second_cwd, ensure exec begins in second_cwd
let second_turn = mcp
.send_turn_start_request(TurnStartParams {
thread_id: thread.id.clone(),
@@ -482,7 +478,6 @@ async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> {
)
.await??;
drop(server);
Ok(())
}
@@ -495,7 +490,7 @@ fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()
r#"
model = "mock-model"
approval_policy = "never"
sandbox_mode = "danger-full-access"
sandbox_mode = "workspace-write"
model_provider = "mock_provider"
@@ -518,7 +513,7 @@ fn create_config_toml_untrusted(codex_home: &Path, server_uri: &str) -> std::io:
r#"
model = "mock-model"
approval_policy = "untrusted"
sandbox_mode = "danger-full-access"
sandbox_mode = "workspace-write"
model_provider = "mock_provider"