mirror of
https://github.com/openai/codex.git
synced 2026-03-09 07:25:36 +03:00
Compare commits
23 Commits
codex/refa
...
dh--env-co
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
66858e4b29 | ||
|
|
78aafa465b | ||
|
|
c9c5d694c9 | ||
|
|
86d636cf33 | ||
|
|
9f7097160d | ||
|
|
e1f098b9b7 | ||
|
|
e5e13479d0 | ||
|
|
7bc3ca9e40 | ||
|
|
4d8b71d412 | ||
|
|
b484672961 | ||
|
|
a1ee10b438 | ||
|
|
dccce34d84 | ||
|
|
f5945d7c03 | ||
|
|
5fcf923c19 | ||
|
|
0c7efa0cfd | ||
|
|
d5853d9c47 | ||
|
|
d9118c04bf | ||
|
|
91e65ac0ce | ||
|
|
1ac4fb45d2 | ||
|
|
07b8bdfbf1 | ||
|
|
0f22067242 | ||
|
|
d7f8b97541 | ||
|
|
611e00c862 |
2
.github/workflows/ci.yml
vendored
2
.github/workflows/ci.yml
vendored
@@ -46,7 +46,7 @@ jobs:
|
||||
echo "pack_output=$PACK_OUTPUT" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Upload staged npm package artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@v5
|
||||
with:
|
||||
name: codex-npm-staging
|
||||
path: ${{ steps.stage_npm_package.outputs.pack_output }}
|
||||
|
||||
2
.github/workflows/rust-release.yml
vendored
2
.github/workflows/rust-release.yml
vendored
@@ -350,7 +350,7 @@ jobs:
|
||||
fi
|
||||
fi
|
||||
|
||||
- uses: actions/upload-artifact@v4
|
||||
- uses: actions/upload-artifact@v5
|
||||
with:
|
||||
name: ${{ matrix.target }}
|
||||
# Upload the per-binary .zst files as well as the new .tar.gz
|
||||
|
||||
@@ -75,6 +75,7 @@ Codex CLI supports a rich set of configuration options, with preferences stored
|
||||
|
||||
- [**Getting started**](./docs/getting-started.md)
|
||||
- [CLI usage](./docs/getting-started.md#cli-usage)
|
||||
- [Slash Commands](./docs/slash_commands.md)
|
||||
- [Running with a prompt as input](./docs/getting-started.md#running-with-a-prompt-as-input)
|
||||
- [Example prompts](./docs/getting-started.md#example-prompts)
|
||||
- [Custom prompts](./docs/prompts.md)
|
||||
|
||||
10
codex-rs/Cargo.lock
generated
10
codex-rs/Cargo.lock
generated
@@ -1750,8 +1750,7 @@ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
|
||||
[[package]]
|
||||
name = "crossterm"
|
||||
version = "0.28.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6"
|
||||
source = "git+https://github.com/nornagon/crossterm?branch=nornagon%2Fcolor-query#87db8bfa6dc99427fd3b071681b07fc31c6ce995"
|
||||
dependencies = [
|
||||
"bitflags 2.10.0",
|
||||
"crossterm_winapi",
|
||||
@@ -5685,6 +5684,12 @@ dependencies = [
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha1_smol"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbfa15b3dddfee50a0fff136974b3e1bde555604ba463834a7eb7deb6417705d"
|
||||
|
||||
[[package]]
|
||||
name = "sha2"
|
||||
version = "0.10.9"
|
||||
@@ -6872,6 +6877,7 @@ dependencies = [
|
||||
"getrandom 0.3.3",
|
||||
"js-sys",
|
||||
"serde",
|
||||
"sha1_smol",
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
|
||||
@@ -276,6 +276,7 @@ opt-level = 0
|
||||
# Uncomment to debug local changes.
|
||||
# ratatui = { path = "../../ratatui" }
|
||||
ratatui = { git = "https://github.com/nornagon/ratatui", branch = "nornagon-v0.29.0-patch" }
|
||||
crossterm = { git = "https://github.com/nornagon/crossterm", branch = "nornagon/color-query" }
|
||||
|
||||
# Uncomment to debug local changes.
|
||||
# rmcp = { path = "../../rust-sdk/crates/rmcp" }
|
||||
|
||||
@@ -1172,9 +1172,23 @@ impl CodexMessageProcessor {
|
||||
// Verify that the rollout path is in the sessions directory or else
|
||||
// a malicious client could specify an arbitrary path.
|
||||
let rollout_folder = self.config.codex_home.join(codex_core::SESSIONS_SUBDIR);
|
||||
let canonical_sessions_dir = match tokio::fs::canonicalize(&rollout_folder).await {
|
||||
Ok(path) => path,
|
||||
Err(err) => {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INTERNAL_ERROR_CODE,
|
||||
message: format!(
|
||||
"failed to archive conversation: unable to resolve sessions directory: {err}"
|
||||
),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
return;
|
||||
}
|
||||
};
|
||||
let canonical_rollout_path = tokio::fs::canonicalize(&rollout_path).await;
|
||||
let canonical_rollout_path = if let Ok(path) = canonical_rollout_path
|
||||
&& path.starts_with(&rollout_folder)
|
||||
&& path.starts_with(&canonical_sessions_dir)
|
||||
{
|
||||
path
|
||||
} else {
|
||||
|
||||
@@ -64,64 +64,79 @@ impl MessageProcessor {
|
||||
|
||||
pub(crate) async fn process_request(&mut self, request: JSONRPCRequest) {
|
||||
let request_id = request.id.clone();
|
||||
if let Ok(request_json) = serde_json::to_value(request)
|
||||
&& let Ok(codex_request) = serde_json::from_value::<ClientRequest>(request_json)
|
||||
{
|
||||
match codex_request {
|
||||
// Handle Initialize internally so CodexMessageProcessor does not have to concern
|
||||
// itself with the `initialized` bool.
|
||||
ClientRequest::Initialize { request_id, params } => {
|
||||
if self.initialized {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INVALID_REQUEST_ERROR_CODE,
|
||||
message: "Already initialized".to_string(),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
return;
|
||||
} else {
|
||||
let ClientInfo {
|
||||
name,
|
||||
title: _title,
|
||||
version,
|
||||
} = params.client_info;
|
||||
let user_agent_suffix = format!("{name}; {version}");
|
||||
if let Ok(mut suffix) = USER_AGENT_SUFFIX.lock() {
|
||||
*suffix = Some(user_agent_suffix);
|
||||
}
|
||||
let request_json = match serde_json::to_value(&request) {
|
||||
Ok(request_json) => request_json,
|
||||
Err(err) => {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INVALID_REQUEST_ERROR_CODE,
|
||||
message: format!("Invalid request: {err}"),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let user_agent = get_codex_user_agent();
|
||||
let response = InitializeResponse { user_agent };
|
||||
self.outgoing.send_response(request_id, response).await;
|
||||
let codex_request = match serde_json::from_value::<ClientRequest>(request_json) {
|
||||
Ok(codex_request) => codex_request,
|
||||
Err(err) => {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INVALID_REQUEST_ERROR_CODE,
|
||||
message: format!("Invalid request: {err}"),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
self.initialized = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
if !self.initialized {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INVALID_REQUEST_ERROR_CODE,
|
||||
message: "Not initialized".to_string(),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
return;
|
||||
match codex_request {
|
||||
// Handle Initialize internally so CodexMessageProcessor does not have to concern
|
||||
// itself with the `initialized` bool.
|
||||
ClientRequest::Initialize { request_id, params } => {
|
||||
if self.initialized {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INVALID_REQUEST_ERROR_CODE,
|
||||
message: "Already initialized".to_string(),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
return;
|
||||
} else {
|
||||
let ClientInfo {
|
||||
name,
|
||||
title: _title,
|
||||
version,
|
||||
} = params.client_info;
|
||||
let user_agent_suffix = format!("{name}; {version}");
|
||||
if let Ok(mut suffix) = USER_AGENT_SUFFIX.lock() {
|
||||
*suffix = Some(user_agent_suffix);
|
||||
}
|
||||
|
||||
let user_agent = get_codex_user_agent();
|
||||
let response = InitializeResponse { user_agent };
|
||||
self.outgoing.send_response(request_id, response).await;
|
||||
|
||||
self.initialized = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
if !self.initialized {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INVALID_REQUEST_ERROR_CODE,
|
||||
message: "Not initialized".to_string(),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
self.codex_message_processor
|
||||
.process_request(codex_request)
|
||||
.await;
|
||||
} else {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INVALID_REQUEST_ERROR_CODE,
|
||||
message: "Invalid request".to_string(),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
}
|
||||
|
||||
self.codex_message_processor
|
||||
.process_request(codex_request)
|
||||
.await;
|
||||
}
|
||||
|
||||
pub(crate) async fn process_notification(&self, notification: JSONRPCNotification) {
|
||||
|
||||
@@ -353,7 +353,7 @@ async fn run_login(config_overrides: &CliConfigOverrides, login_args: LoginArgs)
|
||||
.context("failed to load configuration")?;
|
||||
|
||||
if !config.features.enabled(Feature::RmcpClient) {
|
||||
bail!("OAuth login is only supported when [feature].rmcp_client is true in config.toml.");
|
||||
bail!("OAuth login is only supported when [features].rmcp_client is true in config.toml.");
|
||||
}
|
||||
|
||||
let LoginArgs { name, scopes } = login_args;
|
||||
|
||||
@@ -80,7 +80,7 @@ toml_edit = { workspace = true }
|
||||
tracing = { workspace = true, features = ["log"] }
|
||||
tree-sitter = { workspace = true }
|
||||
tree-sitter-bash = { workspace = true }
|
||||
uuid = { workspace = true, features = ["serde", "v4"] }
|
||||
uuid = { workspace = true, features = ["serde", "v4", "v5"] }
|
||||
which = { workspace = true }
|
||||
wildmatch = { workspace = true }
|
||||
codex_windows_sandbox = { package = "codex-windows-sandbox", path = "../windows-sandbox-rs" }
|
||||
|
||||
@@ -931,8 +931,10 @@ async fn stream_from_fixture(
|
||||
fn rate_limit_regex() -> &'static Regex {
|
||||
static RE: OnceLock<Regex> = OnceLock::new();
|
||||
|
||||
// Match both OpenAI-style messages like "Please try again in 1.898s"
|
||||
// and Azure OpenAI-style messages like "Try again in 35 seconds".
|
||||
#[expect(clippy::unwrap_used)]
|
||||
RE.get_or_init(|| Regex::new(r"Please try again in (\d+(?:\.\d+)?)(s|ms)").unwrap())
|
||||
RE.get_or_init(|| Regex::new(r"(?i)try again in\s*(\d+(?:\.\d+)?)\s*(s|ms|seconds?)").unwrap())
|
||||
}
|
||||
|
||||
fn try_parse_retry_after(err: &Error) -> Option<Duration> {
|
||||
@@ -940,7 +942,8 @@ fn try_parse_retry_after(err: &Error) -> Option<Duration> {
|
||||
return None;
|
||||
}
|
||||
|
||||
// parse the Please try again in 1.898s format using regex
|
||||
// parse retry hints like "try again in 1.898s" or
|
||||
// "Try again in 35 seconds" using regex
|
||||
let re = rate_limit_regex();
|
||||
if let Some(message) = &err.message
|
||||
&& let Some(captures) = re.captures(message)
|
||||
@@ -950,9 +953,9 @@ fn try_parse_retry_after(err: &Error) -> Option<Duration> {
|
||||
|
||||
if let (Some(value), Some(unit)) = (seconds, unit) {
|
||||
let value = value.as_str().parse::<f64>().ok()?;
|
||||
let unit = unit.as_str();
|
||||
let unit = unit.as_str().to_ascii_lowercase();
|
||||
|
||||
if unit == "s" {
|
||||
if unit == "s" || unit.starts_with("second") {
|
||||
return Some(Duration::from_secs_f64(value));
|
||||
} else if unit == "ms" {
|
||||
return Some(Duration::from_millis(value as u64));
|
||||
@@ -1427,6 +1430,19 @@ mod tests {
|
||||
assert_eq!(delay, Some(Duration::from_secs_f64(1.898)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_try_parse_retry_after_azure() {
|
||||
let err = Error {
|
||||
r#type: None,
|
||||
message: Some("Rate limit exceeded. Try again in 35 seconds.".to_string()),
|
||||
code: Some("rate_limit_exceeded".to_string()),
|
||||
plan_type: None,
|
||||
resets_at: None,
|
||||
};
|
||||
let delay = try_parse_retry_after(&err);
|
||||
assert_eq!(delay, Some(Duration::from_secs(35)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn error_response_deserializes_schema_known_plan_type_and_serializes_back() {
|
||||
use crate::token_data::KnownPlan;
|
||||
|
||||
@@ -16,7 +16,6 @@ use crate::protocol::TurnContextItem;
|
||||
use crate::protocol::WarningEvent;
|
||||
use crate::truncate::truncate_middle;
|
||||
use crate::util::backoff;
|
||||
use askama::Template;
|
||||
use codex_protocol::items::TurnItem;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
@@ -29,13 +28,6 @@ use tracing::error;
|
||||
pub const SUMMARIZATION_PROMPT: &str = include_str!("../../templates/compact/prompt.md");
|
||||
const COMPACT_USER_MESSAGE_MAX_TOKENS: usize = 20_000;
|
||||
|
||||
#[derive(Template)]
|
||||
#[template(path = "compact/history_bridge.md", escape = "none")]
|
||||
struct HistoryBridgeTemplate<'a> {
|
||||
user_messages_text: &'a str,
|
||||
summary_text: &'a str,
|
||||
}
|
||||
|
||||
pub(crate) async fn run_inline_auto_compact_task(
|
||||
sess: Arc<Session>,
|
||||
turn_context: Arc<TurnContext>,
|
||||
@@ -150,6 +142,7 @@ async fn run_compact_task_inner(
|
||||
let history_snapshot = sess.clone_history().await.get_history();
|
||||
let summary_text = get_last_assistant_message_from_turn(&history_snapshot).unwrap_or_default();
|
||||
let user_messages = collect_user_messages(&history_snapshot);
|
||||
|
||||
let initial_context = sess.build_initial_context(turn_context.as_ref());
|
||||
let mut new_history = build_compacted_history(initial_context, &user_messages, &summary_text);
|
||||
let ghost_snapshots: Vec<ResponseItem> = history_snapshot
|
||||
@@ -224,33 +217,47 @@ fn build_compacted_history_with_limit(
|
||||
summary_text: &str,
|
||||
max_bytes: usize,
|
||||
) -> Vec<ResponseItem> {
|
||||
let mut user_messages_text = if user_messages.is_empty() {
|
||||
"(none)".to_string()
|
||||
} else {
|
||||
user_messages.join("\n\n")
|
||||
};
|
||||
// Truncate the concatenated prior user messages so the bridge message
|
||||
// stays well under the context window (approx. 4 bytes/token).
|
||||
if user_messages_text.len() > max_bytes {
|
||||
user_messages_text = truncate_middle(&user_messages_text, max_bytes).0;
|
||||
let mut selected_messages: Vec<String> = Vec::new();
|
||||
if max_bytes > 0 {
|
||||
let mut remaining = max_bytes;
|
||||
for message in user_messages.iter().rev() {
|
||||
if remaining == 0 {
|
||||
break;
|
||||
}
|
||||
if message.len() <= remaining {
|
||||
selected_messages.push(message.clone());
|
||||
remaining = remaining.saturating_sub(message.len());
|
||||
} else {
|
||||
let (truncated, _) = truncate_middle(message, remaining);
|
||||
selected_messages.push(truncated);
|
||||
break;
|
||||
}
|
||||
}
|
||||
selected_messages.reverse();
|
||||
}
|
||||
|
||||
for message in &selected_messages {
|
||||
history.push(ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputText {
|
||||
text: message.clone(),
|
||||
}],
|
||||
});
|
||||
}
|
||||
|
||||
let summary_text = if summary_text.is_empty() {
|
||||
"(no summary available)".to_string()
|
||||
} else {
|
||||
summary_text.to_string()
|
||||
};
|
||||
let Ok(bridge) = HistoryBridgeTemplate {
|
||||
user_messages_text: &user_messages_text,
|
||||
summary_text: &summary_text,
|
||||
}
|
||||
.render() else {
|
||||
return vec![];
|
||||
};
|
||||
|
||||
history.push(ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputText { text: bridge }],
|
||||
content: vec![ContentItem::InputText { text: summary_text }],
|
||||
});
|
||||
|
||||
history
|
||||
}
|
||||
|
||||
@@ -390,30 +397,55 @@ mod tests {
|
||||
"SUMMARY",
|
||||
max_bytes,
|
||||
);
|
||||
assert_eq!(history.len(), 2);
|
||||
|
||||
// Expect exactly one bridge message added to history (plus any initial context we provided, which is none).
|
||||
assert_eq!(history.len(), 1);
|
||||
let truncated_message = &history[0];
|
||||
let summary_message = &history[1];
|
||||
|
||||
// Extract the text content of the bridge message.
|
||||
let bridge_text = match &history[0] {
|
||||
let truncated_text = match truncated_message {
|
||||
ResponseItem::Message { role, content, .. } if role == "user" => {
|
||||
content_items_to_text(content).unwrap_or_default()
|
||||
}
|
||||
other => panic!("unexpected item in history: {other:?}"),
|
||||
};
|
||||
|
||||
// The bridge should contain the truncation marker and not the full original payload.
|
||||
assert!(
|
||||
bridge_text.contains("tokens truncated"),
|
||||
"expected truncation marker in bridge message"
|
||||
truncated_text.contains("tokens truncated"),
|
||||
"expected truncation marker in truncated user message"
|
||||
);
|
||||
assert!(
|
||||
!bridge_text.contains(&big),
|
||||
"bridge should not include the full oversized user text"
|
||||
!truncated_text.contains(&big),
|
||||
"truncated user message should not include the full oversized user text"
|
||||
);
|
||||
|
||||
let summary_text = match summary_message {
|
||||
ResponseItem::Message { role, content, .. } if role == "user" => {
|
||||
content_items_to_text(content).unwrap_or_default()
|
||||
}
|
||||
other => panic!("unexpected item in history: {other:?}"),
|
||||
};
|
||||
assert_eq!(summary_text, "SUMMARY");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn build_compacted_history_appends_summary_message() {
|
||||
let initial_context: Vec<ResponseItem> = Vec::new();
|
||||
let user_messages = vec!["first user message".to_string()];
|
||||
let summary_text = "summary text";
|
||||
|
||||
let history = build_compacted_history(initial_context, &user_messages, summary_text);
|
||||
assert!(
|
||||
bridge_text.contains("SUMMARY"),
|
||||
"bridge should include the provided summary text"
|
||||
!history.is_empty(),
|
||||
"expected compacted history to include summary"
|
||||
);
|
||||
|
||||
let last = history.last().expect("history should have a summary entry");
|
||||
let summary = match last {
|
||||
ResponseItem::Message { role, content, .. } if role == "user" => {
|
||||
content_items_to_text(content).unwrap_or_default()
|
||||
}
|
||||
other => panic!("expected summary message, found {other:?}"),
|
||||
};
|
||||
assert_eq!(summary, summary_text);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,6 +20,14 @@ pub enum NetworkAccess {
|
||||
Restricted,
|
||||
Enabled,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct OperatingSystemInfo {
|
||||
pub name: String,
|
||||
pub version: String,
|
||||
pub is_likely_windows_subsystem_for_linux: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(rename = "environment_context", rename_all = "snake_case")]
|
||||
pub(crate) struct EnvironmentContext {
|
||||
@@ -29,6 +37,7 @@ pub(crate) struct EnvironmentContext {
|
||||
pub network_access: Option<NetworkAccess>,
|
||||
pub writable_roots: Option<Vec<PathBuf>>,
|
||||
pub shell: Option<Shell>,
|
||||
pub operating_system: Option<OperatingSystemInfo>,
|
||||
}
|
||||
|
||||
impl EnvironmentContext {
|
||||
@@ -70,6 +79,7 @@ impl EnvironmentContext {
|
||||
_ => None,
|
||||
},
|
||||
shell,
|
||||
operating_system: Self::operating_system_info(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,6 +93,7 @@ impl EnvironmentContext {
|
||||
sandbox_mode,
|
||||
network_access,
|
||||
writable_roots,
|
||||
operating_system,
|
||||
// should compare all fields except shell
|
||||
shell: _,
|
||||
} = other;
|
||||
@@ -92,6 +103,7 @@ impl EnvironmentContext {
|
||||
&& self.sandbox_mode == *sandbox_mode
|
||||
&& self.network_access == *network_access
|
||||
&& self.writable_roots == *writable_roots
|
||||
&& self.operating_system == *operating_system
|
||||
}
|
||||
|
||||
pub fn diff(before: &TurnContext, after: &TurnContext) -> Self {
|
||||
@@ -110,7 +122,12 @@ impl EnvironmentContext {
|
||||
} else {
|
||||
None
|
||||
};
|
||||
EnvironmentContext::new(cwd, approval_policy, sandbox_policy, None)
|
||||
// Diff messages should only include fields that changed between turns.
|
||||
// Operating system is a static property of the host and should not be
|
||||
// emitted as part of a per-turn diff.
|
||||
let mut ec = EnvironmentContext::new(cwd, approval_policy, sandbox_policy, None);
|
||||
ec.operating_system = None;
|
||||
ec
|
||||
}
|
||||
}
|
||||
|
||||
@@ -141,10 +158,11 @@ impl EnvironmentContext {
|
||||
/// <shell>...</shell>
|
||||
/// </environment_context>
|
||||
/// ```
|
||||
pub fn serialize_to_xml(self) -> String {
|
||||
pub fn serialize_to_xml(&self) -> String {
|
||||
let mut lines = vec![ENVIRONMENT_CONTEXT_OPEN_TAG.to_string()];
|
||||
if let Some(cwd) = self.cwd {
|
||||
lines.push(format!(" <cwd>{}</cwd>", cwd.to_string_lossy()));
|
||||
if let Some(cwd) = self.cwd.as_ref() {
|
||||
let cwd = cwd.to_string_lossy();
|
||||
lines.push(format!(" <cwd>{cwd}</cwd>"));
|
||||
}
|
||||
if let Some(approval_policy) = self.approval_policy {
|
||||
lines.push(format!(
|
||||
@@ -154,29 +172,44 @@ impl EnvironmentContext {
|
||||
if let Some(sandbox_mode) = self.sandbox_mode {
|
||||
lines.push(format!(" <sandbox_mode>{sandbox_mode}</sandbox_mode>"));
|
||||
}
|
||||
if let Some(network_access) = self.network_access {
|
||||
if let Some(network_access) = self.network_access.as_ref() {
|
||||
lines.push(format!(
|
||||
" <network_access>{network_access}</network_access>"
|
||||
));
|
||||
}
|
||||
if let Some(writable_roots) = self.writable_roots {
|
||||
if let Some(writable_roots) = self.writable_roots.as_ref() {
|
||||
lines.push(" <writable_roots>".to_string());
|
||||
for writable_root in writable_roots {
|
||||
lines.push(format!(
|
||||
" <root>{}</root>",
|
||||
writable_root.to_string_lossy()
|
||||
));
|
||||
let writable_root = writable_root.to_string_lossy();
|
||||
lines.push(format!(" <root>{writable_root}</root>"));
|
||||
}
|
||||
lines.push(" </writable_roots>".to_string());
|
||||
}
|
||||
if let Some(shell) = self.shell
|
||||
if let Some(shell) = self.shell.as_ref()
|
||||
&& let Some(shell_name) = shell.name()
|
||||
{
|
||||
lines.push(format!(" <shell>{shell_name}</shell>"));
|
||||
}
|
||||
if let Some(operating_system) = self.operating_system.as_ref() {
|
||||
lines.push(" <operating_system>".to_string());
|
||||
let name = operating_system.name.as_str();
|
||||
lines.push(format!(" <name>{name}</name>"));
|
||||
let version = operating_system.version.as_str();
|
||||
lines.push(format!(" <version>{version}</version>"));
|
||||
if let Some(is_wsl) = operating_system.is_likely_windows_subsystem_for_linux {
|
||||
lines.push(format!(
|
||||
" <is_likely_windows_subsystem_for_linux>{is_wsl}</is_likely_windows_subsystem_for_linux>"
|
||||
));
|
||||
}
|
||||
lines.push(" </operating_system>".to_string());
|
||||
}
|
||||
lines.push(ENVIRONMENT_CONTEXT_CLOSE_TAG.to_string());
|
||||
lines.join("\n")
|
||||
}
|
||||
|
||||
fn operating_system_info() -> Option<OperatingSystemInfo> {
|
||||
operating_system_info_impl()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<EnvironmentContext> for ResponseItem {
|
||||
@@ -191,6 +224,47 @@ impl From<EnvironmentContext> for ResponseItem {
|
||||
}
|
||||
}
|
||||
|
||||
// Restrict Operating System Info to Windows and Linux inside WSL for now
|
||||
#[cfg(target_os = "windows")]
|
||||
fn operating_system_info_impl() -> Option<OperatingSystemInfo> {
|
||||
let info = os_info::get();
|
||||
Some(OperatingSystemInfo {
|
||||
name: info.os_type().to_string(),
|
||||
version: info.version().to_string(),
|
||||
is_likely_windows_subsystem_for_linux: Some(has_wsl_env_markers()),
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(all(unix, not(target_os = "macos")))]
|
||||
fn operating_system_info_impl() -> Option<OperatingSystemInfo> {
|
||||
let info = os_info::get();
|
||||
match has_wsl_env_markers() {
|
||||
true => Some(OperatingSystemInfo {
|
||||
name: info.os_type().to_string(),
|
||||
version: info.version().to_string(),
|
||||
is_likely_windows_subsystem_for_linux: Some(true),
|
||||
}),
|
||||
false => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_os = "macos")]
|
||||
fn operating_system_info_impl() -> Option<OperatingSystemInfo> {
|
||||
None
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
fn has_wsl_env_markers() -> bool {
|
||||
// Cache detection result since env vars are stable across process lifetime
|
||||
// and this function may be called multiple times.
|
||||
static CACHE: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
|
||||
*CACHE.get_or_init(|| {
|
||||
std::env::var_os("WSL_INTEROP").is_some()
|
||||
|| std::env::var_os("WSLENV").is_some()
|
||||
|| std::env::var_os("WSL_DISTRO_NAME").is_some()
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::shell::BashShell;
|
||||
@@ -198,6 +272,58 @@ mod tests {
|
||||
|
||||
use super::*;
|
||||
use pretty_assertions::assert_eq;
|
||||
fn expected_environment_context(mut body_lines: Vec<String>) -> String {
|
||||
let mut lines = vec!["<environment_context>".to_string()];
|
||||
lines.append(&mut body_lines);
|
||||
if let Some(os) = EnvironmentContext::operating_system_info() {
|
||||
lines.push(" <operating_system>".to_string());
|
||||
lines.push(format!(" <name>{}</name>", os.name));
|
||||
lines.push(format!(" <version>{}</version>", os.version));
|
||||
if let Some(is_wsl) = os.is_likely_windows_subsystem_for_linux {
|
||||
lines.push(format!(
|
||||
" <is_likely_windows_subsystem_for_linux>{is_wsl}</is_likely_windows_subsystem_for_linux>"
|
||||
));
|
||||
}
|
||||
lines.push(" </operating_system>".to_string());
|
||||
}
|
||||
lines.push("</environment_context>".to_string());
|
||||
lines.join("\n")
|
||||
}
|
||||
|
||||
#[cfg(target_os = "windows")]
|
||||
#[test]
|
||||
fn operating_system_info_on_windows_includes_os_details() {
|
||||
let info = operating_system_info_impl().expect("expected Windows operating system info");
|
||||
let os_details = os_info::get();
|
||||
|
||||
assert_eq!(info.name, os_details.os_type().to_string());
|
||||
assert_eq!(info.version, os_details.version().to_string());
|
||||
assert_eq!(
|
||||
info.is_likely_windows_subsystem_for_linux,
|
||||
Some(has_wsl_env_markers())
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(all(unix, not(target_os = "macos")))]
|
||||
#[test]
|
||||
fn operating_system_info_matches_wsl_detection_on_unix() {
|
||||
let info = operating_system_info_impl();
|
||||
let os_details = os_info::get();
|
||||
if has_wsl_env_markers() {
|
||||
let info = info.expect("expected WSL operating system info");
|
||||
assert_eq!(info.name, os_details.os_type().to_string());
|
||||
assert_eq!(info.version, os_details.version().to_string());
|
||||
assert_eq!(info.is_likely_windows_subsystem_for_linux, Some(true));
|
||||
} else {
|
||||
assert_eq!(info, None);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_os = "macos")]
|
||||
#[test]
|
||||
fn operating_system_info_is_none_on_macos() {
|
||||
assert_eq!(operating_system_info_impl(), None);
|
||||
}
|
||||
|
||||
fn workspace_write_policy(writable_roots: Vec<&str>, network_access: bool) -> SandboxPolicy {
|
||||
SandboxPolicy::WorkspaceWrite {
|
||||
@@ -217,16 +343,16 @@ mod tests {
|
||||
None,
|
||||
);
|
||||
|
||||
let expected = r#"<environment_context>
|
||||
<cwd>/repo</cwd>
|
||||
<approval_policy>on-request</approval_policy>
|
||||
<sandbox_mode>workspace-write</sandbox_mode>
|
||||
<network_access>restricted</network_access>
|
||||
<writable_roots>
|
||||
<root>/repo</root>
|
||||
<root>/tmp</root>
|
||||
</writable_roots>
|
||||
</environment_context>"#;
|
||||
let expected = expected_environment_context(vec![
|
||||
" <cwd>/repo</cwd>".to_string(),
|
||||
" <approval_policy>on-request</approval_policy>".to_string(),
|
||||
" <sandbox_mode>workspace-write</sandbox_mode>".to_string(),
|
||||
" <network_access>restricted</network_access>".to_string(),
|
||||
" <writable_roots>".to_string(),
|
||||
" <root>/repo</root>".to_string(),
|
||||
" <root>/tmp</root>".to_string(),
|
||||
" </writable_roots>".to_string(),
|
||||
]);
|
||||
|
||||
assert_eq!(context.serialize_to_xml(), expected);
|
||||
}
|
||||
@@ -240,11 +366,11 @@ mod tests {
|
||||
None,
|
||||
);
|
||||
|
||||
let expected = r#"<environment_context>
|
||||
<approval_policy>never</approval_policy>
|
||||
<sandbox_mode>read-only</sandbox_mode>
|
||||
<network_access>restricted</network_access>
|
||||
</environment_context>"#;
|
||||
let expected = expected_environment_context(vec![
|
||||
" <approval_policy>never</approval_policy>".to_string(),
|
||||
" <sandbox_mode>read-only</sandbox_mode>".to_string(),
|
||||
" <network_access>restricted</network_access>".to_string(),
|
||||
]);
|
||||
|
||||
assert_eq!(context.serialize_to_xml(), expected);
|
||||
}
|
||||
@@ -258,11 +384,11 @@ mod tests {
|
||||
None,
|
||||
);
|
||||
|
||||
let expected = r#"<environment_context>
|
||||
<approval_policy>on-failure</approval_policy>
|
||||
<sandbox_mode>danger-full-access</sandbox_mode>
|
||||
<network_access>enabled</network_access>
|
||||
</environment_context>"#;
|
||||
let expected = expected_environment_context(vec![
|
||||
" <approval_policy>on-failure</approval_policy>".to_string(),
|
||||
" <sandbox_mode>danger-full-access</sandbox_mode>".to_string(),
|
||||
" <network_access>enabled</network_access>".to_string(),
|
||||
]);
|
||||
|
||||
assert_eq!(context.serialize_to_xml(), expected);
|
||||
}
|
||||
|
||||
@@ -255,7 +255,7 @@ impl std::fmt::Display for UsageLimitReachedError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let message = match self.plan_type.as_ref() {
|
||||
Some(PlanType::Known(KnownPlan::Plus)) => format!(
|
||||
"You've hit your usage limit. Upgrade to Pro (https://openai.com/chatgpt/pricing), visit chatgpt.com/codex/settings/usage to purchase more credits{}",
|
||||
"You've hit your usage limit. Upgrade to Pro (https://openai.com/chatgpt/pricing), visit https://chatgpt.com/codex/settings/usage to purchase more credits{}",
|
||||
retry_suffix_after_or(self.resets_at.as_ref())
|
||||
),
|
||||
Some(PlanType::Known(KnownPlan::Team)) | Some(PlanType::Known(KnownPlan::Business)) => {
|
||||
@@ -269,7 +269,7 @@ impl std::fmt::Display for UsageLimitReachedError {
|
||||
.to_string()
|
||||
}
|
||||
Some(PlanType::Known(KnownPlan::Pro)) => format!(
|
||||
"You've hit your usage limit. Visit chatgpt.com/codex/settings/usage to purchase more credits{}",
|
||||
"You've hit your usage limit. Visit https://chatgpt.com/codex/settings/usage to purchase more credits{}",
|
||||
retry_suffix_after_or(self.resets_at.as_ref())
|
||||
),
|
||||
Some(PlanType::Known(KnownPlan::Enterprise))
|
||||
@@ -460,7 +460,7 @@ mod tests {
|
||||
};
|
||||
assert_eq!(
|
||||
err.to_string(),
|
||||
"You've hit your usage limit. Upgrade to Pro (https://openai.com/chatgpt/pricing), visit chatgpt.com/codex/settings/usage to purchase more credits or try again later."
|
||||
"You've hit your usage limit. Upgrade to Pro (https://openai.com/chatgpt/pricing), visit https://chatgpt.com/codex/settings/usage to purchase more credits or try again later."
|
||||
);
|
||||
}
|
||||
|
||||
@@ -613,7 +613,7 @@ mod tests {
|
||||
rate_limits: Some(rate_limit_snapshot()),
|
||||
};
|
||||
let expected = format!(
|
||||
"You've hit your usage limit. Visit chatgpt.com/codex/settings/usage to purchase more credits or try again at {expected_time}."
|
||||
"You've hit your usage limit. Visit https://chatgpt.com/codex/settings/usage to purchase more credits or try again at {expected_time}."
|
||||
);
|
||||
assert_eq!(err.to_string(), expected);
|
||||
});
|
||||
@@ -647,7 +647,7 @@ mod tests {
|
||||
rate_limits: Some(rate_limit_snapshot()),
|
||||
};
|
||||
let expected = format!(
|
||||
"You've hit your usage limit. Upgrade to Pro (https://openai.com/chatgpt/pricing), visit chatgpt.com/codex/settings/usage to purchase more credits or try again at {expected_time}."
|
||||
"You've hit your usage limit. Upgrade to Pro (https://openai.com/chatgpt/pricing), visit https://chatgpt.com/codex/settings/usage to purchase more credits or try again at {expected_time}."
|
||||
);
|
||||
assert_eq!(err.to_string(), expected);
|
||||
});
|
||||
|
||||
@@ -123,7 +123,7 @@ fn set_if_some(
|
||||
if let Some(enabled) = maybe_value {
|
||||
set_feature(features, feature, enabled);
|
||||
log_alias(alias_key, feature);
|
||||
features.record_legacy_usage_force(alias_key, feature);
|
||||
features.record_legacy_usage(alias_key, feature);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -25,16 +25,6 @@ use tracing::warn;
|
||||
|
||||
const SANDBOX_ASSESSMENT_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
|
||||
const SANDBOX_RISK_CATEGORY_VALUES: &[&str] = &[
|
||||
"data_deletion",
|
||||
"data_exfiltration",
|
||||
"privilege_escalation",
|
||||
"system_modification",
|
||||
"network_access",
|
||||
"resource_exhaustion",
|
||||
"compliance",
|
||||
];
|
||||
|
||||
#[derive(Template)]
|
||||
#[template(path = "sandboxing/assessment_prompt.md", escape = "none")]
|
||||
struct SandboxAssessmentPromptTemplate<'a> {
|
||||
@@ -176,27 +166,26 @@ pub(crate) async fn assess_command(
|
||||
call_id,
|
||||
"success",
|
||||
Some(assessment.risk_level),
|
||||
&assessment.risk_categories,
|
||||
duration,
|
||||
);
|
||||
return Some(assessment);
|
||||
}
|
||||
Err(err) => {
|
||||
warn!("failed to parse sandbox assessment JSON: {err}");
|
||||
parent_otel.sandbox_assessment(call_id, "parse_error", None, &[], duration);
|
||||
parent_otel.sandbox_assessment(call_id, "parse_error", None, duration);
|
||||
}
|
||||
},
|
||||
Ok(Ok(None)) => {
|
||||
warn!("sandbox assessment response did not include any message");
|
||||
parent_otel.sandbox_assessment(call_id, "no_output", None, &[], duration);
|
||||
parent_otel.sandbox_assessment(call_id, "no_output", None, duration);
|
||||
}
|
||||
Ok(Err(err)) => {
|
||||
warn!("sandbox assessment failed: {err}");
|
||||
parent_otel.sandbox_assessment(call_id, "model_error", None, &[], duration);
|
||||
parent_otel.sandbox_assessment(call_id, "model_error", None, duration);
|
||||
}
|
||||
Err(_) => {
|
||||
warn!("sandbox assessment timed out");
|
||||
parent_otel.sandbox_assessment(call_id, "timeout", None, &[], duration);
|
||||
parent_otel.sandbox_assessment(call_id, "timeout", None, duration);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -229,7 +218,7 @@ fn sandbox_roots_for_prompt(policy: &SandboxPolicy, cwd: &Path) -> Vec<PathBuf>
|
||||
fn sandbox_assessment_schema() -> serde_json::Value {
|
||||
json!({
|
||||
"type": "object",
|
||||
"required": ["description", "risk_level", "risk_categories"],
|
||||
"required": ["description", "risk_level"],
|
||||
"properties": {
|
||||
"description": {
|
||||
"type": "string",
|
||||
@@ -240,13 +229,6 @@ fn sandbox_assessment_schema() -> serde_json::Value {
|
||||
"type": "string",
|
||||
"enum": ["low", "medium", "high"]
|
||||
},
|
||||
"risk_categories": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"enum": SANDBOX_RISK_CATEGORY_VALUES
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
})
|
||||
|
||||
@@ -1,26 +1,28 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use codex_protocol::models::ShellToolCallParams;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::error;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::codex::TurnContext;
|
||||
use crate::exec::SandboxType;
|
||||
use crate::exec::StdoutStream;
|
||||
use crate::exec::execute_exec_env;
|
||||
use crate::exec_env::create_env;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::TaskStartedEvent;
|
||||
use crate::sandboxing::ExecEnv;
|
||||
use crate::state::TaskKind;
|
||||
use crate::tools::events::ToolEmitter;
|
||||
use crate::tools::events::ToolEventCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::parallel::ToolCallRuntime;
|
||||
use crate::tools::router::ToolCall;
|
||||
use crate::tools::router::ToolRouter;
|
||||
use crate::turn_diff_tracker::TurnDiffTracker;
|
||||
|
||||
use super::SessionTask;
|
||||
use super::SessionTaskContext;
|
||||
|
||||
const USER_SHELL_TOOL_NAME: &str = "local_shell";
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct UserShellCommandTask {
|
||||
command: String,
|
||||
@@ -76,40 +78,31 @@ impl SessionTask for UserShellCommandTask {
|
||||
}
|
||||
};
|
||||
|
||||
let command = shell_invocation;
|
||||
let cwd = turn_context.resolve_path(None);
|
||||
let call_id = Uuid::new_v4().to_string();
|
||||
let emitter = ToolEmitter::shell(command.clone(), cwd.clone(), true);
|
||||
let event_ctx = ToolEventCtx::new(session.as_ref(), turn_context.as_ref(), &call_id, None);
|
||||
emitter.begin(event_ctx).await;
|
||||
|
||||
let exec_env = ExecEnv {
|
||||
command,
|
||||
cwd: cwd.clone(),
|
||||
env: create_env(&turn_context.shell_environment_policy),
|
||||
let params = ShellToolCallParams {
|
||||
command: shell_invocation,
|
||||
workdir: None,
|
||||
timeout_ms: None,
|
||||
sandbox: SandboxType::None,
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
};
|
||||
|
||||
let stdout_stream = StdoutStream {
|
||||
sub_id: turn_context.sub_id.clone(),
|
||||
call_id: call_id.clone(),
|
||||
tx_event: session.get_tx_event(),
|
||||
let tool_call = ToolCall {
|
||||
tool_name: USER_SHELL_TOOL_NAME.to_string(),
|
||||
call_id: Uuid::new_v4().to_string(),
|
||||
payload: ToolPayload::LocalShell { params },
|
||||
};
|
||||
|
||||
let exec_result = tokio::select! {
|
||||
_ = cancellation_token.cancelled() => {
|
||||
return None;
|
||||
}
|
||||
res = execute_exec_env(exec_env, &turn_context.sandbox_policy, Some(stdout_stream)) => res,
|
||||
};
|
||||
let router = Arc::new(ToolRouter::from_config(&turn_context.tools_config, None));
|
||||
let tracker = Arc::new(Mutex::new(TurnDiffTracker::new()));
|
||||
let runtime = ToolCallRuntime::new(
|
||||
Arc::clone(&router),
|
||||
Arc::clone(&session),
|
||||
Arc::clone(&turn_context),
|
||||
Arc::clone(&tracker),
|
||||
);
|
||||
|
||||
let event_ctx = ToolEventCtx::new(session.as_ref(), turn_context.as_ref(), &call_id, None);
|
||||
if let Err(err) = emitter
|
||||
.finish(event_ctx, exec_result.map_err(ToolError::Codex))
|
||||
if let Err(err) = runtime
|
||||
.handle_tool_call(tool_call, cancellation_token)
|
||||
.await
|
||||
{
|
||||
error!("user shell command failed: {err:?}");
|
||||
|
||||
@@ -40,7 +40,6 @@ pub enum ToolPayload {
|
||||
},
|
||||
LocalShell {
|
||||
params: ShellToolCallParams,
|
||||
is_user_shell_command: bool,
|
||||
},
|
||||
UnifiedExec {
|
||||
arguments: String,
|
||||
@@ -57,7 +56,7 @@ impl ToolPayload {
|
||||
match self {
|
||||
ToolPayload::Function { arguments } => Cow::Borrowed(arguments),
|
||||
ToolPayload::Custom { input } => Cow::Borrowed(input),
|
||||
ToolPayload::LocalShell { params, .. } => Cow::Owned(params.command.join(" ")),
|
||||
ToolPayload::LocalShell { params } => Cow::Owned(params.command.join(" ")),
|
||||
ToolPayload::UnifiedExec { arguments } => Cow::Borrowed(arguments),
|
||||
ToolPayload::Mcp { raw_arguments, .. } => Cow::Borrowed(raw_arguments),
|
||||
}
|
||||
|
||||
@@ -82,10 +82,7 @@ impl ToolHandler for ShellHandler {
|
||||
)
|
||||
.await
|
||||
}
|
||||
ToolPayload::LocalShell {
|
||||
params,
|
||||
is_user_shell_command,
|
||||
} => {
|
||||
ToolPayload::LocalShell { params } => {
|
||||
let exec_params = Self::to_exec_params(params, turn.as_ref());
|
||||
Self::run_exec_like(
|
||||
tool_name.as_str(),
|
||||
@@ -94,7 +91,7 @@ impl ToolHandler for ShellHandler {
|
||||
turn,
|
||||
tracker,
|
||||
call_id,
|
||||
is_user_shell_command,
|
||||
true,
|
||||
)
|
||||
.await
|
||||
}
|
||||
@@ -222,7 +219,6 @@ impl ShellHandler {
|
||||
env: exec_params.env.clone(),
|
||||
with_escalated_permissions: exec_params.with_escalated_permissions,
|
||||
justification: exec_params.justification.clone(),
|
||||
is_user_shell_command,
|
||||
};
|
||||
let mut orchestrator = ToolOrchestrator::new();
|
||||
let mut runtime = ShellRuntime::new();
|
||||
|
||||
@@ -54,12 +54,21 @@ impl ToolOrchestrator {
|
||||
let mut already_approved = false;
|
||||
|
||||
if needs_initial_approval {
|
||||
let mut risk = None;
|
||||
|
||||
if let Some(metadata) = req.sandbox_retry_data() {
|
||||
risk = tool_ctx
|
||||
.session
|
||||
.assess_sandbox_command(turn_ctx, &tool_ctx.call_id, &metadata.command, None)
|
||||
.await;
|
||||
}
|
||||
|
||||
let approval_ctx = ApprovalCtx {
|
||||
session: tool_ctx.session,
|
||||
turn: turn_ctx,
|
||||
call_id: &tool_ctx.call_id,
|
||||
retry_reason: None,
|
||||
risk: None,
|
||||
risk,
|
||||
};
|
||||
let decision = tool.start_approval_async(req, approval_ctx).await;
|
||||
|
||||
|
||||
@@ -120,10 +120,7 @@ impl ToolRouter {
|
||||
Ok(Some(ToolCall {
|
||||
tool_name: "local_shell".to_string(),
|
||||
call_id,
|
||||
payload: ToolPayload::LocalShell {
|
||||
params,
|
||||
is_user_shell_command: false,
|
||||
},
|
||||
payload: ToolPayload::LocalShell { params },
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -34,7 +34,6 @@ pub struct ShellRequest {
|
||||
pub env: std::collections::HashMap<String, String>,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
pub is_user_shell_command: bool,
|
||||
}
|
||||
|
||||
impl ProvidesSandboxRetryData for ShellRequest {
|
||||
@@ -122,9 +121,6 @@ impl Approvable<ShellRequest> for ShellRuntime {
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> bool {
|
||||
if req.is_user_shell_command {
|
||||
return false;
|
||||
}
|
||||
if is_known_safe_command(&req.command) {
|
||||
return false;
|
||||
}
|
||||
@@ -150,7 +146,7 @@ impl Approvable<ShellRequest> for ShellRuntime {
|
||||
}
|
||||
|
||||
fn wants_escalated_first_attempt(&self, req: &ShellRequest) -> bool {
|
||||
req.is_user_shell_command || req.with_escalated_permissions.unwrap_or(false)
|
||||
req.with_escalated_permissions.unwrap_or(false)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
You were originally given instructions from a user over one or more turns. Here were the user messages:
|
||||
|
||||
{{ user_messages_text }}
|
||||
|
||||
Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:
|
||||
|
||||
{{ summary_text }}
|
||||
@@ -1,5 +1,9 @@
|
||||
You have exceeded the maximum number of tokens, please stop coding and instead write a short memento message for the next agent. Your note should:
|
||||
- Summarize what you finished and what still needs work. If there was a recent update_plan call, repeat its steps verbatim.
|
||||
- List outstanding TODOs with file paths / line numbers so they're easy to find.
|
||||
- Flag code that needs more tests (edge cases, performance, integration, etc.).
|
||||
- Record any open bugs, quirks, or setup steps that will make it easier for the next agent to pick up where you left off.
|
||||
You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.
|
||||
|
||||
Include:
|
||||
- Current progress and key decisions made
|
||||
- Important context, constraints, or user preferences
|
||||
- What remains to be done (clear next steps)
|
||||
- Any critical data, examples, or references needed to continue
|
||||
|
||||
Be concise, structured, and focused on helping the next LLM seamlessly continue the work.
|
||||
|
||||
@@ -1,13 +1,10 @@
|
||||
You are a security analyst evaluating shell commands that were blocked by a sandbox. Given the provided metadata, summarize the command's likely intent and assess the risk. Return strictly valid JSON with the keys:
|
||||
- description (concise summary, at most two sentences)
|
||||
You are a security analyst evaluating shell commands that were blocked by a sandbox. Given the provided metadata, summarize the command's likely intent and assess the risk to help the user decide whether to approve command execution. Return strictly valid JSON with the keys:
|
||||
- description (concise summary of command intent and potential effects, no more than one sentence, use present tense)
|
||||
- risk_level ("low", "medium", or "high")
|
||||
- risk_categories (optional array of zero or more category strings)
|
||||
Risk level examples:
|
||||
- low: read-only inspections, listing files, printing configuration
|
||||
- medium: modifying project files, installing dependencies, fetching artifacts from trusted sources
|
||||
- low: read-only inspections, listing files, printing configuration, fetching artifacts from trusted sources
|
||||
- medium: modifying project files, installing dependencies
|
||||
- high: deleting or overwriting data, exfiltrating secrets, escalating privileges, or disabling security controls
|
||||
Recognized risk_categories: data_deletion, data_exfiltration, privilege_escalation, system_modification, network_access, resource_exhaustion, compliance.
|
||||
Use multiple categories when appropriate.
|
||||
If information is insufficient, choose the most cautious risk level supported by the evidence.
|
||||
Respond with JSON only, without markdown code fences or extra commentary.
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ use codex_core::ConversationManager;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::NewConversation;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::protocol::ErrorEvent;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
@@ -13,9 +14,9 @@ use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::wait_for_event;
|
||||
use std::collections::VecDeque;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use codex_core::codex::compact::SUMMARIZATION_PROMPT;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_completed_with_tokens;
|
||||
@@ -27,6 +28,7 @@ use core_test_support::responses::sse;
|
||||
use core_test_support::responses::sse_failed;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
// --- Test helpers -----------------------------------------------------------
|
||||
|
||||
pub(super) const FIRST_REPLY: &str = "FIRST_REPLY";
|
||||
@@ -46,8 +48,39 @@ const CONTEXT_LIMIT_MESSAGE: &str =
|
||||
const DUMMY_FUNCTION_NAME: &str = "unsupported_tool";
|
||||
const DUMMY_CALL_ID: &str = "call-multi-auto";
|
||||
const FUNCTION_CALL_LIMIT_MSG: &str = "function call limit push";
|
||||
const POST_AUTO_USER_MSG: &str = "post auto follow-up";
|
||||
const COMPACT_PROMPT_MARKER: &str =
|
||||
"You are performing a CONTEXT CHECKPOINT COMPACTION for a tool.";
|
||||
pub(super) const TEST_COMPACT_PROMPT: &str =
|
||||
"You are performing a CONTEXT CHECKPOINT COMPACTION for a tool.\nTest-only compact prompt.";
|
||||
|
||||
pub(super) const COMPACT_WARNING_MESSAGE: &str = "Heads up: Long conversations and multiple compactions can cause the model to be less accurate. Start new a new conversation when possible to keep conversations small and targeted.";
|
||||
|
||||
fn auto_summary(summary: &str) -> String {
|
||||
summary.to_string()
|
||||
}
|
||||
|
||||
fn drop_call_id(value: &mut serde_json::Value) {
|
||||
match value {
|
||||
serde_json::Value::Object(obj) => {
|
||||
obj.retain(|k, _| k != "call_id");
|
||||
for v in obj.values_mut() {
|
||||
drop_call_id(v);
|
||||
}
|
||||
}
|
||||
serde_json::Value::Array(arr) => {
|
||||
for v in arr {
|
||||
drop_call_id(v);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
fn set_test_compact_prompt(config: &mut Config) {
|
||||
config.compact_prompt = Some(TEST_COMPACT_PROMPT.to_string());
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn summarize_context_three_requests_and_instructions() {
|
||||
skip_if_no_network!();
|
||||
@@ -73,14 +106,13 @@ async fn summarize_context_three_requests_and_instructions() {
|
||||
// Mount three expectations, one per request, matched by body content.
|
||||
let first_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains("\"text\":\"hello world\"")
|
||||
&& !body.contains("You have exceeded the maximum number of tokens")
|
||||
body.contains("\"text\":\"hello world\"") && !body.contains(COMPACT_PROMPT_MARKER)
|
||||
};
|
||||
mount_sse_once_match(&server, first_matcher, sse1).await;
|
||||
|
||||
let second_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains("You have exceeded the maximum number of tokens")
|
||||
body.contains(COMPACT_PROMPT_MARKER)
|
||||
};
|
||||
mount_sse_once_match(&server, second_matcher, sse2).await;
|
||||
|
||||
@@ -98,6 +130,7 @@ async fn summarize_context_three_requests_and_instructions() {
|
||||
let home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&home);
|
||||
config.model_provider = model_provider;
|
||||
set_test_compact_prompt(&mut config);
|
||||
config.model_auto_compact_token_limit = Some(200_000);
|
||||
let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
|
||||
let NewConversation {
|
||||
@@ -166,11 +199,11 @@ async fn summarize_context_three_requests_and_instructions() {
|
||||
assert_eq!(last2.get("role").unwrap().as_str().unwrap(), "user");
|
||||
let text2 = last2["content"][0]["text"].as_str().unwrap();
|
||||
assert_eq!(
|
||||
text2, SUMMARIZATION_PROMPT,
|
||||
text2, TEST_COMPACT_PROMPT,
|
||||
"expected summarize trigger, got `{text2}`"
|
||||
);
|
||||
|
||||
// Third request must contain the refreshed instructions, bridge summary message and new user msg.
|
||||
// Third request must contain the refreshed instructions, compacted user history, and new user message.
|
||||
let input3 = body3.get("input").and_then(|v| v.as_array()).unwrap();
|
||||
|
||||
assert!(
|
||||
@@ -178,13 +211,21 @@ async fn summarize_context_three_requests_and_instructions() {
|
||||
"expected refreshed context and new user message in third request"
|
||||
);
|
||||
|
||||
// Collect all (role, text) message tuples.
|
||||
let mut messages: Vec<(String, String)> = Vec::new();
|
||||
|
||||
for item in input3 {
|
||||
if item["type"].as_str() == Some("message") {
|
||||
let role = item["role"].as_str().unwrap_or_default().to_string();
|
||||
let text = item["content"][0]["text"]
|
||||
.as_str()
|
||||
if let Some("message") = item.get("type").and_then(|v| v.as_str()) {
|
||||
let role = item
|
||||
.get("role")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
let text = item
|
||||
.get("content")
|
||||
.and_then(|v| v.as_array())
|
||||
.and_then(|arr| arr.first())
|
||||
.and_then(|entry| entry.get("text"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
messages.push((role, text));
|
||||
@@ -200,26 +241,22 @@ async fn summarize_context_three_requests_and_instructions() {
|
||||
.any(|(r, t)| r == "user" && t == THIRD_USER_MSG),
|
||||
"third request should include the new user message"
|
||||
);
|
||||
let Some((_, bridge_text)) = messages.iter().find(|(role, text)| {
|
||||
role == "user"
|
||||
&& (text.contains("Here were the user messages")
|
||||
|| text.contains("Here are all the user messages"))
|
||||
&& text.contains(SUMMARY_TEXT)
|
||||
}) else {
|
||||
panic!("expected a bridge message containing the summary");
|
||||
};
|
||||
assert!(
|
||||
bridge_text.contains("hello world"),
|
||||
"bridge should capture earlier user messages"
|
||||
messages
|
||||
.iter()
|
||||
.any(|(r, t)| r == "user" && t == "hello world"),
|
||||
"third request should include the original user message"
|
||||
);
|
||||
assert!(
|
||||
!bridge_text.contains(SUMMARIZATION_PROMPT),
|
||||
"bridge text should not echo the summarize trigger"
|
||||
messages
|
||||
.iter()
|
||||
.any(|(r, t)| r == "user" && t == SUMMARY_TEXT),
|
||||
"third request should include the summary message"
|
||||
);
|
||||
assert!(
|
||||
!messages
|
||||
.iter()
|
||||
.any(|(_, text)| text.contains(SUMMARIZATION_PROMPT)),
|
||||
.any(|(_, text)| text.contains(TEST_COMPACT_PROMPT)),
|
||||
"third request should not include the summarize trigger"
|
||||
);
|
||||
|
||||
@@ -323,7 +360,7 @@ async fn manual_compact_uses_custom_prompt() {
|
||||
if text == custom_prompt {
|
||||
found_custom_prompt = true;
|
||||
}
|
||||
if text == SUMMARIZATION_PROMPT {
|
||||
if text == TEST_COMPACT_PROMPT {
|
||||
found_default_prompt = true;
|
||||
}
|
||||
}
|
||||
@@ -354,12 +391,17 @@ async fn auto_compact_runs_after_token_limit_hit() {
|
||||
ev_assistant_message("m3", AUTO_SUMMARY_TEXT),
|
||||
ev_completed_with_tokens("r3", 200),
|
||||
]);
|
||||
let sse_resume = sse(vec![ev_completed("r3-resume")]);
|
||||
let sse4 = sse(vec![
|
||||
ev_assistant_message("m4", FINAL_REPLY),
|
||||
ev_completed_with_tokens("r4", 120),
|
||||
]);
|
||||
|
||||
let first_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(FIRST_AUTO_MSG)
|
||||
&& !body.contains(SECOND_AUTO_MSG)
|
||||
&& !body.contains("You have exceeded the maximum number of tokens")
|
||||
&& !body.contains(COMPACT_PROMPT_MARKER)
|
||||
};
|
||||
mount_sse_once_match(&server, first_matcher, sse1).await;
|
||||
|
||||
@@ -367,16 +409,30 @@ async fn auto_compact_runs_after_token_limit_hit() {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(SECOND_AUTO_MSG)
|
||||
&& body.contains(FIRST_AUTO_MSG)
|
||||
&& !body.contains("You have exceeded the maximum number of tokens")
|
||||
&& !body.contains(COMPACT_PROMPT_MARKER)
|
||||
};
|
||||
mount_sse_once_match(&server, second_matcher, sse2).await;
|
||||
|
||||
let third_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains("You have exceeded the maximum number of tokens")
|
||||
body.contains(COMPACT_PROMPT_MARKER)
|
||||
};
|
||||
mount_sse_once_match(&server, third_matcher, sse3).await;
|
||||
|
||||
let resume_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(AUTO_SUMMARY_TEXT)
|
||||
&& !body.contains(COMPACT_PROMPT_MARKER)
|
||||
&& !body.contains(POST_AUTO_USER_MSG)
|
||||
};
|
||||
mount_sse_once_match(&server, resume_matcher, sse_resume).await;
|
||||
|
||||
let fourth_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(POST_AUTO_USER_MSG) && !body.contains(COMPACT_PROMPT_MARKER)
|
||||
};
|
||||
mount_sse_once_match(&server, fourth_matcher, sse4).await;
|
||||
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
@@ -385,6 +441,7 @@ async fn auto_compact_runs_after_token_limit_hit() {
|
||||
let home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&home);
|
||||
config.model_provider = model_provider;
|
||||
set_test_compact_prompt(&mut config);
|
||||
config.model_auto_compact_token_limit = Some(200_000);
|
||||
let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
|
||||
let codex = conversation_manager
|
||||
@@ -414,18 +471,29 @@ async fn auto_compact_runs_after_token_limit_hit() {
|
||||
.unwrap();
|
||||
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
// wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: POST_AUTO_USER_MSG.into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.unwrap();
|
||||
assert!(
|
||||
requests.len() >= 3,
|
||||
"auto compact should add at least a third request, got {}",
|
||||
assert_eq!(
|
||||
requests.len(),
|
||||
5,
|
||||
"expected user turns, a compaction request, a resumed turn, and the follow-up turn; got {}",
|
||||
requests.len()
|
||||
);
|
||||
let is_auto_compact = |req: &wiremock::Request| {
|
||||
std::str::from_utf8(&req.body)
|
||||
.unwrap_or("")
|
||||
.contains("You have exceeded the maximum number of tokens")
|
||||
.contains(COMPACT_PROMPT_MARKER)
|
||||
};
|
||||
let auto_compact_count = requests.iter().filter(|req| is_auto_compact(req)).count();
|
||||
assert_eq!(
|
||||
@@ -442,11 +510,41 @@ async fn auto_compact_runs_after_token_limit_hit() {
|
||||
"auto compact should add a third request"
|
||||
);
|
||||
|
||||
let resume_index = requests
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find_map(|(idx, req)| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
(body.contains(AUTO_SUMMARY_TEXT)
|
||||
&& !body.contains(COMPACT_PROMPT_MARKER)
|
||||
&& !body.contains(POST_AUTO_USER_MSG))
|
||||
.then_some(idx)
|
||||
})
|
||||
.expect("resume request missing after compaction");
|
||||
|
||||
let follow_up_index = requests
|
||||
.iter()
|
||||
.enumerate()
|
||||
.rev()
|
||||
.find_map(|(idx, req)| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
(body.contains(POST_AUTO_USER_MSG) && !body.contains(COMPACT_PROMPT_MARKER))
|
||||
.then_some(idx)
|
||||
})
|
||||
.expect("follow-up request missing");
|
||||
assert_eq!(follow_up_index, 4, "follow-up request should be last");
|
||||
|
||||
let body_first = requests[0].body_json::<serde_json::Value>().unwrap();
|
||||
let body3 = requests[auto_compact_index]
|
||||
let body_auto = requests[auto_compact_index]
|
||||
.body_json::<serde_json::Value>()
|
||||
.unwrap();
|
||||
let instructions = body3
|
||||
let body_resume = requests[resume_index]
|
||||
.body_json::<serde_json::Value>()
|
||||
.unwrap();
|
||||
let body_follow_up = requests[follow_up_index]
|
||||
.body_json::<serde_json::Value>()
|
||||
.unwrap();
|
||||
let instructions = body_auto
|
||||
.get("instructions")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or_default();
|
||||
@@ -460,13 +558,16 @@ async fn auto_compact_runs_after_token_limit_hit() {
|
||||
"auto compact should keep the standard developer instructions",
|
||||
);
|
||||
|
||||
let input3 = body3.get("input").and_then(|v| v.as_array()).unwrap();
|
||||
let last3 = input3
|
||||
let input_auto = body_auto.get("input").and_then(|v| v.as_array()).unwrap();
|
||||
let last_auto = input_auto
|
||||
.last()
|
||||
.expect("auto compact request should append a user message");
|
||||
assert_eq!(last3.get("type").and_then(|v| v.as_str()), Some("message"));
|
||||
assert_eq!(last3.get("role").and_then(|v| v.as_str()), Some("user"));
|
||||
let last_text = last3
|
||||
assert_eq!(
|
||||
last_auto.get("type").and_then(|v| v.as_str()),
|
||||
Some("message")
|
||||
);
|
||||
assert_eq!(last_auto.get("role").and_then(|v| v.as_str()), Some("user"));
|
||||
let last_text = last_auto
|
||||
.get("content")
|
||||
.and_then(|v| v.as_array())
|
||||
.and_then(|items| items.first())
|
||||
@@ -474,9 +575,59 @@ async fn auto_compact_runs_after_token_limit_hit() {
|
||||
.and_then(|text| text.as_str())
|
||||
.unwrap_or_default();
|
||||
assert_eq!(
|
||||
last_text, SUMMARIZATION_PROMPT,
|
||||
last_text, TEST_COMPACT_PROMPT,
|
||||
"auto compact should send the summarization prompt as a user message",
|
||||
);
|
||||
|
||||
let input_resume = body_resume.get("input").and_then(|v| v.as_array()).unwrap();
|
||||
assert!(
|
||||
input_resume.iter().any(|item| {
|
||||
item.get("type").and_then(|v| v.as_str()) == Some("message")
|
||||
&& item.get("role").and_then(|v| v.as_str()) == Some("user")
|
||||
&& item
|
||||
.get("content")
|
||||
.and_then(|v| v.as_array())
|
||||
.and_then(|arr| arr.first())
|
||||
.and_then(|entry| entry.get("text"))
|
||||
.and_then(|v| v.as_str())
|
||||
== Some(AUTO_SUMMARY_TEXT)
|
||||
}),
|
||||
"resume request should include compacted history"
|
||||
);
|
||||
|
||||
let input_follow_up = body_follow_up
|
||||
.get("input")
|
||||
.and_then(|v| v.as_array())
|
||||
.unwrap();
|
||||
let user_texts: Vec<String> = input_follow_up
|
||||
.iter()
|
||||
.filter(|item| item.get("type").and_then(|v| v.as_str()) == Some("message"))
|
||||
.filter(|item| item.get("role").and_then(|v| v.as_str()) == Some("user"))
|
||||
.filter_map(|item| {
|
||||
item.get("content")
|
||||
.and_then(|v| v.as_array())
|
||||
.and_then(|arr| arr.first())
|
||||
.and_then(|entry| entry.get("text"))
|
||||
.and_then(|v| v.as_str())
|
||||
.map(std::string::ToString::to_string)
|
||||
})
|
||||
.collect();
|
||||
assert!(
|
||||
user_texts.iter().any(|text| text == FIRST_AUTO_MSG),
|
||||
"auto compact follow-up request should include the first user message"
|
||||
);
|
||||
assert!(
|
||||
user_texts.iter().any(|text| text == SECOND_AUTO_MSG),
|
||||
"auto compact follow-up request should include the second user message"
|
||||
);
|
||||
assert!(
|
||||
user_texts.iter().any(|text| text == POST_AUTO_USER_MSG),
|
||||
"auto compact follow-up request should include the new user message"
|
||||
);
|
||||
assert!(
|
||||
user_texts.iter().any(|text| text == AUTO_SUMMARY_TEXT),
|
||||
"auto compact follow-up request should include the summary message"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
@@ -495,8 +646,9 @@ async fn auto_compact_persists_rollout_entries() {
|
||||
ev_completed_with_tokens("r2", 330_000),
|
||||
]);
|
||||
|
||||
let auto_summary_payload = auto_summary(AUTO_SUMMARY_TEXT);
|
||||
let sse3 = sse(vec![
|
||||
ev_assistant_message("m3", AUTO_SUMMARY_TEXT),
|
||||
ev_assistant_message("m3", &auto_summary_payload),
|
||||
ev_completed_with_tokens("r3", 200),
|
||||
]);
|
||||
|
||||
@@ -504,7 +656,7 @@ async fn auto_compact_persists_rollout_entries() {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(FIRST_AUTO_MSG)
|
||||
&& !body.contains(SECOND_AUTO_MSG)
|
||||
&& !body.contains("You have exceeded the maximum number of tokens")
|
||||
&& !body.contains(COMPACT_PROMPT_MARKER)
|
||||
};
|
||||
mount_sse_once_match(&server, first_matcher, sse1).await;
|
||||
|
||||
@@ -512,13 +664,13 @@ async fn auto_compact_persists_rollout_entries() {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(SECOND_AUTO_MSG)
|
||||
&& body.contains(FIRST_AUTO_MSG)
|
||||
&& !body.contains("You have exceeded the maximum number of tokens")
|
||||
&& !body.contains(COMPACT_PROMPT_MARKER)
|
||||
};
|
||||
mount_sse_once_match(&server, second_matcher, sse2).await;
|
||||
|
||||
let third_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains("You have exceeded the maximum number of tokens")
|
||||
body.contains(COMPACT_PROMPT_MARKER)
|
||||
};
|
||||
mount_sse_once_match(&server, third_matcher, sse3).await;
|
||||
|
||||
@@ -530,6 +682,7 @@ async fn auto_compact_persists_rollout_entries() {
|
||||
let home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&home);
|
||||
config.model_provider = model_provider;
|
||||
set_test_compact_prompt(&mut config);
|
||||
let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
|
||||
let NewConversation {
|
||||
conversation: codex,
|
||||
@@ -603,8 +756,9 @@ async fn auto_compact_stops_after_failed_attempt() {
|
||||
ev_completed_with_tokens("r1", 500),
|
||||
]);
|
||||
|
||||
let summary_payload = auto_summary(SUMMARY_TEXT);
|
||||
let sse2 = sse(vec![
|
||||
ev_assistant_message("m2", SUMMARY_TEXT),
|
||||
ev_assistant_message("m2", &summary_payload),
|
||||
ev_completed_with_tokens("r2", 50),
|
||||
]);
|
||||
|
||||
@@ -615,21 +769,19 @@ async fn auto_compact_stops_after_failed_attempt() {
|
||||
|
||||
let first_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains(FIRST_AUTO_MSG)
|
||||
&& !body.contains("You have exceeded the maximum number of tokens")
|
||||
body.contains(FIRST_AUTO_MSG) && !body.contains(COMPACT_PROMPT_MARKER)
|
||||
};
|
||||
mount_sse_once_match(&server, first_matcher, sse1.clone()).await;
|
||||
|
||||
let second_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains("You have exceeded the maximum number of tokens")
|
||||
body.contains(COMPACT_PROMPT_MARKER)
|
||||
};
|
||||
mount_sse_once_match(&server, second_matcher, sse2.clone()).await;
|
||||
|
||||
let third_matcher = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
!body.contains("You have exceeded the maximum number of tokens")
|
||||
&& body.contains(SUMMARY_TEXT)
|
||||
!body.contains(COMPACT_PROMPT_MARKER) && body.contains(SUMMARY_TEXT)
|
||||
};
|
||||
mount_sse_once_match(&server, third_matcher, sse3.clone()).await;
|
||||
|
||||
@@ -641,6 +793,7 @@ async fn auto_compact_stops_after_failed_attempt() {
|
||||
let home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&home);
|
||||
config.model_provider = model_provider;
|
||||
set_test_compact_prompt(&mut config);
|
||||
config.model_auto_compact_token_limit = Some(200);
|
||||
let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
|
||||
let codex = conversation_manager
|
||||
@@ -689,7 +842,7 @@ async fn auto_compact_stops_after_failed_attempt() {
|
||||
.and_then(|items| items.first())
|
||||
.and_then(|entry| entry.get("text"))
|
||||
.and_then(|text| text.as_str())
|
||||
.map(|text| text == SUMMARIZATION_PROMPT)
|
||||
.map(|text| text == TEST_COMPACT_PROMPT)
|
||||
.unwrap_or(false)
|
||||
});
|
||||
assert!(
|
||||
@@ -736,6 +889,7 @@ async fn manual_compact_retries_after_context_window_error() {
|
||||
let home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&home);
|
||||
config.model_provider = model_provider;
|
||||
set_test_compact_prompt(&mut config);
|
||||
config.model_auto_compact_token_limit = Some(200_000);
|
||||
let codex = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"))
|
||||
.new_conversation(config)
|
||||
@@ -795,7 +949,7 @@ async fn manual_compact_retries_after_context_window_error() {
|
||||
.and_then(|items| items.first())
|
||||
.and_then(|entry| entry.get("text"))
|
||||
.and_then(|text| text.as_str()),
|
||||
Some(SUMMARIZATION_PROMPT),
|
||||
Some(TEST_COMPACT_PROMPT),
|
||||
"compact attempt should include summarization prompt"
|
||||
);
|
||||
assert_eq!(
|
||||
@@ -806,7 +960,7 @@ async fn manual_compact_retries_after_context_window_error() {
|
||||
.and_then(|items| items.first())
|
||||
.and_then(|entry| entry.get("text"))
|
||||
.and_then(|text| text.as_str()),
|
||||
Some(SUMMARIZATION_PROMPT),
|
||||
Some(TEST_COMPACT_PROMPT),
|
||||
"retry attempt should include summarization prompt"
|
||||
);
|
||||
assert_eq!(
|
||||
@@ -826,6 +980,228 @@ async fn manual_compact_retries_after_context_window_error() {
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn manual_compact_twice_preserves_latest_user_messages() {
|
||||
skip_if_no_network!();
|
||||
|
||||
let first_user_message = "first manual turn";
|
||||
let second_user_message = "second manual turn";
|
||||
let final_user_message = "post compact follow-up";
|
||||
let first_summary = "FIRST_MANUAL_SUMMARY";
|
||||
let second_summary = "SECOND_MANUAL_SUMMARY";
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let first_turn = sse(vec![
|
||||
ev_assistant_message("m1", FIRST_REPLY),
|
||||
ev_completed("r1"),
|
||||
]);
|
||||
let first_compact_summary = auto_summary(first_summary);
|
||||
let first_compact = sse(vec![
|
||||
ev_assistant_message("m2", &first_compact_summary),
|
||||
ev_completed("r2"),
|
||||
]);
|
||||
let second_turn = sse(vec![
|
||||
ev_assistant_message("m3", SECOND_LARGE_REPLY),
|
||||
ev_completed("r3"),
|
||||
]);
|
||||
let second_compact_summary = auto_summary(second_summary);
|
||||
let second_compact = sse(vec![
|
||||
ev_assistant_message("m4", &second_compact_summary),
|
||||
ev_completed("r4"),
|
||||
]);
|
||||
let final_turn = sse(vec![
|
||||
ev_assistant_message("m5", FINAL_REPLY),
|
||||
ev_completed("r5"),
|
||||
]);
|
||||
|
||||
let responses_mock = mount_sse_sequence(
|
||||
&server,
|
||||
vec![
|
||||
first_turn,
|
||||
first_compact,
|
||||
second_turn,
|
||||
second_compact,
|
||||
final_turn,
|
||||
],
|
||||
)
|
||||
.await;
|
||||
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
};
|
||||
|
||||
let home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&home);
|
||||
config.model_provider = model_provider;
|
||||
set_test_compact_prompt(&mut config);
|
||||
let codex = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"))
|
||||
.new_conversation(config)
|
||||
.await
|
||||
.unwrap()
|
||||
.conversation;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: first_user_message.into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
codex.submit(Op::Compact).await.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: second_user_message.into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
codex.submit(Op::Compact).await.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: final_user_message.into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = responses_mock.requests();
|
||||
assert_eq!(
|
||||
requests.len(),
|
||||
5,
|
||||
"expected exactly 5 requests (user turn, compact, user turn, compact, final turn)"
|
||||
);
|
||||
let contains_user_text = |input: &[serde_json::Value], expected: &str| -> bool {
|
||||
input.iter().any(|item| {
|
||||
item.get("type").and_then(|v| v.as_str()) == Some("message")
|
||||
&& item.get("role").and_then(|v| v.as_str()) == Some("user")
|
||||
&& item
|
||||
.get("content")
|
||||
.and_then(|v| v.as_array())
|
||||
.map(|arr| {
|
||||
arr.iter().any(|entry| {
|
||||
entry.get("text").and_then(|v| v.as_str()) == Some(expected)
|
||||
})
|
||||
})
|
||||
.unwrap_or(false)
|
||||
})
|
||||
};
|
||||
|
||||
let first_turn_input = requests[0].input();
|
||||
assert!(
|
||||
contains_user_text(&first_turn_input, first_user_message),
|
||||
"first turn request missing first user message"
|
||||
);
|
||||
assert!(
|
||||
!contains_user_text(&first_turn_input, TEST_COMPACT_PROMPT),
|
||||
"first turn request should not include summarization prompt"
|
||||
);
|
||||
|
||||
let first_compact_input = requests[1].input();
|
||||
assert!(
|
||||
contains_user_text(&first_compact_input, TEST_COMPACT_PROMPT),
|
||||
"first compact request should include summarization prompt"
|
||||
);
|
||||
assert!(
|
||||
contains_user_text(&first_compact_input, first_user_message),
|
||||
"first compact request should include history before compaction"
|
||||
);
|
||||
|
||||
let second_turn_input = requests[2].input();
|
||||
assert!(
|
||||
contains_user_text(&second_turn_input, second_user_message),
|
||||
"second turn request missing second user message"
|
||||
);
|
||||
assert!(
|
||||
contains_user_text(&second_turn_input, first_user_message),
|
||||
"second turn request should include the compacted user history"
|
||||
);
|
||||
|
||||
let second_compact_input = requests[3].input();
|
||||
assert!(
|
||||
contains_user_text(&second_compact_input, TEST_COMPACT_PROMPT),
|
||||
"second compact request should include summarization prompt"
|
||||
);
|
||||
assert!(
|
||||
contains_user_text(&second_compact_input, second_user_message),
|
||||
"second compact request should include latest history"
|
||||
);
|
||||
|
||||
let mut final_output = requests
|
||||
.last()
|
||||
.unwrap_or_else(|| panic!("final turn request missing for {final_user_message}"))
|
||||
.input()
|
||||
.into_iter()
|
||||
.collect::<VecDeque<_>>();
|
||||
|
||||
// System prompt
|
||||
final_output.pop_front();
|
||||
// Developer instructions
|
||||
final_output.pop_front();
|
||||
|
||||
let _ = final_output
|
||||
.iter_mut()
|
||||
.map(drop_call_id)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let expected = vec![
|
||||
json!({
|
||||
"content": vec![json!({
|
||||
"text": first_user_message,
|
||||
"type": "input_text",
|
||||
})],
|
||||
"role": "user",
|
||||
"type": "message",
|
||||
}),
|
||||
json!({
|
||||
"content": vec![json!({
|
||||
"text": first_summary,
|
||||
"type": "input_text",
|
||||
})],
|
||||
"role": "user",
|
||||
"type": "message",
|
||||
}),
|
||||
json!({
|
||||
"content": vec![json!({
|
||||
"text": second_user_message,
|
||||
"type": "input_text",
|
||||
})],
|
||||
"role": "user",
|
||||
"type": "message",
|
||||
}),
|
||||
json!({
|
||||
"content": vec![json!({
|
||||
"text": second_summary,
|
||||
"type": "input_text",
|
||||
})],
|
||||
"role": "user",
|
||||
"type": "message",
|
||||
}),
|
||||
json!({
|
||||
"content": vec![json!({
|
||||
"text": final_user_message,
|
||||
"type": "input_text",
|
||||
})],
|
||||
"role": "user",
|
||||
"type": "message",
|
||||
}),
|
||||
];
|
||||
assert_eq!(final_output, expected);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_events() {
|
||||
skip_if_no_network!();
|
||||
@@ -836,8 +1212,9 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
|
||||
ev_assistant_message("m1", FIRST_REPLY),
|
||||
ev_completed_with_tokens("r1", 500),
|
||||
]);
|
||||
let first_summary_payload = auto_summary(FIRST_AUTO_SUMMARY);
|
||||
let sse2 = sse(vec![
|
||||
ev_assistant_message("m2", FIRST_AUTO_SUMMARY),
|
||||
ev_assistant_message("m2", &first_summary_payload),
|
||||
ev_completed_with_tokens("r2", 50),
|
||||
]);
|
||||
let sse3 = sse(vec![
|
||||
@@ -848,8 +1225,9 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
|
||||
ev_assistant_message("m4", SECOND_LARGE_REPLY),
|
||||
ev_completed_with_tokens("r4", 450),
|
||||
]);
|
||||
let second_summary_payload = auto_summary(SECOND_AUTO_SUMMARY);
|
||||
let sse5 = sse(vec![
|
||||
ev_assistant_message("m5", SECOND_AUTO_SUMMARY),
|
||||
ev_assistant_message("m5", &second_summary_payload),
|
||||
ev_completed_with_tokens("r5", 60),
|
||||
]);
|
||||
let sse6 = sse(vec![
|
||||
@@ -867,6 +1245,7 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
|
||||
let home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&home);
|
||||
config.model_provider = model_provider;
|
||||
set_test_compact_prompt(&mut config);
|
||||
config.model_auto_compact_token_limit = Some(200);
|
||||
let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
|
||||
let codex = conversation_manager
|
||||
@@ -925,7 +1304,7 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
|
||||
"first request should contain the user input"
|
||||
);
|
||||
assert!(
|
||||
request_bodies[1].contains("You have exceeded the maximum number of tokens"),
|
||||
request_bodies[1].contains(COMPACT_PROMPT_MARKER),
|
||||
"first auto compact request should include the summarization prompt"
|
||||
);
|
||||
assert!(
|
||||
@@ -933,7 +1312,7 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
|
||||
"function call output should be sent before the second auto compact"
|
||||
);
|
||||
assert!(
|
||||
request_bodies[4].contains("You have exceeded the maximum number of tokens"),
|
||||
request_bodies[4].contains(COMPACT_PROMPT_MARKER),
|
||||
"second auto compact request should include the summarization prompt"
|
||||
);
|
||||
}
|
||||
@@ -956,8 +1335,9 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {
|
||||
ev_assistant_message("m2", FINAL_REPLY),
|
||||
ev_completed_with_tokens("r2", over_limit_tokens),
|
||||
]);
|
||||
let auto_summary_payload = auto_summary(AUTO_SUMMARY_TEXT);
|
||||
let auto_compact_turn = sse(vec![
|
||||
ev_assistant_message("m3", AUTO_SUMMARY_TEXT),
|
||||
ev_assistant_message("m3", &auto_summary_payload),
|
||||
ev_completed_with_tokens("r3", 10),
|
||||
]);
|
||||
let post_auto_compact_turn = sse(vec![ev_completed_with_tokens("r4", 10)]);
|
||||
@@ -977,6 +1357,7 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {
|
||||
let home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&home);
|
||||
config.model_provider = model_provider;
|
||||
set_test_compact_prompt(&mut config);
|
||||
config.model_context_window = Some(context_window);
|
||||
config.model_auto_compact_token_limit = Some(limit);
|
||||
|
||||
@@ -1027,7 +1408,7 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {
|
||||
|
||||
let auto_compact_body = auto_compact_mock.single_request().body_json().to_string();
|
||||
assert!(
|
||||
auto_compact_body.contains("You have exceeded the maximum number of tokens"),
|
||||
auto_compact_body.contains(COMPACT_PROMPT_MARKER),
|
||||
"auto compact request should include the summarization prompt after exceeding 95% (limit {limit})"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -10,13 +10,13 @@
|
||||
use super::compact::COMPACT_WARNING_MESSAGE;
|
||||
use super::compact::FIRST_REPLY;
|
||||
use super::compact::SUMMARY_TEXT;
|
||||
use super::compact::TEST_COMPACT_PROMPT;
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::CodexConversation;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::NewConversation;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::codex::compact::SUMMARIZATION_PROMPT;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::OPENAI_DEFAULT_MODEL;
|
||||
use codex_core::protocol::EventMsg;
|
||||
@@ -38,6 +38,8 @@ use tempfile::TempDir;
|
||||
use wiremock::MockServer;
|
||||
|
||||
const AFTER_SECOND_RESUME: &str = "AFTER_SECOND_RESUME";
|
||||
const COMPACT_PROMPT_MARKER: &str =
|
||||
"You are performing a CONTEXT CHECKPOINT COMPACTION for a tool.";
|
||||
|
||||
fn network_disabled() -> bool {
|
||||
std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok()
|
||||
@@ -66,6 +68,27 @@ fn is_ghost_snapshot_message(item: &Value) -> bool {
|
||||
.is_some_and(|text| text.trim_start().starts_with("<ghost_snapshot>"))
|
||||
}
|
||||
|
||||
fn extract_summary_message(request: &Value, summary_text: &str) -> Value {
|
||||
request
|
||||
.get("input")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|items| {
|
||||
items.iter().find(|item| {
|
||||
item.get("type").and_then(Value::as_str) == Some("message")
|
||||
&& item.get("role").and_then(Value::as_str) == Some("user")
|
||||
&& item
|
||||
.get("content")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|arr| arr.first())
|
||||
.and_then(|entry| entry.get("text"))
|
||||
.and_then(Value::as_str)
|
||||
== Some(summary_text)
|
||||
})
|
||||
})
|
||||
.cloned()
|
||||
.unwrap_or_else(|| panic!("expected summary message {summary_text}"))
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
/// Scenario: compact an initial conversation, resume it, fork one turn back, and
|
||||
/// ensure the model-visible history matches expectations at each request.
|
||||
@@ -157,6 +180,9 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
let expected_model = OPENAI_DEFAULT_MODEL;
|
||||
let summary_after_compact = extract_summary_message(&requests[2], SUMMARY_TEXT);
|
||||
let summary_after_resume = extract_summary_message(&requests[3], SUMMARY_TEXT);
|
||||
let summary_after_fork = extract_summary_message(&requests[4], SUMMARY_TEXT);
|
||||
let user_turn_1 = json!(
|
||||
{
|
||||
"model": expected_model,
|
||||
@@ -257,7 +283,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": SUMMARIZATION_PROMPT
|
||||
"text": TEST_COMPACT_PROMPT
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -306,16 +332,11 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": "You were originally given instructions from a user over one or more turns. Here were the user messages:
|
||||
|
||||
hello world
|
||||
|
||||
Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:
|
||||
|
||||
SUMMARY_ONLY_CONTEXT"
|
||||
"text": "hello world"
|
||||
}
|
||||
]
|
||||
},
|
||||
summary_after_compact,
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
@@ -371,16 +392,11 @@ SUMMARY_ONLY_CONTEXT"
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": "You were originally given instructions from a user over one or more turns. Here were the user messages:
|
||||
|
||||
hello world
|
||||
|
||||
Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:
|
||||
|
||||
SUMMARY_ONLY_CONTEXT"
|
||||
"text": "hello world"
|
||||
}
|
||||
]
|
||||
},
|
||||
summary_after_resume,
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
@@ -456,16 +472,11 @@ SUMMARY_ONLY_CONTEXT"
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": "You were originally given instructions from a user over one or more turns. Here were the user messages:
|
||||
|
||||
hello world
|
||||
|
||||
Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:
|
||||
|
||||
SUMMARY_ONLY_CONTEXT"
|
||||
"text": "hello world"
|
||||
}
|
||||
]
|
||||
},
|
||||
summary_after_fork,
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
@@ -605,6 +616,11 @@ async fn compact_resume_after_second_compaction_preserves_history() {
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
|
||||
// Build expected final request input: initial context + forked user message +
|
||||
// compacted summary + post-compact user message + resumed user message.
|
||||
let summary_after_second_compact =
|
||||
extract_summary_message(&requests[requests.len() - 3], SUMMARY_TEXT);
|
||||
|
||||
let mut expected = json!([
|
||||
{
|
||||
"instructions": prompt,
|
||||
@@ -635,10 +651,11 @@ async fn compact_resume_after_second_compaction_preserves_history() {
|
||||
"content": [
|
||||
{
|
||||
"type": "input_text",
|
||||
"text": "You were originally given instructions from a user over one or more turns. Here were the user messages:\n\nAFTER_FORK\n\nAnother language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\n\nSUMMARY_ONLY_CONTEXT"
|
||||
"text": "AFTER_FORK"
|
||||
}
|
||||
]
|
||||
},
|
||||
summary_after_second_compact,
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
@@ -724,7 +741,7 @@ async fn mount_initial_flow(server: &MockServer) {
|
||||
let match_first = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains("\"text\":\"hello world\"")
|
||||
&& !body.contains("You have exceeded the maximum number of tokens")
|
||||
&& !body.contains(COMPACT_PROMPT_MARKER)
|
||||
&& !body.contains(&format!("\"text\":\"{SUMMARY_TEXT}\""))
|
||||
&& !body.contains("\"text\":\"AFTER_COMPACT\"")
|
||||
&& !body.contains("\"text\":\"AFTER_RESUME\"")
|
||||
@@ -734,7 +751,7 @@ async fn mount_initial_flow(server: &MockServer) {
|
||||
|
||||
let match_compact = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains("You have exceeded the maximum number of tokens")
|
||||
body.contains(COMPACT_PROMPT_MARKER)
|
||||
};
|
||||
mount_sse_once_match(server, match_compact, sse2).await;
|
||||
|
||||
@@ -768,8 +785,7 @@ async fn mount_second_compact_flow(server: &MockServer) {
|
||||
|
||||
let match_second_compact = |req: &wiremock::Request| {
|
||||
let body = std::str::from_utf8(&req.body).unwrap_or("");
|
||||
body.contains("You have exceeded the maximum number of tokens")
|
||||
&& body.contains("AFTER_FORK")
|
||||
body.contains(COMPACT_PROMPT_MARKER) && body.contains("AFTER_FORK")
|
||||
};
|
||||
mount_sse_once_match(server, match_second_compact, sse6).await;
|
||||
|
||||
@@ -790,6 +806,7 @@ async fn start_test_conversation(
|
||||
let home = TempDir::new().expect("create temp dir");
|
||||
let mut config = load_default_config_for_test(&home);
|
||||
config.model_provider = model_provider;
|
||||
config.compact_prompt = Some(TEST_COMPACT_PROMPT.to_string());
|
||||
|
||||
let manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
|
||||
let NewConversation { conversation, .. } = manager
|
||||
|
||||
@@ -36,19 +36,53 @@ fn text_user_input(text: String) -> serde_json::Value {
|
||||
})
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn has_wsl_env_markers() -> bool {
|
||||
std::env::var_os("WSL_INTEROP").is_some()
|
||||
|| std::env::var_os("WSLENV").is_some()
|
||||
|| std::env::var_os("WSL_DISTRO_NAME").is_some()
|
||||
}
|
||||
|
||||
fn operating_system_context_block() -> String {
|
||||
#[cfg(target_os = "windows")]
|
||||
{
|
||||
let info = os_info::get();
|
||||
let name = info.os_type().to_string();
|
||||
let version = info.version().to_string();
|
||||
let is_wsl = has_wsl_env_markers();
|
||||
format!(
|
||||
" <operating_system>\n <name>{name}</name>\n <version>{version}</version>\n <is_likely_windows_subsystem_for_linux>{is_wsl}</is_likely_windows_subsystem_for_linux>\n </operating_system>\n"
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(all(unix, not(target_os = "macos")))]
|
||||
{
|
||||
if has_wsl_env_markers() {
|
||||
" <operating_system>\n <name>{name}</name>\n <version></version>\n <is_likely_windows_subsystem_for_linux>true</is_likely_windows_subsystem_for_linux>\n </operating_system>\n".to_string()
|
||||
} else {
|
||||
String::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_os = "macos")]
|
||||
{
|
||||
String::new()
|
||||
}
|
||||
}
|
||||
|
||||
fn default_env_context_str(cwd: &str, shell: &Shell) -> String {
|
||||
let shell_line = match shell.name() {
|
||||
Some(name) => format!(" <shell>{name}</shell>\n"),
|
||||
None => String::new(),
|
||||
};
|
||||
let os_block = operating_system_context_block();
|
||||
format!(
|
||||
r#"<environment_context>
|
||||
<cwd>{}</cwd>
|
||||
<cwd>{cwd}</cwd>
|
||||
<approval_policy>on-request</approval_policy>
|
||||
<sandbox_mode>read-only</sandbox_mode>
|
||||
<network_access>restricted</network_access>
|
||||
{}</environment_context>"#,
|
||||
cwd,
|
||||
match shell.name() {
|
||||
Some(name) => format!(" <shell>{name}</shell>\n"),
|
||||
None => String::new(),
|
||||
}
|
||||
{shell_line}{os_block}</environment_context>"#
|
||||
)
|
||||
}
|
||||
|
||||
@@ -341,22 +375,10 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
|
||||
|
||||
let shell = default_user_shell().await;
|
||||
|
||||
let expected_env_text = format!(
|
||||
r#"<environment_context>
|
||||
<cwd>{}</cwd>
|
||||
<approval_policy>on-request</approval_policy>
|
||||
<sandbox_mode>read-only</sandbox_mode>
|
||||
<network_access>restricted</network_access>
|
||||
{}</environment_context>"#,
|
||||
cwd.path().to_string_lossy(),
|
||||
match shell.name() {
|
||||
Some(name) => format!(" <shell>{name}</shell>\n"),
|
||||
None => String::new(),
|
||||
}
|
||||
);
|
||||
let cwd_str = cwd.path().to_string_lossy().into_owned();
|
||||
let expected_env_text = default_env_context_str(&cwd_str, &shell);
|
||||
let expected_ui_text = format!(
|
||||
"# AGENTS.md instructions for {}\n\n<INSTRUCTIONS>\nbe consistent and helpful\n</INSTRUCTIONS>",
|
||||
cwd.path().to_string_lossy()
|
||||
"# AGENTS.md instructions for {cwd_str}\n\n<INSTRUCTIONS>\nbe consistent and helpful\n</INSTRUCTIONS>"
|
||||
);
|
||||
|
||||
let expected_env_msg = serde_json::json!({
|
||||
|
||||
@@ -8,7 +8,6 @@ use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_protocol::protocol::SandboxRiskCategory;
|
||||
use codex_protocol::protocol::SandboxRiskLevel;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use eventsource_stream::Event as StreamEvent;
|
||||
@@ -373,19 +372,9 @@ impl OtelEventManager {
|
||||
call_id: &str,
|
||||
status: &str,
|
||||
risk_level: Option<SandboxRiskLevel>,
|
||||
risk_categories: &[SandboxRiskCategory],
|
||||
duration: Duration,
|
||||
) {
|
||||
let level = risk_level.map(|level| level.as_str());
|
||||
let categories = if risk_categories.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
risk_categories
|
||||
.iter()
|
||||
.map(SandboxRiskCategory::as_str)
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
};
|
||||
|
||||
tracing::event!(
|
||||
tracing::Level::INFO,
|
||||
@@ -402,7 +391,6 @@ impl OtelEventManager {
|
||||
call_id = %call_id,
|
||||
status = %status,
|
||||
risk_level = level,
|
||||
risk_categories = categories,
|
||||
duration_ms = %duration.as_millis(),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -16,24 +16,10 @@ pub enum SandboxRiskLevel {
|
||||
High,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Hash, JsonSchema, TS)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum SandboxRiskCategory {
|
||||
DataDeletion,
|
||||
DataExfiltration,
|
||||
PrivilegeEscalation,
|
||||
SystemModification,
|
||||
NetworkAccess,
|
||||
ResourceExhaustion,
|
||||
Compliance,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)]
|
||||
pub struct SandboxCommandAssessment {
|
||||
pub description: String,
|
||||
pub risk_level: SandboxRiskLevel,
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub risk_categories: Vec<SandboxRiskCategory>,
|
||||
}
|
||||
|
||||
impl SandboxRiskLevel {
|
||||
@@ -46,20 +32,6 @@ impl SandboxRiskLevel {
|
||||
}
|
||||
}
|
||||
|
||||
impl SandboxRiskCategory {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
Self::DataDeletion => "data_deletion",
|
||||
Self::DataExfiltration => "data_exfiltration",
|
||||
Self::PrivilegeEscalation => "privilege_escalation",
|
||||
Self::SystemModification => "system_modification",
|
||||
Self::NetworkAccess => "network_access",
|
||||
Self::ResourceExhaustion => "resource_exhaustion",
|
||||
Self::Compliance => "compliance",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
||||
pub struct ExecApprovalRequestEvent {
|
||||
/// Identifier for the associated exec call, if available.
|
||||
|
||||
@@ -37,7 +37,6 @@ use ts_rs::TS;
|
||||
pub use crate::approvals::ApplyPatchApprovalRequestEvent;
|
||||
pub use crate::approvals::ExecApprovalRequestEvent;
|
||||
pub use crate::approvals::SandboxCommandAssessment;
|
||||
pub use crate::approvals::SandboxRiskCategory;
|
||||
pub use crate::approvals::SandboxRiskLevel;
|
||||
|
||||
/// Open/close tags for special user-input blocks. Used across crates to avoid
|
||||
@@ -813,12 +812,8 @@ impl TokenUsage {
|
||||
(self.non_cached_input() + self.output_tokens.max(0)).max(0)
|
||||
}
|
||||
|
||||
/// For estimating what % of the model's context window is used, we need to account
|
||||
/// for reasoning output tokens from prior turns being dropped from the context window.
|
||||
/// We approximate this here by subtracting reasoning output tokens from the total.
|
||||
/// This will be off for the current turn and pending function calls.
|
||||
pub fn tokens_in_context_window(&self) -> i64 {
|
||||
(self.total_tokens - self.reasoning_output_tokens).max(0)
|
||||
self.total_tokens
|
||||
}
|
||||
|
||||
/// Estimate the remaining user-controllable percentage of the model's context window.
|
||||
|
||||
@@ -40,12 +40,23 @@ curl --fail --silent --show-error "${PROXY_BASE_URL}/shutdown"
|
||||
## CLI
|
||||
|
||||
```
|
||||
codex-responses-api-proxy [--port <PORT>] [--server-info <FILE>] [--http-shutdown]
|
||||
codex-responses-api-proxy [--port <PORT>] [--server-info <FILE>] [--http-shutdown] [--upstream-url <URL>]
|
||||
```
|
||||
|
||||
- `--port <PORT>`: Port to bind on `127.0.0.1`. If omitted, an ephemeral port is chosen.
|
||||
- `--server-info <FILE>`: If set, the proxy writes a single line of JSON with `{ "port": <PORT>, "pid": <PID> }` once listening.
|
||||
- `--http-shutdown`: If set, enables `GET /shutdown` to exit the process with code `0`.
|
||||
- `--upstream-url <URL>`: Absolute URL to forward requests to. Defaults to `https://api.openai.com/v1/responses`.
|
||||
- Authentication is fixed to `Authorization: Bearer <key>` to match the Codex CLI expectations.
|
||||
|
||||
For Azure, for example (ensure your deployment accepts `Authorization: Bearer <key>`):
|
||||
|
||||
```shell
|
||||
printenv AZURE_OPENAI_API_KEY | env -u AZURE_OPENAI_API_KEY codex-responses-api-proxy \
|
||||
--http-shutdown \
|
||||
--server-info /tmp/server-info.json \
|
||||
--upstream-url "https://YOUR_PROJECT_NAME.openai.azure.com/openai/deployments/YOUR_DEPLOYMENT/responses?api-version=2025-04-01-preview"
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
@@ -57,7 +68,7 @@ codex-responses-api-proxy [--port <PORT>] [--server-info <FILE>] [--http-shutdow
|
||||
Care is taken to restrict access/copying to the value of `OPENAI_API_KEY` retained in memory:
|
||||
|
||||
- We leverage [`codex_process_hardening`](https://github.com/openai/codex/blob/main/codex-rs/process-hardening/README.md) so `codex-responses-api-proxy` is run with standard process-hardening techniques.
|
||||
- At startup, we allocate a `1024` byte buffer on the stack and write `"Bearer "` as the first `7` bytes.
|
||||
- At startup, we allocate a `1024` byte buffer on the stack and copy `"Bearer "` into the start of the buffer.
|
||||
- We then read from `stdin`, copying the contents into the buffer after `"Bearer "`.
|
||||
- After verifying the key matches `/^[a-zA-Z0-9_-]+$/` (and does not exceed the buffer), we create a `String` from that buffer (so the data is now on the heap).
|
||||
- We zero out the stack-allocated buffer using https://crates.io/crates/zeroize so it is not optimized away by the compiler.
|
||||
|
||||
@@ -12,6 +12,7 @@ use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use anyhow::anyhow;
|
||||
use clap::Parser;
|
||||
use reqwest::Url;
|
||||
use reqwest::blocking::Client;
|
||||
use reqwest::header::AUTHORIZATION;
|
||||
use reqwest::header::HOST;
|
||||
@@ -44,6 +45,10 @@ pub struct Args {
|
||||
/// Enable HTTP shutdown endpoint at GET /shutdown
|
||||
#[arg(long)]
|
||||
pub http_shutdown: bool,
|
||||
|
||||
/// Absolute URL the proxy should forward requests to (defaults to OpenAI).
|
||||
#[arg(long, default_value = "https://api.openai.com/v1/responses")]
|
||||
pub upstream_url: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
@@ -52,10 +57,29 @@ struct ServerInfo {
|
||||
pid: u32,
|
||||
}
|
||||
|
||||
struct ForwardConfig {
|
||||
upstream_url: Url,
|
||||
host_header: HeaderValue,
|
||||
}
|
||||
|
||||
/// Entry point for the library main, for parity with other crates.
|
||||
pub fn run_main(args: Args) -> Result<()> {
|
||||
let auth_header = read_auth_header_from_stdin()?;
|
||||
|
||||
let upstream_url = Url::parse(&args.upstream_url).context("parsing --upstream-url")?;
|
||||
let host = match (upstream_url.host_str(), upstream_url.port()) {
|
||||
(Some(host), Some(port)) => format!("{host}:{port}"),
|
||||
(Some(host), None) => host.to_string(),
|
||||
_ => return Err(anyhow!("upstream URL must include a host")),
|
||||
};
|
||||
let host_header =
|
||||
HeaderValue::from_str(&host).context("constructing Host header from upstream URL")?;
|
||||
|
||||
let forward_config = Arc::new(ForwardConfig {
|
||||
upstream_url,
|
||||
host_header,
|
||||
});
|
||||
|
||||
let (listener, bound_addr) = bind_listener(args.port)?;
|
||||
if let Some(path) = args.server_info.as_ref() {
|
||||
write_server_info(path, bound_addr.port())?;
|
||||
@@ -75,13 +99,14 @@ pub fn run_main(args: Args) -> Result<()> {
|
||||
let http_shutdown = args.http_shutdown;
|
||||
for request in server.incoming_requests() {
|
||||
let client = client.clone();
|
||||
let forward_config = forward_config.clone();
|
||||
std::thread::spawn(move || {
|
||||
if http_shutdown && request.method() == &Method::Get && request.url() == "/shutdown" {
|
||||
let _ = request.respond(Response::new_empty(StatusCode(200)));
|
||||
std::process::exit(0);
|
||||
}
|
||||
|
||||
if let Err(e) = forward_request(&client, auth_header, request) {
|
||||
if let Err(e) = forward_request(&client, auth_header, &forward_config, request) {
|
||||
eprintln!("forwarding error: {e}");
|
||||
}
|
||||
});
|
||||
@@ -115,7 +140,12 @@ fn write_server_info(path: &Path, port: u16) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn forward_request(client: &Client, auth_header: &'static str, mut req: Request) -> Result<()> {
|
||||
fn forward_request(
|
||||
client: &Client,
|
||||
auth_header: &'static str,
|
||||
config: &ForwardConfig,
|
||||
mut req: Request,
|
||||
) -> Result<()> {
|
||||
// Only allow POST /v1/responses exactly, no query string.
|
||||
let method = req.method().clone();
|
||||
let url_path = req.url().to_string();
|
||||
@@ -157,11 +187,10 @@ fn forward_request(client: &Client, auth_header: &'static str, mut req: Request)
|
||||
auth_header_value.set_sensitive(true);
|
||||
headers.insert(AUTHORIZATION, auth_header_value);
|
||||
|
||||
headers.insert(HOST, HeaderValue::from_static("api.openai.com"));
|
||||
headers.insert(HOST, config.host_header.clone());
|
||||
|
||||
let upstream = "https://api.openai.com/v1/responses";
|
||||
let upstream_resp = client
|
||||
.post(upstream)
|
||||
.post(config.upstream_url.clone())
|
||||
.headers(headers)
|
||||
.body(body)
|
||||
.send()
|
||||
|
||||
@@ -121,7 +121,7 @@ where
|
||||
if total_read == capacity && !saw_newline && !saw_eof {
|
||||
buf.zeroize();
|
||||
return Err(anyhow!(
|
||||
"OPENAI_API_KEY is too large to fit in the 512-byte buffer"
|
||||
"API key is too large to fit in the {BUFFER_SIZE}-byte buffer"
|
||||
));
|
||||
}
|
||||
|
||||
@@ -133,7 +133,7 @@ where
|
||||
if total == AUTH_HEADER_PREFIX.len() {
|
||||
buf.zeroize();
|
||||
return Err(anyhow!(
|
||||
"OPENAI_API_KEY must be provided via stdin (e.g. printenv OPENAI_API_KEY | codex responses-api-proxy)"
|
||||
"API key must be provided via stdin (e.g. printenv OPENAI_API_KEY | codex responses-api-proxy)"
|
||||
));
|
||||
}
|
||||
|
||||
@@ -214,7 +214,7 @@ fn validate_auth_header_bytes(key_bytes: &[u8]) -> Result<()> {
|
||||
}
|
||||
|
||||
Err(anyhow!(
|
||||
"OPENAI_API_KEY may only contain ASCII letters, numbers, '-' or '_'"
|
||||
"API key may only contain ASCII letters, numbers, '-' or '_'"
|
||||
))
|
||||
}
|
||||
|
||||
@@ -290,7 +290,9 @@ mod tests {
|
||||
})
|
||||
.unwrap_err();
|
||||
let message = format!("{err:#}");
|
||||
assert!(message.contains("OPENAI_API_KEY is too large to fit in the 512-byte buffer"));
|
||||
let expected_error =
|
||||
format!("API key is too large to fit in the {BUFFER_SIZE}-byte buffer");
|
||||
assert!(message.contains(&expected_error));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -317,9 +319,7 @@ mod tests {
|
||||
.unwrap_err();
|
||||
|
||||
let message = format!("{err:#}");
|
||||
assert!(
|
||||
message.contains("OPENAI_API_KEY may only contain ASCII letters, numbers, '-' or '_'")
|
||||
);
|
||||
assert!(message.contains("API key may only contain ASCII letters, numbers, '-' or '_'"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -337,8 +337,6 @@ mod tests {
|
||||
.unwrap_err();
|
||||
|
||||
let message = format!("{err:#}");
|
||||
assert!(
|
||||
message.contains("OPENAI_API_KEY may only contain ASCII letters, numbers, '-' or '_'")
|
||||
);
|
||||
assert!(message.contains("API key may only contain ASCII letters, numbers, '-' or '_'"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -41,7 +41,10 @@ codex-protocol = { workspace = true }
|
||||
codex-app-server-protocol = { workspace = true }
|
||||
codex-feedback = { workspace = true }
|
||||
color-eyre = { workspace = true }
|
||||
crossterm = { workspace = true, features = ["bracketed-paste", "event-stream"] }
|
||||
crossterm = { workspace = true, features = [
|
||||
"bracketed-paste",
|
||||
"event-stream",
|
||||
] }
|
||||
diffy = { workspace = true }
|
||||
dirs = { workspace = true }
|
||||
dunce = { workspace = true }
|
||||
|
||||
@@ -20,7 +20,6 @@ use codex_core::protocol::FileChange;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::ReviewDecision;
|
||||
use codex_core::protocol::SandboxCommandAssessment;
|
||||
use codex_core::protocol::SandboxRiskCategory;
|
||||
use codex_core::protocol::SandboxRiskLevel;
|
||||
use crossterm::event::KeyCode;
|
||||
use crossterm::event::KeyEvent;
|
||||
@@ -356,35 +355,11 @@ fn render_risk_lines(risk: &SandboxCommandAssessment) -> Vec<Line<'static>> {
|
||||
]));
|
||||
}
|
||||
|
||||
let mut spans: Vec<Span<'static>> = vec!["Risk: ".into(), level_span];
|
||||
if !risk.risk_categories.is_empty() {
|
||||
spans.push(" (".into());
|
||||
for (idx, category) in risk.risk_categories.iter().enumerate() {
|
||||
if idx > 0 {
|
||||
spans.push(", ".into());
|
||||
}
|
||||
spans.push(risk_category_label(*category).into());
|
||||
}
|
||||
spans.push(")".into());
|
||||
}
|
||||
|
||||
lines.push(Line::from(spans));
|
||||
lines.push(vec!["Risk: ".into(), level_span].into());
|
||||
lines.push(Line::from(""));
|
||||
lines
|
||||
}
|
||||
|
||||
fn risk_category_label(category: SandboxRiskCategory) -> &'static str {
|
||||
match category {
|
||||
SandboxRiskCategory::DataDeletion => "data deletion",
|
||||
SandboxRiskCategory::DataExfiltration => "data exfiltration",
|
||||
SandboxRiskCategory::PrivilegeEscalation => "privilege escalation",
|
||||
SandboxRiskCategory::SystemModification => "system modification",
|
||||
SandboxRiskCategory::NetworkAccess => "network access",
|
||||
SandboxRiskCategory::ResourceExhaustion => "resource exhaustion",
|
||||
SandboxRiskCategory::Compliance => "compliance",
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
enum ApprovalVariant {
|
||||
Exec { id: String, command: Vec<String> },
|
||||
|
||||
@@ -10,6 +10,7 @@ use codex_login::ShutdownHandle;
|
||||
use codex_login::run_login_server;
|
||||
use crossterm::event::KeyCode;
|
||||
use crossterm::event::KeyEvent;
|
||||
use crossterm::event::KeyEventKind;
|
||||
use crossterm::event::KeyModifiers;
|
||||
use ratatui::buffer::Buffer;
|
||||
use ratatui::layout::Constraint;
|
||||
@@ -428,7 +429,9 @@ impl AuthModeWidget {
|
||||
should_request_frame = true;
|
||||
}
|
||||
KeyCode::Char(c)
|
||||
if !key_event.modifiers.contains(KeyModifiers::CONTROL)
|
||||
if key_event.kind == KeyEventKind::Press
|
||||
&& !key_event.modifiers.contains(KeyModifiers::SUPER)
|
||||
&& !key_event.modifiers.contains(KeyModifiers::CONTROL)
|
||||
&& !key_event.modifiers.contains(KeyModifiers::ALT) =>
|
||||
{
|
||||
if state.prepopulated_from_env {
|
||||
|
||||
@@ -313,7 +313,9 @@ impl HistoryCell for StatusHistoryCell {
|
||||
|
||||
let note_first_line = Line::from(vec![
|
||||
Span::from("Visit ").cyan(),
|
||||
"chatgpt.com/codex/settings/usage".cyan().underlined(),
|
||||
"https://chatgpt.com/codex/settings/usage"
|
||||
.cyan()
|
||||
.underlined(),
|
||||
Span::from(" for up-to-date").cyan(),
|
||||
]);
|
||||
let note_second_line = Line::from(vec![
|
||||
|
||||
@@ -7,7 +7,7 @@ expression: sanitized
|
||||
╭────────────────────────────────────────────────────────────────────────────╮
|
||||
│ >_ OpenAI Codex (v0.0.0) │
|
||||
│ │
|
||||
│ Visit chatgpt.com/codex/settings/usage for up-to-date │
|
||||
│ Visit https://chatgpt.com/codex/settings/usage for up-to-date │
|
||||
│ information on rate limits and credits │
|
||||
│ │
|
||||
│ Model: gpt-5-codex (reasoning none, summaries auto) │
|
||||
|
||||
@@ -7,7 +7,7 @@ expression: sanitized
|
||||
╭─────────────────────────────────────────────────────────────────────╮
|
||||
│ >_ OpenAI Codex (v0.0.0) │
|
||||
│ │
|
||||
│ Visit chatgpt.com/codex/settings/usage for up-to-date │
|
||||
│ Visit https://chatgpt.com/codex/settings/usage for up-to-date │
|
||||
│ information on rate limits and credits │
|
||||
│ │
|
||||
│ Model: gpt-5-codex (reasoning high, summaries detailed) │
|
||||
@@ -17,7 +17,7 @@ expression: sanitized
|
||||
│ Agents.md: <none> │
|
||||
│ │
|
||||
│ Token usage: 1.9K total (1K input + 900 output) │
|
||||
│ Context window: 100% left (2.1K used / 272K) │
|
||||
│ Context window: 100% left (2.25K used / 272K) │
|
||||
│ 5h limit: [███████████████░░░░░] 72% used (resets 03:14) │
|
||||
│ Weekly limit: [█████████░░░░░░░░░░░] 45% used (resets 03:24) │
|
||||
╰─────────────────────────────────────────────────────────────────────╯
|
||||
|
||||
@@ -7,7 +7,7 @@ expression: sanitized
|
||||
╭─────────────────────────────────────────────────────────────────╮
|
||||
│ >_ OpenAI Codex (v0.0.0) │
|
||||
│ │
|
||||
│ Visit chatgpt.com/codex/settings/usage for up-to-date │
|
||||
│ Visit https://chatgpt.com/codex/settings/usage for up-to-date │
|
||||
│ information on rate limits and credits │
|
||||
│ │
|
||||
│ Model: gpt-5-codex (reasoning none, summaries auto) │
|
||||
|
||||
@@ -7,7 +7,7 @@ expression: sanitized
|
||||
╭─────────────────────────────────────────────────────────────────╮
|
||||
│ >_ OpenAI Codex (v0.0.0) │
|
||||
│ │
|
||||
│ Visit chatgpt.com/codex/settings/usage for up-to-date │
|
||||
│ Visit https://chatgpt.com/codex/settings/usage for up-to-date │
|
||||
│ information on rate limits and credits │
|
||||
│ │
|
||||
│ Model: gpt-5-codex (reasoning none, summaries auto) │
|
||||
|
||||
@@ -7,7 +7,7 @@ expression: sanitized
|
||||
╭─────────────────────────────────────────────────────────────────────╮
|
||||
│ >_ OpenAI Codex (v0.0.0) │
|
||||
│ │
|
||||
│ Visit chatgpt.com/codex/settings/usage for up-to-date │
|
||||
│ Visit https://chatgpt.com/codex/settings/usage for up-to-date │
|
||||
│ information on rate limits and credits │
|
||||
│ │
|
||||
│ Model: gpt-5-codex (reasoning none, summaries auto) │
|
||||
@@ -17,7 +17,7 @@ expression: sanitized
|
||||
│ Agents.md: <none> │
|
||||
│ │
|
||||
│ Token usage: 1.9K total (1K input + 900 output) │
|
||||
│ Context window: 100% left (2.1K used / 272K) │
|
||||
│ Context window: 100% left (2.25K used / 272K) │
|
||||
│ 5h limit: [███████████████░░░░░] 72% used (resets 03:14) │
|
||||
│ Weekly limit: [████████░░░░░░░░░░░░] 40% used (resets 03:34) │
|
||||
│ Warning: limits may be stale - start new turn to refresh. │
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
---
|
||||
source: tui/src/status/tests.rs
|
||||
assertion_line: 257
|
||||
expression: sanitized
|
||||
---
|
||||
/status
|
||||
@@ -7,8 +8,8 @@ expression: sanitized
|
||||
╭────────────────────────────────────────────╮
|
||||
│ >_ OpenAI Codex (v0.0.0) │
|
||||
│ │
|
||||
│ Visit chatgpt.com/codex/settings/usage for │
|
||||
│ up-to-date │
|
||||
│ Visit https://chatgpt.com/codex/settings/ │
|
||||
│ usage for up-to-date │
|
||||
│ information on rate limits and credits │
|
||||
│ │
|
||||
│ Model: gpt-5-codex (reasoning │
|
||||
@@ -18,7 +19,7 @@ expression: sanitized
|
||||
│ Agents.md: <none> │
|
||||
│ │
|
||||
│ Token usage: 1.9K total (1K input + │
|
||||
│ Context window: 100% left (2.1K used / │
|
||||
│ Context window: 100% left (2.25K used / │
|
||||
│ 5h limit: [███████████████░░░░░] │
|
||||
│ (resets 03:14) │
|
||||
╰────────────────────────────────────────────╯
|
||||
|
||||
@@ -50,6 +50,9 @@ pub fn default_bg() -> Option<(u8, u8, u8)> {
|
||||
#[cfg(all(unix, not(test)))]
|
||||
mod imp {
|
||||
use super::DefaultColors;
|
||||
use crossterm::style::Color as CrosstermColor;
|
||||
use crossterm::style::query_background_color;
|
||||
use crossterm::style::query_foreground_color;
|
||||
use std::sync::Mutex;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
@@ -105,128 +108,16 @@ mod imp {
|
||||
}
|
||||
|
||||
fn query_default_colors() -> std::io::Result<Option<DefaultColors>> {
|
||||
use std::fs::OpenOptions;
|
||||
use std::io::ErrorKind;
|
||||
use std::io::IsTerminal;
|
||||
use std::io::Read;
|
||||
use std::io::Write;
|
||||
use std::os::fd::AsRawFd;
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
|
||||
let mut stdout_handle = std::io::stdout();
|
||||
if !stdout_handle.is_terminal() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let mut tty = match OpenOptions::new().read(true).open("/dev/tty") {
|
||||
Ok(file) => file,
|
||||
Err(_) => return Ok(None),
|
||||
};
|
||||
|
||||
let fd = tty.as_raw_fd();
|
||||
unsafe {
|
||||
let flags = libc::fcntl(fd, libc::F_GETFL);
|
||||
if flags >= 0 {
|
||||
libc::fcntl(fd, libc::F_SETFL, flags | libc::O_NONBLOCK);
|
||||
}
|
||||
}
|
||||
|
||||
stdout_handle.write_all(b"\x1b]10;?\x07\x1b]11;?\x07")?;
|
||||
stdout_handle.flush()?;
|
||||
|
||||
let mut deadline = Instant::now() + Duration::from_millis(200);
|
||||
let mut buffer = Vec::new();
|
||||
let mut fg = None;
|
||||
let mut bg = None;
|
||||
|
||||
while Instant::now() < deadline {
|
||||
let mut chunk = [0u8; 128];
|
||||
match tty.read(&mut chunk) {
|
||||
Ok(0) => break,
|
||||
Ok(n) => {
|
||||
deadline = Instant::now() + Duration::from_millis(200);
|
||||
buffer.extend_from_slice(&chunk[..n]);
|
||||
if fg.is_none() {
|
||||
fg = parse_osc_color(&buffer, 10);
|
||||
}
|
||||
if bg.is_none() {
|
||||
bg = parse_osc_color(&buffer, 11);
|
||||
}
|
||||
if let (Some(fg), Some(bg)) = (fg, bg) {
|
||||
return Ok(Some(DefaultColors { fg, bg }));
|
||||
}
|
||||
}
|
||||
Err(err) if err.kind() == ErrorKind::WouldBlock => {
|
||||
std::thread::sleep(Duration::from_millis(5));
|
||||
}
|
||||
Err(err) if err.kind() == ErrorKind::Interrupted => continue,
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
|
||||
if fg.is_none() {
|
||||
fg = parse_osc_color(&buffer, 10);
|
||||
}
|
||||
if bg.is_none() {
|
||||
bg = parse_osc_color(&buffer, 11);
|
||||
}
|
||||
|
||||
let fg = query_foreground_color()?.and_then(color_to_tuple);
|
||||
let bg = query_background_color()?.and_then(color_to_tuple);
|
||||
Ok(fg.zip(bg).map(|(fg, bg)| DefaultColors { fg, bg }))
|
||||
}
|
||||
|
||||
fn parse_component(component: &str) -> Option<u8> {
|
||||
let trimmed = component.trim();
|
||||
if trimmed.is_empty() {
|
||||
return None;
|
||||
fn color_to_tuple(color: CrosstermColor) -> Option<(u8, u8, u8)> {
|
||||
match color {
|
||||
CrosstermColor::Rgb { r, g, b } => Some((r, g, b)),
|
||||
_ => None,
|
||||
}
|
||||
let bits = trimmed.len().checked_mul(4)?;
|
||||
if bits == 0 || bits > 64 {
|
||||
return None;
|
||||
}
|
||||
let max = if bits == 64 {
|
||||
u64::MAX
|
||||
} else {
|
||||
(1u64 << bits) - 1
|
||||
};
|
||||
let value = u64::from_str_radix(trimmed, 16).ok()?;
|
||||
Some(((value * 255 + max / 2) / max) as u8)
|
||||
}
|
||||
|
||||
fn parse_osc_color(buffer: &[u8], code: u8) -> Option<(u8, u8, u8)> {
|
||||
let text = std::str::from_utf8(buffer).ok()?;
|
||||
let prefix = match code {
|
||||
10 => "\u{1b}]10;",
|
||||
11 => "\u{1b}]11;",
|
||||
_ => return None,
|
||||
};
|
||||
let start = text.rfind(prefix)?;
|
||||
let after_prefix = &text[start + prefix.len()..];
|
||||
let end_bel = after_prefix.find('\u{7}');
|
||||
let end_st = after_prefix.find("\u{1b}\\");
|
||||
let end_idx = match (end_bel, end_st) {
|
||||
(Some(bel), Some(st)) => bel.min(st),
|
||||
(Some(bel), None) => bel,
|
||||
(None, Some(st)) => st,
|
||||
(None, None) => return None,
|
||||
};
|
||||
let payload = after_prefix[..end_idx].trim();
|
||||
parse_color_payload(payload)
|
||||
}
|
||||
|
||||
fn parse_color_payload(payload: &str) -> Option<(u8, u8, u8)> {
|
||||
if payload.is_empty() || payload == "?" {
|
||||
return None;
|
||||
}
|
||||
let (model, values) = payload.split_once(':')?;
|
||||
if model != "rgb" && model != "rgba" {
|
||||
return None;
|
||||
}
|
||||
let mut parts = values.split('/');
|
||||
let r = parse_component(parts.next()?)?;
|
||||
let g = parse_component(parts.next()?)?;
|
||||
let b = parse_component(parts.next()?)?;
|
||||
Some((r, g, b))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
## Advanced
|
||||
|
||||
If you already lean on Codex every day and just need a little more control, this page collects the knobs you are most likely to reach for: tweak defaults in [Config](./config.md), add extra tools through [Model Context Protocol support](./advanced.md#model-context-protocol), and script full runs with [`codex exec`](./exec.md). Jump to the section you need and keep building.
|
||||
If you already lean on Codex every day and just need a little more control, this page collects the knobs you are most likely to reach for: tweak defaults in [Config](./config.md), add extra tools through [Model Context Protocol support](#model-context-protocol), and script full runs with [`codex exec`](./exec.md). Jump to the section you need and keep building.
|
||||
|
||||
## Config quickstart {#config-quickstart}
|
||||
|
||||
|
||||
@@ -415,7 +415,7 @@ cwd = "/Users/<user>/code/my-server"
|
||||
|
||||
```toml
|
||||
[mcp_servers.figma]
|
||||
url = "https://mcp.linear.app/mcp"
|
||||
url = "https://mcp.figma.com/mcp"
|
||||
# Optional environment variable containing a bearer token to use for auth
|
||||
bearer_token_env_var = "ENV_VAR"
|
||||
# Optional map of headers with hard-coded values.
|
||||
|
||||
@@ -12,7 +12,7 @@ If you want to add a new feature or change the behavior of an existing one, plea
|
||||
|
||||
- Create a _topic branch_ from `main` - e.g. `feat/interactive-prompt`.
|
||||
- Keep your changes focused. Multiple unrelated fixes should be opened as separate PRs.
|
||||
- Following the [development setup](#development-workflow) instructions above, ensure your change is free of lint warnings and test failures.
|
||||
- Ensure your change is free of lint warnings and test failures.
|
||||
|
||||
### Writing high-impact code changes
|
||||
|
||||
|
||||
31
docs/slash_commands.md
Normal file
31
docs/slash_commands.md
Normal file
@@ -0,0 +1,31 @@
|
||||
## Slash Commands
|
||||
|
||||
### What are slash commands?
|
||||
|
||||
Slash commands are special commands you can type that start with `/`.
|
||||
|
||||
---
|
||||
|
||||
### Built-in slash commands
|
||||
|
||||
Control Codex’s behavior during an interactive session with slash commands.
|
||||
|
||||
| Command | Purpose |
|
||||
| ------------ | ----------------------------------------------------------- |
|
||||
| `/model` | choose what model and reasoning effort to use |
|
||||
| `/approvals` | choose what Codex can do without approval |
|
||||
| `/review` | review my current changes and find issues |
|
||||
| `/new` | start a new chat during a conversation |
|
||||
| `/init` | create an AGENTS.md file with instructions for Codex |
|
||||
| `/compact` | summarize conversation to prevent hitting the context limit |
|
||||
| `/undo` | ask Codex to undo a turn |
|
||||
| `/diff` | show git diff (including untracked files) |
|
||||
| `/mention` | mention a file |
|
||||
| `/status` | show current session configuration and token usage |
|
||||
| `/mcp` | list configured MCP tools |
|
||||
| `/logout` | log out of Codex |
|
||||
| `/quit` | exit Codex |
|
||||
| `/exit` | exit Codex |
|
||||
| `/feedback` | send logs to maintainers |
|
||||
|
||||
---
|
||||
Reference in New Issue
Block a user