add sandbox policy and sandbox name to codex.tool.call metrics (#10711)

This gives visibility into the success rate of the Windows sandbox
implementations relative to other platforms.
This commit is contained in:
iceweasel-oai
2026-02-05 11:42:12 -08:00
committed by GitHub
parent 4df9f2020b
commit 901d5b8fd6
4 changed files with 75 additions and 17 deletions

View File

@@ -3,11 +3,15 @@ use std::sync::Arc;
use std::time::Duration;
use crate::client_common::tools::ToolSpec;
use crate::exec::SandboxType;
use crate::function_tool::FunctionCallError;
use crate::protocol::SandboxPolicy;
use crate::safety::get_platform_sandbox;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolOutput;
use crate::tools::context::ToolPayload;
use async_trait::async_trait;
use codex_protocol::config_types::WindowsSandboxLevel;
use codex_protocol::models::ResponseInputItem;
use codex_utils_readiness::Readiness;
use tracing::warn;
@@ -73,19 +77,33 @@ impl ToolRegistry {
let otel = invocation.turn.otel_manager.clone();
let payload_for_response = invocation.payload.clone();
let log_payload = payload_for_response.log_payload();
let metric_tags = [
(
"sandbox",
sandbox_tag(
&invocation.turn.sandbox_policy,
invocation.turn.windows_sandbox_level,
),
),
(
"sandbox_policy",
sandbox_policy_tag(&invocation.turn.sandbox_policy),
),
];
let handler = match self.handler(tool_name.as_ref()) {
Some(handler) => handler,
None => {
let message =
unsupported_tool_call_message(&invocation.payload, tool_name.as_ref());
otel.tool_result(
otel.tool_result_with_tags(
tool_name.as_ref(),
&call_id_owned,
log_payload.as_ref(),
Duration::ZERO,
false,
&message,
&metric_tags,
);
return Err(FunctionCallError::RespondToModel(message));
}
@@ -93,13 +111,14 @@ impl ToolRegistry {
if !handler.matches_kind(&invocation.payload) {
let message = format!("tool {tool_name} invoked with incompatible payload");
otel.tool_result(
otel.tool_result_with_tags(
tool_name.as_ref(),
&call_id_owned,
log_payload.as_ref(),
Duration::ZERO,
false,
&message,
&metric_tags,
);
return Err(FunctionCallError::Fatal(message));
}
@@ -107,10 +126,11 @@ impl ToolRegistry {
let output_cell = tokio::sync::Mutex::new(None);
let result = otel
.log_tool_result(
.log_tool_result_with_tags(
tool_name.as_ref(),
&call_id_owned,
log_payload.as_ref(),
&metric_tags,
|| {
let handler = handler.clone();
let output_cell = &output_cell;
@@ -231,3 +251,29 @@ fn unsupported_tool_call_message(payload: &ToolPayload, tool_name: &str) -> Stri
_ => format!("unsupported call: {tool_name}"),
}
}
/// Returns the value reported under the `sandbox` metric tag for a tool call.
///
/// The result is chosen in this order:
/// 1. `"none"` when `policy` is `SandboxPolicy::DangerFullAccess`.
/// 2. `"external"` when `policy` is `SandboxPolicy::ExternalSandbox`.
/// 3. `"windows_elevated"` when compiled for Windows and
///    `windows_sandbox_level` is `WindowsSandboxLevel::Elevated`.
/// 4. Otherwise, the metric tag of whatever `get_platform_sandbox` selects,
///    or `"none"` if it selects nothing.
fn sandbox_tag(policy: &SandboxPolicy, windows_sandbox_level: WindowsSandboxLevel) -> &'static str {
    match policy {
        SandboxPolicy::DangerFullAccess => "none",
        SandboxPolicy::ExternalSandbox { .. } => "external",
        // `cfg!` is resolved at compile time, so this arm can only be taken
        // in Windows builds.
        _ if cfg!(target_os = "windows")
            && matches!(windows_sandbox_level, WindowsSandboxLevel::Elevated) =>
        {
            "windows_elevated"
        }
        _ => {
            // Any level other than `Disabled` is forwarded as `true`.
            // NOTE(review): presumably the flag means "Windows sandbox
            // enabled" — confirm against `get_platform_sandbox`.
            let level_not_disabled = windows_sandbox_level != WindowsSandboxLevel::Disabled;
            get_platform_sandbox(level_not_disabled).map_or("none", SandboxType::as_metric_tag)
        }
    }
}
/// Returns the value reported under the `sandbox_policy` metric tag.
///
/// Each `SandboxPolicy` variant maps to a fixed kebab-case string. The match
/// is intentionally exhaustive (no wildcard arm) so that adding a new variant
/// fails to compile until a tag is chosen for it.
fn sandbox_policy_tag(policy: &SandboxPolicy) -> &'static str {
    let tag = match *policy {
        SandboxPolicy::DangerFullAccess => "danger-full-access",
        SandboxPolicy::ReadOnly => "read-only",
        SandboxPolicy::ExternalSandbox { .. } => "external-sandbox",
        SandboxPolicy::WorkspaceWrite { .. } => "workspace-write",
    };
    tag
}