Files
codex/codex-rs/tools/src/json_schema.rs
Michael Bolin 44d28f500f codex-tools: extract shared tool schema parsing (#15923)
## Why

`parse_tool_input_schema` and the supporting `JsonSchema` model were
living in `core/src/tools/spec.rs`, but they already serve callers
outside `codex-core`.

Keeping that shared schema parsing logic inside `codex-core` makes the
crate boundary harder to reason about and works against the guidance in
`AGENTS.md` to avoid growing `codex-core` when reusable code can live
elsewhere.

This change takes the first extraction step by moving the schema parsing
primitive into its own crate while keeping the rest of the tool-spec
assembly in `codex-core`.

## What changed

- added a new `codex-tools` crate under `codex-rs/tools`
- moved the shared tool input schema model and sanitizer/parser into
`tools/src/json_schema.rs`
- kept `tools/src/lib.rs` exports-only, with the module-level unit tests
split into `json_schema_tests.rs`
- updated `codex-core` to use `codex-tools::JsonSchema` and re-export
`parse_tool_input_schema`
- updated `codex-app-server` dynamic tool validation to depend on
`codex-tools` directly instead of reaching through `codex-core`
- wired the new crate into the Cargo workspace and Bazel build graph
2026-03-27 00:03:35 +00:00

177 lines
6.2 KiB
Rust

use serde::Deserialize;
use serde::Serialize;
use serde_json::Value as JsonValue;
use serde_json::json;
use std::collections::BTreeMap;
/// Generic JSON-Schema subset needed for our tool definitions.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum JsonSchema {
Boolean {
#[serde(skip_serializing_if = "Option::is_none")]
description: Option<String>,
},
String {
#[serde(skip_serializing_if = "Option::is_none")]
description: Option<String>,
},
/// MCP schema allows "number" | "integer" for Number.
#[serde(alias = "integer")]
Number {
#[serde(skip_serializing_if = "Option::is_none")]
description: Option<String>,
},
Array {
items: Box<JsonSchema>,
#[serde(skip_serializing_if = "Option::is_none")]
description: Option<String>,
},
Object {
properties: BTreeMap<String, JsonSchema>,
#[serde(skip_serializing_if = "Option::is_none")]
required: Option<Vec<String>>,
#[serde(
rename = "additionalProperties",
skip_serializing_if = "Option::is_none"
)]
additional_properties: Option<AdditionalProperties>,
},
}
/// Whether additional properties are allowed, and if so, any required schema.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(untagged)]
pub enum AdditionalProperties {
Boolean(bool),
Schema(Box<JsonSchema>),
}
impl From<bool> for AdditionalProperties {
fn from(value: bool) -> Self {
Self::Boolean(value)
}
}
impl From<JsonSchema> for AdditionalProperties {
fn from(value: JsonSchema) -> Self {
Self::Schema(Box::new(value))
}
}
/// Parse the tool `input_schema` or return an error for invalid schema.
pub fn parse_tool_input_schema(input_schema: &JsonValue) -> Result<JsonSchema, serde_json::Error> {
let mut input_schema = input_schema.clone();
sanitize_json_schema(&mut input_schema);
serde_json::from_value::<JsonSchema>(input_schema)
}
/// Sanitize a JSON Schema (as serde_json::Value) so it can fit our limited
/// JsonSchema enum. This function:
/// - Ensures every schema object has a "type". If missing, infers it from
/// common keywords (properties => object, items => array, enum/const/format => string)
/// and otherwise defaults to "string".
/// - Fills required child fields (e.g. array items, object properties) with
/// permissive defaults when absent.
fn sanitize_json_schema(value: &mut JsonValue) {
match value {
JsonValue::Bool(_) => {
// JSON Schema boolean form: true/false. Coerce to an accept-all string.
*value = json!({ "type": "string" });
}
JsonValue::Array(values) => {
for value in values {
sanitize_json_schema(value);
}
}
JsonValue::Object(map) => {
if let Some(properties) = map.get_mut("properties")
&& let Some(properties_map) = properties.as_object_mut()
{
for value in properties_map.values_mut() {
sanitize_json_schema(value);
}
}
if let Some(items) = map.get_mut("items") {
sanitize_json_schema(items);
}
for combiner in ["oneOf", "anyOf", "allOf", "prefixItems"] {
if let Some(value) = map.get_mut(combiner) {
sanitize_json_schema(value);
}
}
let mut schema_type = map
.get("type")
.and_then(|value| value.as_str())
.map(str::to_string);
if schema_type.is_none()
&& let Some(JsonValue::Array(types)) = map.get("type")
{
for candidate in types {
if let Some(candidate_type) = candidate.as_str()
&& matches!(
candidate_type,
"object" | "array" | "string" | "number" | "integer" | "boolean"
)
{
schema_type = Some(candidate_type.to_string());
break;
}
}
}
if schema_type.is_none() {
if map.contains_key("properties")
|| map.contains_key("required")
|| map.contains_key("additionalProperties")
{
schema_type = Some("object".to_string());
} else if map.contains_key("items") || map.contains_key("prefixItems") {
schema_type = Some("array".to_string());
} else if map.contains_key("enum")
|| map.contains_key("const")
|| map.contains_key("format")
{
schema_type = Some("string".to_string());
} else if map.contains_key("minimum")
|| map.contains_key("maximum")
|| map.contains_key("exclusiveMinimum")
|| map.contains_key("exclusiveMaximum")
|| map.contains_key("multipleOf")
{
schema_type = Some("number".to_string());
}
}
let schema_type = schema_type.unwrap_or_else(|| "string".to_string());
map.insert("type".to_string(), JsonValue::String(schema_type.clone()));
if schema_type == "object" {
if !map.contains_key("properties") {
map.insert(
"properties".to_string(),
JsonValue::Object(serde_json::Map::new()),
);
}
if let Some(additional_properties) = map.get_mut("additionalProperties")
&& !matches!(additional_properties, JsonValue::Bool(_))
{
sanitize_json_schema(additional_properties);
}
}
if schema_type == "array" && !map.contains_key("items") {
map.insert("items".to_string(), json!({ "type": "string" }));
}
}
_ => {}
}
}
#[cfg(test)]
#[path = "json_schema_tests.rs"]
mod tests;