execpolicy: add host_executable() path mappings (#12964)

## Why

`execpolicy` currently keys `prefix_rule()` matching off the literal
first token. That works for rules like `["/usr/bin/git"]`, but it means
shared basename rules such as `["git"]` do not help when a caller passes
an absolute executable path like `/usr/bin/git`.

This PR lays the groundwork for basename-aware matching without changing
existing callers yet. It adds typed host-executable metadata and an
opt-in resolution path in `codex-execpolicy`, so a follow-up PR can
adopt the new behavior in `unix_escalation.rs` and other call sites
without having to redesign the policy layer first.

## What Changed

- added `host_executable(name = ..., paths = [...])` to the execpolicy
parser and validated it with `AbsolutePathBuf`
- stored host executable mappings separately from prefix rules inside
`Policy`
- added `MatchOptions` and opt-in `*_with_options()` APIs that preserve
existing behavior by default
- implemented exact-first matching with optional basename fallback,
gated by `host_executable()` allowlists when present
- normalized executable names for cross-platform matching so Windows
paths like `git.exe` can satisfy `host_executable(name = "git", ...)`
- updated `match` / `not_match` example validation to exercise the
host-executable resolution path instead of only raw prefix-rule matching
- preserved source locations for deferred example-validation errors so
policy load failures still point at the right file and line
- surfaced `resolvedProgram` on `RuleMatch` so callers can tell when a
basename rule matched an absolute executable path
- preserved host executable metadata when requirements policies overlay
file-based policies in `core/src/exec_policy.rs`
- documented the new rule shape and CLI behavior in
`execpolicy/README.md`

## Verification

- `cargo test -p codex-execpolicy`
- added coverage in `execpolicy/tests/basic.rs` for parsing, precedence,
empty allowlists, basename fallback, exact-match precedence, and
host-executable-backed `match` / `not_match` examples
- added a regression test in `core/src/exec_policy.rs` to verify
requirements overlays preserve `host_executable()` metadata
- verified `cargo test -p codex-core --lib`, including source-rendering
coverage for deferred validation errors
This commit is contained in:
Michael Bolin
2026-02-27 12:59:24 -08:00
committed by GitHub
parent 6e0f1e9469
commit b148d98e0e
14 changed files with 900 additions and 35 deletions

View File

@@ -38,16 +38,47 @@ pub enum Error {
ExampleDidNotMatch {
rules: Vec<String>,
examples: Vec<String>,
location: Option<ErrorLocation>,
},
#[error("expected example to not match rule `{rule}`: {example}")]
ExampleDidMatch { rule: String, example: String },
ExampleDidMatch {
rule: String,
example: String,
location: Option<ErrorLocation>,
},
#[error("starlark error: {0}")]
Starlark(StarlarkError),
}
impl Error {
pub fn with_location(self, location: ErrorLocation) -> Self {
match self {
Error::ExampleDidNotMatch {
rules,
examples,
location: None,
} => Error::ExampleDidNotMatch {
rules,
examples,
location: Some(location),
},
Error::ExampleDidMatch {
rule,
example,
location: None,
} => Error::ExampleDidMatch {
rule,
example,
location: Some(location),
},
other => other,
}
}
pub fn location(&self) -> Option<ErrorLocation> {
match self {
Error::ExampleDidNotMatch { location, .. }
| Error::ExampleDidMatch { location, .. } => location.clone(),
Error::Starlark(err) => err.span().map(|span| {
let resolved = span.resolve_span();
ErrorLocation {

View File

@@ -7,6 +7,7 @@ use clap::Parser;
use serde::Serialize;
use crate::Decision;
use crate::MatchOptions;
use crate::Policy;
use crate::PolicyParser;
use crate::RuleMatch;
@@ -22,6 +23,11 @@ pub struct ExecPolicyCheckCommand {
#[arg(long)]
pub pretty: bool,
/// Resolve absolute program paths against basename rules, gated by any
/// `host_executable()` definitions in the loaded policy files.
#[arg(long)]
pub resolve_host_executables: bool,
/// Command tokens to check against the policy.
#[arg(
value_name = "COMMAND",
@@ -36,7 +42,13 @@ impl ExecPolicyCheckCommand {
/// Load the policies for this command, evaluate the command, and render JSON output.
pub fn run(&self) -> Result<()> {
let policy = load_policies(&self.rules)?;
let matched_rules = policy.matches_for_command(&self.command, None);
let matched_rules = policy.matches_for_command_with_options(
&self.command,
None,
&MatchOptions {
resolve_host_executables: self.resolve_host_executables,
},
);
let json = format_matches_json(&matched_rules, self.pretty)?;
println!("{json}");

View File

@@ -0,0 +1,29 @@
use std::path::Path;
#[cfg(windows)]
const WINDOWS_EXECUTABLE_SUFFIXES: [&str; 4] = [".exe", ".cmd", ".bat", ".com"];

/// Returns the key under which an executable name is stored and looked up.
///
/// On Windows the name is ASCII-lowercased and the first suffix from
/// `WINDOWS_EXECUTABLE_SUFFIXES` that it ends with is removed. On every other
/// platform the name is returned unchanged.
pub(crate) fn executable_lookup_key(raw: &str) -> String {
    #[cfg(windows)]
    {
        let lowered = raw.to_ascii_lowercase();
        // At most one suffix is stripped: the first table entry that matches.
        WINDOWS_EXECUTABLE_SUFFIXES
            .iter()
            .find_map(|suffix| lowered.strip_suffix(*suffix))
            .map(str::to_owned)
            .unwrap_or(lowered)
    }
    #[cfg(not(windows))]
    {
        raw.to_owned()
    }
}
pub(crate) fn executable_path_lookup_key(path: &Path) -> Option<String> {
path.file_name()
.and_then(|name| name.to_str())
.map(executable_lookup_key)
}

View File

@@ -2,6 +2,7 @@ pub mod amend;
pub mod decision;
pub mod error;
pub mod execpolicycheck;
mod executable_name;
pub mod parser;
pub mod policy;
pub mod rule;
@@ -18,6 +19,7 @@ pub use error::TextRange;
pub use execpolicycheck::ExecPolicyCheckCommand;
pub use parser::PolicyParser;
pub use policy::Evaluation;
pub use policy::MatchOptions;
pub use policy::Policy;
pub use rule::NetworkRuleProtocol;
pub use rule::Rule;

View File

@@ -1,6 +1,8 @@
use codex_utils_absolute_path::AbsolutePathBuf;
use multimap::MultiMap;
use shlex;
use starlark::any::ProvidesStaticType;
use starlark::codemap::FileSpan;
use starlark::environment::GlobalsBuilder;
use starlark::environment::Module;
use starlark::eval::Evaluator;
@@ -13,11 +15,18 @@ use starlark::values::list::UnpackList;
use starlark::values::none::NoneType;
use std::cell::RefCell;
use std::cell::RefMut;
use std::collections::HashMap;
use std::path::Path;
use std::sync::Arc;
use crate::decision::Decision;
use crate::error::Error;
use crate::error::ErrorLocation;
use crate::error::Result;
use crate::error::TextPosition;
use crate::error::TextRange;
use crate::executable_name::executable_lookup_key;
use crate::executable_name::executable_path_lookup_key;
use crate::rule::NetworkRule;
use crate::rule::NetworkRuleProtocol;
use crate::rule::PatternToken;
@@ -47,6 +56,7 @@ impl PolicyParser {
/// Parses a policy, tagging parser errors with `policy_identifier` so failures include the
/// identifier alongside line numbers.
pub fn parse(&mut self, policy_identifier: &str, policy_file_contents: &str) -> Result<()> {
let pending_validation_count = self.builder.borrow().pending_example_validations.len();
let mut dialect = Dialect::Extended.clone();
dialect.enable_f_strings = true;
let ast = AstModule::parse(
@@ -62,6 +72,9 @@ impl PolicyParser {
eval.extra = Some(&self.builder);
eval.eval_module(ast, &globals).map_err(Error::Starlark)?;
}
self.builder
.borrow()
.validate_pending_examples_from(pending_validation_count)?;
Ok(())
}
@@ -74,6 +87,8 @@ impl PolicyParser {
/// Accumulates the pieces of a policy while policy files are being parsed.
struct PolicyBuilder {
/// Rules grouped under a program-name key.
rules_by_program: MultiMap<String, RuleRef>,
/// Network rules collected so far.
network_rules: Vec<NetworkRule>,
/// Allowed absolute paths per executable name, filled in by `add_host_executable`.
host_executables_by_name: HashMap<String, Arc<[AbsolutePathBuf]>>,
/// Example checks queued by `add_pending_example_validation` and run by
/// `validate_pending_examples_from`.
pending_example_validations: Vec<PendingExampleValidation>,
}
impl PolicyBuilder {
@@ -81,6 +96,8 @@ impl PolicyBuilder {
Self {
rules_by_program: MultiMap::new(),
network_rules: Vec::new(),
host_executables_by_name: HashMap::new(),
pending_example_validations: Vec::new(),
}
}
@@ -93,9 +110,62 @@ impl PolicyBuilder {
self.network_rules.push(rule);
}
fn build(self) -> crate::policy::Policy {
crate::policy::Policy::from_parts(self.rules_by_program, self.network_rules)
fn add_host_executable(&mut self, name: String, paths: Vec<AbsolutePathBuf>) {
self.host_executables_by_name.insert(name, paths.into());
}
/// Queues a set of `match` / `not_match` examples to be validated later against `rules`.
///
/// `location` is kept alongside the examples so that a later failure can be
/// reported with the source position captured by the caller.
fn add_pending_example_validation(
    &mut self,
    rules: Vec<RuleRef>,
    matches: Vec<Vec<String>>,
    not_matches: Vec<Vec<String>>,
    location: Option<ErrorLocation>,
) {
    let pending = PendingExampleValidation {
        rules,
        matches,
        not_matches,
        location,
    };
    self.pending_example_validations.push(pending);
}
fn validate_pending_examples_from(&self, start: usize) -> Result<()> {
for validation in &self.pending_example_validations[start..] {
let mut rules_by_program = MultiMap::new();
for rule in &validation.rules {
rules_by_program.insert(rule.program().to_string(), rule.clone());
}
let policy = crate::policy::Policy::from_parts(
rules_by_program,
Vec::new(),
self.host_executables_by_name.clone(),
);
validate_not_match_examples(&policy, &validation.rules, &validation.not_matches)
.map_err(|error| attach_validation_location(error, validation.location.clone()))?;
validate_match_examples(&policy, &validation.rules, &validation.matches)
.map_err(|error| attach_validation_location(error, validation.location.clone()))?;
}
Ok(())
}
fn build(self) -> crate::policy::Policy {
crate::policy::Policy::from_parts(
self.rules_by_program,
self.network_rules,
self.host_executables_by_name,
)
}
}
/// A set of `match` / `not_match` examples queued for later validation.
#[derive(Debug)]
struct PendingExampleValidation {
/// Rules the examples are validated against.
rules: Vec<RuleRef>,
/// Example commands that are expected to match.
matches: Vec<Vec<String>>,
/// Example commands that are expected not to match.
not_matches: Vec<Vec<String>>,
/// Source location attached to a validation error, when one was captured.
location: Option<ErrorLocation>,
}
fn parse_pattern<'v>(pattern: UnpackList<Value<'v>>) -> Result<Vec<PatternToken>> {
@@ -150,6 +220,36 @@ fn parse_examples<'v>(examples: UnpackList<Value<'v>>) -> Result<Vec<Vec<String>
examples.items.into_iter().map(parse_example).collect()
}
fn parse_literal_absolute_path(raw: &str) -> Result<AbsolutePathBuf> {
if !Path::new(raw).is_absolute() {
return Err(Error::InvalidRule(format!(
"host_executable paths must be absolute (got {raw})"
)));
}
AbsolutePathBuf::try_from(raw.to_string())
.map_err(|error| Error::InvalidRule(format!("invalid absolute path `{raw}`: {error}")))
}
fn validate_host_executable_name(name: &str) -> Result<()> {
if name.is_empty() {
return Err(Error::InvalidRule(
"host_executable name cannot be empty".to_string(),
));
}
let path = Path::new(name);
if path.components().count() != 1
|| path.file_name().and_then(|value| value.to_str()) != Some(name)
{
return Err(Error::InvalidRule(format!(
"host_executable name must be a bare executable name (got {name})"
)));
}
Ok(())
}
fn parse_network_rule_decision(raw: &str) -> Result<Decision> {
match raw {
"deny" => Ok(Decision::Forbidden),
@@ -157,6 +257,30 @@ fn parse_network_rule_decision(raw: &str) -> Result<Decision> {
}
}
/// Converts a Starlark file span into an `ErrorLocation`.
///
/// Every line and column is shifted up by one relative to the resolved span
/// (presumably zero-based to one-based; confirm against the Starlark codemap docs).
fn error_location_from_file_span(span: FileSpan) -> ErrorLocation {
    let resolved = span.resolve_span();
    let start = TextPosition {
        line: resolved.begin.line + 1,
        column: resolved.begin.column + 1,
    };
    let end = TextPosition {
        line: resolved.end.line + 1,
        column: resolved.end.column + 1,
    };
    ErrorLocation {
        path: span.filename().to_string(),
        range: TextRange { start, end },
    }
}
/// Attaches `location` to `error` when a location is available; otherwise returns `error` as is.
fn attach_validation_location(error: Error, location: Option<ErrorLocation>) -> Error {
    if let Some(location) = location {
        error.with_location(location)
    } else {
        error
    }
}
fn parse_example<'v>(value: Value<'v>) -> Result<Vec<String>> {
if let Some(raw) = value.unpack_str() {
parse_string_example(raw)
@@ -251,6 +375,9 @@ fn policy_builtins(builder: &mut GlobalsBuilder) {
.map(parse_examples)
.transpose()?
.unwrap_or_default();
let location = eval
.call_stack_top_location()
.map(error_location_from_file_span);
let mut builder = policy_builder(eval);
@@ -275,9 +402,7 @@ fn policy_builtins(builder: &mut GlobalsBuilder) {
})
.collect();
validate_not_match_examples(&rules, &not_matches)?;
validate_match_examples(&rules, &matches)?;
builder.add_pending_example_validation(rules.clone(), matches, not_matches, location);
rules.into_iter().for_each(|rule| builder.add_rule(rule));
Ok(NoneType)
}
@@ -308,4 +433,41 @@ fn policy_builtins(builder: &mut GlobalsBuilder) {
});
Ok(NoneType)
}
fn host_executable<'v>(
name: &'v str,
paths: UnpackList<Value<'v>>,
eval: &mut Evaluator<'v, '_, '_>,
) -> anyhow::Result<NoneType> {
validate_host_executable_name(name)?;
let mut parsed_paths = Vec::new();
for value in paths.items {
let raw = value.unpack_str().ok_or_else(|| {
Error::InvalidRule(format!(
"host_executable paths must be strings (got {})",
value.get_type()
))
})?;
let path = parse_literal_absolute_path(raw)?;
let Some(path_name) = executable_path_lookup_key(path.as_path()) else {
return Err(Error::InvalidRule(format!(
"host_executable path `{raw}` must have basename `{name}`"
))
.into());
};
if path_name != executable_lookup_key(name) {
return Err(Error::InvalidRule(format!(
"host_executable path `{raw}` must have basename `{name}`"
))
.into());
}
if !parsed_paths.iter().any(|existing| existing == &path) {
parsed_paths.push(path);
}
}
policy_builder(eval).add_host_executable(executable_lookup_key(name), parsed_paths);
Ok(NoneType)
}
}

View File

@@ -1,6 +1,7 @@
use crate::decision::Decision;
use crate::error::Error;
use crate::error::Result;
use crate::executable_name::executable_path_lookup_key;
use crate::rule::NetworkRule;
use crate::rule::NetworkRuleProtocol;
use crate::rule::PatternToken;
@@ -9,31 +10,41 @@ use crate::rule::PrefixRule;
use crate::rule::RuleMatch;
use crate::rule::RuleRef;
use crate::rule::normalize_network_rule_host;
use codex_utils_absolute_path::AbsolutePathBuf;
use multimap::MultiMap;
use serde::Deserialize;
use serde::Serialize;
use std::collections::HashMap;
use std::sync::Arc;
type HeuristicsFallback<'a> = Option<&'a dyn Fn(&[String]) -> Decision>;
/// Options that control how a command is matched against a `Policy`.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct MatchOptions {
/// When `true`, a command whose literal first token produced no rule match is
/// retried against the rules registered for the executable's basename.
/// The derived `Default` leaves this `false`.
pub resolve_host_executables: bool,
}
/// A set of rules that commands are matched against.
#[derive(Clone, Debug)]
pub struct Policy {
/// Rules grouped under a program-name key; looked up by a command's first token.
rules_by_program: MultiMap<String, RuleRef>,
/// Network rules carried by this policy.
network_rules: Vec<NetworkRule>,
/// Allowed absolute paths per executable name, consulted during basename fallback.
host_executables_by_name: HashMap<String, Arc<[AbsolutePathBuf]>>,
}
impl Policy {
pub fn new(rules_by_program: MultiMap<String, RuleRef>) -> Self {
Self::from_parts(rules_by_program, Vec::new())
Self::from_parts(rules_by_program, Vec::new(), HashMap::new())
}
/// Builds a policy from its rules, network rules, and host-executable path mappings.
///
/// The arguments are stored as given; no validation or normalization happens here.
pub fn from_parts(
rules_by_program: MultiMap<String, RuleRef>,
network_rules: Vec<NetworkRule>,
host_executables_by_name: HashMap<String, Arc<[AbsolutePathBuf]>>,
) -> Self {
Self {
rules_by_program,
network_rules,
host_executables_by_name,
}
}
@@ -49,6 +60,10 @@ impl Policy {
&self.network_rules
}
/// Returns the host-executable mappings: allowed absolute paths keyed by executable name.
pub fn host_executables(&self) -> &HashMap<String, Arc<[AbsolutePathBuf]>> {
&self.host_executables_by_name
}
pub fn get_allowed_prefixes(&self) -> Vec<Vec<String>> {
let mut prefixes = Vec::new();
@@ -119,6 +134,36 @@ impl Policy {
Ok(())
}
pub fn set_host_executable_paths(&mut self, name: String, paths: Vec<AbsolutePathBuf>) {
self.host_executables_by_name.insert(name, paths.into());
}
pub fn merge_overlay(&self, overlay: &Policy) -> Policy {
let mut combined_rules = self.rules_by_program.clone();
for (program, rules) in overlay.rules_by_program.iter_all() {
for rule in rules {
combined_rules.insert(program.clone(), rule.clone());
}
}
let mut combined_network_rules = self.network_rules.clone();
combined_network_rules.extend(overlay.network_rules.iter().cloned());
let mut host_executables_by_name = self.host_executables_by_name.clone();
host_executables_by_name.extend(
overlay
.host_executables_by_name
.iter()
.map(|(name, paths)| (name.clone(), paths.clone())),
);
Policy::from_parts(
combined_rules,
combined_network_rules,
host_executables_by_name,
)
}
pub fn compiled_network_domains(&self) -> (Vec<String>, Vec<String>) {
let mut allowed = Vec::new();
let mut denied = Vec::new();
@@ -144,7 +189,25 @@ impl Policy {
where
F: Fn(&[String]) -> Decision,
{
let matched_rules = self.matches_for_command(cmd, Some(heuristics_fallback));
let matched_rules = self.matches_for_command_with_options(
cmd,
Some(heuristics_fallback),
&MatchOptions::default(),
);
Evaluation::from_matches(matched_rules)
}
pub fn check_with_options<F>(
&self,
cmd: &[String],
heuristics_fallback: &F,
options: &MatchOptions,
) -> Evaluation
where
F: Fn(&[String]) -> Decision,
{
let matched_rules =
self.matches_for_command_with_options(cmd, Some(heuristics_fallback), options);
Evaluation::from_matches(matched_rules)
}
@@ -154,6 +217,20 @@ impl Policy {
commands: Commands,
heuristics_fallback: &F,
) -> Evaluation
where
Commands: IntoIterator,
Commands::Item: AsRef<[String]>,
F: Fn(&[String]) -> Decision,
{
self.check_multiple_with_options(commands, heuristics_fallback, &MatchOptions::default())
}
pub fn check_multiple_with_options<Commands, F>(
&self,
commands: Commands,
heuristics_fallback: &F,
options: &MatchOptions,
) -> Evaluation
where
Commands: IntoIterator,
Commands::Item: AsRef<[String]>,
@@ -162,7 +239,11 @@ impl Policy {
let matched_rules: Vec<RuleMatch> = commands
.into_iter()
.flat_map(|command| {
self.matches_for_command(command.as_ref(), Some(heuristics_fallback))
self.matches_for_command_with_options(
command.as_ref(),
Some(heuristics_fallback),
options,
)
})
.collect();
@@ -181,14 +262,25 @@ impl Policy {
cmd: &[String],
heuristics_fallback: HeuristicsFallback<'_>,
) -> Vec<RuleMatch> {
let matched_rules: Vec<RuleMatch> = match cmd.first() {
Some(first) => self
.rules_by_program
.get_vec(first)
.map(|rules| rules.iter().filter_map(|rule| rule.matches(cmd)).collect())
.unwrap_or_default(),
None => Vec::new(),
};
self.matches_for_command_with_options(cmd, heuristics_fallback, &MatchOptions::default())
}
pub fn matches_for_command_with_options(
&self,
cmd: &[String],
heuristics_fallback: HeuristicsFallback<'_>,
options: &MatchOptions,
) -> Vec<RuleMatch> {
let matched_rules = self
.match_exact_rules(cmd)
.filter(|matched_rules| !matched_rules.is_empty())
.or_else(|| {
options
.resolve_host_executables
.then(|| self.match_host_executable_rules(cmd))
.filter(|matched_rules| !matched_rules.is_empty())
})
.unwrap_or_default();
if matched_rules.is_empty()
&& let Some(heuristics_fallback) = heuristics_fallback
@@ -201,6 +293,45 @@ impl Policy {
matched_rules
}
}
/// Matches `cmd` against the rules registered under its literal first token.
///
/// Returns `None` for an empty command, and `Some` with a possibly empty list otherwise.
fn match_exact_rules(&self, cmd: &[String]) -> Option<Vec<RuleMatch>> {
    let program = cmd.first()?;
    let matches = match self.rules_by_program.get_vec(program) {
        Some(rules) => rules.iter().filter_map(|rule| rule.matches(cmd)).collect(),
        None => Vec::new(),
    };
    Some(matches)
}
/// Matches `cmd` against the rules registered for the basename of its program path.
///
/// The fallback applies only when the first token is an absolute path as written.
/// When a `host_executable` entry exists for the basename, the program path must be
/// one of the listed paths; when no entry exists, any absolute path with that
/// basename is accepted. Matching runs on a copy of the command whose first token
/// is replaced by the basename, and every match is tagged with the resolved program
/// path.
///
/// Returns an empty list when the command is empty, the first token is not an
/// absolute path, no rules exist for the basename, or the allowlist rejects the path.
fn match_host_executable_rules(&self, cmd: &[String]) -> Vec<RuleMatch> {
    let Some(first) = cmd.first() else {
        return Vec::new();
    };
    // Require a literally absolute token, mirroring the explicit check the parser
    // applies to `host_executable` paths. This keeps relative tokens such as
    // `./git` out of basename matching regardless of how the conversion below
    // treats relative input.
    if !std::path::Path::new(first).is_absolute() {
        return Vec::new();
    }
    let Ok(program) = AbsolutePathBuf::try_from(first.clone()) else {
        return Vec::new();
    };
    let Some(basename) = executable_path_lookup_key(program.as_path()) else {
        return Vec::new();
    };
    let Some(rules) = self.rules_by_program.get_vec(&basename) else {
        return Vec::new();
    };
    // An allowlist, when present, is authoritative: an empty list rejects every path.
    if let Some(paths) = self.host_executables_by_name.get(&basename)
        && !paths.iter().any(|path| path == &program)
    {
        return Vec::new();
    }

    let basename_command = std::iter::once(basename)
        .chain(cmd.iter().skip(1).cloned())
        .collect::<Vec<_>>();
    rules
        .iter()
        .filter_map(|rule| rule.matches(&basename_command))
        .map(|rule_match| rule_match.with_resolved_program(&program))
        .collect()
}
}
fn upsert_domain(entries: &mut Vec<String>, host: &str) {

View File

@@ -1,6 +1,9 @@
use crate::decision::Decision;
use crate::error::Error;
use crate::error::Result;
use crate::policy::MatchOptions;
use crate::policy::Policy;
use codex_utils_absolute_path::AbsolutePathBuf;
use serde::Deserialize;
use serde::Serialize;
use shlex::try_join;
@@ -63,6 +66,8 @@ pub enum RuleMatch {
#[serde(rename = "matchedPrefix")]
matched_prefix: Vec<String>,
decision: Decision,
#[serde(rename = "resolvedProgram", skip_serializing_if = "Option::is_none")]
resolved_program: Option<AbsolutePathBuf>,
/// Optional rationale for why this rule exists.
///
/// This can be supplied for any decision and may be surfaced in different contexts
@@ -83,6 +88,23 @@ impl RuleMatch {
Self::HeuristicsRuleMatch { decision, .. } => *decision,
}
}
/// Records the absolute program path that a prefix-rule match was resolved from.
///
/// Any previously stored path on a `PrefixRuleMatch` is replaced. Other kinds of
/// match are returned unchanged.
pub fn with_resolved_program(mut self, resolved_program: &AbsolutePathBuf) -> Self {
    if let Self::PrefixRuleMatch {
        resolved_program: slot,
        ..
    } = &mut self
    {
        *slot = Some(resolved_program.clone());
    }
    self
}
}
#[derive(Clone, Debug, Eq, PartialEq)]
@@ -210,6 +232,7 @@ impl Rule for PrefixRule {
.map(|matched_prefix| RuleMatch::PrefixRuleMatch {
matched_prefix,
decision: self.decision,
resolved_program: None,
justification: self.justification.clone(),
})
}
@@ -220,11 +243,21 @@ impl Rule for PrefixRule {
}
/// Count how many rules match each provided example and error if any example is unmatched.
pub(crate) fn validate_match_examples(rules: &[RuleRef], matches: &[Vec<String>]) -> Result<()> {
pub(crate) fn validate_match_examples(
policy: &Policy,
rules: &[RuleRef],
matches: &[Vec<String>],
) -> Result<()> {
let mut unmatched_examples = Vec::new();
let options = MatchOptions {
resolve_host_executables: true,
};
for example in matches {
if rules.iter().any(|rule| rule.matches(example).is_some()) {
if !policy
.matches_for_command_with_options(example, None, &options)
.is_empty()
{
continue;
}
@@ -240,21 +273,31 @@ pub(crate) fn validate_match_examples(rules: &[RuleRef], matches: &[Vec<String>]
Err(Error::ExampleDidNotMatch {
rules: rules.iter().map(|rule| format!("{rule:?}")).collect(),
examples: unmatched_examples,
location: None,
})
}
}
/// Ensure that no rule matches any provided negative example.
pub(crate) fn validate_not_match_examples(
rules: &[RuleRef],
policy: &Policy,
_rules: &[RuleRef],
not_matches: &[Vec<String>],
) -> Result<()> {
let options = MatchOptions {
resolve_host_executables: true,
};
for example in not_matches {
if let Some(rule) = rules.iter().find(|rule| rule.matches(example).is_some()) {
if let Some(rule) = policy
.matches_for_command_with_options(example, None, &options)
.first()
{
return Err(Error::ExampleDidMatch {
rule: format!("{rule:?}"),
example: try_join(example.iter().map(String::as_str))
.unwrap_or_else(|_| "unable to render example".to_string()),
location: None,
});
}
}