mirror of
https://github.com/openai/codex.git
synced 2026-05-02 12:21:26 +03:00
feat: introducing a network sandbox proxy (#8442)
This adds a new crate, `codex-network-proxy`, a local network proxy service used by Codex to enforce fine-grained network policy (domain allow/deny) and to surface blocked network events for interactive approvals. - New crate: `codex-rs/network-proxy/` (`codex-network-proxy` binary + library) - Core capabilities: - HTTP proxy support (including CONNECT tunneling) - SOCKS5 proxy support (in a later PR) - policy evaluation (allowed/denied domain lists; denylist wins; wildcard support) - small admin API for polling/reload/mode changes - optional MITM support for HTTPS CONNECT to enforce “limited mode” method restrictions (in a later PR) Integration with Codex will follow in subsequent PRs. ## Testing - `cd codex-rs && cargo build -p codex-network-proxy` - `cd codex-rs && cargo run -p codex-network-proxy -- proxy`
This commit is contained in:
435
codex-rs/network-proxy/src/policy.rs
Normal file
435
codex-rs/network-proxy/src/policy.rs
Normal file
@@ -0,0 +1,435 @@
|
||||
#[cfg(test)]
|
||||
use crate::config::NetworkMode;
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use anyhow::ensure;
|
||||
use globset::GlobBuilder;
|
||||
use globset::GlobSet;
|
||||
use globset::GlobSetBuilder;
|
||||
use std::collections::HashSet;
|
||||
use std::net::IpAddr;
|
||||
use std::net::Ipv4Addr;
|
||||
use std::net::Ipv6Addr;
|
||||
use url::Host as UrlHost;
|
||||
|
||||
/// A normalized host string for policy evaluation.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub struct Host(String);
|
||||
|
||||
impl Host {
|
||||
pub fn parse(input: &str) -> Result<Self> {
|
||||
let normalized = normalize_host(input);
|
||||
ensure!(!normalized.is_empty(), "host is empty");
|
||||
Ok(Self(normalized))
|
||||
}
|
||||
|
||||
pub fn as_str(&self) -> &str {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if the host is a loopback hostname or IP literal.
|
||||
pub fn is_loopback_host(host: &Host) -> bool {
|
||||
let host = host.as_str();
|
||||
let host = host.split_once('%').map(|(ip, _)| ip).unwrap_or(host);
|
||||
if host == "localhost" {
|
||||
return true;
|
||||
}
|
||||
if let Ok(ip) = host.parse::<IpAddr>() {
|
||||
return ip.is_loopback();
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
pub fn is_non_public_ip(ip: IpAddr) -> bool {
|
||||
match ip {
|
||||
IpAddr::V4(ip) => is_non_public_ipv4(ip),
|
||||
IpAddr::V6(ip) => is_non_public_ipv6(ip),
|
||||
}
|
||||
}
|
||||
|
||||
fn is_non_public_ipv4(ip: Ipv4Addr) -> bool {
|
||||
// Use the standard library classification helpers where possible; they encode the intent more
|
||||
// clearly than hand-rolled range checks. Some non-public ranges (e.g., CGNAT and TEST-NET
|
||||
// blocks) are not covered by stable stdlib helpers yet, so we fall back to CIDR checks.
|
||||
ip.is_loopback()
|
||||
|| ip.is_private()
|
||||
|| ip.is_link_local()
|
||||
|| ip.is_unspecified()
|
||||
|| ip.is_multicast()
|
||||
|| ip.is_broadcast()
|
||||
|| ipv4_in_cidr(ip, [0, 0, 0, 0], 8) // "this network" (RFC 1122)
|
||||
|| ipv4_in_cidr(ip, [100, 64, 0, 0], 10) // CGNAT (RFC 6598)
|
||||
|| ipv4_in_cidr(ip, [192, 0, 0, 0], 24) // IETF Protocol Assignments (RFC 6890)
|
||||
|| ipv4_in_cidr(ip, [192, 0, 2, 0], 24) // TEST-NET-1 (RFC 5737)
|
||||
|| ipv4_in_cidr(ip, [198, 18, 0, 0], 15) // Benchmarking (RFC 2544)
|
||||
|| ipv4_in_cidr(ip, [198, 51, 100, 0], 24) // TEST-NET-2 (RFC 5737)
|
||||
|| ipv4_in_cidr(ip, [203, 0, 113, 0], 24) // TEST-NET-3 (RFC 5737)
|
||||
|| ipv4_in_cidr(ip, [240, 0, 0, 0], 4) // Reserved (RFC 6890)
|
||||
}
|
||||
|
||||
/// Returns true when `ip` lies inside the IPv4 network `base`/`prefix`.
///
/// `prefix` is the number of leading network bits. A prefix of `0` matches every address.
/// Values above 32 are treated as 32 (exact address match) rather than underflowing the
/// host-bit computation, which previously panicked in debug builds and produced a wrong
/// mask in release builds.
fn ipv4_in_cidr(ip: Ipv4Addr, base: [u8; 4], prefix: u8) -> bool {
    let host_bits = 32u32.saturating_sub(u32::from(prefix));
    // Shifting a u32 by its full width is not representable; `checked_shl` reports that as
    // `None`, which corresponds to the all-zero mask of a /0 network.
    let mask = u32::MAX.checked_shl(host_bits).unwrap_or(0);
    let ip = u32::from(ip);
    let base = u32::from(Ipv4Addr::from(base));
    (ip & mask) == (base & mask)
}
|
||||
|
||||
fn is_non_public_ipv6(ip: Ipv6Addr) -> bool {
|
||||
if let Some(v4) = ip.to_ipv4() {
|
||||
return is_non_public_ipv4(v4) || ip.is_loopback();
|
||||
}
|
||||
// Treat anything that isn't globally routable as "local" for SSRF prevention. In particular:
|
||||
// - `::1` loopback
|
||||
// - `fc00::/7` unique-local (RFC 4193)
|
||||
// - `fe80::/10` link-local
|
||||
// - `::` unspecified
|
||||
// - multicast ranges
|
||||
ip.is_loopback()
|
||||
|| ip.is_unspecified()
|
||||
|| ip.is_multicast()
|
||||
|| ip.is_unique_local()
|
||||
|| ip.is_unicast_link_local()
|
||||
}
|
||||
|
||||
/// Normalize host fragments for policy matching (trim whitespace, strip ports/brackets, lowercase).
|
||||
pub fn normalize_host(host: &str) -> String {
|
||||
let host = host.trim();
|
||||
if host.starts_with('[')
|
||||
&& let Some(end) = host.find(']')
|
||||
{
|
||||
return normalize_dns_host(&host[1..end]);
|
||||
}
|
||||
|
||||
// The proxy stack should typically hand us a host without a port, but be
|
||||
// defensive and strip `:port` when there is exactly one `:`.
|
||||
if host.bytes().filter(|b| *b == b':').count() == 1 {
|
||||
let host = host.split(':').next().unwrap_or_default();
|
||||
return normalize_dns_host(host);
|
||||
}
|
||||
|
||||
// Avoid mangling unbracketed IPv6 literals, but strip trailing dots so fully qualified domain
|
||||
// names are treated the same as their dotless variants.
|
||||
normalize_dns_host(host)
|
||||
}
|
||||
|
||||
/// Lowercases ASCII letters in `host` and removes every trailing `.`.
fn normalize_dns_host(host: &str) -> String {
    // Lowercasing never touches `.`, so trimming first yields the same text with less copying.
    host.trim_end_matches('.').to_ascii_lowercase()
}
|
||||
|
||||
fn normalize_pattern(pattern: &str) -> String {
|
||||
let pattern = pattern.trim();
|
||||
if pattern == "*" {
|
||||
return "*".to_string();
|
||||
}
|
||||
|
||||
let (prefix, remainder) = if let Some(domain) = pattern.strip_prefix("**.") {
|
||||
("**.", domain)
|
||||
} else if let Some(domain) = pattern.strip_prefix("*.") {
|
||||
("*.", domain)
|
||||
} else {
|
||||
("", pattern)
|
||||
};
|
||||
|
||||
let remainder = normalize_host(remainder);
|
||||
if prefix.is_empty() {
|
||||
remainder
|
||||
} else {
|
||||
format!("{prefix}{remainder}")
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn compile_globset(patterns: &[String]) -> Result<GlobSet> {
|
||||
let mut builder = GlobSetBuilder::new();
|
||||
let mut seen = HashSet::new();
|
||||
for pattern in patterns {
|
||||
let pattern = normalize_pattern(pattern);
|
||||
// Supported domain patterns:
|
||||
// - "example.com": match the exact host
|
||||
// - "*.example.com": match any subdomain (not the apex)
|
||||
// - "**.example.com": match the apex and any subdomain
|
||||
// - "*": match any host
|
||||
for candidate in expand_domain_pattern(&pattern) {
|
||||
if !seen.insert(candidate.clone()) {
|
||||
continue;
|
||||
}
|
||||
let glob = GlobBuilder::new(&candidate)
|
||||
.case_insensitive(true)
|
||||
.build()
|
||||
.with_context(|| format!("invalid glob pattern: {candidate}"))?;
|
||||
builder.add(glob);
|
||||
}
|
||||
}
|
||||
Ok(builder.build()?)
|
||||
}
|
||||
|
||||
/// Decoded form of a domain policy pattern, used when comparing one pattern against another.
#[derive(Debug, Clone)]
pub(crate) enum DomainPattern {
    /// The bare `*` pattern: every host.
    Any,
    /// A `**.domain` pattern: the domain itself and any of its subdomains.
    ApexAndSubdomains(String),
    /// A `*.domain` pattern: subdomains of the domain, but not the domain itself.
    SubdomainsOnly(String),
    /// A pattern without a wildcard prefix: exactly this host.
    Exact(String),
}
|
||||
|
||||
impl DomainPattern {
|
||||
/// Parse a policy pattern for constraint comparisons.
|
||||
///
|
||||
/// Validation of glob syntax happens when building the globset; here we only
|
||||
/// decode the wildcard prefixes to keep constraint checks lightweight.
|
||||
pub(crate) fn parse(input: &str) -> Self {
|
||||
let input = input.trim();
|
||||
if input.is_empty() {
|
||||
return Self::Exact(String::new());
|
||||
}
|
||||
if input == "*" {
|
||||
Self::Any
|
||||
} else if let Some(domain) = input.strip_prefix("**.") {
|
||||
Self::parse_domain(domain, Self::ApexAndSubdomains)
|
||||
} else if let Some(domain) = input.strip_prefix("*.") {
|
||||
Self::parse_domain(domain, Self::SubdomainsOnly)
|
||||
} else {
|
||||
Self::Exact(input.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a policy pattern for constraint comparisons, validating domain parts with `url`.
|
||||
pub(crate) fn parse_for_constraints(input: &str) -> Self {
|
||||
let input = input.trim();
|
||||
if input.is_empty() {
|
||||
return Self::Exact(String::new());
|
||||
}
|
||||
if input == "*" {
|
||||
return Self::Any;
|
||||
}
|
||||
if let Some(domain) = input.strip_prefix("**.") {
|
||||
return Self::ApexAndSubdomains(parse_domain_for_constraints(domain));
|
||||
}
|
||||
if let Some(domain) = input.strip_prefix("*.") {
|
||||
return Self::SubdomainsOnly(parse_domain_for_constraints(domain));
|
||||
}
|
||||
Self::Exact(parse_domain_for_constraints(input))
|
||||
}
|
||||
|
||||
fn parse_domain(domain: &str, build: impl FnOnce(String) -> Self) -> Self {
|
||||
let domain = domain.trim();
|
||||
if domain.is_empty() {
|
||||
return Self::Exact(String::new());
|
||||
}
|
||||
build(domain.to_string())
|
||||
}
|
||||
|
||||
pub(crate) fn allows(&self, candidate: &DomainPattern) -> bool {
|
||||
match self {
|
||||
DomainPattern::Any => true,
|
||||
DomainPattern::Exact(domain) => match candidate {
|
||||
DomainPattern::Exact(candidate) => domain_eq(candidate, domain),
|
||||
_ => false,
|
||||
},
|
||||
DomainPattern::SubdomainsOnly(domain) => match candidate {
|
||||
DomainPattern::Any => false,
|
||||
DomainPattern::Exact(candidate) => is_strict_subdomain(candidate, domain),
|
||||
DomainPattern::SubdomainsOnly(candidate) => {
|
||||
is_subdomain_or_equal(candidate, domain)
|
||||
}
|
||||
DomainPattern::ApexAndSubdomains(candidate) => {
|
||||
is_strict_subdomain(candidate, domain)
|
||||
}
|
||||
},
|
||||
DomainPattern::ApexAndSubdomains(domain) => match candidate {
|
||||
DomainPattern::Any => false,
|
||||
DomainPattern::Exact(candidate) => is_subdomain_or_equal(candidate, domain),
|
||||
DomainPattern::SubdomainsOnly(candidate) => {
|
||||
is_subdomain_or_equal(candidate, domain)
|
||||
}
|
||||
DomainPattern::ApexAndSubdomains(candidate) => {
|
||||
is_subdomain_or_equal(candidate, domain)
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_domain_for_constraints(domain: &str) -> String {
|
||||
let domain = domain.trim().trim_end_matches('.');
|
||||
if domain.is_empty() {
|
||||
return String::new();
|
||||
}
|
||||
let host = if domain.starts_with('[') && domain.ends_with(']') {
|
||||
&domain[1..domain.len().saturating_sub(1)]
|
||||
} else {
|
||||
domain
|
||||
};
|
||||
if host.contains('*') || host.contains('?') || host.contains('%') {
|
||||
return domain.to_string();
|
||||
}
|
||||
match UrlHost::parse(host) {
|
||||
Ok(host) => host.to_string(),
|
||||
Err(_) => String::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn expand_domain_pattern(pattern: &str) -> Vec<String> {
|
||||
match DomainPattern::parse(pattern) {
|
||||
DomainPattern::Any => vec![pattern.to_string()],
|
||||
DomainPattern::Exact(domain) => vec![domain],
|
||||
DomainPattern::SubdomainsOnly(domain) => {
|
||||
vec![format!("?*.{domain}")]
|
||||
}
|
||||
DomainPattern::ApexAndSubdomains(domain) => {
|
||||
vec![domain.clone(), format!("?*.{domain}")]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Strips trailing dots from `domain` and lowercases its ASCII letters.
fn normalize_domain(domain: &str) -> String {
    let mut normalized = domain.trim_end_matches('.').to_owned();
    normalized.make_ascii_lowercase();
    normalized
}
|
||||
|
||||
fn domain_eq(left: &str, right: &str) -> bool {
|
||||
normalize_domain(left) == normalize_domain(right)
|
||||
}
|
||||
|
||||
fn is_subdomain_or_equal(child: &str, parent: &str) -> bool {
|
||||
let child = normalize_domain(child);
|
||||
let parent = normalize_domain(parent);
|
||||
if child == parent {
|
||||
return true;
|
||||
}
|
||||
child.ends_with(&format!(".{parent}"))
|
||||
}
|
||||
|
||||
fn is_strict_subdomain(child: &str, parent: &str) -> bool {
|
||||
let child = normalize_domain(child);
|
||||
let parent = normalize_domain(parent);
|
||||
child != parent && child.ends_with(&format!(".{parent}"))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    use pretty_assertions::assert_eq;

    /// Parses a fixture host, panicking when the fixture itself is invalid.
    fn host(input: &str) -> Host {
        Host::parse(input).unwrap()
    }

    /// Parses a fixture IP address, panicking when the fixture itself is invalid.
    fn ip(input: &str) -> IpAddr {
        input.parse().unwrap()
    }

    #[test]
    fn method_allowed_full_allows_everything() {
        for method in ["GET", "POST", "CONNECT"] {
            assert!(
                NetworkMode::Full.allows_method(method),
                "full mode should allow {method}"
            );
        }
    }

    #[test]
    fn method_allowed_limited_allows_only_safe_methods() {
        for method in ["GET", "HEAD", "OPTIONS"] {
            assert!(
                NetworkMode::Limited.allows_method(method),
                "limited mode should allow {method}"
            );
        }
        for method in ["POST", "CONNECT"] {
            assert!(
                !NetworkMode::Limited.allows_method(method),
                "limited mode should reject {method}"
            );
        }
    }

    #[test]
    fn compile_globset_normalizes_trailing_dots() {
        let set = compile_globset(&["Example.COM.".to_string()]).unwrap();

        assert!(set.is_match("example.com"));
        assert!(!set.is_match("api.example.com"));
    }

    #[test]
    fn compile_globset_normalizes_wildcards() {
        let set = compile_globset(&["*.Example.COM.".to_string()]).unwrap();

        assert!(set.is_match("api.example.com"));
        assert!(!set.is_match("example.com"));
    }

    #[test]
    fn compile_globset_normalizes_apex_and_subdomains() {
        let set = compile_globset(&["**.Example.COM.".to_string()]).unwrap();

        assert!(set.is_match("example.com"));
        assert!(set.is_match("api.example.com"));
    }

    #[test]
    fn compile_globset_normalizes_bracketed_ipv6_literals() {
        let set = compile_globset(&["[::1]".to_string()]).unwrap();

        assert!(set.is_match("::1"));
    }

    #[test]
    fn is_loopback_host_handles_localhost_variants() {
        for name in ["localhost", "localhost.", "LOCALHOST"] {
            assert!(is_loopback_host(&host(name)), "{name} should be loopback");
        }
        assert!(!is_loopback_host(&host("notlocalhost")));
    }

    #[test]
    fn is_loopback_host_handles_ip_literals() {
        for literal in ["127.0.0.1", "::1"] {
            assert!(
                is_loopback_host(&host(literal)),
                "{literal} should be loopback"
            );
        }
        assert!(!is_loopback_host(&host("1.2.3.4")));
    }

    #[test]
    fn is_non_public_ip_rejects_private_and_loopback_ranges() {
        let non_public = [
            // IPv4 ranges.
            "127.0.0.1",
            "10.0.0.1",
            "192.168.0.1",
            "100.64.0.1",
            "192.0.0.1",
            "192.0.2.1",
            "198.18.0.1",
            "198.51.100.1",
            "203.0.113.1",
            "240.0.0.1",
            "0.1.2.3",
            // IPv4-mapped IPv6 addresses.
            "::ffff:127.0.0.1",
            "::ffff:10.0.0.1",
            // Native IPv6 ranges.
            "::1",
            "fe80::1",
            "fc00::1",
        ];
        for literal in non_public {
            assert!(
                is_non_public_ip(ip(literal)),
                "{literal} should be non-public"
            );
        }

        for literal in ["8.8.8.8", "::ffff:8.8.8.8"] {
            assert!(!is_non_public_ip(ip(literal)), "{literal} should be public");
        }
    }

    #[test]
    fn normalize_host_lowercases_and_trims() {
        assert_eq!(normalize_host("  ExAmPlE.CoM  "), "example.com");
    }

    #[test]
    fn normalize_host_strips_port_for_host_port() {
        assert_eq!(normalize_host("example.com:1234"), "example.com");
    }

    #[test]
    fn normalize_host_preserves_unbracketed_ipv6() {
        assert_eq!(normalize_host("2001:db8::1"), "2001:db8::1");
    }

    #[test]
    fn normalize_host_strips_trailing_dot() {
        for input in ["example.com.", "ExAmPlE.CoM."] {
            assert_eq!(normalize_host(input), "example.com");
        }
    }

    #[test]
    fn normalize_host_strips_trailing_dot_with_port() {
        assert_eq!(normalize_host("example.com.:443"), "example.com");
    }

    #[test]
    fn normalize_host_strips_brackets_for_ipv6() {
        for input in ["[::1]", "[::1]:443"] {
            assert_eq!(normalize_host(input), "::1");
        }
    }
}
|
||||
Reference in New Issue
Block a user