fix(core) Filter non-matching prefix rules (#12314)

## Summary

`gpt-5.3-codex` really likes to write complicated shell scripts and then suggest a partial `prefix_rule` that wouldn't actually approve the command. We should only show the `prefix_rule` suggestion from the model if it would actually fully approve the command the user is seeing.

This will technically cause more instances of overly-specific suggestions when we fall back, but I think the UX is clearer, particularly when the model doesn't necessarily understand the current limitations of execpolicy parsing.

## Testing

- [x] Add unit tests
- [x] Add integration tests
2026-05-06 06:12:59 +03:00 · 2026-02-20 22:02:35 -08:00
parent 1779feb6a7
commit a8b4b569fb
2 changed files with 278 additions and 13 deletions
--- a/codex-rs/core/src/exec_policy.rs
+++ b/codex-rs/core/src/exec_policy.rs
@@ -224,6 +224,9 @@ impl ExecPolicyManager {
        let requested_amendment = derive_requested_execpolicy_amendment_from_prefix_rule(
            prefix_rule.as_ref(),
            &evaluation.matched_rules,
+            exec_policy.as_ref(),
+            &commands,
+            &exec_policy_fallback,
        );

        match evaluation.decision {
@@ -592,6 +595,9 @@ fn try_derive_execpolicy_amendment_for_allow_rules(
 fn derive_requested_execpolicy_amendment_from_prefix_rule(
    prefix_rule: Option<&Vec<String>>,
    matched_rules: &[RuleMatch],
+    exec_policy: &Policy,
+    commands: &[Vec<String>],
+    exec_policy_fallback: &impl Fn(&[String]) -> Decision,
 ) -> Option<ExecPolicyAmendment> {
    let prefix_rule = prefix_rule?;
    if prefix_rule.is_empty() {
@@ -612,7 +618,39 @@ fn derive_requested_execpolicy_amendment_from_prefix_rule(
        return None;
    }

-    Some(ExecPolicyAmendment::new(prefix_rule.clone()))
+    let amendment = ExecPolicyAmendment::new(prefix_rule.clone());
+    if prefix_rule_would_approve_all_commands(
+        exec_policy,
+        &amendment.command,
+        commands,
+        exec_policy_fallback,
+    ) {
+        Some(amendment)
+    } else {
+        None
+    }
+}
+
+fn prefix_rule_would_approve_all_commands(
+    exec_policy: &Policy,
+    prefix_rule: &[String],
+    commands: &[Vec<String>],
+    exec_policy_fallback: &impl Fn(&[String]) -> Decision,
+) -> bool {
+    let mut policy_with_prefix_rule = exec_policy.clone();
+    if policy_with_prefix_rule
+        .add_prefix_rule(prefix_rule, Decision::Allow)
+        .is_err()
+    {
+        return false;
+    }
+
+    commands.iter().all(|command| {
+        policy_with_prefix_rule
+            .check(command, exec_policy_fallback)
+            .decision
+            == Decision::Allow
+    })
 }

 /// Only return a reason when a policy rule drove the prompt decision.
@@ -1125,7 +1163,7 @@ prefix_rule(pattern=["rm"], decision="forbidden")
    }

    #[tokio::test]
-    async fn keeps_requested_amendment_for_heredoc_fallback_prompts() {
+    async fn drops_requested_amendment_for_heredoc_fallback_prompts_when_it_wont_match() {
        let command = vec![
            "bash".to_string(),
            "-lc".to_string(),
@@ -1147,7 +1185,7 @@ prefix_rule(pattern=["rm"], decision="forbidden")
            requirement,
            ExecApprovalRequirement::NeedsApproval {
                reason: None,
-                proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(requested_prefix)),
+                proposed_execpolicy_amendment: None,
            }
        );
    }
@@ -1472,6 +1510,38 @@ prefix_rule(
        );
    }

+    #[tokio::test]
+    async fn request_rule_falls_back_when_prefix_rule_does_not_approve_all_commands() {
+        let command = vec![
+            "bash".to_string(),
+            "-lc".to_string(),
+            "cargo install cargo-insta && rm -rf /tmp/codex".to_string(),
+        ];
+        let manager = ExecPolicyManager::default();
+
+        let requirement = manager
+            .create_exec_approval_requirement_for_command(ExecApprovalRequest {
+                command: &command,
+                approval_policy: AskForApproval::OnRequest,
+                sandbox_policy: &SandboxPolicy::DangerFullAccess,
+                sandbox_permissions: SandboxPermissions::RequireEscalated,
+                prefix_rule: Some(vec!["cargo".to_string(), "install".to_string()]),
+            })
+            .await;
+
+        assert_eq!(
+            requirement,
+            ExecApprovalRequirement::NeedsApproval {
+                reason: None,
+                proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
+                    "rm".to_string(),
+                    "-rf".to_string(),
+                    "/tmp/codex".to_string(),
+                ])),
+            }
+        );
+    }
+
    #[tokio::test]
    async fn heuristics_apply_when_other_commands_match_policy() {
        let policy_src = r#"prefix_rule(pattern=["apple"], decision="allow")"#;
@@ -1728,11 +1798,28 @@ prefix_rule(
        );
    }

+    fn derive_requested_execpolicy_amendment_for_test(
+        prefix_rule: Option<&Vec<String>>,
+        matched_rules: &[RuleMatch],
+    ) -> Option<ExecPolicyAmendment> {
+        let commands = prefix_rule
+            .cloned()
+            .map(|prefix_rule| vec![prefix_rule])
+            .unwrap_or_else(|| vec![vec!["echo".to_string()]]);
+        derive_requested_execpolicy_amendment_from_prefix_rule(
+            prefix_rule,
+            matched_rules,
+            &Policy::empty(),
+            &commands,
+            &|_: &[String]| Decision::Allow,
+        )
+    }
+
    #[test]
    fn derive_requested_execpolicy_amendment_returns_none_for_missing_prefix_rule() {
        assert_eq!(
            None,
-            derive_requested_execpolicy_amendment_from_prefix_rule(None, &[])
+            derive_requested_execpolicy_amendment_for_test(None, &[])
        );
    }

@@ -1740,7 +1827,7 @@ prefix_rule(
    fn derive_requested_execpolicy_amendment_returns_none_for_empty_prefix_rule() {
        assert_eq!(
            None,
-            derive_requested_execpolicy_amendment_from_prefix_rule(Some(&Vec::new()), &[])
+            derive_requested_execpolicy_amendment_for_test(Some(&Vec::new()), &[])
        );
    }

@@ -1748,7 +1835,7 @@ prefix_rule(
    fn derive_requested_execpolicy_amendment_returns_none_for_exact_banned_prefix_rule() {
        assert_eq!(
            None,
-            derive_requested_execpolicy_amendment_from_prefix_rule(
+            derive_requested_execpolicy_amendment_for_test(
                Some(&vec!["python".to_string(), "-c".to_string()]),
                &[],
            )
@@ -1767,7 +1854,7 @@ prefix_rule(
        ] {
            assert_eq!(
                None,
-                derive_requested_execpolicy_amendment_from_prefix_rule(Some(&prefix_rule), &[])
+                derive_requested_execpolicy_amendment_for_test(Some(&prefix_rule), &[])
            );
        }
    }
@@ -1793,7 +1880,7 @@ prefix_rule(
        ] {
            assert_eq!(
                None,
-                derive_requested_execpolicy_amendment_from_prefix_rule(Some(&prefix_rule), &[])
+                derive_requested_execpolicy_amendment_for_test(Some(&prefix_rule), &[])
            );
        }
    }
@@ -1808,7 +1895,7 @@ prefix_rule(

        assert_eq!(
            Some(ExecPolicyAmendment::new(prefix_rule.clone())),
-            derive_requested_execpolicy_amendment_from_prefix_rule(Some(&prefix_rule), &[])
+            derive_requested_execpolicy_amendment_for_test(Some(&prefix_rule), &[])
        );
    }

@@ -1823,7 +1910,7 @@ prefix_rule(
        }];
        assert_eq!(
            None,
-            derive_requested_execpolicy_amendment_from_prefix_rule(
+            derive_requested_execpolicy_amendment_for_test(
                Some(&prefix_rule),
                &matched_rules_prompt
            ),
@@ -1836,7 +1923,7 @@ prefix_rule(
        }];
        assert_eq!(
            None,
-            derive_requested_execpolicy_amendment_from_prefix_rule(
+            derive_requested_execpolicy_amendment_for_test(
                Some(&prefix_rule),
                &matched_rules_allow
            ),
@@ -1849,9 +1936,9 @@ prefix_rule(
        }];
        assert_eq!(
            None,
-            derive_requested_execpolicy_amendment_from_prefix_rule(
+            derive_requested_execpolicy_amendment_for_test(
                Some(&prefix_rule),
-                &matched_rules_forbidden
+                &matched_rules_forbidden,
            ),
            "should return none when prompt policy matches"
        );