Files
codex/codex-cli/src/utils/agent/handle-exec-command.ts
Brayden Moon f3d085aaf8 feat: shell command explanation option (#173)
# Shell Command Explanation Option

## Description
This PR adds an option to explain shell commands when the user is
prompted to approve them (Fixes #110). When reviewing a shell command,
users can now select "Explain this command" to get a detailed
explanation of what the command does before deciding whether to approve
or reject it.

## Changes
- Added a new "EXPLAIN" option to the `ReviewDecision` enum
- Updated the command review UI to include an "Explain this command (x)"
option
- Implemented the logic to send the command to the LLM for explanation
using the same model as the agent
- Added a display for the explanation in the command review UI
- Updated all relevant components to pass the explanation through the
component tree

## Benefits
- Improves user understanding of shell commands before approving them
- Reduces the risk of approving potentially harmful commands
- Enhances the educational aspect of the tool, helping users learn about
shell commands
- Maintains the same workflow with minimal UI changes

## Testing
- Manually tested the explanation feature with various shell commands
- Verified that the explanation is displayed correctly in the UI
- Confirmed that the user can still approve or reject the command after
viewing the explanation

## Screenshots

![improved_shell_explanation_demo](https://github.com/user-attachments/assets/05923481-29db-4eba-9cc6-5e92301d2be0)


## Additional Notes
The explanation is generated using the same model as the agent, ensuring
consistency in the quality and style of explanations.

---------

Signed-off-by: crazywolf132 <crazywolf132@gmail.com>
2025-04-17 13:28:58 -07:00

339 lines
10 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import type { CommandConfirmation } from "./agent-loop.js";
import type { AppConfig } from "../config.js";
import type { ExecInput } from "./sandbox/interface.js";
import type { ApplyPatchCommand, ApprovalPolicy } from "../../approvals.js";
import type { ResponseInputItem } from "openai/resources/responses/responses.mjs";
import { exec, execApplyPatch } from "./exec.js";
import { isLoggingEnabled, log } from "./log.js";
import { ReviewDecision } from "./review.js";
import { FullAutoErrorMode } from "../auto-approval-mode.js";
import { SandboxType } from "./sandbox/interface.js";
import { canAutoApprove } from "../../approvals.js";
import { formatCommandForDisplay } from "../../format-command.js";
import { access } from "fs/promises";
// ---------------------------------------------------------------------------
// Sessionlevel cache of commands that the user has chosen to always approve.
//
// The values are derived via `deriveCommandKey()` which intentionally ignores
// volatile arguments (for example the patch text passed to `apply_patch`).
// Storing *generalised* keys means that once a user selects "always approve"
// for a given class of command we will genuinely stop prompting them for
// subsequent, equivalent invocations during the same CLI session.
// ---------------------------------------------------------------------------
const alwaysApprovedCommands = new Set<string>();
// ---------------------------------------------------------------------------
// Helper: Given the argv-style representation of a command, return a stable
// string key that can be used for equality checks.
//
// The key space purposefully abstracts away parts of the command line that
// are expected to change between invocations while still retaining enough
// information to differentiate *meaningfully distinct* operations. See the
// extensive inline documentation for details.
// ---------------------------------------------------------------------------
function deriveCommandKey(cmd: Array<string>): string {
// pull off only the bits you care about
const [
maybeShell,
maybeFlag,
coreInvocation,
/* …ignore the rest… */
] = cmd;
if (coreInvocation?.startsWith("apply_patch")) {
return "apply_patch";
}
if (maybeShell === "bash" && maybeFlag === "-lc") {
// If the command was invoked through `bash -lc "<script>"` we extract the
// base program name from the script string.
const script = coreInvocation ?? "";
return script.split(/\s+/)[0] || "bash";
}
// For every other command we fall back to using only the program name (the
// first argv element). This guarantees we always return a *string* even if
// `coreInvocation` is undefined.
if (coreInvocation) {
return coreInvocation.split(/\s+/)[0]!;
}
return JSON.stringify(cmd);
}
type HandleExecCommandResult = {
outputText: string;
metadata: Record<string, unknown>;
additionalItems?: Array<ResponseInputItem>;
};
export async function handleExecCommand(
args: ExecInput,
config: AppConfig,
policy: ApprovalPolicy,
getCommandConfirmation: (
command: Array<string>,
applyPatch: ApplyPatchCommand | undefined,
) => Promise<CommandConfirmation>,
abortSignal?: AbortSignal,
): Promise<HandleExecCommandResult> {
const { cmd: command } = args;
const key = deriveCommandKey(command);
// 1) If the user has already said "always approve", skip
// any policy & never sandbox.
if (alwaysApprovedCommands.has(key)) {
return execCommand(
args,
/* applyPatch */ undefined,
/* runInSandbox */ false,
abortSignal,
).then(convertSummaryToResult);
}
// 2) Otherwise fall back to the normal policy
// `canAutoApprove` now requires the list of writable roots that the command
// is allowed to modify. For the CLI we conservatively pass the current
// working directory so that edits are constrained to the project root. If
// the caller wishes to broaden or restrict the set it can be made
// configurable in the future.
const safety = canAutoApprove(command, policy, [process.cwd()]);
let runInSandbox: boolean;
switch (safety.type) {
case "ask-user": {
const review = await askUserPermission(
args,
safety.applyPatch,
getCommandConfirmation,
);
if (review != null) {
return review;
}
runInSandbox = false;
break;
}
case "auto-approve": {
runInSandbox = safety.runInSandbox;
break;
}
case "reject": {
return {
outputText: "aborted",
metadata: {
error: "command rejected",
reason: "Command rejected by auto-approval system.",
},
};
}
}
const { applyPatch } = safety;
const summary = await execCommand(
args,
applyPatch,
runInSandbox,
abortSignal,
);
// If the operation was aborted in the meantime, propagate the cancellation
// upward by returning an empty (noop) result so that the agent loop will
// exit cleanly without emitting spurious output.
if (abortSignal?.aborted) {
return {
outputText: "",
metadata: {},
};
}
if (
summary.exitCode !== 0 &&
runInSandbox &&
// Default: If the user has configured to ignore and continue,
// skip re-running the command.
//
// Otherwise, if they selected "ask-user", then we should ask the user
// for permission to re-run the command outside of the sandbox.
config.fullAutoErrorMode &&
config.fullAutoErrorMode === FullAutoErrorMode.ASK_USER
) {
const review = await askUserPermission(
args,
safety.applyPatch,
getCommandConfirmation,
);
if (review != null) {
return review;
} else {
// The user has approved the command, so we will run it outside of the
// sandbox.
const summary = await execCommand(args, applyPatch, false, abortSignal);
return convertSummaryToResult(summary);
}
} else {
return convertSummaryToResult(summary);
}
}
function convertSummaryToResult(
summary: ExecCommandSummary,
): HandleExecCommandResult {
const { stdout, stderr, exitCode, durationMs } = summary;
return {
outputText: stdout || stderr,
metadata: {
exit_code: exitCode,
duration_seconds: Math.round(durationMs / 100) / 10,
},
};
}
type ExecCommandSummary = {
stdout: string;
stderr: string;
exitCode: number;
durationMs: number;
};
async function execCommand(
execInput: ExecInput,
applyPatchCommand: ApplyPatchCommand | undefined,
runInSandbox: boolean,
abortSignal?: AbortSignal,
): Promise<ExecCommandSummary> {
let { workdir } = execInput;
if (workdir) {
try {
await access(workdir);
} catch (e) {
log(`EXEC workdir=${workdir} not found, use process.cwd() instead`);
workdir = process.cwd();
}
}
if (isLoggingEnabled()) {
if (applyPatchCommand != null) {
log("EXEC running apply_patch command");
} else {
const { cmd, timeoutInMillis } = execInput;
// Seconds are a bit easier to read in log messages and most timeouts
// are specified as multiples of 1000, anyway.
const timeout =
timeoutInMillis != null
? Math.round(timeoutInMillis / 1000).toString()
: "undefined";
log(
`EXEC running \`${formatCommandForDisplay(
cmd,
)}\` in workdir=${workdir} with timeout=${timeout}s`,
);
}
}
// Note execApplyPatch() and exec() are coded defensively and should not
// throw. Any internal errors should be mapped to a non-zero value for the
// exitCode field.
const start = Date.now();
const execResult =
applyPatchCommand != null
? execApplyPatch(applyPatchCommand.patch)
: await exec(execInput, await getSandbox(runInSandbox), abortSignal);
const duration = Date.now() - start;
const { stdout, stderr, exitCode } = execResult;
if (isLoggingEnabled()) {
log(
`EXEC exit=${exitCode} time=${duration}ms:\n\tSTDOUT: ${stdout}\n\tSTDERR: ${stderr}`,
);
}
return {
stdout,
stderr,
exitCode,
durationMs: duration,
};
}
const isInLinux = async (): Promise<boolean> => {
try {
await access("/proc/1/cgroup");
return true;
} catch {
return false;
}
};
async function getSandbox(runInSandbox: boolean): Promise<SandboxType> {
if (runInSandbox) {
if (process.platform === "darwin") {
return SandboxType.MACOS_SEATBELT;
} else if (await isInLinux()) {
return SandboxType.NONE;
} else if (process.platform === "win32") {
// On Windows, we don't have a sandbox implementation yet, so we fall back to NONE
// instead of throwing an error, which would crash the application
log(
"WARNING: Sandbox was requested but is not available on Windows. Continuing without sandbox.",
);
return SandboxType.NONE;
}
// For other platforms, still throw an error as before
throw new Error("Sandbox was mandated, but no sandbox is available!");
} else {
return SandboxType.NONE;
}
}
/**
* If return value is non-null, then the command was rejected by the user.
*/
async function askUserPermission(
args: ExecInput,
applyPatchCommand: ApplyPatchCommand | undefined,
getCommandConfirmation: (
command: Array<string>,
applyPatch: ApplyPatchCommand | undefined,
) => Promise<CommandConfirmation>,
): Promise<HandleExecCommandResult | null> {
const { review: decision, customDenyMessage } = await getCommandConfirmation(
args.cmd,
applyPatchCommand,
);
if (decision === ReviewDecision.ALWAYS) {
// Persist this command so we won't ask again during this session.
const key = deriveCommandKey(args.cmd);
alwaysApprovedCommands.add(key);
}
// Handle EXPLAIN decision by returning null to continue with the normal flow
// but with a flag to indicate that an explanation was requested
if (decision === ReviewDecision.EXPLAIN) {
return null;
}
// Any decision other than an affirmative (YES / ALWAYS) or EXPLAIN aborts execution.
if (decision !== ReviewDecision.YES && decision !== ReviewDecision.ALWAYS) {
const note =
decision === ReviewDecision.NO_CONTINUE
? customDenyMessage?.trim() || "No, don't do that — keep going though."
: "No, don't do that — stop for now.";
return {
outputText: "aborted",
metadata: {},
additionalItems: [
{
type: "message",
role: "user",
content: [{ type: "input_text", text: note }],
},
],
};
} else {
return null;
}
}