Compare commits

...

1 Commits

Author SHA1 Message Date
Michael Bolin
85d16b5d6e scripts: add Makeself Codex dev builder 2026-05-08 09:28:47 -07:00

434
scripts/build_makeself_codex.py Executable file
View File

@@ -0,0 +1,434 @@
#!/usr/bin/env python3
"""Build a self-extracting Codex dev artifact with Makeself.
This script is intentionally local-dev tooling, not release packaging. The
initial workflow it supports is: build a host-platform `codex` binary, wrap it
in one executable-looking `.run` file, copy that file into a container or other
test environment, and run it there without needing a separate install step.
The runtime extraction location is the most important design constraint. Do not
let generated archives extract into `/tmp`, `/private/tmp`, `$TMPDIR`, or other
system temp locations. Codex treats running from temp directories as unsafe
because an agent with write access to the workspace could modify the executable
that is currently running. Generated archives should instead extract under
`$HOME/.cache/codex-dev`, which is deliberately separate from the normal
`$HOME/.cache/codex` cache used elsewhere by Codex.
The extraction directory is content-addressed from the staged payload. That
means the same `.run` file extracts once and then reuses the cached tree, while
each changed local build gets a new directory. Prefer hashing the staged bytes
over adding random UUIDs or trying to infer repository state; the payload is
the thing that must be isolated. The `.run` file itself does not need to be
bit-for-bit reproducible for this cache key to be stable.
The script patches the installed Makeself header at build time rather than
vendoring Makeself into this repository. The patch preserves Makeself maintenance
operations, but routes normal Codex CLI flags such as `--help` and `--version`
to the embedded `codex` binary so the generated file behaves like the tool it
wraps. Makeself help is moved to `--makeself-help`.
Compression defaults to `none` (`makeself --nocomp`). This keeps the generated
file directly executable without requiring a decompressor in the target
environment and leaves room for external transport compression. Callers can opt
into Makeself-managed compression with `--compression gzip`, `--compression zstd`,
or another supported mode when that target-side dependency is acceptable.
Build-time staging may use temporary directories. That is fine: only the
runtime extraction cache must avoid temp locations.
"""
from __future__ import annotations
import argparse
import hashlib
import os
import platform
import shutil
import stat
import subprocess
import sys
import tempfile
from pathlib import Path
# Parent of the directory that contains this script.
REPO_ROOT = Path(__file__).resolve().parent.parent
# Cargo is run from here and build outputs are read from its target/ tree.
CODEX_RS = REPO_ROOT / "codex-rs"
# Defaults for the --profile, --compression and --cache-root options.
DEFAULT_PROFILE = "dev-small"
DEFAULT_COMPRESSION = "none"
# "$HOME" is intentionally left unexpanded: the string is handed to makeself
# as-is so the shell variable is preserved in the generated artifact.
DEFAULT_CACHE_ROOT = "$HOME/.cache/codex-dev"
# Accepted --compression values. "none" becomes `--nocomp`; every other entry
# is passed to makeself as `--<name>` (see makeself_compression_arg).
COMPRESSION_CHOICES = (
    "none",
    "gzip",
    "pigz",
    "zstd",
    "bzip2",
    "pbzip2",
    "bzip3",
    "xz",
    "lzo",
    "lz4",
    "compress",
)
# Marker file the generated runner script creates in the extraction directory;
# the patched Makeself header tests for it to detect an existing extraction.
COMPLETE_SENTINEL = ".codex-makeself-complete"
# File name of the generated /bin/sh startup script inside the payload.
RUNNER_NAME = "run-codex"
def parse_args() -> argparse.Namespace:
    """Parse this script's command-line options from ``sys.argv``.

    The parser description is the first line of the module docstring.

    Returns:
        The populated ``argparse.Namespace``.
    """
    summary = __doc__.splitlines()[0]
    cli = argparse.ArgumentParser(description=summary)
    add = cli.add_argument

    # Build configuration.
    add(
        "--profile",
        default=DEFAULT_PROFILE,
        help=f"Cargo profile to build with. Default: {DEFAULT_PROFILE}.",
    )
    add(
        "--output",
        type=Path,
        default=None,
        help="Path to write the generated Makeself archive. Default: dist/codex-dev/codex-dev.run.",
    )
    add(
        "--compression",
        choices=COMPRESSION_CHOICES,
        default=DEFAULT_COMPRESSION,
        help=(
            "Compression mode for the embedded Makeself payload. "
            f"Default: {DEFAULT_COMPRESSION}."
        ),
    )
    add(
        "--cache-root",
        default=DEFAULT_CACHE_ROOT,
        help=(
            "Runtime cache root for extracted builds. Shell variables are preserved "
            f"in the generated artifact. Default: {DEFAULT_CACHE_ROOT}."
        ),
    )
    add(
        "--include-bwrap",
        choices=("auto", "always", "never"),
        default="auto",
        help="Whether to build and bundle bwrap. Default: auto, which includes it on Linux.",
    )

    # Workflow switches.
    add(
        "--skip-cargo-build",
        action="store_true",
        help="Use existing Cargo build outputs instead of invoking cargo build.",
    )
    add(
        "--keep-staging-dir",
        action="store_true",
        help="Keep the temporary staged payload directory for inspection.",
    )

    # Makeself tool locations.
    add(
        "--makeself",
        default="makeself",
        help="Path to the makeself executable. Default: makeself from PATH.",
    )
    add(
        "--makeself-header",
        type=Path,
        default=None,
        help="Path to makeself-header.sh. Default: infer from the makeself installation.",
    )
    return cli.parse_args()
def run_command(cmd: list[str], cwd: Path) -> None:
    """Echo a command and run it, failing loudly on a non-zero exit.

    Args:
        cmd: Program and arguments, passed to ``subprocess.run`` as a list
            (no shell is involved).
        cwd: Working directory for the child process.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    import shlex

    # Quote each argument so the echoed line stays unambiguous (and can be
    # pasted into a shell) when an argument contains spaces, e.g. the
    # archive label passed to makeself.
    print("+", shlex.join(cmd), flush=True)
    subprocess.run(cmd, cwd=cwd, check=True)
def cargo_profile_output_dir(profile_name: str) -> Path:
    """Return the directory under ``codex-rs/target`` for a Cargo profile.

    The ``dev`` profile maps to ``debug``; every other profile name
    (including ``release``) is used as the directory name unchanged.
    """
    directory_overrides = {"dev": "debug"}
    subdirectory = directory_overrides.get(profile_name, profile_name)
    return CODEX_RS / "target" / subdirectory
def host_executable_name(name: str) -> str:
    """Return ``name`` with an ``.exe`` suffix when ``os.name`` is ``"nt"``."""
    return f"{name}.exe" if os.name == "nt" else name
def should_include_bwrap(mode: str) -> bool:
    """Translate the ``--include-bwrap`` mode into a yes/no decision.

    Args:
        mode: One of ``"always"``, ``"never"`` or ``"auto"``.

    Returns:
        ``True`` for ``"always"``, ``False`` for ``"never"``; for ``"auto"``,
        whether ``platform.system()`` reports ``"Linux"``.

    Raises:
        ValueError: If ``mode`` is none of the three accepted values.
    """
    if mode == "always":
        return True
    if mode == "never":
        return False
    if mode == "auto":
        return platform.system() == "Linux"
    raise ValueError(f"unexpected bwrap mode: {mode}")
def makeself_compression_arg(compression: str) -> str:
    """Return the makeself flag for a compression mode.

    ``"none"`` becomes ``--nocomp``; any other mode becomes ``--<mode>``.
    """
    flag_name = "nocomp" if compression == "none" else compression
    return f"--{flag_name}"
def validate_cache_root(cache_root: str) -> None:
    """Reject cache roots that are, or live under, a temp directory.

    The check is textual because ``cache_root`` may contain shell variables
    that are only expanded when the generated archive runs. Both the value as
    given and a lexically normalized form are checked, so spellings such as
    ``/var/../tmp/x`` or ``//tmp/x`` cannot slip past the prefix match.

    Args:
        cache_root: Value of ``--cache-root``; trailing slashes are ignored.

    Raises:
        RuntimeError: If the cache root is a known temp location, the
            resolved system temp directory of the build host, or a path
            below one of them.
    """
    import posixpath

    forbidden_roots = {
        "/tmp",
        "/private/tmp",
        "/var/tmp",
        "/var/folders",
        "/private/var/folders",
        "$TMPDIR",
        "${TMPDIR}",
        "${TMPDIR:-/tmp}",
        "${TMPDIR-/tmp}",
    }
    # Also forbid whatever this host considers its temp directory.
    forbidden_roots.add(str(Path(tempfile.gettempdir()).resolve()))
    forbidden_prefixes = tuple(f"{root}/" for root in forbidden_roots)

    stripped = cache_root.rstrip("/")
    candidates = {stripped}
    if stripped:
        # Collapse "." / ".." segments and repeated slashes. normpath keeps a
        # leading "//" intact, so fold that down to a single slash as well.
        normalized = posixpath.normpath(stripped)
        if normalized.startswith("//"):
            normalized = "/" + normalized.lstrip("/")
        candidates.add(normalized)

    # Checking the raw spelling too keeps every previously rejected value
    # rejected (e.g. "/tmp/../home/x" still starts with "/tmp/").
    for candidate in candidates:
        if candidate in forbidden_roots or candidate.startswith(forbidden_prefixes):
            raise RuntimeError(f"Refusing to use temp directory as cache root: {cache_root}")
def build_binaries(profile_name: str, include_bwrap: bool, skip_cargo_build: bool) -> None:
    """Run ``cargo build`` for the binaries that go into the payload.

    Args:
        profile_name: Cargo profile passed via ``--profile``.
        include_bwrap: Also build the ``bwrap`` binary when true.
        skip_cargo_build: When true, do nothing and rely on existing outputs.
    """
    if skip_cargo_build:
        return

    bwrap_args = ["--bin", "bwrap"] if include_bwrap else []
    cargo_cmd = ["cargo", "build", "--profile", profile_name, "--bin", "codex", *bwrap_args]
    run_command(cargo_cmd, cwd=CODEX_RS)
def require_file(path: Path, description: str) -> None:
    """Ensure ``path`` is an existing regular file.

    Args:
        path: Location to check.
        description: Human-readable name used in the error message.

    Raises:
        RuntimeError: If ``path`` is not a file.
    """
    if path.is_file():
        return
    raise RuntimeError(f"Missing {description}: {path}")
def stage_payload(build_dir: Path, staging_dir: Path, include_bwrap: bool) -> None:
    """Populate ``staging_dir`` with everything that goes into the archive.

    The staged tree contains the ``codex`` binary, optionally ``bwrap`` under
    ``codex-resources/``, and a small ``/bin/sh`` runner script that creates
    the completion sentinel and then ``exec``s ``codex`` with the caller's
    arguments.

    Args:
        build_dir: Cargo output directory holding the built binaries.
        staging_dir: Existing directory to copy the payload into.
        include_bwrap: Whether to bundle the ``bwrap`` binary.

    Raises:
        RuntimeError: If a required binary is missing from ``build_dir``.
    """
    codex_name = host_executable_name("codex")
    built_codex = build_dir / codex_name
    require_file(built_codex, "codex binary")
    shutil.copy2(built_codex, staging_dir / codex_name)

    if include_bwrap:
        bwrap_name = host_executable_name("bwrap")
        built_bwrap = build_dir / bwrap_name
        require_file(built_bwrap, "bwrap binary")
        resources = staging_dir / "codex-resources"
        resources.mkdir(parents=True, exist_ok=True)
        shutil.copy2(built_bwrap, resources / host_executable_name("bwrap"))

    script_lines = (
        "#!/bin/sh",
        "set -eu",
        # Touch the sentinel before handing control to codex.
        f": > {COMPLETE_SENTINEL}",
        f'exec ./{codex_name} "$@"',
    )
    runner_script = staging_dir / RUNNER_NAME
    runner_script.write_text(
        "".join(f"{line}\n" for line in script_lines),
        encoding="utf-8",
    )
    runner_script.chmod(0o755)
def iter_staged_files(staging_dir: Path) -> list[Path]:
    """Return every file under ``staging_dir`` (recursively), sorted by path."""
    staged = [entry for entry in staging_dir.rglob("*") if entry.is_file()]
    staged.sort()
    return staged
def hash_staged_tree(staging_dir: Path) -> str:
    """Return a SHA-256 hex digest identifying the staged payload.

    Files are visited in the order given by ``iter_staged_files``. For each
    file the digest absorbs its POSIX-style relative path, its permission
    bits in octal, and its contents, each followed by a NUL byte.
    """
    hasher = hashlib.sha256()
    for staged_file in iter_staged_files(staging_dir):
        name = staged_file.relative_to(staging_dir).as_posix()
        permissions = stat.S_IMODE(staged_file.stat().st_mode)
        hasher.update(name.encode("utf-8") + b"\0")
        hasher.update(f"{permissions:o}".encode("ascii") + b"\0")
        with staged_file.open("rb") as stream:
            # Read in 1 MiB blocks so large binaries are not loaded whole.
            while block := stream.read(1024 * 1024):
                hasher.update(block)
        hasher.update(b"\0")
    return hasher.hexdigest()
def infer_makeself_header(makeself: str) -> Path:
    """Locate ``makeself-header.sh`` for the given makeself executable.

    The executable is looked up with ``shutil.which`` (falling back to a plain
    file path), resolved, and the header is then searched for next to it,
    under sibling ``libexec``/``share/makeself`` directories, and in a few
    fixed ``/usr`` locations, in that order.

    Args:
        makeself: Name or path of the makeself executable.

    Returns:
        The first candidate path that is an existing file.

    Raises:
        RuntimeError: If the executable or the header cannot be found.
    """
    located = shutil.which(makeself)
    if located is None:
        # Accept a plain file path even when `which` does not report it.
        if not Path(makeself).is_file():
            raise RuntimeError(f"Unable to find makeself executable: {makeself}")
        located = str(Path(makeself))

    install_dir = Path(located).resolve().parent
    install_prefix = install_dir.parent
    search_order = (
        install_dir / "makeself-header.sh",
        install_prefix / "libexec" / "makeself-header.sh",
        install_prefix / "share" / "makeself" / "makeself-header.sh",
        Path("/usr/libexec/makeself-header.sh"),
        Path("/usr/share/makeself/makeself-header.sh"),
        Path("/usr/lib/makeself/makeself-header.sh"),
    )
    found = next((option for option in search_order if option.is_file()), None)
    if found is None:
        raise RuntimeError(
            "Unable to infer makeself-header.sh. Pass --makeself-header with its path."
        )
    return found
def write_cached_makeself_header(source_header: Path, output_header: Path) -> None:
    """Write a patched copy of the Makeself header with a cache fast path.

    The source header is first passed through ``pass_codex_options_through``
    and then a shell snippet is inserted immediately before the
    ``if test x"\\$targetdir" = x.; then`` line. The snippet runs the startup
    script from an existing extraction when the target directory already
    contains the completion sentinel, instead of unpacking again.

    Args:
        source_header: Path of the stock ``makeself-header.sh`` to read.
        output_header: Path the patched header is written to.

    Raises:
        RuntimeError: If the insertion marker (or the option-parser marker
            checked by ``pass_codex_options_through``) is missing.
    """
    header = source_header.read_text(encoding="utf-8")
    # Anchor line in the stock header; the fast path is inserted before it.
    marker = 'if test x"\\$targetdir" = x.; then'
    if marker not in header:
        raise RuntimeError(f"Unable to patch Makeself header; marker not found in {source_header}")
    header = pass_codex_options_through(header, source_header)
    # NOTE(review): every shell `$` below is written as `\$` (and `{`/`}` are
    # doubled for the f-string), matching the escaping used in the marker line.
    # Presumably the header is expanded as a template when the archive is
    # built -- confirm against makeself-header.sh before editing.
    cache_fast_path = f"""
# Codex dev artifacts use content-addressed --target directories. On cache hits,
# run the existing extraction instead of unpacking the payload again.
if test x"\\$keep" = xy -a x"\\$script" != x -a -f "\\$targetdir/{COMPLETE_SENTINEL}"; then
cd "\\$targetdir" || {{
echo "Cannot enter cached target directory \\$targetdir" >&2
exit 1
}}
if test x"\\$quiet" = xn; then
echo "Using cached extraction in \\$targetdir"
fi
res=0
if test x"\\$verbose" = xy; then
MS_Printf "OK to execute: \\$script \\$scriptargs \\$* ? [Y/n] "
read yn
if test x"\\$yn" = x -o x"\\$yn" = xy -o x"\\$yn" = xY; then
eval "\\"\\$script\\" \\$scriptargs "\\\\\\$@""; res=\\$?
fi
else
eval "\\"\\$script\\" \\$scriptargs "\\\\\\$@""; res=\\$?
fi
exit \\$res
fi
"""
    # str.replace touches every occurrence of the marker line.
    output_header.write_text(header.replace(marker, cache_fast_path + marker), encoding="utf-8")
def pass_codex_options_through(header: str, source_header: Path) -> str:
    """Patch the Makeself option parser so unknown flags reach ``codex``.

    Makeself's own help is renamed from ``-h | --help`` to
    ``--makeself-help``, and the parser branch that rejects unrecognized
    ``-*`` flags is replaced with one that just stops option parsing.

    Args:
        header: Text of the Makeself header to patch.
        source_header: Path the text came from; used only in error messages.

    Returns:
        The patched header text.

    Raises:
        RuntimeError: If the unrecognized-flag branch is not present.
    """
    help_renames = (
        ("-h | --help)", "--makeself-help)"),
        (
            "\\$0 --help Print this message",
            "\\$0 --makeself-help Print this message",
        ),
    )
    for stock_text, patched_text in help_renames:
        header = header.replace(stock_text, patched_text)

    # Stock branch: report the flag, print help, exit 1.
    reject_branch = "\n".join(
        (
            " -*)",
            '\techo Unrecognized flag : "\\$1" >&2',
            "\tMS_Help",
            "\texit 1",
            "\t;;",
        )
    )
    # Replacement branch: leave the flag for the embedded program.
    stop_parsing_branch = "\n".join((" -*)", "\tbreak", "\t;;"))

    if reject_branch in header:
        return header.replace(reject_branch, stop_parsing_branch)
    raise RuntimeError(
        f"Unable to patch Makeself option parser; marker not found in {source_header}"
    )
def build_archive(
    makeself: str,
    compression: str,
    header: Path,
    staging_dir: Path,
    output_path: Path,
    target_dir: str,
    tree_hash: str,
) -> None:
    """Run makeself to wrap the staged payload into a ``.run`` archive.

    Args:
        makeself: makeself executable to invoke.
        compression: Compression mode, translated by
            ``makeself_compression_arg``.
        header: Patched Makeself header passed via ``--header``.
        staging_dir: Directory whose contents become the payload.
        output_path: Archive file to create; its parent is created if needed.
        target_dir: Runtime extraction directory passed via ``--target``.
        tree_hash: Payload digest, used in the label and packaging date.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)

    options = [
        makeself_compression_arg(compression),
        "--sha256",
        # The packaging-date string is derived from the payload hash.
        "--packaging-date",
        f"content-sha256:{tree_hash}",
        "--header",
        str(header),
        "--target",
        target_dir,
    ]
    # Positional order: archive dir, output file, label, startup script.
    positionals = [
        str(staging_dir),
        str(output_path),
        f"Codex dev build {tree_hash[:12]}",
        f"./{RUNNER_NAME}",
    ]
    run_command([makeself, *options, *positionals], cwd=REPO_ROOT)
def default_output_path() -> Path:
    """Return the archive path used when ``--output`` is not given."""
    return REPO_ROOT.joinpath("dist", "codex-dev", "codex-dev.run")
def main() -> int:
    """Build the binaries, stage the payload, and write the Makeself archive.

    Steps: validate the cache root, optionally run ``cargo build``, stage the
    payload in a temporary directory, hash it to derive the runtime
    extraction directory, patch the Makeself header, and invoke makeself.

    Returns:
        ``0`` on success.

    Raises:
        RuntimeError: For an invalid cache root, missing build outputs, or a
            Makeself header that cannot be found or patched.
        subprocess.CalledProcessError: If cargo or makeself fails.
    """
    args = parse_args()
    validate_cache_root(args.cache_root)
    include_bwrap = should_include_bwrap(args.include_bwrap)
    build_binaries(args.profile, include_bwrap, args.skip_cargo_build)
    build_dir = cargo_profile_output_dir(args.profile)

    # build_archive runs makeself with cwd=REPO_ROOT, so a relative --output
    # would be created/inspected here relative to the caller's directory but
    # written by makeself relative to the repository root. Anchor it once.
    output_path = (args.output or default_output_path()).resolve()

    # Same concern for a relative makeself *path*: subprocess resolves it
    # against the child's cwd. Bare command names are left for PATH lookup.
    makeself = args.makeself
    if os.sep in makeself or (os.altsep is not None and os.altsep in makeself):
        makeself = os.path.abspath(makeself)

    makeself_header = args.makeself_header or infer_makeself_header(makeself)
    require_file(makeself_header, "makeself header")

    # Build-time staging may live in a temp directory; only the runtime
    # extraction target must not.
    with tempfile.TemporaryDirectory(prefix="codex-makeself-") as temp_root_name:
        temp_root = Path(temp_root_name)
        staging_dir = temp_root / "payload"
        staging_dir.mkdir()
        patched_header = temp_root / "makeself-header-codex-cache.sh"

        stage_payload(build_dir, staging_dir, include_bwrap)
        tree_hash = hash_staged_tree(staging_dir)
        # Content-addressed extraction directory under the cache root.
        target_dir = f"{args.cache_root.rstrip('/')}/sha256-{tree_hash}"
        write_cached_makeself_header(makeself_header, patched_header)
        build_archive(
            makeself,
            args.compression,
            patched_header,
            staging_dir,
            output_path,
            target_dir,
            tree_hash,
        )

        if args.keep_staging_dir:
            # Copy the payload next to the archive before the temp dir is removed.
            kept_staging_dir = output_path.parent / f"payload-sha256-{tree_hash[:12]}"
            if kept_staging_dir.exists():
                shutil.rmtree(kept_staging_dir)
            shutil.copytree(staging_dir, kept_staging_dir, copy_function=shutil.copy2)
            print(f"Kept staged payload at {kept_staging_dir}")

    print(f"Wrote {output_path}")
    print(f"Payload sha256: {tree_hash}")
    print(f"Runtime cache target: {target_dir}")
    if not include_bwrap:
        print("bwrap was not bundled; pass --include-bwrap=always to require it.")
    return 0
if __name__ == "__main__":
    try:
        sys.exit(main())
    # Report expected failures as a one-line error instead of a traceback:
    # RuntimeError for validation problems raised by this script,
    # CalledProcessError when cargo/makeself exits non-zero, and OSError when
    # an executable or file cannot be found or accessed.
    except (RuntimeError, OSError, subprocess.CalledProcessError) as exc:
        print(f"error: {exc}", file=sys.stderr)
        sys.exit(1)