Fix PR babysitter review comment monitoring (#16363)

## Summary
- prioritize newly surfaced review comments ahead of CI and mergeability
handling in the PR babysitter watcher
- keep `--watch` running for open PRs even when they are currently
merge-ready so later review feedback is not missed
This commit is contained in:
Eric Traut
2026-03-31 14:25:32 -06:00
committed by GitHub
parent e8de4ea953
commit 0fe873ad5f
4 changed files with 183 additions and 28 deletions

View File

@@ -45,7 +45,6 @@ MERGE_CONFLICT_OR_BLOCKING_STATES = {
"DRAFT",
"UNKNOWN",
}
GREEN_STATE_MAX_POLL_SECONDS = 60 * 60
class GhCommandError(RuntimeError):
@@ -578,7 +577,7 @@ def recommend_actions(pr, checks_summary, failed_runs, new_review_items, retries
return unique_actions(actions)
if is_pr_ready_to_merge(pr, checks_summary, new_review_items):
actions.append("stop_ready_to_merge")
actions.append("ready_to_merge")
return unique_actions(actions)
if new_review_items:
@@ -606,12 +605,6 @@ def collect_snapshot(args):
if not state.get("started_at"):
state["started_at"] = int(time.time())
# `gh pr checks -R <repo>` requires an explicit PR/branch/url argument.
# After resolving `--pr auto`, reuse the concrete PR number.
checks = get_pr_checks(str(pr["number"]), repo=pr["repo"])
checks_summary = summarize_checks(checks)
workflow_runs = get_workflow_runs_for_sha(pr["repo"], pr["head_sha"])
failed_runs = failed_runs_from_workflow_runs(workflow_runs, pr["head_sha"])
authenticated_login = get_authenticated_login()
new_review_items = fetch_new_review_items(
pr,
@@ -619,6 +612,15 @@ def collect_snapshot(args):
fresh_state=fresh_state,
authenticated_login=authenticated_login,
)
# Surface review feedback before drilling into CI and mergeability details.
# That keeps the babysitter responsive to new comments even when other
# actions are also available.
# `gh pr checks -R <repo>` requires an explicit PR/branch/url argument.
# After resolving `--pr auto`, reuse the concrete PR number.
checks = get_pr_checks(str(pr["number"]), repo=pr["repo"])
checks_summary = summarize_checks(checks)
workflow_runs = get_workflow_runs_for_sha(pr["repo"], pr["head_sha"])
failed_runs = failed_runs_from_workflow_runs(workflow_runs, pr["head_sha"])
retries_used = current_retry_count(state, pr["head_sha"])
actions = recommend_actions(
@@ -761,7 +763,6 @@ def run_watch(args):
if (
"stop_pr_closed" in actions
or "stop_exhausted_retries" in actions
or "stop_ready_to_merge" in actions
):
print_event("stop", {"actions": snapshot.get("actions"), "pr": snapshot.get("pr")})
return 0
@@ -769,13 +770,13 @@ def run_watch(args):
current_change_key = snapshot_change_key(snapshot)
changed = current_change_key != last_change_key
green = is_ci_green(snapshot)
pr = snapshot.get("pr") or {}
pr_open = not bool(pr.get("closed")) and not bool(pr.get("merged"))
if not green:
if not green or pr_open:
poll_seconds = args.poll_seconds
elif changed or last_change_key is None:
poll_seconds = args.poll_seconds
else:
poll_seconds = min(poll_seconds * 2, GREEN_STATE_MAX_POLL_SECONDS)
last_change_key = current_change_key
time.sleep(poll_seconds)

View File

@@ -0,0 +1,155 @@
import argparse
import importlib.util
from pathlib import Path
import pytest
MODULE_PATH = Path(__file__).with_name("gh_pr_watch.py")
MODULE_SPEC = importlib.util.spec_from_file_location("gh_pr_watch", MODULE_PATH)
gh_pr_watch = importlib.util.module_from_spec(MODULE_SPEC)
assert MODULE_SPEC.loader is not None
MODULE_SPEC.loader.exec_module(gh_pr_watch)
def sample_pr():
return {
"number": 123,
"url": "https://github.com/openai/codex/pull/123",
"repo": "openai/codex",
"head_sha": "abc123",
"head_branch": "feature",
"state": "OPEN",
"merged": False,
"closed": False,
"mergeable": "MERGEABLE",
"merge_state_status": "CLEAN",
"review_decision": "",
}
def sample_checks(**overrides):
checks = {
"pending_count": 0,
"failed_count": 0,
"passed_count": 12,
"all_terminal": True,
}
checks.update(overrides)
return checks
def test_collect_snapshot_fetches_review_items_before_ci(monkeypatch, tmp_path):
call_order = []
pr = sample_pr()
monkeypatch.setattr(gh_pr_watch, "resolve_pr", lambda *args, **kwargs: pr)
monkeypatch.setattr(gh_pr_watch, "load_state", lambda path: ({}, True))
monkeypatch.setattr(
gh_pr_watch,
"get_authenticated_login",
lambda: call_order.append("auth") or "octocat",
)
monkeypatch.setattr(
gh_pr_watch,
"fetch_new_review_items",
lambda *args, **kwargs: call_order.append("review") or [],
)
monkeypatch.setattr(
gh_pr_watch,
"get_pr_checks",
lambda *args, **kwargs: call_order.append("checks") or [],
)
monkeypatch.setattr(
gh_pr_watch,
"summarize_checks",
lambda checks: call_order.append("summarize") or sample_checks(),
)
monkeypatch.setattr(
gh_pr_watch,
"get_workflow_runs_for_sha",
lambda *args, **kwargs: call_order.append("workflow") or [],
)
monkeypatch.setattr(
gh_pr_watch,
"failed_runs_from_workflow_runs",
lambda *args, **kwargs: call_order.append("failed_runs") or [],
)
monkeypatch.setattr(
gh_pr_watch,
"recommend_actions",
lambda *args, **kwargs: call_order.append("recommend") or ["idle"],
)
monkeypatch.setattr(gh_pr_watch, "save_state", lambda *args, **kwargs: None)
args = argparse.Namespace(
pr="123",
repo=None,
state_file=str(tmp_path / "watcher-state.json"),
max_flaky_retries=3,
)
gh_pr_watch.collect_snapshot(args)
assert call_order.index("review") < call_order.index("checks")
assert call_order.index("review") < call_order.index("workflow")
def test_recommend_actions_prioritizes_review_comments():
actions = gh_pr_watch.recommend_actions(
sample_pr(),
sample_checks(failed_count=1),
[{"run_id": 99}],
[{"kind": "review_comment", "id": "1"}],
0,
3,
)
assert actions == [
"process_review_comment",
"diagnose_ci_failure",
"retry_failed_checks",
]
def test_run_watch_keeps_polling_open_ready_to_merge_pr(monkeypatch):
sleeps = []
events = []
snapshot = {
"pr": sample_pr(),
"checks": sample_checks(),
"failed_runs": [],
"new_review_items": [],
"actions": ["ready_to_merge"],
"retry_state": {
"current_sha_retries_used": 0,
"max_flaky_retries": 3,
},
}
monkeypatch.setattr(
gh_pr_watch,
"collect_snapshot",
lambda args: (snapshot, Path("/tmp/codex-babysit-pr-state.json")),
)
monkeypatch.setattr(
gh_pr_watch,
"print_event",
lambda event, payload: events.append((event, payload)),
)
class StopWatch(Exception):
pass
def fake_sleep(seconds):
sleeps.append(seconds)
if len(sleeps) >= 2:
raise StopWatch
monkeypatch.setattr(gh_pr_watch.time, "sleep", fake_sleep)
with pytest.raises(StopWatch):
gh_pr_watch.run_watch(argparse.Namespace(poll_seconds=30))
assert sleeps == [30, 30]
assert [event for event, _ in events] == ["snapshot", "snapshot"]