From d550f50f2c70b4b75b5441bff625fe05edd92282 Mon Sep 17 00:00:00 2001 From: Aethersailor <22260104+Aethersailor@users.noreply.github.com> Date: Thu, 21 May 2026 09:54:55 +0800 Subject: [PATCH] ci(parser): automate upstream parser sync --- .github/upstream-subconverter.applied.json | 1 + .github/upstream-subconverter.seen | 1 + .github/upstream-subconverter.skipped.json | 1 + .github/workflows/sync-upstream-parser.yml | 309 +++++++++++++ .gitignore | 7 + scripts/check_sync_guards.py | 130 ++++++ scripts/sync_upstream_parser.py | 493 +++++++++++++++++++++ 7 files changed, 942 insertions(+) create mode 100644 .github/upstream-subconverter.applied.json create mode 100644 .github/upstream-subconverter.seen create mode 100644 .github/upstream-subconverter.skipped.json create mode 100644 .github/workflows/sync-upstream-parser.yml create mode 100644 scripts/check_sync_guards.py create mode 100644 scripts/sync_upstream_parser.py diff --git a/.github/upstream-subconverter.applied.json b/.github/upstream-subconverter.applied.json new file mode 100644 index 0000000..fe51488 --- /dev/null +++ b/.github/upstream-subconverter.applied.json @@ -0,0 +1 @@ +[] diff --git a/.github/upstream-subconverter.seen b/.github/upstream-subconverter.seen new file mode 100644 index 0000000..6e4ca6c --- /dev/null +++ b/.github/upstream-subconverter.seen @@ -0,0 +1 @@ +633ecd5a3b33cf288658f0910fb2cc5faabd351c diff --git a/.github/upstream-subconverter.skipped.json b/.github/upstream-subconverter.skipped.json new file mode 100644 index 0000000..fe51488 --- /dev/null +++ b/.github/upstream-subconverter.skipped.json @@ -0,0 +1 @@ +[] diff --git a/.github/workflows/sync-upstream-parser.yml b/.github/workflows/sync-upstream-parser.yml new file mode 100644 index 0000000..c53e8e0 --- /dev/null +++ b/.github/workflows/sync-upstream-parser.yml @@ -0,0 +1,309 @@ +name: Sync Upstream Parser + +on: + schedule: + - cron: "0 20 * * *" + workflow_dispatch: {} + +permissions: + contents: write + issues: write + 
+concurrency: + group: sync-upstream-parser-dev + cancel-in-progress: false + +env: + # Scheduled workflows are read from the default branch, but parser syncs + # must be planned, tested, and committed on dev. + TARGET_BRANCH: dev + UPSTREAM_REPO: https://github.com/asdlokj1qpi233/subconverter.git + UPSTREAM_BRANCH: master + +jobs: + sync: + runs-on: ubuntu-latest + steps: + - name: Checkout dev + uses: actions/checkout@v6 + with: + ref: ${{ env.TARGET_BRANCH }} + fetch-depth: 0 + token: ${{ secrets.PAT_TOKEN || github.token }} + + - name: Configure Git + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + - name: Fetch upstream + run: | + set -euo pipefail + git remote add upstream "$UPSTREAM_REPO" 2>/dev/null || git remote set-url upstream "$UPSTREAM_REPO" + git fetch --no-tags upstream "$UPSTREAM_BRANCH" + + - name: Check protected integrations before sync + run: python3 scripts/check_sync_guards.py + + - name: Plan upstream parser sync + run: | + set -euo pipefail + python3 scripts/sync_upstream_parser.py plan \ + --upstream-ref "upstream/$UPSTREAM_BRANCH" \ + --max-commits 30 \ + --output upstream-sync-candidates.json \ + --report upstream-sync-plan.md + + - name: Inspect sync plan + id: plan + run: | + set -euo pipefail + python3 - <<'PY' >> "$GITHUB_OUTPUT" + import json + data = json.load(open("upstream-sync-candidates.json", encoding="utf-8")) + candidates = data.get("candidates", []) + reviewable = [item for item in candidates if item.get("safe_by_rules")] + print(f"candidate_count={len(candidates)}") + print(f"reviewable_count={len(reviewable)}") + print(f"has_candidates={'true' if candidates else 'false'}") + print(f"has_reviewable={'true' if reviewable else 'false'}") + PY + + - name: Check Copilot token + id: copilot_token + if: steps.plan.outputs.has_reviewable == 'true' + env: + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + run: | + if [ -n 
"${COPILOT_GITHUB_TOKEN:-}" ]; then + echo "available=true" >> "$GITHUB_OUTPUT" + else + echo "available=false" >> "$GITHUB_OUTPUT" + fi + + - name: Set up Node.js + if: steps.plan.outputs.has_reviewable == 'true' && steps.copilot_token.outputs.available == 'true' + uses: actions/setup-node@v6 + + - name: Ask Copilot to classify parser candidates + id: copilot_classify + if: steps.plan.outputs.has_reviewable == 'true' && steps.copilot_token.outputs.available == 'true' + continue-on-error: true + env: + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + run: | + set -euo pipefail + npm install -g @github/copilot + + PROMPT="$(cat <<'EOF' + You are reviewing upstream parser changes for SubConverter-Extended. + + The project has protected Mihomo/proxy-provider integrations. A change + may be automatically applied only when it is a low-risk parser-only + update and does not require output adapter changes. + + Return only strict JSON. Do not use Markdown. + + Required schema: + { + "decisions": [ + { + "sha": "full commit sha from the plan", + "decision": "safe_parser_only | needs_output_adapter | touches_protected_area | unsafe | skip", + "risk": "low | medium | high", + "reason": "short explanation", + "required_tests": ["test names"] + } + ] + } + + Only use "safe_parser_only" with "low" risk when the diff updates + parser-only behavior and can be safely applied without touching + Mihomo parser bridge, RawParams pass-through, proxy-provider logic, + FetchContext handling, nodemanip.cpp, or Clash output internals. 
- name: Determine changed source files
  id: changes
  if: steps.apply.outcome == 'success'
  run: |
    set -euo pipefail

    # sync_upstream_parser.py applies patches with `git apply -3`, which
    # implies --index: the patched parser files are already staged. A plain
    # `git diff` compares the worktree with the index and therefore never
    # lists them, which would leave has_source_changes=false and skip the
    # build/smoke test. Compare against HEAD so staged and unstaged edits
    # are both visible.
    CHANGED_FILES="$(git diff --name-only HEAD)"

    if [ -z "$CHANGED_FILES" ]; then
      echo "has_changes=false" >> "$GITHUB_OUTPUT"
      echo "has_source_changes=false" >> "$GITHUB_OUTPUT"
      exit 0
    fi

    echo "has_changes=true" >> "$GITHUB_OUTPUT"

    # Feed grep from a here-string rather than a pipe: with `pipefail`,
    # `grep -q` exiting early can make the producer die of SIGPIPE and turn
    # a real match into a failed pipeline.
    if grep -Eq '^(src|bridge|include|CMakeLists\.txt|Dockerfile)' <<< "$CHANGED_FILES"; then
      echo "has_source_changes=true" >> "$GITHUB_OUTPUT"
    else
      echo "has_source_changes=false" >> "$GITHUB_OUTPUT"
    fi
steps.changes.outputs.has_changes == 'true' + run: | + set -euo pipefail + git add \ + .github/upstream-subconverter.seen \ + .github/upstream-subconverter.applied.json \ + .github/upstream-subconverter.skipped.json \ + src/parser/subparser.cpp \ + src/parser/subparser.h \ + src/parser/config/proxy.h + + if git diff --staged --quiet; then + echo "No staged sync changes." + exit 0 + fi + + if [ "${{ steps.apply.outputs.has_applied }}" = "true" ]; then + git commit -m "chore(parser): sync upstream parser updates" + else + git commit -m "chore(parser): update upstream sync marker [skip ci]" + fi + + git fetch --no-tags origin "refs/heads/$TARGET_BRANCH:refs/remotes/origin/$TARGET_BRANCH" + git rebase "origin/$TARGET_BRANCH" + git push origin "HEAD:refs/heads/$TARGET_BRANCH" + + - name: Upload sync reports + if: always() + uses: actions/upload-artifact@v6 + with: + name: upstream-parser-sync-report + path: | + upstream-sync-candidates.json + upstream-sync-decisions.raw + upstream-sync-decisions.json + upstream-sync-plan.md + upstream-sync-result.json + upstream-sync-result.md + if-no-files-found: ignore + + - name: Report sync items that need attention + if: always() && (steps.apply.outputs.has_skipped == 'true' || (steps.plan.outputs.has_reviewable == 'true' && (steps.copilot_token.outputs.available != 'true' || steps.copilot_classify.outcome != 'success'))) + uses: actions/github-script@v9 + with: + script: | + const fs = require('fs'); + const title = 'Upstream parser sync requires attention'; + const marker = ''; + let body = `${marker}\nAutomated upstream parser sync on \`${process.env.TARGET_BRANCH}\` needs attention.\n\n`; + + if (fs.existsSync('upstream-sync-result.md')) { + body += fs.readFileSync('upstream-sync-result.md', 'utf8'); + } else if (fs.existsSync('upstream-sync-plan.md')) { + body += fs.readFileSync('upstream-sync-plan.md', 'utf8'); + body += '\n\nCopilot classification was unavailable or failed, so reviewable candidates were not applied.\n'; + } 
+ + const {data: issues} = await github.rest.issues.listForRepo({ + owner: context.repo.owner, + repo: context.repo.repo, + state: 'open', + labels: undefined, + per_page: 50 + }); + const existing = issues.find(issue => issue.title === title); + if (existing) { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: existing.number, + body + }); + } else { + await github.rest.issues.create({ + owner: context.repo.owner, + repo: context.repo.repo, + title, + body + }); + } diff --git a/.gitignore b/.gitignore index 019759d..64ceeaf 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,10 @@ bridge/libmihomo.a bridge/libmihomo.so bridge/libmihomo.dll bridge/libmihomo.dll.a + +__pycache__/ +*.pyc + +upstream-sync-*.json +upstream-sync-*.md +upstream-sync-decisions.raw diff --git a/scripts/check_sync_guards.py b/scripts/check_sync_guards.py new file mode 100644 index 0000000..1292442 --- /dev/null +++ b/scripts/check_sync_guards.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python3 +"""Guard checks for automated upstream parser syncs. + +The sync workflow may import parser fixes from upstream subconverter, but it +must never erase this fork's Mihomo/proxy-provider integration. This script +keeps those invariants machine-checkable. 
def run_checks(
    checks: list[dict[str, str]] | None = None,
    root: Path | None = None,
) -> list[dict[str, str]]:
    """Evaluate guard rules against files on disk and collect the failures.

    Args:
        checks: Rules to evaluate. Each rule is a mapping with the keys
            ``id``, ``file`` (path relative to *root*), ``pattern`` (a regular
            expression) and ``message``. Defaults to the module-level
            ``CHECKS`` table.
        root: Directory that rule paths are resolved against. Defaults to the
            module-level ``ROOT``.

    Returns:
        One ``{"id", "file", "message"}`` mapping per failed rule, in rule
        order. An empty list means every rule passed.
    """
    active_checks = CHECKS if checks is None else checks
    base_dir = ROOT if root is None else root

    failures: list[dict[str, str]] = []
    for check in active_checks:
        target = base_dir / check["file"]

        # is_file() rather than exists(): a directory sitting at the expected
        # path cannot satisfy the rule and must not crash read_text() below.
        if not target.is_file():
            failures.append(
                {
                    "id": check["id"],
                    "file": check["file"],
                    "message": f"Required file is missing: {check['file']}",
                }
            )
            continue

        # Undecodable bytes are dropped so a stray non-UTF-8 byte in a source
        # file cannot abort the whole guard run.
        text = target.read_text(encoding="utf-8", errors="ignore")
        if re.search(check["pattern"], text, flags=re.MULTILINE) is None:
            failures.append(
                {
                    "id": check["id"],
                    "file": check["file"],
                    "message": check["message"],
                }
            )

    return failures
def read_json_array(path: Path) -> list[dict[str, Any]]:
    """Load a JSON list from *path*, falling back to an empty list.

    An empty list is returned when the file does not exist, when its content
    is not valid JSON, or when the decoded top-level value is not a list.
    """
    parsed: Any = None
    if path.exists():
        raw_text = path.read_text(encoding="utf-8")
        try:
            parsed = json.loads(raw_text)
        except json.JSONDecodeError:
            # Unparseable state is treated the same as having no state.
            parsed = None

    if isinstance(parsed, list):
        return parsed
    return []
def path_is_protected(path: str) -> bool:
    """Tell whether *path* is one the automated sync must never modify.

    A path is protected when it is listed verbatim in ``PROTECTED_PATHS`` or
    begins with any entry of ``PROTECTED_PREFIXES``.
    """
    if path in PROTECTED_PATHS:
        return True
    # str.startswith accepts a tuple and tests every prefix in a single call.
    return path.startswith(tuple(PROTECTED_PREFIXES))
def plan(args: argparse.Namespace) -> int:
    """Enumerate unseen upstream commits and write the sync plan.

    Commits in ``<seen>..<upstream-ref>`` (merges excluded, oldest first) are
    classified and written to ``args.output`` as JSON and to ``args.report``
    as Markdown. When no seen marker is recorded, or the recorded commit is
    not present in the repository, the plan is a "bootstrap" plan with no
    candidates.

    The plan's ``upstream_head`` field is the commit the seen marker should
    advance to once the plan has been processed. It equals the resolved
    upstream tip unless ``--max-commits`` truncated the candidate list; in
    that case it is the last planned commit, so the commits left out of this
    run are picked up by the next one instead of being skipped for good. The
    real tip is always available as ``upstream_tip``.

    Args:
        args: Parsed CLI arguments providing ``upstream_ref``,
            ``max_commits`` (values <= 0 disable the cap), ``output`` and
            ``report``.

    Returns:
        Process exit code; always 0 (failures surface as exceptions).
    """
    upstream_tip = git("rev-parse", args.upstream_ref).strip()
    seen = read_seen()

    commits: list[str] = []
    bootstrap = True
    if seen:
        exists = run("git", "cat-file", "-e", f"{seen}^{{commit}}", check=False)
        if exists.returncode == 0:
            bootstrap = False
            commits_out = git(
                "rev-list",
                "--reverse",
                "--no-merges",
                f"{seen}..{args.upstream_ref}",
            )
            commits = [line.strip() for line in commits_out.splitlines() if line.strip()]

    pending_total = len(commits)
    if args.max_commits > 0:
        commits = commits[: args.max_commits]
    truncated = len(commits) < pending_total

    # rev-list never emits a parent before one of its children, so in the
    # reversed listing every in-range ancestor of commits[-1] sits earlier in
    # the list. Advancing the marker to commits[-1] therefore cannot hide a
    # commit that was cut off by the cap.
    marker_target = commits[-1] if truncated else upstream_tip

    candidates = [classify_commit(sha) for sha in commits]
    data = {
        "generated_at": utc_now(),
        "upstream_ref": args.upstream_ref,
        "upstream_head": marker_target,
        "upstream_tip": upstream_tip,
        "truncated": truncated,
        "deferred_commits": pending_total - len(commits),
        "seen": seen,
        "bootstrap": bootstrap,
        "allowed_auto_paths": sorted(ALLOWED_AUTO_PATHS),
        "protected_paths": sorted(PROTECTED_PATHS),
        "protected_prefixes": list(PROTECTED_PREFIXES),
        "safe_decision": SAFE_DECISION,
        "candidates": candidates,
    }

    write_json(Path(args.output), data)
    write_plan_report(Path(args.report), data)

    safe_count = sum(1 for item in candidates if item["safe_by_rules"])
    print(f"Planned {len(candidates)} upstream commits ({safe_count} rule-safe).")
    if truncated:
        print(
            f"Deferred {pending_total - len(commits)} newer upstream commits "
            "to the next run (--max-commits reached)."
        )
    if bootstrap:
        print("No seen marker was available; plan bootstrapped without candidates.")
    return 0
def load_decisions(path: Path) -> dict[str, dict[str, Any]]:
    """Read classifier decisions from *path* and index them by commit SHA.

    The file may hold either an object with a ``decisions`` array or a bare
    array of decision objects. Entries that are not objects, or whose ``sha``
    is missing, not a string, or blank, are ignored. Surrounding whitespace
    in a ``sha`` is stripped for the lookup key; the stored entry itself is
    left untouched. If a SHA appears more than once, the last entry wins.

    Args:
        path: Location of the JSON decision file.

    Returns:
        Mapping from commit SHA to its decision object.

    Raises:
        ValueError: If the document does not contain a decisions array.
        json.JSONDecodeError: If the file is not valid JSON.
        OSError: If the file cannot be read.
    """
    data = json.loads(path.read_text(encoding="utf-8").strip())

    # Only a mapping has .get(); a bare top-level list is itself the array.
    decisions = data.get("decisions") if isinstance(data, dict) else data
    if not isinstance(decisions, list):
        raise ValueError("Copilot decision file must contain a decisions array.")

    result: dict[str, dict[str, Any]] = {}
    for item in decisions:
        if not isinstance(item, dict):
            continue
        sha = item.get("sha")
        # Model output is untrusted: a non-string sha (list, number, null)
        # can never match a planned commit and may not even be hashable.
        if not isinstance(sha, str) or not sha.strip():
            continue
        result[sha.strip()] = item
    return result
def append_state(path: Path, entries: list[dict[str, Any]]) -> None:
    """Append *entries* to the JSON array stored at *path*.

    Nothing is read or written when *entries* is empty. Otherwise the array
    currently stored in the file (as returned by ``read_json_array``) is
    extended with the new entries and written back with ``write_json``.
    """
    if entries:
        write_json(path, [*read_json_array(path), *entries])
skipped_entries.append( + {**record_base, "reason": "No Copilot decision was supplied."} + ) + continue + + if decision.get("decision") != SAFE_DECISION or decision.get("risk") != "low": + skipped_entries.append( + { + **record_base, + "reason": decision.get("reason", "Copilot did not approve automatic sync."), + "copilot_decision": decision, + } + ) + continue + + paths = item.get("allowed_paths", []) + backup = snapshot_paths(paths) + ok, message = apply_patch_for_commit(sha, paths) + if not ok: + restore_snapshot(backup) + skipped_entries.append({**record_base, "reason": message}) + continue + + guards_ok, guards_output = run_guards() + if not guards_ok: + restore_snapshot(backup) + skipped_entries.append( + { + **record_base, + "reason": "Guard checks failed after applying patch.", + "guard_output": guards_output, + } + ) + continue + + applied_entries.append( + { + **record_base, + "paths": paths, + "copilot_reason": decision.get("reason", ""), + } + ) + + append_state(APPLIED_FILE, applied_entries) + append_state(SKIPPED_FILE, skipped_entries) + + if plan_data.get("upstream_head"): + SEEN_FILE.write_text(plan_data["upstream_head"] + "\n", encoding="utf-8") + + result = { + "generated_at": utc_now(), + "upstream_head": plan_data.get("upstream_head"), + "applied": applied_entries, + "skipped": skipped_entries, + "ignored": ignored_entries, + } + write_json(Path(args.result), result) + write_apply_report(Path(args.report), result) + + print( + f"Applied {len(applied_entries)} commits; " + f"skipped {len(skipped_entries)} commits; " + f"ignored {len(ignored_entries)} commits." 
def write_apply_report(path: Path, result: dict[str, Any]) -> None:
    """Render the outcome of an apply run as a Markdown report at *path*.

    The report has a summary header (generation time, upstream head and the
    applied/skipped/ignored counts) followed by an "Applied" and a "Skipped"
    section. Each section lists one bullet per commit, or a placeholder
    sentence when it has no entries.
    """
    applied = result["applied"]
    skipped = result["skipped"]
    ignored_total = len(result.get("ignored", []))
    head_label = result.get("upstream_head") or "unknown"

    header = [
        "# Upstream Parser Sync Result",
        "",
        f"- Generated: {result['generated_at']}",
        f"- Upstream head: `{head_label}`",
        f"- Applied: {len(applied)}",
        f"- Skipped: {len(skipped)}",
        f"- Ignored: {ignored_total}",
        "",
    ]

    applied_section = ["## Applied", ""]
    if applied:
        applied_section += [
            f"- `{entry['sha'][:12]}` {entry['subject']}" for entry in applied
        ]
    else:
        applied_section.append("No commits were applied.")

    skipped_section = ["", "## Skipped", ""]
    if skipped:
        skipped_section += [
            f"- `{entry['sha'][:12]}` {entry['subject']}: {entry['reason']}"
            for entry in skipped
        ]
    else:
        skipped_section.append("No commits were skipped.")

    document = "\n".join(header + applied_section + skipped_section) + "\n"
    path.write_text(document, encoding="utf-8")