|
| 1 | +#!/usr/bin/env python3 |
| 2 | +import argparse |
| 3 | +import fnmatch |
| 4 | +import json |
| 5 | +import os |
| 6 | +import re |
| 7 | +import shlex |
| 8 | +import subprocess |
| 9 | +import sys |
| 10 | + |
| 11 | +from dataclasses import dataclass |
| 12 | +from functools import cached_property, total_ordering |
| 13 | +from typing import Any |
| 14 | + |
| 15 | + |
# Environment entries merged into every job's environment by Job.env.
# They are the right-hand operand of the dict union there, so they win over
# same-named entries coming from the buildspec.
DEFAULT_ENV = {"CI": "true", "PYTHONIOENCODING": "utf-8"}
| 20 | + |
| 21 | + |
# A job whose serialized description contains any of these terms is not run
# on public infrastructure and is therefore left out of the matrix.
JOB_EXCLUSION_TERMS = ("enterprise", "corporate-compliance")
| 28 | + |
| 29 | + |
@dataclass
class Artifact:
    """Name/pattern pair describing an artifact a job uploads or downloads."""

    # Artifact name; for downloads this may be a glob covering several names.
    name: str
    # For uploads: space-separated file patterns; for downloads: a directory.
    pattern: str
| 34 | + |
| 35 | + |
| 36 | +@total_ordering |
| 37 | +class Job: |
| 38 | + def __init__(self, job: dict[str, Any]): |
| 39 | + self.job = job |
| 40 | + |
| 41 | + @cached_property |
| 42 | + def runs_on(self) -> str: |
| 43 | + capabilities = self.job.get("capabilities", []) |
| 44 | + |
| 45 | + available_oss = { |
| 46 | + "macos-latest": ["darwin", "aarch64"], |
| 47 | + "ubuntu-24.04-arm": ["linux", "aarch64"], |
| 48 | + "windows-latest": ["windows", "amd64"] |
| 49 | + } |
| 50 | + |
| 51 | + for os, caps in available_oss.items(): |
| 52 | + if all(required in capabilities for required in caps): return os |
| 53 | + |
| 54 | + return "ubuntu-latest" |
| 55 | + |
| 56 | + @cached_property |
| 57 | + def name(self) -> str: |
| 58 | + return self.job["name"] |
| 59 | + |
| 60 | + @cached_property |
| 61 | + def targets(self) -> list[str]: |
| 62 | + return self.job.get("targets", []) |
| 63 | + |
| 64 | + @cached_property |
| 65 | + def env(self) -> dict[str, str]: |
| 66 | + return self.job.get("environment", {}) | DEFAULT_ENV |
| 67 | + |
| 68 | + @cached_property |
| 69 | + def mx_version(self) -> str | None: |
| 70 | + for k, v in self.job.get("packages", {}).items(): |
| 71 | + if k == "mx": |
| 72 | + return v.strip("=<>~") |
| 73 | + |
| 74 | + @cached_property |
| 75 | + def python_version(self) -> str | None: |
| 76 | + python_version = None |
| 77 | + for k, v in self.job.get("packages", {}).items(): |
| 78 | + if k == "python3": |
| 79 | + python_version = v.strip("=<>~") |
| 80 | + for k, v in self.job.get("downloads", {}).items(): |
| 81 | + if k == "PYTHON3_HOME": |
| 82 | + python_version = v.get("version", python_version) |
| 83 | + if "MX_PYTHON" in self.env: |
| 84 | + del self.env["MX_PYTHON"] |
| 85 | + if "MX_PYTHON_VERSION" in self.env: |
| 86 | + del self.env["MX_PYTHON_VERSION"] |
| 87 | + return python_version |
| 88 | + |
| 89 | + @cached_property |
| 90 | + def system_packages(self) -> list[str]: |
| 91 | + # TODO: support more packages |
| 92 | + system_packages = [] |
| 93 | + for k, _ in self.job.get("packages", {}).items(): |
| 94 | + if k.startswith("pip:"): |
| 95 | + continue |
| 96 | + elif k.startswith("00:") or k.startswith("01:"): |
| 97 | + k = k[3:] |
| 98 | + system_packages.append(f"'{k}'" if self.runs_on != "windows-latest" else f"{k}") |
| 99 | + return system_packages |
| 100 | + |
| 101 | + @cached_property |
| 102 | + def python_packages(self) -> list[str]: |
| 103 | + python_packages = [] |
| 104 | + for k, v in self.job.get("packages", {}).items(): |
| 105 | + if k.startswith("pip:"): |
| 106 | + python_packages.append(f"'{k[4:]}{v}'" if self.runs_on != "windows-latest" else f"{k[4:]}{v}") |
| 107 | + return python_packages |
| 108 | + |
| 109 | + @cached_property |
| 110 | + def downloads(self) -> dict[str, str] | None: |
| 111 | + # TODO |
| 112 | + return None |
| 113 | + |
| 114 | + @staticmethod |
| 115 | + def common_glob(strings: list[str]) -> str: |
| 116 | + assert strings |
| 117 | + if len(strings) == 1: |
| 118 | + return strings[0] |
| 119 | + prefix = strings[0] |
| 120 | + for s in strings[1:]: |
| 121 | + i = 0 |
| 122 | + while i < len(prefix) and i < len(s) and prefix[i] == s[i]: |
| 123 | + i += 1 |
| 124 | + prefix = prefix[:i] |
| 125 | + if not prefix: |
| 126 | + break |
| 127 | + suffix = strings[0][len(prefix):] |
| 128 | + for s in strings[1:]: |
| 129 | + i = 1 |
| 130 | + while i <= len(suffix) and i <= len(s) and suffix[-i] == s[-i]: |
| 131 | + i += 1 |
| 132 | + if i == 1: |
| 133 | + suffix = "" |
| 134 | + break |
| 135 | + suffix = suffix[-(i-1):] |
| 136 | + return f"{prefix}*{suffix}" |
| 137 | + |
| 138 | + @cached_property |
| 139 | + def upload_artifact(self) -> Artifact | None: |
| 140 | + if artifacts := self.job.get("publishArtifacts", []): |
| 141 | + assert len(artifacts) == 1 |
| 142 | + dir = artifacts[0].get("dir", ".") |
| 143 | + patterns = artifacts[0].get("patterns", ["*"]) |
| 144 | + return Artifact( |
| 145 | + artifacts[0]["name"], |
| 146 | + " ".join([os.path.normpath(os.path.join(dir, p)) for p in patterns]) |
| 147 | + ) |
| 148 | + return None |
| 149 | + |
| 150 | + @cached_property |
| 151 | + def download_artifact(self) -> Artifact | None: |
| 152 | + if artifacts := self.job.get("requireArtifacts", []): |
| 153 | + pattern = self.common_glob([a["name"] for a in artifacts]) |
| 154 | + return Artifact(pattern, os.path.normpath(artifacts[0].get("dir", "."))) |
| 155 | + return None |
| 156 | + |
| 157 | + @staticmethod |
| 158 | + def flatten_command(args: list[str | list[str]]) -> list[str]: |
| 159 | + flattened_args = [] |
| 160 | + for s in args: |
| 161 | + if isinstance(s, list): |
| 162 | + flattened_args.append(f"$( {shlex.join(s)} )") |
| 163 | + else: |
| 164 | + flattened_args.append(s) |
| 165 | + return flattened_args |
| 166 | + |
| 167 | + @cached_property |
| 168 | + def setup(self) -> str: |
| 169 | + cmds = [self.flatten_command(step) for step in self.job.get("setup", [])] |
| 170 | + return "\n".join(shlex.join(s) for s in cmds) |
| 171 | + |
| 172 | + @cached_property |
| 173 | + def run(self) -> str: |
| 174 | + cmds = [self.flatten_command(step) for step in self.job.get("run", [])] |
| 175 | + return "\n".join(shlex.join(s) for s in cmds) |
| 176 | + |
| 177 | + @cached_property |
| 178 | + def logs(self) -> str: |
| 179 | + return "\n".join(os.path.normpath(p) for p in self.job.get("logs", [])) |
| 180 | + |
| 181 | + def to_dict(self): |
| 182 | + """ |
| 183 | + This is the interchange with the YAML file defining the Github jobs, so here |
| 184 | + is where we must match the strings and expectations of the Github workflow. |
| 185 | + """ |
| 186 | + return { |
| 187 | + "name": self.name, |
| 188 | + "mx_version": self.mx_version, |
| 189 | + "os": self.runs_on, |
| 190 | + "python_version": self.python_version, |
| 191 | + "setup_steps": self.setup, |
| 192 | + "run_steps": self.run, |
| 193 | + "python_packages": " ".join(self.python_packages), |
| 194 | + "system_packages": " ".join(self.system_packages), |
| 195 | + "provide_artifact": [self.upload_artifact.name, self.upload_artifact.pattern] if self.upload_artifact else None, |
| 196 | + "require_artifact": [self.download_artifact.name, self.download_artifact.pattern] if self.download_artifact else None, |
| 197 | + "logs": self.logs.replace("../", "${{ env.PARENT_DIRECTORY }}/"), |
| 198 | + "env": self.env, |
| 199 | + } |
| 200 | + |
| 201 | + def __str__(self): |
| 202 | + return str(self.to_dict()) |
| 203 | + |
| 204 | + def __eq__(self, other): |
| 205 | + if isinstance(other, Job): |
| 206 | + return self.to_dict() == other.to_dict() |
| 207 | + return NotImplemented |
| 208 | + |
| 209 | + def __gt__(self, other): |
| 210 | + if isinstance(other, Job): |
| 211 | + if self.job.get("runAfter") == other.name: |
| 212 | + return True |
| 213 | + if self.download_artifact and not other.download_artifact: |
| 214 | + return True |
| 215 | + if self.download_artifact and other.upload_artifact: |
| 216 | + if fnmatch.fnmatch(other.upload_artifact.name, self.download_artifact.name): |
| 217 | + return True |
| 218 | + if not self.upload_artifact: |
| 219 | + return True |
| 220 | + return False |
| 221 | + return NotImplemented |
| 222 | + |
| 223 | + |
def get_tagged_jobs(buildspec, target, filter=None):
    """Select the buildspec jobs to run on GitHub for ``target``.

    The result always starts with a placeholder entry named after the target.
    It is followed by every sorted job that is tagged with ``target``, whose
    name matches the optional ``filter`` regex (``re.match``, i.e. anchored at
    the start), that runs on ``ubuntu-latest`` and whose serialized form
    contains none of ``JOB_EXCLUSION_TERMS``.
    """
    selected = [Job({"name": target}).to_dict()]
    candidates = sorted(Job(spec) for spec in buildspec.get("builds", []))
    for candidate in candidates:
        if (
            any(tag for tag in candidate.targets if tag in (target,))
            and (not filter or re.match(filter, candidate.name))
            and candidate.runs_on == "ubuntu-latest"
            and not any(term in str(candidate) for term in JOB_EXCLUSION_TERMS)
        ):
            selected.append(candidate.to_dict())
    return selected
| 237 | + |
| 238 | + |
def main(jsonnet_bin, ci_jsonnet, target, filter=None, indent=False):
    """Evaluate the Jsonnet CI spec and print the job matrix as JSON.

    Runs ``jsonnet_bin`` on ``ci_jsonnet``, parses its output as JSON, selects
    the jobs for ``target`` (optionally narrowed by the ``filter`` regex) and
    prints them to stdout, indented by two spaces when ``indent`` is true.
    """
    rendered = subprocess.check_output([jsonnet_bin, ci_jsonnet], text=True)
    matrix = get_tagged_jobs(json.loads(rendered), target, filter=filter)
    json_indent = 2 if indent else None
    print(json.dumps(matrix, indent=json_indent))
| 245 | + |
| 246 | + |
if __name__ == "__main__":
    # Command-line entry point: three required positionals, an optional
    # job-name regex and an --indent switch.
    cli = argparse.ArgumentParser(description="Generate GitHub CI matrix from Jsonnet buildspec.")
    cli.add_argument("jsonnet_bin", help="Path to jsonnet binary")
    cli.add_argument("ci_jsonnet", help="Path to ci.jsonnet spec")
    cli.add_argument("target", help="Target name (e.g., tier1)")
    cli.add_argument("filter", nargs="?", default=None, help="Regex filter for job names (optional)")
    cli.add_argument("--indent", action="store_true", help="Indent output JSON")
    options = cli.parse_args()

    # Indent when asked to, or whenever stdout is an interactive terminal.
    pretty = options.indent or sys.stdout.isatty()
    main(
        jsonnet_bin=options.jsonnet_bin,
        ci_jsonnet=options.ci_jsonnet,
        target=options.target,
        filter=options.filter,
        indent=pretty,
    )
0 commit comments