From cff22ddf68b80f2b9e45f596a7e6666c1acb8b67 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tg@trevorgross.com>
Date: Fri, 5 Dec 2025 20:49:06 -0500
Subject: [PATCH] ci: Simplify untar path list output

Currently the benchmark CI jobs print multiple pages of paths from the
extracted archive, since `tar` is run with `v`. This is a lot of output
that is usually just noise in CI.

Switch to printing the paths from Python instead, limiting to a depth of
three segments (and deduplicating). Removing the listing completely was
an option, but it's still nice to have a hint about what gets updated.
---
 ci/ci-util.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/ci/ci-util.py b/ci/ci-util.py
index 113820b7..ef9ce455 100755
--- a/ci/ci-util.py
+++ b/ci/ci-util.py
@@ -390,6 +390,7 @@ def locate_baseline(flags: list[str]) -> None:
 
     artifact_glob = f"{ARTIFACT_PREFIX}{f"-{tag}" if tag else ""}*"
 
+    # Skip checking because this will fail if the file already exists, which is fine.
     sp.run(
         ["gh", "run", "download", str(job_id), f"--pattern={artifact_glob}"],
         check=False,
@@ -409,7 +410,17 @@ def locate_baseline(flags: list[str]) -> None:
     candidate_baselines.sort(reverse=True)
     baseline_archive = candidate_baselines[0]
     eprint(f"extracting {baseline_archive}")
-    sp.run(["tar", "xJvf", baseline_archive], check=True)
+
+    all_paths = sp.check_output(["tar", "tJf", baseline_archive], encoding="utf8")
+    sp.run(["tar", "xJf", baseline_archive], check=True)
+
+    # Print a short summary of paths, we don't use `tar v` since the list is huge
+    short_paths = re.findall(r"^(?:[^/\n]+/?){1,3}", all_paths, re.MULTILINE)
+
+    print("Extracted:")
+    for path in sorted(set(short_paths)):
+        print(f"* {path}")
+
     eprint("baseline extracted successfully")