From 0399c24c9fcce998bbce0b9d9bebe25a37f7213c Mon Sep 17 00:00:00 2001
From: opsmithe <anyanwuchigozieprosper@gmail.com>
Date: Mon, 24 Nov 2025 10:19:02 +0100
Subject: [PATCH 1/2] Add safe file handling with QuantifyingException for
 process and report scripts

---
 scripts/2-process/gcs_process.py    |  3 +++
 scripts/2-process/github_process.py |  1 +
 scripts/3-report/gcs_report.py      |  9 +++++++++
 scripts/3-report/github_report.py   |  2 ++
 scripts/shared.py                   | 11 +++++++++++
 5 files changed, 26 insertions(+)

diff --git a/scripts/2-process/gcs_process.py b/scripts/2-process/gcs_process.py
index c5d354b7..4d74cb23 100755
--- a/scripts/2-process/gcs_process.py
+++ b/scripts/2-process/gcs_process.py
@@ -311,6 +311,7 @@ def main():
 
     # Count data
     file1_count = shared.path_join(PATHS["data_1-fetch"], "gcs_1_count.csv")
+    shared.safe_open_file(file1_count, "process GCS count data")
     count_data = pd.read_csv(file1_count, usecols=["TOOL_IDENTIFIER", "COUNT"])
     process_product_totals(args, count_data)
     process_latest_prior_retired_totals(args, count_data)
@@ -321,6 +322,7 @@ def main():
     file2_language = shared.path_join(
         PATHS["data_1-fetch"], "gcs_2_count_by_language.csv"
     )
+    shared.safe_open_file(file2_language, "process GCS language data")
     language_data = pd.read_csv(
         file2_language, usecols=["TOOL_IDENTIFIER", "LANGUAGE", "COUNT"]
     )
@@ -330,6 +332,7 @@ def main():
     file3_country = shared.path_join(
         PATHS["data_1-fetch"], "gcs_3_count_by_country.csv"
     )
+    shared.safe_open_file(file3_country, "process GCS country data")
     country_data = pd.read_csv(
         file3_country, usecols=["TOOL_IDENTIFIER", "COUNTRY", "COUNT"]
     )
diff --git a/scripts/2-process/github_process.py b/scripts/2-process/github_process.py
index ae9d261a..76743dd0 100755
--- a/scripts/2-process/github_process.py
+++ b/scripts/2-process/github_process.py
@@ -178,6 +178,7 @@ def main():
     shared.git_fetch_and_merge(args, PATHS["repo"])
 
     file_count = shared.path_join(PATHS["data_1-fetch"], "github_1_count.csv")
+    shared.safe_open_file(file_count, "process GitHub count data")
     count_data = pd.read_csv(file_count, usecols=["TOOL_IDENTIFIER", "COUNT"])
     process_totals_by_license(args, count_data)
     process_totals_by_restriction(args, count_data)
diff --git a/scripts/3-report/gcs_report.py b/scripts/3-report/gcs_report.py
index 105313fa..80b0aa35 100755
--- a/scripts/3-report/gcs_report.py
+++ b/scripts/3-report/gcs_report.py
@@ -79,6 +79,7 @@ def gcs_intro(args):
         "gcs_product_totals.csv",
     )
     LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
+    shared.safe_open_file(file_path, "generate GCS intro")
     name_label = "CC legal tool product"
     data = pd.read_csv(file_path, index_col=name_label)
     total_count = f"{data['Count'].sum():,d}"
@@ -110,6 +111,7 @@ def plot_products(args):
         PATHS["data_2-process"], "gcs_product_totals.csv"
     )
     LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
+    shared.safe_open_file(file_path, "generate GCS products report")
     name_label = "CC legal tool product"
     data = pd.read_csv(file_path, index_col=name_label)
     data = data[::-1]  # reverse order
@@ -155,6 +157,7 @@ def plot_tool_status(args):
         "gcs_status_combined_totals.csv",
     )
     LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
+    shared.safe_open_file(file_path, "generate GCS tool status report")
     name_label = "CC legal tool"
     data = pd.read_csv(file_path, index_col=name_label)
     data.sort_values(name_label, ascending=False, inplace=True)
@@ -198,6 +201,7 @@ def plot_latest_tools(args):
         "gcs_status_latest_totals.csv",
     )
     LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
+    shared.safe_open_file(file_path, "generate GCS latest tools report")
     name_label = "CC legal tool"
     data = pd.read_csv(file_path, index_col=name_label)
     data.sort_values(name_label, ascending=False, inplace=True)
@@ -240,6 +244,7 @@ def plot_prior_tools(args):
         PATHS["data_2-process"], "gcs_status_prior_totals.csv"
     )
     LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
+    shared.safe_open_file(file_path, "generate GCS prior tools report")
     name_label = "CC legal tool"
     data = pd.read_csv(file_path, index_col=name_label)
     data.sort_values(name_label, ascending=False, inplace=True)
@@ -285,6 +290,7 @@ def plot_retired_tools(args):
         "gcs_status_retired_totals.csv",
     )
     LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
+    shared.safe_open_file(file_path, "generate GCS retired tools report")
     name_label = "CC legal tool"
     data = pd.read_csv(file_path, index_col=name_label)
     data.sort_values(name_label, ascending=False, inplace=True)
@@ -330,6 +336,7 @@ def plot_countries_highest_usage(args):
         PATHS["data_2-process"], "gcs_totals_by_country.csv"
     )
     LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
+    shared.safe_open_file(file_path, "generate GCS countries report")
     name_label = "Country"
     data_label = "Count"
     data = pd.read_csv(file_path, index_col=name_label)
@@ -383,6 +390,7 @@ def plot_languages_highest_usage(args):
         PATHS["data_2-process"], "gcs_totals_by_language.csv"
     )
     LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
+    shared.safe_open_file(file_path, "generate GCS languages report")
     name_label = "Language"
     data_label = "Count"
     data = pd.read_csv(file_path, index_col=name_label)
@@ -437,6 +445,7 @@ def plot_free_culture(args):
         "gcs_totals_by_free_cultural.csv",
     )
     LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
+    shared.safe_open_file(file_path, "generate GCS free culture report")
     name_label = "Category"
     data_label = "Count"
     data = pd.read_csv(file_path, index_col=name_label)
diff --git a/scripts/3-report/github_report.py b/scripts/3-report/github_report.py
index 7de0189c..8360603a 100755
--- a/scripts/3-report/github_report.py
+++ b/scripts/3-report/github_report.py
@@ -150,6 +150,7 @@ def plot_totals_by_license_type(args):
         "github_totals_by_license.csv",
     )
     LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
+    shared.safe_open_file(file_path, "generate GitHub license report")
     name_label = "License"
     data_label = "Count"
     data = pd.read_csv(file_path, index_col=name_label)
@@ -199,6 +200,7 @@ def plot_totals_by_restriction(args):
         "github_totals_by_restriction.csv",
     )
     LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
+    shared.safe_open_file(file_path, "generate GitHub restriction report")
     name_label = "Category"
     data_label = "Count"
     data = pd.read_csv(file_path, index_col=name_label)
diff --git a/scripts/shared.py b/scripts/shared.py
index 541988fc..3db8947f 100644
--- a/scripts/shared.py
+++ b/scripts/shared.py
@@ -236,6 +236,17 @@ def setup(current_file):
     return logger, paths
 
 
+def safe_open_file(file_path, operation="read"):
+    """Return file_path if it exists, else raise QuantifyingException."""
+    # operation is a verb phrase completing "Cannot <operation> file" below
+    # Existence check only: nothing is opened here, despite the name
+    if not os.path.exists(file_path):
+        raise QuantifyingException(
+            f"Cannot {operation} file: {file_path} does not exist"
+        )
+    return file_path
+
+
 def update_readme(
     args,
     section_title,

From 5f11dc1dac4685f094706c00da5e8f8cb260d334 Mon Sep 17 00:00:00 2001
From: opsmithe <anyanwuchigozieprosper@gmail.com>
Date: Mon, 24 Nov 2025 10:20:18 +0100
Subject: [PATCH 2/2] Make shared.py executable

---
 scripts/shared.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 scripts/shared.py

diff --git a/scripts/shared.py b/scripts/shared.py
old mode 100644
new mode 100755