Merge pull request #21 from stackql/feature/refactor

jeffreyaven · web-flow · commit 7eff11aa0fe1 · 2023-10-16T16:57:35.000+11:00
async server tests
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
@@ -75,6 +75,7 @@ jobs:
           AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
           GOOGLE_CREDENTIALS: ${{ secrets.GOOGLE_CREDENTIALS }}
+          AWS_REGION: ${{ vars.AWS_REGION }}
           AWS_REGIONS: ${{ vars.AWS_REGIONS }}
           GCP_PROJECT: ${{ vars.GCP_PROJECT }}
           GCP_ZONE: ${{ vars.GCP_ZONE }}          
diff --git a/.gitignore b/.gitignore
@@ -114,6 +114,9 @@ ipython_config.py
 #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
 #   in version control.
 #   https://pdm.fming.dev/#use-with-ide
+stackql
+stackql-zip
+
 .pdm.toml
 
 # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,12 @@
 # Changelog
 
-## v3.1.1 (2023-10-16)
+## v3.1.2 (2023-10-16)
+
+### Updates
+
+ * `pandas` type fixes
+ 
+## v3.1.1 (2023-10-14)
 
 ### Updates
 
diff --git a/README.rst b/README.rst
@@ -112,7 +112,7 @@ PyStackQL has been tested on:
 - Python 3.9
 - Python 3.10
 - Python 3.11
-- Python 3.12 (MacOS and Linux only
+- Python 3.12 (MacOS and Linux only)
 
 Licensing
 ~~~~~~~~~
@@ -193,4 +193,4 @@ To publish the package to PyPI, run the following command:
 
 ::
 
-    twine upload dist/pystackql-3.1.1.tar.gz
+    twine upload dist/pystackql-3.1.2.tar.gz
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -26,7 +26,7 @@
 # The short X.Y version
 version = ''
 # The full version, including alpha/beta/rc tags
-release = '3.1.1'
+release = '3.1.2'
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/pystackql/stackql.py b/pystackql/stackql.py
@@ -12,6 +12,8 @@
 from psycopg2.extras import RealDictCursor
 import pandas as pd
 
+from io import StringIO
+
 class StackQL:
 	"""
 	A class representing an instance of the StackQL query engine.
@@ -22,7 +24,7 @@ class StackQL:
 	
 	server_address: The address of the StackQL server (server_mode only).
 		:type server_address: str
-		:default: '0.0.0.0'
+		:default: '127.0.0.1'
 	
 	server_port: The port of the StackQL server (server_mode only).
 		:type server_port: int
@@ -214,7 +216,7 @@ def _run_query(self, query, is_statement=False):
 
 	def __init__(self, 
 				 server_mode=False, 
-				 server_address='0.0.0.0', 
+				 server_address='127.0.0.1', 
 				 server_port=5466, 
 				 download_dir=None, 
 				 output='dict',
@@ -264,8 +266,6 @@ def __init__(self,
 
 		if self.server_mode:
 			# server mode, connect to a server via the postgres wire protocol
-			if this_os == 'Windows':
-				server_address = '127.0.0.1'
 			self.server_address = server_address
 			self.server_port = server_port
    			# establish the connection
@@ -455,7 +455,8 @@ def execute(self, query):
 			result = self._run_server_query(query)
 			
 			if self.output == 'pandas':
-				return pd.DataFrame(result)	 # Convert dict results to DataFrame
+				json_str = json.dumps(result)
+				return pd.read_json(StringIO(json_str))
 			elif self.output == 'csv':
 				raise ValueError("CSV output is not supported in server_mode.")
 			else:  # Assume 'dict' output
@@ -468,15 +469,15 @@ def execute(self, query):
 				return output
 			elif self.output == 'pandas':
 				try:
-					json_output = json.loads(output)
-					return pd.DataFrame(json_output)
+					return pd.read_json(StringIO(output))
 				except ValueError:
 					return pd.DataFrame([{"error": "Invalid JSON output: {}".format(output.strip())}])
 			else:  # Assume 'dict' output
 				try:
 					return json.loads(output)
 				except ValueError:
 					return [{"error": "Invalid JSON output: {}".format(output.strip())}]
+
 	#
 	# async query support
 	#
diff --git a/pystackql/stackql_magic.py b/pystackql/stackql_magic.py
@@ -43,7 +43,7 @@ def run_query(self, query):
         :param query: StackQL query to be executed.
         :type query: str
         :return: Query results, returned as a Pandas DataFrame.
-        :rtype: pandas.DataFrame or str
+        :rtype: pandas.DataFrame
         """
         return self.stackql_instance.execute(query)
     
diff --git a/setup.py b/setup.py
@@ -10,7 +10,7 @@
 
 setup(
     name='pystackql',
-    version='3.1.1',
+    version='3.1.2',
     description='A Python interface for StackQL',
     long_description=readme,
     author='Jeffrey Aven',
diff --git a/tests/pystackql_async_server_tests.py b/tests/pystackql_async_server_tests.py
@@ -25,7 +25,7 @@ def setUpModule():
     print(res)
     print("starting stackql server...")
     PyStackQLTestsBase.server_process = subprocess.Popen([PyStackQLTestsBase.stackql.bin_path, "srv", "--pgsrv.address", server_address, "--pgsrv.port", str(server_port)])
-    time.sleep(5)
+    time.sleep(30)
 
 def tearDownModule():
     print("stopping stackql server...")
diff --git a/tests/pystackql_tests.py b/tests/pystackql_tests.py
@@ -31,6 +31,14 @@ class PyStackQLTestsBase(unittest.TestCase):
 def setUpModule():
     print("downloading stackql binary...")
     PyStackQLTestsBase.stackql = StackQL()
+    # Check whether code is running in GitHub Actions
+    is_github_actions = os.environ.get('GITHUB_ACTIONS') == 'true'
+
+    if not is_github_actions:
+        # Ensure you have the latest version of stackql, only when running locally
+        print("Running tests outside of GitHub Actions, upgrading stackql binary...")
+        PyStackQLTestsBase.stackql.upgrade()
+
     print("downloading aws provider for tests...")
     res = PyStackQLTestsBase.stackql.executeStmt(registry_pull_aws_query)
     print(res)
@@ -39,7 +47,7 @@ def setUpModule():
     print(res)
     print("starting stackql server...")
     PyStackQLTestsBase.server_process = subprocess.Popen([PyStackQLTestsBase.stackql.bin_path, "srv", "--pgsrv.address", server_address, "--pgsrv.port", str(server_port)])
-    time.sleep(5)
+    time.sleep(10)
 
 def tearDownModule():
     print("stopping stackql server...")
@@ -161,18 +169,26 @@ def test_11_execute_with_defaults(self):
         result = self.stackql.execute(google_query)
         is_valid_dict = isinstance(result, list) and all(isinstance(item, dict) for item in result)
         self.assertTrue(is_valid_dict, f"Result is not a valid dict: {result}")
-        print_test_result(f"Test execute with defaults\nRESULT_COUNT: {len(result)}", is_valid_dict)
+        print_test_result(f"Test execute with defaults\nRESULT: {result}", is_valid_dict)
 
     @pystackql_test_setup(output='pandas')
     def test_12_execute_with_pandas_output(self):
-        result = self.stackql.execute(google_query)
+        result = self.stackql.execute(aws_query)
         is_valid_dataframe = isinstance(result, pd.DataFrame)
         self.assertTrue(is_valid_dataframe, f"Result is not a valid DataFrame: {result}")
-        print_test_result(f"Test execute with pandas output\nRESULT_COUNT: {len(result)}", is_valid_dataframe)
+        # Check datatypes of the columns
+        expected_dtypes = {
+            'instance_state': 'object',  # This should be 'object' for older Pandas versions
+            'num_instances': 'int64'
+        }
+        for col, expected_dtype in expected_dtypes.items():
+            actual_dtype = result[col].dtype
+            self.assertEqual(actual_dtype, expected_dtype, f"Column '{col}' has dtype '{actual_dtype}' but expected '{expected_dtype}'")
+        print_test_result(f"Test execute with pandas output\nRESULT COUNT: {len(result)}", is_valid_dataframe)
 
     @pystackql_test_setup(output='csv')
     def test_13_execute_with_csv_output(self):
-        result = self.stackql.execute(google_query)
+        result = self.stackql.execute(aws_query)
         is_valid_csv = isinstance(result, str) and result.count("\n") >= 1 and result.count(",") >= 1
         self.assertTrue(is_valid_csv, f"Result is not a valid CSV: {result}")
         print_test_result(f"Test execute with csv output\nRESULT_COUNT: {len(result.splitlines())}", is_valid_csv)
@@ -233,9 +249,18 @@ def test_21_execute_server_mode_default_output(self):
 
     @pystackql_test_setup(server_mode=True, output='pandas')
     def test_22_execute_server_mode_pandas_output(self):
-        result = self.stackql.execute(google_query)
-        is_valid_pandas_output = isinstance(result, pd.DataFrame)
-        print_test_result(f"""Test execute in server_mode with pandas output\nRESULT_COUNT: {len(result)}""", is_valid_pandas_output, True)
+        result = self.stackql.execute(aws_query)
+        is_valid_dataframe = isinstance(result, pd.DataFrame)
+        self.assertTrue(is_valid_dataframe, f"Result is not a valid DataFrame: {result}")
+        # Check datatypes of the columns
+        expected_dtypes = {
+            'instance_state': 'object',  # This should be 'object' for older Pandas versions
+            'num_instances': 'int64'
+        }
+        for col, expected_dtype in expected_dtypes.items():
+            actual_dtype = result[col].dtype
+            self.assertEqual(actual_dtype, expected_dtype, f"Column '{col}' has dtype '{actual_dtype}' but expected '{expected_dtype}'")
+        print_test_result(f"Test execute in server_mode with pandas output\nRESULT COUNT: {len(result)}", is_valid_dataframe)
 
 class MockInteractiveShell:
     """A mock class for IPython's InteractiveShell."""
diff --git a/tests/test_params.py b/tests/test_params.py
@@ -41,6 +41,15 @@ def registry_pull_resp_pattern(provider):
 GROUP BY status
 """
 
+aws_query = f"""
+SELECT 
+split_part(instanceState, '\n', 3) as instance_state,
+count(*) as num_instances
+FROM aws.ec2.instances 
+WHERE region = '{os.environ['AWS_REGION']}'
+GROUP BY instance_state
+"""
+
 regions = os.environ.get('AWS_REGIONS').split(',')
 
 async_queries = [