v3.1.2

jeffreyaven · jeffreyaven · commit 3675cf988de4 · 2023-10-16T16:50:22.000+11:00
diff --git a/.gitignore b/.gitignore
@@ -114,6 +114,9 @@ ipython_config.py
 #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
 #   in version control.
 #   https://pdm.fming.dev/#use-with-ide
+stackql
+stackql-zip
+
 .pdm.toml
 
 # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,12 @@
 # Changelog
 
-## v3.1.1 (2023-10-16)
+## v3.1.2 (2023-10-16)
+
+### Updates
+
+ * `pandas` type fixes
+ 
+## v3.1.1 (2023-10-14)
 
 ### Updates
 
diff --git a/README.rst b/README.rst
@@ -112,7 +112,7 @@ PyStackQL has been tested on:
 - Python 3.9
 - Python 3.10
 - Python 3.11
-- Python 3.12 (MacOS and Linux only
+- Python 3.12 (MacOS and Linux only)
 
 Licensing
 ~~~~~~~~~
@@ -193,4 +193,4 @@ To publish the package to PyPI, run the following command:
 
 ::
 
-    twine upload dist/pystackql-3.1.1.tar.gz
+    twine upload dist/pystackql-3.1.2.tar.gz
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -26,7 +26,7 @@
 # The short X.Y version
 version = ''
 # The full version, including alpha/beta/rc tags
-release = '3.1.1'
+release = '3.1.2'
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/pystackql/stackql.py b/pystackql/stackql.py
@@ -12,6 +12,8 @@
 from psycopg2.extras import RealDictCursor
 import pandas as pd
 
+from io import StringIO
+
 class StackQL:
 	"""
 	A class representing an instance of the StackQL query engine.
@@ -453,7 +455,8 @@ def execute(self, query):
 			result = self._run_server_query(query)
 			
 			if self.output == 'pandas':
-				return pd.DataFrame(result)	 # Convert dict results to DataFrame
+				json_str = json.dumps(result)
+				return pd.read_json(StringIO(json_str))
 			elif self.output == 'csv':
 				raise ValueError("CSV output is not supported in server_mode.")
 			else:  # Assume 'dict' output
@@ -466,15 +469,15 @@ def execute(self, query):
 				return output
 			elif self.output == 'pandas':
 				try:
-					json_output = json.loads(output)
-					return pd.DataFrame(json_output)
+					return pd.read_json(StringIO(output))
 				except ValueError:
 					return pd.DataFrame([{"error": "Invalid JSON output: {}".format(output.strip())}])
 			else:  # Assume 'dict' output
 				try:
 					return json.loads(output)
 				except ValueError:
 					return [{"error": "Invalid JSON output: {}".format(output.strip())}]
+
 	#
 	# async query support
 	#
diff --git a/pystackql/stackql_magic.py b/pystackql/stackql_magic.py
@@ -43,7 +43,7 @@ def run_query(self, query):
         :param query: StackQL query to be executed.
         :type query: str
         :return: Query results, returned as a Pandas DataFrame.
-        :rtype: pandas.DataFrame or str
+        :rtype: pandas.DataFrame
         """
         return self.stackql_instance.execute(query)
     
diff --git a/setup.py b/setup.py
@@ -10,7 +10,7 @@
 
 setup(
     name='pystackql',
-    version='3.1.1',
+    version='3.1.2',
     description='A Python interface for StackQL',
     long_description=readme,
     author='Jeffrey Aven',
diff --git a/tests/pystackql_tests.py b/tests/pystackql_tests.py
@@ -169,18 +169,26 @@ def test_11_execute_with_defaults(self):
         result = self.stackql.execute(google_query)
         is_valid_dict = isinstance(result, list) and all(isinstance(item, dict) for item in result)
         self.assertTrue(is_valid_dict, f"Result is not a valid dict: {result}")
-        print_test_result(f"Test execute with defaults\nRESULT_COUNT: {len(result)}", is_valid_dict)
+        print_test_result(f"Test execute with defaults\nRESULT: {result}", is_valid_dict)
 
     @pystackql_test_setup(output='pandas')
     def test_12_execute_with_pandas_output(self):
-        result = self.stackql.execute(google_query)
+        result = self.stackql.execute(aws_query)
         is_valid_dataframe = isinstance(result, pd.DataFrame)
         self.assertTrue(is_valid_dataframe, f"Result is not a valid DataFrame: {result}")
-        print_test_result(f"Test execute with pandas output\nRESULT_COUNT: {len(result)}", is_valid_dataframe)
+        # Check datatypes of the columns
+        expected_dtypes = {
+            'instance_state': 'object',  # This should be 'object' for older Pandas versions
+            'num_instances': 'int64'
+        }
+        for col, expected_dtype in expected_dtypes.items():
+            actual_dtype = result[col].dtype
+            self.assertEqual(actual_dtype, expected_dtype, f"Column '{col}' has dtype '{actual_dtype}' but expected '{expected_dtype}'")
+        print_test_result(f"Test execute with pandas output\nRESULT COUNT: {len(result)}", is_valid_dataframe)
 
     @pystackql_test_setup(output='csv')
     def test_13_execute_with_csv_output(self):
-        result = self.stackql.execute(google_query)
+        result = self.stackql.execute(aws_query)
         is_valid_csv = isinstance(result, str) and result.count("\n") >= 1 and result.count(",") >= 1
         self.assertTrue(is_valid_csv, f"Result is not a valid CSV: {result}")
         print_test_result(f"Test execute with csv output\nRESULT_COUNT: {len(result.splitlines())}", is_valid_csv)
@@ -241,9 +249,18 @@ def test_21_execute_server_mode_default_output(self):
 
     @pystackql_test_setup(server_mode=True, output='pandas')
     def test_22_execute_server_mode_pandas_output(self):
-        result = self.stackql.execute(google_query)
-        is_valid_pandas_output = isinstance(result, pd.DataFrame)
-        print_test_result(f"""Test execute in server_mode with pandas output\nRESULT_COUNT: {len(result)}""", is_valid_pandas_output, True)
+        result = self.stackql.execute(aws_query)
+        is_valid_dataframe = isinstance(result, pd.DataFrame)
+        self.assertTrue(is_valid_dataframe, f"Result is not a valid DataFrame: {result}")
+        # Check datatypes of the columns
+        expected_dtypes = {
+            'instance_state': 'object',  # This should be 'object' for older Pandas versions
+            'num_instances': 'int64'
+        }
+        for col, expected_dtype in expected_dtypes.items():
+            actual_dtype = result[col].dtype
+            self.assertEqual(actual_dtype, expected_dtype, f"Column '{col}' has dtype '{actual_dtype}' but expected '{expected_dtype}'")
+        print_test_result(f"Test execute in server_mode with pandas output\nRESULT COUNT: {len(result)}", is_valid_dataframe)
 
 class MockInteractiveShell:
     """A mock class for IPython's InteractiveShell."""
diff --git a/tests/test_params.py b/tests/test_params.py
@@ -41,6 +41,15 @@ def registry_pull_resp_pattern(provider):
 GROUP BY status
 """
 
+aws_query = f"""
+SELECT 
+split_part(instanceState, '\n', 3) as instance_state,
+count(*) as num_instances
+FROM aws.ec2.instances 
+WHERE region = '{os.environ['AWS_REGION']}'
+GROUP BY instance_state
+"""
+
 regions = os.environ.get('AWS_REGIONS').split(',')
 
 async_queries = [