|
7 | 7 | Test Categories: |
8 | 8 | 1. Data Loading and Export Workflow (Sandeep Ramavath) |
9 | 9 | 2. Data Cleaning and Transformation Workflow (Nithikesh Bobbili) |
| 10 | +3. Aggregation and Analysis Workflow (Mallikarjuna) |
10 | 11 | """ |
11 | 12 | import os |
12 | 13 | import tempfile |
@@ -130,3 +131,69 @@ def test_missing_data_handling_workflow(self): |
130 | 131 | # Step 4: Verify complete workflow |
131 | 132 | original_shape = data.shape |
132 | 133 | assert filled_constant.shape == original_shape, "Shape should be preserved" |
class TestAggregationWorkflow:
    """
    System tests by Mallikarjuna.
    Validates end-to-end data aggregation and analysis workflows.
    """

    def test_groupby_aggregation_workflow(self):
        """
        Test Case: Group-by Aggregation Analysis Workflow

        Pre-conditions:
        - pandas library functional
        - Sufficient memory for operations

        Test Steps:
        1. Create DataFrame with categorical and numeric data
        2. Group data by category using public API
        3. Apply mean, sum and size aggregations individually and
           verify each result per category
        4. Apply multiple aggregation functions in a single ``agg`` call
        5. Verify the multi-aggregation values and the result shape

        Expected Results:
        - Data groups correctly by category
        - Mean aggregation produces correct averages
        - Sum aggregation produces correct totals
        - Count aggregation shows correct group sizes
        - Multiple aggregations work in single operation
        """
        # Step 1: Create DataFrame with categorical data.
        # Rows alternate A/B, so each category owns exactly four rows.
        data = pd.DataFrame({
            'category': ['A', 'B', 'A', 'B', 'A', 'B', 'A', 'B'],
            'value': [10, 20, 15, 25, 20, 30, 25, 35],
            'quantity': [1, 2, 3, 4, 5, 6, 7, 8],
        })

        # Step 2: Group by category using public API
        grouped = data.groupby('category')

        # Step 3a: Apply mean aggregation.
        # 17.5 and 27.5 are exactly representable as floats, so exact
        # equality is safe for this fixture.
        mean_result = grouped['value'].mean()
        assert mean_result['A'] == 17.5, "Category A mean should be 17.5"
        assert mean_result['B'] == 27.5, "Category B mean should be 27.5"

        # Step 3b: Apply sum aggregation
        sum_result = grouped['value'].sum()
        assert sum_result['A'] == 70, "Category A sum should be 70"
        assert sum_result['B'] == 110, "Category B sum should be 110"

        # Step 3c: Apply count aggregation.
        # size() reports rows per group (NaN rows included); the fixture
        # has no missing values, so it matches the non-null count.
        count_result = grouped.size()
        assert count_result['A'] == 4, "Category A should have 4 items"
        assert count_result['B'] == 4, "Category B should have 4 items"

        # Step 4: Apply multiple aggregations simultaneously
        multi_agg = grouped['value'].agg(['mean', 'sum', 'count'])

        # Step 5: Verify multi-aggregation results.
        # Table-driven so a failure names the exact category/statistic.
        expected = {
            'A': {'mean': 17.5, 'sum': 70, 'count': 4},
            'B': {'mean': 27.5, 'sum': 110, 'count': 4},
        }
        for category, stats in expected.items():
            for stat, want in stats.items():
                got = multi_agg.loc[category, stat]
                assert got == want, (
                    f"Category {category} {stat} should be {want}, got {got}"
                )

        # Verify shape of result
        assert multi_agg.shape == (2, 3), "Should have 2 categories and 3 aggregations"
0 commit comments