Skip to content

Commit 9694b3a

Browse files
Added system test case: Data Cleaning and Transformation Workflow
1 parent 3fd39c0 commit 9694b3a

File tree

1 file changed

+60
-1
lines changed

1 file changed

+60
-1
lines changed

pandas/tests/system/test_system_workflows.py

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
treating the system as a black box without referencing internal implementation.
66
77
Test Categories:
8-
Data Loading and Export Workflow (Sandeep Ramavath)
8+
1. Data Loading and Export Workflow (Sandeep Ramavath)
9+
2. Data Cleaning and Transformation Workflow (Nithikesh Bobbili)
910
"""
1011
import os
1112
import tempfile
@@ -71,3 +72,61 @@ def test_csv_roundtrip_workflow(self, tmp_path):
7172
assert pd.api.types.is_datetime64_any_dtype(loaded_data['date'])
7273

7374

75+
class TestDataCleaningWorkflow:
    """
    System tests by Nithikesh Bobbili.
    Validates end-to-end data cleaning and transformation workflows.
    """

    def test_missing_data_handling_workflow(self):
        """
        Test Case: Missing Data Cleaning Workflow

        Pre-conditions:
        - pandas library available
        - No external dependencies required

        Test Steps:
        1. Create DataFrame with missing values using public API
        2. Detect missing values using public methods
        3. Fill missing values using multiple strategies
        4. Verify all missing values handled correctly

        Expected Results:
        - Missing values correctly identified
        - Forward fill propagates last valid value; a leading NaN stays NaN
        - Backward fill propagates next valid value; a trailing NaN stays NaN
        - Constant fill replaces with specified value
        - No missing values remain after constant filling
        - The input DataFrame is left unmodified by every fill strategy
        """
        # Step 1: Create DataFrame with missing data
        data = pd.DataFrame({
            'A': [1, np.nan, 3, np.nan, 5],
            'B': [np.nan, 2, np.nan, 4, 5],
            'C': [1, 2, 3, 4, np.nan],
        })

        # Step 2: Detect missing values using public API
        missing_count = data.isnull().sum()
        assert missing_count['A'] == 2, "Column A should have 2 missing values"
        assert missing_count['B'] == 2, "Column B should have 2 missing values"
        assert missing_count['C'] == 1, "Column C should have 1 missing value"

        # Step 3a: Fill missing values with forward fill
        filled_ffill = data.ffill()
        assert (
            filled_ffill.isnull().sum().sum() == 1
        ), "Should have 1 remaining NaN at start"
        # The only NaN forward fill cannot resolve is the one with no
        # predecessor: the first row of column B.
        assert pd.isna(filled_ffill.loc[0, 'B']), "Leading NaN in B should remain"
        assert (
            filled_ffill.loc[1, 'A'] == 1.0
        ), "Should forward fill from previous value"

        # Step 3b: Fill missing values with backward fill
        filled_bfill = data.bfill()
        assert (
            filled_bfill.isnull().sum().sum() == 1
        ), "Should have 1 remaining NaN at end"
        # The only NaN backward fill cannot resolve is the one with no
        # successor: the last row of column C.
        assert pd.isna(filled_bfill.loc[4, 'C']), "Trailing NaN in C should remain"
        assert (
            filled_bfill.loc[0, 'B'] == 2.0
        ), "Should backward fill from next value"

        # Step 3c: Fill with constant value
        filled_constant = data.fillna(0)
        assert (
            filled_constant.isnull().sum().sum() == 0
        ), "No missing values should remain"
        assert (
            filled_constant.loc[1, 'A'] == 0.0
        ), "Should fill with constant value"

        # Step 4: Verify complete workflow
        original_shape = data.shape
        assert filled_constant.shape == original_shape, "Shape should be preserved"
        # None of the fill calls above were in-place, so the source frame must
        # still contain its original 2 + 2 + 1 missing values.
        assert (
            data.isnull().sum().sum() == 5
        ), "Original data should not be modified"

0 commit comments

Comments
 (0)