|
5 | 5 | treating the system as a black box without referencing internal implementation. |
6 | 6 |
|
7 | 7 | Test Categories: |
8 | | -Data Loading and Export Workflow (Sandeep Ramavath) |
| 8 | +1. Data Loading and Export Workflow (Sandeep Ramavath) |
| 9 | +2. Data Cleaning and Transformation Workflow (Nithikesh Bobbili) |
9 | 10 | """ |
10 | 11 | import os |
11 | 12 | import tempfile |
@@ -71,3 +72,61 @@ def test_csv_roundtrip_workflow(self, tmp_path): |
71 | 72 | assert pd.api.types.is_datetime64_any_dtype(loaded_data['date']) |
72 | 73 |
|
73 | 74 |
|
| 75 | +class TestDataCleaningWorkflow: |
| 76 | + """ |
| 77 | + System tests by Nithikesh Bobbili. |
| 78 | + Validates end-to-end data cleaning and transformation workflows. |
| 79 | + """ |
| 80 | + |
| 81 | + def test_missing_data_handling_workflow(self): |
| 82 | + """ |
| 83 | + Test Case: Missing Data Cleaning Workflow |
| 84 | + |
| 85 | + Pre-conditions: |
| 86 | + - pandas library available |
| 87 | + - No external dependencies required |
| 88 | + |
| 89 | + Test Steps: |
| 90 | + 1. Create DataFrame with missing values using public API |
| 91 | + 2. Detect missing values using public methods |
| 92 | + 3. Fill missing values using multiple strategies |
| 93 | + 4. Verify the filled DataFrame keeps the original shape |
| 94 | + |
| 95 | + Expected Results: |
| 96 | + - Missing values correctly identified |
| 97 | + - Forward fill propagates last valid value (a leading NaN stays missing) |
| 98 | + - Backward fill propagates next valid value (a trailing NaN stays missing) |
| 99 | + - Constant fill replaces with specified value |
| 100 | + - No missing values remain after constant fill |
| 101 | + """ |
| 102 | + # Step 1: Create DataFrame with missing data |
| 103 | + data = pd.DataFrame({ |
| 104 | + 'A': [1, np.nan, 3, np.nan, 5], |
| 105 | + 'B': [np.nan, 2, np.nan, 4, 5], |
| 106 | + 'C': [1, 2, 3, 4, np.nan] |
| 107 | + }) |
| 108 | + |
| 109 | + # Step 2: Detect missing values using public API |
| 110 | + missing_count = data.isnull().sum() |
| 111 | + assert missing_count['A'] == 2, "Column A should have 2 missing values" |
| 112 | + assert missing_count['B'] == 2, "Column B should have 2 missing values" |
| 113 | + assert missing_count['C'] == 1, "Column C should have 1 missing value" |
| 114 | + |
| 115 | + # Step 3a: Fill missing values with forward fill |
| 116 | + filled_ffill = data.ffill() |
| 117 | + assert filled_ffill.isnull().sum().sum() == 1, "Should have 1 remaining NaN at start" |
| 118 | + assert filled_ffill.loc[1, 'A'] == 1.0, "Should forward fill from previous value" |
| 119 | + |
| 120 | + # Step 3b: Fill missing values with backward fill |
| 121 | + filled_bfill = data.bfill() |
| 122 | + assert filled_bfill.isnull().sum().sum() == 1, "Should have 1 remaining NaN at end" |
| 123 | + assert filled_bfill.loc[0, 'B'] == 2.0, "Should backward fill from next value" |
| 124 | + |
| 125 | + # Step 3c: Fill with constant value |
| 126 | + filled_constant = data.fillna(0) |
| 127 | + assert filled_constant.isnull().sum().sum() == 0, "No missing values should remain" |
| 128 | + assert filled_constant.loc[1, 'A'] == 0.0, "Should fill with constant value" |
| 129 | + |
| 130 | + # Step 4: Verify constant fill preserved the original shape |
| 131 | + original_shape = data.shape |
| 132 | + assert filled_constant.shape == original_shape, "Shape should be preserved" |
0 commit comments