Skip to content

Commit 8695cca

Browse files
rsareddy0329 (Roja Reddy Sareddy)
and co-authors authored
fix: update evaluate_base_model as False, minor change to README (#5425)
* feat: Add support to trainer object for model parameter in Evaluator * feat: Evaluator handshake with trainer * fix: update evaluate_base_model as False, minor change to README --------- Co-authored-by: Roja Reddy Sareddy <rsareddy@amazon.com>
1 parent a881369 commit 8695cca

File tree

7 files changed

+10
-10
lines changed

7 files changed

+10
-10
lines changed

sagemaker-train/README.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ Table of Contents
4747

4848

4949
Installing the SageMaker Python SDK Train
50-
-----------------------------------
50+
-----------------------------------------
5151

5252
You can install from source by cloning this repository and running a pip install command in the root directory of the repository:
5353

sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ class BenchMarkEvaluator(BaseEvaluator):
301301

302302
benchmark: _Benchmark
303303
subtasks: Optional[Union[str, List[str]]] = None
304-
evaluate_base_model: bool = True
304+
evaluate_base_model: bool = False
305305
_hyperparameters: Optional[Any] = None
306306

307307

sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ class CustomScorerEvaluator(BaseEvaluator):
137137
_hyperparameters: Optional[Any] = None
138138

139139
# Template-required fields
140-
evaluate_base_model: bool = True
140+
evaluate_base_model: bool = False
141141

142142
@validator('dataset', pre=True)
143143
def _resolve_dataset(cls, v):

sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ class LLMAsJudgeEvaluator(BaseEvaluator):
123123
custom_metrics: Optional[str] = None
124124

125125
# Template-required fields
126-
evaluate_base_model: bool = True
126+
evaluate_base_model: bool = False
127127

128128
@validator('dataset', pre=True)
129129
def _resolve_dataset(cls, v):

sagemaker-train/tests/unit/train/evaluate/test_benchmark_evaluator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ def test_benchmark_evaluator_initialization_minimal(mock_artifact, mock_resolve)
130130

131131
assert evaluator.benchmark == _Benchmark.MMLU
132132
assert evaluator.model == DEFAULT_MODEL
133-
assert evaluator.evaluate_base_model is True
133+
assert evaluator.evaluate_base_model is False
134134
assert evaluator.subtasks == "ALL"
135135

136136

@@ -525,7 +525,7 @@ def test_benchmark_evaluator_get_benchmark_template_additions(mock_artifact, moc
525525
assert additions['strategy'] == 'zs_cot'
526526
assert additions['evaluation_metric'] == 'accuracy'
527527
assert additions['subtask'] == 'abstract_algebra'
528-
assert additions['evaluate_base_model'] is True
528+
assert additions['evaluate_base_model'] is False
529529

530530

531531
@patch('sagemaker.train.common_utils.recipe_utils._is_nova_model')

sagemaker-train/tests/unit/train/evaluate/test_custom_scorer_evaluator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def test_custom_scorer_evaluator_initialization_minimal(mock_artifact, mock_reso
8585
assert evaluator.evaluator == _BuiltInMetric.PRIME_MATH
8686
assert evaluator.dataset == DEFAULT_DATASET
8787
assert evaluator.model == DEFAULT_MODEL
88-
assert evaluator.evaluate_base_model is True
88+
assert evaluator.evaluate_base_model is False
8989

9090

9191
@patch('sagemaker.train.common_utils.model_resolution._resolve_base_model')
@@ -952,7 +952,7 @@ def test_custom_scorer_evaluator_get_custom_scorer_template_additions_builtin(
952952
assert additions['task'] == 'gen_qa'
953953
assert additions['strategy'] == 'gen_qa'
954954
assert additions['evaluation_metric'] == 'all'
955-
assert additions['evaluate_base_model'] is True
955+
assert additions['evaluate_base_model'] is False
956956
assert additions['evaluator_arn'] is None
957957
assert additions['preset_reward_function'] == 'prime_math'
958958
assert 'temperature' in additions

sagemaker-train/tests/unit/train/evaluate/test_llm_as_judge_evaluator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def test_llm_as_judge_evaluator_initialization_minimal(mock_artifact, mock_resol
6767
assert evaluator.evaluator_model == DEFAULT_EVALUATOR_MODEL
6868
assert evaluator.dataset == DEFAULT_DATASET
6969
assert evaluator.model == DEFAULT_MODEL
70-
assert evaluator.evaluate_base_model is True
70+
assert evaluator.evaluate_base_model is False
7171
assert evaluator.builtin_metrics is None
7272
assert evaluator.custom_metrics is None
7373

@@ -472,7 +472,7 @@ def test_llm_as_judge_evaluator_get_llmaj_template_additions(mock_artifact, mock
472472
assert additions['top_p'] == '1.0'
473473
# pipeline_name is no longer in template additions - it's resolved dynamically in execution.py
474474
assert 'pipeline_name' not in additions
475-
assert additions['evaluate_base_model'] is True
475+
assert additions['evaluate_base_model'] is False
476476

477477
# Verify S3 upload was called
478478
mock_s3_upload.assert_called_once()

0 commit comments

Comments (0)