Skip to content

Commit ca85a78

Browse files
author
Roja Reddy Sareddy
committed
Fix: Update Legal verbiage, and allowed reward model ids based on region
1 parent 7763bce commit ca85a78

File tree

7 files changed

+42
-18
lines changed

7 files changed

+42
-18
lines changed

sagemaker-train/src/sagemaker/train/constants.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,15 @@
4242

4343
HUB_NAME = "SageMakerPublicHub"
4444

45-
# Allowed reward model IDs for RLAIF trainer
46-
_ALLOWED_REWARD_MODEL_IDS = [
47-
"openai.gpt-oss-120b-1:0",
48-
"openai.gpt-oss-20b-1:0",
49-
"qwen.qwen3-32b-v1:0",
50-
"qwen.qwen3-coder-30b-a3b-v1:0"
51-
]
45+
# Allowed reward model IDs for RLAIF trainer with region restrictions
46+
_ALLOWED_REWARD_MODEL_IDS = {
47+
"openai.gpt-oss-120b-1:0": ["us-west-2", "us-east-1", "ap-northeast-1", "eu-west-1"],
48+
"openai.gpt-oss-20b-1:0": ["us-west-2", "us-east-1", "ap-northeast-1", "eu-west-1"],
49+
"qwen.qwen3-32b-v1:0": ["us-west-2", "us-east-1", "ap-northeast-1", "eu-west-1"],
50+
"qwen.qwen3-coder-30b-a3b-v1:0": ["us-west-2", "us-east-1", "ap-northeast-1", "eu-west-1"],
51+
"qwen.qwen3-coder-480b-a35b-v1:0": ["us-west-2", "ap-northeast-1"],
52+
"qwen.qwen3-235b-a22b-2507-v1:0": ["us-west-2", "ap-northeast-1"]
53+
}
5254

5355
# Allowed evaluator models for LLM as Judge evaluator with region restrictions
5456
_ALLOWED_EVALUATOR_MODELS = {

sagemaker-train/src/sagemaker/train/dpo_trainer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,9 @@ class DPOTrainer(BaseTrainer):
8686
mlflow_run_name (Optional[str]):
8787
The MLflow run name for this training job.
8888
training_dataset (Optional[Union[str, DataSet]]):
89-
The training dataset with preference pairs. Can be an S3 URI, dataset ARN, or DataSet object.
89+
The training dataset with preference pairs. Can be a dataset ARN or a DataSet object.
9090
validation_dataset (Optional[Union[str, DataSet]]):
91-
The validation dataset. Can be an S3 URI, dataset ARN, or DataSet object.
91+
The validation dataset. Can be a dataset ARN or a DataSet object.
9292
s3_output_path (Optional[str]):
9393
The S3 path for training job outputs.
9494
If not specified, defaults to s3://sagemaker-<region>-<account>/output.

sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,14 @@ class LLMAsJudgeEvaluator(BaseEvaluator):
2323
2424
This evaluator uses foundation models to evaluate LLM responses
2525
based on various quality and responsible AI metrics.
26-
26+
27+
This feature is powered by Amazon Bedrock Evaluations. Your use of this feature is subject to pricing of
28+
Amazon Bedrock Evaluations, the Service Terms applicable to Amazon Bedrock, and the terms that apply to your
29+
usage of third-party models. Amazon Bedrock Evaluations may securely transmit data across AWS Regions within your
30+
geography for processing. For more information, access Amazon Bedrock Evaluations documentation.
31+
32+
Documentation: https://docs.aws.amazon.com/bedrock/latest/userguide/evaluation-judge.html
33+
2734
Attributes:
2835
evaluator_model (str): AWS Bedrock foundation model identifier to use as the judge.
2936
Required. For supported models, see:

sagemaker-train/src/sagemaker/train/rlaif_trainer.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,9 @@ class RLAIFTrainer(BaseTrainer):
100100
mlflow_run_name (Optional[str]):
101101
The MLflow run name for this training job.
102102
training_dataset (Optional[Union[str, DataSet]]):
103-
The training dataset. Can be an S3 URI, dataset ARN, or DataSet object.
103+
The training dataset. Can be a dataset ARN or a DataSet object.
104104
validation_dataset (Optional[Union[str, DataSet]]):
105-
The validation dataset. Can be an S3 URI, dataset ARN, or DataSet object.
105+
The validation dataset. Can be a dataset ARN or a DataSet object.
106106
s3_output_path (Optional[str]):
107107
The S3 path for training job outputs.
108108
If not specified, defaults to s3://sagemaker-<region>-<account>/output.
@@ -173,8 +173,20 @@ def _validate_reward_model_id(self, reward_model_id):
173173
if reward_model_id not in _ALLOWED_REWARD_MODEL_IDS:
174174
raise ValueError(
175175
f"Invalid reward_model_id '{reward_model_id}'. "
176-
f"Available models are: {_ALLOWED_REWARD_MODEL_IDS}"
176+
f"Available models are: {list(_ALLOWED_REWARD_MODEL_IDS.keys())}"
177177
)
178+
179+
# Check region compatibility
180+
session = self.sagemaker_session if hasattr(self, 'sagemaker_session') and self.sagemaker_session else TrainDefaults.get_sagemaker_session()
181+
current_region = session.boto_region_name
182+
allowed_regions = _ALLOWED_REWARD_MODEL_IDS[reward_model_id]
183+
184+
if current_region not in allowed_regions:
185+
raise ValueError(
186+
f"Reward model '{reward_model_id}' is not available in region '{current_region}'. "
187+
f"Available regions for this model: {allowed_regions}"
188+
)
189+
178190
return reward_model_id
179191

180192

sagemaker-train/src/sagemaker/train/rlvr_trainer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,9 @@ class RLVRTrainer(BaseTrainer):
9292
mlflow_run_name (Optional[str]):
9393
The MLflow run name for this training job.
9494
training_dataset (Optional[Union[str, DataSet]]):
95-
The training dataset. Can be an S3 URI, dataset ARN, or DataSet object.
95+
The training dataset. Can be a dataset ARN or a DataSet object.
9696
validation_dataset (Optional[Union[str, DataSet]]):
97-
The validation dataset. Can be an S3 URI, dataset ARN, or DataSet object.
97+
The validation dataset. Can be a dataset ARN or a DataSet object.
9898
s3_output_path (Optional[str]):
9999
The S3 path for training job outputs.
100100
If not specified, defaults to s3://sagemaker-<region>-<account>/output.

sagemaker-train/src/sagemaker/train/sft_trainer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,9 @@ class SFTTrainer(BaseTrainer):
8888
mlflow_run_name (Optional[str]):
8989
The MLflow run name for this training job.
9090
training_dataset (Optional[Union[str, DataSet]]):
91-
The training dataset. Can be an S3 URI, dataset ARN, or DataSet object.
91+
The training dataset. Can be a dataset ARN or a DataSet object.
9292
validation_dataset (Optional[Union[str, DataSet]]):
93-
The validation dataset. Can be an S3 URI, dataset ARN, or DataSet object.
93+
The validation dataset. Can be a dataset ARN or a DataSet object.
9494
s3_output_path (Optional[str]):
9595
The S3 path for training job outputs.
9696
If not specified, defaults to s3://sagemaker-<region>-<account>/output.

v3-examples/model-customization-examples/rlaif_finetuning_example_notebook_v3_prod.ipynb

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,10 @@
139139
"source": [
140140
"#### Reference \n",
141141
"Refer to this doc for other models that support Model Customization: \n",
142-
"https://docs.aws.amazon.com/bedrock/latest/userguide/custom-model-supported.html"
142+
"https://docs.aws.amazon.com/bedrock/latest/userguide/custom-model-supported.html\n",
143+
"\n",
144+
"Refer to this file for the supported reward models: \n",
145+
"https://github.com/aws/sagemaker-python-sdk/blob/master/sagemaker-train/src/sagemaker/train/constants.py#L46"
143146
]
144147
},
145148
{

0 commit comments

Comments
 (0)