Fix: Update Legal verbiage, and allowed reward model ids based on region

Roja Reddy Sareddy · Roja Reddy Sareddy · commit ca85a78310d8 · 2025-12-10T13:30:28.000-08:00
diff --git a/sagemaker-train/src/sagemaker/train/constants.py b/sagemaker-train/src/sagemaker/train/constants.py
@@ -42,13 +42,15 @@
 
 HUB_NAME = "SageMakerPublicHub"
 
-# Allowed reward model IDs for RLAIF trainer
-_ALLOWED_REWARD_MODEL_IDS = [
-    "openai.gpt-oss-120b-1:0",
-    "openai.gpt-oss-20b-1:0", 
-    "qwen.qwen3-32b-v1:0",
-    "qwen.qwen3-coder-30b-a3b-v1:0"
-]
+# Allowed reward model IDs for RLAIF trainer with region restrictions
+_ALLOWED_REWARD_MODEL_IDS = {
+    "openai.gpt-oss-120b-1:0": ["us-west-2", "us-east-1", "ap-northeast-1", "eu-west-1"],
+    "openai.gpt-oss-20b-1:0": ["us-west-2", "us-east-1", "ap-northeast-1", "eu-west-1"],
+    "qwen.qwen3-32b-v1:0": ["us-west-2", "us-east-1", "ap-northeast-1", "eu-west-1"],
+    "qwen.qwen3-coder-30b-a3b-v1:0": ["us-west-2", "us-east-1", "ap-northeast-1", "eu-west-1"],
+    "qwen.qwen3-coder-480b-a35b-v1:0": ["us-west-2", "ap-northeast-1"],
+    "qwen.qwen3-235b-a22b-2507-v1:0": ["us-west-2", "ap-northeast-1"]
+}
 
 # Allowed evaluator models for LLM as Judge evaluator with region restrictions
 _ALLOWED_EVALUATOR_MODELS = {
diff --git a/sagemaker-train/src/sagemaker/train/dpo_trainer.py b/sagemaker-train/src/sagemaker/train/dpo_trainer.py
@@ -86,9 +86,9 @@ class DPOTrainer(BaseTrainer):
         mlflow_run_name (Optional[str]):
             The MLflow run name for this training job.
         training_dataset (Optional[Union[str, DataSet]]):
-            The training dataset with preference pairs. Can be an S3 URI, dataset ARN, or DataSet object.
+            The training dataset with preference pairs. Can be a dataset ARN, or DataSet object.
         validation_dataset (Optional[Union[str, DataSet]]):
-            The validation dataset. Can be an S3 URI, dataset ARN, or DataSet object.
+            The validation dataset. Can be a dataset ARN, or DataSet object.
         s3_output_path (Optional[str]):
             The S3 path for training job outputs.
             If not specified, defaults to s3://sagemaker-<region>-<account>/output.
diff --git a/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py b/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py
@@ -23,7 +23,14 @@ class LLMAsJudgeEvaluator(BaseEvaluator):
     
     This evaluator uses foundation models to evaluate LLM responses
     based on various quality and responsible AI metrics.
-    
+
+    This feature is powered by Amazon Bedrock Evaluations. Your use of this feature is subject to pricing of
+    Amazon Bedrock Evaluations, the Service Terms applicable to Amazon Bedrock, and the terms that apply to your
+    usage of third-party models. Amazon Bedrock Evaluations may securely transmit data across AWS Regions within your
+    geography for processing. For more information, access Amazon Bedrock Evaluations documentation.
+
+    Documentation: https://docs.aws.amazon.com/bedrock/latest/userguide/evaluation-judge.html
+
     Attributes:
         evaluator_model (str): AWS Bedrock foundation model identifier to use as the judge.
             Required. For supported models, see:
diff --git a/sagemaker-train/src/sagemaker/train/rlaif_trainer.py b/sagemaker-train/src/sagemaker/train/rlaif_trainer.py
@@ -100,9 +100,9 @@ class RLAIFTrainer(BaseTrainer):
         mlflow_run_name (Optional[str]):
             The MLflow run name for this training job.
         training_dataset (Optional[Union[str, DataSet]]):
-            The training dataset. Can be an S3 URI, dataset ARN, or DataSet object.
+            The training dataset. Can be a dataset ARN, or DataSet object.
         validation_dataset (Optional[Union[str, DataSet]]):
-            The validation dataset. Can be an S3 URI, dataset ARN, or DataSet object.
+            The validation dataset. Can be a dataset ARN, or DataSet object.
         s3_output_path (Optional[str]):
             The S3 path for training job outputs.
             If not specified, defaults to s3://sagemaker-<region>-<account>/output.
@@ -173,8 +173,20 @@ def _validate_reward_model_id(self, reward_model_id):
         if reward_model_id not in _ALLOWED_REWARD_MODEL_IDS:
             raise ValueError(
                 f"Invalid reward_model_id '{reward_model_id}'. "
-                f"Available models are: {_ALLOWED_REWARD_MODEL_IDS}"
+                f"Available models are: {list(_ALLOWED_REWARD_MODEL_IDS.keys())}"
             )
+        
+        # Check region compatibility
+        session = self.sagemaker_session if hasattr(self, 'sagemaker_session') and self.sagemaker_session else TrainDefaults.get_sagemaker_session()
+        current_region = session.boto_region_name
+        allowed_regions = _ALLOWED_REWARD_MODEL_IDS[reward_model_id]
+        
+        if current_region not in allowed_regions:
+            raise ValueError(
+                f"Reward model '{reward_model_id}' is not available in region '{current_region}'. "
+                f"Available regions for this model: {allowed_regions}"
+            )
+        
         return reward_model_id
         
 
diff --git a/sagemaker-train/src/sagemaker/train/rlvr_trainer.py b/sagemaker-train/src/sagemaker/train/rlvr_trainer.py
@@ -92,9 +92,9 @@ class RLVRTrainer(BaseTrainer):
         mlflow_run_name (Optional[str]):
             The MLflow run name for this training job.
         training_dataset (Optional[Union[str, DataSet]]):
-            The training dataset. Can be an S3 URI, dataset ARN, or DataSet object.
+            The training dataset. Can be a dataset ARN, or DataSet object.
         validation_dataset (Optional[Union[str, DataSet]]):
-            The validation dataset. Can be an S3 URI, dataset ARN, or DataSet object.
+            The validation dataset. Can be a dataset ARN, or DataSet object.
         s3_output_path (Optional[str]):
             The S3 path for training job outputs.
             If not specified, defaults to s3://sagemaker-<region>-<account>/output.
diff --git a/sagemaker-train/src/sagemaker/train/sft_trainer.py b/sagemaker-train/src/sagemaker/train/sft_trainer.py
@@ -88,9 +88,9 @@ class SFTTrainer(BaseTrainer):
         mlflow_run_name (Optional[str]):
             The MLflow run name for this training job.
         training_dataset (Optional[Union[str, DataSet]]):
-            The training dataset. Can be an S3 URI, dataset ARN, or DataSet object.
+            The training dataset. Can be a dataset ARN, or DataSet object.
         validation_dataset (Optional[Union[str, DataSet]]):
-            The validation dataset. Can be an S3 URI, dataset ARN, or DataSet object.
+            The validation dataset. Can be a dataset ARN, or DataSet object.
         s3_output_path (Optional[str]):
             The S3 path for training job outputs.
             If not specified, defaults to s3://sagemaker-<region>-<account>/output.
diff --git a/v3-examples/model-customization-examples/rlaif_finetuning_example_notebook_v3_prod.ipynb b/v3-examples/model-customization-examples/rlaif_finetuning_example_notebook_v3_prod.ipynb
@@ -139,7 +139,10 @@
    "source": [
     "#### Reference \n",
     "Refer this doc for other models that support Model Customization: \n",
-    "https://docs.aws.amazon.com/bedrock/latest/userguide/custom-model-supported.html"
+    "https://docs.aws.amazon.com/bedrock/latest/userguide/custom-model-supported.html\n",
+    "\n",
+    "Refer to this for supported reward models: \n",
+    "https://github.com/aws/sagemaker-python-sdk/blob/master/sagemaker-train/src/sagemaker/train/constants.py#L46"
    ]
   },
   {

Original file line number	Diff line number	Diff line change
`@@ -139,7 +139,10 @@`
`139`	`139`	`"source": [`
`140`	`140`	`"#### Reference \n",`
`141`	`141`	`"Refer this doc for other models that support Model Customization: \n",`
`142`		`- "https://docs.aws.amazon.com/bedrock/latest/userguide/custom-model-supported.html"`
	`142`	`+ "https://docs.aws.amazon.com/bedrock/latest/userguide/custom-model-supported.html\n",`
	`143`	`+ "\n",`
	`144`	`+ "Refer to this for supported reward models: \n",`
	`145`	`+ "https://github.com/aws/sagemaker-python-sdk/blob/master/sagemaker-train/src/sagemaker/train/constants.py#L46"`
`143`	`146`	`]`
`144`	`147`	`},`
`145`	`148`	`{`