diff --git a/sagemaker-train/src/sagemaker/train/common_utils/finetune_utils.py b/sagemaker-train/src/sagemaker/train/common_utils/finetune_utils.py index ee0256b79c..3fd17c3ac0 100644 --- a/sagemaker-train/src/sagemaker/train/common_utils/finetune_utils.py +++ b/sagemaker-train/src/sagemaker/train/common_utils/finetune_utils.py @@ -198,10 +198,19 @@ def _create_mlflow_app(sagemaker_session) -> Optional[MlflowApp]: if new_app.status in ["Created", "Updated"]: return new_app elif new_app.status in ["Failed", "Stopped"]: - raise RuntimeError(f"MLflow app creation failed with status: {new_app.status}") + # Get detailed error from MLflow app + error_msg = f"MLflow app creation failed with status: {new_app.status}" + if hasattr(new_app, 'failure_reason') and new_app.failure_reason: + error_msg += f". Reason: {new_app.failure_reason}" + raise RuntimeError(error_msg) time.sleep(poll_interval) - raise RuntimeError(f"MLflow app creation timed out after {max_wait_time} seconds") + # Timeout case - get current status and any error details + new_app.refresh() + error_msg = f"MLflow app creation timed out after {max_wait_time} seconds. Current status: {new_app.status}" + if hasattr(new_app, 'failure_reason') and new_app.failure_reason: + error_msg += f". Reason: {new_app.failure_reason}" + raise RuntimeError(error_msg) except Exception as e: logger.error("Failed to create MLflow app: %s", e) @@ -693,7 +702,7 @@ def _validate_eula_for_gated_model(model, accept_eula, is_gated_model): def _validate_s3_path_exists(s3_path: str, sagemaker_session): - """Validate if S3 path exists and is accessible.""" + """Validate S3 path and create bucket/prefix if they don't exist.""" if not s3_path.startswith("s3://"): raise ValueError(f"Invalid S3 path format: {s3_path}") @@ -705,19 +714,34 @@ def _validate_s3_path_exists(s3_path: str, sagemaker_session): s3_client = sagemaker_session.boto_session.client('s3') try: - # Check if bucket exists and is accessible - s3_client.head_bucket(Bucket=bucket_name) + # Check if bucket exists, create if it doesn't + try: + s3_client.head_bucket(Bucket=bucket_name) + except Exception as e: + if "NoSuchBucket" in str(e) or "Not Found" in str(e): + # Create bucket + region = sagemaker_session.boto_region_name + if region == 'us-east-1': + s3_client.create_bucket(Bucket=bucket_name) + else: + s3_client.create_bucket( + Bucket=bucket_name, + CreateBucketConfiguration={'LocationConstraint': region} + ) + else: + raise - # If prefix is provided, check if it exists + # If prefix is provided, check if it exists, create if it doesn't if prefix: response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=prefix, MaxKeys=1) if 'Contents' not in response: - raise ValueError(f"S3 prefix '{prefix}' does not exist in bucket '{bucket_name}'") + # Create the prefix by putting an empty object + if not prefix.endswith('/'): + prefix += '/' + s3_client.put_object(Bucket=bucket_name, Key=prefix, Body=b'') except Exception as e: - if "NoSuchBucket" in str(e): - raise ValueError(f"S3 bucket '{bucket_name}' does not exist or is not accessible") - raise ValueError(f"Failed to validate S3 path '{s3_path}': {str(e)}") + raise ValueError(f"Failed to validate/create S3 path '{s3_path}': {str(e)}") def _validate_hyperparameter_values(hyperparameters: dict): diff --git a/sagemaker-train/src/sagemaker/train/constants.py b/sagemaker-train/src/sagemaker/train/constants.py index c2545e79ee..309265d659 100644 --- a/sagemaker-train/src/sagemaker/train/constants.py +++ b/sagemaker-train/src/sagemaker/train/constants.py @@ -42,13 +42,15 @@ HUB_NAME =
"SageMakerPublicHub" -# Allowed reward model IDs for RLAIF trainer -_ALLOWED_REWARD_MODEL_IDS = [ - "openai.gpt-oss-120b-1:0", - "openai.gpt-oss-20b-1:0", - "qwen.qwen3-32b-v1:0", - "qwen.qwen3-coder-30b-a3b-v1:0" -] +# Allowed reward model IDs for RLAIF trainer with region restrictions +_ALLOWED_REWARD_MODEL_IDS = { + "openai.gpt-oss-120b-1:0": ["us-west-2", "us-east-1", "ap-northeast-1", "eu-west-1"], + "openai.gpt-oss-20b-1:0": ["us-west-2", "us-east-1", "ap-northeast-1", "eu-west-1"], + "qwen.qwen3-32b-v1:0": ["us-west-2", "us-east-1", "ap-northeast-1", "eu-west-1"], + "qwen.qwen3-coder-30b-a3b-v1:0": ["us-west-2", "us-east-1", "ap-northeast-1", "eu-west-1"], + "qwen.qwen3-coder-480b-a35b-v1:0": ["us-west-2", "ap-northeast-1"], + "qwen.qwen3-235b-a22b-2507-v1:0": ["us-west-2", "ap-northeast-1"] +} # Allowed evaluator models for LLM as Judge evaluator with region restrictions _ALLOWED_EVALUATOR_MODELS = { diff --git a/sagemaker-train/src/sagemaker/train/dpo_trainer.py b/sagemaker-train/src/sagemaker/train/dpo_trainer.py index 1680d92450..690bf30e48 100644 --- a/sagemaker-train/src/sagemaker/train/dpo_trainer.py +++ b/sagemaker-train/src/sagemaker/train/dpo_trainer.py @@ -41,7 +41,7 @@ class DPOTrainer(BaseTrainer): trainer = DPOTrainer( model="meta-llama/Llama-2-7b-hf", training_type=TrainingType.LORA, - model_package_group_name="my-model-group", + model_package_group="my-model-group", training_dataset="s3://bucket/preference_data.jsonl" ) @@ -50,7 +50,7 @@ class DPOTrainer(BaseTrainer): # Complete workflow: create -> wait -> get model package ARN trainer = DPOTrainer( model="meta-llama/Llama-2-7b-hf", - model_package_group_name="my-dpo-models" + model_package_group="my-dpo-models" ) # Create training job (non-blocking) @@ -75,7 +75,7 @@ class DPOTrainer(BaseTrainer): training_type (Union[TrainingType, str]): The fine-tuning approach. Valid values are TrainingType.LORA (default), TrainingType.FULL. - model_package_group_name (Optional[Union[str, ModelPackageGroup]]): + model_package_group (Optional[Union[str, ModelPackageGroup]]): The model package group for storing the fine-tuned model. Can be a group name, ARN, or ModelPackageGroup object. Required when model is not a ModelPackage. mlflow_resource_arn (Optional[str]): @@ -86,9 +86,9 @@ class DPOTrainer(BaseTrainer): mlflow_run_name (Optional[str]): The MLflow run name for this training job. training_dataset (Optional[Union[str, DataSet]]): - The training dataset with preference pairs. Can be an S3 URI, dataset ARN, or DataSet object. + The training dataset with preference pairs. Can be a dataset ARN, or DataSet object. validation_dataset (Optional[Union[str, DataSet]]): - The validation dataset. Can be an S3 URI, dataset ARN, or DataSet object. + The validation dataset. Can be a dataset ARN, or DataSet object. s3_output_path (Optional[str]): The S3 path for training job outputs. If not specified, defaults to s3://sagemaker--/output. 
@@ -101,7 +101,7 @@ def __init__( self, model: Union[str, ModelPackage], training_type: Union[TrainingType, str] = TrainingType.LORA, - model_package_group_name: Optional[Union[str, ModelPackageGroup]] = None, + model_package_group: Optional[Union[str, ModelPackageGroup]] = None, mlflow_resource_arn: Optional[str] = None, mlflow_experiment_name: Optional[str] = None, mlflow_run_name: Optional[str] = None, @@ -118,8 +118,8 @@ def __init__( self.model, self._model_name = _resolve_model_and_name(model, self.sagemaker_session) self.training_type = training_type - self.model_package_group_name = _validate_and_resolve_model_package_group(model, - model_package_group_name) + self.model_package_group = _validate_and_resolve_model_package_group(model, + model_package_group) self.mlflow_resource_arn = mlflow_resource_arn self.mlflow_experiment_name = mlflow_experiment_name self.mlflow_run_name = mlflow_run_name @@ -232,7 +232,7 @@ def train(self, _validate_hyperparameter_values(final_hyperparameters) model_package_config = _create_model_package_config( - model_package_group_name=self.model_package_group_name, + model_package_group_name=self.model_package_group, model=self.model, sagemaker_session=sagemaker_session ) diff --git a/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py b/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py index 3be78ebd6d..98e1c50c48 100644 --- a/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py +++ b/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py @@ -23,7 +23,14 @@ class LLMAsJudgeEvaluator(BaseEvaluator): This evaluator uses foundation models to evaluate LLM responses based on various quality and responsible AI metrics. - + + This feature is powered by Amazon Bedrock Evaluations. Your use of this feature is subject to the pricing of + Amazon Bedrock Evaluations, the Service Terms applicable to Amazon Bedrock, and the terms that apply to your + usage of third-party models. Amazon Bedrock Evaluations may securely transmit data across AWS Regions within your + geography for processing. For more information, see the Amazon Bedrock Evaluations documentation. + + Documentation: https://docs.aws.amazon.com/bedrock/latest/userguide/evaluation-judge.html + Attributes: evaluator_model (str): AWS Bedrock foundation model identifier to use as the judge. Required.
For supported models, see: diff --git a/sagemaker-train/src/sagemaker/train/rlaif_trainer.py b/sagemaker-train/src/sagemaker/train/rlaif_trainer.py index 230d2566d0..1bf5c02813 100644 --- a/sagemaker-train/src/sagemaker/train/rlaif_trainer.py +++ b/sagemaker-train/src/sagemaker/train/rlaif_trainer.py @@ -44,7 +44,7 @@ class RLAIFTrainer(BaseTrainer): trainer = RLAIFTrainer( model="meta-llama/Llama-2-7b-hf", training_type=TrainingType.LORA, - model_package_group_name="my-model-group", + model_package_group="my-model-group", reward_model_id="reward-model-id", reward_prompt="Rate the helpfulness of this response on a scale of 1-10", training_dataset="s3://bucket/rlaif_data.jsonl" ) @@ -55,7 +55,7 @@ class RLAIFTrainer(BaseTrainer): # Complete workflow: create -> wait -> get model package ARN trainer = RLAIFTrainer( model="meta-llama/Llama-2-7b-hf", - model_package_group_name="my-rlaif-models", + model_package_group="my-rlaif-models", reward_model_id="reward-model-id", reward_prompt="Rate the helpfulness of this response on a scale of 1-10" ) @@ -82,7 +82,7 @@ class RLAIFTrainer(BaseTrainer): training_type (Union[TrainingType, str]): The fine-tuning approach. Valid values are TrainingType.LORA (default), TrainingType.FULL. - model_package_group_name (Optional[Union[str, ModelPackageGroup]]): + model_package_group (Optional[Union[str, ModelPackageGroup]]): The model package group for storing the fine-tuned model. Can be a group name, ARN, or ModelPackageGroup object. Required when model is not a ModelPackage. reward_model_id (str): @@ -100,9 +100,9 @@ class RLAIFTrainer(BaseTrainer): mlflow_run_name (Optional[str]): The MLflow run name for this training job. training_dataset (Optional[Union[str, DataSet]]): - The training dataset. Can be an S3 URI, dataset ARN, or DataSet object. + The training dataset. Can be a dataset ARN or a DataSet object. validation_dataset (Optional[Union[str, DataSet]]): - The validation dataset. Can be an S3 URI, dataset ARN, or DataSet object. + The validation dataset. Can be a dataset ARN or a DataSet object. s3_output_path (Optional[str]): The S3 path for training job outputs. If not specified, defaults to s3://sagemaker--/output. @@ -116,7 +116,7 @@ def __init__( self, model: Union[str, ModelPackage], training_type: Union[TrainingType, str] = TrainingType.LORA, - model_package_group_name: Optional[Union[str, ModelPackageGroup]] = None, + model_package_group: Optional[Union[str, ModelPackageGroup]] = None, reward_model_id: str = None, reward_prompt: Union[str, Evaluator] = None, mlflow_resource_arn: Optional[Union[str, MlflowTrackingServer]] = None, mlflow_experiment_name: Optional[str] = None, @@ -138,8 +138,8 @@ def __init__( self.model, self._model_name = _resolve_model_and_name(model, self.sagemaker_session) self.training_type = training_type - self.model_package_group_name = _validate_and_resolve_model_package_group(model, - model_package_group_name) + self.model_package_group = _validate_and_resolve_model_package_group(model, + model_package_group) self.reward_model_id = self._validate_reward_model_id(reward_model_id) self.reward_prompt = reward_prompt self.mlflow_resource_arn = mlflow_resource_arn @@ -173,8 +173,20 @@ def _validate_reward_model_id(self, reward_model_id): if reward_model_id not in _ALLOWED_REWARD_MODEL_IDS: raise ValueError( f"Invalid reward_model_id '{reward_model_id}'. 
" - f"Available models are: {_ALLOWED_REWARD_MODEL_IDS}" + f"Available models are: {list(_ALLOWED_REWARD_MODEL_IDS.keys())}" ) + + # Check region compatibility + session = self.sagemaker_session if hasattr(self, 'sagemaker_session') and self.sagemaker_session else TrainDefaults.get_sagemaker_session() + current_region = session.boto_region_name + allowed_regions = _ALLOWED_REWARD_MODEL_IDS[reward_model_id] + + if current_region not in allowed_regions: + raise ValueError( + f"Reward model '{reward_model_id}' is not available in region '{current_region}'. " + f"Available regions for this model: {allowed_regions}" + ) + return reward_model_id @@ -239,7 +251,7 @@ def train(self, training_dataset: Optional[Union[str, DataSet]] = None, validati _validate_hyperparameter_values(final_hyperparameters) model_package_config = _create_model_package_config( - model_package_group_name=self.model_package_group_name, + model_package_group_name=self.model_package_group, model=self.model, sagemaker_session=sagemaker_session ) diff --git a/sagemaker-train/src/sagemaker/train/rlvr_trainer.py b/sagemaker-train/src/sagemaker/train/rlvr_trainer.py index 4274723f5a..f00c7aac36 100644 --- a/sagemaker-train/src/sagemaker/train/rlvr_trainer.py +++ b/sagemaker-train/src/sagemaker/train/rlvr_trainer.py @@ -42,7 +42,7 @@ class RLVRTrainer(BaseTrainer): trainer = RLVRTrainer( model="meta-llama/Llama-2-7b-hf", training_type=TrainingType.LORA, - model_package_group_name="my-model-group", + model_package_group="my-model-group", custom_reward_function="arn:aws:sagemaker:us-east-1:123456789012:hub-content/SageMakerPublicHub/JsonDoc/my-evaluator/1.0", training_dataset="s3://bucket/rlvr_data.jsonl" ) @@ -52,7 +52,7 @@ class RLVRTrainer(BaseTrainer): # Complete workflow: create -> wait -> get model package ARN trainer = RLVRTrainer( model="meta-llama/Llama-2-7b-hf", - model_package_group_name="my-rlvr-models", + model_package_group="my-rlvr-models", custom_reward_function="arn:aws:sagemaker:us-east-1:123456789012:hub-content/SageMakerPublicHub/JsonDoc/my-evaluator/1.0" ) @@ -78,7 +78,7 @@ class RLVRTrainer(BaseTrainer): training_type (Union[TrainingType, str]): The fine-tuning approach. Valid values are TrainingType.LORA (default), TrainingType.FULL. - model_package_group_name (Optional[Union[str, ModelPackageGroup]]): + model_package_group (Optional[Union[str, ModelPackageGroup]]): The model package group for storing the fine-tuned model. Can be a group name, ARN, or ModelPackageGroup object. Required when model is not a ModelPackage. custom_reward_function (Optional[Union[str, Evaluator]]): @@ -92,9 +92,9 @@ class RLVRTrainer(BaseTrainer): mlflow_run_name (Optional[str]): The MLflow run name for this training job. training_dataset (Optional[Union[str, DataSet]]): - The training dataset. Can be an S3 URI, dataset ARN, or DataSet object. + The training dataset. Can be a dataset ARN, or DataSet object. validation_dataset (Optional[Union[str, DataSet]]): - The validation dataset. Can be an S3 URI, dataset ARN, or DataSet object. + The validation dataset. Can be a dataset ARN, or DataSet object. s3_output_path (Optional[str]): The S3 path for training job outputs. If not specified, defaults to s3://sagemaker--/output. 
@@ -108,7 +108,7 @@ def __init__( self, model: Union[str, ModelPackage], training_type: Union[TrainingType, str] = TrainingType.LORA, - model_package_group_name: Optional[Union[str, ModelPackageGroup]] = None, + model_package_group: Optional[Union[str, ModelPackageGroup]] = None, custom_reward_function: Optional[Union[str, Evaluator]] = None, mlflow_resource_arn: Optional[Union[str, MlflowTrackingServer]] = None, mlflow_experiment_name: Optional[str] = None, @@ -129,8 +129,8 @@ def __init__( self.model, self._model_name = _resolve_model_and_name(model, self.sagemaker_session) self.training_type = training_type - self.model_package_group_name = _validate_and_resolve_model_package_group(model, - model_package_group_name) + self.model_package_group = _validate_and_resolve_model_package_group(model, + model_package_group) self.custom_reward_function = custom_reward_function self.mlflow_resource_arn = mlflow_resource_arn self.mlflow_experiment_name = mlflow_experiment_name @@ -239,7 +239,7 @@ def train(self, training_dataset: Optional[Union[str, DataSet]] = None, _validate_hyperparameter_values(final_hyperparameters) model_package_config = _create_model_package_config( - model_package_group_name=self.model_package_group_name, + model_package_group_name=self.model_package_group, model=self.model, sagemaker_session=sagemaker_session ) diff --git a/sagemaker-train/src/sagemaker/train/sft_trainer.py b/sagemaker-train/src/sagemaker/train/sft_trainer.py index 17c4ec344d..57d2c52a06 100644 --- a/sagemaker-train/src/sagemaker/train/sft_trainer.py +++ b/sagemaker-train/src/sagemaker/train/sft_trainer.py @@ -42,7 +42,7 @@ class SFTTrainer(BaseTrainer): trainer = SFTTrainer( model="meta-llama/Llama-2-7b-hf", training_type=TrainingType.LORA, - model_package_group_name="my-model-group", + model_package_group="my-model-group", training_dataset="s3://bucket/train.jsonl", validation_dataset="s3://bucket/val.jsonl" ) @@ -52,7 +52,7 @@ class SFTTrainer(BaseTrainer): # Complete workflow: trainer = SFTTrainer( model="meta-llama/Llama-2-7b-hf", - model_package_group_name="my-fine-tuned-models" + model_package_group="my-fine-tuned-models" ) # Create training job (non-blocking) @@ -77,7 +77,7 @@ class SFTTrainer(BaseTrainer): training_type (Union[TrainingType, str]): The fine-tuning approach. Valid values are TrainingType.LORA (default), TrainingType.FULL. - model_package_group_name (Optional[Union[str, ModelPackageGroup]]): + model_package_group (Optional[Union[str, ModelPackageGroup]]): The model package group for storing the fine-tuned model. Can be a group name, ARN, or ModelPackageGroup object. Required when model is not a ModelPackage. mlflow_resource_arn (Optional[str]): @@ -88,9 +88,9 @@ class SFTTrainer(BaseTrainer): mlflow_run_name (Optional[str]): The MLflow run name for this training job. training_dataset (Optional[Union[str, DataSet]]): - The training dataset. Can be an S3 URI, dataset ARN, or DataSet object. + The training dataset. Can be a dataset ARN or a DataSet object. validation_dataset (Optional[Union[str, DataSet]]): - The validation dataset. Can be an S3 URI, dataset ARN, or DataSet object. + The validation dataset. Can be a dataset ARN or a DataSet object. s3_output_path (Optional[str]): The S3 path for training job outputs. If not specified, defaults to s3://sagemaker--/output. 
@@ -104,7 +104,7 @@ def __init__( self, model: Union[str, ModelPackage], training_type: Union[TrainingType, str] = TrainingType.LORA, - model_package_group_name: Optional[Union[str, ModelPackageGroup]] = None, + model_package_group: Optional[Union[str, ModelPackageGroup]] = None, mlflow_resource_arn: Optional[str] = None, mlflow_experiment_name: Optional[str] = None, mlflow_run_name: Optional[str] = None, @@ -122,8 +122,8 @@ def __init__( self.model, self._model_name = _resolve_model_and_name(model, self.sagemaker_session) self.training_type = training_type - self.model_package_group_name = _validate_and_resolve_model_package_group(model, - model_package_group_name) + self.model_package_group = _validate_and_resolve_model_package_group(model, + model_package_group) self.mlflow_resource_arn = mlflow_resource_arn self.mlflow_experiment_name = mlflow_experiment_name self.mlflow_run_name = mlflow_run_name @@ -233,7 +233,7 @@ def train(self, training_dataset: Optional[Union[str, DataSet]] = None, validati _validate_hyperparameter_values(final_hyperparameters) model_package_config = _create_model_package_config( - model_package_group_name=self.model_package_group_name, + model_package_group_name=self.model_package_group, model=self.model, sagemaker_session=sagemaker_session ) diff --git a/sagemaker-train/tests/integ/train/test_dpo_trainer_integration.py b/sagemaker-train/tests/integ/train/test_dpo_trainer_integration.py index 8c2c49dbc4..65cbd6c246 100644 --- a/sagemaker-train/tests/integ/train/test_dpo_trainer_integration.py +++ b/sagemaker-train/tests/integ/train/test_dpo_trainer_integration.py @@ -29,7 +29,7 @@ def test_dpo_trainer_lora_complete_workflow(sagemaker_session): trainer = DPOTrainer( model="meta-textgeneration-llama-3-2-1b-instruct", training_type=TrainingType.LORA, - model_package_group_name="sdk-test-finetuned-models", + model_package_group="sdk-test-finetuned-models", training_dataset="arn:aws:sagemaker:us-west-2:729646638167:hub-content/sdktest/DataSet/dpo-oss-test-data/0.0.1", s3_output_path="s3://mc-flows-sdk-testing/output/", accept_eula=True @@ -68,7 +68,7 @@ def test_dpo_trainer_with_validation_dataset(sagemaker_session): dpo_trainer = DPOTrainer( model="meta-textgeneration-llama-3-2-1b-instruct", training_type=TrainingType.LORA, - model_package_group_name="sdk-test-finetuned-models", + model_package_group="sdk-test-finetuned-models", training_dataset="arn:aws:sagemaker:us-west-2:729646638167:hub-content/sdktest/DataSet/dpo-oss-test-data/0.0.1", validation_dataset="arn:aws:sagemaker:us-west-2:729646638167:hub-content/sdktest/DataSet/dpo-oss-test-data/0.0.1", s3_output_path="s3://mc-flows-sdk-testing/output/", diff --git a/sagemaker-train/tests/integ/train/test_rlaif_trainer_integration.py b/sagemaker-train/tests/integ/train/test_rlaif_trainer_integration.py index 7e7de19dee..296d62bfd8 100644 --- a/sagemaker-train/tests/integ/train/test_rlaif_trainer_integration.py +++ b/sagemaker-train/tests/integ/train/test_rlaif_trainer_integration.py @@ -28,7 +28,7 @@ def test_rlaif_trainer_lora_complete_workflow(sagemaker_session): rlaif_trainer = RLAIFTrainer( model="meta-textgeneration-llama-3-2-1b-instruct", training_type=TrainingType.LORA, - model_package_group_name="sdk-test-finetuned-models", + model_package_group="sdk-test-finetuned-models", reward_model_id='openai.gpt-oss-120b-1:0', reward_prompt='Builtin.Summarize', mlflow_experiment_name="test-rlaif-finetuned-models-exp", @@ -68,7 +68,7 @@ def test_rlaif_trainer_with_custom_reward_settings(sagemaker_session): rlaif_trainer 
= RLAIFTrainer( model="meta-textgeneration-llama-3-2-1b-instruct", training_type=TrainingType.LORA, - model_package_group_name="sdk-test-finetuned-models", + model_package_group="sdk-test-finetuned-models", reward_model_id='openai.gpt-oss-120b-1:0', reward_prompt="arn:aws:sagemaker:us-west-2:729646638167:hub-content/sdktest/JsonDoc/rlaif-test-prompt/0.0.1", mlflow_experiment_name="test-rlaif-finetuned-models-exp", @@ -107,7 +107,7 @@ def test_rlaif_trainer_continued_finetuning(sagemaker_session): rlaif_trainer = RLAIFTrainer( model="arn:aws:sagemaker:us-west-2:729646638167:model-package/sdk-test-finetuned-models/1", training_type=TrainingType.LORA, - model_package_group_name="sdk-test-finetuned-models", + model_package_group="sdk-test-finetuned-models", reward_model_id='openai.gpt-oss-120b-1:0', reward_prompt='Builtin.Summarize', mlflow_experiment_name="test-rlaif-finetuned-models-exp", diff --git a/sagemaker-train/tests/integ/train/test_rlvr_trainer_integration.py b/sagemaker-train/tests/integ/train/test_rlvr_trainer_integration.py index 6637a1fdb4..63d3ae3134 100644 --- a/sagemaker-train/tests/integ/train/test_rlvr_trainer_integration.py +++ b/sagemaker-train/tests/integ/train/test_rlvr_trainer_integration.py @@ -29,7 +29,7 @@ def test_rlvr_trainer_lora_complete_workflow(sagemaker_session): rlvr_trainer = RLVRTrainer( model="meta-textgeneration-llama-3-2-1b-instruct", training_type=TrainingType.LORA, - model_package_group_name="sdk-test-finetuned-models", + model_package_group="sdk-test-finetuned-models", mlflow_experiment_name="test-rlvr-finetuned-models-exp", mlflow_run_name="test-rlvr-finetuned-models-run", training_dataset="arn:aws:sagemaker:us-west-2:729646638167:hub-content/sdktest/DataSet/rlvr-rlaif-oss-test-data/0.0.1", @@ -67,7 +67,7 @@ def test_rlvr_trainer_with_custom_reward_function(sagemaker_session): rlvr_trainer = RLVRTrainer( model="meta-textgeneration-llama-3-2-1b-instruct", training_type=TrainingType.LORA, - model_package_group_name="sdk-test-finetuned-models", + model_package_group="sdk-test-finetuned-models", mlflow_experiment_name="test-rlvr-finetuned-models-exp", mlflow_run_name="test-rlvr-finetuned-models-run", training_dataset="arn:aws:sagemaker:us-west-2:729646638167:hub-content/sdktest/DataSet/rlvr-rlaif-oss-test-data/0.0.1", @@ -108,7 +108,7 @@ def test_rlvr_trainer_nova_workflow(sagemaker_session): # For fine-tuning rlvr_trainer = RLVRTrainer( model="nova-textgeneration-lite-v2", - model_package_group_name="sdk-test-finetuned-models", + model_package_group="sdk-test-finetuned-models", mlflow_experiment_name="test-nova-rlvr-finetuned-models-exp", mlflow_run_name="test-nova-rlvr-finetuned-models-run", training_dataset="s3://mc-flows-sdk-testing-us-east-1/input_data/rlvr-nova/grpo-64-sample.jsonl", diff --git a/sagemaker-train/tests/integ/train/test_sft_trainer_integration.py b/sagemaker-train/tests/integ/train/test_sft_trainer_integration.py index aced084c6b..98dd154c3f 100644 --- a/sagemaker-train/tests/integ/train/test_sft_trainer_integration.py +++ b/sagemaker-train/tests/integ/train/test_sft_trainer_integration.py @@ -29,7 +29,7 @@ def test_sft_trainer_lora_complete_workflow(sagemaker_session): sft_trainer = SFTTrainer( model="meta-textgeneration-llama-3-2-1b-instruct", training_type=TrainingType.LORA, - model_package_group_name="arn:aws:sagemaker:us-west-2:729646638167:model-package-group/sdk-test-finetuned-models", + model_package_group="arn:aws:sagemaker:us-west-2:729646638167:model-package-group/sdk-test-finetuned-models", 
training_dataset="arn:aws:sagemaker:us-west-2:729646638167:hub-content/sdktest/DataSet/sft-oss-test-data/0.0.1", s3_output_path="s3://mc-flows-sdk-testing/output/", accept_eula=True @@ -65,7 +65,7 @@ def test_sft_trainer_with_validation_dataset(sagemaker_session): sft_trainer = SFTTrainer( model="meta-textgeneration-llama-3-2-1b-instruct", training_type=TrainingType.LORA, - model_package_group_name="arn:aws:sagemaker:us-west-2:729646638167:model-package-group/sdk-test-finetuned-models", + model_package_group="arn:aws:sagemaker:us-west-2:729646638167:model-package-group/sdk-test-finetuned-models", training_dataset="arn:aws:sagemaker:us-west-2:729646638167:hub-content/sdktest/DataSet/sft-oss-test-data/0.0.1", validation_dataset="arn:aws:sagemaker:us-west-2:729646638167:hub-content/sdktest/DataSet/sft-oss-test-data/0.0.1", accept_eula=True @@ -103,7 +103,7 @@ def test_sft_trainer_nova_workflow(sagemaker_session): sft_trainer_nova = SFTTrainer( model="nova-textgeneration-lite-v2", training_type=TrainingType.LORA, - model_package_group_name="sdk-test-finetuned-models", + model_package_group="sdk-test-finetuned-models", mlflow_experiment_name="test-nova-finetuned-models-exp", mlflow_run_name="test-nova-finetuned-models-run", training_dataset="arn:aws:sagemaker:us-east-1:729646638167:hub-content/sdktest/DataSet/sft-nova-test-dataset/0.0.1", diff --git a/sagemaker-train/tests/unit/train/common_utils/test_finetune_utils.py b/sagemaker-train/tests/unit/train/common_utils/test_finetune_utils.py index e77b019e68..163ad332bb 100644 --- a/sagemaker-train/tests/unit/train/common_utils/test_finetune_utils.py +++ b/sagemaker-train/tests/unit/train/common_utils/test_finetune_utils.py @@ -537,16 +537,16 @@ def test__validate_s3_path_exists_with_prefix_exists(self, mock_boto_client): @patch('boto3.client') def test__validate_s3_path_exists_with_prefix_not_exists(self, mock_boto_client): - """Test S3 path validation raises error when prefix doesn't exist""" + """Test S3 path validation creates prefix when it doesn't exist""" mock_session = Mock() mock_s3_client = Mock() mock_session.boto_session.client.return_value = mock_s3_client mock_s3_client.list_objects_v2.return_value = {} # No contents - with pytest.raises(ValueError, match="Failed to validate S3 path 's3://test-bucket/prefix': S3 prefix 'prefix' does not exist in bucket 'test-bucket'"): - _validate_s3_path_exists("s3://test-bucket/prefix", mock_session) + _validate_s3_path_exists("s3://test-bucket/prefix", mock_session) mock_s3_client.head_bucket.assert_called_once_with(Bucket="test-bucket") mock_s3_client.list_objects_v2.assert_called_once_with(Bucket="test-bucket", Prefix="prefix", MaxKeys=1) + mock_s3_client.put_object.assert_called_once_with(Bucket="test-bucket", Key="prefix/", Body=b'') diff --git a/sagemaker-train/tests/unit/train/test_dpo_trainer.py b/sagemaker-train/tests/unit/train/test_dpo_trainer.py index 85dce8d56b..4f67221029 100644 --- a/sagemaker-train/tests/unit/train/test_dpo_trainer.py +++ b/sagemaker-train/tests/unit/train/test_dpo_trainer.py @@ -20,7 +20,7 @@ def test_init_with_defaults(self, mock_finetuning_options, mock_validate_group, mock_hyperparams = Mock() mock_hyperparams.to_dict.return_value = {} mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) - trainer = DPOTrainer(model="test-model", model_package_group_name="test-group") + trainer = DPOTrainer(model="test-model", model_package_group="test-group") assert trainer.training_type == TrainingType.LORA assert trainer.model == "test-model" @@ -31,7 
+31,7 @@ def test_init_with_full_training_type(self, mock_finetuning_options, mock_valida mock_hyperparams = Mock() mock_hyperparams.to_dict.return_value = {} mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) - trainer = DPOTrainer(model="test-model", training_type=TrainingType.FULL, model_package_group_name="test-group") + trainer = DPOTrainer(model="test-model", training_type=TrainingType.FULL, model_package_group="test-group") assert trainer.training_type == TrainingType.FULL @patch('sagemaker.train.dpo_trainer._resolve_model_and_name') @@ -74,7 +74,7 @@ def test_train_with_lora(self, mock_training_job_create, mock_model_package_conf mock_training_job.wait = Mock() mock_training_job_create.return_value = mock_training_job - trainer = DPOTrainer(model="test-model", training_type=TrainingType.LORA, model_package_group_name="test-group", training_dataset="s3://bucket/train") + trainer = DPOTrainer(model="test-model", training_type=TrainingType.LORA, model_package_group="test-group", training_dataset="s3://bucket/train") trainer.train(wait=False) assert mock_training_job_create.called @@ -86,7 +86,7 @@ def test_training_type_string_value(self, mock_finetuning_options, mock_validate mock_hyperparams = Mock() mock_hyperparams.to_dict.return_value = {} mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) - trainer = DPOTrainer(model="test-model", training_type="CUSTOM", model_package_group_name="test-group") + trainer = DPOTrainer(model="test-model", training_type="CUSTOM", model_package_group="test-group") assert trainer.training_type == "CUSTOM" @patch('sagemaker.train.dpo_trainer._resolve_model_and_name') @@ -115,7 +115,7 @@ def test_init_with_datasets(self, mock_finetuning_options, mock_validate_group, mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) trainer = DPOTrainer( model="test-model", - model_package_group_name="test-group", + model_package_group="test-group", training_dataset="s3://bucket/train", validation_dataset="s3://bucket/val" ) @@ -131,7 +131,7 @@ def test_init_with_mlflow_config(self, mock_finetuning_options, mock_validate_gr mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) trainer = DPOTrainer( model="test-model", - model_package_group_name="test-group", + model_package_group="test-group", mlflow_resource_arn="arn:aws:mlflow:us-east-1:123456789012:tracking-server/test", mlflow_experiment_name="test-experiment", mlflow_run_name="test-run" @@ -180,7 +180,7 @@ def test_train_with_full_training(self, mock_training_job_create, mock_model_pac mock_training_job.wait = Mock() mock_training_job_create.return_value = mock_training_job - trainer = DPOTrainer(model="test-model", training_type=TrainingType.FULL, model_package_group_name="test-group", training_dataset="s3://bucket/train") + trainer = DPOTrainer(model="test-model", training_type=TrainingType.FULL, model_package_group="test-group", training_dataset="s3://bucket/train") trainer.train(wait=False) assert mock_training_job_create.called @@ -192,7 +192,7 @@ def test_fit_without_datasets_raises_error(self, mock_finetuning_options, mock_v mock_hyperparams = Mock() mock_hyperparams.to_dict.return_value = {} mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) - trainer = DPOTrainer(model="test-model", model_package_group_name="test-group") + trainer = DPOTrainer(model="test-model", model_package_group="test-group") with pytest.raises(Exception): trainer.train(wait=False) @@ -209,9 +209,9 @@ def 
test_model_package_group_handling(self, mock_validate_group, mock_get_option trainer = DPOTrainer( model="test-model", - model_package_group_name="test-group" + model_package_group="test-group" ) - assert trainer.model_package_group_name == "test-group" + assert trainer.model_package_group == "test-group" @patch('sagemaker.train.dpo_trainer._validate_and_resolve_model_package_group') @patch('sagemaker.train.dpo_trainer._get_fine_tuning_options_and_model_arn') @@ -222,7 +222,7 @@ def test_s3_output_path_configuration(self, mock_finetuning_options, mock_valida mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) trainer = DPOTrainer( model="test-model", - model_package_group_name="test-group", + model_package_group="test-group", s3_output_path="s3://bucket/output" ) assert trainer.s3_output_path == "s3://bucket/output" @@ -263,7 +263,7 @@ def test_train_with_tags(self, mock_training_job_create, mock_model_package_conf mock_training_job.wait = Mock() mock_training_job_create.return_value = mock_training_job - trainer = DPOTrainer(model="test-model", model_package_group_name="test-group", training_dataset="s3://bucket/train") + trainer = DPOTrainer(model="test-model", model_package_group="test-group", training_dataset="s3://bucket/train") trainer.train(wait=False) mock_training_job_create.assert_called_once() @@ -284,10 +284,10 @@ def test_gated_model_eula_validation(self, mock_finetuning_options, mock_validat # Should raise error when accept_eula=False for gated model with pytest.raises(ValueError, match="gated model and requires EULA acceptance"): - DPOTrainer(model="gated-model", model_package_group_name="test-group", accept_eula=False) + DPOTrainer(model="gated-model", model_package_group="test-group", accept_eula=False) # Should work when accept_eula=True for gated model - trainer = DPOTrainer(model="gated-model", model_package_group_name="test-group", accept_eula=True) + trainer = DPOTrainer(model="gated-model", model_package_group="test-group", accept_eula=True) assert trainer.accept_eula == True def test_process_hyperparameters_removes_constructor_handled_keys(self): diff --git a/sagemaker-train/tests/unit/train/test_rlaif_trainer.py b/sagemaker-train/tests/unit/train/test_rlaif_trainer.py index eca69eed6d..4c45e21ba1 100644 --- a/sagemaker-train/tests/unit/train/test_rlaif_trainer.py +++ b/sagemaker-train/tests/unit/train/test_rlaif_trainer.py @@ -20,7 +20,7 @@ def test_init_with_defaults(self, mock_finetuning_options, mock_validate_group, mock_hyperparams = Mock() mock_hyperparams.to_dict.return_value = {} mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) - trainer = RLAIFTrainer(model="test-model", model_package_group_name="test-group") + trainer = RLAIFTrainer(model="test-model", model_package_group="test-group") assert trainer.training_type == TrainingType.LORA assert trainer.model == "test-model" @@ -31,7 +31,7 @@ def test_init_with_full_training_type(self, mock_finetuning_options, mock_valida mock_hyperparams = Mock() mock_hyperparams.to_dict.return_value = {} mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) - trainer = RLAIFTrainer(model="test-model", training_type=TrainingType.FULL, model_package_group_name="test-group") + trainer = RLAIFTrainer(model="test-model", training_type=TrainingType.FULL, model_package_group="test-group") assert trainer.training_type == TrainingType.FULL @patch('sagemaker.train.common_utils.finetune_utils._get_beta_session') @@ -73,7 +73,7 @@ def 
test_peft_value_for_lora_training(self, mock_training_job_create, mock_model mock_training_job.wait = Mock() mock_training_job_create.return_value = mock_training_job - trainer = RLAIFTrainer(model="test-model", training_type=TrainingType.LORA, model_package_group_name="test-group", training_dataset="s3://bucket/train") + trainer = RLAIFTrainer(model="test-model", training_type=TrainingType.LORA, model_package_group="test-group", training_dataset="s3://bucket/train") trainer.train(wait=False) assert mock_training_job_create.called @@ -117,7 +117,7 @@ def test_peft_value_for_full_training(self, mock_training_job_create, mock_model mock_training_job.wait = Mock() mock_training_job_create.return_value = mock_training_job - trainer = RLAIFTrainer(model="test-model", training_type=TrainingType.FULL, model_package_group_name="test-group", training_dataset="s3://bucket/train") + trainer = RLAIFTrainer(model="test-model", training_type=TrainingType.FULL, model_package_group="test-group", training_dataset="s3://bucket/train") trainer.train(wait=False) assert mock_training_job_create.called @@ -129,7 +129,7 @@ def test_training_type_string_value(self, mock_finetuning_options, mock_validate mock_hyperparams = Mock() mock_hyperparams.to_dict.return_value = {} mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) - trainer = RLAIFTrainer(model="test-model", training_type="CUSTOM", model_package_group_name="test-group") + trainer = RLAIFTrainer(model="test-model", training_type="CUSTOM", model_package_group="test-group") assert trainer.training_type == "CUSTOM" @patch('sagemaker.train.common_utils.finetune_utils._get_beta_session') @@ -161,7 +161,7 @@ def test_init_with_datasets(self, mock_finetuning_options, mock_validate_group, mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) trainer = RLAIFTrainer( model="test-model", - model_package_group_name="test-group", + model_package_group="test-group", training_dataset="s3://bucket/train", validation_dataset="s3://bucket/val" ) @@ -177,7 +177,7 @@ def test_init_with_mlflow_config(self, mock_finetuning_options, mock_validate_gr mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) trainer = RLAIFTrainer( model="test-model", - model_package_group_name="test-group", + model_package_group="test-group", mlflow_resource_arn="arn:aws:mlflow:us-east-1:123456789012:tracking-server/test", mlflow_experiment_name="test-experiment", mlflow_run_name="test-run" @@ -195,7 +195,7 @@ def test_train_without_datasets_raises_error(self, mock_finetuning_options, mock mock_hyperparams.to_dict.return_value = {} mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) mock_get_session.return_value = Mock() - trainer = RLAIFTrainer(model="test-model", model_package_group_name="test-group") + trainer = RLAIFTrainer(model="test-model", model_package_group="test-group") with pytest.raises(Exception): trainer.train(wait=False) @@ -214,9 +214,9 @@ def test_model_package_group_handling(self, mock_validate_group, mock_get_option trainer = RLAIFTrainer( model="test-model", - model_package_group_name="test-group" + model_package_group="test-group" ) - assert trainer.model_package_group_name == "test-group" + assert trainer.model_package_group == "test-group" @patch('sagemaker.train.rlaif_trainer._validate_and_resolve_model_package_group') @patch('sagemaker.train.rlaif_trainer._get_fine_tuning_options_and_model_arn') @@ -227,7 +227,7 @@ def test_s3_output_path_configuration(self, 
mock_finetuning_options, mock_valida mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) trainer = RLAIFTrainer( model="test-model", - model_package_group_name="test-group", + model_package_group="test-group", s3_output_path="s3://bucket/output" ) assert trainer.s3_output_path == "s3://bucket/output" @@ -268,7 +268,7 @@ def test_train_with_tags(self, mock_training_job_create, mock_model_package_conf mock_training_job.wait = Mock() mock_training_job_create.return_value = mock_training_job - trainer = RLAIFTrainer(model="test-model", model_package_group_name="test-group", training_dataset="s3://bucket/train") + trainer = RLAIFTrainer(model="test-model", model_package_group="test-group", training_dataset="s3://bucket/train") trainer.train(wait=False) mock_training_job_create.assert_called_once() @@ -289,10 +289,10 @@ def test_gated_model_eula_validation(self, mock_finetuning_options, mock_validat # Should raise error when accept_eula=False for gated model with pytest.raises(ValueError, match="gated model and requires EULA acceptance"): - RLAIFTrainer(model="gated-model", model_package_group_name="test-group", accept_eula=False) + RLAIFTrainer(model="gated-model", model_package_group="test-group", accept_eula=False) # Should work when accept_eula=True for gated model - trainer = RLAIFTrainer(model="gated-model", model_package_group_name="test-group", accept_eula=True) + trainer = RLAIFTrainer(model="gated-model", model_package_group="test-group", accept_eula=True) assert trainer.accept_eula == True def test_process_hyperparameters_removes_constructor_handled_keys(self): diff --git a/sagemaker-train/tests/unit/train/test_rlvr_trainer.py b/sagemaker-train/tests/unit/train/test_rlvr_trainer.py index 7128a3545c..c68cd1c94d 100644 --- a/sagemaker-train/tests/unit/train/test_rlvr_trainer.py +++ b/sagemaker-train/tests/unit/train/test_rlvr_trainer.py @@ -20,7 +20,7 @@ def test_init_with_defaults(self, mock_finetuning_options, mock_validate_group, mock_hyperparams = Mock() mock_hyperparams.to_dict.return_value = {} mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) - trainer = RLVRTrainer(model="test-model", model_package_group_name="test-group") + trainer = RLVRTrainer(model="test-model", model_package_group="test-group") assert trainer.training_type == TrainingType.LORA assert trainer.model == "test-model" @@ -31,7 +31,7 @@ def test_init_with_full_training_type(self, mock_finetuning_options, mock_valida mock_hyperparams = Mock() mock_hyperparams.to_dict.return_value = {} mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) - trainer = RLVRTrainer(model="test-model", training_type=TrainingType.FULL, model_package_group_name="test-group") + trainer = RLVRTrainer(model="test-model", training_type=TrainingType.FULL, model_package_group="test-group") assert trainer.training_type == TrainingType.FULL @patch('sagemaker.train.common_utils.finetune_utils._get_beta_session') @@ -73,7 +73,7 @@ def test_peft_value_for_lora_training(self, mock_training_job_create, mock_model mock_training_job.wait = Mock() mock_training_job_create.return_value = mock_training_job - trainer = RLVRTrainer(model="test-model", training_type=TrainingType.LORA, model_package_group_name="test-group", training_dataset="s3://bucket/train") + trainer = RLVRTrainer(model="test-model", training_type=TrainingType.LORA, model_package_group="test-group", training_dataset="s3://bucket/train") trainer.train(wait=False) assert mock_training_job_create.called @@ -117,7 
+117,7 @@ def test_peft_value_for_full_training(self, mock_training_job_create, mock_model mock_training_job.wait = Mock() mock_training_job_create.return_value = mock_training_job - trainer = RLVRTrainer(model="test-model", training_type=TrainingType.FULL, model_package_group_name="test-group", training_dataset="s3://bucket/train") + trainer = RLVRTrainer(model="test-model", training_type=TrainingType.FULL, model_package_group="test-group", training_dataset="s3://bucket/train") trainer.train(wait=False) assert mock_training_job_create.called @@ -129,7 +129,7 @@ def test_training_type_string_value(self, mock_finetuning_options, mock_validate mock_hyperparams = Mock() mock_hyperparams.to_dict.return_value = {} mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) - trainer = RLVRTrainer(model="test-model", training_type="CUSTOM", model_package_group_name="test-group") + trainer = RLVRTrainer(model="test-model", training_type="CUSTOM", model_package_group="test-group") assert trainer.training_type == "CUSTOM" @patch('sagemaker.train.common_utils.finetune_utils._get_beta_session') @@ -161,7 +161,7 @@ def test_init_with_datasets(self, mock_finetuning_options, mock_validate_group, mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) trainer = RLVRTrainer( model="test-model", - model_package_group_name="test-group", + model_package_group="test-group", training_dataset="s3://bucket/train", validation_dataset="s3://bucket/val" ) @@ -177,7 +177,7 @@ def test_init_with_mlflow_config(self, mock_finetuning_options, mock_validate_gr mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) trainer = RLVRTrainer( model="test-model", - model_package_group_name="test-group", + model_package_group="test-group", mlflow_resource_arn="arn:aws:mlflow:us-east-1:123456789012:tracking-server/test", mlflow_experiment_name="test-experiment", mlflow_run_name="test-run" @@ -195,7 +195,7 @@ def test_train_without_datasets_raises_error(self, mock_finetuning_options, mock mock_hyperparams.to_dict.return_value = {} mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) mock_get_session.return_value = Mock() - trainer = RLVRTrainer(model="test-model", model_package_group_name="test-group") + trainer = RLVRTrainer(model="test-model", model_package_group="test-group") with pytest.raises(Exception): trainer.train(wait=False) @@ -214,9 +214,9 @@ def test_model_package_group_handling(self, mock_validate_group, mock_get_option trainer = RLVRTrainer( model="test-model", - model_package_group_name="test-group" + model_package_group="test-group" ) - assert trainer.model_package_group_name == "test-group" + assert trainer.model_package_group == "test-group" @patch('sagemaker.train.rlvr_trainer._validate_and_resolve_model_package_group') @patch('sagemaker.train.rlvr_trainer._get_fine_tuning_options_and_model_arn') @@ -227,7 +227,7 @@ def test_s3_output_path_configuration(self, mock_finetuning_options, mock_valida mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) trainer = RLVRTrainer( model="test-model", - model_package_group_name="test-group", + model_package_group="test-group", s3_output_path="s3://bucket/output" ) assert trainer.s3_output_path == "s3://bucket/output" @@ -266,7 +266,7 @@ def test_train_with_tags(self, mock_training_job_create, mock_model_package_conf mock_training_job.wait = Mock() mock_training_job_create.return_value = mock_training_job - trainer = RLVRTrainer(model="test-model", 
model_package_group_name="test-group", training_dataset="s3://bucket/train") + trainer = RLVRTrainer(model="test-model", model_package_group="test-group", training_dataset="s3://bucket/train") trainer.train(wait=False) mock_training_job_create.assert_called_once() @@ -287,10 +287,10 @@ def test_gated_model_eula_validation(self, mock_finetuning_options, mock_validat # Should raise error when accept_eula=False for gated model with pytest.raises(ValueError, match="gated model and requires EULA acceptance"): - RLVRTrainer(model="gated-model", model_package_group_name="test-group", accept_eula=False) + RLVRTrainer(model="gated-model", model_package_group="test-group", accept_eula=False) # Should work when accept_eula=True for gated model - trainer = RLVRTrainer(model="gated-model", model_package_group_name="test-group", accept_eula=True) + trainer = RLVRTrainer(model="gated-model", model_package_group="test-group", accept_eula=True) assert trainer.accept_eula == True def test_process_hyperparameters_removes_constructor_handled_keys(self): diff --git a/sagemaker-train/tests/unit/train/test_sft_trainer.py b/sagemaker-train/tests/unit/train/test_sft_trainer.py index 77b120bd6f..38042594d4 100644 --- a/sagemaker-train/tests/unit/train/test_sft_trainer.py +++ b/sagemaker-train/tests/unit/train/test_sft_trainer.py @@ -20,7 +20,7 @@ def test_init_with_defaults(self, mock_finetuning_options, mock_validate_group, mock_hyperparams = Mock() mock_hyperparams.to_dict.return_value = {} mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) - trainer = SFTTrainer(model="test-model", model_package_group_name="test-group") + trainer = SFTTrainer(model="test-model", model_package_group="test-group") assert trainer.training_type == TrainingType.LORA assert trainer.model == "test-model" @@ -31,7 +31,7 @@ def test_init_with_full_training_type(self, mock_finetuning_options, mock_valida mock_hyperparams = Mock() mock_hyperparams.to_dict.return_value = {} mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) - trainer = SFTTrainer(model="test-model", training_type=TrainingType.FULL, model_package_group_name="test-group") + trainer = SFTTrainer(model="test-model", training_type=TrainingType.FULL, model_package_group="test-group") assert trainer.training_type == TrainingType.FULL @patch('sagemaker.train.common_utils.finetune_utils._get_beta_session') @@ -73,7 +73,7 @@ def test_peft_value_for_lora_training(self, mock_training_job_create, mock_model mock_training_job.wait = Mock() mock_training_job_create.return_value = mock_training_job - trainer = SFTTrainer(model="test-model", training_type=TrainingType.LORA, model_package_group_name="test-group", training_dataset="s3://bucket/train") + trainer = SFTTrainer(model="test-model", training_type=TrainingType.LORA, model_package_group="test-group", training_dataset="s3://bucket/train") trainer.train(wait=False) assert mock_training_job_create.called @@ -117,7 +117,7 @@ def test_peft_value_for_full_training(self, mock_training_job_create, mock_model mock_training_job.wait = Mock() mock_training_job_create.return_value = mock_training_job - trainer = SFTTrainer(model="test-model", training_type=TrainingType.FULL, model_package_group_name="test-group", training_dataset="s3://bucket/train") + trainer = SFTTrainer(model="test-model", training_type=TrainingType.FULL, model_package_group="test-group", training_dataset="s3://bucket/train") trainer.train(wait=False) assert mock_training_job_create.called @@ -129,7 +129,7 @@ def 
test_training_type_string_value(self, mock_finetuning_options, mock_validate mock_hyperparams = Mock() mock_hyperparams.to_dict.return_value = {} mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) - trainer = SFTTrainer(model="test-model", training_type="CUSTOM", model_package_group_name="test-group") + trainer = SFTTrainer(model="test-model", training_type="CUSTOM", model_package_group="test-group") assert trainer.training_type == "CUSTOM" @patch('sagemaker.train.sft_trainer._resolve_model_and_name') @@ -159,7 +159,7 @@ def test_init_with_datasets(self, mock_finetuning_options, mock_validate_group, mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) trainer = SFTTrainer( model="test-model", - model_package_group_name="test-group", + model_package_group="test-group", training_dataset="s3://bucket/train", validation_dataset="s3://bucket/val" ) @@ -175,7 +175,7 @@ def test_init_with_mlflow_config(self, mock_finetuning_options, mock_validate_gr mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) trainer = SFTTrainer( model="test-model", - model_package_group_name="test-group", + model_package_group="test-group", mlflow_resource_arn="arn:aws:mlflow:us-east-1:123456789012:tracking-server/test", mlflow_experiment_name="test-experiment", mlflow_run_name="test-run" @@ -193,7 +193,7 @@ def test_fit_without_datasets_raises_error(self, mock_finetuning_options, mock_v mock_hyperparams.to_dict.return_value = {} mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) mock_get_session.return_value = Mock() - trainer = SFTTrainer(model="test-model", model_package_group_name="test-group") + trainer = SFTTrainer(model="test-model", model_package_group="test-group") with pytest.raises(Exception): trainer.train(wait=False) @@ -208,9 +208,9 @@ def test_model_package_group_handling(self, mock_validate_group, mock_get_option trainer = SFTTrainer( model="test-model", - model_package_group_name="test-group" + model_package_group="test-group" ) - assert trainer.model_package_group_name == "test-group" + assert trainer.model_package_group == "test-group" @patch('sagemaker.train.sft_trainer._validate_and_resolve_model_package_group') @patch('sagemaker.train.sft_trainer._get_fine_tuning_options_and_model_arn') @@ -221,7 +221,7 @@ def test_s3_output_path_configuration(self, mock_finetuning_options, mock_valida mock_finetuning_options.return_value = (mock_hyperparams, "model-arn", False) trainer = SFTTrainer( model="test-model", - model_package_group_name="test-group", + model_package_group="test-group", s3_output_path="s3://bucket/output" ) assert trainer.s3_output_path == "s3://bucket/output" @@ -237,10 +237,10 @@ def test_gated_model_eula_validation(self, mock_finetuning_options, mock_validat # Should raise error when accept_eula=False for gated model with pytest.raises(ValueError, match="gated model and requires EULA acceptance"): - SFTTrainer(model="gated-model", model_package_group_name="test-group", accept_eula=False) + SFTTrainer(model="gated-model", model_package_group="test-group", accept_eula=False) # Should work when accept_eula=True for gated model - trainer = SFTTrainer(model="gated-model", model_package_group_name="test-group", accept_eula=True) + trainer = SFTTrainer(model="gated-model", model_package_group="test-group", accept_eula=True) assert trainer.accept_eula == True @@ -278,7 +278,7 @@ def test_train_with_tags(self, mock_training_job_create, mock_model_package_conf mock_training_job.wait = Mock() 
mock_training_job_create.return_value = mock_training_job - trainer = SFTTrainer(model="test-model", model_package_group_name="test-group", training_dataset="s3://bucket/train") + trainer = SFTTrainer(model="test-model", model_package_group="test-group", training_dataset="s3://bucket/train") trainer.train(wait=False) mock_training_job_create.assert_called_once() diff --git a/v3-examples/model-customization-examples/dpo-trainer-e2e.ipynb b/v3-examples/model-customization-examples/dpo-trainer-e2e.ipynb deleted file mode 100644 index ae4f366446..0000000000 --- a/v3-examples/model-customization-examples/dpo-trainer-e2e.ipynb +++ /dev/null @@ -1,240 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "7a96a3ab", - "metadata": {}, - "source": [ - "# Direct Preference Optimization (DPO) Training with SageMaker\n", - "\n", - "This notebook demonstrates how to use the **DPOTrainer** to fine-tune large language models using Direct Preference Optimization (DPO). DPO is a technique that trains models to align with human preferences by learning from preference data without requiring a separate reward model.\n", - "\n", - "## What is DPO?\n", - "\n", - "Direct Preference Optimization (DPO) is a method for training language models to follow human preferences. Unlike traditional RLHF (Reinforcement Learning from Human Feedback), DPO directly optimizes the model using preference pairs without needing a reward model.\n", - "\n", - "**Key Benefits:**\n", - "- Simpler than RLHF - no reward model required\n", - "- More stable training process\n", - "- Direct optimization on preference data\n", - "- Works with LoRA for efficient fine-tuning\n", - "\n", - "## Workflow Overview\n", - "\n", - "1. **Prepare Preference Dataset**: Upload preference data in JSONL format\n", - "2. **Register Dataset**: Create a SageMaker AI Registry dataset\n", - "3. **Configure DPO Trainer**: Set up model, training parameters, and resources\n", - "4. **Execute Training**: Run the DPO fine-tuning job\n", - "5. **Track Results**: Monitor training with MLflow integration" - ] - }, - { - "cell_type": "markdown", - "id": "2446b6a5", - "metadata": {}, - "source": [ - "## Step 1: Prepare and Register Preference Dataset\n", - "\n", - "DPO requires preference data in a specific format where each example contains:\n", - "- **prompt**: The input text\n", - "- **chosen**: The preferred response\n", - "- **rejected**: The less preferred response\n", - "\n", - "The dataset should be in JSONL format with each line containing one preference example." - ] - }, - { - "metadata": {}, - "cell_type": "code", - "outputs": [], - "execution_count": null, - "source": [ - "# Configure AWS credentials and region\n", - "#! ada credentials update --provider=isengard --account=<> --role=Admin --profile=default --once\n", - "#! 
aws configure set region us-west-2" - ], - "id": "3878997b198befc0" - }, - { - "cell_type": "code", - "id": "ed5d2927f430664b", - "metadata": {}, - "source": [ - "from sagemaker.ai_registry.dataset import DataSet\n", - "from sagemaker.ai_registry.dataset_utils import CustomizationTechnique\n", - "\n", - "# Upload dataset to S3\n", - "import boto3\n", - "s3 = boto3.client('s3')\n", - "s3.upload_file(\n", - " './dpo-preference_dataset_train_256.jsonl',\n", - " 'nova-mlflow-us-west-2',\n", - " 'dataset/preference_dataset_train_256.jsonl'\n", - ")\n", - "\n", - "# Register dataset in SageMaker AI Registry\n", - "# This creates a versioned dataset that can be referenced by ARN\n", - "dataset = DataSet.create(\n", - " name=\"demo-6\",\n", - " data_location=\"s3://nova-mlflow-us-west-2/dataset/preference_dataset_train_256.jsonl\", \n", - " customization_technique=CustomizationTechnique.DPO, \n", - " wait=True\n", - ")\n", - "\n", - "print(f\"Dataset ARN: {dataset.arn}\")" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "markdown", - "id": "71071d5c", - "metadata": {}, - "source": [ - "## Step 2: Configure and Execute DPO Training\n", - "\n", - "The **DPOTrainer** provides a high-level interface for DPO fine-tuning with the following key features:\n", - "\n", - "### Key Parameters:\n", - "- **model**: Base model to fine-tune (from SageMaker Hub)\n", - "- **training_type**: Fine-tuning method (LoRA recommended for efficiency)\n", - "- **training_dataset**: ARN of the registered preference dataset\n", - "- **model_package_group_name**: Where to store the fine-tuned model\n", - "- **mlflow_resource_arn**: MLflow tracking server for experiment logging\n", - "\n", - "### Training Features:\n", - "- **Serverless Training**: Automatically managed compute resources\n", - "- **LoRA Integration**: Parameter-efficient fine-tuning\n", - "- **MLflow Tracking**: Automatic experiment and metrics logging\n", - "- **Model Versioning**: Automatic model package creation" - ] - }, - { - "cell_type": "code", - "id": "e42719df1e792227", - "metadata": {}, - "source": [ - "import random\n", - "! ada credentials update --provider=isengard --account=<> --role=Admin --profile=default --once\n", - "! 
aws configure set region us-west-2\n", - "\n", - "from sagemaker.train.dpo_trainer import DPOTrainer\n", - "from sagemaker.train.common import TrainingType\n", - "\n" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "0352bdaa-fa13-44c5-a70c-0d9bf7a10477", - "metadata": {}, - "source": [ - "# Create DPOTrainer instance with comprehensive configuration\n", - "trainer = DPOTrainer(\n", - " # Base model from SageMaker Hub\n", - " model=\"meta-textgeneration-llama-3-2-1b-instruct\",\n", - " \n", - " # Use LoRA for efficient fine-tuning\n", - " training_type=TrainingType.LORA,\n", - " \n", - " # Model versioning and storage\n", - " model_package_group_name=\"arn:aws:sagemaker:us-west-2:<>:model-package-group/test-finetuned-models-gamma\",\n", - " \n", - " # MLflow experiment tracking\n", - " mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:<>:mlflow-tracking-server/ashwpat-test\",\n", - " \n", - " # Training data (from Step 1)\n", - " training_dataset=\"arn:aws:sagemaker:us-west-2:<>:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/DataSet/demo-6/0.0.4\",\n", - " \n", - " # Output configuration\n", - " s3_output_path=\"s3://nova-mlflow-us-west-2/output\",\n", - " \n", - " # IAM role for training job\n", - " role=\"arn:aws:iam::<>:role/Admin\",\n", - " \n", - " # Unique job name\n", - " base_job_name=f\"dpo-llama-{random.randint(1, 1000)}\",\n", - ")\n", - "\n", - "# Customize training hyperparameters\n", - "# DPO-specific parameters are automatically loaded from the model's recipe\n", - "trainer.hyperparameters.max_epochs = 1 # Quick training for demo\n", - "\n", - "print(\"Starting DPO training job...\")\n", - "print(f\"Job name: {trainer.base_job_name}\")\n", - "print(f\"Base model: {trainer._model_name}\")\n", - "\n", - "# Execute training with monitoring\n", - "training_job = trainer.train(wait=True)\n", - "\n", - "print(f\"Training completed! Job ARN: {training_job.training_job_arn}\")" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "22f6a210-0a0c-4b7a-af4d-2e08eae1c048", - "metadata": { - "scrolled": true - }, - "source": [ - "from sagemaker.core.utils.utils import Unassigned\n", - "import json\n", - "\n", - "print(json.dumps({k: v for k, v in training_job.__dict__.items() if not isinstance(v, Unassigned) and \"Unassigned object\" not in str(v)}, indent=2, default=str))" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "markdown", - "id": "73d7545b", - "metadata": {}, - "source": [ - "## Next Steps\n", - "\n", - "After training completes, you can:\n", - "\n", - "1. **Deploy the Model**: Use `ModelBuilder` to deploy the fine-tuned model\n", - "2. **Evaluate Performance**: Compare responses from base vs fine-tuned model\n", - "3. **Monitor Metrics**: Review training metrics in MLflow\n", - "4. **Iterate**: Adjust hyperparameters and retrain if needed\n", - "\n", - "### Example Deployment:\n", - "```python\n", - "from sagemaker.serve import ModelBuilder\n", - "\n", - "# Deploy the fine-tuned model\n", - "model_builder = ModelBuilder(model=training_job)\n", - "model_builder.build(role_arn=\"arn:aws:iam::account:role/SageMakerRole\")\n", - "endpoint = model_builder.deploy(endpoint_name=\"dpo-finetuned-llama\")\n", - "```\n", - "\n", - "The fine-tuned model will now generate responses that better align with the preferences in your training data." 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/v3-examples/model-customization-examples/dpo_trainer_example_notebook_v3_prod.ipynb b/v3-examples/model-customization-examples/dpo_trainer_example_notebook_v3_prod.ipynb index 9cba1c577a..e5fbe4cd99 100644 --- a/v3-examples/model-customization-examples/dpo_trainer_example_notebook_v3_prod.ipynb +++ b/v3-examples/model-customization-examples/dpo_trainer_example_notebook_v3_prod.ipynb @@ -45,34 +45,45 @@ }, { "cell_type": "code", + "execution_count": null, "id": "ed5d2927f430664b", "metadata": {}, + "outputs": [], "source": [ "from sagemaker.ai_registry.dataset import DataSet\n", "from sagemaker.ai_registry.dataset_utils import CustomizationTechnique\n", "\n", - "'''# Upload dataset to S3\n", - "import boto3\n", - "s3 = boto3.client('s3')\n", - "s3.upload_file(\n", - " './dpo-preference_dataset_train_256.jsonl',\n", - " 'nova-mlflow-us-west-2',\n", - " 'dataset/preference_dataset_train_256.jsonl'\n", - ")'''\n", "\n", "# Register dataset in SageMaker AI Registry\n", "# This creates a versioned dataset that can be referenced by ARN\n", - "'''dataset = DataSet.create(\n", + "# Provide a source (it can be local file path or S3 URL)\n", + "dataset = DataSet.create(\n", " name=\"demo-6\",\n", - " data_location=\"s3://nova-mlflow-us-west-2/dataset/preference_dataset_train_256.jsonl\", \n", - " customization_technique=CustomizationTechnique.DPO, \n", - " wait=True\n", + " source=\"s3://nova-mlflow-us-west-2/dataset/preference_dataset_train_256.jsonl\"\n", ")\n", "\n", - "print(f\"Dataset ARN: {dataset.arn}\")'''" - ], + "print(f\"Dataset ARN: {dataset.arn}\")" + ] + }, + { + "cell_type": "markdown", + "id": "28945915-3a40-4a7c-9e7b-7923635780ca", + "metadata": {}, + "source": [ + "##### Create a Model Package group (if not already exists)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba3e3c48-fd53-4304-a09b-a0cc4c1579e1", + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "from sagemaker.core.resources import ModelPackage, ModelPackageGroup\n", + "\n", + "model_package_group=ModelPackageGroup.create(model_package_group_name=\"test-model-package-group\")" + ] }, { "cell_type": "markdown", @@ -84,11 +95,19 @@ "The **DPOTrainer** provides a high-level interface for DPO fine-tuning with the following key features:\n", "\n", "### Key Parameters:\n", - "- **model**: Base model to fine-tune (from SageMaker Hub)\n", - "- **training_type**: Fine-tuning method (LoRA recommended for efficiency)\n", - "- **training_dataset**: ARN of the registered preference dataset\n", - "- **model_package_group_name**: Where to store the fine-tuned model\n", - "- **mlflow_resource_arn**: MLflow tracking server for experiment logging\n", + "**Required Parameters** \n", + "\n", + "* `model`: base_model id on Sagemaker Hubcontent that is available to finetune (or) ModelPackage artifacts\n", + "\n", + "**Optional Parameters**\n", + "* `training_type`: Choose from TrainingType Enum(sagemaker.modules.train.common) either LORA OR FULL.\n", + "* `model_package_group`: ModelPackage group name or ModelPackageGroup object. 
This parameter is mandatory when a base model ID is provided, but optional when a model package is provided.\n", + "* `mlflow_resource_arn`: MLFlow app ARN to track the training job\n", + "* `mlflow_experiment_name`: MLFlow app experiment name(str)\n", + "* `mlflow_run_name`: MLFlow app run name(str)\n", + "* `training_dataset`: Training Dataset - should be a Dataset ARN or Dataset object (Please note training dataset is required for a training job to run, can be either provided via Trainer or .train())\n", + "* `validation_dataset`: Validation Dataset - should be a Dataset ARN or Dataset object\n", + "* `s3_output_path`: S3 path for the trained model artifacts\n", "\n", "### Training Features:\n", "- **Serverless Training**: Automatically managed compute resources\n", @@ -97,10 +116,22 @@ "- **Model Versioning**: Automatic model package creation" ] }, + { + "cell_type": "markdown", + "id": "6c280036-b476-43b4-8789-15d9b8be6820", + "metadata": {}, + "source": [ + "#### Reference \n", + "Refer this doc for other models that support Model Customization: \n", + "https://docs.aws.amazon.com/bedrock/latest/userguide/custom-model-supported.html" + ] + }, { "cell_type": "code", + "execution_count": null, "id": "e42719df1e792227", "metadata": {}, + "outputs": [], "source": [ "import random\n", "#! ada credentials update --provider=isengard --account=<> --role=Admin --profile=default --once\n", @@ -109,12 +140,11 @@ "from sagemaker.train.dpo_trainer import DPOTrainer\n", "from sagemaker.train.common import TrainingType\n", "\n" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "0352bdaa-fa13-44c5-a70c-0d9bf7a10477", "metadata": { "ExecuteTime": { @@ -122,6 +152,7 @@ "start_time": "2025-12-05T19:30:51.101703Z" } }, + "outputs": [], "source": [ "# Create DPOTrainer instance with comprehensive configuration\n", "trainer = DPOTrainer(\n", @@ -132,22 +163,17 @@ " training_type=TrainingType.LORA,\n", " \n", " # Model versioning and storage\n", - " model_package_group_name=\"sdk-test-finetuned-models\",\n", - " \n", - " # MLflow experiment tracking\n", - " #mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:{Account-ID}:mlflow-tracking-server/{MLFLOW-NAME}\",\n", - " \n", + " model_package_group=model_package_group, # or use an existing model package group arn\n", + " \n", " # Training data (from Step 1)\n", - " training_dataset=\"s3://mc-flows-sdk-testing/input_data/dpo/preference_dataset_train_256.jsonl\",\n", + " training_dataset=dataset.arn,\n", " \n", " # Output configuration\n", " s3_output_path=\"s3://mc-flows-sdk-testing/output/\",\n", - " \n", - " # IAM role for training job\n", - " #role=\"arn:aws:iam::{Account-ID}:role/Admin\",\n", + "\n", " \n", " # Unique job name\n", - " base_job_name=f\"dpo-llama-{random.randint(1, 1000)}\",\n", + " base_job_name=f\"dpo-job-{random.randint(1, 1000)}\",\n", " accept_eula=True\n", ")\n", "\n", @@ -163,30 +189,19 @@ "training_job = trainer.train(wait=True)\n", "\n", "print(f\"Training completed! 
Job ARN: {training_job.training_job_arn}\")" - ], - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'DataSet' is not defined", - "output_type": "error", - "traceback": [ - "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", - "\u001B[0;31mNameError\u001B[0m Traceback (most recent call last)", - "Cell \u001B[0;32mIn[1], line 2\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[38;5;66;03m# Create DPOTrainer instance with comprehensive configuration\u001B[39;00m\n\u001B[0;32m----> 2\u001B[0m dataset \u001B[38;5;241m=\u001B[39m \u001B[43mDataSet\u001B[49m\u001B[38;5;241m.\u001B[39mget(name\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124marn:aws:sagemaker:us-east-1:729646638167:hub-content/sdktest/DataSet/dpo-nova-1-test-data/0.0.1\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m 4\u001B[0m trainer \u001B[38;5;241m=\u001B[39m DPOTrainer(\n\u001B[1;32m 5\u001B[0m \u001B[38;5;66;03m# Base model from SageMaker Hub\u001B[39;00m\n\u001B[1;32m 6\u001B[0m model\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mmeta-textgeneration-llama-3-2-1b-instruct\u001B[39m\u001B[38;5;124m\"\u001B[39m,\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 28\u001B[0m accept_eula\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m\n\u001B[1;32m 29\u001B[0m )\n\u001B[1;32m 31\u001B[0m \u001B[38;5;66;03m# Customize training hyperparameters\u001B[39;00m\n\u001B[1;32m 32\u001B[0m \u001B[38;5;66;03m# DPO-specific parameters are automatically loaded from the model's recipe\u001B[39;00m\n", - "\u001B[0;31mNameError\u001B[0m: name 'DataSet' is not defined" - ] - } - ], - "execution_count": 1 + ] }, { "cell_type": "code", + "execution_count": null, "id": "22f6a210-0a0c-4b7a-af4d-2e08eae1c048", "metadata": { "scrolled": true }, + "outputs": [], "source": [ "from pprint import pprint\n", + "from sagemaker.core.utils.utils import Unassigned\n", "\n", "def pretty_print(obj):\n", " def parse_unassigned(item):\n", @@ -205,15 +220,16 @@ " cleaned = parse_unassigned(obj.__dict__ if hasattr(obj, '__dict__') else obj)\n", " print(json.dumps(cleaned, indent=2, default=str))\n", "pretty_print(training_job)" - ], - "outputs": [], - "execution_count": null + ] }, { + "cell_type": "code", + "execution_count": null, + "id": "eb2b3188-582d-4a3b-9f32-e7f17f962aa0", "metadata": { "scrolled": true }, - "cell_type": "code", + "outputs": [], "source": [ "# Print the training job object\n", "\n", @@ -230,10 +246,7 @@ "# Usage\n", "pretty_print(response)\n", "\n" - ], - "id": "eb2b3188-582d-4a3b-9f32-e7f17f962aa0", - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", diff --git a/v3-examples/model-customization-examples/rlaif_finetuning_example_notebook_v3_prod.ipynb b/v3-examples/model-customization-examples/rlaif_finetuning_example_notebook_v3_prod.ipynb index 5b0da85a26..02f14c5c8a 100644 --- a/v3-examples/model-customization-examples/rlaif_finetuning_example_notebook_v3_prod.ipynb +++ b/v3-examples/model-customization-examples/rlaif_finetuning_example_notebook_v3_prod.ipynb @@ -22,21 +22,23 @@ ] }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "id": "4a05468e7078023e", + "metadata": {}, + "outputs": [], "source": [ "# Configure AWS credentials and region\n", "#! ada credentials update --provider=isengard --account=<> --role=Admin --profile=default --once\n", "#! 
aws configure set region us-west-2" - ], - "id": "4a05468e7078023e", - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "cec1af2d-c0c1-4348-8ee7-502a6d7ee2d0", "metadata": {}, + "outputs": [], "source": [ "#!/usr/bin/env python3\n", "\n", @@ -47,7 +49,6 @@ "from sagemaker.core.resources import ModelPackage\n", "import os\n", "#os.environ['SAGEMAKER_REGION'] = 'us-east-1'\n", - "#os.environ['SAGEMAKER_STAGE'] = 'prod'\n", "\n", "import boto3\n", "from sagemaker.core.helper.session_helper import Session\n", @@ -55,9 +56,59 @@ "# For MLFlow native metrics in Trainer wait, run below line with approriate region\n", "os.environ[\"SAGEMAKER_MLFLOW_CUSTOM_ENDPOINT\"] = \"https://mlflow.sagemaker.us-west-2.app.aws\"\n", "\n" - ], + ] + }, + { + "cell_type": "markdown", + "id": "b0438472-4152-4679-9a54-4d4c467bc590", + "metadata": {}, + "source": [ + "### Prepare and Register Dataset\n", + "Prepare and Register Dataset for Finetuning" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "94602a04-8534-43b0-a04c-591a5d002c09", + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "from sagemaker.ai_registry.dataset import DataSet\n", + "from sagemaker.ai_registry.dataset_utils import CustomizationTechnique\n", + "\n", + "\n", + "\n", + "# Register dataset in SageMaker AI Registry\n", + "# This creates a versioned dataset that can be referenced by ARN\n", + "# Provide a source (it can be local file path or S3 URL)\n", + "dataset = DataSet.create(\n", + " name=\"demo-2\",\n", + " source=\"s3://mc-flows-sdk-testing/input_data/rlvr-rlaif-test-data/train_285.jsonl\"\n", + ")\n", + "\n", + "print(f\"Dataset ARN: {dataset.arn}\")" + ] + }, + { + "cell_type": "markdown", + "id": "5b9fb71e-42e8-4225-8503-02e06573ad0f", + "metadata": {}, + "source": [ + "##### Create a Model Package group (if not already exists)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74fa1495-81a8-4b55-8560-6968fa021a11", + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.core.resources import ModelPackage, ModelPackageGroup\n", + "\n", + "model_package_group=ModelPackageGroup.create(model_package_group_name=\"test-model-package-group\")" + ] }, { "cell_type": "markdown", @@ -70,40 +121,50 @@ "* `model`: base_model id on Sagemaker Hubcontent that is available to finetune (or) ModelPackage artifacts\n", "\n", "**Optional Parameters**\n", - "* `reward_model_id`: Bedrock model id, supported evaluation models: https://docs.aws.amazon.com/bedrock/latest/userguide/evaluation-judge.html\n", + "* `reward_model_id`: Bedrock model id to be used as judge.\n", "* `reward_prompt`: Reward prompt ARN or builtin prompts refer: https://docs.aws.amazon.com/bedrock/latest/userguide/model-evaluation-metrics.html\n", - "* `model_package_group_name`: ModelPackage group name or ModelPackageGroup\n", + "* `model_package_group`: ModelPackage group name or ModelPackageGroup object. 
This parameter is mandatory when a base model ID is provided, but optional when a model package is provided.\n",
 "* `mlflow_resource_arn`: MLFlow app ARN to track the training job\n",
 "* `mlflow_experiment_name`: MLFlow app experiment name(str)\n",
 "* `mlflow_run_name`: MLFlow app run name(str)\n",
 - "* `training_dataset`: Training Dataset - either Dataset ARN or S3 Path of the dataset (Please note these are required for a training job to run, can be either provided via Trainer or .train())\n",
 - "* `validation_dataset`: Validation Dataset - either Dataset ARN or S3 Path of the dataset\n",
 + "* `training_dataset`: Training Dataset - should be a Dataset ARN or Dataset object (note: a training dataset is required for a training job to run; it can be provided either via the Trainer or via .train())\n",
 + "* `validation_dataset`: Validation Dataset - should be a Dataset ARN or Dataset object\n",
 "* `s3_output_path`: S3 path for the trained model artifacts" ] },
 + { + "cell_type": "markdown", + "id": "8027b315-9dee-4aef-a225-ade4c93c331b", + "metadata": {}, + "source": [ + "#### Reference \n", + "Refer to this doc for other models that support Model Customization: \n", + "https://docs.aws.amazon.com/bedrock/latest/userguide/custom-model-supported.html\n", + "\n", + "Refer to this list for supported reward models: \n", + "https://github.com/aws/sagemaker-python-sdk/blob/master/sagemaker-train/src/sagemaker/train/constants.py#L46" + ] + },
 { "cell_type": "code", + "execution_count": null, "id": "07aefa46-29f2-4fcf-86da-b0bd471e0a6a", "metadata": {}, + "outputs": [], "source": [
 "# For fine-tuning \n",
 "rlaif_trainer = RLAIFTrainer(\n",
 - " model=\"meta-textgeneration-llama-3-2-1b-instruct\", # Union[str, ModelPackage] \n",
 - " model_package_group_name=\"sdk-test-finetuned-models\", # Make it Optional\n",
 - " reward_model_id='anthropic.claude-3-5-sonnet-20240620-v1:0',\n",
 - " reward_prompt='Builtin.Correctness',\n",
 - " #mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:<>:mlflow-tracking-server/mmlu-eval-experiment\", # Optional[str] - MLflow app ARN (auto-resolved if not provided), can accept name and search in the account\n",
 - " mlflow_experiment_name=\"test-rlaif-finetuned-models-exp\", # Optional[str]\n",
 - " mlflow_run_name=\"test-rlaif-finetuned-models-run\", # Optional[str]\n",
 - " training_dataset=\"s3://mc-flows-sdk-testing/input_data/rlvr-rlaif-test-data/train_285.jsonl\", #Optional[]\n",
 + " model=\"meta-textgeneration-llama-3-2-1b-instruct\", \n",
 + " model_package_group=model_package_group, # or use an existing model package group arn\n",
 + " reward_model_id='openai.gpt-oss-120b-1:0',\n",
 + " reward_prompt='Builtin.Summarize',\n",
 + " mlflow_experiment_name=\"test-rlaif-finetuned-models-exp\", \n",
 + " mlflow_run_name=\"test-rlaif-finetuned-models-run\", \n",
 + " training_dataset=dataset.arn, \n",
 " s3_output_path=\"s3://mc-flows-sdk-testing/output/\",\n",
 " accept_eula=True\n",
 - " #sagemaker_session=sagemaker_session,\n",
 - " #role=\"arn:aws:iam::<>:role/Admin\"\n",
 ")" - ], - "outputs": [], - "execution_count": null + ] },
 { "cell_type": "markdown", @@ -117,19 +178,19 @@ }, { "cell_type": "code", + "execution_count": null, "id": "b31d57c0-9777-428d-8792-557f7be4cfda", "metadata": { "scrolled": true }, + "outputs": [], "source": [
 "print(\"Default Finetuning Options:\")\n",
 "pprint(rlaif_trainer.hyperparameters.to_dict()) # rename as hyperparameters\n",
 "\n",
 "#set options\n",
 "rlaif_trainer.hyperparameters.get_info()\n"
 - ], - "outputs": [], - "execution_count": null + ] },
 { "cell_type":
"markdown", @@ -141,31 +202,23 @@ }, { "cell_type": "code", + "execution_count": null, "id": "5d5fa362-0caf-412d-977c-5e47f0548ea5", "metadata": {}, - "source": [ - "\n", - "training_job = rlaif_trainer.train(wait=True)\n" - ], "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "a0781a22-d9ea-4c9b-a854-5d7efde3539d", - "metadata": {}, "source": [ + "\n", "training_job = rlaif_trainer.train(wait=True)\n" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "c34b93c8-2e4c-437a-8efb-b8475fb941f3", "metadata": { "scrolled": true }, + "outputs": [], "source": [ "import re\n", "from sagemaker.core.utils.utils import Unassigned\n", @@ -190,9 +243,7 @@ " print(json.dumps(cleaned, indent=2, default=str))\n", "\n", "pretty_print(training_job)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -206,18 +257,18 @@ }, { "cell_type": "code", + "execution_count": null, "id": "860fcbd0-d340-4bde-bfbc-224b4b8b0aed", "metadata": { "scrolled": true }, + "outputs": [], "source": [ "from sagemaker.core.resources import TrainingJob\n", "\n", "response = TrainingJob.get(training_job_name=\"meta-textgeneration-llama-3-2-1b-instruct-rlaif-20251124140754\")\n", "pretty_print(response)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -229,56 +280,77 @@ "Here we are providing a user-defined reward prompt/evaluator ARN" ] }, + { + "cell_type": "markdown", + "id": "3f36ea3d-04f8-4605-b5a0-6595aba7cbce", + "metadata": {}, + "source": [ + "#### Create a custom reward prompt " + ] + }, { "cell_type": "code", + "execution_count": null, + "id": "3f00b5cd-5e05-450b-855b-972dba2ed91a", + "metadata": {}, + "outputs": [], + "source": [ + "from rich.pretty import pprint\n", + "\n", + "from sagemaker.ai_registry.air_constants import REWARD_FUNCTION, REWARD_PROMPT\n", + "from sagemaker.ai_registry.evaluator import Evaluator\n", + "\n", + "evaluator = Evaluator.create(\n", + " name = \"jamj-rp2\",\n", + " source=\"/Users/jamjee/workplace/hubpuller/prompt/custom_prompt.jinja\",\n", + " type = REWARD_PROMPT\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "296913d3-a1ed-4f6a-bc32-afc2e82057aa", + "metadata": {}, + "source": [ + "#### Use it with RLAIF Trainer" + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "05015ab9-3e70-4b61-affe-cd84ed4eccae", "metadata": {}, + "outputs": [], "source": [ "\n", "\n", "# For fine-tuning \n", "rlaif_trainer = RLAIFTrainer(\n", - " model=\"meta-textgeneration-llama-3-2-1b-instruct\", # Union[str, ModelPackage] \n", - " model_package_group_name=\"sdk-test-finetuned-models\", # Make it Optional\n", - " reward_model_id='anthropic.claude-3-5-sonnet-20240620-v1:0',\n", - " reward_prompt=\"arn:aws:sagemaker:us-west-2:<>:hub-content/sdktest/JsonDoc/rlaif-test-prompt/0.0.1\",\n", - " #mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:<>:mlflow-tracking-server/mmlu-eval-experiment\", # Optional[str] - MLflow app ARN (auto-resolved if not provided), can accept name and search in the account\n", - " mlflow_experiment_name=\"test-rlaif-finetuned-models-exp\", # Optional[str]\n", - " mlflow_run_name=\"test-rlaif-finetuned-models-run\", # Optional[str]\n", - " training_dataset=\"s3://mc-flows-sdk-testing/input_data/rlvr-rlaif-test-data/train_285.jsonl\", #Optional[str]\n", + " model=\"meta-textgeneration-llama-3-2-1b-instruct\", \n", + " model_package_group=\"sdk-test-finetuned-models\",\n", + " 
reward_model_id='openai.gpt-oss-120b-1:0',\n", + " reward_prompt=evaluator.arn,\n", + " mlflow_experiment_name=\"test-rlaif-finetuned-models-exp\", \n", + " mlflow_run_name=\"test-rlaif-finetuned-models-run\", \n", + " training_dataset=dataset.arn, \n", " s3_output_path=\"s3://mc-flows-sdk-testing/output/\",\n", " accept_eula=True\n", - " #sagemaker_session=sagemaker_session,\n", - " #role=\"arn:aws:iam::<>:role/service-role/AmazonSageMaker-ExecutionRole-20250731T162975\"\n", - " #role=\"arn:aws:iam::<>:role/Admin\"\n", ")\n" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "029aa3cf-8a98-487b-8e21-445af9a72e91", - "metadata": {}, - "source": [ - "training_job = rlaif_trainer.train(wait=True,\n", - " logs=True)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "6815868d-0490-43a4-9765-148c4b2ef4af", "metadata": {}, + "outputs": [], "source": [ "training_job = rlaif_trainer.train(wait=True)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "fc3e1c67-d9c5-429a-aed7-07b26106ef2e", "metadata": { "collapsed": true, @@ -287,9 +359,10 @@ }, "scrolled": true }, - "source": "pretty_print(training_job)", "outputs": [], - "execution_count": null + "source": [ + "pretty_print(training_job)" + ] } ], "metadata": { diff --git a/v3-examples/model-customization-examples/rlvr_finetuning_example_notebook_v3-prod.ipynb b/v3-examples/model-customization-examples/rlvr_finetuning_example_notebook_v3_prod.ipynb similarity index 63% rename from v3-examples/model-customization-examples/rlvr_finetuning_example_notebook_v3-prod.ipynb rename to v3-examples/model-customization-examples/rlvr_finetuning_example_notebook_v3_prod.ipynb index 293a3d8c3a..096a087ae0 100644 --- a/v3-examples/model-customization-examples/rlvr_finetuning_example_notebook_v3-prod.ipynb +++ b/v3-examples/model-customization-examples/rlvr_finetuning_example_notebook_v3_prod.ipynb @@ -23,8 +23,10 @@ }, { "cell_type": "code", + "execution_count": null, "id": "10c2ef37-2425-4676-bc80-6d278d4e609a", "metadata": {}, + "outputs": [], "source": [ "# Configure AWS credentials and region\n", "#! 
ada credentials update --provider=isengard --account=<> --role=Admin --profile=default --once\n", @@ -45,9 +47,59 @@ "# For MLFlow native metrics in Trainer wait, run below line with approriate region\n", "os.environ[\"SAGEMAKER_MLFLOW_CUSTOM_ENDPOINT\"] = \"https://mlflow.sagemaker.us-west-2.app.aws\"\n", "\n" - ], + ] + }, + { + "cell_type": "markdown", + "id": "a5d60ac9-4a17-4140-bd3d-5dbde79c0dda", + "metadata": {}, + "source": [ + "### Prepare and Register Dataset\n", + "Prepare and Register Dataset for Finetuning" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c7620545-0164-4f65-a465-d5a999f8ffdd", + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.ai_registry.dataset import DataSet\n", + "from sagemaker.ai_registry.dataset_utils import CustomizationTechnique\n", + "\n", + "\n", + "\n", + "# Register dataset in SageMaker AI Registry\n", + "# This creates a versioned dataset that can be referenced by ARN\n", + "# Provide a source (it can be local file path or S3 URL)\n", + "dataset = DataSet.create(\n", + " name=\"demo-2\",\n", + " source=\"s3://mc-flows-sdk-testing/input_data/rlvr-rlaif-test-data/train_285.jsonl\"\n", + ")\n", + "\n", + "print(f\"Dataset ARN: {dataset.arn}\")" + ] + }, + { + "cell_type": "markdown", + "id": "206def32-4685-442f-8596-e36bfae7f33a", + "metadata": {}, + "source": [ + "##### Create a Model Package group (if not already exists)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a419172a-8fcb-4f12-ab25-af9093d732c9", + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "from sagemaker.core.resources import ModelPackage, ModelPackageGroup\n", + "\n", + "model_package_group=ModelPackageGroup.create(model_package_group_name=\"test-model-package-group\")" + ] }, { "cell_type": "markdown", @@ -61,37 +113,43 @@ "\n", "**Optional Parameters**\n", "* `custom_reward_function`: Custom reward function/Evaluator ARN\n", - "* `model_package_group_name`: ModelPackage group name or ModelPackageGroup\n", + "* `model_package_group`: ModelPackage group name or ModelPackageGroup object. 
This parameter is mandatory when a base model ID is provided, but optional when a model package is provided.\n",
 "* `mlflow_resource_arn`: MLFlow app ARN to track the training job\n",
 "* `mlflow_experiment_name`: MLFlow app experiment name(str)\n",
 "* `mlflow_run_name`: MLFlow app run name(str)\n",
 - "* `training_dataset`: Training Dataset - either Dataset ARN or S3 Path of the dataset (Please note these are required for a training job to run, can be either provided via Trainer or .train())\n",
 - "* `validation_dataset`: Validation Dataset - either Dataset ARN or S3 Path of the dataset\n",
 + "* `training_dataset`: Training Dataset - should be a Dataset ARN or Dataset object (note: a training dataset is required for a training job to run; it can be provided either via the Trainer or via .train())\n",
 + "* `validation_dataset`: Validation Dataset - should be a Dataset ARN or Dataset object\n",
 "* `s3_output_path`: S3 path for the trained model artifacts" ] },
 + { + "cell_type": "markdown", + "id": "a34eee44-5011-497c-8e80-f0db24a566c8", + "metadata": {}, + "source": [ + "#### Reference \n", + "Refer to this doc for other models that support Model Customization: \n", + "https://docs.aws.amazon.com/bedrock/latest/userguide/custom-model-supported.html" + ] + },
 { "cell_type": "code", + "execution_count": null, "id": "58a2ab71-214a-4491-bb0d-979ecf164186", "metadata": {}, + "outputs": [], "source": [
 "# For fine-tuning (prod)\n",
 "rlvr_trainer = RLVRTrainer(\n",
 - " model=\"meta-textgeneration-llama-3-2-1b-instruct\", # Union[str, ModelPackage] \n",
 - " model_package_group_name=\"sdk-test-finetuned-models\", #\"test-finetuned-models\", # Make it Optional\n",
 - " #mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:<>:mlflow-tracking-server/mmlu-eval-experiment\", # Optional[str] - MLflow app ARN (auto-resolved if not provided), can accept name and search in the account\n",
 - " mlflow_experiment_name=\"test-rlvr-finetuned-models-exp\", # Optional[str]\n",
 - " mlflow_run_name=\"test-rlvr-finetuned-models-run\", # Optional[str]\n",
 - " training_dataset=\"s3://mc-flows-sdk-testing/input_data/rlvr-rlaif-test-data/train_285.jsonl\", #\"arn:aws:sagemaker:us-west-2:<>:hub-content/AIRegistry/DataSet/MarketingDemoDataset1/1.0.0\", #Optional[]\n",
 + " model=\"meta-textgeneration-llama-3-2-1b-instruct\", \n",
 + " model_package_group=model_package_group, # or use an existing model package group arn\n",
 + " mlflow_experiment_name=\"test-rlvr-finetuned-models-exp\", \n",
 + " mlflow_run_name=\"test-rlvr-finetuned-models-run\", \n",
 + " training_dataset=dataset.arn,\n",
 " s3_output_path=\"s3://mc-flows-sdk-testing/output/\",\n",
 - " #sagemaker_session=sagemaker_session,\n",
 - " #role=\"arn:aws:iam::<>:role/service-role/AmazonSageMaker-ExecutionRole-20250731T162975\"\n",
 - " #role=\"arn:aws:iam::<>:role/Admin\",\n",
 " accept_eula=True\n",
 ")" - ], - "outputs": [], - "execution_count": null + ] },
 { "cell_type": "markdown", @@ -105,10 +163,12 @@ }, { "cell_type": "code", + "execution_count": null, "id": "60198ab4-e561-40e4-8f59-7d595f246a4e", "metadata": { "scrolled": true }, + "outputs": [], "source": [
 "print(\"Default Finetuning Options:\")\n",
 "pprint(rlvr_trainer.hyperparameters.to_dict()) # rename as hyperparameters\n",
 "\n",
 "#set options\n",
 "rlvr_trainer.hyperparameters.get_info()\n",
 "\n"
 - ], - "outputs": [], - "execution_count": null + ] },
 { "cell_type": "markdown", @@ -130,18 +188,20 @@ }, { "cell_type": "code", + "execution_count": null, "id": "1f3f65a7-8ba6-4aa1-b6ea-606ddb2068c0", "metadata": {},
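Review note: the parameter list above says the training dataset "can be provided either via the Trainer or via .train()". The notebooks only ever show the constructor route; a minimal sketch of the deferred route follows, reusing the `rlvr_trainer` and `dataset` objects defined in this notebook. The exact keyword accepted by `train()` is an assumption inferred from that note, not something this diff confirms.

```python
# Hedged sketch: defer the dataset to train() instead of the constructor.
# Assumption: train() accepts the same `training_dataset` argument described
# in the parameter notes above; verify against the SDK before relying on it.
training_job = rlvr_trainer.train(
    training_dataset=dataset.arn,  # Dataset ARN, per the notes above
    wait=True,
)
```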
+ "outputs": [], "source": [ "training_job = rlvr_trainer.train(wait=True)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "a60f83f2-a247-4506-9827-9dc3a603f629", "metadata": {}, + "outputs": [], "source": [ "import re\n", "from sagemaker.core.utils.utils import Unassigned\n", @@ -167,19 +227,17 @@ "\n", "# Usage\n", "pretty_print(training_job)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "fae88520-f1ac-4375-9b2d-b7d33b1241ab", "metadata": {}, + "outputs": [], "source": [ "training_job = rlvr_trainer.train(wait=True)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -193,31 +251,31 @@ }, { "cell_type": "code", + "execution_count": null, "id": "7e9213d4-a08f-413a-9888-88decfcc13a4", "metadata": { "scrolled": true }, + "outputs": [], "source": [ "from sagemaker.core.resources import TrainingJob\n", "\n", "response = TrainingJob.get(training_job_name=\"meta-textgeneration-llama-3-2-3b-instruct-rlvr-20251123033517\")\n", "pretty_print(response)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "edf8cf45-742c-4e32-a41e-55ca65557d67", "metadata": { "scrolled": true }, + "outputs": [], "source": [ "training_job.refresh()\n", "pretty_print(training_job)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -229,75 +287,87 @@ "Here we are providing a user-defined reward function ARN" ] }, + { + "cell_type": "markdown", + "id": "0fc575ea-7acd-4346-a869-6cf121ffb99a", + "metadata": {}, + "source": [ + "#### Create a custom reward function " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1292b745-95ec-41aa-913c-503b2d0de1fd", + "metadata": {}, + "outputs": [], + "source": [ + "from rich.pretty import pprint\n", + "\n", + "from sagemaker.ai_registry.air_constants import REWARD_FUNCTION, REWARD_PROMPT\n", + "from sagemaker.ai_registry.evaluator import Evaluator\n", + "\n", + "# Method : Lambda\n", + "evaluator = Evaluator.create(\n", + " name = \"sdk-new-rf11\",\n", + " source=\"arn:aws:lambda:us-west-2:<>:function:sm-eval-vinayshm-rlvr-llama-321b-instruct-v1-<>8\",\n", + " type=REWARD_FUNCTION\n", + "\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "fa9e6ed8-4ef9-4d8b-afdf-ba8c8b39df35", + "metadata": {}, + "source": [ + "#### Use it with RLVR Trainer" + ] + }, { "cell_type": "code", + "execution_count": null, "id": "f1d6931c-0935-4b2b-9aaf-7de0d0b836a7", "metadata": {}, + "outputs": [], "source": [ "\n", "# For fine-tuning \n", "rlvr_trainer = RLVRTrainer(\n", " model=\"meta-textgeneration-llama-3-2-1b-instruct\", # Union[str, ModelPackage] \n", - " model_package_group_name=\"sdk-test-finetuned-models\", # Make it Optional\n", - " #mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:<>:mlflow-tracking-server/mmlu-eval-experiment\", # Optional[str] - MLflow app ARN (auto-resolved if not provided), can accept name and search in the account\n", + " model_package_group=\"sdk-test-finetuned-models\", # Make it Optional\n", " mlflow_experiment_name=\"test-rlvr-finetuned-models-exp\", # Optional[str]\n", " mlflow_run_name=\"test-rlvr-finetuned-models-run\", # Optional[str]\n", - " training_dataset=\"s3://mc-flows-sdk-testing/input_data/rlvr-rlaif-test-data/train_285.jsonl\", #Optional[]\n", + " training_dataset=dataset, #Optional[]\n", " s3_output_path=\"s3://mc-flows-sdk-testing/output/\",\n", - " 
#sagemaker_session=sagemaker_session,\n", - " #role=\"arn:aws:iam::<>:role/service-role/AmazonSageMaker-ExecutionRole-20250731T162975\"\n", - " #role=\"arn:aws:iam::<>:role/Admin\",\n", - " custom_reward_function=\"arn:aws:sagemaker:us-west-2:<>:hub-content/sdktest/JsonDoc/rlvr-test-rf/0.0.1\",\n", + " custom_reward_function=evaluator,\n", " accept_eula=True\n", " \n", ")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "4f406ee1-79bc-4062-b164-b599f41f1508", "metadata": {}, - "source": [ - "training_job = rlvr_trainer.train(wait=True)" - ], "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "8738ad78-677a-449c-8854-24da5db238b7", - "metadata": {}, "source": [ "training_job = rlvr_trainer.train(wait=True)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "83e3fde5-2c4d-4669-b807-3fe142eabbc9", "metadata": { "scrolled": true }, + "outputs": [], "source": [ "#training_job.refresh()\n", "pretty_print(training_job)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "6dd03d80-b248-4c7c-b311-f0812652cba5", - "metadata": {}, - "source": [ - "\n", - "#meta-textgeneration-llama-3-2-1b-instruct-rlvr-20251113182932" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -317,10 +387,12 @@ }, { "cell_type": "code", + "execution_count": null, "id": "10375308-eb3f-42bc-a59c-fe65d528fbbd", "metadata": { "scrolled": true }, + "outputs": [], "source": [ "from rich import print as rprint\n", "from rich.pretty import pprint\n", @@ -330,9 +402,7 @@ "model_package = ModelPackage.get(model_package_name=\"arn:aws:sagemaker:us-west-2:<>:model-package/test-finetuned-models-gamma/61\")\n", "\n", "pretty_print(model_package)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -346,49 +416,24 @@ }, { "cell_type": "code", + "execution_count": null, "id": "a9a19b3d-f463-4f27-b27d-427bc7742ea6", "metadata": {}, - "source": [ - "# For fine-tuning \n", - "rlvr_trainer = RLVRTrainer(\n", - " model=model_package, # Union[str, ModelPackage] \n", - " training_type=TrainingType.LORA, \n", - " model_package_group_name=\"test-finetuned-models-gamma\", #\"test-finetuned-models\", # Make it Optional\n", - " mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:<>:mlflow-tracking-server/mmlu-eval-experiment\", # Optional[str] - MLflow app ARN (auto-resolved if not provided), can accept name and search in the account\n", - " mlflow_experiment_name=\"test-rlvr-finetuned-models-exp\", # Optional[str]\n", - " mlflow_run_name=\"test-rlvr-finetuned-models-run\", # Optional[str]\n", - " training_dataset=\"arn:aws:sagemaker:us-west-2:<>:hub-content/AIRegistry/DataSet/rlvr-rlaif-test-dataset/0.0.2\", #\"arn:aws:sagemaker:us-west-2:<>:hub-content/AIRegistry/DataSet/MarketingDemoDataset1/1.0.0\", #Optional[]\n", - " s3_output_path=\"s3://open-models-testing-pdx/output\",\n", - " sagemaker_session=sagemaker_session,\n", - " #role=\"arn:aws:iam::<>:role/service-role/AmazonSageMaker-ExecutionRole-20250731T162975\"\n", - " role=\"arn:aws:iam::<>:role/Admin\"\n", - ")" - ], "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "d8316a2e-90dd-4d12-a883-0acbfbfc833d", - "metadata": {}, "source": [ "# For fine-tuning \n", "rlvr_trainer = RLVRTrainer(\n", - " model=\"arn:aws:sagemaker:us-west-2:<>:model-package/test-finetuned-models-gamma/61\", # Union[str, ModelPackage] \n", 
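Review note: several cells in these notebooks launch with `wait=False` and later recover the job via `TrainingJob.get` and `refresh()`. Those two calls can be combined into a small polling loop; a minimal sketch follows. The `get()`/`refresh()` calls mirror cells shown above, while `training_job_status` is an assumed attribute name (the snake_cased DescribeTrainingJob field), so verify it before use.

```python
import time

from sagemaker.core.resources import TrainingJob

# Monitor a job launched with wait=False. The job name literal is the one
# already used in the tracking cell above.
job = TrainingJob.get(
    training_job_name="meta-textgeneration-llama-3-2-3b-instruct-rlvr-20251123033517"
)
while job.training_job_status in ("InProgress", "Stopping"):
    time.sleep(60)  # poll once a minute
    job.refresh()   # re-read the latest state, as the cells above do
print(f"Final status: {job.training_job_status}")
```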
+ " model=model_package, # Union[str, ModelPackage] \n", " training_type=TrainingType.LORA, \n", - " model_package_group_name=\"test-finetuned-models-gamma\", #\"test-finetuned-models\", # Make it Optional\n", + " model_package_group=\"test-finetuned-models-gamma\", #\"test-finetuned-models\", # Make it Optional\n", " mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:<>:mlflow-tracking-server/mmlu-eval-experiment\", # Optional[str] - MLflow app ARN (auto-resolved if not provided), can accept name and search in the account\n", " mlflow_experiment_name=\"test-rlvr-finetuned-models-exp\", # Optional[str]\n", " mlflow_run_name=\"test-rlvr-finetuned-models-run\", # Optional[str]\n", - " training_dataset=\"arn:aws:sagemaker:us-west-2:<>:hub-content/AIRegistry/DataSet/rlvr-rlaif-test-dataset/0.0.2\", #\"arn:aws:sagemaker:us-west-2:<>:hub-content/AIRegistry/DataSet/MarketingDemoDataset1/1.0.0\", #Optional[]\n", + " training_dataset=dataset.arn, #\"arn:aws:sagemaker:us-west-2:<>:hub-content/AIRegistry/DataSet/MarketingDemoDataset1/1.0.0\", #Optional[]\n", " s3_output_path=\"s3://open-models-testing-pdx/output\",\n", - " sagemaker_session=sagemaker_session,\n", - " #role=\"arn:aws:iam::<>:role/service-role/AmazonSageMaker-ExecutionRole-20250731T162975\"\n", - " role=\"arn:aws:iam::<>:role/Admin\"\n", + " accept_eula=True\n", ")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -400,23 +445,25 @@ }, { "cell_type": "code", + "execution_count": null, "id": "dd3722c9-bddd-465b-ae56-ab1475f4f6fd", "metadata": {}, + "outputs": [], "source": [ "training_job = rlvr_trainer.train(\n", " wait=True,\n", ")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "c6bd0ed2-5e8e-4f3b-b148-23840f7d3d75", "metadata": {}, - "source": "pretty_print(training_job)", "outputs": [], - "execution_count": null + "source": [ + "pretty_print(training_job)" + ] }, { "cell_type": "markdown", @@ -428,8 +475,10 @@ }, { "cell_type": "code", + "execution_count": null, "id": "c7648ad2-0795-45ed-bfa1-5f039a132426", "metadata": {}, + "outputs": [], "source": [ "import os\n", "os.environ['SAGEMAKER_REGION'] = 'us-east-1'\n", @@ -437,7 +486,7 @@ "# For fine-tuning \n", "rlvr_trainer = RLVRTrainer(\n", " model=\"nova-textgeneration-lite-v2\", # Union[str, ModelPackage] \n", - " model_package_group_name=\"sdk-test-finetuned-models\", #\"test-finetuned-models\", # Make it Optional\n", + " model_package_group=\"sdk-test-finetuned-models\", #\"test-finetuned-models\", # Make it Optional\n", " #mlflow_resource_arn=\"arn:aws:sagemaker:us-east-1:<>:mlflow-app/app-UNBKLOAX64PX\", # Optional[str] - MLflow app ARN (auto-resolved if not provided), can accept name and search in the account\n", " mlflow_experiment_name=\"test-nova-rlvr-finetuned-models-exp\", # Optional[str]\n", " mlflow_run_name=\"test-nova-rlvr-finetuned-models-run\", # Optional[str]\n", @@ -447,144 +496,57 @@ " custom_reward_function=\"arn:aws:sagemaker:us-east-1:<>:hub-content/sdktest/JsonDoc/rlvr-nova-test-rf/0.0.1\",\n", " accept_eula=True\n", ")\n" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "6621a885-d929-4c4d-b622-459772b4eebf", "metadata": {}, + "outputs": [], "source": [ "rlvr_trainer.hyperparameters.to_dict()" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "daa592d7-7053-4994-be34-07bc61fef921", "metadata": {}, + "outputs": [], "source": [ 
"rlvr_trainer.hyperparameters.data_s3_path = 's3://example-bucket'\n", "\n", "rlvr_trainer.hyperparameters.reward_lambda_arn = 'arn:aws:lambda:us-east-1:<>:function:rlvr-nova-reward-function'" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "881ba1de-d7e4-4b82-ae8a-593306e56a74", - "metadata": {}, - "source": [ - "rlvr_trainer.hyperparameters.to_dict()" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "447051d4-2a4c-4db9-a71e-8face7e5d4c5", - "metadata": {}, - "source": [ - "training_job = rlvr_trainer.train(wait=True)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "3a0b0746-27ac-4e7c-ba86-72dffd8f2715", - "metadata": {}, - "source": [ - "training_job = rlvr_trainer.train(wait=False)" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "markdown", - "id": "a9a2d70f-4af7-4e4d-8305-db5463f97f34", - "metadata": {}, - "source": [ - "#### Nova RLVR job (<>)" ] }, { "cell_type": "code", - "id": "7b10e842-6142-4a0a-83c7-55390fc4022c", + "execution_count": null, + "id": "881ba1de-d7e4-4b82-ae8a-593306e56a74", "metadata": {}, - "source": [ - "import os\n", - "os.environ['SAGEMAKER_REGION'] = 'us-east-1'\n", - "\n", - "# For fine-tuning \n", - "rlvr_trainer = RLVRTrainer(\n", - " model=\"nova-textgeneration-lite-v2\", # Union[str, ModelPackage] \n", - " model_package_group_name=\"test-prod-iad-model-pkg-group\", #\"test-finetuned-models\", # Make it Optional\n", - " #mlflow_resource_arn=\"arn:aws:sagemaker:us-east-1:<>:mlflow-app/app-UNBKLOAX64PX\", # Optional[str] - MLflow app ARN (auto-resolved if not provided), can accept name and search in the account\n", - " mlflow_experiment_name=\"test-nova-rlvr-finetuned-models-exp\", # Optional[str]\n", - " mlflow_run_name=\"test-nova-rlvr-finetuned-models-run\", # Optional[str]\n", - " training_dataset=\"s3://ease-integ-test-input-<>-us-east-1/converse-serverless-test-data/grpo-64-sample.jsonl\",\n", - " validation_dataset=\"s3://ease-integ-test-input-<>-us-east-1/converse-serverless-test-data/grpo-64-sample.jsonl\",\n", - " s3_output_path=\"s3://ease-integ-test-output-<>-us-east-1/model-customization-algo/\",\n", - " custom_reward_function=\"arn:aws:sagemaker:us-east-1:<>:hub-content/recipestest/JsonDoc/nova-prod-iad-test-evaluator-lambda-reward-function/0.0.1\",\n", - " accept_eula=True\n", - ")\n" - ], "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "bbfc5853-9381-4a9f-b16f-dcf5c59f5999", - "metadata": {}, "source": [ "rlvr_trainer.hyperparameters.to_dict()" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "4dbb19c3-5cd1-4b24-95a1-a5cfd93f4e18", - "metadata": {}, - "source": [ - "rlvr_trainer.hyperparameters.data_s3_path = 's3://example-bucket'\n", - "\n", - "rlvr_trainer.hyperparameters.reward_lambda_arn = 'arn:aws:lambda:us-east-1:<>:function:rlvr-nova-reward-function'" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", - "id": "1a491813-bf80-4485-bd32-c473f94af266", + "execution_count": null, + "id": "447051d4-2a4c-4db9-a71e-8face7e5d4c5", "metadata": {}, - "source": [ - "rlvr_trainer.hyperparameters.to_dict()" - ], "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "24f13939-746f-420c-97f1-ece2cb0a8190", - "metadata": {}, "source": [ "training_job = rlvr_trainer.train(wait=True)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, 
"id": "ed22071f-2161-462d-b1ca-f701adfa6e07", "metadata": {}, - "source": [], "outputs": [], - "execution_count": null + "source": [] } ], "metadata": { diff --git a/v3-examples/model-customization-examples/sft_finetuning_example_notebook-pysdk-prod-v3.ipynb b/v3-examples/model-customization-examples/sft_finetuning_example_notebook_pysdk_prod_v3.ipynb similarity index 74% rename from v3-examples/model-customization-examples/sft_finetuning_example_notebook-pysdk-prod-v3.ipynb rename to v3-examples/model-customization-examples/sft_finetuning_example_notebook_pysdk_prod_v3.ipynb index 66a0c93a0a..946debc7d7 100644 --- a/v3-examples/model-customization-examples/sft_finetuning_example_notebook-pysdk-prod-v3.ipynb +++ b/v3-examples/model-customization-examples/sft_finetuning_example_notebook_pysdk_prod_v3.ipynb @@ -22,21 +22,23 @@ ] }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "id": "87aa2004556ad7c6", + "metadata": {}, + "outputs": [], "source": [ "# Configure AWS credentials and region\n", "#! ada credentials update --provider=isengard --account=<> --role=Admin --profile=default --once\n", "#! aws configure set region us-west-2" - ], - "id": "87aa2004556ad7c6", - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "a51be0b5-fd33-4fa0-af2b-d08ce0dc7a8e", "metadata": {}, + "outputs": [], "source": [ "from sagemaker.train.sft_trainer import SFTTrainer\n", "from sagemaker.train.common import TrainingType\n", @@ -53,9 +55,7 @@ "# For MLFlow native metrics in Trainer wait, run below line with approriate region\n", "os.environ[\"SAGEMAKER_MLFLOW_CUSTOM_ENDPOINT\"] = \"https://mlflow.sagemaker.us-west-2.app.aws\"\n", "\n" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -65,6 +65,58 @@ "## Finetuning with Jumpstart base model" ] }, + { + "cell_type": "markdown", + "id": "e77ccaab-b288-4970-90d7-99d6503d790f", + "metadata": {}, + "source": [ + "### Prepare and Register Dataset\n", + "Prepare and Register Dataset for Finetuning" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef4f0e61-de4d-4228-b7a1-ea7497dad547", + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.ai_registry.dataset import DataSet\n", + "from sagemaker.ai_registry.dataset_utils import CustomizationTechnique\n", + "\n", + "\n", + "\n", + "# Register dataset in SageMaker AI Registry\n", + "# This creates a versioned dataset that can be referenced by ARN\n", + "# Provide a source (it can be local file path or S3 URL)\n", + "dataset = DataSet.create(\n", + " name=\"demo-1\",\n", + " source=\"s3://mc-flows-sdk-testing/input_data/sft/sample_data_256_final.jsonl\"\n", + ")\n", + "\n", + "print(f\"Dataset ARN: {dataset.arn}\")" + ] + }, + { + "cell_type": "markdown", + "id": "fcd0580e-ebe4-488b-b2b1-489aed9e24f8", + "metadata": {}, + "source": [ + "##### Create a Model Package group (if not already exists)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6937550-f721-43ff-82dd-c513c328dd17", + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.core.resources import ModelPackage, ModelPackageGroup\n", + "\n", + "model_package_group=ModelPackageGroup.create(model_package_group_name=\"test-model-package-group\")" + ] + }, { "cell_type": "markdown", "id": "18c4a2a3-2a6d-44c0-a1a7-14938bf2ff83", @@ -77,57 +129,44 @@ "\n", "**Optional Parameters**\n", "* `training_type`: Choose from TrainingType Enum(sagemaker.modules.train.common) either LORA OR FULL.\n", - 
"* `model_package_group_name`: ModelPackage group name or ModelPackageGroup\n", + "* `model_package_group`: ModelPackage group name or ModelPackageGroup object. This parameter is mandatory when a base model ID is provided, but optional when a model package is provided.\n", "* `mlflow_resource_arn`: MLFlow app ARN to track the training job\n", "* `mlflow_experiment_name`: MLFlow app experiment name(str)\n", "* `mlflow_run_name`: MLFlow app run name(str)\n", - "* `training_dataset`: Training Dataset - either Dataset ARN or S3 Path of the dataset (Please note these are required for a training job to run, can be either provided via Trainer or .train())\n", - "* `validation_dataset`: Validation Dataset - either Dataset ARN or S3 Path of the dataset\n", + "* `training_dataset`: Training Dataset - should be a Dataset ARN or Dataset object (Please note training dataset is required for a training job to run, can be either provided via Trainer or .train())\n", + "* `validation_dataset`: Validation Dataset - should be a Dataset ARN or Dataset object\n", "* `s3_output_path`: S3 path for the trained model artifacts" ] }, + { + "cell_type": "markdown", + "id": "aea9a551-9a6b-4c05-8999-7ca4b0bfdd62", + "metadata": {}, + "source": [ + "#### Reference \n", + "Refer this doc for other models that support Model Customization: \n", + "https://docs.aws.amazon.com/bedrock/latest/userguide/custom-model-supported.html" + ] + }, { "cell_type": "code", + "execution_count": null, "id": "88fe8360-de50-481d-932f-564a32be66a0", "metadata": {}, + "outputs": [], "source": [ "# For fine-tuning \n", "sft_trainer = SFTTrainer(\n", - " model=\"meta-textgeneration-llama-3-2-1b-instruct\", # Union[str, ModelPackage]\n", + " model=\"meta-textgeneration-llama-3-2-1b-instruct\", \n", " training_type=TrainingType.LORA, \n", - " model_package_group_name=\"arn:aws:sagemaker:us-west-2:<>:model-package-group/sdk-test-finetuned-models\", # Make it Optional\n", - " #mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:<>:mlflow-tracking-server/mmlu-eval-experiment\", # Optional[str] - MLflow app ARN (auto-resolved if not provided), can accept name and search in the account\n", - " mlflow_experiment_name=\"test-finetuned-models-exp\", # Optional[str]\n", - " mlflow_run_name=\"test-finetuned-models-run\", # Optional[str]\n", - " training_dataset=\"s3://mc-flows-sdk-testing/input_data/sft/\", #Optional[]\n", + " model_package_group=model_package_group, # or use an existing model package group arn\n", + " mlflow_experiment_name=\"test-finetuned-models-exp\", \n", + " mlflow_run_name=\"test-finetuned-models-run\", \n", + " training_dataset=dataset.arn, \n", " s3_output_path=\"s3://mc-flows-sdk-testing/output/\",\n", " accept_eula=True\n", ")\n" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "9325b23b-d6a5-425e-b588-77d3b97f3843", - "metadata": {}, - "source": [ - "# For fine-tuning (us-east-1)\n", - "sft_trainer = SFTTrainer(\n", - " model=\"meta-textgeneration-llama-3-2-1b-instruct\", # Union[str, ModelPackage]\n", - " training_type=TrainingType.LORA, \n", - " model_package_group_name=\"arn:aws:sagemaker:us-east-1:<>:model-package-group/sdk-test-finetuned-models-us-east-1\", # Make it Optional\n", - " mlflow_resource_arn=\"arn:aws:sagemaker:us-east-1:<>:mlflow-app/app-J2NPD6IV77BJ\", # Optional[str] - MLflow app ARN (auto-resolved if not provided), can accept name and search in the account\n", - " mlflow_experiment_name=\"test-finetuned-models-exp\", # Optional[str]\n", - " 
mlflow_run_name=\"test-finetuned-models-run\", # Optional[str]\n", - " training_dataset=\"s3://mc-flows-sdk-testing-us-east-1/input_data/sft/train_285.jsonl\", #Optional[]\n", - " s3_output_path=\"s3://mc-flows-sdk-testing-us-east-1/output/\",\n", - " #sagemaker_session=sagemaker_session,\n", - " #role=\"arn:aws:iam::<>:role/Admin\"\n", - ")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -141,29 +180,29 @@ }, { "cell_type": "code", + "execution_count": null, "id": "de183042-bb92-4947-9acd-78d7231bda13", "metadata": { "scrolled": true }, + "outputs": [], "source": [ "print(\"Default Finetuning Options:\")\n", "pprint(sft_trainer.hyperparameters.to_dict()) # rename as hyperparameters" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "6b57838f-81ac-4fbe-9ddf-5588e42bcce1", "metadata": { "scrolled": true }, + "outputs": [], "source": [ "# To update any hyperparameter, simply assign the value, example:\n", "sft_trainer.hyperparameters.global_batch_size=16" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -175,37 +214,27 @@ }, { "cell_type": "code", + "execution_count": null, "id": "4d3b6441-9abb-447b-9307-9606a8c0fabd", "metadata": { - "scrolled": true, "jupyter": { "is_executing": true - } + }, + "scrolled": true }, - "source": [ - "training_job = sft_trainer.train(\n", - " wait=True,\n", - ")" - ], "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "6e9060b2-e269-461c-88fb-dac4e8854b8a", - "metadata": {}, "source": [ "training_job = sft_trainer.train(\n", " wait=True,\n", ")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "0373cea6-7419-47f1-a59e-1cb441324dc3", "metadata": {}, + "outputs": [], "source": [ "\n", "\n", @@ -234,14 +263,14 @@ " print(json.dumps(cleaned, indent=2, default=str))\n", "\n", "pretty_print(response)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "e9ee7f8e-b26c-4579-9dbc-f08124f2e944", "metadata": {}, + "outputs": [], "source": [ "#In order to skip waiting and monitor the training Job later\n", "\n", @@ -250,19 +279,19 @@ " wait=False,\n", ")\n", "'''" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "0d99f212-f0bd-43c1-be21-30202fb4a152", "metadata": { "scrolled": true }, - "source": "pretty_print(training_job)", "outputs": [], - "execution_count": null + "source": [ + "pretty_print(training_job)" + ] }, { "cell_type": "markdown", @@ -276,18 +305,18 @@ }, { "cell_type": "code", + "execution_count": null, "id": "6bbe96b4-c8cd-4de3-b4c0-a66fd3086eb2", "metadata": { "scrolled": true }, + "outputs": [], "source": [ "from sagemaker.core.resources import TrainingJob\n", "\n", "response = TrainingJob.get(training_job_name=\"meta-textgeneration-llama-3-2-1b-instruct-sft-20251123162832\")\n", "pretty_print(response)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -307,10 +336,12 @@ }, { "cell_type": "code", + "execution_count": null, "id": "11a16b70-526f-42a1-8d0f-3a8b14f559a5", "metadata": { "scrolled": true }, + "outputs": [], "source": [ "from rich import print as rprint\n", "from rich.pretty import pprint\n", @@ -320,9 +351,7 @@ "model_package = ModelPackage.get(model_package_name=\"arn:aws:sagemaker:us-west-2:<>:model-package/sdk-test-finetuned-models/2\")\n", "\n", 
"pretty_print(model_package)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -336,23 +365,22 @@ }, { "cell_type": "code", + "execution_count": null, "id": "dc715f3d-543a-4c75-888e-24220d226526", "metadata": {}, + "outputs": [], "source": [ "# For fine-tuning \n", "sft_trainer = SFTTrainer(\n", " model=model_package, # Union[str, ModelPackage]\n", " training_type=TrainingType.LORA, \n", - " model_package_group_name=\"sdk-test-finetuned-models\", # Make it Optional\n", - " #mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:<>:mlflow-tracking-server/mmlu-eval-experiment\", # Optional[str] - MLflow app ARN (auto-resolved if not provided), can accept name and search in the account\n", + " model_package_group=\"sdk-test-finetuned-models\", # Make it Optional\n", " mlflow_experiment_name=\"test-finetuned-models-exp\", # Optional[str]\n", " mlflow_run_name=\"test-finetuned-models-run\", # Optional[str]\n", - " training_dataset=\"s3://mc-flows-sdk-testing/input_data/sft/\", #Optional[]\n", + " training_dataset=dataset.arn, #Optional[]\n", " s3_output_path=\"s3://mc-flows-sdk-testing/output/\",\n", ")\n" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -364,23 +392,25 @@ }, { "cell_type": "code", + "execution_count": null, "id": "5c3f3380-5305-4178-9120-aeca1ba6ea44", "metadata": {}, + "outputs": [], "source": [ "training_job = sft_trainer.train(\n", " wait=True,\n", ")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "d55299f5-639b-435c-8b50-718491c0060a", "metadata": {}, - "source": "pretty_print(training_job)", "outputs": [], - "execution_count": null + "source": [ + "pretty_print(training_job)" + ] }, { "cell_type": "markdown", @@ -392,49 +422,48 @@ }, { "cell_type": "code", + "execution_count": null, "id": "0f11e6d5-7bb9-41b8-8d6c-94377691e3be", "metadata": {}, + "outputs": [], "source": [ "os.environ['SAGEMAKER_REGION'] = 'us-east-1'\n", "\n", "# For fine-tuning \n", "sft_trainer_nova = SFTTrainer(\n", - " #model=\"test-nova-lite-v2\", # Union[str, ModelPackage]\n", + " #model=\"test-nova-lite-v2\", \n", " #model=\"nova-textgeneration-micro\",\n", " model=\"nova-textgeneration-lite-v2\",\n", " training_type=TrainingType.LORA, \n", - " model_package_group_name=\"sdk-test-finetuned-models\", # Make it Optional\n", - " #mlflow_resource_arn=\"arn:aws:sagemaker:us-east-1:<>:mlflow-app/app-UNBKLOAX64PX\", # Optional[str] - MLflow app ARN (auto-resolved if not provided), can accept name and search in the account\n", - " mlflow_experiment_name=\"test-nova-finetuned-models-exp\", # Optional[str]\n", - " mlflow_run_name=\"test-nova-finetuned-models-run\", # Optional[str]\n", + " model_package_group=\"sdk-test-finetuned-models\", \n", + " mlflow_experiment_name=\"test-nova-finetuned-models-exp\", \n", + " mlflow_run_name=\"test-nova-finetuned-models-run\", \n", " training_dataset=\"arn:aws:sagemaker:us-east-1:<>:hub-content/sdktest/DataSet/sft-nova-test-dataset/0.0.1\",\n", " s3_output_path=\"s3://mc-flows-sdk-testing-us-east-1/output/\"\n", ")\n" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "9acf53aa-4ac5-4a74-8d67-607e0d09820f", "metadata": {}, + "outputs": [], "source": [ "sft_trainer_nova.hyperparameters.to_dict()" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "e6ce0ece-0185-4fc4-ae89-218667cf6b14", "metadata": {}, + "outputs": [], 
"source": [ "training_job = sft_trainer_nova.train(\n", " wait=True,\n", ")" - ], - "outputs": [], - "execution_count": null + ] } ], "metadata": {