From d55a2c3d1a67c5e4f720ed1d0b04391a9ec449c1 Mon Sep 17 00:00:00 2001 From: Michael Benayoun Date: Fri, 14 Nov 2025 17:04:18 +0100 Subject: [PATCH 1/2] add image uri config for vLLM with Optimum Neuron --- .../huggingface-vllm-neuronx.json | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 src/sagemaker/image_uri_config/huggingface-vllm-neuronx.json diff --git a/src/sagemaker/image_uri_config/huggingface-vllm-neuronx.json b/src/sagemaker/image_uri_config/huggingface-vllm-neuronx.json new file mode 100644 index 0000000000..dac51bf03f --- /dev/null +++ b/src/sagemaker/image_uri_config/huggingface-vllm-neuronx.json @@ -0,0 +1,69 @@ +{ + "inference": { + "processors": [ + "inf2" + ], + "version_aliases": { + "0.10": "0.10.2" + }, + "versions": { + "0.10.2": { + "py_versions": [ + "py310" + ], + "registries": { + "af-south-1": "626614931356", + "ap-east-1": "871362719292", + "ap-east-2": "975050140332", + "ap-northeast-1": "763104351884", + "ap-northeast-2": "763104351884", + "ap-northeast-3": "364406365360", + "ap-south-1": "763104351884", + "ap-south-2": "772153158452", + "ap-southeast-1": "763104351884", + "ap-southeast-2": "763104351884", + "ap-southeast-3": "907027046896", + "ap-southeast-4": "457447274322", + "ap-southeast-5": "550225433462", + "ap-southeast-6": "633930458069", + "ap-southeast-7": "590183813437", + "ca-central-1": "763104351884", + "ca-west-1": "204538143572", + "cn-north-1": "727897471807", + "cn-northwest-1": "727897471807", + "eu-central-1": "763104351884", + "eu-central-2": "380420809688", + "eu-north-1": "763104351884", + "eu-south-1": "692866216735", + "eu-south-2": "503227376785", + "eu-west-1": "763104351884", + "eu-west-2": "763104351884", + "eu-west-3": "763104351884", + "il-central-1": "780543022126", + "me-central-1": "914824155844", + "me-south-1": "217643126080", + "mx-central-1": "637423239942", + "sa-east-1": "763104351884", + "us-east-1": "763104351884", + "us-east-2": "763104351884", + "us-gov-east-1": 
"446045086412", + "us-gov-west-1": "442386744353", + "us-iso-east-1": "886529160074", + "us-isob-east-1": "094389454867", + "us-isof-east-1": "303241398832", + "us-isof-south-1": "454834333376", + "us-west-1": "763104351884", + "us-west-2": "763104351884" + }, + "tag_prefix": "0.10.2", + "repository": "huggingface-vllm-inference-neuronx", + "container_version": { + "inf2": "ubuntu22.04" + }, + "sdk_versions": [ + "sdk2.26.0" + ] + } + } + } +} From 05943a0a6c72f9bbc2d59b6eebb985d16e270b61 Mon Sep 17 00:00:00 2001 From: Michael Benayoun Date: Fri, 14 Nov 2025 17:20:26 +0100 Subject: [PATCH 2/2] adapt API and add tests to make sure the generated URIs are correct --- .../test_huggingface_vllm_neuronx.py | 104 ++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 tests/unit/sagemaker/image_uris/test_huggingface_vllm_neuronx.py diff --git a/tests/unit/sagemaker/image_uris/test_huggingface_vllm_neuronx.py b/tests/unit/sagemaker/image_uris/test_huggingface_vllm_neuronx.py new file mode 100644 index 0000000000..a638da9415 --- /dev/null +++ b/tests/unit/sagemaker/image_uris/test_huggingface_vllm_neuronx.py @@ -0,0 +1,104 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+from __future__ import absolute_import
+
+import pytest
+from packaging.version import parse
+
+from sagemaker.huggingface import get_huggingface_llm_image_uri
+from tests.unit.sagemaker.image_uris import expected_uris
+
+# Mapping of vLLM versions to expected image tags
+VLLM_VERSIONS_MAPPING = {
+    "inf2": {
+        "0.10.2": "0.10.2-neuronx-py310-sdk2.26.0-ubuntu22.04",
+    },
+}
+
+
+@pytest.mark.parametrize("load_config", ["huggingface-vllm-neuronx.json"], indirect=True)
+def test_vllm_neuronx_uris(load_config):
+    """Test that vLLM NeuronX image URIs are correctly generated.
+
+    Every version/region pair of the config is resolved. The URI is compared
+    with the expected URI built from the tag in VLLM_VERSIONS_MAPPING, except
+    for versions newer than the newest mapped one, which are only resolved.
+    """
+    versions = load_config["inference"]["versions"]
+    device = load_config["inference"]["processors"][0]
+
+    # Fail if device is not in mapping
+    if device not in VLLM_VERSIONS_MAPPING:
+        raise ValueError(f"Device {device} not found in VLLM_VERSIONS_MAPPING")
+
+    expected_tags = VLLM_VERSIONS_MAPPING[device]
+    highest_version = max(expected_tags, key=parse)
+
+    for version, version_config in versions.items():
+        # Decide once per version so the notice is not printed for every region.
+        skip_check = parse(version) > parse(highest_version)
+        if skip_check:
+            print(
+                f"Skipping version check for {version} as it is higher than "
+                f"the highest known version {highest_version} in VLLM_VERSIONS_MAPPING."
+            )
+        else:
+            # Fail with a clear message instead of a KeyError in the region loop.
+            assert version in expected_tags, f"No expected tag for version {version}"
+
+        for region, account in version_config["registries"].items():
+            # Resolved even when the comparison is skipped, so lookup errors surface.
+            uri = get_huggingface_llm_image_uri(
+                "huggingface-vllm-neuronx", region=region, version=version
+            )
+            if skip_check:
+                continue
+
+            expected = expected_uris.huggingface_llm_framework_uri(
+                "huggingface-vllm-inference-neuronx",
+                account,
+                version,
+                expected_tags[version],
+                region=region,
+            )
+            assert expected == uri
+
+
+@pytest.mark.parametrize("load_config", ["huggingface-vllm-neuronx.json"], indirect=True)
+def test_vllm_neuronx_version_aliases(load_config):
+    """Test that each version alias yields the same URI as its full version."""
+    version_aliases = load_config["inference"].get("version_aliases", {})
+
+    for alias, full_version in version_aliases.items():
+        uri_alias = get_huggingface_llm_image_uri(
+            "huggingface-vllm-neuronx", region="us-east-1", version=alias
+        )
+        uri_full = get_huggingface_llm_image_uri(
+            "huggingface-vllm-neuronx", region="us-east-1", version=full_version
+        )
+        assert uri_alias == uri_full
+
+
+@pytest.mark.parametrize("load_config", ["huggingface-vllm-neuronx.json"], indirect=True)
+def test_vllm_neuronx_all_regions(load_config):
+    """Test the registry, region, repository and version parts of each region's URI."""
+    version = "0.10.2"
+    registries = load_config["inference"]["versions"][version]["registries"]
+
+    for region, account in registries.items():
+        uri = get_huggingface_llm_image_uri(
+            "huggingface-vllm-neuronx", region=region, version=version
+        )
+        assert uri.startswith(f"{account}.dkr.ecr.{region}")
+        assert "huggingface-vllm-inference-neuronx" in uri
+        assert version in uri