Skip to content

Commit 00d4ebc

Browse files
authored
Merge pull request #2172 from tkatila/gpu-misc-updates
Misc GPU updates
2 parents 29dcc6d + 8bfa57c commit 00d4ebc

File tree

10 files changed

+256
-45
lines changed

10 files changed

+256
-45
lines changed

build/docker/intel-gpu-levelzero.Dockerfile

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,15 @@ RUN if [ $ROCKYLINUX -eq 0 ]; then \
4444
LATEST_GO=$(curl --no-progress-meter https://go.dev/dl/?mode=json | jq ".[] | select(.version | startswith(\"go${CGO_VERSION}\")).version" | tr -d "\"") && \
4545
wget -q https://go.dev/dl/$LATEST_GO.linux-amd64.tar.gz -O - | tar -xz -C /usr/local && \
4646
cd /runtime && \
47-
wget -q https://github.com/intel/compute-runtime/releases/download/25.09.32961.7/intel-level-zero-gpu_1.6.32961.7_amd64.deb && \
48-
wget -q https://github.com/intel/compute-runtime/releases/download/25.09.32961.7/intel-opencl-icd_25.09.32961.7_amd64.deb && \
49-
wget -q https://github.com/intel/compute-runtime/releases/download/25.09.32961.7/libigdgmm12_22.6.0_amd64.deb && \
50-
wget -q https://github.com/oneapi-src/level-zero/releases/download/v1.20.2/level-zero-devel_1.20.2+u22.04_amd64.deb && \
51-
wget -q https://github.com/oneapi-src/level-zero/releases/download/v1.20.2/level-zero_1.20.2+u22.04_amd64.deb && \
52-
wget -q https://github.com/intel/intel-graphics-compiler/releases/download/v2.8.3/intel-igc-core-2_2.8.3+18762_amd64.deb && \
53-
wget -q https://github.com/intel/intel-graphics-compiler/releases/download/v2.8.3/intel-igc-opencl-2_2.8.3+18762_amd64.deb && \
47+
wget -q https://github.com/intel/intel-graphics-compiler/releases/download/v2.20.3/intel-igc-core-2_2.20.3+19972_amd64.deb && \
48+
wget -q https://github.com/intel/intel-graphics-compiler/releases/download/v2.20.3/intel-igc-opencl-2_2.20.3+19972_amd64.deb && \
49+
wget -q https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/intel-opencl-icd_25.40.35563.4-0_amd64.deb && \
50+
wget -q https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/libigdgmm12_22.8.2_amd64.deb && \
51+
wget -q https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/libze-intel-gpu1_25.40.35563.4-0_amd64.deb && \
52+
wget -q https://github.com/oneapi-src/level-zero/releases/download/v1.24.3/level-zero_1.24.3+u22.04_amd64.deb && \
53+
wget -q https://github.com/oneapi-src/level-zero/releases/download/v1.24.3/level-zero-devel_1.24.3+u22.04_amd64.deb && \
5454
dpkg -i *.deb && \
55+
rm -f *.deb && \
5556
rm -rf /var/lib/apt/lists/\*; \
5657
else \
5758
source /etc/os-release && dnf install -y gcc jq wget 'dnf-command(config-manager)' && \
@@ -83,9 +84,19 @@ ARG CMD
8384
ARG ROCKYLINUX
8485
COPY --from=builder /runtime /runtime
8586
RUN if [ $ROCKYLINUX -eq 0 ]; then \
86-
apt-get update && apt-get install --no-install-recommends -y ocl-icd-libopencl1 && \
87-
rm /runtime/level-zero-devel_*.deb && \
88-
cd /runtime && dpkg -i *.deb && rm -rf /runtime && \
87+
apt-get update && apt-get install --no-install-recommends -y ocl-icd-libopencl1 wget ca-certificates && \
88+
cd /runtime && \
89+
wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.20.3/intel-igc-core-2_2.20.3+19972_amd64.deb && \
90+
wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.20.3/intel-igc-opencl-2_2.20.3+19972_amd64.deb && \
91+
wget https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/intel-opencl-icd_25.40.35563.4-0_amd64.deb && \
92+
wget https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/libigdgmm12_22.8.2_amd64.deb && \
93+
wget https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/libze-intel-gpu1_25.40.35563.4-0_amd64.deb && \
94+
wget https://github.com/oneapi-src/level-zero/releases/download/v1.24.3/level-zero_1.24.3+u22.04_amd64.deb && \
95+
dpkg -i *.deb && \
96+
apt-get -y remove wget ca-certificates && \
97+
apt-get -y autoremove && \
98+
rm -f *.deb && \
99+
rm -rf /var/lib/apt/lists/\* && \
89100
rm "/lib/x86_64-linux-gnu/libze_validation"* && rm "/lib/x86_64-linux-gnu/libze_tracing_layer"*; \
90101
else \
91102
cp -a /runtime//*.so* /usr/lib64/ && cp -a /runtime/OpenCL /etc/ && cp -a /runtime/licenses/* /usr/share/licenses/; \

build/docker/templates/intel-gpu-levelzero.Dockerfile.in

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,15 @@ RUN if [ $ROCKYLINUX -eq 0 ]; then \N
3737
LATEST_GO=$(curl --no-progress-meter https://go.dev/dl/?mode=json | jq ".[] | select(.version | startswith(\"go${CGO_VERSION}\")).version" | tr -d "\"") && \N
3838
wget -q https://go.dev/dl/$LATEST_GO.linux-amd64.tar.gz -O - | tar -xz -C /usr/local && \N
3939
cd /runtime && \N
40-
wget -q https://github.com/intel/compute-runtime/releases/download/25.09.32961.7/intel-level-zero-gpu_1.6.32961.7_amd64.deb && \N
41-
wget -q https://github.com/intel/compute-runtime/releases/download/25.09.32961.7/intel-opencl-icd_25.09.32961.7_amd64.deb && \N
42-
wget -q https://github.com/intel/compute-runtime/releases/download/25.09.32961.7/libigdgmm12_22.6.0_amd64.deb && \N
43-
wget -q https://github.com/oneapi-src/level-zero/releases/download/v1.20.2/level-zero-devel_1.20.2+u22.04_amd64.deb && \N
44-
wget -q https://github.com/oneapi-src/level-zero/releases/download/v1.20.2/level-zero_1.20.2+u22.04_amd64.deb && \N
45-
wget -q https://github.com/intel/intel-graphics-compiler/releases/download/v2.8.3/intel-igc-core-2_2.8.3+18762_amd64.deb && \N
46-
wget -q https://github.com/intel/intel-graphics-compiler/releases/download/v2.8.3/intel-igc-opencl-2_2.8.3+18762_amd64.deb && \N
40+
wget -q https://github.com/intel/intel-graphics-compiler/releases/download/v2.20.3/intel-igc-core-2_2.20.3+19972_amd64.deb && \N
41+
wget -q https://github.com/intel/intel-graphics-compiler/releases/download/v2.20.3/intel-igc-opencl-2_2.20.3+19972_amd64.deb && \N
42+
wget -q https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/intel-opencl-icd_25.40.35563.4-0_amd64.deb && \N
43+
wget -q https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/libigdgmm12_22.8.2_amd64.deb && \N
44+
wget -q https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/libze-intel-gpu1_25.40.35563.4-0_amd64.deb && \N
45+
wget -q https://github.com/oneapi-src/level-zero/releases/download/v1.24.3/level-zero_1.24.3+u22.04_amd64.deb && \N
46+
wget -q https://github.com/oneapi-src/level-zero/releases/download/v1.24.3/level-zero-devel_1.24.3+u22.04_amd64.deb && \N
4747
dpkg -i *.deb && \N
48+
rm -f *.deb && \N
4849
rm -rf /var/lib/apt/lists/\*; \N
4950
else \N
5051
source /etc/os-release && dnf install -y gcc jq wget 'dnf-command(config-manager)' && \N
@@ -80,9 +81,19 @@ ARG ROCKYLINUX
8081
COPY --from=builder /runtime /runtime
8182

8283
RUN if [ $ROCKYLINUX -eq 0 ]; then \N
83-
apt-get update && apt-get install --no-install-recommends -y ocl-icd-libopencl1 && \N
84-
rm /runtime/level-zero-devel_*.deb && \N
85-
cd /runtime && dpkg -i *.deb && rm -rf /runtime && \N
84+
apt-get update && apt-get install --no-install-recommends -y ocl-icd-libopencl1 wget ca-certificates && \N
85+
cd /runtime && \N
86+
wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.20.3/intel-igc-core-2_2.20.3+19972_amd64.deb && \N
87+
wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.20.3/intel-igc-opencl-2_2.20.3+19972_amd64.deb && \N
88+
wget https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/intel-opencl-icd_25.40.35563.4-0_amd64.deb && \N
89+
wget https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/libigdgmm12_22.8.2_amd64.deb && \N
90+
wget https://github.com/intel/compute-runtime/releases/download/25.40.35563.4/libze-intel-gpu1_25.40.35563.4-0_amd64.deb && \N
91+
wget https://github.com/oneapi-src/level-zero/releases/download/v1.24.3/level-zero_1.24.3+u22.04_amd64.deb && \N
92+
dpkg -i *.deb && \N
93+
apt-get -y remove wget ca-certificates && \N
94+
apt-get -y autoremove && \N
95+
rm -f *.deb && \N
96+
rm -rf /var/lib/apt/lists/\* && \N
8697
rm "/lib/x86_64-linux-gnu/libze_validation"* && rm "/lib/x86_64-linux-gnu/libze_tracing_layer"*; \N
8798
else \N
8899
cp -a /runtime//*.so* /usr/lib64/ && cp -a /runtime/OpenCL /etc/ && cp -a /runtime/licenses/* /usr/share/licenses/; \N

cmd/gpu_levelzero/zes.c

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,12 @@ static ze_result_t enumerate_zes_devices(void)
137137
for (uint32_t i = 0; i < count; ++i) {
138138
zes_device_handle_t dev_h = zes_handles[i];
139139

140-
zes_pci_properties_t pci_props;
140+
zes_pci_properties_t pci_props = {
141+
.pNext = NULL,
142+
};
143+
141144
if (zesDevicePciGetProperties(dev_h, &pci_props) != ZE_RESULT_SUCCESS) {
145+
print_log(LOG_WARNING, "Failed to get PCI properties for device %d: %X\n", i, res);
142146
continue;
143147
}
144148

@@ -332,8 +336,9 @@ bool zes_device_bus_is_healthy(char* bdf_address, uint32_t* error)
332336
return true;
333337
}
334338

335-
zes_pci_state_t pci_state;
336-
memset(&pci_state, 0, sizeof(pci_state));
339+
zes_pci_state_t pci_state = {
340+
.pNext = NULL,
341+
};
337342

338343
ze_result_t res = zesDevicePciGetState(handle, &pci_state);
339344
if (res == ZE_RESULT_SUCCESS) {
@@ -409,7 +414,9 @@ double zes_device_temp_max(char* bdf_address, char* sensor, uint32_t* error)
409414
}
410415

411416
for (uint32_t i = 0; i < count; ++i) {
412-
zes_temp_properties_t props;
417+
zes_temp_properties_t props = {
418+
.pNext = NULL,
419+
};
413420

414421
res = zesTemperatureGetProperties(tempHandles[i], &props);
415422
if (res != ZE_RESULT_SUCCESS) {

cmd/gpu_plugin/README.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Table of Contents
1919
* [CDI support](#cdi-support)
2020
* [KMD and UMD](#kmd-and-umd)
2121
* [Health management](#health-management)
22+
* [By-path mounting](#by-path-mounting)
2223
* [Issues with media workloads on multi-GPU setups](#issues-with-media-workloads-on-multi-gpu-setups)
2324
* [Workaround for QSV and VA-API](#workaround-for-qsv-and-va-api)
2425

@@ -60,6 +61,7 @@ For workloads on different KMDs, see [KMD and UMD](#kmd-and-umd).
6061
| -allow-ids | string | "" | A list of PCI Device IDs that are allowed to be registered as resources. Default is empty (=all registered). Cannot be used together with `deny-ids`. |
6162
| -deny-ids | string | "" | A list of PCI Device IDs that are denied to be registered as resources. Default is empty (=all registered). Cannot be used together with `allow-ids`. |
6263
| -allocation-policy | string | none | 3 possible values: balanced, packed, none. For shared-dev-num > 1: _balanced_ mode spreads workloads among GPU devices, _packed_ mode fills one GPU fully before moving to next, and _none_ selects first available device from kubelet. Default is _none_. |
64+
| -bypath | string | single | 3 possible values: single, none, all. Default is single. Changes how the by-path symlinks are handled by the plugin. More [info](#by-path-mounting). |
6365

6466
The plugin also accepts a number of other arguments (common to all plugins) related to logging.
6567
Please use the -h option to see the complete list of logging related options.
@@ -258,6 +260,25 @@ Kubernetes Device Plugin API allows passing device's healthiness to Kubelet. By
258260

259261
Temperature limit can be provided via the command line argument, default is 100C.
260262

263+
### By-path mounting
264+
265+
The DRM devices for the Intel GPUs register `by-path` symlinks under `/dev/dri/by-path`. For each GPU character device, there is a corresponding symlink in the by-path directory:
266+
```
267+
$ ls -l /dev/dri/by-path/
268+
lrwxrwxrwx 1 root root 8 oct x 13:09 pci-0000:00:02.0-card -> ../card1
269+
lrwxrwxrwx 1 root root 13 oct x 13:09 pci-0000:00:02.0-render -> ../renderD128
270+
```
271+
272+
The Intel GPU UMD uses these symlinks to detect hardware properties in some cases. Mounting the by-path symlinks as __symlinks__ with the Device Plugin API (DP API) is not possible. When the symlinks are mounted via the DP API, they are mounted as the actual devices, and the information carried by the symlink name (the PCI address) is lost.
273+
274+
To support all possible use cases, the GPU plugin allows changing the by-path mounting method. The options are:
275+
* `single` - Symlinks are individually mounted per device. Default.
276+
* Mostly works, but is known to have issues with some PyTorch workloads. See [issue](https://github.com/intel/intel-device-plugins-for-kubernetes/issues/2158).
277+
* `none` - No symlinks are mounted.
278+
* Aligned with how devices are exposed in Docker `privileged` mode.
279+
* `all` - Mounts whole DRM `by-path` directory. Pro: symlink file types are preserved. Con: symlinks are present for all devices.
280+
* Optimal for scale-up workloads where all the GPUs are used by the workload.
281+
261282
### Issues with media workloads on multi-GPU setups
262283

263284
OneVPL media API, 3D and compute APIs provide device discovery

cmd/gpu_plugin/gpu_plugin.go

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,6 @@ const (
4343
devfsDriDirectory = "/dev/dri"
4444
wslDxgPath = "/dev/dxg"
4545
wslLibPath = "/usr/lib/wsl"
46-
nfdFeatureDir = "/etc/kubernetes/node-feature-discovery/features.d"
47-
resourceFilename = "intel-gpu-resources.txt"
4846
gpuDeviceRE = `^card[0-9]+$`
4947
controlDeviceRE = `^controlD[0-9]+$`
5048
pciAddressRE = "^[0-9a-f]{4}:[0-9a-f]{2}:[0-9a-f]{2}\\.[0-9a-f]{1}$"
@@ -61,6 +59,10 @@ const (
6159
monitorSuffix = "_monitoring"
6260
monitorID = "all"
6361

62+
bypathOptionNone = "none"
63+
bypathOptionAll = "all"
64+
bypathOptionSingle = "single"
65+
6466
levelzeroAffinityMaskEnvVar = "ZE_AFFINITY_MASK"
6567

6668
// Period of device scans.
@@ -71,8 +73,11 @@ type cliOptions struct {
7173
preferredAllocationPolicy string
7274
allowIDs string
7375
denyIDs string
76+
bypathMount string
7477
sharedDevNum int
75-
temperatureLimit int
78+
globalTempLimit int
79+
memoryTempLimit int
80+
gpuTempLimit int
7681
enableMonitoring bool
7782
wslScan bool
7883
healthManagement bool
@@ -289,6 +294,16 @@ func (dp *devicePlugin) bypathMountsForPci(pciAddress, bypathDir string) []plugi
289294
return mounts
290295
}
291296

297+
func (dp *devicePlugin) bypathMountForAll() []pluginapi.Mount {
298+
return []pluginapi.Mount{
299+
{
300+
ContainerPath: dp.bypathDir,
301+
HostPath: dp.bypathDir,
302+
ReadOnly: true,
303+
},
304+
}
305+
}
306+
292307
type devicePlugin struct {
293308
gpuDeviceReg *regexp.Regexp
294309
controlDeviceReg *regexp.Regexp
@@ -404,13 +419,13 @@ func (dp *devicePlugin) healthStatusForCard(cardPath string) string {
404419
return health
405420
}
406421

407-
limit := float64(dp.options.temperatureLimit)
408-
409422
// Temperatures for different areas
410-
klog.V(4).Infof("Temperatures: Memory=%.1fC, GPU=%.1fC, Global=%.1fC",
423+
klog.V(4).Infof("Temperatures: Memory=%dC, GPU=%dC, Global=%dC",
411424
deviceTemps.Memory, deviceTemps.GPU, deviceTemps.Global)
412425

413-
if deviceTemps.GPU > limit || deviceTemps.Global > limit || deviceTemps.Memory > limit {
426+
if deviceTemps.GPU > dp.options.gpuTempLimit ||
427+
deviceTemps.Global > dp.options.globalTempLimit ||
428+
deviceTemps.Memory > dp.options.memoryTempLimit {
414429
health = pluginapi.Unhealthy
415430
}
416431

@@ -660,8 +675,20 @@ func (dp *devicePlugin) createMountsAndCDIDevices(cardPath, name string, devSpec
660675
mounts := []pluginapi.Mount{}
661676

662677
if dp.bypathFound {
663-
if pciAddr, pciErr := dp.pciAddressForCard(cardPath, name); pciErr == nil {
664-
mounts = dp.bypathMountsForPci(pciAddr, dp.bypathDir)
678+
switch dp.options.bypathMount {
679+
case bypathOptionAll:
680+
klog.V(4).Info("Using by-path mount option: all")
681+
mounts = dp.bypathMountForAll()
682+
case bypathOptionNone:
683+
klog.V(4).Info("Using by-path mount option: none")
684+
// no mounts
685+
case bypathOptionSingle:
686+
fallthrough
687+
default:
688+
klog.V(4).Info("Using by-path mount option: single/default")
689+
if pciAddr, pciErr := dp.pciAddressForCard(cardPath, name); pciErr == nil {
690+
mounts = dp.bypathMountsForPci(pciAddr, dp.bypathDir)
691+
}
665692
}
666693
}
667694

@@ -784,9 +811,12 @@ func main() {
784811
flag.StringVar(&prefix, "prefix", "", "Prefix for devfs & sysfs paths")
785812
flag.BoolVar(&opts.enableMonitoring, "enable-monitoring", false, "whether to enable '*_monitoring' (= all GPUs) resource")
786813
flag.BoolVar(&opts.healthManagement, "health-management", false, "enable GPU health management")
814+
flag.StringVar(&opts.bypathMount, "bypath", bypathOptionSingle, "DRI device 'by-path/' directory mounting options: single, none, all. Default: single")
787815
flag.BoolVar(&opts.wslScan, "wsl", false, "scan for / use WSL devices")
788816
flag.IntVar(&opts.sharedDevNum, "shared-dev-num", 1, "number of containers sharing the same GPU device")
789-
flag.IntVar(&opts.temperatureLimit, "temp-limit", 100, "temperature limit at which device is marked unhealthy")
817+
flag.IntVar(&opts.globalTempLimit, "temp-limit", 100, "Global temperature limit at which device is marked unhealthy")
818+
flag.IntVar(&opts.gpuTempLimit, "gpu-temp-limit", 100, "GPU temperature limit at which device is marked unhealthy")
819+
flag.IntVar(&opts.memoryTempLimit, "memory-temp-limit", 100, "Memory temperature limit at which device is marked unhealthy")
790820
flag.StringVar(&opts.preferredAllocationPolicy, "allocation-policy", "none", "modes of allocating GPU devices: balanced, packed and none")
791821
flag.StringVar(&opts.allowIDs, "allow-ids", "", "comma-separated list of device IDs to allow (e.g. 0x49c5,0x49c6)")
792822
flag.StringVar(&opts.denyIDs, "deny-ids", "", "comma-separated list of device IDs to deny (e.g. 0x49c5,0x49c6)")

0 commit comments

Comments
 (0)