@@ -36,6 +36,8 @@ const (
3636 diskPartitionSuffix = "-part"
3737 diskSDPath = "/dev/sd"
3838 diskSDPattern = "/dev/sd*"
39+ diskNvmePath = "/dev/nvme"
40+ diskNvmePattern = "/dev/nvme*"
3941 // How many times to retry for a consistent read of /proc/mounts.
4042 maxListTries = 3
4143 // Number of fields per line in /proc/mounts as per the fstab man page.
@@ -52,11 +54,20 @@ const (
5254 // scsi_id output should be in the form of:
5355 // 0Google PersistentDisk <disk name>
5456 scsiPattern = `^0Google\s+PersistentDisk\s+([\S]+)\s*$`
57+ // google_nvme_id output should be in the form of:
58+ // ID_SERIAL_SHORT=<disk name>
59+ // Note: The google_nvme_id tool prints out multiple lines, hence we don't
60+ // use '^' and '$' to wrap nvmePattern as is done in scsiPattern.
61+ nvmePattern = `ID_SERIAL_SHORT=([\S]+)\s*`
62+ scsiIdPath = "/lib/udev_containerized/scsi_id"
63+ nvmeIdPath = "/lib/udev_containerized/google_nvme_id"
5564)
5665
5766var (
5867 // regex to parse scsi_id output and extract the serial
5968 scsiRegex = regexp .MustCompile (scsiPattern )
69+ // regex to parse google_nvme_id output and extract the serial
70+ nvmeRegex = regexp .MustCompile (nvmePattern )
6071)
6172
6273// DeviceUtils are a collection of methods that act on the devices attached
@@ -107,13 +118,13 @@ func existingDevicePath(devicePaths []string) (string, error) {
107118 return "" , nil
108119}
109120
110- // getScsiSerial assumes that /lib/udev/scsi_id exists and will error if it
121+ // getScsiSerial assumes that scsiIdPath exists and will error if it
111122// doesnt. It is the callers responsibility to verify the existence of this
112123// tool. Calls scsi_id on the given devicePath to get the serial number reported
113124// by that device.
114125func getScsiSerial (devicePath string ) (string , error ) {
115126 out , err := exec .Command (
116- "/lib/udev_containerized/scsi_id" ,
127+ scsiIdPath ,
117128 "--page=0x83" ,
118129 "--whitelisted" ,
119130 fmt .Sprintf ("--device=%v" , devicePath )).CombinedOutput ()
@@ -134,9 +145,58 @@ func parseScsiSerial(output string) (string, error) {
134145 return substrings [1 ], nil
135146}
136147
148+ // getNvmeSerial calls google_nvme_id on the given devicePath to get the serial
149+ // number reported by that device.
150+ // NOTE: getNvmeSerial assumes that nvmeIdPath exists and will error if it
151+ // doesn't. It is the caller's responsibility to verify the existence of this
152+ // tool.
153+ func getNvmeSerial (devicePath string ) (string , error ) {
154+ out , err := exec .Command (
155+ nvmeIdPath ,
156+ fmt .Sprintf ("-d%s" , devicePath )).CombinedOutput ()
157+ if err != nil {
158+ return "" , fmt .Errorf ("google_nvme_id failed for device %q with output %v: %v" , devicePath , out , err )
159+ }
160+
161+ return parseNvmeSerial (string (out ))
162+ }
163+
164+ // Parse the output returned by google_nvme_id and extract the serial number
165+ func parseNvmeSerial (output string ) (string , error ) {
166+ substrings := nvmeRegex .FindStringSubmatch (output )
167+ if substrings == nil {
168+ return "" , fmt .Errorf ("google_nvme_id output cannot be parsed: %q" , output )
169+ }
170+
171+ return substrings [1 ], nil
172+ }
173+
174+ func ensureUdevToolExists (toolPath string ) error {
175+ exists , err := pathutils .Exists (pathutils .CheckFollowSymlink , toolPath )
176+ if err != nil {
177+ return fmt .Errorf ("failed to check existence of %q: %v" , toolPath , err )
178+ }
179+ if ! exists {
180+ // The driver should be containerized with the tool so maybe something is
181+ // wrong with the build process
182+ return fmt .Errorf ("could not find tool at %q, unable to verify device paths" , nvmeIdPath )
183+ }
184+ return nil
185+ }
186+
187+ func ensureUdevToolsExist () error {
188+ if err := ensureUdevToolExists (scsiIdPath ); err != nil {
189+ return err
190+ }
191+ if err := ensureUdevToolExists (nvmeIdPath ); err != nil {
192+ return err
193+ }
194+ return nil
195+ }
196+
137197// VerifyDevicePath returns the first devicePath that maps to a real disk in the
138- // candidate devicePaths or an empty string if none is found. If
139- // /lib/udev_containerized/scsi_id exists it will attempt to fix any issues
198+ // candidate devicePaths or an empty string if none is found.
199+ // If the device is not found, it will attempt to fix any issues
140200// caused by missing paths or mismatched devices by running a udevadm --trigger.
141201func (m * deviceUtils ) VerifyDevicePath (devicePaths []string , deviceName string ) (string , error ) {
142202 var devicePath string
@@ -146,15 +206,10 @@ func (m *deviceUtils) VerifyDevicePath(devicePaths []string, deviceName string)
146206 pollTimeout = 3 * time .Second
147207 )
148208
149- scsiIDPath := " /lib/udev_containerized/scsi_id"
150- exists , err := pathutils . Exists ( pathutils . CheckFollowSymlink , scsiIDPath )
209+ // Ensure tools in /lib/udev_containerized directory exist
210+ err = ensureUdevToolsExist ( )
151211 if err != nil {
152- return "" , fmt .Errorf ("failed to check scsi_id existence: %v" , err )
153- }
154- if ! exists {
155- // No SCSI ID tool, the driver should be containerized with the tool so
156- // maybe something is wrong with the build process
157- return "" , fmt .Errorf ("could not find scsi_id tool at %s, unable to verify device paths" , scsiIDPath )
212+ return "" , err
158213 }
159214
160215 err = wait .Poll (pollInterval , pollTimeout , func () (bool , error ) {
@@ -166,40 +221,41 @@ func (m *deviceUtils) VerifyDevicePath(devicePaths []string, deviceName string)
166221 }
167222
168223 if len (devicePath ) == 0 {
169- // Couldn't find the path so we need to find a /dev/sdx with the SCSI
170- // serial that matches deviceName. Then we run udevadm trigger on that
171- // device to get the device to show up in /dev/by-id/
224+ // Couldn't find a /dev/disk/by-id path for this deviceName, so we need to
225+ // find a /dev/* with a serial that matches deviceName. Then we attempt
226+ // to repair the symlink.
172227 innerErr := udevadmTriggerForDiskIfExists (deviceName )
173228 if innerErr != nil {
174- return false , fmt .Errorf ("failed to trigger udevadm fix: %v" , innerErr )
229+ return false , fmt .Errorf ("failed to trigger udevadm fix of non existent disk for %q : %v" , deviceName , innerErr )
175230 }
176231 // Go to next retry loop to get the deviceName again after
177232 // potentially fixing it with the udev command
178233 return false , nil
179234 }
180235
181- // If there exists a devicePath we make sure disk at /dev/sdx matches the
182- // expected disk at devicePath by matching SCSI Serial to the disk name
183- devSDX , innerErr := filepath .EvalSymlinks (devicePath )
236+ // If there exists a devicePath we make sure disk at /dev/* matches the
237+ // expected disk at devicePath by matching device Serial to the disk name
238+ devFsPath , innerErr := filepath .EvalSymlinks (devicePath )
184239 if innerErr != nil {
185240 return false , fmt .Errorf ("filepath.EvalSymlinks(%q) failed with %v" , devicePath , innerErr )
186241 }
187- // Check to make sure device path maps to the correct disk
188- if strings .Contains (devSDX , diskSDPath ) {
189- scsiSerial , innerErr := getScsiSerial (devSDX )
190- if innerErr != nil {
191- return false , fmt .Errorf ("couldn't get SCSI serial number for disk %s: %v" , deviceName , innerErr )
192- }
193- // SUCCESS! devicePath points to a /dev/sdx that has a SCSI serial
194- // equivalent to our disk name
195- if scsiSerial == deviceName {
196- return true , nil
197- }
242+
243+ devFsSerial , innerErr := getDevFsSerial (devFsPath )
244+ if innerErr != nil {
245+ return false , fmt .Errorf ("couldn't get serial number for disk %s at path %s: %v" , deviceName , devFsPath , innerErr )
246+ }
247+ // SUCCESS! devicePath points to a /dev/* path that has a serial
248+ // equivalent to our disk name
249+ if len (devFsSerial ) != 0 && devFsSerial == deviceName {
250+ return true , nil
198251 }
199- // The devicePath is not mapped to the correct disk
252+
253+ // A /dev/* path exists, but is either not a recognized /dev prefix type
254+ // (/dev/nvme* or /dev/sd*) or devicePath is not mapped to the correct disk.
255+ // Attempt a repair
200256 innerErr = udevadmTriggerForDiskIfExists (deviceName )
201257 if innerErr != nil {
202- return false , fmt .Errorf ("failed to trigger udevadm fix: %v" , innerErr )
258+ return false , fmt .Errorf ("failed to trigger udevadm fix of misconfigured disk for %q : %v" , deviceName , innerErr )
203259 }
204260 // Go to next retry loop to get the deviceName again after
205261 // potentially fixing it with the udev command
@@ -213,49 +269,78 @@ func (m *deviceUtils) VerifyDevicePath(devicePaths []string, deviceName string)
213269 return devicePath , nil
214270}
215271
272+ // getDevFsSerial returns the serial number of the /dev/* path at devFsPath.
273+ // If devFsPath does not start with a known prefix, returns the empty string.
274+ func getDevFsSerial (devFsPath string ) (string , error ) {
275+ switch {
276+ case strings .HasPrefix (devFsPath , diskSDPath ):
277+ return getScsiSerial (devFsPath )
278+ case strings .HasPrefix (devFsPath , diskNvmePath ):
279+ return getNvmeSerial (devFsPath )
280+ default :
281+ return "" , nil
282+ }
283+ }
284+
285+ func findAvailableDevFsPaths () ([]string , error ) {
286+ diskSDPaths , err := filepath .Glob (diskSDPattern )
287+ if err != nil {
288+ return nil , fmt .Errorf ("failed to filepath.Glob(\" %s\" ): %v" , diskSDPattern , err )
289+ }
290+ diskNvmePaths , err := filepath .Glob (diskNvmePattern )
291+ if err != nil {
292+ return nil , fmt .Errorf ("failed to filepath.Glob(\" %s\" ): %v" , diskNvmePattern , err )
293+ }
294+ return append (diskSDPaths , diskNvmePaths ... ), nil
295+ }
296+
216297func udevadmTriggerForDiskIfExists (deviceName string ) error {
217- devToSCSI := map [string ]string {}
218- sds , err := filepath . Glob ( diskSDPattern )
298+ devFsPathToSerial := map [string ]string {}
299+ devFsPaths , err := findAvailableDevFsPaths ( )
219300 if err != nil {
220- return fmt . Errorf ( "failed to filepath.Glob( \" %s \" ): %v" , diskSDPattern , err )
301+ return err
221302 }
222- for _ , devSDX := range sds {
223- scsiSerial , err := getScsiSerial (devSDX )
224- if err != nil {
225- return fmt .Errorf ("failed to get SCSI Serial num: %v" , err )
303+ for _ , devFsPath := range devFsPaths {
304+ devFsSerial , err := getDevFsSerial (devFsPath )
305+ if err != nil || len (devFsSerial ) == 0 {
306+ // If we get an error, ignore. Either this isn't a block device, or it
307+ // isn't something we can get a serial number from
308+ klog .V (7 ).Infof ("failed to get Serial num for disk %s at path %s: %v" , deviceName , devFsPath , err )
309+ continue
226310 }
227- devToSCSI [ devSDX ] = scsiSerial
228- if scsiSerial == deviceName {
311+ devFsPathToSerial [ devFsPath ] = devFsSerial
312+ if devFsSerial == deviceName {
229313 // Found the disk that we're looking for so run a trigger on it
230314 // to resolve its /dev/by-id/ path
231- klog .Warningf ("udevadm --trigger running to fix disk at path %s which has SCSI ID %s" , devSDX , scsiSerial )
232- err := udevadmChangeToDrive (devSDX )
315+ klog .Warningf ("udevadm --trigger running to fix disk at path %s which has serial numberID %s" , devFsPath , devFsSerial )
316+ err := udevadmChangeToDrive (devFsPath )
233317 if err != nil {
234- return fmt .Errorf ("failed to fix disk which has SCSI ID %s: %v" , scsiSerial , err )
318+ return fmt .Errorf ("failed to fix disk which has serial numberID %s: %v" , devFsSerial , err )
235319 }
236320 return nil
237321 }
238322 }
239- klog .Warningf ("udevadm --trigger requested to fix disk %s but no such disk was found in %v" , deviceName , devToSCSI )
323+ klog .Warningf ("udevadm --trigger requested to fix disk %s but no such disk was found in %v" , deviceName , devFsPathToSerial )
240324 return fmt .Errorf ("udevadm --trigger requested to fix disk %s but no such disk was found" , deviceName )
241325}
242326
243327// Calls "udevadm trigger --action=change" on the specified drive. drivePath
244- // must be the block device path to trigger on, in the format "/dev/sd*", or a
245- // symlink to it. This is workaround for Issue #7972. Once the underlying issue
246- // has been resolved, this may be removed.
328+ // must be the block device path to trigger on, in the format "/dev/*", or a
329+ // symlink to it. This is workaround for Issue #7972
330+ // (https://github.com/kubernetes/kubernetes/issues/7972). Once the underlying
331+ // issue has been resolved, this may be removed.
247332// udevadm takes a little bit to work its magic in the background so any callers
248333// should not expect the trigger to complete instantly and may need to poll for
249334// the change
250- func udevadmChangeToDrive (devSDX string ) error {
251- // Call "udevadm trigger --action=change --property-match=DEVNAME=/dev/sd ..."
335+ func udevadmChangeToDrive (devFsPath string ) error {
336+ // Call "udevadm trigger --action=change --property-match=DEVNAME=/dev/..."
252337 out , err := exec .Command (
253338 "udevadm" ,
254339 "trigger" ,
255340 "--action=change" ,
256- fmt .Sprintf ("--property-match=DEVNAME=%s" , devSDX )).CombinedOutput ()
341+ fmt .Sprintf ("--property-match=DEVNAME=%s" , devFsPath )).CombinedOutput ()
257342 if err != nil {
258- return fmt .Errorf ("udevadmChangeToDrive: udevadm trigger failed for drive %q with output %s: %v." , devSDX , string (out ), err )
343+ return fmt .Errorf ("udevadmChangeToDrive: udevadm trigger failed for drive %q with output %s: %v." , devFsPath , string (out ), err )
259344 }
260345 return nil
261346}
0 commit comments