@@ -84,21 +84,78 @@ def get_snaps(snapper_root_config):
8484 default_snap = item ["number" ] if item ["default" ] else default_snap
8585 return active_snap , default_snap
8686
# Function to get latest atomic snapshot of status
# valid status: created, pending, finished
def get_atomic_snap(snapper_root_config, status):
    """Return the number of the newest snapshot tagged ``atomic=<status>``.

    Lists the snapshots of *snapper_root_config* via ``snapper --jsonout``
    and scans them from the end of the list backwards, so the most recently
    listed match wins.

    Args:
        snapper_root_config: name of the snapper config to query; it is also
            the key under which the snapshot list appears in the JSON output.
        status: value the snapshot's ``atomic`` userdata entry must equal
            (one of "created", "pending", "finished").

    Returns:
        The ``number`` field of the first matching snapshot, or ``None`` when
        no snapshot carries the requested status.
    """
    snaps_json = shell_exec(f"snapper --jsonout -c {snapper_root_config} list --disable-used-space")[0]
    snaps = json.loads(snaps_json)
    for item in reversed(snaps[snapper_root_config]):
        try:
            if item["userdata"]["atomic"] == status:
                return item["number"]
        except (KeyError, TypeError):
            # snapshot has no userdata (null), no "atomic" key or no "number";
            # it cannot be an atomic snapshot of this status, so skip it
            continue
    return None
9899
# Function to verify snapshot by booting it up as a container
def verify_snapshot():
    """Boot the snapshot mounted at TMP_MOUNT_DIR and report failed systemd units.

    Starts an ephemeral ``systemd-nspawn --boot`` container from
    TMP_MOUNT_DIR, waits for it to register with machinectl and to finish
    booting, collects the units reported by ``systemctl --failed`` inside it,
    then stops the container.

    Returns:
        A list with the ``unit`` name of every failed systemd unit in the
        container (empty when nothing failed).

    Raises:
        SystemExit: after calling ``cleanup()``, when the container does not
            show up within 10 polls or does not report "Startup finished"
            within 120 polls (polls are one second apart).
    """
    logging.debug("Booting container")
    cmd = ["systemd-nspawn", "--directory", TMP_MOUNT_DIR, "--ephemeral", "--boot"]
    # NOTE(review): the Popen handle is not retained; the container is located
    # and stopped through machinectl below instead.
    subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
    logging.debug("Getting container id")
    # machines are matched by name prefix: the last path component of the mount dir
    machine_prefix = TMP_MOUNT_DIR.split('/').pop()
    container_id = None
    for _ in range(10):
        out, _ret = shell_exec("LC_ALL=C machinectl --quiet --no-pager -o json list")
        try:
            containers = json.loads(out)
        except json.JSONDecodeError:
            # empty or partial output while the machine is still registering:
            # treat as "no container yet" and retry instead of crashing
            containers = []
        for container in containers:
            if (container["class"] == "container" and container["service"] == "systemd-nspawn" and
                    container["machine"].startswith(machine_prefix)):
                container_id = container["machine"]
                break
        if container_id:
            break
        time.sleep(1)
    logging.debug(f"Container ID = {container_id}")
    if not container_id:
        logging.error("Could not bootup ephemeral container from snapshot. Cancelling task...")
        cleanup()
        sys.exit()
    logging.debug("Waiting for container bootup to finish...")
    startup_finished = False
    for _ in range(120):
        out, _ret = shell_exec(f"LC_ALL=C machinectl --quiet shell {container_id} /usr/bin/bash -c 'systemd-analyze time'")
        # the boot is considered complete once this marker appears in the output
        if "Startup finished" in out:
            startup_finished = True
            break
        time.sleep(1)
    if not startup_finished:
        logging.error("Timeout waiting for bootup of ephemeral container from snapshot. Cancelling task...")
        cleanup()
        sys.exit()
    logging.debug("Getting failed systemd units")
    out, _ret = shell_exec(f"LC_ALL=C machinectl --quiet shell {container_id} /usr/bin/bash -c 'systemctl --quiet --no-pager -o json --failed | cat -'")
    out = json.loads(out)
    failed_units = [item["unit"] for item in out]
    logging.debug(f"Number of failed units = {len(failed_units)}")
    logging.debug(f"Failed units = {', '.join(failed_units)}")
    logging.debug("Stopping container...")
    shell_exec(f"machinectl stop {container_id}")
    return failed_units
146+
99147# Function to cleanup on SIGINT or successful completion
100148def cleanup ():
101149 logging .info ("Cleaning up..." )
150+ logging .debug ("Stopping ephemeral systemd-nspawn containers..." )
151+ out , ret = shell_exec ("LC_ALL=C machinectl --quiet --no-pager -o json list" )
152+ containers = json .loads (out )
153+ for container in containers :
154+ if ( container ["class" ] == "container" and container ["service" ] == "systemd-nspawn" and
155+ container ["machine" ].startswith (f"{ TMP_MOUNT_DIR .split ('/' ).pop ()} " ) ):
156+ container_id = container ["machine" ]
157+ shell_exec (f"machinectl stop { container_id } " )
158+ logging .debug ("Cleaning up temp mounts..." )
102159 umount_command = f"""
103160LC_ALL=C mount -l | grep '{ TMP_MOUNT_DIR } ' | awk '{{print $3}}' | awk '{{print length, $0}}' | sort -rn | awk '{{print $2}}' | awk '{{system("umount " $0)}}';
104161"""
@@ -107,9 +164,17 @@ def cleanup():
107164 if out == "" and ret == 0 :
108165 break
109166 time .sleep (0.01 )
167+ logging .debug ("Cleaning up temp dirs..." )
110168 shell_exec (f"rmdir { quote (TMP_MOUNT_DIR )} " )
111169 shell_exec (f"rmdir { quote (TMP_DIR )} " )
170+ logging .debug ("Cleaning up unfinished snapshots..." )
171+ snapper_root_config = get_snapper_root_config ()
172+ for status in ["created" , "pending" ]:
173+ snap_num = get_atomic_snap (snapper_root_config , status )
174+ if snap_num :
175+ shell_exec (f"snapper -c { snapper_root_config } delete { snap_num } " )
112176
177+ # Function to handle SIGINT
113178def sigint_handler (signum , frame ):
114179 signal .signal (signum , signal .SIG_IGN ) # ignore additional signals
115180 cleanup ()
@@ -287,16 +352,18 @@ def sigint_handler(signum, frame):
287352 # create new read-write snapshot to perform atomic update in
288353 out , ret = shell_exec (f"snapper -c { snapper_root_config } create -c number " \
289354 f"-d 'Atomic update of #{ base_snap } ' " \
290- f"-u 'atomic=yes ' --from { base_snap } --read-write" )
355+ f"-u 'atomic=created ' --from { base_snap } --read-write" )
291356 if ret != 0 :
292357 logging .error (f"Could not create read-write snapshot to perform atomic update in" )
293358 sys .exit (6 )
294359 # get latest atomic snapshot number we just created
295- atomic_snap = get_atomic_snap (snapper_root_config )
360+ atomic_snap = get_atomic_snap (snapper_root_config , "created" )
296361 logging .debug (f"Latest atomic snapshot number: { atomic_snap } " )
297362 logging .info (f"Using snapshot { base_snap } as base for new snapshot { atomic_snap } " )
298363 snap_subvol = f"@/.snapshots/{ atomic_snap } /snapshot"
299364 snap_dir = snap_subvol .lstrip ("@" )
365+ # update atomic snapshot status
366+ shell_exec (f"snapper -c { snapper_root_config } modify -u 'atomic=pending' { atomic_snap } " )
300367 # check the latest atomic snapshot exists as btrfs subvolume
301368 out , ret = shell_exec (f"LC_ALL=C btrfs subvolume list / | grep '{ snap_subvol } '" )
302369 if ret != 0 :
@@ -320,6 +387,10 @@ def sigint_handler(signum, frame):
320387chroot { TMP_MOUNT_DIR } mount -a;
321388"""
322389 shell_exec (commands )
390+ # verify snapshot prior to performing update
391+ if not NO_VERIFY :
392+ logging .info ("Verifying snapshot prior to update..." )
393+ pre_failed_units = verify_snapshot ()
323394 if COMMAND == "dup" :
324395 # check if dup has anything to do
325396 logging .info ("Checking for packages to upgrade" )
@@ -351,7 +422,7 @@ def sigint_handler(signum, frame):
351422 logging .info ("Command run successfully" )
352423 if SHELL :
353424 logging .info (f"Opening bash shell within chroot of snapshot { atomic_snap } " )
354- logging .info ("Continue with 'exit' or discard with 'exit 1'" )
425+ logging .info ("Continue with 'exit 0 ' or discard with 'exit 1'" )
355426 command = f"""
356427chroot { TMP_MOUNT_DIR } bash -c "export PS1='atomic-update:\${{PWD}} # '; exec bash"
357428"""
@@ -361,6 +432,20 @@ def sigint_handler(signum, frame):
361432 shell_exec (f"snapper -c { snapper_root_config } delete { atomic_snap } " )
362433 cleanup ()
363434 sys .exit ()
435+ # verify snapshot after update
436+ if not NO_VERIFY :
437+ logging .info ("Verifying snapshot post update..." )
438+ post_failed_units = verify_snapshot ()
439+ new_failed_units = list ( set (post_failed_units ) - set (pre_failed_units ) )
440+ if new_failed_units :
441+ logging .error (f"Discarding snapshot { atomic_snap } as the following new " \
442+ f"systemd units have failed since update: { ', ' .join (new_failed_units )} " )
443+ shell_exec (f"snapper -c { snapper_root_config } delete { atomic_snap } " )
444+ cleanup ()
445+ sys .exit ()
446+ # on success, update atomic snapshot status
447+ shell_exec (f"snapper -c { snapper_root_config } modify -u 'atomic=finished' { atomic_snap } " )
448+ # on success, set new snapshot as the default
364449 logging .info (f"Setting snapshot { atomic_snap } ({ snap_dir } ) as the new default" )
365450 shell_exec (f"snapper -c { snapper_root_config } modify --default { atomic_snap } " )
366451 # perform cleanup
0 commit comments