Skip to content

Commit 081fe69

Browse files
committed
Improve handling of long running container bootup
1 parent c49c8ac commit 081fe69

File tree

1 file changed

+13
-6
lines changed

1 file changed

+13
-6
lines changed

atomic-update

Lines changed: 13 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ from shlex import quote
2828
import xml.etree.ElementTree as ET
2929

3030
# Constants
31-
VERSION = "0.1.4"
31+
VERSION = "0.1.5"
3232
ZYPPER_PID_FILE = "/run/zypp.pid"
3333
VALID_CMD = ["dup", "run", "rollback"]
3434
VALID_OPT = ["--reboot", "--apply", "--shell", "--continue", "--no-verify", \
@@ -122,7 +122,7 @@ def verify_snapshot():
122122
sys.exit()
123123
logging.debug("Waiting for container bootup to finish...")
124124
startup_finished = False
125-
for _ in range(120):
125+
for _ in range(60):
126126
out, ret = shell_exec(f"LC_ALL=C machinectl --quiet shell {container_id} /usr/bin/bash -c 'systemd-analyze time'")
127127
if out.find("Startup finished") != -1:
128128
startup_finished = True
@@ -131,12 +131,19 @@ def verify_snapshot():
131131
break
132132
time.sleep(1)
133133
if not startup_finished:
134-
logging.error("Timeout waiting for bootup of ephemeral container from snapshot. Cancelling task...")
135-
cleanup()
136-
sys.exit()
134+
logging.warn("Timeout waiting for bootup of ephemeral container from snapshot")
135+
logging.debug(f"systemd-analyze time output:\n{out}")
136+
out, ret = shell_exec(f"LC_ALL=C machinectl --quiet shell {container_id} /usr/bin/bash -c 'systemctl --quiet --no-pager list-jobs'")
137+
logging.debug(f"systemctl list-jobs output:\n{out}")
137138
logging.debug("Getting failed systemd units")
138139
out, ret = shell_exec(f"LC_ALL=C machinectl --quiet shell {container_id} /usr/bin/bash -c 'systemctl --quiet --no-pager -o json --failed | cat -'")
139-
out = json.loads(out)
140+
try:
141+
out = json.loads(out)
142+
except json.JSONDecodeError:
143+
logging.error("Could not decode JSON output of failed systemd units. Cancelling task...")
144+
logging.debug(f"systemctl --failed output:\n{out}")
145+
cleanup()
146+
sys.exit()
140147
failed_units = [item["unit"] for item in out]
141148
logging.debug(f"Number of failed units = {len(failed_units)}")
142149
logging.debug(f"Failed units = {', '.join(failed_units)}")

0 commit comments

Comments (0)