@@ -359,22 +359,37 @@ def from_dict(cls, d, **kwargs):
359359 if attr in d :
360360 setattr (self , attr , d [attr ])
361361
362+ cluster_key = ClusterManager ._cluster_key (self )
363+
362364 if d .get ("controller" ):
363365 controller_info = d ["controller" ]
364366 cls = self .controller_launcher_class = import_item (controller_info ["class" ])
365367 if controller_info ["state" ]:
366- self .controller = cls .from_dict (controller_info ["state" ], parent = self )
368+ try :
369+ self .controller = cls .from_dict (
370+ controller_info ["state" ], parent = self
371+ )
372+ except launcher .NotRunning as e :
373+ self .log .error (f"Controller for { cluster_key } not running: { e } " )
367374
368375 engine_info = d .get ("engines" )
369376 if engine_info :
370377 cls = self .engine_launcher_class = import_item (engine_info ["class" ])
371378 for engine_set_id , engine_state in engine_info .get ("sets" , {}).items ():
372- self .engines [engine_set_id ] = cls .from_dict (
373- engine_state ,
374- engine_set_id = engine_set_id ,
375- parent = self ,
376- )
377-
379+ try :
380+ self .engines [engine_set_id ] = cls .from_dict (
381+ engine_state ,
382+ engine_set_id = engine_set_id ,
383+ parent = self ,
384+ )
385+ except launcher .NotRunning as e :
386+ self .log .error (
387+ f"Engine set { cluster_key } { engine_set_id } not running: { e } "
388+ )
389+ # check if state changed
390+ if self .to_dict () != d :
391+ # if so, update our cluster file
392+ self .update_cluster_file ()
378393 return self
379394
380395 @classmethod
@@ -703,7 +718,8 @@ class ClusterManager(LoggingConfigurable):
703718
704719 _clusters = Dict (help = "My cluster objects" )
705720
706- def _cluster_key (self , cluster ):
721+ @staticmethod
722+ def _cluster_key (cluster ):
707723 """Return a unique cluster key for a cluster
708724
709725 Default is {profile}:{cluster_id}
0 commit comments