@@ -299,6 +299,16 @@ var (
299299 []string {},
300300 )
301301
302+ // SchedulerAttemptsTotal counts total number of scheduling attempts, labeled by status.
303+ SchedulerAttemptsTotal = prometheus .NewCounterVec (
304+ prometheus.CounterOpts {
305+ Subsystem : InferenceExtension ,
306+ Name : "scheduler_attempts_total" ,
307+ Help : metricsutil .HelpMsgWithStability ("Total number of scheduling attempts." , compbasemetrics .ALPHA ),
308+ },
309+ []string {"status" }, // "success", "failure"
310+ )
311+
302312 PluginProcessingLatencies = prometheus .NewHistogramVec (
303313 prometheus.HistogramOpts {
304314 Subsystem : InferenceExtension ,
@@ -419,6 +429,7 @@ func Register(customCollectors ...prometheus.Collector) {
419429 metrics .Registry .MustRegister (inferencePoolAvgQueueSize )
420430 metrics .Registry .MustRegister (inferencePoolReadyPods )
421431 metrics .Registry .MustRegister (SchedulerE2ELatency )
432+ metrics .Registry .MustRegister (SchedulerAttemptsTotal )
422433 metrics .Registry .MustRegister (PluginProcessingLatencies )
423434 metrics .Registry .MustRegister (InferenceExtensionInfo )
424435 metrics .Registry .MustRegister (PrefixCacheSize )
@@ -464,6 +475,7 @@ func Reset() {
464475 inferencePoolAvgQueueSize .Reset ()
465476 inferencePoolReadyPods .Reset ()
466477 SchedulerE2ELatency .Reset ()
478+ SchedulerAttemptsTotal .Reset ()
467479 PluginProcessingLatencies .Reset ()
468480 InferenceExtensionInfo .Reset ()
469481 PrefixCacheSize .Reset ()
@@ -474,7 +486,7 @@ func Reset() {
474486 inferenceModelRewriteDecisionsTotal .Reset ()
475487}
476488
477- // RecordRequstCounter records the number of requests.
489+ // RecordRequestCounter records the number of requests.
478490func RecordRequestCounter (modelName , targetModelName string ) {
479491 requestCounter .WithLabelValues (modelName , targetModelName ).Inc ()
480492}
@@ -696,6 +708,20 @@ func RecordSchedulerE2ELatency(duration time.Duration) {
696708 SchedulerE2ELatency .WithLabelValues ().Observe (duration .Seconds ())
697709}
698710
711+ // RecordSchedulerAttempt records a scheduling attempt with status.
712+ func RecordSchedulerAttempt (err error ) {
713+ if err != nil {
714+ SchedulerAttemptsTotal .WithLabelValues (SchedulerStatusFailure ).Inc ()
715+ } else {
716+ SchedulerAttemptsTotal .WithLabelValues (SchedulerStatusSuccess ).Inc ()
717+ }
718+ }
719+
// Values written to the "status" label of SchedulerAttemptsTotal by
// RecordSchedulerAttempt.
const (
	// SchedulerStatusSuccess labels an attempt that was recorded with a nil error.
	SchedulerStatusSuccess = "success"

	// SchedulerStatusFailure labels an attempt that was recorded with a non-nil error.
	SchedulerStatusFailure = "failure"
)
724+
699725// RecordPluginProcessingLatency records the processing latency for a plugin.
700726func RecordPluginProcessingLatency (extensionPoint , pluginType , pluginName string , duration time.Duration ) {
701727 PluginProcessingLatencies .WithLabelValues (extensionPoint , pluginType , pluginName ).Observe (duration .Seconds ())
0 commit comments