Add PrepareRequestData method for the prefix cache plugin

rahulgurnani · rahulgurnani · commit 78012cfa3f6d · 2025-11-25T21:55:54.000Z
diff --git a/pkg/epp/datalayer/plugins/data_types.go b/pkg/epp/datalayer/plugins/data_types.go
@@ -0,0 +1,29 @@
+package plugins
+
+import (
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"
+)
+
+const (
+	// PrefixCacheMatchPercentKey is the key the prefix plugin passes to
+	// Pod.Put when attaching a *PrefixCacheMatchPercent to a pod.
+	PrefixCacheMatchPercentKey = "PrefixCacheMatchPercentKey"
+
+	// PrefixCacheMatchPrecentKey is the original, misspelled name of
+	// PrefixCacheMatchPercentKey. It has the same value and is kept so that
+	// existing callers keep compiling.
+	//
+	// Deprecated: use PrefixCacheMatchPercentKey.
+	PrefixCacheMatchPrecentKey = PrefixCacheMatchPercentKey
+)
+
+// PrefixCacheMatchPercent wraps a single float64 prefix-cache match value.
+// The field is unexported: it is set through NewPrefixCacheMatchPercent and
+// read back through MatchPercentage.
+//
+// NOTE(review): the prefix plugin's PrepareRequestData stores matched/total
+// here, i.e. a fraction rather than a value scaled to 100 — confirm the
+// intended units against consumers.
+type PrefixCacheMatchPercent struct {
+	matchPercentage float64
+}
+
+// NewPrefixCacheMatchPercent allocates a PrefixCacheMatchPercent that carries
+// the supplied matchPercentage unchanged (no validation or clamping is done).
+func NewPrefixCacheMatchPercent(matchPercentage float64) *PrefixCacheMatchPercent {
+	result := new(PrefixCacheMatchPercent)
+	result.matchPercentage = matchPercentage
+	return result
+}
+
+// MatchPercentage returns the match value stored in p.
+func (p *PrefixCacheMatchPercent) MatchPercentage() float64 {
+	return p.matchPercentage
+}
+
+// Clone returns a newly allocated PrefixCacheMatchPercent carrying the same
+// match value as p, typed as datalayer.Cloneable.
+func (p *PrefixCacheMatchPercent) Clone() datalayer.Cloneable {
+	dup := *p // value copy of the struct; the result does not alias p
+	return &dup
+}
diff --git a/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin.go b/pkg/epp/scheduling/framework/plugins/multi/prefix/plugin.go
@@ -29,6 +29,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/log"
 
 	backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
+	dplugins "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer/plugins"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/requestcontrol"
@@ -207,6 +208,32 @@ func (p *Plugin) WithName(name string) *Plugin {
 	return p
 }
 
+// PrepareRequestData annotates every candidate pod with its prefix-cache match
+// ratio for the given request.
+//
+// It hashes the request prompt, looks up the longest matching prefix per
+// server, and stores on each pod, under dplugins.PrefixCacheMatchPrecentKey, a
+// PrefixCacheMatchPercent holding (matched prefix length / number of prompt
+// hashes). When the prompt yields no hashes every pod is given 0. The returned
+// error is always nil.
+func (p *Plugin) PrepareRequestData(ctx context.Context, request *types.LLMRequest, pods []types.Pod) error {
+	hashes := hashPrompt(ctx, request, getBlockSize(pods, p.config), p.config.MaxPrefixBlocksToMatch)
+	matchedByServer := p.matchLongestPrefix(ctx, hashes)
+
+	// Resolve the logger once rather than on every loop iteration.
+	logger := log.FromContext(ctx).V(logutil.TRACE)
+	for server, matchLen := range matchedByServer {
+		logger.Info("prefix cached state", "server", server, "longest-prefix-match", matchLen)
+	}
+
+	total := len(hashes)
+	for _, pod := range pods {
+		// The zero default also covers total == 0, which would otherwise
+		// produce a 0/0 NaN.
+		var matchRatio float64
+		if total > 0 {
+			// A server missing from the map reads as 0 matched hashes.
+			matchLen := matchedByServer[ServerID(pod.GetPod().NamespacedName)]
+			matchRatio = float64(matchLen) / float64(total)
+		}
+		pod.Put(dplugins.PrefixCacheMatchPrecentKey, dplugins.NewPrefixCacheMatchPercent(matchRatio))
+	}
+	return nil
+}
+
 // Score returns the scoring result for the given list of pods based on context.
 func (p *Plugin) Score(ctx context.Context, cycleState *types.CycleState, request *types.LLMRequest, pods []types.Pod) map[types.Pod]float64 {
 	// pre score step, hashing prompt and find longest prefix match.