@@ -28,6 +28,7 @@ import (
2828 k8stypes "k8s.io/apimachinery/pkg/types"
2929 "sigs.k8s.io/controller-runtime/pkg/log"
3030
31+ dplugins "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer/plugins"
3132 "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
3233 "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins"
3334 "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/requestcontrol"
@@ -206,6 +207,32 @@ func (p *Plugin) WithName(name string) *Plugin {
206207 return p
207208}
208209
210+ func (p * Plugin ) PrepareRequestData (ctx context.Context , request * types.LLMRequest , pods []types.Pod ) error {
211+ // pre score step, hashing prompt and find longest prefix match.
212+ hashes := hashPrompt (ctx , request , getBlockSize (pods , p .config ), p .config .MaxPrefixBlocksToMatch )
213+ state := & SchedulingContextState {
214+ PrefixHashes : hashes ,
215+ PrefixCacheServers : p .matchLongestPrefix (ctx , hashes ),
216+ }
217+ for server , matchLen := range state .PrefixCacheServers {
218+ log .FromContext (ctx ).V (logutil .TRACE ).Info ("prefix cached state" , "server" , server , "longest-prefix-match" , matchLen )
219+
220+ }
221+
222+ total := len (state .PrefixHashes )
223+ podScoreFunc := func (pod types.Pod ) float64 {
224+ if total == 0 {
225+ return 0
226+ }
227+ matchLen := state .PrefixCacheServers [ServerID (pod .GetPod ().NamespacedName )]
228+ return float64 (matchLen ) / float64 (total )
229+ }
230+ for _ , pod := range pods {
231+ pod .Put (dplugins .PrefixCacheMatchPrecentKey , dplugins .NewPrefixCacheMatchPercent (podScoreFunc (pod )))
232+ }
233+ return nil
234+ }
235+
209236// Score returns the scoring result for the given list of pods based on context.
210237func (p * Plugin ) Score (ctx context.Context , cycleState * types.CycleState , request * types.LLMRequest , pods []types.Pod ) map [types.Pod ]float64 {
211238 // pre score step, hashing prompt and find longest prefix match.
0 commit comments