From 01c84332deb36c816a45e05dc457b2bd820a5da9 Mon Sep 17 00:00:00 2001 From: pathoam Date: Sun, 7 Dec 2025 14:20:21 -0700 Subject: [PATCH 1/2] changed to using deque for queue, O(n)->O(1) pop operation --- src/parallax/server/scheduler.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/parallax/server/scheduler.py b/src/parallax/server/scheduler.py index 411388f8..da4ef5af 100644 --- a/src/parallax/server/scheduler.py +++ b/src/parallax/server/scheduler.py @@ -20,8 +20,8 @@ """ import time -from collections import OrderedDict -from typing import Dict, List, Optional +from collections import OrderedDict, deque +from typing import Deque, Dict, Optional from parallax.server.kv_cache import KVCacheManager from parallax.server.request import InitialRequest, Request, RequestStatus @@ -73,7 +73,7 @@ def __init__( self.max_total_length = kwargs.get("max_total_length", 1024) # Prefill wait queue (FIFO) for admission - self._wait_queue: List[Request] = [] + self._wait_queue: Deque[Request] = deque() # Keeps track of all in-flight requests self._running_requests: Dict[str, Request] = OrderedDict() @@ -217,9 +217,8 @@ def admit_requests(self): Pushes admitted requests directly into the running set. """ - # TODO: pop directly from wait queue ? while self._wait_queue and len(self._running_requests) < self.max_batch_size: - req = self._wait_queue.pop(0) + req = self._wait_queue.popleft() rid = req.request_id if rid in self._running_requests: continue From f7e04e2380c822adc2359b4807e232c171687a14 Mon Sep 17 00:00:00 2001 From: pathoam Date: Sun, 7 Dec 2025 16:47:13 -0700 Subject: [PATCH 2/2] list import --- src/parallax/server/scheduler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parallax/server/scheduler.py b/src/parallax/server/scheduler.py index da4ef5af..a2b6ba27 100644 --- a/src/parallax/server/scheduler.py +++ b/src/parallax/server/scheduler.py @@ -21,7 +21,7 @@ import time from collections import OrderedDict, deque -from typing import Deque, Dict, Optional +from typing import Deque, Dict, List, Optional from parallax.server.kv_cache import KVCacheManager from parallax.server.request import InitialRequest, Request, RequestStatus