Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions src/parallax/server/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
"""

import time
-from collections import OrderedDict
-from typing import Dict, List, Optional
+from collections import OrderedDict, deque
+from typing import Deque, Dict, List, Optional

from parallax.server.kv_cache import KVCacheManager
from parallax.server.request import InitialRequest, Request, RequestStatus
Expand Down Expand Up @@ -73,7 +73,7 @@ def __init__(
self.max_total_length = kwargs.get("max_total_length", 1024)

# Prefill wait queue (FIFO) for admission
-self._wait_queue: List[Request] = []
+self._wait_queue: Deque[Request] = deque()
# Keeps track of all in-flight requests
self._running_requests: Dict[str, Request] = OrderedDict()

Expand Down Expand Up @@ -217,9 +217,8 @@ def admit_requests(self):

Pushes admitted requests directly into the running set.
"""
-# TODO: pop directly from wait queue ?
while self._wait_queue and len(self._running_requests) < self.max_batch_size:
-req = self._wait_queue.pop(0)
+req = self._wait_queue.popleft()
rid = req.request_id
if rid in self._running_requests:
continue
Expand Down
Loading