Skip to content

Commit c29a23b

Browse files
Prevented fully cached sequences from being processed in the prefill phase
1 parent 6ef2a4f commit c29a23b

File tree

2 files changed: +11 -9 lines changed

2 files changed: +11 -9 lines changed

nanovllm/engine/block_manager.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -102,11 +102,11 @@ def may_append(self, seq: Sequence):
102102
self._allocate_block(block_id)
103103
block_table.append(block_id)
104104
elif len(seq) % self.block_size == 0:
105-
assert last_block.hash == -1
106-
token_ids = seq.block(seq.num_blocks-1)
107-
prefix = self.blocks[block_table[-2]].hash if len(block_table) > 1 else -1
108-
h = self.compute_hash(token_ids, prefix)
109-
last_block.update(h, token_ids)
110-
self.hash_to_block_id[h] = last_block.block_id
105+
if last_block.hash == -1:
106+
token_ids = seq.block(seq.num_blocks-1)
107+
prefix = self.blocks[block_table[-2]].hash if len(block_table) > 1 else -1
108+
h = self.compute_hash(token_ids, prefix)
109+
last_block.update(h, token_ids)
110+
self.hash_to_block_id[h] = last_block.block_id
111111
else:
112112
assert last_block.hash == -1

nanovllm/engine/scheduler.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -30,13 +30,15 @@ def schedule(self) -> tuple[list[Sequence], bool]:
3030
seq = self.waiting[0]
3131
if num_batched_tokens + len(seq) > self.max_num_batched_tokens or not self.block_manager.can_allocate(seq):
3232
break
33-
num_seqs += 1
3433
self.block_manager.allocate(seq)
35-
num_batched_tokens += len(seq) - seq.num_cached_tokens
3634
seq.status = SequenceStatus.RUNNING
3735
self.waiting.popleft()
3836
self.running.append(seq)
39-
scheduled_seqs.append(seq)
37+
tokens_to_compute=len(seq) - seq.num_cached_tokens
38+
if tokens_to_compute > 0:
39+
num_seqs += 1
40+
scheduled_seqs.append(seq)
41+
num_batched_tokens += tokens_to_compute
4042
if scheduled_seqs:
4143
return scheduled_seqs, True
4244

0 commit comments

Comments
 (0)