File tree Expand file tree Collapse file tree 2 files changed +11
-9
lines changed Expand file tree Collapse file tree 2 files changed +11
-9
lines changed Original file line number Diff line number Diff line change @@ -102,11 +102,11 @@ def may_append(self, seq: Sequence):
102102 self ._allocate_block (block_id )
103103 block_table .append (block_id )
104104 elif len (seq ) % self .block_size == 0 :
105- assert last_block .hash == - 1
106- token_ids = seq .block (seq .num_blocks - 1 )
107- prefix = self .blocks [block_table [- 2 ]].hash if len (block_table ) > 1 else - 1
108- h = self .compute_hash (token_ids , prefix )
109- last_block .update (h , token_ids )
110- self .hash_to_block_id [h ] = last_block .block_id
105+ if last_block .hash == - 1 :
106+ token_ids = seq .block (seq .num_blocks - 1 )
107+ prefix = self .blocks [block_table [- 2 ]].hash if len (block_table ) > 1 else - 1
108+ h = self .compute_hash (token_ids , prefix )
109+ last_block .update (h , token_ids )
110+ self .hash_to_block_id [h ] = last_block .block_id
111111 else :
112112 assert last_block .hash == - 1
Original file line number Diff line number Diff line change @@ -30,13 +30,15 @@ def schedule(self) -> tuple[list[Sequence], bool]:
3030 seq = self .waiting [0 ]
3131 if num_batched_tokens + len (seq ) > self .max_num_batched_tokens or not self .block_manager .can_allocate (seq ):
3232 break
33- num_seqs += 1
3433 self .block_manager .allocate (seq )
35- num_batched_tokens += len (seq ) - seq .num_cached_tokens
3634 seq .status = SequenceStatus .RUNNING
3735 self .waiting .popleft ()
3836 self .running .append (seq )
39- scheduled_seqs .append (seq )
37+ tokens_to_compute = len (seq ) - seq .num_cached_tokens
38+ if tokens_to_compute > 0 :
39+ num_seqs += 1
40+ scheduled_seqs .append (seq )
41+ num_batched_tokens += tokens_to_compute
4042 if scheduled_seqs :
4143 return scheduled_seqs , True
4244
You can’t perform that action at this time.
0 commit comments