@@ -13,17 +13,20 @@ use std::num::NonZeroU16;
1313use std:: pin:: Pin ;
1414use std:: task:: Poll ;
1515use std:: task:: Waker ;
16+ use std:: time:: Duration ;
1617
1718use hashbrown:: HashMap ;
1819use hashbrown:: hash_map;
1920use pin_project:: pin_project;
21+ use tokio:: time:: Instant ;
2022use tracing:: { info, trace} ;
2123
2224use restate_futures_util:: concurrency:: Concurrency ;
2325use restate_futures_util:: concurrency:: Permit ;
2426use restate_storage_api:: StorageError ;
2527use restate_storage_api:: vqueue_table:: VQueueStore ;
2628use restate_types:: clock:: UniqueTimestamp ;
29+ use restate_types:: time:: MillisSinceEpoch ;
2730use restate_types:: vqueue:: VQueueId ;
2831
2932use crate :: EventDetails ;
@@ -34,6 +37,7 @@ use crate::scheduler::Assignments;
3437use crate :: scheduler:: vqueue_state:: Eligibility ;
3538
3639use super :: Decision ;
40+ use super :: clock:: SchedulerClock ;
3741use super :: vqueue_state:: VQueueState ;
3842
3943/// Capacity to maintain for N vqueues (N=100)
@@ -55,8 +59,9 @@ pub struct DRRScheduler<S: VQueueStore, Token> {
5559 remaining_in_round : usize ,
5660 /// Waker to be notified when scheduler is potentially able to schedule more work
5761 waker : Waker ,
62+ datum : SchedulerClock ,
5863 /// Time of the last memory reporting and memory compaction
59- last_report : Option < UniqueTimestamp > ,
64+ last_report : Instant ,
6065 // SAFETY NOTE: **must** Keep this at the end since it needs to outlive all readers.
6166 storage : S ,
6267}
@@ -94,17 +99,27 @@ where
9499 q. len( ) ,
95100 ) ;
96101
102+ let datum = SchedulerClock :: new (
103+ UniqueTimestamp :: from_unix_millis ( MillisSinceEpoch :: now ( ) )
104+ . expect ( "clock does not overflow" ) ,
105+ ) ;
106+ // Makes sure we use the same clock datum for the internal timer wheel and for our
107+ // own eligibility checks.
108+ let start = datum. origin_instant ( ) ;
109+ let delayed_eligibility = DelayQueue :: new ( start) ;
110+
97111 Self {
98112 limit_qid_per_poll,
99113 concurrency_limiter,
100114 q,
101115 eligible,
102116 global_sched_round : 0 ,
103117 remaining_in_round : 0 ,
104- delayed_eligibility : DelayQueue :: new ( ) ,
118+ delayed_eligibility,
105119 unconfirmed_capacity_permits : Permit :: new_empty ( ) ,
106120 waker : Waker :: noop ( ) . clone ( ) ,
107- last_report : None ,
121+ datum,
122+ last_report : start,
108123 storage,
109124 }
110125 }
@@ -122,7 +137,6 @@ where
122137
123138 pub fn poll_schedule_next (
124139 mut self : Pin < & mut Self > ,
125- now : UniqueTimestamp ,
126140 cx : & mut std:: task:: Context < ' _ > ,
127141 vqueues : VQueuesMeta < ' _ > ,
128142 ) -> Poll < Result < Decision < S :: Item > , StorageError > > {
@@ -131,14 +145,11 @@ where
131145 Abort ,
132146 }
133147
134- if self
135- . last_report
136- . is_none_or ( |t| now. milliseconds_since ( t) >= 10000 )
137- {
148+ if self . last_report . elapsed ( ) >= Duration :: from_secs ( 10 ) {
138149 vqueues. report ( ) ;
139150 self . report ( ) ;
140151 // also report vqueues states
141- self . last_report = Some ( now) ;
152+ self . last_report = Instant :: now ( ) ;
142153 // compact memory
143154 self . q . shrink_to ( MIN_VQUEUES_CAPACITY ) ;
144155 self . delayed_eligibility . compact ( ) ;
@@ -151,20 +162,6 @@ where
151162 if self . remaining_in_round == 0 {
152163 // Pop all eligible vqueues that were delayed since we are starting a new round
153164 // Once we hit pending, the waker will be registered.
154- //
155- // There is currently an issue due to using two different clock sources. The timer
156- // wheel uses tokio's internal clock but our scheduler's design relies on explicit
157- // monotonic timestamps. This will cause the timer to expire slightly before or after
158- // the actual point at which the input (now) would satisfy the head item's
159- // eligibility requirements. As a result, we will see one of three scenarios:
160- // 1. Time aligns. We pop the timer and the head element will be immediately eligible.
161- // 2. We are woken up before `now` satifies the requirement, in this case will
162- // schedule a new timer to catch up on the difference.
163- // 3. We are worken up after `now` by a few hundres of millis, in this case the
164- // head will be eligible but we will be a little late.
165- //
166- // This will be fixed in a later change after we reason about whether we need any
167- // causal entanglement between the RSM's clock and the scheduler's internal clock.
168165 let previous_round = self . global_sched_round ;
169166 while let Poll :: Ready ( Some ( expired) ) = self . delayed_eligibility . poll_expired ( cx) {
170167 let Some ( qstate) = self . q . get_mut ( expired. get_ref ( ) ) else {
@@ -188,6 +185,7 @@ where
188185 }
189186 }
190187
188+ let now = self . datum . now_ts ( ) ;
191189 while self . remaining_in_round > 0 {
192190 // bail if we exhausted coop budget.
193191 let coop = match tokio:: task:: coop:: poll_proceed ( cx) {
@@ -227,7 +225,10 @@ where
227225 }
228226 Eligibility :: EligibleAt ( wake_up_at) => {
229227 * this. remaining_in_round -= 1 ;
230- qstate. maybe_schedule_wakeup ( wake_up_at, this. delayed_eligibility , now) ;
228+ qstate. maybe_schedule_wakeup (
229+ this. datum . ts_to_future_instant ( wake_up_at) ,
230+ this. delayed_eligibility ,
231+ ) ;
231232 this. eligible . pop_front ( ) ;
232233 break ' single_vqueue Outcome :: ContinueRound ;
233234 }
@@ -306,7 +307,6 @@ where
306307 #[ tracing:: instrument( skip_all) ]
307308 pub fn on_inbox_event (
308309 & mut self ,
309- now : UniqueTimestamp ,
310310 vqueues : VQueuesMeta < ' _ > ,
311311 event : & VQueueEvent < S :: Item > ,
312312 ) -> Result < ( ) , StorageError > {
@@ -326,7 +326,8 @@ where
326326 return Ok ( ( ) ) ;
327327 }
328328
329- match qstate. check_eligibility ( now, meta, config) {
329+ let now_ts = self . datum . now_ts ( ) ;
330+ match qstate. check_eligibility ( now_ts, meta, config) {
330331 Eligibility :: Eligible if !self . eligible . contains ( & qid) => {
331332 // Make eligible immediately.
332333 qstate. deficit . set_last_round ( self . global_sched_round ) ;
@@ -335,9 +336,8 @@ where
335336 }
336337 Eligibility :: EligibleAt ( eligiblility_ts) if !self . eligible . contains ( & qid) => {
337338 qstate. maybe_schedule_wakeup (
338- eligiblility_ts,
339+ self . datum . ts_to_future_instant ( eligiblility_ts) ,
339340 & mut self . delayed_eligibility ,
340- now,
341341 ) ;
342342 }
343343 _ => { /* do nothing */ }
@@ -354,7 +354,10 @@ where
354354 // drop the already acquired permit
355355 let _ = self . unconfirmed_capacity_permits . split ( 1 ) ;
356356
357- if qstate. check_eligibility ( now, meta, config) . is_eligible ( ) {
357+ if qstate
358+ . check_eligibility ( self . datum . now_ts ( ) , meta, config)
359+ . is_eligible ( )
360+ {
358361 if !self . eligible . contains ( & qid) {
359362 self . eligible . push_back ( qid) ;
360363 qstate. deficit . set_last_round ( self . global_sched_round ) ;
@@ -377,7 +380,7 @@ where
377380 // drop the already acquired permit
378381 let _ = self . unconfirmed_capacity_permits . split ( 1 ) ;
379382 } else if qstate. notify_removed ( item_hash) {
380- match qstate. check_eligibility ( now , meta, config) {
383+ match qstate. check_eligibility ( self . datum . now_ts ( ) , meta, config) {
381384 Eligibility :: Eligible if !self . eligible . contains ( & qid) => {
382385 // Make eligible immediately.
383386 qstate. deficit . set_last_round ( self . global_sched_round ) ;
@@ -388,9 +391,8 @@ where
388391 if !self . eligible . contains ( & qid) =>
389392 {
390393 qstate. maybe_schedule_wakeup (
391- eligiblility_ts,
394+ self . datum . ts_to_future_instant ( eligiblility_ts) ,
392395 & mut self . delayed_eligibility ,
393- now,
394396 ) ;
395397 }
396398 _ => { /* do nothing */ }
0 commit comments