@@ -65,6 +65,7 @@ class BaseRmEnv(ABC, gym.Env):
6565    color_index : List [int ]
6666    color_cache : Dict [int , int ]
6767    simulator : DeepRmSimulator 
68+     has_reset : bool 
6869
6970    @abstractmethod  
7071    def  __init__ (self , ** kwargs ):
@@ -137,6 +138,7 @@ def __init__(self, **kwargs):
137138            job_slots = self .job_slots ,
138139        )
139140        self .reward_range  =  (- np .inf , 0 )
141+         self .has_reset  =  False 
140142
141143    def  jobs_in_system (self ):
142144        return  self .scheduler .jobs_in_system 
@@ -161,13 +163,17 @@ def reset(self) -> np.ndarray:
161163        scheduler  =  NullScheduler (
162164            self .processors , self .memory , ignore_memory = self .ignore_memory 
163165        )
164-         wl  =  build_workload (self .workload_config , self .random_seed [0 ])
166+         wl  =  build_workload (
167+             self .workload_config ,
168+             self .random_seed [0 ] if  not  self .has_reset  else  None 
169+         )
165170        if  self .update_time_limit  and  hasattr (wl , 'trace' ):
166171            self .time_limit  =  self .tolerance_factor  *  (
167172                wl .trace [- 1 ].submission_time  +   # type: ignore 
168173                wl .trace [- 1 ].execution_time   # type: ignore 
169174            )
170175        self .simulator .reset (wl , scheduler )
176+         self .has_reset  =  True 
171177        return  self .state 
172178
173179    def  _render_state (self ):
0 commit comments