Move rollout-related configs from ClusterConfig to RolloutConfig.

wang2yn84 · wang2yn84 · commit 8f75b23c0ce9 · 2025-10-27T21:54:54.000Z
diff --git a/scripts/grpo_demo_llama3_qwen2.py b/scripts/grpo_demo_llama3_qwen2.py
@@ -796,11 +796,12 @@ def evaluate(
         temperature=TEMPERATURE,
         top_p=TOP_P,
         top_k=TOP_K,
+        rollout_vllm_model_version=VLLM_MODEL_VERSION,
+        rollout_vllm_hbm_utilization=0.2,
+        rollout_vllm_tpu_backend_type="jax",
+        rollout_vllm_server_mode=args.rollout_server_mode,
     ),
-    rollout_vllm_model_version=VLLM_MODEL_VERSION,
-    rollout_vllm_hbm_utilization=0.2,
-    rollout_vllm_tpu_backend_type="jax",
-    rollout_vllm_server_mode=args.rollout_server_mode,
+
 )
 
 grpo_config = grpo_learner.GRPOConfig(
diff --git a/tunix/rl/rl_cluster.py b/tunix/rl/rl_cluster.py
@@ -35,7 +35,6 @@
 from jax.typing import ArrayLike  # pylint: disable=g-importing-member
 import jaxtyping
 import optax
-from tunix.generate import mappings
 # Internal placeholder for sglang_jax rollout worker stub, don't change this line.
 # Internal placeholder for vllm rollout worker stub, don't change this line.
 from tunix.rl import reshard
@@ -181,22 +180,7 @@ class ClusterConfig:
   rollout_config: (
       dict[Mode, base_rollout.RolloutConfig] | base_rollout.RolloutConfig
   )
-  rollout_mapping_config: mappings.MappingConfig | None = None
 
-  rollout_vllm_server_mode: bool = False
-  rollout_vllm_model_version: str = ""
-  rollout_vllm_lora_config: dict[str, Any] | None = None
-  rollout_vllm_hbm_utilization: float = 0.2
-  rollout_vllm_init_with_random_weights: bool = True
-  rollout_vllm_tpu_backend_type: str | None = None
-  rollout_vllm_swap_space_size_gb: float = 4.0  # in GiB
-
-  rollout_sglang_jax_model_version: str = ""
-  rollout_sglang_jax_context_length: int = 8192
-  rollout_sglang_jax_mem_fraction_static: float = 0.2
-  rollout_sglang_jax_init_with_random_weights: bool = True
-  rollout_sglang_jax_disable_radix_cache: bool = True
-  rollout_sglang_jax_enable_deterministic_sampling: bool = False
 
 
 class RLCluster:
@@ -403,29 +387,16 @@ def _init_cluster(self):
     elif self.cluster_config.rollout_engine == "vllm":
       from tunix.rl.rollout import vllm_rollout
 
-      if self.cluster_config.rollout_vllm_model_version is None:
+      if self.cluster_config.rollout_config.rollout_vllm_model_version is None:
         raise ValueError("Rollout vllm model version or path is missing!")
 
-      backend = (
-          self.cluster_config.rollout_engine
-          + "_"
-          + self.cluster_config.rollout_vllm_tpu_backend_type
-      )
       # TODO(linchai): maybe support offloading for vllm rollout.
       self._rollout = vllm_rollout.VllmRollout(
           self.rollout_actor,
           self.tokenizer,
           cache_config_or_size=max_kv_cache_size,
           mesh=self.r2m[Role.ROLLOUT],
-          model_version=self.cluster_config.rollout_vllm_model_version,
-          hbm_utilization=self.cluster_config.rollout_vllm_hbm_utilization,
-          init_with_random_weights=self.cluster_config.rollout_vllm_init_with_random_weights,
-          tpu_backend_type=self.cluster_config.rollout_vllm_tpu_backend_type,
-          swap_space=self.cluster_config.rollout_vllm_swap_space_size_gb,
-          lora_config=self.cluster_config.rollout_vllm_lora_config,
-          rollout_engine=backend,
-          mapping_config=self.cluster_config.rollout_mapping_config,
-          server_mode=self.cluster_config.rollout_vllm_server_mode,
+          rollout_config=self.cluster_config.rollout_config,
       )
     elif self.cluster_config.rollout_engine == "sglang_jax":
       from tunix.rl.rollout import sglang_jax_rollout
@@ -434,13 +405,7 @@ def _init_cluster(self):
           self.rollout_actor,
           self.tokenizer,
           mesh=self.r2m[Role.ROLLOUT],
-          model_version=self.cluster_config.rollout_sglang_jax_model_version,
-          context_length=self.cluster_config.rollout_sglang_jax_context_length,
-          mem_fraction_static=self.cluster_config.rollout_sglang_jax_mem_fraction_static,
-          init_with_random_weights=self.cluster_config.rollout_sglang_jax_init_with_random_weights,
-          disable_radix_cache=self.cluster_config.rollout_sglang_jax_disable_radix_cache,
-          enable_deterministic_sampling=self.cluster_config.rollout_sglang_jax_enable_deterministic_sampling,
-          mapping_config=self.cluster_config.rollout_mapping_config,
+          rollout_config=self.cluster_config.rollout_config,
       )
 
     else:
diff --git a/tunix/rl/rollout/base_rollout.py b/tunix/rl/rollout/base_rollout.py
@@ -22,6 +22,7 @@
 from jax import numpy as jnp
 import jaxtyping
 
+from tunix.generate import mappings
 
 @dataclasses.dataclass(frozen=True)
 class CacheConfig:
@@ -96,6 +97,53 @@ class RolloutConfig:
   # will be used.
   eos_tokens: list[int] | None = None
 
+  # Weights mapping config for the rollout model.
+  rollout_mapping_config: mappings.MappingConfig | None = None
+
+  # vLLM specific rollout configs.
+
+  # Whether to run rollout in vLLM server mode or batch inference mode.
+  rollout_vllm_server_mode: bool = False
+
+  # Model version for vLLM rollout engine.
+  rollout_vllm_model_version: str = ""
+
+  # LoRA config for vLLM rollout engine.
+  rollout_vllm_lora_config: dict[str, Any] | None = None
+
+  # Allocated HBM fraction for vLLM rollout engine.
+  rollout_vllm_hbm_utilization: float = 0.2
+
+  # Whether to initialize vLLM model with random weights or huggingface weights.
+  rollout_vllm_init_with_random_weights: bool = True
+
+  # TPU backend type for vLLM rollout engine: "jax" or "torchax". The field default is None (NOTE(review): confirm None is treated as "jax").
+  rollout_vllm_tpu_backend_type: str | None = None
+
+  # Swap space size for vLLM rollout engine, in GiB.
+  rollout_vllm_swap_space_size_gb: float = 4.0
+
+
+  # SG-Lang JAX specific rollout configs.
+
+  # Model version for SG-Lang JAX rollout engine.
+  rollout_sglang_jax_model_version: str = ""
+
+  # Context length for SG-Lang JAX rollout engine.
+  rollout_sglang_jax_context_length: int = 8192
+
+  # Allocated HBM fraction for SG-Lang JAX rollout engine.
+  rollout_sglang_jax_mem_fraction_static: float = 0.2
+
+  # Whether to initialize SG-Lang JAX model with random weights.
+  rollout_sglang_jax_init_with_random_weights: bool = True
+
+  # Whether to disable the radix cache for the SG-Lang JAX rollout engine. Defaults to True for RL.
+  rollout_sglang_jax_disable_radix_cache: bool = True
+
+  # Whether to enable deterministic sampling for SG-Lang JAX rollout engine.
+  rollout_sglang_jax_enable_deterministic_sampling: bool = False
+
 
 class BaseRollout(abc.ABC):
   """Base RolloutWorker."""
diff --git a/tunix/rl/rollout/sglang_jax_rollout.py b/tunix/rl/rollout/sglang_jax_rollout.py
@@ -33,29 +33,22 @@ def __init__(
       model: Any,
       tokenizer: Any,
       mesh: jax.sharding.Mesh,
-      model_version: str,
-      context_length: int,
-      mem_fraction_static: float,
-      init_with_random_weights: bool,
-      disable_radix_cache: bool,
-      enable_deterministic_sampling: bool,
-      mapping_config: Optional[mappings.MappingConfig] = None,
-      rollout_engine: str = "sglang_jax",
+      rollout_config: base_rollout.RolloutConfig,
   ):
     self.mesh = mesh
     mapping_config = mappings.MappingConfig.build(
-        mapping_obj=mapping_config, model=model, backend=rollout_engine
+        mapping_obj=rollout_config.rollout_mapping_config, model=model, backend="sglang_jax",
     )
     self._sampler = sglang_jax_sampler.SglangJaxSampler(
         tokenizer=tokenizer,
         config=sglang_jax_sampler.SglangJaxConfig(
             mesh=mesh,
-            context_length=context_length,
-            model_version=model_version,
-            mem_fraction_static=mem_fraction_static,
-            init_with_random_weights=init_with_random_weights,
-            disable_radix_cache=disable_radix_cache,
-            enable_deterministic_sampling=enable_deterministic_sampling,
+            context_length=rollout_config.rollout_sglang_jax_context_length,
+            model_version=rollout_config.rollout_sglang_jax_model_version,
+            mem_fraction_static=rollout_config.rollout_sglang_jax_mem_fraction_static,
+            init_with_random_weights=rollout_config.rollout_sglang_jax_init_with_random_weights,
+            disable_radix_cache=rollout_config.rollout_sglang_jax_disable_radix_cache,
+            enable_deterministic_sampling=rollout_config.rollout_sglang_jax_enable_deterministic_sampling,
             mapping_config=mapping_config,
         ),
     )
diff --git a/tunix/rl/rollout/vllm_rollout.py b/tunix/rl/rollout/vllm_rollout.py
@@ -33,33 +33,25 @@ def __init__(
       tokenizer: Any,
       cache_config_or_size: base_rollout.CacheConfig | int,
       mesh: jax.sharding.Mesh,
-      model_version: str,
-      hbm_utilization: float,
-      init_with_random_weights: bool,
-      tpu_backend_type: str,
-      swap_space: float = 4.0,  # in GiB
-      server_mode: bool = False,
-      lora_config: Optional[Dict[str, str]] = None,
-      mapping_config: Optional[mappings.MappingConfig] = None,
-      rollout_engine: str = "vllm_jax",
+      rollout_config: base_rollout.RolloutConfig,
   ):
     self.mesh = mesh
     mapping_config = mappings.MappingConfig.build(
-        mapping_obj=mapping_config, model=model, backend=rollout_engine
+        mapping_obj=rollout_config.rollout_mapping_config, model=model, backend="vllm_jax",
     )
     self._sampler = vllm_sampler.VllmSampler(
         tokenizer=tokenizer,
         config=vllm_sampler.VllmConfig(
             max_model_len=cache_config_or_size,
             mesh=mesh,
-            model_version=model_version,
-            hbm_utilization=hbm_utilization,
-            init_with_random_weights=init_with_random_weights,
-            tpu_backend_type=tpu_backend_type,
+            model_version=rollout_config.rollout_vllm_model_version,
+            hbm_utilization=rollout_config.rollout_vllm_hbm_utilization,
+            init_with_random_weights=rollout_config.rollout_vllm_init_with_random_weights,
+            tpu_backend_type=rollout_config.rollout_vllm_tpu_backend_type,
             mapping_config=mapping_config,
-            lora_config=lora_config,
-            swap_space=swap_space,
-            server_mode=server_mode,
+            lora_config=rollout_config.rollout_vllm_lora_config,
+            swap_space=rollout_config.rollout_vllm_swap_space_size_gb,
+            server_mode=rollout_config.rollout_vllm_server_mode,
         ),
     )
     state = nnx.state(model)