Commit 28d2b5e

Rename CRD to CR

Signed-off-by: Andrey Velichkevich <[email protected]>
1 parent: b368d79
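
In Kubernetes terms, a CRD (CustomResourceDefinition) defines a new API type on the cluster, while a CR (custom resource) is an instance of that type. The helpers renamed below receive Experiment, TrainJob, and ClusterTrainingRuntime objects, i.e. CR instances rather than their definitions, which is what this rename reflects. A minimal sketch of the distinction using the official kubernetes Python client (a reachable cluster with Katib installed is assumed; this snippet is illustrative and not part of the commit):

# Sketch: CRD vs. CR with the official kubernetes Python client.
# Assumes a working kubeconfig and an installed Katib; not part of this commit.
from kubernetes import client, config

config.load_kube_config()

# The CRD defines the API type (here, Katib's Experiment kind).
ext = client.ApiextensionsV1Api()
crd = ext.read_custom_resource_definition("experiments.kubeflow.org")
print(crd.spec.names.kind)  # -> "Experiment"

# A CR is an instance of that type; the renamed SDK helpers consume these.
custom = client.CustomObjectsApi()
experiments = custom.list_namespaced_custom_object(
    group="kubeflow.org",
    version="v1beta1",
    namespace="default",
    plural="experiments",
)
for item in experiments["items"]:
    print(item["metadata"]["name"])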

File tree: 3 files changed (+85, -85 lines)


kubeflow/optimizer/backends/kubernetes/backend.py

Lines changed: 28 additions & 28 deletions
@@ -189,7 +189,7 @@ def list_jobs(self) -> list[OptimizationJob]:
                 return result
 
             for optimization_job in optimization_job_list.items:
-                result.append(self.__get_optimization_job_from_crd(optimization_job))
+                result.append(self.__get_optimization_job_from_custom_resource(optimization_job))
 
         except multiprocessing.TimeoutError as e:
             raise TimeoutError(
@@ -224,7 +224,7 @@ def get_job(self, name: str) -> OptimizationJob:
         except Exception as e:
             raise RuntimeError(f"Failed to get OptimizationJob: {self.namespace}/{name}") from e
 
-        return self.__get_optimization_job_from_crd(optimization_job)  # type: ignore
+        return self.__get_optimization_job_from_custom_resource(optimization_job)  # type: ignore
 
     def delete_job(self, name: str):
         """Delete the OptimizationJob"""
@@ -244,44 +244,44 @@ def delete_job(self, name: str):
 
         logger.debug(f"OptimizationJob {self.namespace}/{name} has been deleted")
 
-    def __get_optimization_job_from_crd(
+    def __get_optimization_job_from_custom_resource(
         self,
-        optimization_job_crd: models.V1beta1Experiment,
+        optimization_job_cr: models.V1beta1Experiment,
     ) -> OptimizationJob:
         if not (
-            optimization_job_crd.metadata
-            and optimization_job_crd.metadata.name
-            and optimization_job_crd.metadata.namespace
-            and optimization_job_crd.spec
-            and optimization_job_crd.spec.parameters
-            and optimization_job_crd.spec.objective
-            and optimization_job_crd.spec.algorithm
-            and optimization_job_crd.spec.max_trial_count
-            and optimization_job_crd.spec.parallel_trial_count
-            and optimization_job_crd.metadata.creation_timestamp
+            optimization_job_cr.metadata
+            and optimization_job_cr.metadata.name
+            and optimization_job_cr.metadata.namespace
+            and optimization_job_cr.spec
+            and optimization_job_cr.spec.parameters
+            and optimization_job_cr.spec.objective
+            and optimization_job_cr.spec.algorithm
+            and optimization_job_cr.spec.max_trial_count
+            and optimization_job_cr.spec.parallel_trial_count
+            and optimization_job_cr.metadata.creation_timestamp
         ):
-            raise Exception(f"OptimizationJob CRD is invalid: {optimization_job_crd}")
+            raise Exception(f"OptimizationJob CR is invalid: {optimization_job_cr}")
 
         optimization_job = OptimizationJob(
-            name=optimization_job_crd.metadata.name,
+            name=optimization_job_cr.metadata.name,
             search_space=utils.get_search_space_from_katib_spec(
-                optimization_job_crd.spec.parameters
+                optimization_job_cr.spec.parameters
             ),
-            objectives=utils.get_objectives_from_katib_spec(optimization_job_crd.spec.objective),
-            algorithm=utils.get_algorithm_from_katib_spec(optimization_job_crd.spec.algorithm),
+            objectives=utils.get_objectives_from_katib_spec(optimization_job_cr.spec.objective),
+            algorithm=utils.get_algorithm_from_katib_spec(optimization_job_cr.spec.algorithm),
             trial_config=TrialConfig(
-                num_trials=optimization_job_crd.spec.max_trial_count,
-                parallel_trials=optimization_job_crd.spec.parallel_trial_count,
-                max_failed_trials=optimization_job_crd.spec.max_failed_trial_count,
+                num_trials=optimization_job_cr.spec.max_trial_count,
+                parallel_trials=optimization_job_cr.spec.parallel_trial_count,
+                max_failed_trials=optimization_job_cr.spec.max_failed_trial_count,
             ),
-            trials=self.__get_trials_from_crd(optimization_job_crd.metadata.name),
-            creation_timestamp=optimization_job_crd.metadata.creation_timestamp,
+            trials=self.__get_trials_from_job(optimization_job_cr.metadata.name),
+            creation_timestamp=optimization_job_cr.metadata.creation_timestamp,
             status=constants.OPTIMIZATION_JOB_CREATED,  # The default OptimizationJob status.
         )
 
         # Update the OptimizationJob status from Experiment conditions.
-        if optimization_job_crd.status and optimization_job_crd.status.conditions:
-            for c in optimization_job_crd.status.conditions:
+        if optimization_job_cr.status and optimization_job_cr.status.conditions:
+            for c in optimization_job_cr.status.conditions:
                 if c.type == constants.EXPERIMENT_SUCCEEDED and c.status == "True":
                     optimization_job.status = constants.OPTIMIZATION_JOB_COMPLETE
                 elif c.type == constants.OPTIMIZATION_JOB_FAILED and c.status == "True":
@@ -293,7 +293,7 @@ def __get_optimization_job_from_crd(
 
         return optimization_job
 
-    def __get_trials_from_crd(self, optimization_job_name: str) -> list[Trial]:
+    def __get_trials_from_job(self, optimization_job_name: str) -> list[Trial]:
         result = []
         try:
             thread = self.custom_api.list_namespaced_custom_object(
@@ -314,7 +314,7 @@ def __get_trials_from_crd(self, optimization_job_name: str) -> list[Trial]:
 
         for t in trial_list.items:
             if not (t.metadata and t.metadata.name and t.spec and t.spec.parameter_assignments):
-                raise ValueError(f"Trial CRD is invalid: {t}")
+                raise ValueError(f"Trial CR is invalid: {t}")
 
             # Trial name is equal to the TrainJob name.
             trial = Trial(
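
The converters renamed above are private helpers; callers reach them through the backend's public methods. A hedged sketch of that call path, assuming the SDK exposes an OptimizerClient that delegates to this KubernetesBackend (the import path and job name are illustrative assumptions, not confirmed by this diff):

# Hedged sketch; the import path and job name are assumptions.
from kubeflow.optimizer import OptimizerClient

client = OptimizerClient()

# list_jobs() fetches Experiment CRs and converts each one through the
# renamed __get_optimization_job_from_custom_resource helper.
for job in client.list_jobs():
    print(job.name, job.status)

# get_job() applies the same CR -> OptimizationJob conversion to a single
# Experiment; its trials are resolved by the renamed __get_trials_from_job.
job = client.get_job("my-optimization-job")  # hypothetical job name
print(job.trial_config.num_trials)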

kubeflow/trainer/backends/kubernetes/backend.py

Lines changed: 41 additions & 41 deletions
@@ -87,7 +87,7 @@ def list_runtimes(self) -> list[types.Runtime]:
                         f"{constants.RUNTIME_FRAMEWORK_LABEL} label."
                     )
                     continue
-                result.append(self.__get_runtime_from_crd(runtime))
+                result.append(self.__get_runtime_from_cr(runtime))
 
         except multiprocessing.TimeoutError as e:
             raise TimeoutError(
@@ -129,7 +129,7 @@ def get_runtime(self, name: str) -> types.Runtime:
                 f"{self.namespace}/{name}"
             ) from e
 
-        return self.__get_runtime_from_crd(runtime)  # type: ignore
+        return self.__get_runtime_from_cr(runtime)  # type: ignore
 
     def get_runtime_packages(self, runtime: types.Runtime):
         if runtime.trainer.trainer_type == types.TrainerType.BUILTIN_TRAINER:
@@ -249,7 +249,7 @@ def list_jobs(self, runtime: Optional[types.Runtime] = None) -> list[types.TrainJob]:
                 ):
                     continue
 
-                result.append(self.__get_trainjob_from_crd(trainjob))
+                result.append(self.__get_trainjob_from_cr(trainjob))
 
         except multiprocessing.TimeoutError as e:
             raise TimeoutError(
@@ -288,7 +288,7 @@ def get_job(self, name: str) -> types.TrainJob:
                 f"Failed to get {constants.TRAINJOB_KIND}: {self.namespace}/{name}"
             ) from e
 
-        return self.__get_trainjob_from_crd(trainjob)  # type: ignore
+        return self.__get_trainjob_from_cr(trainjob)  # type: ignore
 
     def get_job_logs(
         self,
@@ -395,66 +395,66 @@ def delete_job(self, name: str):
 
         logger.debug(f"{constants.TRAINJOB_KIND} {self.namespace}/{name} has been deleted")
 
-    def __get_runtime_from_crd(
+    def __get_runtime_from_cr(
         self,
-        runtime_crd: models.TrainerV1alpha1ClusterTrainingRuntime,
+        runtime_cr: models.TrainerV1alpha1ClusterTrainingRuntime,
     ) -> types.Runtime:
         if not (
-            runtime_crd.metadata
-            and runtime_crd.metadata.name
-            and runtime_crd.spec
-            and runtime_crd.spec.ml_policy
-            and runtime_crd.spec.template.spec
-            and runtime_crd.spec.template.spec.replicated_jobs
+            runtime_cr.metadata
+            and runtime_cr.metadata.name
+            and runtime_cr.spec
+            and runtime_cr.spec.ml_policy
+            and runtime_cr.spec.template.spec
+            and runtime_cr.spec.template.spec.replicated_jobs
         ):
-            raise Exception(f"ClusterTrainingRuntime CRD is invalid: {runtime_crd}")
+            raise Exception(f"ClusterTrainingRuntime CR is invalid: {runtime_cr}")
 
         if not (
-            runtime_crd.metadata.labels
-            and constants.RUNTIME_FRAMEWORK_LABEL in runtime_crd.metadata.labels
+            runtime_cr.metadata.labels
+            and constants.RUNTIME_FRAMEWORK_LABEL in runtime_cr.metadata.labels
         ):
             raise Exception(
-                f"Runtime {runtime_crd.metadata.name} must have "
+                f"Runtime {runtime_cr.metadata.name} must have "
                 f"{constants.RUNTIME_FRAMEWORK_LABEL} label"
             )
 
         return types.Runtime(
-            name=runtime_crd.metadata.name,
+            name=runtime_cr.metadata.name,
             trainer=utils.get_runtime_trainer(
-                runtime_crd.metadata.labels[constants.RUNTIME_FRAMEWORK_LABEL],
-                runtime_crd.spec.template.spec.replicated_jobs,
-                runtime_crd.spec.ml_policy,
+                runtime_cr.metadata.labels[constants.RUNTIME_FRAMEWORK_LABEL],
+                runtime_cr.spec.template.spec.replicated_jobs,
+                runtime_cr.spec.ml_policy,
             ),
         )
 
-    def __get_trainjob_from_crd(
+    def __get_trainjob_from_cr(
         self,
-        trainjob_crd: models.TrainerV1alpha1TrainJob,
+        trainjob_cr: models.TrainerV1alpha1TrainJob,
     ) -> types.TrainJob:
         if not (
-            trainjob_crd.metadata
-            and trainjob_crd.metadata.name
-            and trainjob_crd.metadata.namespace
-            and trainjob_crd.spec
-            and trainjob_crd.metadata.creation_timestamp
+            trainjob_cr.metadata
+            and trainjob_cr.metadata.name
+            and trainjob_cr.metadata.namespace
+            and trainjob_cr.spec
+            and trainjob_cr.metadata.creation_timestamp
         ):
-            raise Exception(f"TrainJob CRD is invalid: {trainjob_crd}")
+            raise Exception(f"TrainJob CR is invalid: {trainjob_cr}")
 
-        name = trainjob_crd.metadata.name
-        namespace = trainjob_crd.metadata.namespace
+        name = trainjob_cr.metadata.name
+        namespace = trainjob_cr.metadata.namespace
 
-        runtime = self.get_runtime(trainjob_crd.spec.runtime_ref.name)
+        runtime = self.get_runtime(trainjob_cr.spec.runtime_ref.name)
 
-        # Construct the TrainJob from the CRD.
+        # Construct the TrainJob from the CR.
         trainjob = types.TrainJob(
             name=name,
-            creation_timestamp=trainjob_crd.metadata.creation_timestamp,
+            creation_timestamp=trainjob_cr.metadata.creation_timestamp,
             runtime=runtime,
             steps=[],
             # Number of nodes is taken from TrainJob or TrainingRuntime
             num_nodes=(
-                trainjob_crd.spec.trainer.num_nodes
-                if trainjob_crd.spec.trainer and trainjob_crd.spec.trainer.num_nodes
+                trainjob_cr.spec.trainer.num_nodes
+                if trainjob_cr.spec.trainer and trainjob_cr.spec.trainer.num_nodes
                 else runtime.trainer.num_nodes
             ),
             status=constants.TRAINJOB_CREATED,  # The default TrainJob status.
@@ -516,8 +516,8 @@ def __get_trainjob_from_crd(
             ) from e
 
         # Update the TrainJob status from its conditions.
-        if trainjob_crd.status and trainjob_crd.status.conditions:
-            for c in trainjob_crd.status.conditions:
+        if trainjob_cr.status and trainjob_cr.status.conditions:
+            for c in trainjob_cr.status.conditions:
                 if (
                     c.type == constants.TRAINJOB_COMPLETE
                     and c.status == "True"
@@ -554,20 +554,20 @@ def _get_trainjob_spec(
             runtime = self.get_runtime(constants.TORCH_RUNTIME)
 
         # Build the Trainer.
-        trainer_crd = models.TrainerV1alpha1Trainer()
+        trainer_cr = models.TrainerV1alpha1Trainer()
 
         if trainer:
            # If users choose to use a custom training function.
            if isinstance(trainer, types.CustomTrainer):
                if runtime.trainer.trainer_type != types.TrainerType.CUSTOM_TRAINER:
                    raise ValueError(f"CustomTrainer can't be used with {runtime} runtime")
-                trainer_crd = utils.get_trainer_crd_from_custom_trainer(runtime, trainer)
+                trainer_cr = utils.get_trainer_cr_from_custom_trainer(runtime, trainer)
 
            # If users choose to use a builtin trainer for post-training.
            elif isinstance(trainer, types.BuiltinTrainer):
                if runtime.trainer.trainer_type != types.TrainerType.BUILTIN_TRAINER:
                    raise ValueError(f"BuiltinTrainer can't be used with {runtime} runtime")
-                trainer_crd = utils.get_trainer_crd_from_builtin_trainer(
+                trainer_cr = utils.get_trainer_cr_from_builtin_trainer(
                     runtime, trainer, initializer
                 )
@@ -579,7 +579,7 @@ def _get_trainjob_spec(
 
         return models.TrainerV1alpha1TrainJobSpec(
             runtimeRef=models.TrainerV1alpha1RuntimeRef(name=runtime.name),
-            trainer=(trainer_crd if trainer_crd != models.TrainerV1alpha1Trainer() else None),
+            trainer=(trainer_cr if trainer_cr != models.TrainerV1alpha1Trainer() else None),
             initializer=(
                 models.TrainerV1alpha1Initializer(
                     dataset=utils.get_dataset_initializer(initializer.dataset),
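
As with the optimizer backend, the renamed trainer helpers are private and surface through the public client methods. A hedged sketch, assuming the SDK's TrainerClient wraps the KubernetesBackend shown here (the import path and job name are assumptions; exact APIs may differ by SDK version):

# Hedged sketch; the import path and job name are assumptions.
from kubeflow.trainer import TrainerClient

client = TrainerClient()

# list_runtimes() validates and converts each ClusterTrainingRuntime CR
# through the renamed __get_runtime_from_cr helper.
for runtime in client.list_runtimes():
    print(runtime.name, runtime.trainer.trainer_type)

# get_job() converts a TrainJob CR via the renamed __get_trainjob_from_cr,
# taking num_nodes from the TrainJob spec or falling back to the runtime.
trainjob = client.get_job("my-train-job")  # hypothetical job name
print(trainjob.status, trainjob.num_nodes)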

kubeflow/trainer/backends/kubernetes/utils.py

Lines changed: 16 additions & 16 deletions
@@ -343,26 +343,26 @@ def get_command_using_train_func(
     return command
 
 
-def get_trainer_crd_from_custom_trainer(
+def get_trainer_cr_from_custom_trainer(
     runtime: types.Runtime,
     trainer: types.CustomTrainer,
 ) -> models.TrainerV1alpha1Trainer:
     """
-    Get the Trainer CRD from the custom trainer.
+    Get the Trainer CR from the custom trainer.
     """
-    trainer_crd = models.TrainerV1alpha1Trainer()
+    trainer_cr = models.TrainerV1alpha1Trainer()
 
     # Add number of nodes to the Trainer.
     if trainer.num_nodes:
-        trainer_crd.num_nodes = trainer.num_nodes
+        trainer_cr.num_nodes = trainer.num_nodes
 
     # Add resources per node to the Trainer.
     if trainer.resources_per_node:
-        trainer_crd.resources_per_node = get_resources_per_node(trainer.resources_per_node)
+        trainer_cr.resources_per_node = get_resources_per_node(trainer.resources_per_node)
 
     # Add command to the Trainer.
     # TODO: Support train function parameters.
-    trainer_crd.command = get_command_using_train_func(
+    trainer_cr.command = get_command_using_train_func(
         runtime,
         trainer.func,
         trainer.func_args,
@@ -372,41 +372,41 @@ def get_trainer_crd_from_custom_trainer(
 
     # Add environment variables to the Trainer.
     if trainer.env:
-        trainer_crd.env = [
+        trainer_cr.env = [
             models.IoK8sApiCoreV1EnvVar(name=key, value=value) for key, value in trainer.env.items()
         ]
 
-    return trainer_crd
+    return trainer_cr
 
 
-def get_trainer_crd_from_builtin_trainer(
+def get_trainer_cr_from_builtin_trainer(
     runtime: types.Runtime,
     trainer: types.BuiltinTrainer,
     initializer: Optional[types.Initializer] = None,
 ) -> models.TrainerV1alpha1Trainer:
     """
-    Get the Trainer CRD from the builtin trainer.
+    Get the Trainer CR from the builtin trainer.
     """
     if not isinstance(trainer.config, types.TorchTuneConfig):
         raise ValueError(f"The BuiltinTrainer config is invalid: {trainer.config}")
 
-    trainer_crd = models.TrainerV1alpha1Trainer()
+    trainer_cr = models.TrainerV1alpha1Trainer()
 
     # Add number of nodes to the Trainer.
     if trainer.config.num_nodes:
-        trainer_crd.num_nodes = trainer.config.num_nodes
+        trainer_cr.num_nodes = trainer.config.num_nodes
 
     # Add resources per node to the Trainer.
     if trainer.config.resources_per_node:
-        trainer_crd.resources_per_node = get_resources_per_node(trainer.config.resources_per_node)
+        trainer_cr.resources_per_node = get_resources_per_node(trainer.config.resources_per_node)
 
-    trainer_crd.command = list(runtime.trainer.command)
+    trainer_cr.command = list(runtime.trainer.command)
     # Parse args in the TorchTuneConfig to the Trainer, preparing for the mutation of
     # the torchtune config in the runtime plugin.
     # Ref: https://github.com/kubeflow/trainer/tree/master/docs/proposals/2401-llm-trainer-v2
-    trainer_crd.args = get_args_using_torchtune_config(trainer.config, initializer)
+    trainer_cr.args = get_args_using_torchtune_config(trainer.config, initializer)
 
-    return trainer_crd
+    return trainer_cr
 
 
 def get_args_using_torchtune_config(
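
The two renamed utilities build the Trainer section of a TrainJob spec from SDK-level trainer configs. A hedged sketch of get_trainer_cr_from_custom_trainer using only fields visible in this diff (the import paths, runtime name, and resource values are illustrative assumptions):

# Hedged sketch; import paths, runtime name, and values are assumptions.
from kubeflow.trainer import CustomTrainer, TrainerClient
from kubeflow.trainer.backends.kubernetes import utils

def train_fn():
    # Placeholder training function; real training code goes here.
    print("training step")

client = TrainerClient()
runtime = client.get_runtime("torch-distributed")  # assumed runtime name

trainer = CustomTrainer(
    func=train_fn,
    num_nodes=2,
    resources_per_node={"cpu": "2", "memory": "4Gi"},
    env={"LOG_LEVEL": "info"},
)

# Produces a TrainerV1alpha1Trainer with num_nodes, resources_per_node,
# a command wrapping train_fn, and env vars, per the function above.
trainer_cr = utils.get_trainer_cr_from_custom_trainer(runtime, trainer)
print(trainer_cr.num_nodes, trainer_cr.env)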
