benchmark.py: 0 additions, 7 deletions

@@ -25,13 +25,6 @@
 from timm.utils import setup_default_logging, set_jit_fuser, decay_batch_step, check_batch_size_retry, ParseKwargs,\
     reparameterize_model
 
-has_apex = False
-try:
-    from apex import amp
-    has_apex = True
-except ImportError:
-    pass
-
 try:
     from deepspeed.profiling.flops_profiler import get_model_profile
     has_deepspeed_profiling = True
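With the `has_apex` flag and its import fallback deleted, the only mixed-precision path these scripts can take is native `torch.amp`. For reference, a minimal sketch of that stock recipe for a training-style step; the model, optimizer, and data below are illustrative stand-ins, not code from this diff:

import torch

# Native AMP step: autocast the forward, scale the loss for fp16 backward.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
use_amp = device.type == 'cuda'
model = torch.nn.Linear(16, 4).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scaler = torch.amp.GradScaler(device.type, enabled=use_amp)

input = torch.randn(8, 16, device=device)
target = torch.randint(0, 4, (8,), device=device)

with torch.autocast(device_type=device.type, dtype=torch.float16, enabled=use_amp):
    loss = torch.nn.functional.cross_entropy(model(input), target)
scaler.scale(loss).backward()  # scale up to avoid fp16 gradient underflow
scaler.step(optimizer)         # unscales gradients, then optimizer.step()
scaler.update()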
inference.py: 1 addition, 7 deletions

@@ -23,12 +23,6 @@
 from timm.models import create_model
 from timm.utils import AverageMeter, setup_default_logging, set_jit_fuser, ParseKwargs
 
-try:
-    from apex import amp
-    has_apex = True
-except ImportError:
-    has_apex = False
-
 try:
     from functorch.compile import memory_efficient_fusion
     has_functorch = True

@@ -170,7 +164,7 @@ def main():
         assert args.model_dtype in ('float32', 'float16', 'bfloat16')
         model_dtype = getattr(torch, args.model_dtype)
 
-    # resolve AMP arguments based on PyTorch / Apex availability
+    # resolve AMP arguments based on PyTorch availability
     amp_autocast = suppress
     if args.amp:
         assert model_dtype is None or model_dtype == torch.float32, 'float32 model dtype must be used with AMP'
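The second hunk is truncated right where `amp_autocast` gets resolved. For reference, a sketch of how the resolution typically continues once only native AMP exists; the `args.amp_dtype` handling and the stand-in namespace are assumptions, not lines from this diff:

import argparse
from contextlib import suppress
from functools import partial

import torch

# Illustrative stand-ins; in inference.py these come from the CLI parser
# and device resolution earlier in main().
args = argparse.Namespace(amp=True, amp_dtype='float16')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# With Apex gone, --amp can only mean native torch.amp: amp_autocast is
# either a no-op suppress() or a partial binding of torch.autocast.
amp_autocast = suppress
if args.amp:
    amp_dtype = torch.bfloat16 if args.amp_dtype == 'bfloat16' else torch.float16
    amp_autocast = partial(torch.autocast, device_type=device.type, dtype=amp_dtype)

with amp_autocast():
    pass  # the model forward runs under this context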
timm/task/__init__.py: 17 additions, 0 deletions

@@ -0,0 +1,17 @@
+"""Training task abstractions for timm.
+
+This module provides task-based abstractions for training loops where each task
+encapsulates both the forward pass and loss computation, returning a dictionary
+with loss components and outputs for logging.
+"""
+from .task import TrainingTask
+from .classification import ClassificationTask
+from .distillation import DistillationTeacher, LogitDistillationTask, FeatureDistillationTask
+
+__all__ = [
+    'TrainingTask',
+    'ClassificationTask',
+    'DistillationTeacher',
+    'LogitDistillationTask',
+    'FeatureDistillationTask',
+]
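Given the contract the module docstring describes (each task call returns a dictionary with loss components and outputs for logging), a minimal sketch of a training step driven by a task. The toy model, data, and optimizer are illustrative; only the `task(input, target)` -> `result['loss']` contract comes from this PR:

import torch
import torch.nn as nn

from timm.task import ClassificationTask

# Toy setup; any model/criterion pair is driven the same way.
model = nn.Linear(16, 4)
task = ClassificationTask(model, nn.CrossEntropyLoss(), verbose=False)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)

input = torch.randn(8, 16)
target = torch.randint(0, 4, (8,))

# One step: the task fuses forward pass and loss computation, returning a
# dict so extra terms (e.g. distillation losses) can be logged alongside.
result = task(input, target)
optimizer.zero_grad()
result['loss'].backward()
optimizer.step()
print(result['output'].shape)  # model logits, shape [8, 4]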
timm/task/classification.py: 90 additions, 0 deletions

@@ -0,0 +1,90 @@
+"""Classification training task."""
+import logging
+from typing import Callable, Dict, Optional, Union
+
+import torch
+import torch.nn as nn
+
+from .task import TrainingTask
+
+_logger = logging.getLogger(__name__)
+
+
+class ClassificationTask(TrainingTask):
+    """Standard supervised classification task.
+
+    Simple task that performs a forward pass through the model and computes
+    the classification loss.
+
+    Args:
+        model: The model to train
+        criterion: Loss function (e.g., CrossEntropyLoss)
+        device: Device for task tensors/buffers
+        dtype: Dtype for task tensors/buffers
+        verbose: Enable info logging
+
+    Example:
+        >>> task = ClassificationTask(model, nn.CrossEntropyLoss(), device=torch.device('cuda'))
+        >>> result = task(input, target)
+        >>> result['loss'].backward()
+    """
+
+    def __init__(
+            self,
+            model: nn.Module,
+            criterion: Union[nn.Module, Callable],
+            device: Optional[torch.device] = None,
+            dtype: Optional[torch.dtype] = None,
+            verbose: bool = True,
+    ):
+        super().__init__(device=device, dtype=dtype, verbose=verbose)
+        self.model = model
+        self.criterion = criterion
+
+        if self.verbose:
+            loss_name = getattr(criterion, '__name__', None) or type(criterion).__name__
+            _logger.info(f"ClassificationTask: criterion={loss_name}")
+
+    def prepare_distributed(
+            self,
+            device_ids: Optional[list] = None,
+            **ddp_kwargs
+    ) -> 'ClassificationTask':
+        """Prepare task for distributed training.
+
+        Wraps the model in DistributedDataParallel (DDP).
+
+        Args:
+            device_ids: List of device IDs for DDP (e.g., [local_rank])
+            **ddp_kwargs: Additional arguments passed to DistributedDataParallel
+
+        Returns:
+            self (for method chaining)
+        """
+        from torch.nn.parallel import DistributedDataParallel as DDP
+        self.model = DDP(self.model, device_ids=device_ids, **ddp_kwargs)
+        return self
+
+    def forward(
+            self,
+            input: torch.Tensor,
+            target: torch.Tensor,
+    ) -> Dict[str, torch.Tensor]:
+        """Forward pass through model and compute classification loss.
+
+        Args:
+            input: Input tensor [B, C, H, W]
+            target: Target labels [B]
+
+        Returns:
+            Dictionary containing:
+                - 'loss': Classification loss
+                - 'output': Model logits
+        """
+        output = self.model(input)
+        loss = self.criterion(output, target)
+
+        return {
+            'loss': loss,
+            'output': output,
+        }
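Because `prepare_distributed()` returns `self`, DDP setup chains onto construction. A hypothetical multi-process launch sketch (e.g. under torchrun); the process-group boilerplate is standard `torch.distributed` usage, not code from this PR:

import torch
import torch.distributed as dist
import torch.nn as nn

from timm.task import ClassificationTask

# Standard distributed init; only prepare_distributed() and its chained
# return value come from this PR.
dist.init_process_group(backend='nccl')
local_rank = dist.get_rank() % torch.cuda.device_count()
device = torch.device('cuda', local_rank)
torch.cuda.set_device(device)

model = nn.Linear(16, 4).to(device)
task = ClassificationTask(
    model,
    nn.CrossEntropyLoss(),
    device=device,
).prepare_distributed(device_ids=[local_rank])  # wraps task.model in DDP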