| 11 | from .config import Config |
| 12 | |
| 13 | class Trainer: |
def __init__(
    self,
    model: nn.Module,
    train_loader: DataLoader,
    val_loader: Optional[DataLoader],
    optimizer: torch.optim.Optimizer,
    criterion: nn.Module,
    config: Config,
    scheduler: Optional[Any] = None,
    device: Optional[str] = None
):
    """Store the training components and prepare run-time state.

    Moves ``model`` to the selected device, optionally creates a gradient
    scaler for mixed precision, sets up the logger and metrics tracker,
    resets the epoch/step counters, and creates ``config.model_dir`` if it
    does not exist.

    Args:
        model: Network to train; moved to the selected device here.
        train_loader: Loader iterated by ``train_epoch``.
        val_loader: Validation loader, or ``None``.
        optimizer: Optimizer stepping ``model``'s parameters.
        criterion: Loss module called as ``criterion(outputs, targets)``.
        config: Run configuration. This method reads ``device``,
            ``use_amp``, ``log_dir``, ``model_name`` and ``model_dir``.
        scheduler: Optional scheduler object; only stored here.
        device: Device override. Falls back to ``config.device`` when
            ``None`` (or otherwise falsy).
    """
    self.model = model
    self.train_loader = train_loader
    self.val_loader = val_loader
    self.optimizer = optimizer
    self.criterion = criterion
    self.config = config
    self.scheduler = scheduler
    self.device = device or config.device

    self.model.to(self.device)

    # Decide on AMP from the device *type* rather than an exact string
    # match, so that 'cuda:0', 'cuda:1' or a torch.device instance enable
    # the gradient scaler just like plain 'cuda' does.
    amp_on_cuda = config.use_amp and torch.device(self.device).type == 'cuda'
    self.scaler = amp.GradScaler('cuda') if amp_on_cuda else None
    self.logger = Logger(config.log_dir, config.model_name)
    self.metrics = MetricsTracker()

    self.current_epoch = 0
    self.global_step = 0
    self.best_val_loss = float('inf')

    os.makedirs(config.model_dir, exist_ok=True)

    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(
        p.numel() for p in model.parameters() if p.requires_grad
    )

    self.logger.info(f"Trainer initialized with device: {self.device}")
    self.logger.info(f"Model parameters: {total_params:,}")
    self.logger.info(f"Trainable parameters: {trainable_params:,}")
| 49 | |
| 50 | def train_epoch(self) -> Dict[str, float]: |
| 51 | self.model.train() |
| 52 | epoch_loss = 0.0 |
| 53 | correct = 0 |
| 54 | total = 0 |
| 55 | |
| 56 | pbar = tqdm(self.train_loader, desc=f"Epoch {self.current_epoch + 1}/{self.config.epochs}") |
| 57 | |
| 58 | for batch_idx, (inputs, targets) in enumerate(pbar): |
| 59 | inputs = inputs.to(self.device, non_blocking=True) |
| 60 | targets = targets.to(self.device, non_blocking=True) |
| 61 | |
| 62 | self.optimizer.zero_grad(set_to_none=True) |
| 63 | |
| 64 | if self.scaler is not None: |
| 65 | with amp.autocast('cuda'): |
| 66 | outputs = self.model(inputs) |
| 67 | loss = self.criterion(outputs, targets) |
| 68 | |
| 69 | self.scaler.scale(loss).backward() |
| 70 | |