| 287 | |
| 288 | |
class BltTransformerLayer(MllamaSelfAttentionDecoderLayer):
    """Decoder layer that wires Blt-specific submodules onto the Mllama layer.

    The constructor installs four attributes: ``self_attn``, ``mlp``,
    ``input_layernorm`` and ``post_attention_layernorm``. Everything else
    (including the forward pass) comes from the parent class.

    Args:
        config: Configuration object; ``hidden_size`` and ``rms_norm_eps``
            are read here, and the whole object is handed to the attention
            and MLP submodules.
        layer_idx: Index of this layer, forwarded to ``BltSelfAttention``.
    """

    def __init__(self, config, layer_idx: int):
        # NOTE(review): the parent initializer is called without `config` /
        # `layer_idx`; confirm this is intentional for the parent's signature.
        super().__init__()

        # Both RMS norms share the same width and epsilon.
        norm_dim = config.hidden_size
        norm_eps = config.rms_norm_eps

        # Assignment order is kept as-is: attention, MLP, then the two norms.
        self.self_attn = BltSelfAttention(config=config, layer_idx=layer_idx)
        self.mlp = BltMLP(config)
        self.input_layernorm = BltRMSNorm(norm_dim, eps=norm_eps)
        self.post_attention_layernorm = BltRMSNorm(norm_dim, eps=norm_eps)
| 297 | |
| 298 | |
| 299 | class BltSelfAttention(MllamaTextSelfAttention): |