| 70 | ) |
| 71 | @require_accelerate |
| 72 | class FbgemmFp8Test(unittest.TestCase): |
| 73 | model_name = "meta-llama/Meta-Llama-3-8B" |
| 74 | |
| 75 | input_text = "What are we having for dinner?" |
| 76 | max_new_tokens = 9 |
| 77 | |
| 78 | EXPECTED_OUTPUT = set[Any]( |
| 79 | [ |
| 80 | "What are we having for dinner?\nI'm having a steak and a salad", |
| 81 | "What are we having for dinner? I don’t know. What are we having", |
| 82 | "What are we having for dinner? I don’t know, what are you having", |
| 83 | ] |
| 84 | ) |
| 85 | |
| 86 | device_map = "xpu" if is_torch_xpu_available() else "cuda" |
| 87 | |
| 88 | offload_device_map = { |
| 89 | "model.embed_tokens": 0, |
| 90 | "model.layers.0": 0, |
| 91 | "model.layers.1": 0, |
| 92 | "model.layers.2": 0, |
| 93 | "model.layers.3": 0, |
| 94 | "model.layers.4": 0, |
| 95 | "model.layers.5": 0, |
| 96 | "model.layers.6": 0, |
| 97 | "model.layers.7": 0, |
| 98 | "model.layers.8": 0, |
| 99 | "model.layers.9": 0, |
| 100 | "model.layers.10": 0, |
| 101 | "model.layers.11": 0, |
| 102 | "model.layers.12": 0, |
| 103 | "model.layers.13": 0, |
| 104 | "model.layers.14": 0, |
| 105 | "model.layers.15": 0, |
| 106 | "model.layers.16": "cpu", |
| 107 | "model.layers.17": "cpu", |
| 108 | "model.layers.18": "cpu", |
| 109 | "model.layers.19": "cpu", |
| 110 | "model.layers.20": "disk", |
| 111 | "model.layers.21": "disk", |
| 112 | "model.layers.22": "disk", |
| 113 | "model.layers.23": "disk", |
| 114 | "model.layers.24": "disk", |
| 115 | "model.layers.25": "disk", |
| 116 | "model.layers.26": "disk", |
| 117 | "model.layers.27": "disk", |
| 118 | "model.layers.28": "disk", |
| 119 | "model.layers.29": "disk", |
| 120 | "model.layers.30": "disk", |
| 121 | "model.layers.31": "disk", |
| 122 | "model.norm": "disk", |
| 123 | "lm_head": "disk", |
| 124 | } |
| 125 | |
| 126 | # called only once for all tests in this class |
| 127 | @classmethod |
| 128 | def setUpClass(cls): |
| 129 | """ |
nothing calls this directly
no test coverage detected