MCPcopy
hub / github.com/huggingface/transformers / FbgemmFp8Test

Class FbgemmFp8Test

tests/quantization/fbgemm_fp8/test_fbgemm_fp8.py:72–291  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

70)
71@require_accelerate
72class FbgemmFp8Test(unittest.TestCase):
73 model_name = "meta-llama/Meta-Llama-3-8B"
74
75 input_text = "What are we having for dinner?"
76 max_new_tokens = 9
77
78 EXPECTED_OUTPUT = set[Any](
79 [
80 "What are we having for dinner?\nI'm having a steak and a salad",
81 "What are we having for dinner? I don’t know. What are we having",
82 "What are we having for dinner? I don’t know, what are you having",
83 ]
84 )
85
86 device_map = "xpu" if is_torch_xpu_available() else "cuda"
87
88 offload_device_map = {
89 "model.embed_tokens": 0,
90 "model.layers.0": 0,
91 "model.layers.1": 0,
92 "model.layers.2": 0,
93 "model.layers.3": 0,
94 "model.layers.4": 0,
95 "model.layers.5": 0,
96 "model.layers.6": 0,
97 "model.layers.7": 0,
98 "model.layers.8": 0,
99 "model.layers.9": 0,
100 "model.layers.10": 0,
101 "model.layers.11": 0,
102 "model.layers.12": 0,
103 "model.layers.13": 0,
104 "model.layers.14": 0,
105 "model.layers.15": 0,
106 "model.layers.16": "cpu",
107 "model.layers.17": "cpu",
108 "model.layers.18": "cpu",
109 "model.layers.19": "cpu",
110 "model.layers.20": "disk",
111 "model.layers.21": "disk",
112 "model.layers.22": "disk",
113 "model.layers.23": "disk",
114 "model.layers.24": "disk",
115 "model.layers.25": "disk",
116 "model.layers.26": "disk",
117 "model.layers.27": "disk",
118 "model.layers.28": "disk",
119 "model.layers.29": "disk",
120 "model.layers.30": "disk",
121 "model.layers.31": "disk",
122 "model.norm": "disk",
123 "lm_head": "disk",
124 }
125
126 # called only once for all tests in this class
127 @classmethod
128 def setUpClass(cls):
129 """

Callers

nothing calls this directly

Calls 1

is_torch_xpu_availableFunction · 0.90

Tested by

no test coverage detected