MCPcopy
hub / github.com/huggingface/transformers / run_benchmark

Function run_benchmark

benchmark/benches/llama.py:67–353  ·  benchmark/benches/llama.py::run_benchmark
(
    logger: Logger,
    repository: str,
    branch: str,
    commit_id: str,
    commit_msg: str,
    metrics_recorder=None,
    num_tokens_to_generate=100,
)

Source from the content-addressed store, hash-verified

65
66
67def run_benchmark(
68 logger: Logger,
69 repository: str,
70 branch: str,
71 commit_id: str,
72 commit_msg: str,
73 metrics_recorder=None,
74 num_tokens_to_generate=100,
75):
76 class="cm"># Check if required ML dependencies are available
77 if not TRANSFORMERS_AVAILABLE:
78 logger.error(class="st">"Transformers and torch are required to run the LLaMA benchmark. Please install them with:")
79 logger.error(class="st">"pip install torch transformers")
80 logger.error(class="st">"Skipping LLaMA benchmark due to missing dependencies.")
81 return
82
83 continue_metric_collection = Event()
84 metrics_thread = None
85 model_id = class="st">"meta-llama/Llama-2-7b-hf"
86
87 class="cm"># If no metrics_recorder is provided, create one for backward compatibility
88 if metrics_recorder is None:
89 try:
90 metrics_recorder = MetricsRecorder(
91 psycopg2.connect(class="st">"dbname=metrics"), logger, repository, branch, commit_id, commit_msg, True
92 )
93 should_close_recorder = True
94 except Exception as e:
95 logger.error(fclass="st">"Failed to create metrics recorder: {e}")
96 return
97 else:
98 should_close_recorder = False
99 try:
100 gpu_stats = gpustat.GPUStatCollection.new_query()
101 gpu_name = gpu_stats[0][class="st">"name"]
102 benchmark_id = metrics_recorder.initialise_benchmark({class="st">"gpu_name": gpu_name, class="st">"model_id": model_id})
103 logger.info(fclass="st">"running benchmark class="cm">#{benchmark_id} on {gpu_name} for {model_id}")
104 metrics_thread = Thread(
105 target=collect_metrics,
106 args=[benchmark_id, continue_metric_collection, metrics_recorder],
107 )
108 metrics_thread.start()
109 logger.info(class="st">"started background thread to fetch device metrics")
110
111 os.environ[class="st">"TOKENIZERS_PARALLELISM"] = class="st">"false" class="cm"># silence warnings when compiling
112
113 device = class="st">"cuda"
114
115 logger.info(class="st">"downloading weights")
116 class="cm"># This is to avoid counting download in model load time measurement
117 model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float16)
118 gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1)
119 logger.info(class="st">"loading model")
120 start = perf_counter()
121 model = AutoModelForCausalLM.from_pretrained(
122 model_id, dtype=torch.float16, generation_config=gen_config
123 ).eval()
124 model.to(device)

Callers

nothing calls this directly

Calls 15

initialise_benchmark — Method · 0.95
close — Method · 0.95
MetricsRecorder — Class · 0.90
GenerationConfig — Class · 0.90
StaticCache — Class · 0.90
sample — Function · 0.85
eval — Method · 0.80
set — Method · 0.80
join — Method · 0.80
error — Method · 0.45
info — Method · 0.45

Tested by

no test coverage detected