hub / github.com/huggingface/transformers / run_benchmark

Function run_benchmark

benchmark/benches/llama.py:67–353 · benchmark/benches/llama.py::run_benchmark

(
    logger: Logger,
    repository: str,
    branch: str,
    commit_id: str,
    commit_msg: str,
    metrics_recorder=None,
    num_tokens_to_generate=100,
)

Source from the content-addressed store, hash-verified

65
66
67	def run_benchmark(
68	logger: Logger,
69	repository: str,
70	branch: str,
71	commit_id: str,
72	commit_msg: str,
73	metrics_recorder=None,
74	num_tokens_to_generate=100,
75	):
76	class="cm"># Check if required ML dependencies are available
77	if not TRANSFORMERS_AVAILABLE:
78	logger.error(class="st">"Transformers and torch are required to run the LLaMA benchmark. Please install them with:")
79	logger.error(class="st">"pip install torch transformers")
80	logger.error(class="st">"Skipping LLaMA benchmark due to missing dependencies.")
81	return
82
83	continue_metric_collection = Event()
84	metrics_thread = None
85	model_id = class="st">"meta-llama/Llama-2-7b-hf"
86
87	class="cm"># If no metrics_recorder is provided, create one for backward compatibility
88	if metrics_recorder is None:
89	try:
90	metrics_recorder = MetricsRecorder(
91	psycopg2.connect(class="st">"dbname=metrics"), logger, repository, branch, commit_id, commit_msg, True
92	)
93	should_close_recorder = True
94	except Exception as e:
95	logger.error(fclass="st">"Failed to create metrics recorder: {e}")
96	return
97	else:
98	should_close_recorder = False
99	try:
100	gpu_stats = gpustat.GPUStatCollection.new_query()
101	gpu_name = gpu_stats[0][class="st">"name"]
102	benchmark_id = metrics_recorder.initialise_benchmark({class="st">"gpu_name": gpu_name, class="st">"model_id": model_id})
103	logger.info(fclass="st">"running benchmark class="cm">#{benchmark_id} on {gpu_name} for {model_id}")
104	metrics_thread = Thread(
105	target=collect_metrics,
106	args=[benchmark_id, continue_metric_collection, metrics_recorder],
107	)
108	metrics_thread.start()
109	logger.info(class="st">"started background thread to fetch device metrics")
110
111	os.environ[class="st">"TOKENIZERS_PARALLELISM"] = class="st">"false" class="cm"># silence warnings when compiling
112
113	device = class="st">"cuda"
114
115	logger.info(class="st">"downloading weights")
116	class="cm"># This is to avoid counting download in model load time measurement
117	model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float16)
118	gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1)
119	logger.info(class="st">"loading model")
120	start = perf_counter()
121	model = AutoModelForCausalLM.from_pretrained(
122	model_id, dtype=torch.float16, generation_config=gen_config
123	).eval()
124	model.to(device)

Callers

nothing calls this directly

Calls 15

initialise_benchmark — Method · 0.95

collect_model_measurements — Method · 0.95

close — Method · 0.95

MetricsRecorder — Class · 0.90

GenerationConfig — Class · 0.90

StaticCache — Class · 0.90

sample — Function · 0.85

eval — Method · 0.80

set — Method · 0.80

join — Method · 0.80

error — Method · 0.45

info — Method · 0.45

Tested by

no test coverage detected