MCPcopy
hub / github.com/opentrace/opentrace / run

Method run

agent/src/opentrace_agent/benchmarks/swe_bench.py:384–462  ·  view source on GitHub ↗

Run the full SWE-bench evaluation. Parameters — instances_path (str or Path): Path to SWE-bench instances JSON file. agent_fn (AgentFn): The agent function to test. use_opentrace (bool): Whether to provide OpenTrace tools. …

(
        self,
        instances_path: str | Path,
        agent_fn: AgentFn,
        *,
        use_opentrace: bool = True,
        limit: int | None = None,
        on_progress: Callable[[int, int, SWEBenchResult], None] | None = None,
        workers: int = 1,
    )

Source from the content-addressed store, hash-verified

382 return result
383
384 def run(
385 self,
386 instances_path: str | Path,
387 agent_fn: AgentFn,
388 *,
389 use_opentrace: bool = True,
390 limit: int | None = None,
391 on_progress: Callable[[int, int, SWEBenchResult], None] | None = None,
392 workers: int = 1,
393 ) -> SWEBenchReport:
394 """Run the full SWE-bench evaluation.
395
396 Parameters
397 ----------
398 instances_path : str or Path
399 Path to SWE-bench instances JSON file.
400 agent_fn : AgentFn
401 The agent function to test.
402 use_opentrace : bool
403 Whether to provide OpenTrace tools.
404 limit : int or None
405 Max instances to run (for quick testing).
406 on_progress : callable or None
407 Called after each instance with ``(completed, total, result)``.
408 workers : int
409 Number of instances to run in parallel (default 1 = sequential).
410 """
411 instances = self.load_instances(instances_path)
412 if limit:
413 instances = instances[:limit]
414
415 t0 = time.monotonic()
416 total = len(instances)
417
418 if workers <= 1:
419 # Sequential
420 results = []
421 for i, instance in enumerate(instances):
422 logger.info(
423 "[%d/%d] Running %s (opentrace=%s)",
424 i + 1,
425 total,
426 instance.instance_id,
427 use_opentrace,
428 )
429 result = self.run_instance(instance, agent_fn, use_opentrace=use_opentrace)
430 results.append(result)
431 if on_progress is not None:
432 on_progress(i + 1, total, result)
433 else:
434 # Parallel with thread pool
435 from concurrent.futures import ThreadPoolExecutor, as_completed
436
437 results = [None] * total # type: ignore[list-item]
438 completed = 0
439
440 def _run(idx: int, inst: Any) -> tuple[int, SWEBenchResult]:
441 logger.info(

Callers 12

run_swe_bench_cliFunction · 0.95
run_comparisonMethod · 0.95
_run_rgFunction · 0.45
_secure_path_windowsFunction · 0.45
_get_machine_idFunction · 0.45
mcp_cmdFunction · 0.45
serveFunction · 0.45
_runFunction · 0.45
agent_fnFunction · 0.45
clone_repoMethod · 0.45
index_repoMethod · 0.45

Calls 4

load_instancesMethod · 0.95
run_instanceMethod · 0.95
SWEBenchReportClass · 0.85
on_progressFunction · 0.70

Tested by 1