Solve a SWE-bench instance using Claude with optional OpenTrace tools.
(
problem_statement: str,
repo_path: Path,
mcp_config: dict[str, Any] | None,
)
| 345 | client = anthropic.Anthropic(api_key=key) |
| 346 | |
| 347 | def agent_fn( |
| 348 | problem_statement: str, |
| 349 | repo_path: Path, |
| 350 | mcp_config: dict[str, Any] | None, |
| 351 | ) -> str: |
| 352 | """Solve a SWE-bench instance using Claude with optional OpenTrace tools.""" |
| 353 | # Build tool dispatcher |
| 354 | mcp_tools = None |
| 355 | if mcp_config and "db_path" in mcp_config: |
| 356 | try: |
| 357 | from opentrace_agent.cli.mcp_server import create_mcp_server |
| 358 | from opentrace_agent.store import GraphStore |
| 359 | |
| 360 | store = GraphStore(mcp_config["db_path"], read_only=True) |
| 361 | server = create_mcp_server(store) |
| 362 | mcp_tools = server._tool_manager._tools |
| 363 | except Exception as e: |
| 364 | logger.warning("Failed to load OpenTrace tools: %s", e) |
| 365 | |
| 366 | dispatcher = ToolDispatcher(repo_path, mcp_tools) |
| 367 | |
| 368 | # Select tools and system prompt |
| 369 | use_opentrace = mcp_tools is not None |
| 370 | if use_opentrace: |
| 371 | tools = OPENTRACE_TOOLS |
| 372 | system = SYSTEM_PROMPT.format( |
| 373 | opentrace_section=OPENTRACE_SECTION, |
| 374 | graph_strategy=GRAPH_STRATEGY, |
| 375 | ) |
| 376 | else: |
| 377 | tools = [t for t in OPENTRACE_TOOLS if t["name"] in ("read_file", "list_directory", "generate_patch")] |
| 378 | system = SYSTEM_PROMPT.format( |
| 379 | opentrace_section="", |
| 380 | graph_strategy=NO_GRAPH_STRATEGY, |
| 381 | ) |
| 382 | |
| 383 | messages: list[dict[str, Any]] = [ |
| 384 | {"role": "user", "content": f"Here is the problem to solve:\n\n{problem_statement}"}, |
| 385 | ] |
| 386 | |
| 387 | for turn in range(max_turns): |
| 388 | logger.debug("Turn %d/%d (messages=%d)", turn + 1, max_turns, len(messages)) |
| 389 | |
| 390 | response = client.messages.create( |
| 391 | model=model, |
| 392 | max_tokens=max_tokens, |
| 393 | system=system, |
| 394 | tools=tools, |
| 395 | messages=messages, |
| 396 | ) |
| 397 | |
| 398 | # Check if we got tool calls |
| 399 | tool_uses = [b for b in response.content if b.type == "tool_use"] |
| 400 | text_blocks = [b for b in response.content if b.type == "text"] |
| 401 | |
| 402 | if text_blocks: |
| 403 | for tb in text_blocks: |
| 404 | logger.debug("Claude: %s", tb.text[:200]) |
No test coverage detected.