MCPcopy Index your code
hub / github.com/OpenBMB/ChatDev / _parse_dataframe

Function _parse_dataframe

server/services/batch_parser.py:63–102  ·  view source on GitHub ↗
(df: pd.DataFrame)

Source from the content-addressed store, hash-verified

61
62
def _parse_dataframe(df: pd.DataFrame) -> List[BatchTask]:
    """Turn every row of a batch DataFrame into a ``BatchTask``.

    Column headers are matched after being stringified, stripped and
    lower-cased; the headers looked up are ``id``, ``task``, ``attachments``
    and ``vars``. A header that is absent resolves to ``None`` and is passed
    through to the cell helpers as-is. Rows are numbered from 1 in DataFrame
    order, and that number is reported in validation errors.

    Args:
        df: The loaded batch table.

    Returns:
        One ``BatchTask`` per row, in row order.

    Raises:
        ValidationError: If a row has neither a task prompt nor attachments,
            or if a non-empty ID appears more than once in the batch.
    """
    # Normalised header -> original column label, so lookups ignore case
    # and surrounding whitespace while row access uses the real label.
    normalized_columns = {}
    for original_label in df.columns:
        normalized_columns[str(original_label).strip().lower()] = original_label

    id_col = normalized_columns.get("id")
    task_col = normalized_columns.get("task")
    attachments_col = normalized_columns.get("attachments")
    vars_col = normalized_columns.get("vars")

    parsed: List[BatchTask] = []
    used_ids: set[str] = set()

    records = df.to_dict(orient="records")
    for position, record in enumerate(records, start=1):
        prompt = _get_cell_text(record, task_col)
        attachments = _parse_json_list(record, attachments_col, position)
        overrides = _parse_json_dict(record, vars_col, position)

        # A row must carry at least a prompt or one attachment.
        if not (prompt or attachments):
            raise ValidationError(
                "Task and attachments cannot both be empty",
                details={"row_index": position},
            )

        identifier = _get_cell_text(record, id_col)
        if identifier and identifier in used_ids:
            raise ValidationError(
                "Duplicate ID in batch file",
                details={"row_index": position, "task_id": identifier},
            )
        if identifier:
            # Only non-empty IDs take part in the uniqueness check.
            used_ids.add(identifier)

        task = BatchTask(
            row_index=position,
            task_id=identifier or None,
            task_prompt=prompt,
            attachment_paths=attachments,
            vars_override=overrides,
        )
        parsed.append(task)

    return parsed
103
104
105def _get_cell_text(row: Dict[str, Any], column: Optional[str]) -> str:

Callers 1

parse_batch_file — Function · 0.85

Calls 7

ValidationError — Class · 0.90
_get_cell_text — Function · 0.85
_parse_json_list — Function · 0.85
_parse_json_dict — Function · 0.85
BatchTask — Class · 0.85
get — Method · 0.45
to_dict — Method · 0.45

Tested by

no test coverage detected