(df: pd.DataFrame)
| 61 | |
| 62 | |
def _parse_dataframe(df: pd.DataFrame) -> List[BatchTask]:
    """Build one ``BatchTask`` per row of *df*.

    Column headers are matched after converting them to ``str``, stripping
    surrounding whitespace and lower-casing. The recognised headers are
    ``id``, ``task``, ``attachments`` and ``vars``; a header that is not
    present is passed to the cell helpers as ``None``.

    Rows are numbered from 1 in the order ``df.to_dict(orient="records")``
    yields them, and that number is stored on each task as ``row_index``.

    Raises:
        ValidationError: if a row has neither task text nor attachments, or
            if a non-empty ID has already been used by an earlier row.
    """
    # Normalised header -> original column label. When two labels normalise
    # to the same key, the later column wins.
    header_lookup = {}
    for original_label in df.columns:
        header_lookup[str(original_label).strip().lower()] = original_label

    id_col = header_lookup.get("id")
    task_col = header_lookup.get("task")
    attachments_col = header_lookup.get("attachments")
    vars_col = header_lookup.get("vars")

    parsed: List[BatchTask] = []
    used_ids: set[str] = set()

    records = df.to_dict(orient="records")
    for position, record in enumerate(records, start=1):
        # The helpers run in this fixed order, before any validation below.
        prompt = _get_cell_text(record, task_col)
        attachments = _parse_json_list(record, attachments_col, position)
        overrides = _parse_json_dict(record, vars_col, position)

        if not (prompt or attachments):
            raise ValidationError(
                "Task and attachments cannot both be empty",
                details={"row_index": position},
            )

        identifier = _get_cell_text(record, id_col)
        # Only non-empty IDs take part in the uniqueness check.
        if identifier and identifier in used_ids:
            raise ValidationError(
                "Duplicate ID in batch file",
                details={"row_index": position, "task_id": identifier},
            )
        if identifier:
            used_ids.add(identifier)

        task = BatchTask(
            row_index=position,
            task_id=identifier or None,  # an empty ID is stored as None
            task_prompt=prompt,
            attachment_paths=attachments,
            vars_override=overrides,
        )
        parsed.append(task)

    return parsed
| 103 | |
| 104 | |
| 105 | def _get_cell_text(row: Dict[str, Any], column: Optional[str]) -> str: |
no test coverage detected