()
| 41 | action = "generate_with_thinking" if request.thinking else "generate" |
| 42 | |
async def stream():
    """Relay upstream tokens to the caller as ``data:`` frames.

    Iterates ``client.stream_async`` for the ``ChatApp`` target using the
    ``action`` and ``request.prompt`` captured from the enclosing scope.
    Each token is serialized as ``{"token": <token>}`` and yielded as a
    single ``data: ...`` frame followed by a blank line. Once the upstream
    iterator is exhausted, a final ``data: [DONE]`` frame is yielded.
    """
    token_source = client.stream_async(
        "streaming_chat.chat_app:ChatApp",
        action,
        request.prompt,
    )
    async for piece in token_source:
        payload = json.dumps({"token": piece})
        yield f"data: {payload}\n\n"
    # Sentinel frame: emitted only after the upstream iterator finishes
    # without raising.
    yield "data: [DONE]\n\n"
| 52 | |
| 53 | return StreamingResponse( |
| 54 | stream(), |