Call the IBM watsonx.ai inference endpoint, which then generates the response. Args: prompts: List of strings (prompts) to pass into the model. stop: Optional list of stop words to use when generating. run_manager: Optional callback manager. Returns: The full LLMResult output.
(
self,
prompts: List[str],
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
stream: Optional[bool] = None,
**kwargs: Any,
)
| 336 | return result.generations[0][0].text |
| 337 | |
def _generate(
    self,
    prompts: List[str],
    stop: Optional[List[str]] = None,
    run_manager: Optional[CallbackManagerForLLMRun] = None,
    stream: Optional[bool] = None,
    **kwargs: Any,
) -> LLMResult:
    """Call the IBM watsonx.ai inference endpoint, which then generates the response.

    Args:
        prompts: List of strings (prompts) to pass into the model.
        stop: Optional list of stop words to use when generating.
        run_manager: Optional callback manager.
        stream: Whether to stream the response. When ``None``, the value of
            ``self.streaming`` is used instead.
        **kwargs: Extra keyword arguments forwarded to ``self._stream`` when
            streaming, or to ``self.watsonx_model.generate`` otherwise.

    Returns:
        The full LLMResult output.

    Raises:
        ValueError: If streaming is requested with more than one prompt.

    Example:
        .. code-block:: python

            response = watsonx_llm.generate(["What is a molecule"])
    """
    # Computed up front (as before) so any error from parameter handling
    # surfaces before either generation path is entered.
    params = self._get_chat_params(stop=stop)
    should_stream = stream if stream is not None else self.streaming

    if not should_stream:
        response = self.watsonx_model.generate(
            prompt=prompts, params=params, **kwargs
        )
        return self._create_llm_result(response)

    if len(prompts) > 1:
        raise ValueError(
            f"WatsonxLLM currently only supports single prompt, got {prompts}"
        )

    # Start from an empty chunk and fold every streamed chunk into it; the
    # accumulator is never None, so no None check or assert is needed.
    generation = GenerationChunk(text="")
    stream_iter = self._stream(
        prompts[0], stop=stop, run_manager=run_manager, **kwargs
    )
    for chunk in stream_iter:
        generation += chunk

    if isinstance(generation.generation_info, dict):
        # Use a default so a generation_info dict without "llm_output" does
        # not raise KeyError; llm_output is then simply None.
        llm_output = generation.generation_info.pop("llm_output", None)
        return LLMResult(generations=[[generation]], llm_output=llm_output)
    return LLMResult(generations=[[generation]])
| 384 | |
| 385 | def _stream( |
| 386 | self, |
no test coverage detected