`Arxiv` retriever. It wraps `load()` and exposes it as `get_relevant_documents()`. It accepts all `ArxivAPIWrapper` arguments unchanged.
| 17 | |
| 18 | |
class ArxivRetriever(BaseRetriever, ArxivAPIWrapper):
    """`Arxiv` retriever.

    Exposes Arxiv search through the retriever interface
    (``get_relevant_documents()``). Depending on ``get_full_documents`` it
    either delegates to the inherited ``load()`` or builds one document per
    search hit from the paper summary.
    It uses all ArxivAPIWrapper arguments without any change.
    """

    # When True, retrieval is delegated to load(); when False (the default),
    # each returned document holds only the paper summary plus basic metadata.
    get_full_documents: bool = False

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Return Arxiv documents matching ``query``.

        Args:
            query: Either whitespace-separated Arxiv identifiers or a free-text
                search string. Free text is truncated to
                ``ARXIV_MAX_QUERY_LENGTH`` characters before searching.
            run_manager: Callback manager for this retriever run (unused here).

        Returns:
            The result of ``load(query)`` when ``get_full_documents`` is set;
            otherwise one ``Document`` per search result with the summary as
            content and ``Published``/``Title``/``Authors`` metadata. If the
            search raises one of ``arxiv_exceptions``, a single document
            describing the error is returned instead.
        """
        if self.get_full_documents:
            # Honor the flag: load() is inherited from ArxivAPIWrapper (defined
            # outside this file) and is what the class docstring says we wrap.
            return self.load(query)

        try:
            if self.is_arxiv_identifier(query):
                search = self.arxiv_search(
                    id_list=query.split(),
                    max_results=self.top_k_results,
                )
            else:
                search = self.arxiv_search(  # type: ignore
                    query[: self.ARXIV_MAX_QUERY_LENGTH],
                    max_results=self.top_k_results,
                )
            # Materialize inside the try: results() may be a lazy iterator, in
            # which case API errors only surface while it is being consumed.
            results = list(search.results())
        except self.arxiv_exceptions as ex:
            return [Document(page_content=f"Arxiv exception: {ex}")]

        return [
            Document(
                page_content=result.summary,
                metadata={
                    # NOTE(review): the key says "Published" but the value is
                    # the entry's `updated` date - kept as-is for compatibility.
                    "Published": result.updated.date(),
                    "Title": result.title,
                    "Authors": ", ".join(a.name for a in result.authors),
                },
            )
            for result in results
        ]
| 55 | |
| 56 | |
| 57 | description = ( |