(url: str)
| 63 | |
| 64 | |
def _ingest(url: str) -> dict:
    """Load the PDF at ``url``, split it, and store the chunks in MongoDB.

    Args:
        url: Location of the PDF; handed unchanged to ``PyPDFLoader``.

    Returns:
        Always an empty dict. The effect of the call is the insertion of
        the split documents into ``MONGODB_COLLECTION``.
    """
    loaded = PyPDFLoader(url).load()

    # Break the loaded documents into chunks (chunk_size=500, no overlap).
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
    chunks = splitter.split_documents(loaded)

    # Embed the chunks and write them to MongoDB Atlas Vector Search. The
    # vector-store object returned by from_documents is not needed here.
    embedder = OpenAIEmbeddings(disallowed_special=())
    MongoDBAtlasVectorSearch.from_documents(
        documents=chunks,
        embedding=embedder,
        collection=MONGODB_COLLECTION,
        index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
    )
    return {}
| 81 | |
| 82 | |
# Expose the ingestion function as a runnable so it can be invoked as a chain.
ingest = RunnableLambda(func=_ingest)
nothing calls this directly
no test coverage detected