Class for storing a piece of text and associated metadata. Example: .. code-block:: python from langchain_core.documents import Document document = Document( page_content="Hello, world!", metadata={"source": "https://example.com"} )
| 245 | |
| 246 | |
class Document(BaseMedia):
    """Class for storing a piece of text and associated metadata.

    Example:

        .. code-block:: python

            from langchain_core.documents import Document

            document = Document(
                page_content="Hello, world!",
                metadata={"source": "https://example.com"}
            )
    """

    page_content: str
    """String text."""
    type: Literal["Document"] = "Document"

    def __init__(self, page_content: str, **kwargs: Any) -> None:
        """Build a document; ``page_content`` may be given positionally or by name.

        Args:
            page_content: The text of the document.
            **kwargs: Any further fields, forwarded unchanged to the base class.
        """
        # mypy reports that ``page_content`` is not a parameter of the base
        # class initializer, hence the ignore below. Validation of the value
        # is delegated to the pydantic base class.
        super().__init__(page_content=page_content, **kwargs)  # type: ignore[call-arg]

    @classmethod
    def is_lc_serializable(cls) -> bool:
        """Report that this class is serializable (always ``True``)."""
        return True

    @classmethod
    def get_lc_namespace(cls) -> List[str]:
        """Return the namespace path of this langchain object."""
        return ["langchain", "schema", "document"]

    def __str__(self) -> str:
        """Render only ``page_content`` and, when non-empty, ``metadata``.

        The output follows the format pydantic uses for ``__str__``.
        """
        # Why this override exists: user code that drops Document objects
        # straight into prompts must keep producing the same text even after
        # the id field (or any later field) was added to the model.
        #
        # It is expected to go away once prompts gain a more general way of
        # formatting document content themselves.
        if not self.metadata:
            return f"page_content='{self.page_content}'"
        return f"page_content='{self.page_content}' metadata={self.metadata}"
no outgoing calls