Fix #294 (tested)

This commit is contained in:
MDW 2023-05-19 16:23:09 +02:00
parent a862ff2be6
commit 4cda348cf8
1 changed files with 22 additions and 18 deletions

View File

@ -24,6 +24,28 @@ from langchain.docstore.document import Document
from constants import CHROMA_SETTINGS
class MyElmLoader(UnstructuredEmailLoader):
"""Wrapper to fallback to text/plain when default does not work"""
def load(self) -> List[Document]:
"""Wrapper adding fallback for elm without html"""
try:
try:
doc = UnstructuredEmailLoader.load(self)
except ValueError as e:
if 'text/html content not found in email' in str(e):
# Try plain text
self.unstructured_kwargs["content_source"]="text/plain"
doc = UnstructuredEmailLoader.load(self)
else:
raise
except Exception as e:
# Add file_path to exception message
raise type(e)(f"{self.file_path}: {e}") from e
return doc
# Map file extensions to document loaders and their arguments
LOADER_MAPPING = {
".csv": (CSVLoader, {}),
@ -47,24 +69,6 @@ LOADER_MAPPING = {
load_dotenv()
class MyElmLoader(UnstructuredEmailLoader):
"""Wrapper to fallback to text/plain when default does not work"""
def load(self) -> List[Document]:
"""Wrapper adding fallback for elm without html"""
try:
doc = UnstructuredEmailLoader.load()
except ValueError as e:
if 'text/html content not found in email' in str(e):
# Try plain text
self.unstructured_kwargs["content_source"]="text/plain"
doc = UnstructuredEmailLoader.load()
else:
raise
return doc
def load_single_document(file_path: str) -> Document:
ext = "." + file_path.rsplit(".", 1)[-1]
if ext in LOADER_MAPPING: