From 66a9f9cde040ff54f9cc9e8dc7d26f7dce1795bd Mon Sep 17 00:00:00 2001 From: Fabio Rossini Sluzala Date: Wed, 17 May 2023 12:04:16 -0300 Subject: [PATCH] Add .doc .ppt (Word and PowerPoint 97/2003 formats) --- ingest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ingest.py b/ingest.py index d28edd5..020dde5 100644 --- a/ingest.py +++ b/ingest.py @@ -32,6 +32,7 @@ LOADER_MAPPING = { ".csv": (CSVLoader, {}), # ".docx": (Docx2txtLoader, {}), ".docx": (UnstructuredWordDocumentLoader, {}), + ".doc": (UnstructuredWordDocumentLoader, {}), ".enex": (EverNoteLoader, {}), ".eml": (UnstructuredEmailLoader, {}), ".epub": (UnstructuredEPubLoader, {}), @@ -40,6 +41,7 @@ LOADER_MAPPING = { ".odt": (UnstructuredODTLoader, {}), ".pdf": (PDFMinerLoader, {}), ".pptx": (UnstructuredPowerPointLoader, {}), + ".ppt": (UnstructuredPowerPointLoader, {}), ".txt": (TextLoader, {"encoding": "utf8"}), # Add more mappings for other file extensions and loaders as needed }