Merge pull request #254 from Fabio3rs/formatOffice97-2003
Add .doc .ppt (Word and PowerPoint 97/2003 formats)
This commit is contained in:
		
						commit
						b9f8dc312f
					
				|  | @ -37,6 +37,7 @@ The supported extensions are: | |||
| 
 | ||||
|    - `.csv`: CSV, | ||||
|    - `.docx`: Word Document, | ||||
|    - `.doc`: Word 97-2003 Document, | ||||
|    - `.enex`: EverNote, | ||||
|    - `.eml`: Email, | ||||
|    - `.epub`: EPub, | ||||
|  | @ -46,6 +47,7 @@ The supported extensions are: | |||
|    - `.odt`: Open Document Text, | ||||
|    - `.pdf`: Portable Document Format (PDF), | ||||
|    - `.pptx`: PowerPoint Document, | ||||
|    - `.ppt`: PowerPoint 97-2003 Document, | ||||
|    - `.txt`: Text file (UTF-8), | ||||
| 
 | ||||
| Run the following command to ingest all the data. | ||||
|  |  | |||
|  | @ -28,6 +28,7 @@ from constants import CHROMA_SETTINGS | |||
| LOADER_MAPPING = { | ||||
|     ".csv": (CSVLoader, {}), | ||||
|     # ".docx": (Docx2txtLoader, {}), | ||||
|     ".doc": (UnstructuredWordDocumentLoader, {}), | ||||
|     ".docx": (UnstructuredWordDocumentLoader, {}), | ||||
|     ".enex": (EverNoteLoader, {}), | ||||
|     ".eml": (UnstructuredEmailLoader, {}), | ||||
|  | @ -36,6 +37,7 @@ LOADER_MAPPING = { | |||
|     ".md": (UnstructuredMarkdownLoader, {}), | ||||
|     ".odt": (UnstructuredODTLoader, {}), | ||||
|     ".pdf": (PDFMinerLoader, {}), | ||||
|     ".ppt": (UnstructuredPowerPointLoader, {}), | ||||
|     ".pptx": (UnstructuredPowerPointLoader, {}), | ||||
|     ".txt": (TextLoader, {"encoding": "utf8"}), | ||||
|     # Add more mappings for other file extensions and loaders as needed | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue