Merge pull request #254 from Fabio3rs/formatOffice97-2003
Add .doc .ppt (Word and PowerPoint 97/2003 formats)
This commit is contained in:
		
						commit
						b9f8dc312f
					
				|  | @ -37,6 +37,7 @@ The supported extensions are: | ||||||
| 
 | 
 | ||||||
|    - `.csv`: CSV, |    - `.csv`: CSV, | ||||||
|    - `.docx`: Word Document, |    - `.docx`: Word Document, | ||||||
|  |    - `.doc`: Word Document, | ||||||
|    - `.enex`: EverNote, |    - `.enex`: EverNote, | ||||||
|    - `.eml`: Email, |    - `.eml`: Email, | ||||||
|    - `.epub`: EPub, |    - `.epub`: EPub, | ||||||
|  | @ -46,6 +47,7 @@ The supported extensions are: | ||||||
|    - `.odt`: Open Document Text, |    - `.odt`: Open Document Text, | ||||||
|    - `.pdf`: Portable Document Format (PDF), |    - `.pdf`: Portable Document Format (PDF), | ||||||
|    - `.pptx` : PowerPoint Document, |    - `.pptx` : PowerPoint Document, | ||||||
|  |    - `.ppt` : PowerPoint Document, | ||||||
|    - `.txt`: Text file (UTF-8), |    - `.txt`: Text file (UTF-8), | ||||||
| 
 | 
 | ||||||
| Run the following command to ingest all the data. | Run the following command to ingest all the data. | ||||||
|  |  | ||||||
|  | @ -28,6 +28,7 @@ from constants import CHROMA_SETTINGS | ||||||
| LOADER_MAPPING = { | LOADER_MAPPING = { | ||||||
|     ".csv": (CSVLoader, {}), |     ".csv": (CSVLoader, {}), | ||||||
|     # ".docx": (Docx2txtLoader, {}), |     # ".docx": (Docx2txtLoader, {}), | ||||||
|  |     ".doc": (UnstructuredWordDocumentLoader, {}), | ||||||
|     ".docx": (UnstructuredWordDocumentLoader, {}), |     ".docx": (UnstructuredWordDocumentLoader, {}), | ||||||
|     ".enex": (EverNoteLoader, {}), |     ".enex": (EverNoteLoader, {}), | ||||||
|     ".eml": (UnstructuredEmailLoader, {}), |     ".eml": (UnstructuredEmailLoader, {}), | ||||||
|  | @ -36,6 +37,7 @@ LOADER_MAPPING = { | ||||||
|     ".md": (UnstructuredMarkdownLoader, {}), |     ".md": (UnstructuredMarkdownLoader, {}), | ||||||
|     ".odt": (UnstructuredODTLoader, {}), |     ".odt": (UnstructuredODTLoader, {}), | ||||||
|     ".pdf": (PDFMinerLoader, {}), |     ".pdf": (PDFMinerLoader, {}), | ||||||
|  |     ".ppt": (UnstructuredPowerPointLoader, {}), | ||||||
|     ".pptx": (UnstructuredPowerPointLoader, {}), |     ".pptx": (UnstructuredPowerPointLoader, {}), | ||||||
|     ".txt": (TextLoader, {"encoding": "utf8"}), |     ".txt": (TextLoader, {"encoding": "utf8"}), | ||||||
|     # Add more mappings for other file extensions and loaders as needed |     # Add more mappings for other file extensions and loaders as needed | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue