Enhancement: better performance for PDF loader
This commit is contained in:
		
							parent
							
								
									9d47d03d18
								
							
						
					
					
						commit
						05c7330643
					
				|  | @ -9,7 +9,7 @@ from tqdm import tqdm | ||||||
| from langchain.document_loaders import ( | from langchain.document_loaders import ( | ||||||
|     CSVLoader, |     CSVLoader, | ||||||
|     EverNoteLoader, |     EverNoteLoader, | ||||||
|     PDFMinerLoader, |     PyMuPDFLoader, | ||||||
|     TextLoader, |     TextLoader, | ||||||
|     UnstructuredEmailLoader, |     UnstructuredEmailLoader, | ||||||
|     UnstructuredEPubLoader, |     UnstructuredEPubLoader, | ||||||
|  | @ -73,7 +73,7 @@ LOADER_MAPPING = { | ||||||
|     ".html": (UnstructuredHTMLLoader, {}), |     ".html": (UnstructuredHTMLLoader, {}), | ||||||
|     ".md": (UnstructuredMarkdownLoader, {}), |     ".md": (UnstructuredMarkdownLoader, {}), | ||||||
|     ".odt": (UnstructuredODTLoader, {}), |     ".odt": (UnstructuredODTLoader, {}), | ||||||
|     ".pdf": (PDFMinerLoader, {}), |     ".pdf": (PyMuPDFLoader, {}), | ||||||
|     ".ppt": (UnstructuredPowerPointLoader, {}), |     ".ppt": (UnstructuredPowerPointLoader, {}), | ||||||
|     ".pptx": (UnstructuredPowerPointLoader, {}), |     ".pptx": (UnstructuredPowerPointLoader, {}), | ||||||
|     ".txt": (TextLoader, {"encoding": "utf8"}), |     ".txt": (TextLoader, {"encoding": "utf8"}), | ||||||
|  |  | ||||||
|  | @ -3,7 +3,7 @@ gpt4all==0.2.3 | ||||||
| chromadb==0.3.23 | chromadb==0.3.23 | ||||||
| llama-cpp-python==0.1.50 | llama-cpp-python==0.1.50 | ||||||
| urllib3==2.0.2 | urllib3==2.0.2 | ||||||
| pdfminer.six==20221105 | PyMuPDF==1.22.3 | ||||||
| python-dotenv==1.0.0 | python-dotenv==1.0.0 | ||||||
| unstructured==0.6.6 | unstructured==0.6.6 | ||||||
| extract-msg==0.41.1 | extract-msg==0.41.1 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue