Enhancement: better performance for the PDF loader

This commit is contained in:
sj 2023-06-07 23:48:31 +08:00
parent 9d47d03d18
commit 05c7330643
2 changed files with 3 additions and 3 deletions

View File

@@ -9,7 +9,7 @@ from tqdm import tqdm
 from langchain.document_loaders import (
 CSVLoader,
 EverNoteLoader,
-PDFMinerLoader,
+PyMuPDFLoader,
 TextLoader,
 UnstructuredEmailLoader,
 UnstructuredEPubLoader,
@@ -73,7 +73,7 @@ LOADER_MAPPING = {
 ".html": (UnstructuredHTMLLoader, {}),
 ".md": (UnstructuredMarkdownLoader, {}),
 ".odt": (UnstructuredODTLoader, {}),
-".pdf": (PDFMinerLoader, {}),
+".pdf": (PyMuPDFLoader, {}),
 ".ppt": (UnstructuredPowerPointLoader, {}),
 ".pptx": (UnstructuredPowerPointLoader, {}),
 ".txt": (TextLoader, {"encoding": "utf8"}),

View File

@@ -3,7 +3,7 @@ gpt4all==0.2.3
 chromadb==0.3.23
 llama-cpp-python==0.1.50
 urllib3==2.0.2
-pdfminer.six==20221105
+PyMuPDF==1.22.3
 python-dotenv==1.0.0
 unstructured==0.6.6
 extract-msg==0.41.1