Enhancement: better performance for PDF loader

This commit is contained in:
sj 2023-06-07 23:48:31 +08:00
parent 9d47d03d18
commit 05c7330643
2 changed files with 3 additions and 3 deletions

View File

@@ -9,7 +9,7 @@ from tqdm import tqdm
from langchain.document_loaders import ( from langchain.document_loaders import (
CSVLoader, CSVLoader,
EverNoteLoader, EverNoteLoader,
PDFMinerLoader, PyMuPDFLoader,
TextLoader, TextLoader,
UnstructuredEmailLoader, UnstructuredEmailLoader,
UnstructuredEPubLoader, UnstructuredEPubLoader,
@@ -73,7 +73,7 @@ LOADER_MAPPING = {
".html": (UnstructuredHTMLLoader, {}), ".html": (UnstructuredHTMLLoader, {}),
".md": (UnstructuredMarkdownLoader, {}), ".md": (UnstructuredMarkdownLoader, {}),
".odt": (UnstructuredODTLoader, {}), ".odt": (UnstructuredODTLoader, {}),
".pdf": (PDFMinerLoader, {}), ".pdf": (PyMuPDFLoader, {}),
".ppt": (UnstructuredPowerPointLoader, {}), ".ppt": (UnstructuredPowerPointLoader, {}),
".pptx": (UnstructuredPowerPointLoader, {}), ".pptx": (UnstructuredPowerPointLoader, {}),
".txt": (TextLoader, {"encoding": "utf8"}), ".txt": (TextLoader, {"encoding": "utf8"}),

View File

@@ -3,7 +3,7 @@ gpt4all==0.2.3
chromadb==0.3.23 chromadb==0.3.23
llama-cpp-python==0.1.50 llama-cpp-python==0.1.50
urllib3==2.0.2 urllib3==2.0.2
pdfminer.six==20221105 PyMuPDF==1.22.3
python-dotenv==1.0.0 python-dotenv==1.0.0
unstructured==0.6.6 unstructured==0.6.6
extract-msg==0.41.1 extract-msg==0.41.1