Merge pull request #660 from doskoi/master

Improving performance for PDF loader
This commit is contained in:
Iván Martínez 2023-06-11 19:10:08 +02:00 committed by GitHub
commit 51fa989679
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 3 additions and 3 deletions

View File

@@ -9,7 +9,7 @@ from tqdm import tqdm
from langchain.document_loaders import (
CSVLoader,
EverNoteLoader,
-PDFMinerLoader,
+PyMuPDFLoader,
TextLoader,
UnstructuredEmailLoader,
UnstructuredEPubLoader,
@@ -73,7 +73,7 @@ LOADER_MAPPING = {
".html": (UnstructuredHTMLLoader, {}),
".md": (UnstructuredMarkdownLoader, {}),
".odt": (UnstructuredODTLoader, {}),
-".pdf": (PDFMinerLoader, {}),
+".pdf": (PyMuPDFLoader, {}),
".ppt": (UnstructuredPowerPointLoader, {}),
".pptx": (UnstructuredPowerPointLoader, {}),
".txt": (TextLoader, {"encoding": "utf8"}),

View File

@@ -3,7 +3,7 @@ gpt4all==0.2.3
chromadb==0.3.23
llama-cpp-python==0.1.50
urllib3==2.0.2
-pdfminer.six==20221105
+PyMuPDF==1.22.3
python-dotenv==1.0.0
unstructured==0.6.6
extract-msg==0.41.1