Merge pull request #660 from doskoi/master
Improve PDF loader performance by replacing PDFMinerLoader (pdfminer.six) with PyMuPDFLoader (PyMuPDF)
This commit is contained in:
commit
51fa989679
|
@ -9,7 +9,7 @@ from tqdm import tqdm
|
||||||
from langchain.document_loaders import (
|
from langchain.document_loaders import (
|
||||||
CSVLoader,
|
CSVLoader,
|
||||||
EverNoteLoader,
|
EverNoteLoader,
|
||||||
PDFMinerLoader,
|
PyMuPDFLoader,
|
||||||
TextLoader,
|
TextLoader,
|
||||||
UnstructuredEmailLoader,
|
UnstructuredEmailLoader,
|
||||||
UnstructuredEPubLoader,
|
UnstructuredEPubLoader,
|
||||||
|
@ -73,7 +73,7 @@ LOADER_MAPPING = {
|
||||||
".html": (UnstructuredHTMLLoader, {}),
|
".html": (UnstructuredHTMLLoader, {}),
|
||||||
".md": (UnstructuredMarkdownLoader, {}),
|
".md": (UnstructuredMarkdownLoader, {}),
|
||||||
".odt": (UnstructuredODTLoader, {}),
|
".odt": (UnstructuredODTLoader, {}),
|
||||||
".pdf": (PDFMinerLoader, {}),
|
".pdf": (PyMuPDFLoader, {}),
|
||||||
".ppt": (UnstructuredPowerPointLoader, {}),
|
".ppt": (UnstructuredPowerPointLoader, {}),
|
||||||
".pptx": (UnstructuredPowerPointLoader, {}),
|
".pptx": (UnstructuredPowerPointLoader, {}),
|
||||||
".txt": (TextLoader, {"encoding": "utf8"}),
|
".txt": (TextLoader, {"encoding": "utf8"}),
|
||||||
|
|
|
@ -3,7 +3,7 @@ gpt4all==0.2.3
|
||||||
chromadb==0.3.23
|
chromadb==0.3.23
|
||||||
llama-cpp-python==0.1.50
|
llama-cpp-python==0.1.50
|
||||||
urllib3==2.0.2
|
urllib3==2.0.2
|
||||||
pdfminer.six==20221105
|
PyMuPDF==1.22.3
|
||||||
python-dotenv==1.0.0
|
python-dotenv==1.0.0
|
||||||
unstructured==0.6.6
|
unstructured==0.6.6
|
||||||
extract-msg==0.41.1
|
extract-msg==0.41.1
|
||||||
|
|
Loading…
Reference in New Issue