Optimize load_documents function with multiprocessing

jiangzhuo 2023-05-19 02:35:20 +09:00 committed by Iván Martínez
parent 04f6706bbb
commit e3b769d33a
1 changed file with 4 additions and 1 deletion

@@ -3,6 +3,7 @@ import os
 import glob
 from typing import List
 from dotenv import load_dotenv
+from multiprocessing import Pool
 from langchain.document_loaders import (
     CSVLoader,
@@ -87,7 +88,9 @@ def load_documents(source_dir: str) -> List[Document]:
         all_files.extend(
             glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True)
         )
-    return [load_single_document(file_path) for file_path in all_files]
+    with Pool(processes=os.cpu_count()) as pool:
+        documents = pool.map(load_single_document, all_files)
+    return documents
 def main():
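
For context, here is a minimal, runnable sketch of the pattern this commit adopts. The Document dataclass and the plain-text load_single_document stub below are simplified stand-ins for the project's langchain-based loaders, not its actual code; only the Pool usage mirrors the diff above, and the "source_documents" directory is illustrative.

import glob
import os
from dataclasses import dataclass
from multiprocessing import Pool
from typing import List

@dataclass
class Document:
    # Stand-in for langchain's Document: raw text plus source metadata.
    page_content: str
    metadata: dict

def load_single_document(file_path: str) -> Document:
    # Stub loader; the real project dispatches to format-specific loaders.
    with open(file_path, encoding="utf-8", errors="ignore") as f:
        return Document(page_content=f.read(), metadata={"source": file_path})

def load_documents(source_dir: str) -> List[Document]:
    all_files = glob.glob(os.path.join(source_dir, "**/*.txt"), recursive=True)
    # One worker process per CPU core; pool.map blocks until every file is
    # parsed and returns the results in input order.
    with Pool(processes=os.cpu_count()) as pool:
        return pool.map(load_single_document, all_files)

if __name__ == "__main__":
    # The __main__ guard matters: on spawn-based platforms (macOS, Windows)
    # each worker re-imports this module, and an unguarded top-level call
    # would spawn pools recursively.
    docs = load_documents("source_documents")
    print(f"Loaded {len(docs)} documents")

A process pool, rather than a thread pool, fits here because document parsing is CPU-bound and Python threads would serialize on the GIL. Since pool.map returns results in the same order as all_files, the change preserves the original list comprehension's behavior apart from speed.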