From e3b769d33a18242a5b01388435cbba0e0e47c873 Mon Sep 17 00:00:00 2001
From: jiangzhuo
Date: Fri, 19 May 2023 02:35:20 +0900
Subject: [PATCH] Optimize load_documents function with multiprocessing

---
 ingest.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/ingest.py b/ingest.py
index 271e80d..ee97e7c 100755
--- a/ingest.py
+++ b/ingest.py
@@ -3,6 +3,7 @@ import os
 import glob
 from typing import List
 from dotenv import load_dotenv
+from multiprocessing import Pool
 
 from langchain.document_loaders import (
     CSVLoader,
@@ -87,7 +88,9 @@ def load_documents(source_dir: str) -> List[Document]:
         all_files.extend(
             glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True)
         )
-    return [load_single_document(file_path) for file_path in all_files]
+    with Pool(processes=os.cpu_count()) as pool:
+        documents = pool.map(load_single_document, all_files)
+    return documents
 
 
 def main():