From 81b221bccbe1ce3a332e73658847452ed70cf71c Mon Sep 17 00:00:00 2001
From: jiangzhuo
Date: Fri, 19 May 2023 02:35:20 +0900
Subject: [PATCH] Optimize load_documents function with multiprocessing

---
 ingest.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/ingest.py b/ingest.py
index 47b5192..2566289 100644
--- a/ingest.py
+++ b/ingest.py
@@ -2,6 +2,7 @@ import os
 import glob
 from typing import List
 from dotenv import load_dotenv
+from multiprocessing import Pool
 
 from langchain.document_loaders import (
     CSVLoader,
@@ -64,7 +65,9 @@ def load_documents(source_dir: str) -> List[Document]:
         all_files.extend(
             glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True)
         )
-    return [load_single_document(file_path) for file_path in all_files]
+    with Pool(processes=os.cpu_count()) as pool:
+        documents = pool.map(load_single_document, all_files)
+    return documents
 
 
 def main():