Optimize load_documents function with multiprocessing

jiangzhuo 2023-05-19 02:35:20 +09:00 committed by Iván Martínez
parent 04f6706bbb
commit e3b769d33a
1 changed file with 4 additions and 1 deletion

@@ -3,6 +3,7 @@ import os
 import glob
 from typing import List
 from dotenv import load_dotenv
+from multiprocessing import Pool
 from langchain.document_loaders import (
     CSVLoader,
@@ -87,7 +88,9 @@ def load_documents(source_dir: str) -> List[Document]:
         all_files.extend(
             glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True)
         )
-    return [load_single_document(file_path) for file_path in all_files]
+    with Pool(processes=os.cpu_count()) as pool:
+        documents = pool.map(load_single_document, all_files)
+    return documents
 def main():
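
For context, here is a minimal, runnable sketch of the pattern this commit adopts. The Document dataclass and the plain-text load_single_document stub below are simplified stand-ins for the project's langchain-based loaders, not its actual code; only the Pool usage mirrors the diff above, and the "source_documents" directory is illustrative.

import glob
import os
from dataclasses import dataclass
from multiprocessing import Pool
from typing import List

@dataclass
class Document:
    # Stand-in for langchain's Document: raw text plus source metadata.
    page_content: str
    metadata: dict

def load_single_document(file_path: str) -> Document:
    # Stub loader; the real project dispatches to format-specific loaders.
    with open(file_path, encoding="utf-8", errors="ignore") as f:
        return Document(page_content=f.read(), metadata={"source": file_path})

def load_documents(source_dir: str) -> List[Document]:
    all_files = glob.glob(os.path.join(source_dir, "**/*.txt"), recursive=True)
    # One worker process per CPU core; pool.map blocks until every file is
    # parsed and returns the results in input order.
    with Pool(processes=os.cpu_count()) as pool:
        return pool.map(load_single_document, all_files)

if __name__ == "__main__":
    # The __main__ guard matters: on spawn-based platforms (macOS, Windows)
    # each worker re-imports this module, and an unguarded top-level call
    # would spawn pools recursively.
    docs = load_documents("source_documents")
    print(f"Loaded {len(docs)} documents")

A process pool, rather than a thread pool, fits here because document parsing is CPU-bound and Python threads would serialize on the GIL. Since pool.map returns results in the same order as all_files, the change preserves the original list comprehension's behavior apart from speed.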