Optimize load_documents function with multiprocessing
parent ad64589c8f
commit 81b221bccb
```diff
@@ -2,6 +2,7 @@ import os
 import glob
 from typing import List
 from dotenv import load_dotenv
+from multiprocessing import Pool
 
 from langchain.document_loaders import (
     CSVLoader,
@@ -64,7 +65,9 @@ def load_documents(source_dir: str) -> List[Document]:
         all_files.extend(
             glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True)
         )
-    return [load_single_document(file_path) for file_path in all_files]
+    with Pool(processes=os.cpu_count()) as pool:
+        documents = pool.map(load_single_document, all_files)
+    return documents
 
 
 def main():
```
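The change replaces the sequential list comprehension with a `multiprocessing.Pool`, so files are parsed in parallel with one worker process per CPU core. Below is a minimal self-contained sketch of the resulting function; `load_single_document` is stubbed to read raw text, and the extension list and the `source_documents` path are placeholders for this example, not the repository's actual code.

```python
import glob
import os
from multiprocessing import Pool
from typing import List


def load_single_document(file_path: str) -> str:
    # Stand-in for the real loader: the commit's version dispatches to a
    # LangChain loader and returns a Document; here we just read text so
    # the sketch runs on its own.
    with open(file_path, encoding="utf-8", errors="ignore") as f:
        return f.read()


def load_documents(source_dir: str) -> List[str]:
    # Collect every file matching the supported extensions
    # (".txt" and ".md" are placeholders for this sketch).
    all_files: List[str] = []
    for ext in (".txt", ".md"):
        all_files.extend(
            glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True)
        )
    # Fan the per-file work out across one worker process per CPU core.
    # pool.map preserves input order, so the result lines up with
    # all_files exactly as the original list comprehension did.
    with Pool(processes=os.cpu_count()) as pool:
        documents = pool.map(load_single_document, all_files)
    return documents


if __name__ == "__main__":
    # The entry-point guard matters with multiprocessing: on spawn-based
    # platforms (Windows, macOS by default) worker processes re-import
    # this module and would otherwise recurse into the pool setup.
    print(len(load_documents("source_documents")))
```

`pool.map` blocks until every file is processed and returns results in input order, so the function's contract is unchanged from the comprehension; only wall-clock time drops for CPU-bound loaders. Note that, like the comprehension, it still materializes all results in memory at once.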