Merge pull request #1 from parampavar/parampavar-support-ingestion-of-uppercase-fileextensions
Add support for ingesting files with uppercase extensions
This commit is contained in:
		
						commit
						6dc494d30f
					
				|  | @ -82,7 +82,7 @@ LOADER_MAPPING = { | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def load_single_document(file_path: str) -> List[Document]: | def load_single_document(file_path: str) -> List[Document]: | ||||||
|     ext = "." + file_path.rsplit(".", 1)[-1] |     ext = "." + file_path.rsplit(".", 1)[-1].lower() | ||||||
|     if ext in LOADER_MAPPING: |     if ext in LOADER_MAPPING: | ||||||
|         loader_class, loader_args = LOADER_MAPPING[ext] |         loader_class, loader_args = LOADER_MAPPING[ext] | ||||||
|         loader = loader_class(file_path, **loader_args) |         loader = loader_class(file_path, **loader_args) | ||||||
|  | @ -97,7 +97,10 @@ def load_documents(source_dir: str, ignored_files: List[str] = []) -> List[Docum | ||||||
|     all_files = [] |     all_files = [] | ||||||
|     for ext in LOADER_MAPPING: |     for ext in LOADER_MAPPING: | ||||||
|         all_files.extend( |         all_files.extend( | ||||||
|             glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True) |             glob.glob(os.path.join(source_dir, f"**/*{ext.lower()}"), recursive=True) | ||||||
|  |         ) | ||||||
|  |         all_files.extend( | ||||||
|  |             glob.glob(os.path.join(source_dir, f"**/*{ext.upper()}"), recursive=True) | ||||||
|         ) |         ) | ||||||
|     filtered_files = [file_path for file_path in all_files if file_path not in ignored_files] |     filtered_files = [file_path for file_path in all_files if file_path not in ignored_files] | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue