Fixed the CSV file reading issue

This commit is contained in:
Ravindra Prasad 2023-05-31 00:04:56 +05:30
parent 60e6bd25eb
commit db341e2a40
1 changed file with 5 additions and 6 deletions

View File

@@ -81,16 +81,15 @@ LOADER_MAPPING = {
} }
def load_single_document(file_path: str) -> Document: def load_single_document(file_path: str) -> List[Document]:
ext = "." + file_path.rsplit(".", 1)[-1] ext = "." + file_path.rsplit(".", 1)[-1]
if ext in LOADER_MAPPING: if ext in LOADER_MAPPING:
loader_class, loader_args = LOADER_MAPPING[ext] loader_class, loader_args = LOADER_MAPPING[ext]
loader = loader_class(file_path, **loader_args) loader = loader_class(file_path, **loader_args)
return loader.load()[0] return loader.load()
raise ValueError(f"Unsupported file extension '{ext}'") raise ValueError(f"Unsupported file extension '{ext}'")
def load_documents(source_dir: str, ignored_files: List[str] = []) -> List[Document]: def load_documents(source_dir: str, ignored_files: List[str] = []) -> List[Document]:
""" """
Loads all documents from the source documents directory, ignoring specified files Loads all documents from the source documents directory, ignoring specified files
@@ -105,8 +104,8 @@ def load_documents(source_dir: str, ignored_files: List[str] = []) -> List[Docum
with Pool(processes=os.cpu_count()) as pool: with Pool(processes=os.cpu_count()) as pool:
results = [] results = []
with tqdm(total=len(filtered_files), desc='Loading new documents', ncols=80) as pbar: with tqdm(total=len(filtered_files), desc='Loading new documents', ncols=80) as pbar:
for i, doc in enumerate(pool.imap_unordered(load_single_document, filtered_files)): for i, docs in enumerate(pool.imap_unordered(load_single_document, filtered_files)):
results.append(doc) results.extend(docs)
pbar.update() pbar.update()
return results return results