fixed the the csv file reading issue
This commit is contained in:
parent
60e6bd25eb
commit
db341e2a40
11
ingest.py
11
ingest.py
|
@ -81,16 +81,15 @@ LOADER_MAPPING = {
|
|||
}
|
||||
|
||||
|
||||
def load_single_document(file_path: str) -> Document:
|
||||
def load_single_document(file_path: str) -> List[Document]:
|
||||
ext = "." + file_path.rsplit(".", 1)[-1]
|
||||
if ext in LOADER_MAPPING:
|
||||
loader_class, loader_args = LOADER_MAPPING[ext]
|
||||
loader = loader_class(file_path, **loader_args)
|
||||
return loader.load()[0]
|
||||
return loader.load()
|
||||
|
||||
raise ValueError(f"Unsupported file extension '{ext}'")
|
||||
|
||||
|
||||
def load_documents(source_dir: str, ignored_files: List[str] = []) -> List[Document]:
|
||||
"""
|
||||
Loads all documents from the source documents directory, ignoring specified files
|
||||
|
@ -98,15 +97,15 @@ def load_documents(source_dir: str, ignored_files: List[str] = []) -> List[Docum
|
|||
all_files = []
|
||||
for ext in LOADER_MAPPING:
|
||||
all_files.extend(
|
||||
glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True)
|
||||
glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True)
|
||||
)
|
||||
filtered_files = [file_path for file_path in all_files if file_path not in ignored_files]
|
||||
|
||||
with Pool(processes=os.cpu_count()) as pool:
|
||||
results = []
|
||||
with tqdm(total=len(filtered_files), desc='Loading new documents', ncols=80) as pbar:
|
||||
for i, doc in enumerate(pool.imap_unordered(load_single_document, filtered_files)):
|
||||
results.append(doc)
|
||||
for i, docs in enumerate(pool.imap_unordered(load_single_document, filtered_files)):
|
||||
results.extend(docs)
|
||||
pbar.update()
|
||||
|
||||
return results
|
||||
|
|
Loading…
Reference in New Issue