Reuse existing stored index during ingestion (#1220)
This commit is contained in:
parent
aa70d3d9f0
commit
f394ca61bb
|
@ -133,14 +133,27 @@ class IngestService:
|
||||||
document.excluded_embed_metadata_keys = ["doc_id"]
|
document.excluded_embed_metadata_keys = ["doc_id"]
|
||||||
# We don't want the LLM to receive these metadata in the context
|
# We don't want the LLM to receive these metadata in the context
|
||||||
document.excluded_llm_metadata_keys = ["file_name", "doc_id", "page_label"]
|
document.excluded_llm_metadata_keys = ["file_name", "doc_id", "page_label"]
|
||||||
# create vectorStore index
|
|
||||||
VectorStoreIndex.from_documents(
|
try:
|
||||||
documents,
|
# Load the index from storage and insert new documents.
|
||||||
storage_context=self.storage_context,
|
index = load_index_from_storage(
|
||||||
service_context=self.ingest_service_context,
|
storage_context=self.storage_context,
|
||||||
store_nodes_override=True, # Force store nodes in index and document stores
|
service_context=self.ingest_service_context,
|
||||||
show_progress=True,
|
store_nodes_override=True, # Force store nodes in index and document stores
|
||||||
)
|
show_progress=True,
|
||||||
|
)
|
||||||
|
for doc in documents:
|
||||||
|
index.insert(doc)
|
||||||
|
except ValueError:
|
||||||
|
# Or create a new index if none can be loaded from storage
|
||||||
|
VectorStoreIndex.from_documents(
|
||||||
|
documents,
|
||||||
|
storage_context=self.storage_context,
|
||||||
|
service_context=self.ingest_service_context,
|
||||||
|
store_nodes_override=True, # Force store nodes in index and document stores
|
||||||
|
show_progress=True,
|
||||||
|
)
|
||||||
|
|
||||||
# persist the index and nodes
|
# persist the index and nodes
|
||||||
self.storage_context.persist(persist_dir=local_data_path)
|
self.storage_context.persist(persist_dir=local_data_path)
|
||||||
return [
|
return [
|
||||||
|
|
Loading…
Reference in New Issue