Reuse existing stored index during ingestion (#1220)
This commit is contained in:
parent
aa70d3d9f0
commit
f394ca61bb
|
@ -133,14 +133,27 @@ class IngestService:
|
|||
document.excluded_embed_metadata_keys = ["doc_id"]
|
||||
# We don't want the LLM to receive these metadata in the context
|
||||
document.excluded_llm_metadata_keys = ["file_name", "doc_id", "page_label"]
|
||||
# create vectorStore index
|
||||
VectorStoreIndex.from_documents(
|
||||
documents,
|
||||
storage_context=self.storage_context,
|
||||
service_context=self.ingest_service_context,
|
||||
store_nodes_override=True, # Force store nodes in index and document stores
|
||||
show_progress=True,
|
||||
)
|
||||
|
||||
try:
|
||||
# Load the existing index from storage and insert the new documents into it.
|
||||
index = load_index_from_storage(
|
||||
storage_context=self.storage_context,
|
||||
service_context=self.ingest_service_context,
|
||||
store_nodes_override=True, # Force store nodes in index and document stores
|
||||
show_progress=True,
|
||||
)
|
||||
for doc in documents:
|
||||
index.insert(doc)
|
||||
except ValueError:
|
||||
# Otherwise, create a new index if none exists in storage yet
|
||||
VectorStoreIndex.from_documents(
|
||||
documents,
|
||||
storage_context=self.storage_context,
|
||||
service_context=self.ingest_service_context,
|
||||
store_nodes_override=True, # Force store nodes in index and document stores
|
||||
show_progress=True,
|
||||
)
|
||||
|
||||
# persist the index and nodes
|
||||
self.storage_context.persist(persist_dir=local_data_path)
|
||||
return [
|
||||
|
|
Loading…
Reference in New Issue