Reuse existing stored index during ingestion (#1220)

2023-11-12 22:14:38 +01:00 · 2023-11-12 22:14:38 +01:00 · f394ca61bb
parent aa70d3d9f0
commit f394ca61bb
1 changed file with 21 additions and 8 deletions
--- a/private_gpt/server/ingest/ingest_service.py
+++ b/private_gpt/server/ingest/ingest_service.py
@@ -133,14 +133,27 @@ class IngestService:
            document.excluded_embed_metadata_keys = ["doc_id"]
            # We don't want the LLM to receive these metadata in the context
            document.excluded_llm_metadata_keys = ["file_name", "doc_id", "page_label"]
-        # create vectorStore index
-        VectorStoreIndex.from_documents(
-            documents,
-            storage_context=self.storage_context,
-            service_context=self.ingest_service_context,
-            store_nodes_override=True,  # Force store nodes in index and document stores
-            show_progress=True,
-        )
+
+        try:
+            # Load the index from storage and insert new documents,
+            index = load_index_from_storage(
+                storage_context=self.storage_context,
+                service_context=self.ingest_service_context,
+                store_nodes_override=True,  # Force store nodes in index and document stores
+                show_progress=True,
+            )
+            for doc in documents:
+                index.insert(doc)
+        except ValueError:
+            # Or create a new one if there is none
+            VectorStoreIndex.from_documents(
+                documents,
+                storage_context=self.storage_context,
+                service_context=self.ingest_service_context,
+                store_nodes_override=True,  # Force store nodes in index and document stores
+                show_progress=True,
+            )
+
        # persist the index and nodes
        self.storage_context.persist(persist_dir=local_data_path)
        return [