feat(UI): Select file to Query or Delete + Delete ALL (#1612)
--------- Co-authored-by: Robin Boone <rboone@sofics.com>
This commit is contained in:
		
							parent
							
								
									24fb80ca38
								
							
						
					
					
						commit
						aa13afde07
					
				|  | @ -1,4 +1,4 @@ | ||||||
| # This file is automatically @generated by Poetry 1.7.0 and should not be changed by hand. | # This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. | ||||||
| 
 | 
 | ||||||
| [[package]] | [[package]] | ||||||
| name = "accelerate" | name = "accelerate" | ||||||
|  | @ -1273,13 +1273,13 @@ grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] | ||||||
| 
 | 
 | ||||||
| [[package]] | [[package]] | ||||||
| name = "gradio" | name = "gradio" | ||||||
| version = "4.10.0" | version = "4.19.0" | ||||||
| description = "Python library for easily interacting with trained machine learning models" | description = "Python library for easily interacting with trained machine learning models" | ||||||
| optional = false | optional = false | ||||||
| python-versions = ">=3.8" | python-versions = ">=3.8" | ||||||
| files = [ | files = [ | ||||||
|     {file = "gradio-4.10.0-py3-none-any.whl", hash = "sha256:7595185716aff430381d010087d6ebc4eadef06fefc3dc1cfa76edcdd2c109db"}, |     {file = "gradio-4.19.0-py3-none-any.whl", hash = "sha256:d09732190acc0f33b5e7ea3235d267472bf74beeea62dabb7a82f93193155e09"}, | ||||||
|     {file = "gradio-4.10.0.tar.gz", hash = "sha256:d4ca039aa7f5c2783b2bbf7b465153c80bb4257edcca4d8b9c59ce6f61a75b97"}, |     {file = "gradio-4.19.0.tar.gz", hash = "sha256:e77e3ce8a4113865abd1dcf92cc9426d9da4896e0a6fd2824a0c90ec751dd442"}, | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| [package.dependencies] | [package.dependencies] | ||||||
|  | @ -1287,7 +1287,7 @@ aiofiles = ">=22.0,<24.0" | ||||||
| altair = ">=4.2.0,<6.0" | altair = ">=4.2.0,<6.0" | ||||||
| fastapi = "*" | fastapi = "*" | ||||||
| ffmpy = "*" | ffmpy = "*" | ||||||
| gradio-client = "0.7.3" | gradio-client = "0.10.0" | ||||||
| httpx = "*" | httpx = "*" | ||||||
| huggingface-hub = ">=0.19.3" | huggingface-hub = ">=0.19.3" | ||||||
| importlib-resources = ">=1.3,<7.0" | importlib-resources = ">=1.3,<7.0" | ||||||
|  | @ -1303,6 +1303,7 @@ pydantic = ">=2.0" | ||||||
| pydub = "*" | pydub = "*" | ||||||
| python-multipart = "*" | python-multipart = "*" | ||||||
| pyyaml = ">=5.0,<7.0" | pyyaml = ">=5.0,<7.0" | ||||||
|  | ruff = ">=0.1.7" | ||||||
| semantic-version = ">=2.0,<3.0" | semantic-version = ">=2.0,<3.0" | ||||||
| tomlkit = "0.12.0" | tomlkit = "0.12.0" | ||||||
| typer = {version = ">=0.9,<1.0", extras = ["all"]} | typer = {version = ">=0.9,<1.0", extras = ["all"]} | ||||||
|  | @ -1314,13 +1315,13 @@ oauth = ["authlib", "itsdangerous"] | ||||||
| 
 | 
 | ||||||
| [[package]] | [[package]] | ||||||
| name = "gradio-client" | name = "gradio-client" | ||||||
| version = "0.7.3" | version = "0.10.0" | ||||||
| description = "Python library for easily interacting with trained machine learning models" | description = "Python library for easily interacting with trained machine learning models" | ||||||
| optional = false | optional = false | ||||||
| python-versions = ">=3.8" | python-versions = ">=3.8" | ||||||
| files = [ | files = [ | ||||||
|     {file = "gradio_client-0.7.3-py3-none-any.whl", hash = "sha256:b91073770470ceb9f284977064c35bc0cffaf868eb887bf352db77aa01fe342a"}, |     {file = "gradio_client-0.10.0-py3-none-any.whl", hash = "sha256:2bcfe61710f9f1c8f336fa9ff0f5c5f0ea52079233196cd753ad30cccdfd585c"}, | ||||||
|     {file = "gradio_client-0.7.3.tar.gz", hash = "sha256:8146a1d19a125b38088dd201ddacd0008ea47ef9b0504d1c5b87ca09a43f4dcd"}, |     {file = "gradio_client-0.10.0.tar.gz", hash = "sha256:feaee70f18363d76f81a7d25fc3456f40ed5f92417e642c8f1bf86dc65e3a981"}, | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| [package.dependencies] | [package.dependencies] | ||||||
|  | @ -6111,4 +6112,4 @@ chroma = ["chromadb"] | ||||||
| [metadata] | [metadata] | ||||||
| lock-version = "2.0" | lock-version = "2.0" | ||||||
| python-versions = ">=3.11,<3.12" | python-versions = ">=3.11,<3.12" | ||||||
| content-hash = "c2bcf29b5c894a0fae9682145cd001dfb57bb4919c9097b5e27323ddee58fc8c" | content-hash = "121bf7797b74c02efaf11712e178c9c01880b79701eeff6485ede9ca8b25d307" | ||||||
|  |  | ||||||
|  | @ -189,6 +189,12 @@ class UISettings(BaseModel): | ||||||
|     default_query_system_prompt: str = Field( |     default_query_system_prompt: str = Field( | ||||||
|         None, description="The default system prompt to use for the query mode." |         None, description="The default system prompt to use for the query mode." | ||||||
|     ) |     ) | ||||||
|  |     delete_file_button_enabled: bool = Field( | ||||||
|  |         True, description="If the button to delete a file is enabled or not." | ||||||
|  |     ) | ||||||
|  |     delete_all_files_button_enabled: bool = Field( | ||||||
|  |         False, description="If the button to delete all files is enabled or not." | ||||||
|  |     ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class QdrantSettings(BaseModel): | class QdrantSettings(BaseModel): | ||||||
|  |  | ||||||
|  | @ -15,6 +15,7 @@ from pydantic import BaseModel | ||||||
| 
 | 
 | ||||||
| from private_gpt.constants import PROJECT_ROOT_PATH | from private_gpt.constants import PROJECT_ROOT_PATH | ||||||
| from private_gpt.di import global_injector | from private_gpt.di import global_injector | ||||||
|  | from private_gpt.open_ai.extensions.context_filter import ContextFilter | ||||||
| from private_gpt.server.chat.chat_service import ChatService, CompletionGen | from private_gpt.server.chat.chat_service import ChatService, CompletionGen | ||||||
| from private_gpt.server.chunks.chunks_service import Chunk, ChunksService | from private_gpt.server.chunks.chunks_service import Chunk, ChunksService | ||||||
| from private_gpt.server.ingest.ingest_service import IngestService | from private_gpt.server.ingest.ingest_service import IngestService | ||||||
|  | @ -31,7 +32,7 @@ UI_TAB_TITLE = "My Private GPT" | ||||||
| 
 | 
 | ||||||
| SOURCES_SEPARATOR = "\n\n Sources: \n" | SOURCES_SEPARATOR = "\n\n Sources: \n" | ||||||
| 
 | 
 | ||||||
| MODES = ["Query Docs", "Search in Docs", "LLM Chat"] | MODES = ["Query Files", "Search Files", "LLM Chat (no context from files)"] | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class Source(BaseModel): | class Source(BaseModel): | ||||||
|  | @ -74,6 +75,8 @@ class PrivateGptUi: | ||||||
|         # Cache the UI blocks |         # Cache the UI blocks | ||||||
|         self._ui_block = None |         self._ui_block = None | ||||||
| 
 | 
 | ||||||
|  |         self._selected_filename = None | ||||||
|  | 
 | ||||||
|         # Initialize system prompt based on default mode |         # Initialize system prompt based on default mode | ||||||
|         self.mode = MODES[0] |         self.mode = MODES[0] | ||||||
|         self._system_prompt = self._get_default_system_prompt(self.mode) |         self._system_prompt = self._get_default_system_prompt(self.mode) | ||||||
|  | @ -132,20 +135,34 @@ class PrivateGptUi: | ||||||
|                 ), |                 ), | ||||||
|             ) |             ) | ||||||
|         match mode: |         match mode: | ||||||
|             case "Query Docs": |             case "Query Files": | ||||||
|  | 
 | ||||||
|  |                 # Use only the selected file for the query | ||||||
|  |                 context_filter = None | ||||||
|  |                 if self._selected_filename is not None: | ||||||
|  |                     docs_ids = [] | ||||||
|  |                     for ingested_document in self._ingest_service.list_ingested(): | ||||||
|  |                         if ( | ||||||
|  |                             ingested_document.doc_metadata["file_name"] | ||||||
|  |                             == self._selected_filename | ||||||
|  |                         ): | ||||||
|  |                             docs_ids.append(ingested_document.doc_id) | ||||||
|  |                     context_filter = ContextFilter(docs_ids=docs_ids) | ||||||
|  | 
 | ||||||
|                 query_stream = self._chat_service.stream_chat( |                 query_stream = self._chat_service.stream_chat( | ||||||
|                     messages=all_messages, |                     messages=all_messages, | ||||||
|                     use_context=True, |                     use_context=True, | ||||||
|  |                     context_filter=context_filter, | ||||||
|                 ) |                 ) | ||||||
|                 yield from yield_deltas(query_stream) |                 yield from yield_deltas(query_stream) | ||||||
|             case "LLM Chat": |             case "LLM Chat (no context from files)": | ||||||
|                 llm_stream = self._chat_service.stream_chat( |                 llm_stream = self._chat_service.stream_chat( | ||||||
|                     messages=all_messages, |                     messages=all_messages, | ||||||
|                     use_context=False, |                     use_context=False, | ||||||
|                 ) |                 ) | ||||||
|                 yield from yield_deltas(llm_stream) |                 yield from yield_deltas(llm_stream) | ||||||
| 
 | 
 | ||||||
|             case "Search in Docs": |             case "Search Files": | ||||||
|                 response = self._chunks_service.retrieve_relevant( |                 response = self._chunks_service.retrieve_relevant( | ||||||
|                     text=message, limit=4, prev_next_chunks=0 |                     text=message, limit=4, prev_next_chunks=0 | ||||||
|                 ) |                 ) | ||||||
|  | @ -166,10 +183,10 @@ class PrivateGptUi: | ||||||
|         p = "" |         p = "" | ||||||
|         match mode: |         match mode: | ||||||
|             # For query chat mode, obtain default system prompt from settings |             # For query chat mode, obtain default system prompt from settings | ||||||
|             case "Query Docs": |             case "Query Files": | ||||||
|                 p = settings().ui.default_query_system_prompt |                 p = settings().ui.default_query_system_prompt | ||||||
|             # For chat mode, obtain default system prompt from settings |             # For chat mode, obtain default system prompt from settings | ||||||
|             case "LLM Chat": |             case "LLM Chat (no context from files)": | ||||||
|                 p = settings().ui.default_chat_system_prompt |                 p = settings().ui.default_chat_system_prompt | ||||||
|             # For any other mode, clear the system prompt |             # For any other mode, clear the system prompt | ||||||
|             case _: |             case _: | ||||||
|  | @ -205,8 +222,71 @@ class PrivateGptUi: | ||||||
|     def _upload_file(self, files: list[str]) -> None: |     def _upload_file(self, files: list[str]) -> None: | ||||||
|         logger.debug("Loading count=%s files", len(files)) |         logger.debug("Loading count=%s files", len(files)) | ||||||
|         paths = [Path(file) for file in files] |         paths = [Path(file) for file in files] | ||||||
|  | 
 | ||||||
|  |         # remove all existing Documents with name identical to a new file upload: | ||||||
|  |         file_names = [path.name for path in paths] | ||||||
|  |         doc_ids_to_delete = [] | ||||||
|  |         for ingested_document in self._ingest_service.list_ingested(): | ||||||
|  |             if ( | ||||||
|  |                 ingested_document.doc_metadata | ||||||
|  |                 and ingested_document.doc_metadata["file_name"] in file_names | ||||||
|  |             ): | ||||||
|  |                 doc_ids_to_delete.append(ingested_document.doc_id) | ||||||
|  |         if len(doc_ids_to_delete) > 0: | ||||||
|  |             logger.info( | ||||||
|  |                 "Uploading file(s) which were already ingested: %s document(s) will be replaced.", | ||||||
|  |                 len(doc_ids_to_delete), | ||||||
|  |             ) | ||||||
|  |             for doc_id in doc_ids_to_delete: | ||||||
|  |                 self._ingest_service.delete(doc_id) | ||||||
|  | 
 | ||||||
|         self._ingest_service.bulk_ingest([(str(path.name), path) for path in paths]) |         self._ingest_service.bulk_ingest([(str(path.name), path) for path in paths]) | ||||||
| 
 | 
 | ||||||
|  |     def _delete_all_files(self) -> Any: | ||||||
|  |         ingested_files = self._ingest_service.list_ingested() | ||||||
|  |         logger.debug("Deleting count=%s files", len(ingested_files)) | ||||||
|  |         for ingested_document in ingested_files: | ||||||
|  |             self._ingest_service.delete(ingested_document.doc_id) | ||||||
|  |         return [ | ||||||
|  |             gr.List(self._list_ingested_files()), | ||||||
|  |             gr.components.Button(interactive=False), | ||||||
|  |             gr.components.Button(interactive=False), | ||||||
|  |             gr.components.Textbox("All files"), | ||||||
|  |         ] | ||||||
|  | 
 | ||||||
|  |     def _delete_selected_file(self) -> Any: | ||||||
|  |         logger.debug("Deleting selected %s", self._selected_filename) | ||||||
|  |         # Note: keep looping for pdf's (each page became a Document) | ||||||
|  |         for ingested_document in self._ingest_service.list_ingested(): | ||||||
|  |             if ( | ||||||
|  |                 ingested_document.doc_metadata | ||||||
|  |                 and ingested_document.doc_metadata["file_name"] | ||||||
|  |                 == self._selected_filename | ||||||
|  |             ): | ||||||
|  |                 self._ingest_service.delete(ingested_document.doc_id) | ||||||
|  |         return [ | ||||||
|  |             gr.List(self._list_ingested_files()), | ||||||
|  |             gr.components.Button(interactive=False), | ||||||
|  |             gr.components.Button(interactive=False), | ||||||
|  |             gr.components.Textbox("All files"), | ||||||
|  |         ] | ||||||
|  | 
 | ||||||
|  |     def _deselect_selected_file(self) -> Any: | ||||||
|  |         self._selected_filename = None | ||||||
|  |         return [ | ||||||
|  |             gr.components.Button(interactive=False), | ||||||
|  |             gr.components.Button(interactive=False), | ||||||
|  |             gr.components.Textbox("All files"), | ||||||
|  |         ] | ||||||
|  | 
 | ||||||
|  |     def _selected_a_file(self, select_data: gr.SelectData) -> Any: | ||||||
|  |         self._selected_filename = select_data.value | ||||||
|  |         return [ | ||||||
|  |             gr.components.Button(interactive=True), | ||||||
|  |             gr.components.Button(interactive=True), | ||||||
|  |             gr.components.Textbox(self._selected_filename), | ||||||
|  |         ] | ||||||
|  | 
 | ||||||
|     def _build_ui_blocks(self) -> gr.Blocks: |     def _build_ui_blocks(self) -> gr.Blocks: | ||||||
|         logger.debug("Creating the UI blocks") |         logger.debug("Creating the UI blocks") | ||||||
|         with gr.Blocks( |         with gr.Blocks( | ||||||
|  | @ -235,7 +315,7 @@ class PrivateGptUi: | ||||||
|                     mode = gr.Radio( |                     mode = gr.Radio( | ||||||
|                         MODES, |                         MODES, | ||||||
|                         label="Mode", |                         label="Mode", | ||||||
|                         value="Query Docs", |                         value="Query Files", | ||||||
|                     ) |                     ) | ||||||
|                     upload_button = gr.components.UploadButton( |                     upload_button = gr.components.UploadButton( | ||||||
|                         "Upload File(s)", |                         "Upload File(s)", | ||||||
|  | @ -247,6 +327,7 @@ class PrivateGptUi: | ||||||
|                         self._list_ingested_files, |                         self._list_ingested_files, | ||||||
|                         headers=["File name"], |                         headers=["File name"], | ||||||
|                         label="Ingested Files", |                         label="Ingested Files", | ||||||
|  |                         height=235, | ||||||
|                         interactive=False, |                         interactive=False, | ||||||
|                         render=False,  # Rendered under the button |                         render=False,  # Rendered under the button | ||||||
|                     ) |                     ) | ||||||
|  | @ -260,6 +341,57 @@ class PrivateGptUi: | ||||||
|                         outputs=ingested_dataset, |                         outputs=ingested_dataset, | ||||||
|                     ) |                     ) | ||||||
|                     ingested_dataset.render() |                     ingested_dataset.render() | ||||||
|  |                     deselect_file_button = gr.components.Button( | ||||||
|  |                         "De-select selected file", size="sm", interactive=False | ||||||
|  |                     ) | ||||||
|  |                     selected_text = gr.components.Textbox( | ||||||
|  |                         "All files", label="Selected for Query or Deletion", max_lines=1 | ||||||
|  |                     ) | ||||||
|  |                     delete_file_button = gr.components.Button( | ||||||
|  |                         "🗑️ Delete selected file", | ||||||
|  |                         size="sm", | ||||||
|  |                         visible=settings().ui.delete_file_button_enabled, | ||||||
|  |                         interactive=False, | ||||||
|  |                     ) | ||||||
|  |                     delete_files_button = gr.components.Button( | ||||||
|  |                         "⚠️ Delete ALL files", | ||||||
|  |                         size="sm", | ||||||
|  |                         visible=settings().ui.delete_all_files_button_enabled, | ||||||
|  |                     ) | ||||||
|  |                     deselect_file_button.click( | ||||||
|  |                         self._deselect_selected_file, | ||||||
|  |                         outputs=[ | ||||||
|  |                             delete_file_button, | ||||||
|  |                             deselect_file_button, | ||||||
|  |                             selected_text, | ||||||
|  |                         ], | ||||||
|  |                     ) | ||||||
|  |                     ingested_dataset.select( | ||||||
|  |                         fn=self._selected_a_file, | ||||||
|  |                         outputs=[ | ||||||
|  |                             delete_file_button, | ||||||
|  |                             deselect_file_button, | ||||||
|  |                             selected_text, | ||||||
|  |                         ], | ||||||
|  |                     ) | ||||||
|  |                     delete_file_button.click( | ||||||
|  |                         self._delete_selected_file, | ||||||
|  |                         outputs=[ | ||||||
|  |                             ingested_dataset, | ||||||
|  |                             delete_file_button, | ||||||
|  |                             deselect_file_button, | ||||||
|  |                             selected_text, | ||||||
|  |                         ], | ||||||
|  |                     ) | ||||||
|  |                     delete_files_button.click( | ||||||
|  |                         self._delete_all_files, | ||||||
|  |                         outputs=[ | ||||||
|  |                             ingested_dataset, | ||||||
|  |                             delete_file_button, | ||||||
|  |                             deselect_file_button, | ||||||
|  |                             selected_text, | ||||||
|  |                         ], | ||||||
|  |                     ) | ||||||
|                     system_prompt_input = gr.Textbox( |                     system_prompt_input = gr.Textbox( | ||||||
|                         placeholder=self._system_prompt, |                         placeholder=self._system_prompt, | ||||||
|                         label="System Prompt", |                         label="System Prompt", | ||||||
|  |  | ||||||
|  | @ -31,7 +31,7 @@ types-pyyaml = "^6.0.12.12" | ||||||
| [tool.poetry.group.ui] | [tool.poetry.group.ui] | ||||||
| optional = true | optional = true | ||||||
| [tool.poetry.group.ui.dependencies] | [tool.poetry.group.ui.dependencies] | ||||||
| gradio = "^4.4.1" | gradio = "^4.19.0" | ||||||
| 
 | 
 | ||||||
| [tool.poetry.group.local] | [tool.poetry.group.local] | ||||||
| optional = true | optional = true | ||||||
|  |  | ||||||
|  | @ -18,10 +18,11 @@ class LocalIngestWorker: | ||||||
|         self.total_documents = 0 |         self.total_documents = 0 | ||||||
|         self.current_document_count = 0 |         self.current_document_count = 0 | ||||||
| 
 | 
 | ||||||
|         self._files_under_root_folder: list[Path] = list() |         self._files_under_root_folder: list[Path] = [] | ||||||
| 
 | 
 | ||||||
|     def _find_all_files_in_folder(self, root_path: Path, ignored: list[str]) -> None: |     def _find_all_files_in_folder(self, root_path: Path, ignored: list[str]) -> None: | ||||||
|         """Search all files under the root folder recursively. |         """Search all files under the root folder recursively. | ||||||
|  | 
 | ||||||
|         Count them at the same time |         Count them at the same time | ||||||
|         """ |         """ | ||||||
|         for file_path in root_path.iterdir(): |         for file_path in root_path.iterdir(): | ||||||
|  |  | ||||||
|  | @ -31,6 +31,9 @@ ui: | ||||||
|     You can only answer questions about the provided context.  |     You can only answer questions about the provided context.  | ||||||
|     If you know the answer but it is not based in the provided context, don't provide  |     If you know the answer but it is not based in the provided context, don't provide  | ||||||
|     the answer, just state the answer is not in the context provided. |     the answer, just state the answer is not in the context provided. | ||||||
|  |   delete_file_button_enabled: true | ||||||
|  |   delete_all_files_button_enabled: true | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| llm: | llm: | ||||||
|   mode: local |   mode: local | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue