Allow passing a system prompt (#1318)
parent 9c192ddd73
commit 64ed9cd872
@@ -23,7 +23,7 @@
           "Contextual Completions"
         ],
         "summary": "Completion",
-        "description": "We recommend most users use our Chat completions API.\n\nGiven a prompt, the model will return one predicted completion. If `use_context`\nis set to `true`, the model will use context coming from the ingested documents\nto create the response. The documents being used can be filtered using the\n`context_filter` and passing the document IDs to be used. Ingested documents IDs\ncan be found using `/ingest/list` endpoint. If you want all ingested documents to\nbe used, remove `context_filter` altogether.\n\nWhen using `'include_sources': true`, the API will return the source Chunks used\nto create the response, which come from the context provided.\n\nWhen using `'stream': true`, the API will return data chunks following [OpenAI's\nstreaming model](https://platform.openai.com/docs/api-reference/chat/streaming):\n```\n{\"id\":\"12345\",\"object\":\"completion.chunk\",\"created\":1694268190,\n\"model\":\"private-gpt\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"},\n\"finish_reason\":null}]}\n```",
+        "description": "We recommend most users use our Chat completions API.\n\nGiven a prompt, the model will return one predicted completion.\n\nOptionally include a `system_prompt` to influence the way the LLM answers.\n\nIf `use_context`\nis set to `true`, the model will use context coming from the ingested documents\nto create the response. The documents being used can be filtered using the\n`context_filter` and passing the document IDs to be used. Ingested documents IDs\ncan be found using `/ingest/list` endpoint. If you want all ingested documents to\nbe used, remove `context_filter` altogether.\n\nWhen using `'include_sources': true`, the API will return the source Chunks used\nto create the response, which come from the context provided.\n\nWhen using `'stream': true`, the API will return data chunks following [OpenAI's\nstreaming model](https://platform.openai.com/docs/api-reference/chat/streaming):\n```\n{\"id\":\"12345\",\"object\":\"completion.chunk\",\"created\":1694268190,\n\"model\":\"private-gpt\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"},\n\"finish_reason\":null}]}\n```",
         "operationId": "prompt_completion_v1_completions_post",
         "requestBody": {
           "content": {
@@ -65,7 +65,7 @@
           "Contextual Completions"
         ],
         "summary": "Chat Completion",
-        "description": "Given a list of messages comprising a conversation, return a response.\n\nIf `use_context` is set to `true`, the model will use context coming\nfrom the ingested documents to create the response. The documents being used can\nbe filtered using the `context_filter` and passing the document IDs to be used.\nIngested documents IDs can be found using `/ingest/list` endpoint. If you want\nall ingested documents to be used, remove `context_filter` altogether.\n\nWhen using `'include_sources': true`, the API will return the source Chunks used\nto create the response, which come from the context provided.\n\nWhen using `'stream': true`, the API will return data chunks following [OpenAI's\nstreaming model](https://platform.openai.com/docs/api-reference/chat/streaming):\n```\n{\"id\":\"12345\",\"object\":\"completion.chunk\",\"created\":1694268190,\n\"model\":\"private-gpt\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"},\n\"finish_reason\":null}]}\n```",
+        "description": "Given a list of messages comprising a conversation, return a response.\n\nOptionally include a `system_prompt` to influence the way the LLM answers.\n\nIf `use_context` is set to `true`, the model will use context coming\nfrom the ingested documents to create the response. The documents being used can\nbe filtered using the `context_filter` and passing the document IDs to be used.\nIngested documents IDs can be found using `/ingest/list` endpoint. If you want\nall ingested documents to be used, remove `context_filter` altogether.\n\nWhen using `'include_sources': true`, the API will return the source Chunks used\nto create the response, which come from the context provided.\n\nWhen using `'stream': true`, the API will return data chunks following [OpenAI's\nstreaming model](https://platform.openai.com/docs/api-reference/chat/streaming):\n```\n{\"id\":\"12345\",\"object\":\"completion.chunk\",\"created\":1694268190,\n\"model\":\"private-gpt\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"},\n\"finish_reason\":null}]}\n```",
         "operationId": "chat_completion_v1_chat_completions_post",
         "requestBody": {
           "content": {
@@ -338,6 +338,17 @@
             "type": "array",
             "title": "Messages"
           },
+          "system_prompt": {
+            "anyOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "System Prompt"
+          },
           "use_context": {
             "type": "boolean",
             "title": "Use Context",
@@ -384,6 +395,7 @@
               }
             ],
             "stream": false,
+            "system_prompt": "You are a rapper. Always answer with a rap.",
             "use_context": true
           }
         ]
@@ -391,10 +403,7 @@
       "Chunk": {
         "properties": {
           "object": {
-            "type": "string",
-            "enum": [
-              "context.chunk"
-            ],
+            "const": "context.chunk",
             "title": "Object"
           },
           "score": {
@@ -506,17 +515,11 @@
       "ChunksResponse": {
         "properties": {
           "object": {
-            "type": "string",
-            "enum": [
-              "list"
-            ],
+            "const": "list",
             "title": "Object"
           },
           "model": {
-            "type": "string",
-            "enum": [
-              "private-gpt"
-            ],
+            "const": "private-gpt",
             "title": "Model"
           },
           "data": {
@@ -541,6 +544,17 @@
             "type": "string",
             "title": "Prompt"
           },
+          "system_prompt": {
+            "anyOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "System Prompt"
+          },
           "use_context": {
             "type": "boolean",
             "title": "Use Context",
@@ -616,10 +630,7 @@
             "title": "Index"
           },
           "object": {
-            "type": "string",
-            "enum": [
-              "embedding"
-            ],
+            "const": "embedding",
             "title": "Object"
           },
           "embedding": {
@@ -670,17 +681,11 @@
       "EmbeddingsResponse": {
         "properties": {
           "object": {
-            "type": "string",
-            "enum": [
-              "list"
-            ],
+            "const": "list",
             "title": "Object"
           },
           "model": {
-            "type": "string",
-            "enum": [
-              "private-gpt"
-            ],
+            "const": "private-gpt",
             "title": "Model"
           },
           "data": {
@@ -715,33 +720,22 @@
       "HealthResponse": {
         "properties": {
           "status": {
-            "type": "string",
-            "enum": [
-              "ok"
-            ],
-            "title": "Status"
+            "const": "ok",
+            "title": "Status",
+            "default": "ok"
           }
         },
         "type": "object",
-        "required": [
-          "status"
-        ],
         "title": "HealthResponse"
       },
       "IngestResponse": {
         "properties": {
           "object": {
-            "type": "string",
-            "enum": [
-              "list"
-            ],
+            "const": "list",
             "title": "Object"
           },
           "model": {
-            "type": "string",
-            "enum": [
-              "private-gpt"
-            ],
+            "const": "private-gpt",
             "title": "Model"
           },
           "data": {
@@ -763,10 +757,7 @@
       "IngestedDoc": {
         "properties": {
           "object": {
-            "type": "string",
-            "enum": [
-              "ingest.document"
-            ],
+            "const": "ingest.document",
             "title": "Object"
           },
           "doc_id": {
@@ -888,10 +879,7 @@
             ]
           },
           "model": {
-            "type": "string",
-            "enum": [
-              "private-gpt"
-            ],
+            "const": "private-gpt",
             "title": "Model"
          },
           "choices": {
@@ -1021,4 +1009,4 @@
       "description": "Simple health API to make sure the server is up and running."
     }
   ]
-  }
+}
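
As an illustration of the updated Completion endpoint documented above, here is a minimal client sketch. The base URL (a local server on port 8001) and the use of the `requests` package are assumptions about the deployment; the request fields mirror the schema and example added in this diff.

```python
import requests

# Hypothetical base URL; point this at your own PrivateGPT instance.
BASE_URL = "http://localhost:8001"

payload = {
    "prompt": "How do you fry an egg?",
    # New optional field introduced by this commit.
    "system_prompt": "You are a rapper. Always answer with a rap.",
    "stream": False,
    "use_context": False,
    "include_sources": False,
}

# POST /v1/completions returns an OpenAI-style completion object.
resp = requests.post(f"{BASE_URL}/v1/completions", json=payload, timeout=120)
resp.raise_for_status()
print(resp.json())
```
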
@@ -28,10 +28,14 @@ class ChatBody(BaseModel):
             "examples": [
                 {
                     "messages": [
+                        {
+                            "role": "system",
+                            "content": "You are a rapper. Always answer with a rap.",
+                        },
                         {
                             "role": "user",
                             "content": "How do you fry an egg?",
-                        }
+                        },
                     ],
                     "stream": False,
                     "use_context": True,
@@ -56,6 +60,9 @@ def chat_completion(
 ) -> OpenAICompletion | StreamingResponse:
     """Given a list of messages comprising a conversation, return a response.
 
+    Optionally include an initial `role: system` message to influence the way
+    the LLM answers.
+
     If `use_context` is set to `true`, the model will use context coming
     from the ingested documents to create the response. The documents being used can
     be filtered using the `context_filter` and passing the document IDs to be used.
@@ -79,7 +86,9 @@ def chat_completion(
     ]
     if body.stream:
         completion_gen = service.stream_chat(
-            all_messages, body.use_context, body.context_filter
+            messages=all_messages,
+            use_context=body.use_context,
+            context_filter=body.context_filter,
         )
         return StreamingResponse(
             to_openai_sse_stream(
@@ -89,7 +98,11 @@ def chat_completion(
             media_type="text/event-stream",
         )
     else:
-        completion = service.chat(all_messages, body.use_context, body.context_filter)
+        completion = service.chat(
+            messages=all_messages,
+            use_context=body.use_context,
+            context_filter=body.context_filter,
+        )
         return to_openai_response(
             completion.response, completion.sources if body.include_sources else None
         )
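
The chat endpoint gets the same capability through a leading `role: system` message, as in the example body added above. A comparable hedged sketch, under the same assumed local base URL:

```python
import requests

BASE_URL = "http://localhost:8001"  # assumed local deployment

payload = {
    "messages": [
        # The optional leading system message steers the LLM's answers.
        {"role": "system", "content": "You are a rapper. Always answer with a rap."},
        {"role": "user", "content": "How do you fry an egg?"},
    ],
    "stream": False,
    "use_context": False,
}

resp = requests.post(f"{BASE_URL}/v1/chat/completions", json=payload, timeout=120)
resp.raise_for_status()
print(resp.json())
```
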
@@ -1,12 +1,13 @@
+from dataclasses import dataclass
+
 from injector import inject, singleton
 from llama_index import ServiceContext, StorageContext, VectorStoreIndex
-from llama_index.chat_engine import ContextChatEngine
+from llama_index.chat_engine import ContextChatEngine, SimpleChatEngine
 from llama_index.chat_engine.types import (
     BaseChatEngine,
 )
 from llama_index.indices.postprocessor import MetadataReplacementPostProcessor
-from llama_index.llm_predictor.utils import stream_chat_response_to_tokens
-from llama_index.llms import ChatMessage
+from llama_index.llms import ChatMessage, MessageRole
 from llama_index.types import TokenGen
 from pydantic import BaseModel
 
@@ -30,6 +31,40 @@ class CompletionGen(BaseModel):
     sources: list[Chunk] | None = None
 
 
+@dataclass
+class ChatEngineInput:
+    system_message: ChatMessage | None = None
+    last_message: ChatMessage | None = None
+    chat_history: list[ChatMessage] | None = None
+
+    @classmethod
+    def from_messages(cls, messages: list[ChatMessage]) -> "ChatEngineInput":
+        # Detect if there is a system message, extract the last message and chat history
+        system_message = (
+            messages[0]
+            if len(messages) > 0 and messages[0].role == MessageRole.SYSTEM
+            else None
+        )
+        last_message = (
+            messages[-1]
+            if len(messages) > 0 and messages[-1].role == MessageRole.USER
+            else None
+        )
+        # Remove from messages list the system message and last message,
+        # if they exist. The rest is the chat history.
+        if system_message:
+            messages.pop(0)
+        if last_message:
+            messages.pop(-1)
+        chat_history = messages if len(messages) > 0 else None
+
+        return cls(
+            system_message=system_message,
+            last_message=last_message,
+            chat_history=chat_history,
+        )
+
+
 @singleton
 class ChatService:
     @inject
@@ -58,18 +93,28 @@ class ChatService:
         )
 
     def _chat_engine(
-        self, context_filter: ContextFilter | None = None
+        self,
+        system_prompt: str | None = None,
+        use_context: bool = False,
+        context_filter: ContextFilter | None = None,
     ) -> BaseChatEngine:
-        vector_index_retriever = self.vector_store_component.get_retriever(
-            index=self.index, context_filter=context_filter
-        )
-        return ContextChatEngine.from_defaults(
-            retriever=vector_index_retriever,
-            service_context=self.service_context,
-            node_postprocessors=[
-                MetadataReplacementPostProcessor(target_metadata_key="window"),
-            ],
-        )
+        if use_context:
+            vector_index_retriever = self.vector_store_component.get_retriever(
+                index=self.index, context_filter=context_filter
+            )
+            return ContextChatEngine.from_defaults(
+                system_prompt=system_prompt,
+                retriever=vector_index_retriever,
+                service_context=self.service_context,
+                node_postprocessors=[
+                    MetadataReplacementPostProcessor(target_metadata_key="window"),
+                ],
+            )
+        else:
+            return SimpleChatEngine.from_defaults(
+                system_prompt=system_prompt,
+                service_context=self.service_context,
+            )
 
     def stream_chat(
         self,
@@ -77,24 +122,34 @@ class ChatService:
         use_context: bool = False,
         context_filter: ContextFilter | None = None,
     ) -> CompletionGen:
-        if use_context:
-            last_message = messages[-1].content
-            chat_engine = self._chat_engine(context_filter=context_filter)
-            streaming_response = chat_engine.stream_chat(
-                message=last_message if last_message is not None else "",
-                chat_history=messages[:-1],
-            )
-            sources = [
-                Chunk.from_node(node) for node in streaming_response.source_nodes
-            ]
-            completion_gen = CompletionGen(
-                response=streaming_response.response_gen, sources=sources
-            )
-        else:
-            stream = self.llm_service.llm.stream_chat(messages)
-            completion_gen = CompletionGen(
-                response=stream_chat_response_to_tokens(stream)
-            )
+        chat_engine_input = ChatEngineInput.from_messages(messages)
+        last_message = (
+            chat_engine_input.last_message.content
+            if chat_engine_input.last_message
+            else None
+        )
+        system_prompt = (
+            chat_engine_input.system_message.content
+            if chat_engine_input.system_message
+            else None
+        )
+        chat_history = (
+            chat_engine_input.chat_history if chat_engine_input.chat_history else None
+        )
+
+        chat_engine = self._chat_engine(
+            system_prompt=system_prompt,
+            use_context=use_context,
+            context_filter=context_filter,
+        )
+        streaming_response = chat_engine.stream_chat(
+            message=last_message if last_message is not None else "",
+            chat_history=chat_history,
+        )
+        sources = [Chunk.from_node(node) for node in streaming_response.source_nodes]
+        completion_gen = CompletionGen(
+            response=streaming_response.response_gen, sources=sources
+        )
         return completion_gen
 
     def chat(
@@ -103,18 +158,30 @@ class ChatService:
         use_context: bool = False,
         context_filter: ContextFilter | None = None,
     ) -> Completion:
-        if use_context:
-            last_message = messages[-1].content
-            chat_engine = self._chat_engine(context_filter=context_filter)
-            wrapped_response = chat_engine.chat(
-                message=last_message if last_message is not None else "",
-                chat_history=messages[:-1],
-            )
-            sources = [Chunk.from_node(node) for node in wrapped_response.source_nodes]
-            completion = Completion(response=wrapped_response.response, sources=sources)
-        else:
-            chat_response = self.llm_service.llm.chat(messages)
-            response_content = chat_response.message.content
-            response = response_content if response_content is not None else ""
-            completion = Completion(response=response)
+        chat_engine_input = ChatEngineInput.from_messages(messages)
+        last_message = (
+            chat_engine_input.last_message.content
+            if chat_engine_input.last_message
+            else None
+        )
+        system_prompt = (
+            chat_engine_input.system_message.content
+            if chat_engine_input.system_message
+            else None
+        )
+        chat_history = (
+            chat_engine_input.chat_history if chat_engine_input.chat_history else None
+        )
+
+        chat_engine = self._chat_engine(
+            system_prompt=system_prompt,
+            use_context=use_context,
+            context_filter=context_filter,
+        )
+        wrapped_response = chat_engine.chat(
+            message=last_message if last_message is not None else "",
+            chat_history=chat_history,
+        )
+        sources = [Chunk.from_node(node) for node in wrapped_response.source_nodes]
+        completion = Completion(response=wrapped_response.response, sources=sources)
         return completion
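
To make the message-splitting logic above concrete, here is a small usage sketch of `ChatEngineInput.from_messages`. The import path for `ChatEngineInput` is an assumption (adjust it to wherever this service module lives in your checkout); the `ChatMessage` and `MessageRole` imports are the same ones used in the diff.

```python
from llama_index.llms import ChatMessage, MessageRole

# Assumed module path for the dataclass added above.
from private_gpt.server.chat.chat_service import ChatEngineInput

messages = [
    ChatMessage(role=MessageRole.SYSTEM, content="Answer like a pirate."),
    ChatMessage(role=MessageRole.USER, content="Hello"),
    ChatMessage(role=MessageRole.ASSISTANT, content="Ahoy! How can I help?"),
    ChatMessage(role=MessageRole.USER, content="How do you fry an egg?"),
]

chat_engine_input = ChatEngineInput.from_messages(messages)
# system_message -> the leading system message (becomes the engine's system_prompt)
# chat_history   -> the middle turns ("Hello" / "Ahoy! How can I help?")
# last_message   -> the final user message, passed to chat()/stream_chat()
print(chat_engine_input.system_message.content)
print([m.content for m in (chat_engine_input.chat_history or [])])
print(chat_engine_input.last_message.content)
```
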
@@ -15,6 +15,7 @@ completions_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated
 
 class CompletionsBody(BaseModel):
     prompt: str
+    system_prompt: str | None = None
     use_context: bool = False
     context_filter: ContextFilter | None = None
     include_sources: bool = True
@@ -25,6 +26,7 @@ class CompletionsBody(BaseModel):
             "examples": [
                 {
                     "prompt": "How do you fry an egg?",
+                    "system_prompt": "You are a rapper. Always answer with a rap.",
                     "stream": False,
                     "use_context": False,
                     "include_sources": False,
@@ -46,7 +48,11 @@ def prompt_completion(
 ) -> OpenAICompletion | StreamingResponse:
     """We recommend most users use our Chat completions API.
 
-    Given a prompt, the model will return one predicted completion. If `use_context`
+    Given a prompt, the model will return one predicted completion.
+
+    Optionally include a `system_prompt` to influence the way the LLM answers.
+
+    If `use_context`
     is set to `true`, the model will use context coming from the ingested documents
     to create the response. The documents being used can be filtered using the
     `context_filter` and passing the document IDs to be used. Ingested documents IDs
@@ -64,9 +70,13 @@ def prompt_completion(
     "finish_reason":null}]}
     ```
     """
-    message = OpenAIMessage(content=body.prompt, role="user")
+    messages = [OpenAIMessage(content=body.prompt, role="user")]
+    # If system prompt is passed, create a fake message with the system prompt.
+    if body.system_prompt:
+        messages.insert(0, OpenAIMessage(content=body.system_prompt, role="system"))
+
     chat_body = ChatBody(
-        messages=[message],
+        messages=messages,
         use_context=body.use_context,
         stream=body.stream,
         include_sources=body.include_sources,
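
When `stream` is set to true, the endpoint emits OpenAI-style data chunks, as the docstring above describes. A hedged consumer sketch follows; the base URL and the `data: ...` / `[DONE]` SSE framing are assumptions about an OpenAI-compatible stream rather than guarantees taken from this diff.

```python
import json

import requests

BASE_URL = "http://localhost:8001"  # assumed local deployment

payload = {
    "prompt": "How do you fry an egg?",
    "system_prompt": "You are a rapper. Always answer with a rap.",
    "stream": True,
    "use_context": False,
    "include_sources": False,
}

with requests.post(
    f"{BASE_URL}/v1/completions", json=payload, stream=True, timeout=120
) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines(decode_unicode=True):
        # Server-sent events: each "data:" line carries one OpenAI-style chunk.
        if not line or not line.startswith("data: "):
            continue
        data = line[len("data: "):]
        if data.strip() == "[DONE]":  # assumed OpenAI-style end-of-stream sentinel
            break
        chunk = json.loads(data)
        delta = chunk["choices"][0]["delta"].get("content") or ""
        print(delta, end="", flush=True)
```
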
@@ -116,6 +116,17 @@ class PrivateGptUi:
         all_messages = [*build_history(), new_message]
         match mode:
             case "Query Docs":
+                # Add a system message to force the behaviour of the LLM
+                # to answer only questions about the provided context.
+                all_messages.insert(
+                    0,
+                    ChatMessage(
+                        content="You can only answer questions about the provided context. If you know the answer "
+                        "but it is not based in the provided context, don't provide the answer, just state "
+                        "the answer is not in the context provided.",
+                        role=MessageRole.SYSTEM,
+                    ),
+                )
                 query_stream = self._chat_service.stream_chat(
                     messages=all_messages,
                     use_context=True,

@@ -22,6 +22,7 @@ ui:
 
 llm:
   mode: local
+
 embedding:
   # Should be matching the value above in most cases
   mode: local