From 45df99feb7eb308d7dd4770039814558b75d78ae Mon Sep 17 00:00:00 2001
From: jcbonnet-fwd <141936727+jcbonnet-fwd@users.noreply.github.com>
Date: Fri, 10 May 2024 16:44:08 +0200
Subject: [PATCH] Add timeout parameter for better support of openailike LLM
 tools on local computer (like LM Studio). (#1858)

feat(llm): Improve settings of the OpenAILike LLM
---
 private_gpt/components/llm/llm_component.py | 3 +++
 private_gpt/settings/settings.py            | 4 ++++
 settings-vllm.yaml                          | 4 ++++
 3 files changed, 11 insertions(+)

diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py
index 51d71a3..c29638b 100644
--- a/private_gpt/components/llm/llm_component.py
+++ b/private_gpt/components/llm/llm_component.py
@@ -123,6 +123,9 @@ class LLMComponent:
                 max_new_tokens=settings.llm.max_new_tokens,
                 messages_to_prompt=prompt_style.messages_to_prompt,
                 completion_to_prompt=prompt_style.completion_to_prompt,
+                tokenizer=settings.llm.tokenizer,
+                timeout=openai_settings.request_timeout,
+                reuse_client=False,
             )
         case "ollama":
             try:
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index c4c5e20..bd83fb8 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -205,6 +205,10 @@ class OpenAISettings(BaseModel):
         "gpt-3.5-turbo",
         description="OpenAI Model to use. Example: 'gpt-4'.",
     )
+    request_timeout: float = Field(
+        120.0,
+        description="Time elapsed until openailike server times out the request. Default is 120s. Format is float. ",
+    )
 
 
 class OllamaSettings(BaseModel):
diff --git a/settings-vllm.yaml b/settings-vllm.yaml
index 5a0a68c..1bfab6b 100644
--- a/settings-vllm.yaml
+++ b/settings-vllm.yaml
@@ -3,6 +3,9 @@ server:
 
 llm:
   mode: openailike
+  max_new_tokens: 512
+  tokenizer: mistralai/Mistral-7B-Instruct-v0.2
+  temperature: 0.1
 
 embedding:
   mode: huggingface
@@ -15,3 +18,4 @@ openai:
   api_base: http://localhost:8000/v1
   api_key: EMPTY
   model: facebook/opt-125m
+  request_timeout: 600.0
\ No newline at end of file