From 20bad17c9857809158e689e9671402136c1e3d84 Mon Sep 17 00:00:00 2001
From: Javier Martinez
Date: Mon, 29 Jul 2024 13:25:42 +0200
Subject: [PATCH] feat(llm): autopull ollama models (#2019)

* chore: update ollama (llm)
* feat: allow to autopull ollama models
* fix: mypy
* chore: install always ollama client
* refactor: check connection and pull ollama method to utils
* docs: update ollama config with autopulling info
---
 fern/docs/pages/installation/installation.mdx | 16 +++++----
 poetry.lock                                   | 33 ++++++++++++++-----
 .../embedding/embedding_component.py          | 31 ++++++++++++++++++
 private_gpt/components/llm/llm_component.py   | 23 +++++++++++--
 private_gpt/settings/settings.py              |  4 +++
 private_gpt/utils/ollama.py                   | 32 ++++++++++++++++++
 pyproject.toml                                |  9 +++--
 settings.yaml                                 |  1 +
 8 files changed, 128 insertions(+), 21 deletions(-)
 create mode 100644 private_gpt/utils/ollama.py

diff --git a/fern/docs/pages/installation/installation.mdx b/fern/docs/pages/installation/installation.mdx
index 80f1c74..c3d232a 100644
--- a/fern/docs/pages/installation/installation.mdx
+++ b/fern/docs/pages/installation/installation.mdx
@@ -130,18 +130,22 @@ Go to [ollama.ai](https://ollama.ai/) and follow the instructions to install Ollama
 
 After the installation, make sure the Ollama desktop app is closed.
 
-Install the models to be used, the default settings-ollama.yaml is configured to user `mistral 7b` LLM (~4GB) and `nomic-embed-text` Embeddings (~275MB). Therefore:
+Now, start Ollama service (it will start a local inference server, serving both the LLM and the Embeddings):
+```bash
+ollama serve
+```
+
+Install the models to be used; the default settings-ollama.yaml is configured to use `mistral 7b` LLM (~4GB) and `nomic-embed-text` Embeddings (~275MB).
+
+By default, PrivateGPT will automatically pull models as needed. This behavior can be changed by modifying the `ollama.autopull_models` property.
+
+In any case, if you want to pull the models manually, run the following commands:
 ```bash
 ollama pull mistral
 ollama pull nomic-embed-text
 ```
 
-Now, start Ollama service (it will start a local inference server, serving both the LLM and the Embeddings):
-```bash
-ollama serve
-```
-
 Once done, on a different terminal, you can install PrivateGPT with the following command:
 ```bash
 poetry install --extras "ui llms-ollama embeddings-ollama vector-stores-qdrant"
 ```
diff --git a/poetry.lock b/poetry.lock
index 2ac5cec..a7467be 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2022,13 +2022,13 @@ test = ["Cython (>=0.29.24,<0.30.0)"]
 
 [[package]]
 name = "httpx"
-version = "0.25.2"
+version = "0.27.0"
 description = "The next generation HTTP client."
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "httpx-0.25.2-py3-none-any.whl", hash = "sha256:a05d3d052d9b2dfce0e3896636467f8a5342fb2b902c819428e1ac65413ca118"},
-    {file = "httpx-0.25.2.tar.gz", hash = "sha256:8b8fcaa0c8ea7b05edd69a094e63a2094c4efcb48129fb757361bc423c0ad9e8"},
+    {file = "httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5"},
+    {file = "httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5"},
 ]
 
 [package.dependencies]
@@ -2625,17 +2625,18 @@ llama-index-core = ">=0.10.1,<0.11.0"
 
 [[package]]
 name = "llama-index-llms-ollama"
-version = "0.1.5"
+version = "0.2.2"
 description = "llama-index llms ollama integration"
 optional = true
 python-versions = "<4.0,>=3.8.1"
 files = [
-    {file = "llama_index_llms_ollama-0.1.5-py3-none-any.whl", hash = "sha256:8e237978765458c9b175d2e25fc25162df8dc70a538b1b9ef9ea18617f8cdf5a"},
-    {file = "llama_index_llms_ollama-0.1.5.tar.gz", hash = "sha256:75697d96c860d87e80cce90c9ea425cbd236918458e0feaaee03597068ba9844"},
+    {file = "llama_index_llms_ollama-0.2.2-py3-none-any.whl", hash = "sha256:c224d7c17d641045bc9b6a6681dab434c1c421af0bacb5825eea444fefd8ed78"},
+    {file = "llama_index_llms_ollama-0.2.2.tar.gz", hash = "sha256:0c7f192cb8b768707bd5154b97e2a41284732d62070eb76190dee125e95245ea"},
 ]
 
 [package.dependencies]
 llama-index-core = ">=0.10.1,<0.11.0"
+ollama = ">=0.3.0"
 
 [[package]]
 name = "llama-index-llms-openai"
@@ -3633,6 +3634,20 @@ rsa = ["cryptography (>=3.0.0)"]
 signals = ["blinker (>=1.4.0)"]
 signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"]
 
+[[package]]
+name = "ollama"
+version = "0.3.0"
+description = "The official Python client for Ollama."
+optional = true
+python-versions = "<4.0,>=3.8"
+files = [
+    {file = "ollama-0.3.0-py3-none-any.whl", hash = "sha256:cd7010c4e2a37d7f08f36cd35c4592b14f1ec0d1bf3df10342cd47963d81ad7a"},
+    {file = "ollama-0.3.0.tar.gz", hash = "sha256:6ff493a2945ba76cdd6b7912a1cd79a45cfd9ba9120d14adeb63b2b5a7f353da"},
+]
+
+[package.dependencies]
+httpx = ">=0.27.0,<0.28.0"
+
 [[package]]
 name = "onnxruntime"
 version = "1.17.1"
@@ -6844,13 +6859,13 @@ cffi = ["cffi (>=1.11)"]
 embeddings-azopenai = ["llama-index-embeddings-azure-openai"]
 embeddings-gemini = ["llama-index-embeddings-gemini"]
 embeddings-huggingface = ["llama-index-embeddings-huggingface"]
-embeddings-ollama = ["llama-index-embeddings-ollama"]
+embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"]
 embeddings-openai = ["llama-index-embeddings-openai"]
 embeddings-sagemaker = ["boto3"]
 llms-azopenai = ["llama-index-llms-azure-openai"]
 llms-gemini = ["google-generativeai", "llama-index-llms-gemini"]
 llms-llama-cpp = ["llama-index-llms-llama-cpp"]
-llms-ollama = ["llama-index-llms-ollama"]
+llms-ollama = ["llama-index-llms-ollama", "ollama"]
 llms-openai = ["llama-index-llms-openai"]
 llms-openai-like = ["llama-index-llms-openai-like"]
 llms-sagemaker = ["boto3"]
@@ -6866,4 +6881,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.11,<3.12"
-content-hash = "66edb004ccbe7915d68567ea31a7851c87717185e2b504048cc645d1d511a66e"
+content-hash = "dce5b88d92bcfa047bf1e4c9fe2dbb9c63eb864d6bbca2340801ac0a2f02a8d4"
diff --git a/private_gpt/components/embedding/embedding_component.py b/private_gpt/components/embedding/embedding_component.py
index 29ef1cf..89a577b 100644
--- a/private_gpt/components/embedding/embedding_component.py
+++ b/private_gpt/components/embedding/embedding_component.py
@@ -71,16 +71,45 @@ class EmbeddingComponent:
                     from llama_index.embeddings.ollama import (  # type: ignore
                         OllamaEmbedding,
                     )
+                    from ollama import Client  # type: ignore
                 except ImportError as e:
                     raise ImportError(
                         "Local dependencies not found, install with `poetry install --extras embeddings-ollama`"
                     ) from e
 
                 ollama_settings = settings.ollama
+
+                # Calculate the embedding model name. If no tag is provided, "latest" is used.
+                model_name = (
+                    ollama_settings.embedding_model + ":latest"
+                    if ":" not in ollama_settings.embedding_model
+                    else ollama_settings.embedding_model
+                )
+
                 self.embedding_model = OllamaEmbedding(
-                    model_name=ollama_settings.embedding_model,
+                    model_name=model_name,
                     base_url=ollama_settings.embedding_api_base,
                 )
+
+                if ollama_settings.autopull_models:
+                    from private_gpt.utils.ollama import (
+                        check_connection,
+                        pull_model,
+                    )
+
+                    # TODO: Reuse llama-index client when llama-index is updated
+                    client = Client(
+                        host=ollama_settings.embedding_api_base,
+                        timeout=ollama_settings.request_timeout,
+                    )
+
+                    if not check_connection(client):
+                        raise ValueError(
+                            f"Failed to connect to Ollama, "
+                            f"check if Ollama server is running on {ollama_settings.embedding_api_base}"
+                        )
+                    pull_model(client, model_name)
+
             case "azopenai":
                 try:
                     from llama_index.embeddings.azure_openai import (  # type: ignore
diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py
index d4ab81f..e3a0281 100644
--- a/private_gpt/components/llm/llm_component.py
+++ b/private_gpt/components/llm/llm_component.py
@@ -146,8 +146,15 @@
                     "repeat_penalty": ollama_settings.repeat_penalty,  # ollama llama-cpp
                 }
 
-                self.llm = Ollama(
-                    model=ollama_settings.llm_model,
+                # Calculate the LLM model name. If no tag is provided, "latest" is used.
+                model_name = (
+                    ollama_settings.llm_model + ":latest"
+                    if ":" not in ollama_settings.llm_model
+                    else ollama_settings.llm_model
+                )
+
+                llm = Ollama(
+                    model=model_name,
                     base_url=ollama_settings.api_base,
                     temperature=settings.llm.temperature,
                     context_window=settings.llm.context_window,
@@ -155,6 +162,16 @@
                     request_timeout=ollama_settings.request_timeout,
                 )
 
+                if ollama_settings.autopull_models:
+                    from private_gpt.utils.ollama import check_connection, pull_model
+
+                    if not check_connection(llm.client):
+                        raise ValueError(
+                            f"Failed to connect to Ollama, "
+                            f"check if Ollama server is running on {ollama_settings.api_base}"
+                        )
+                    pull_model(llm.client, model_name)
+
                 if (
                     ollama_settings.keep_alive
                     != ollama_settings.model_fields["keep_alive"].default
@@ -172,6 +189,8 @@
                     Ollama.complete = add_keep_alive(Ollama.complete)
                     Ollama.stream_complete = add_keep_alive(Ollama.stream_complete)
 
+                self.llm = llm
+
             case "azopenai":
                 try:
                     from llama_index.llms.azure_openai import (  # type: ignore
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index 30514dd..40b96ae 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -290,6 +290,10 @@ class OllamaSettings(BaseModel):
         120.0,
         description="Time elapsed until ollama times out the request. Default is 120s. Format is float.",
     )
+    autopull_models: bool = Field(
+        False,
+        description="If set to True, Ollama will automatically pull the models from the API base.",
+    )
 
 
 class AzureOpenAISettings(BaseModel):
diff --git a/private_gpt/utils/ollama.py b/private_gpt/utils/ollama.py
new file mode 100644
index 0000000..41c7ecc
--- /dev/null
+++ b/private_gpt/utils/ollama.py
@@ -0,0 +1,32 @@
+import logging
+
+try:
+    from ollama import Client  # type: ignore
+except ImportError as e:
+    raise ImportError(
+        "Ollama dependencies not found, install with `poetry install --extras llms-ollama or embeddings-ollama`"
+    ) from e
+
+logger = logging.getLogger(__name__)
+
+
+def check_connection(client: Client) -> bool:
+    try:
+        client.list()
+        return True
+    except Exception as e:
+        logger.error(f"Failed to connect to Ollama: {e!s}")
+        return False
+
+
+def pull_model(client: Client, model_name: str, raise_error: bool = True) -> None:
+    try:
+        installed_models = [model["name"] for model in client.list().get("models", {})]
+        if model_name not in installed_models:
+            logger.info(f"Pulling model {model_name}. Please wait...")
+            client.pull(model_name)
+            logger.info(f"Model {model_name} pulled successfully")
+    except Exception as e:
+        logger.error(f"Failed to pull model {model_name}: {e!s}")
+        if raise_error:
+            raise e
diff --git a/pyproject.toml b/pyproject.toml
index 7b34c07..1144c31 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,7 +22,7 @@ llama-index-readers-file = "^0.1.27"
 llama-index-llms-llama-cpp = {version = "^0.1.4", optional = true}
 llama-index-llms-openai = {version = "^0.1.25", optional = true}
 llama-index-llms-openai-like = {version ="^0.1.3", optional = true}
-llama-index-llms-ollama = {version ="^0.1.5", optional = true}
+llama-index-llms-ollama = {version ="^0.2.2", optional = true}
 llama-index-llms-azure-openai = {version ="^0.1.8", optional = true}
 llama-index-llms-gemini = {version ="^0.1.11", optional = true}
 llama-index-embeddings-ollama = {version ="^0.1.2", optional = true}
@@ -62,16 +62,19 @@ ffmpy = {git = "https://github.com/EuDs63/ffmpy.git", rev = "333a19ee4d21f32537c
 # Optional Google Gemini dependency
 google-generativeai = {version ="^0.5.4", optional = true}
 
+# Optional Ollama client
+ollama = {version ="^0.3.0", optional = true}
+
 [tool.poetry.extras]
 ui = ["gradio", "ffmpy"]
 llms-llama-cpp = ["llama-index-llms-llama-cpp"]
 llms-openai = ["llama-index-llms-openai"]
 llms-openai-like = ["llama-index-llms-openai-like"]
-llms-ollama = ["llama-index-llms-ollama"]
+llms-ollama = ["llama-index-llms-ollama", "ollama"]
 llms-sagemaker = ["boto3"]
 llms-azopenai = ["llama-index-llms-azure-openai"]
 llms-gemini = ["llama-index-llms-gemini", "google-generativeai"]
-embeddings-ollama = ["llama-index-embeddings-ollama"]
+embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"]
 embeddings-huggingface = ["llama-index-embeddings-huggingface"]
 embeddings-openai = ["llama-index-embeddings-openai"]
 embeddings-sagemaker = ["boto3"]
diff --git a/settings.yaml b/settings.yaml
index cd8fccd..cd977a0 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -117,6 +117,7 @@ ollama:
   embedding_api_base: http://localhost:11434  # change if your embedding model runs on another ollama
   keep_alive: 5m
   request_timeout: 120.0
+  autopull_models: true
 
 azopenai:
   api_key: ${AZ_OPENAI_API_KEY:}