feat(llm): autopull ollama models (#2019)

* chore: update ollama (llm)

* feat: allow autopulling ollama models

* fix: mypy

* chore: always install ollama client

* refactor: check connection and pull ollama method to utils

* docs: update ollama config with autopulling info
Javier Martinez 2024-07-29 13:25:42 +02:00 committed by GitHub
parent dabf556dae
commit 20bad17c98
8 changed files with 129 additions and 21 deletions
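Taken together, the changes below wire optional model auto-pulling into both the LLM and the embedding components, gated by the new `ollama.autopull_models` setting. A condensed sketch of the resulting flow, reusing the helper names introduced further down in the new utils module; the `ensure_model` wrapper itself is illustrative and not part of the diff:

```python
from ollama import Client

from private_gpt.utils.ollama import check_connection, pull_model


def ensure_model(api_base: str, model_name: str, timeout: float = 120.0) -> None:
    # Illustrative wrapper: this mirrors what the LLM and embedding components do
    # when `ollama.autopull_models` is enabled (api_base for the LLM,
    # embedding_api_base for the embeddings).
    client = Client(host=api_base, timeout=timeout)
    if not check_connection(client):
        raise ValueError(
            f"Failed to connect to Ollama, check if Ollama server is running on {api_base}"
        )
    pull_model(client, model_name)


# Example: make sure the default models are present before starting PGPT.
ensure_model("http://localhost:11434", "mistral:latest")
ensure_model("http://localhost:11434", "nomic-embed-text:latest")
```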


@@ -130,18 +130,22 @@ Go to [ollama.ai](https://ollama.ai/) and follow the instructions to install Ollama
 After the installation, make sure the Ollama desktop app is closed.
 
-Install the models to be used, the default settings-ollama.yaml is configured to user `mistral 7b` LLM (~4GB) and `nomic-embed-text` Embeddings (~275MB). Therefore:
+Now, start Ollama service (it will start a local inference server, serving both the LLM and the Embeddings):
+
+```bash
+ollama serve
+```
+
+Install the models to be used, the default settings-ollama.yaml is configured to use `mistral 7b` LLM (~4GB) and `nomic-embed-text` Embeddings (~275MB).
+By default, PGPT will automatically pull models as needed. This behavior can be changed by modifying the `ollama.autopull_models` property.
+
+In any case, if you want to manually pull models, run the following commands:
 
 ```bash
 ollama pull mistral
 ollama pull nomic-embed-text
 ```
 
-Now, start Ollama service (it will start a local inference server, serving both the LLM and the Embeddings):
-
-```bash
-ollama serve
-```
-
 Once done, on a different terminal, you can install PrivateGPT with the following command:
 ```bash
 poetry install --extras "ui llms-ollama embeddings-ollama vector-stores-qdrant"

poetry.lock (generated, 33 lines changed)

@@ -2022,13 +2022,13 @@ test = ["Cython (>=0.29.24,<0.30.0)"]
 
 [[package]]
 name = "httpx"
-version = "0.25.2"
+version = "0.27.0"
 description = "The next generation HTTP client."
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "httpx-0.25.2-py3-none-any.whl", hash = "sha256:a05d3d052d9b2dfce0e3896636467f8a5342fb2b902c819428e1ac65413ca118"},
-    {file = "httpx-0.25.2.tar.gz", hash = "sha256:8b8fcaa0c8ea7b05edd69a094e63a2094c4efcb48129fb757361bc423c0ad9e8"},
+    {file = "httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5"},
+    {file = "httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5"},
 ]
 
 [package.dependencies]
@@ -2625,17 +2625,18 @@ llama-index-core = ">=0.10.1,<0.11.0"
 
 [[package]]
 name = "llama-index-llms-ollama"
-version = "0.1.5"
+version = "0.2.2"
 description = "llama-index llms ollama integration"
 optional = true
 python-versions = "<4.0,>=3.8.1"
 files = [
-    {file = "llama_index_llms_ollama-0.1.5-py3-none-any.whl", hash = "sha256:8e237978765458c9b175d2e25fc25162df8dc70a538b1b9ef9ea18617f8cdf5a"},
-    {file = "llama_index_llms_ollama-0.1.5.tar.gz", hash = "sha256:75697d96c860d87e80cce90c9ea425cbd236918458e0feaaee03597068ba9844"},
+    {file = "llama_index_llms_ollama-0.2.2-py3-none-any.whl", hash = "sha256:c224d7c17d641045bc9b6a6681dab434c1c421af0bacb5825eea444fefd8ed78"},
+    {file = "llama_index_llms_ollama-0.2.2.tar.gz", hash = "sha256:0c7f192cb8b768707bd5154b97e2a41284732d62070eb76190dee125e95245ea"},
 ]
 
 [package.dependencies]
 llama-index-core = ">=0.10.1,<0.11.0"
+ollama = ">=0.3.0"
 
 [[package]]
 name = "llama-index-llms-openai"
@@ -3633,6 +3634,20 @@ rsa = ["cryptography (>=3.0.0)"]
 signals = ["blinker (>=1.4.0)"]
 signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"]
 
+[[package]]
+name = "ollama"
+version = "0.3.0"
+description = "The official Python client for Ollama."
+optional = true
+python-versions = "<4.0,>=3.8"
+files = [
+    {file = "ollama-0.3.0-py3-none-any.whl", hash = "sha256:cd7010c4e2a37d7f08f36cd35c4592b14f1ec0d1bf3df10342cd47963d81ad7a"},
+    {file = "ollama-0.3.0.tar.gz", hash = "sha256:6ff493a2945ba76cdd6b7912a1cd79a45cfd9ba9120d14adeb63b2b5a7f353da"},
+]
+
+[package.dependencies]
+httpx = ">=0.27.0,<0.28.0"
+
 [[package]]
 name = "onnxruntime"
 version = "1.17.1"
@@ -6844,13 +6859,13 @@ cffi = ["cffi (>=1.11)"]
 embeddings-azopenai = ["llama-index-embeddings-azure-openai"]
 embeddings-gemini = ["llama-index-embeddings-gemini"]
 embeddings-huggingface = ["llama-index-embeddings-huggingface"]
-embeddings-ollama = ["llama-index-embeddings-ollama"]
+embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"]
 embeddings-openai = ["llama-index-embeddings-openai"]
 embeddings-sagemaker = ["boto3"]
 llms-azopenai = ["llama-index-llms-azure-openai"]
 llms-gemini = ["google-generativeai", "llama-index-llms-gemini"]
 llms-llama-cpp = ["llama-index-llms-llama-cpp"]
-llms-ollama = ["llama-index-llms-ollama"]
+llms-ollama = ["llama-index-llms-ollama", "ollama"]
 llms-openai = ["llama-index-llms-openai"]
 llms-openai-like = ["llama-index-llms-openai-like"]
 llms-sagemaker = ["boto3"]

@@ -6866,4 +6881,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.11,<3.12"
-content-hash = "66edb004ccbe7915d68567ea31a7851c87717185e2b504048cc645d1d511a66e"
+content-hash = "dce5b88d92bcfa047bf1e4c9fe2dbb9c63eb864d6bbca2340801ac0a2f02a8d4"


@@ -71,16 +71,46 @@ class EmbeddingComponent:
                     from llama_index.embeddings.ollama import (  # type: ignore
                         OllamaEmbedding,
                     )
+                    from ollama import Client  # type: ignore
                 except ImportError as e:
                     raise ImportError(
                         "Local dependencies not found, install with `poetry install --extras embeddings-ollama`"
                     ) from e
 
                 ollama_settings = settings.ollama
+
+                # Calculate the embedding model name. If no tag is provided, use "latest".
+                model_name = (
+                    ollama_settings.embedding_model + ":latest"
+                    if ":" not in ollama_settings.embedding_model
+                    else ollama_settings.embedding_model
+                )
+
                 self.embedding_model = OllamaEmbedding(
-                    model_name=ollama_settings.embedding_model,
+                    model_name=model_name,
                     base_url=ollama_settings.embedding_api_base,
                 )
+
+                if ollama_settings.autopull_models:
+                    from private_gpt.utils.ollama import (
+                        check_connection,
+                        pull_model,
+                    )
+
+                    # TODO: Reuse llama-index client when llama-index is updated
+                    client = Client(
+                        host=ollama_settings.embedding_api_base,
+                        timeout=ollama_settings.request_timeout,
+                    )
+
+                    if not check_connection(client):
+                        raise ValueError(
+                            f"Failed to connect to Ollama, "
+                            f"check if Ollama server is running on {ollama_settings.embedding_api_base}"
+                        )
+                    pull_model(client, model_name)
+
             case "azopenai":
                 try:
                     from llama_index.embeddings.azure_openai import (  # type: ignore


@@ -146,8 +146,15 @@ class LLMComponent:
                     "repeat_penalty": ollama_settings.repeat_penalty,  # ollama llama-cpp
                 }
 
-                self.llm = Ollama(
-                    model=ollama_settings.llm_model,
+                # Calculate the LLM model name. If no tag is provided, use "latest".
+                model_name = (
+                    ollama_settings.llm_model + ":latest"
+                    if ":" not in ollama_settings.llm_model
+                    else ollama_settings.llm_model
+                )
+
+                llm = Ollama(
+                    model=model_name,
                     base_url=ollama_settings.api_base,
                     temperature=settings.llm.temperature,
                     context_window=settings.llm.context_window,
@@ -155,6 +162,16 @@
                     request_timeout=ollama_settings.request_timeout,
                 )
 
+                if ollama_settings.autopull_models:
+                    from private_gpt.utils.ollama import check_connection, pull_model
+
+                    if not check_connection(llm.client):
+                        raise ValueError(
+                            f"Failed to connect to Ollama, "
+                            f"check if Ollama server is running on {ollama_settings.api_base}"
+                        )
+                    pull_model(llm.client, model_name)
+
                 if (
                     ollama_settings.keep_alive
                     != ollama_settings.model_fields["keep_alive"].default
@@ -172,6 +189,8 @@
                     Ollama.complete = add_keep_alive(Ollama.complete)
                     Ollama.stream_complete = add_keep_alive(Ollama.stream_complete)
 
+                self.llm = llm
+
             case "azopenai":
                 try:
                     from llama_index.llms.azure_openai import (  # type: ignore
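Both the LLM and the embedding components normalize the model name the same way before handing it to llama-index and to the pull helper. A standalone sketch of that expression, assuming an illustrative helper name that is not part of the diff:

```python
def with_default_tag(model: str) -> str:
    """Append ':latest' when no explicit tag is given, matching the inline expression above."""
    return model if ":" in model else model + ":latest"


assert with_default_tag("mistral") == "mistral:latest"
assert with_default_tag("mistral:7b") == "mistral:7b"
```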


@@ -290,6 +290,10 @@ class OllamaSettings(BaseModel):
         120.0,
         description="Time elapsed until ollama times out the request. Default is 120s. Format is float.",
     )
+    autopull_models: bool = Field(
+        False,
+        description="If set to True, Ollama will automatically pull the models from the API base.",
+    )
 
 
 class AzureOpenAISettings(BaseModel):


@@ -0,0 +1,32 @@
+import logging
+
+try:
+    from ollama import Client  # type: ignore
+except ImportError as e:
+    raise ImportError(
+        "Ollama dependencies not found, install with `poetry install --extras llms-ollama or embeddings-ollama`"
+    ) from e
+
+logger = logging.getLogger(__name__)
+
+
+def check_connection(client: Client) -> bool:
+    try:
+        client.list()
+        return True
+    except Exception as e:
+        logger.error(f"Failed to connect to Ollama: {e!s}")
+        return False
+
+
+def pull_model(client: Client, model_name: str, raise_error: bool = True) -> None:
+    try:
+        installed_models = [model["name"] for model in client.list().get("models", [])]
+        if model_name not in installed_models:
+            logger.info(f"Pulling model {model_name}. Please wait...")
+            client.pull(model_name)
+            logger.info(f"Model {model_name} pulled successfully")
+    except Exception as e:
+        logger.error(f"Failed to pull model {model_name}: {e!s}")
+        if raise_error:
+            raise e
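For reference, a minimal usage sketch of this new utils module; the import path follows the `private_gpt.utils.ollama` imports used elsewhere in the diff, and the host and model name are only examples:

```python
from ollama import Client

from private_gpt.utils.ollama import check_connection, pull_model

client = Client(host="http://localhost:11434")  # Ollama's default local address
if check_connection(client):
    # pull_model only downloads when the model is not already in client.list()
    pull_model(client, "mistral:latest")
else:
    print("Ollama server is not reachable")
```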


@@ -22,7 +22,7 @@ llama-index-readers-file = "^0.1.27"
 llama-index-llms-llama-cpp = {version = "^0.1.4", optional = true}
 llama-index-llms-openai = {version = "^0.1.25", optional = true}
 llama-index-llms-openai-like = {version ="^0.1.3", optional = true}
-llama-index-llms-ollama = {version ="^0.1.5", optional = true}
+llama-index-llms-ollama = {version ="^0.2.2", optional = true}
 llama-index-llms-azure-openai = {version ="^0.1.8", optional = true}
 llama-index-llms-gemini = {version ="^0.1.11", optional = true}
 llama-index-embeddings-ollama = {version ="^0.1.2", optional = true}
@@ -62,16 +62,19 @@ ffmpy = {git = "https://github.com/EuDs63/ffmpy.git", rev = "333a19ee4d21f32537c
 # Optional Google Gemini dependency
 google-generativeai = {version ="^0.5.4", optional = true}
 
+# Optional Ollama client
+ollama = {version ="^0.3.0", optional = true}
+
 [tool.poetry.extras]
 ui = ["gradio", "ffmpy"]
 llms-llama-cpp = ["llama-index-llms-llama-cpp"]
 llms-openai = ["llama-index-llms-openai"]
 llms-openai-like = ["llama-index-llms-openai-like"]
-llms-ollama = ["llama-index-llms-ollama"]
+llms-ollama = ["llama-index-llms-ollama", "ollama"]
 llms-sagemaker = ["boto3"]
 llms-azopenai = ["llama-index-llms-azure-openai"]
 llms-gemini = ["llama-index-llms-gemini", "google-generativeai"]
-embeddings-ollama = ["llama-index-embeddings-ollama"]
+embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"]
 embeddings-huggingface = ["llama-index-embeddings-huggingface"]
 embeddings-openai = ["llama-index-embeddings-openai"]
 embeddings-sagemaker = ["boto3"]


@@ -117,6 +117,7 @@ ollama:
   embedding_api_base: http://localhost:11434  # change if your embedding model runs on another ollama
   keep_alive: 5m
   request_timeout: 120.0
+  autopull_models: true
 
 azopenai:
   api_key: ${AZ_OPENAI_API_KEY:}