feat(llm): autopull ollama models (#2019)
* chore: update ollama (llm)
* feat: allow autopulling ollama models
* fix: mypy
* chore: always install ollama client
* refactor: move connection check and model pull for ollama to utils
* docs: update ollama config with autopulling info
parent dabf556dae
commit 20bad17c98
@@ -130,18 +130,22 @@ Go to [ollama.ai](https://ollama.ai/) and follow the instructions to install Ollama
 After the installation, make sure the Ollama desktop app is closed.
 
-Install the models to be used, the default settings-ollama.yaml is configured to user `mistral 7b` LLM (~4GB) and `nomic-embed-text` Embeddings (~275MB). Therefore:
+Now, start Ollama service (it will start a local inference server, serving both the LLM and the Embeddings):
+
+```bash
+ollama serve
+```
+
+Install the models to be used, the default settings-ollama.yaml is configured to use mistral 7b LLM (~4GB) and nomic-embed-text Embeddings (~275MB).
+
+By default, PGPT will automatically pull models as needed. This behavior can be changed by modifying the `ollama.autopull_models` property.
+
+In any case, if you want to manually pull models, run the following commands:
 
 ```bash
 ollama pull mistral
 ollama pull nomic-embed-text
 ```
 
-Now, start Ollama service (it will start a local inference server, serving both the LLM and the Embeddings):
-
-```bash
-ollama serve
-```
-
 Once done, on a different terminal, you can install PrivateGPT with the following command:
 
 ```bash
 poetry install --extras "ui llms-ollama embeddings-ollama vector-stores-qdrant"
 ```
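The paragraph added above is the user-facing contract: models are pulled on demand unless `ollama.autopull_models` is switched off. As a rough sketch of what "pull as needed" amounts to, using the official `ollama` Python client that this PR adds as an optional dependency (the host and model tag below are illustrative assumptions, not values taken from the diff):

```python
from ollama import Client  # official Ollama client, added by this PR as an optional dependency

client = Client(host="http://localhost:11434")  # assumed default Ollama address

# List what the server already has and pull only if the model is missing.
installed = [m["name"] for m in client.list().get("models", [])]
if "mistral:latest" not in installed:  # example tag; PrivateGPT normalizes bare names to ":latest"
    client.pull("mistral:latest")      # roughly what `ollama pull mistral` does from the CLI
```

Keeping `autopull_models: false` in the active settings profile preserves the manual `ollama pull` workflow shown above.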
@@ -2022,13 +2022,13 @@ test = ["Cython (>=0.29.24,<0.30.0)"]
 
 [[package]]
 name = "httpx"
-version = "0.25.2"
+version = "0.27.0"
 description = "The next generation HTTP client."
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "httpx-0.25.2-py3-none-any.whl", hash = "sha256:a05d3d052d9b2dfce0e3896636467f8a5342fb2b902c819428e1ac65413ca118"},
-    {file = "httpx-0.25.2.tar.gz", hash = "sha256:8b8fcaa0c8ea7b05edd69a094e63a2094c4efcb48129fb757361bc423c0ad9e8"},
+    {file = "httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5"},
+    {file = "httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5"},
 ]
 
 [package.dependencies]
@@ -2625,17 +2625,18 @@ llama-index-core = ">=0.10.1,<0.11.0"
 
 [[package]]
 name = "llama-index-llms-ollama"
-version = "0.1.5"
+version = "0.2.2"
 description = "llama-index llms ollama integration"
 optional = true
 python-versions = "<4.0,>=3.8.1"
 files = [
-    {file = "llama_index_llms_ollama-0.1.5-py3-none-any.whl", hash = "sha256:8e237978765458c9b175d2e25fc25162df8dc70a538b1b9ef9ea18617f8cdf5a"},
-    {file = "llama_index_llms_ollama-0.1.5.tar.gz", hash = "sha256:75697d96c860d87e80cce90c9ea425cbd236918458e0feaaee03597068ba9844"},
+    {file = "llama_index_llms_ollama-0.2.2-py3-none-any.whl", hash = "sha256:c224d7c17d641045bc9b6a6681dab434c1c421af0bacb5825eea444fefd8ed78"},
+    {file = "llama_index_llms_ollama-0.2.2.tar.gz", hash = "sha256:0c7f192cb8b768707bd5154b97e2a41284732d62070eb76190dee125e95245ea"},
 ]
 
 [package.dependencies]
 llama-index-core = ">=0.10.1,<0.11.0"
+ollama = ">=0.3.0"
 
 [[package]]
 name = "llama-index-llms-openai"
@@ -3633,6 +3634,20 @@ rsa = ["cryptography (>=3.0.0)"]
 signals = ["blinker (>=1.4.0)"]
 signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"]
 
+[[package]]
+name = "ollama"
+version = "0.3.0"
+description = "The official Python client for Ollama."
+optional = true
+python-versions = "<4.0,>=3.8"
+files = [
+    {file = "ollama-0.3.0-py3-none-any.whl", hash = "sha256:cd7010c4e2a37d7f08f36cd35c4592b14f1ec0d1bf3df10342cd47963d81ad7a"},
+    {file = "ollama-0.3.0.tar.gz", hash = "sha256:6ff493a2945ba76cdd6b7912a1cd79a45cfd9ba9120d14adeb63b2b5a7f353da"},
+]
+
+[package.dependencies]
+httpx = ">=0.27.0,<0.28.0"
+
 [[package]]
 name = "onnxruntime"
 version = "1.17.1"
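The new lock entry pins `ollama` 0.3.0, and its `httpx = ">=0.27.0,<0.28.0"` constraint is what forced the `httpx` bump from 0.25.2 earlier in this lock file. A quick way to confirm the resolved versions in an installed environment (a sketch, assuming the project virtualenv has one of the Ollama extras installed):

```python
from importlib.metadata import version

# Expected values per the updated poetry.lock; they will differ if the lock changes again.
print(version("ollama"))  # 0.3.0
print(version("httpx"))   # 0.27.0, bumped to satisfy ollama's httpx range
```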
@@ -6844,13 +6859,13 @@ cffi = ["cffi (>=1.11)"]
 embeddings-azopenai = ["llama-index-embeddings-azure-openai"]
 embeddings-gemini = ["llama-index-embeddings-gemini"]
 embeddings-huggingface = ["llama-index-embeddings-huggingface"]
-embeddings-ollama = ["llama-index-embeddings-ollama"]
+embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"]
 embeddings-openai = ["llama-index-embeddings-openai"]
 embeddings-sagemaker = ["boto3"]
 llms-azopenai = ["llama-index-llms-azure-openai"]
 llms-gemini = ["google-generativeai", "llama-index-llms-gemini"]
 llms-llama-cpp = ["llama-index-llms-llama-cpp"]
-llms-ollama = ["llama-index-llms-ollama"]
+llms-ollama = ["llama-index-llms-ollama", "ollama"]
 llms-openai = ["llama-index-llms-openai"]
 llms-openai-like = ["llama-index-llms-openai-like"]
 llms-sagemaker = ["boto3"]
@@ -6866,4 +6881,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.11,<3.12"
-content-hash = "66edb004ccbe7915d68567ea31a7851c87717185e2b504048cc645d1d511a66e"
+content-hash = "dce5b88d92bcfa047bf1e4c9fe2dbb9c63eb864d6bbca2340801ac0a2f02a8d4"
@@ -71,16 +71,46 @@ class EmbeddingComponent:
                     from llama_index.embeddings.ollama import (  # type: ignore
                         OllamaEmbedding,
                     )
+                    from ollama import Client  # type: ignore
                 except ImportError as e:
                     raise ImportError(
                         "Local dependencies not found, install with `poetry install --extras embeddings-ollama`"
                     ) from e
 
                 ollama_settings = settings.ollama
+
+                # Calculate embedding model. If not provided tag, it will be use latest
+                model_name = (
+                    ollama_settings.embedding_model + ":latest"
+                    if ":" not in ollama_settings.embedding_model
+                    else ollama_settings.embedding_model
+                )
+
                 self.embedding_model = OllamaEmbedding(
-                    model_name=ollama_settings.embedding_model,
+                    model_name=model_name,
                     base_url=ollama_settings.embedding_api_base,
                 )
+
+                if ollama_settings.autopull_models:
+                    if ollama_settings.autopull_models:
+                        from private_gpt.utils.ollama import (
+                            check_connection,
+                            pull_model,
+                        )
+
+                        # TODO: Reuse llama-index client when llama-index is updated
+                        client = Client(
+                            host=ollama_settings.embedding_api_base,
+                            timeout=ollama_settings.request_timeout,
+                        )
+
+                        if not check_connection(client):
+                            raise ValueError(
+                                f"Failed to connect to Ollama, "
+                                f"check if Ollama server is running on {ollama_settings.api_base}"
+                            )
+                        pull_model(client, model_name)
+
             case "azopenai":
                 try:
                     from llama_index.embeddings.azure_openai import (  # type: ignore
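Two things change in the embedding path above: the model name is normalized to a full tag (a bare `nomic-embed-text` becomes `nomic-embed-text:latest`) before it reaches both `OllamaEmbedding` and the pull helper, and, when `autopull_models` is enabled, a separate `ollama.Client` is built (note the TODO about reusing the llama-index client) to check connectivity and pull the model. A standalone sketch of the tag normalization; the helper name is illustrative and not part of the PR:

```python
def ensure_tag(model: str) -> str:
    """Mirror of the inline expression in the diff: append ':latest' when no tag is given."""
    return model if ":" in model else f"{model}:latest"

assert ensure_tag("nomic-embed-text") == "nomic-embed-text:latest"
assert ensure_tag("mistral:7b-instruct-q4_K_M") == "mistral:7b-instruct-q4_K_M"
```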
@@ -146,8 +146,15 @@ class LLMComponent:
                     "repeat_penalty": ollama_settings.repeat_penalty,  # ollama llama-cpp
                 }
 
-                self.llm = Ollama(
-                    model=ollama_settings.llm_model,
+                # calculate llm model. If not provided tag, it will be use latest
+                model_name = (
+                    ollama_settings.llm_model + ":latest"
+                    if ":" not in ollama_settings.llm_model
+                    else ollama_settings.llm_model
+                )
+
+                llm = Ollama(
+                    model=model_name,
                     base_url=ollama_settings.api_base,
                     temperature=settings.llm.temperature,
                     context_window=settings.llm.context_window,
@@ -155,6 +162,16 @@
                     request_timeout=ollama_settings.request_timeout,
                 )
 
+                if ollama_settings.autopull_models:
+                    from private_gpt.utils.ollama import check_connection, pull_model
+
+                    if not check_connection(llm.client):
+                        raise ValueError(
+                            f"Failed to connect to Ollama, "
+                            f"check if Ollama server is running on {ollama_settings.api_base}"
+                        )
+                    pull_model(llm.client, model_name)
+
                 if (
                     ollama_settings.keep_alive
                     != ollama_settings.model_fields["keep_alive"].default
@@ -172,6 +189,8 @@
                     Ollama.complete = add_keep_alive(Ollama.complete)
                     Ollama.stream_complete = add_keep_alive(Ollama.stream_complete)
 
+                self.llm = llm
+
             case "azopenai":
                 try:
                     from llama_index.llms.azure_openai import (  # type: ignore
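The LLM path mirrors the embedding path but reuses the client the llama-index `Ollama` wrapper already holds (`llm.client`) instead of building its own, and only assigns `self.llm` once the optional `keep_alive` patching is done. Roughly the same sequence outside the component would look like this (model, host, and timeout values are assumptions; `Ollama.client` is the `ollama.Client` instance the wrapper manages, which is what the diff relies on):

```python
from llama_index.llms.ollama import Ollama
from private_gpt.utils.ollama import check_connection, pull_model

llm = Ollama(model="mistral:latest", base_url="http://localhost:11434", request_timeout=120.0)

# Fail fast with a clear message if the server is down, then pull the model if it is missing.
if not check_connection(llm.client):
    raise ValueError("Failed to connect to Ollama, check if Ollama server is running on http://localhost:11434")
pull_model(llm.client, "mistral:latest")
```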
@@ -290,6 +290,10 @@ class OllamaSettings(BaseModel):
         120.0,
         description="Time elapsed until ollama times out the request. Default is 120s. Format is float. ",
     )
+    autopull_models: bool = Field(
+        False,
+        description="If set to True, the Ollama will automatically pull the models from the API base.",
+    )
 
 
 class AzureOpenAISettings(BaseModel):
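Note the split between this default and the shipped profile: the field defaults to `False` on the settings model, while `settings-ollama.yaml` (last hunk below) sets `autopull_models: true`, so only the Ollama profile autopulls out of the box and other profiles must opt in. A minimal sketch of the pydantic default behaviour, trimmed to the new field:

```python
from pydantic import BaseModel, Field


class OllamaSettings(BaseModel):  # trimmed to the new field for illustration
    autopull_models: bool = Field(
        False,
        description="If set to True, the Ollama will automatically pull the models from the API base.",
    )


print(OllamaSettings().autopull_models)                      # False: autopull is opt-in by default
print(OllamaSettings(autopull_models=True).autopull_models)  # True: what settings-ollama.yaml enables
```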
@@ -0,0 +1,32 @@
+import logging
+
+try:
+    from ollama import Client  # type: ignore
+except ImportError as e:
+    raise ImportError(
+        "Ollama dependencies not found, install with `poetry install --extras llms-ollama or embeddings-ollama`"
+    ) from e
+
+logger = logging.getLogger(__name__)
+
+
+def check_connection(client: Client) -> bool:
+    try:
+        client.list()
+        return True
+    except Exception as e:
+        logger.error(f"Failed to connect to Ollama: {e!s}")
+        return False
+
+
+def pull_model(client: Client, model_name: str, raise_error: bool = True) -> None:
+    try:
+        installed_models = [model["name"] for model in client.list().get("models", {})]
+        if model_name not in installed_models:
+            logger.info(f"Pulling model {model_name}. Please wait...")
+            client.pull(model_name)
+            logger.info(f"Model {model_name} pulled successfully")
+    except Exception as e:
+        logger.error(f"Failed to pull model {model_name}: {e!s}")
+        if raise_error:
+            raise e
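The new utility module above is what both components import lazily. A hedged usage example follows; the host, timeout, and model tag are assumptions, and `raise_error=False` turns a failed pull into a logged error instead of an exception, as the code shows:

```python
from ollama import Client

from private_gpt.utils.ollama import check_connection, pull_model

client = Client(host="http://localhost:11434", timeout=120.0)  # assumed local defaults

if not check_connection(client):
    raise SystemExit("Ollama server is not reachable")

pull_model(client, "nomic-embed-text:latest", raise_error=False)  # log and continue on failure
```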
@@ -22,7 +22,7 @@ llama-index-readers-file = "^0.1.27"
 llama-index-llms-llama-cpp = {version = "^0.1.4", optional = true}
 llama-index-llms-openai = {version = "^0.1.25", optional = true}
 llama-index-llms-openai-like = {version ="^0.1.3", optional = true}
-llama-index-llms-ollama = {version ="^0.1.5", optional = true}
+llama-index-llms-ollama = {version ="^0.2.2", optional = true}
 llama-index-llms-azure-openai = {version ="^0.1.8", optional = true}
 llama-index-llms-gemini = {version ="^0.1.11", optional = true}
 llama-index-embeddings-ollama = {version ="^0.1.2", optional = true}
@@ -62,16 +62,19 @@ ffmpy = {git = "https://github.com/EuDs63/ffmpy.git", rev = "333a19ee4d21f32537c
 # Optional Google Gemini dependency
 google-generativeai = {version ="^0.5.4", optional = true}
 
+# Optional Ollama client
+ollama = {version ="^0.3.0", optional = true}
+
 [tool.poetry.extras]
 ui = ["gradio", "ffmpy"]
 llms-llama-cpp = ["llama-index-llms-llama-cpp"]
 llms-openai = ["llama-index-llms-openai"]
 llms-openai-like = ["llama-index-llms-openai-like"]
-llms-ollama = ["llama-index-llms-ollama"]
+llms-ollama = ["llama-index-llms-ollama", "ollama"]
 llms-sagemaker = ["boto3"]
 llms-azopenai = ["llama-index-llms-azure-openai"]
 llms-gemini = ["llama-index-llms-gemini", "google-generativeai"]
-embeddings-ollama = ["llama-index-embeddings-ollama"]
+embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"]
 embeddings-huggingface = ["llama-index-embeddings-huggingface"]
 embeddings-openai = ["llama-index-embeddings-openai"]
 embeddings-sagemaker = ["boto3"]
@@ -117,6 +117,7 @@ ollama:
   embedding_api_base: http://localhost:11434  # change if your embedding model runs on another ollama
   keep_alive: 5m
   request_timeout: 120.0
+  autopull_models: true
 
 azopenai:
   api_key: ${AZ_OPENAI_API_KEY:}