From 8c6a81a07fc9c800d53f62a33f5ae3b5247a22a6 Mon Sep 17 00:00:00 2001 From: R-Y-M-R <32427668+R-Y-M-R@users.noreply.github.com> Date: Thu, 11 May 2023 10:17:18 -0400 Subject: [PATCH 1/2] Fix: Disable Chroma Telemetry Opts out of anonymized telemetry being tracked in Chroma. See: https://docs.trychroma.com/telemetry --- chroma_preference.py | 11 +++++++++++ ingest.py | 5 +++-- privateGPT.py | 5 +++-- 3 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 chroma_preference.py diff --git a/chroma_preference.py b/chroma_preference.py new file mode 100644 index 0000000..d5e4383 --- /dev/null +++ b/chroma_preference.py @@ -0,0 +1,11 @@ +from chromadb.config import Settings + +# Define the folder for storing database +PERSIST_DIRECTORY = 'db' + +# Define the Chroma settings +CHROMA_SETTINGS = Settings( + chroma_db_impl='duckdb+parquet', + persist_directory=PERSIST_DIRECTORY, + anonymized_telemetry=False +) \ No newline at end of file diff --git a/ingest.py b/ingest.py index e8b08e6..ee900c1 100644 --- a/ingest.py +++ b/ingest.py @@ -3,6 +3,8 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.vectorstores import Chroma from langchain.embeddings import LlamaCppEmbeddings from sys import argv +from chroma_preference import PERSIST_DIRECTORY +from chroma_preference import CHROMA_SETTINGS def main(): # Load document and split in chunks @@ -13,8 +15,7 @@ def main(): # Create embeddings llama = LlamaCppEmbeddings(model_path="./models/ggml-model-q4_0.bin") # Create and store locally vectorstore - persist_directory = 'db' - db = Chroma.from_documents(texts, llama, persist_directory=persist_directory) + db = Chroma.from_documents(texts, llama, persist_directory=PERSIST_DIRECTORY, client_settings=CHROMA_SETTINGS) db.persist() db = None diff --git a/privateGPT.py b/privateGPT.py index 817a5e3..7ebaec1 100644 --- a/privateGPT.py +++ b/privateGPT.py @@ -3,12 +3,13 @@ from langchain.embeddings import LlamaCppEmbeddings from 
langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler from langchain.vectorstores import Chroma from langchain.llms import GPT4All +from chroma_preference import PERSIST_DIRECTORY +from chroma_preference import CHROMA_SETTINGS def main(): # Load stored vectorstore llama = LlamaCppEmbeddings(model_path="./models/ggml-model-q4_0.bin") - persist_directory = 'db' - db = Chroma(persist_directory=persist_directory, embedding_function=llama) + db = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=llama, client_settings=CHROMA_SETTINGS) retriever = db.as_retriever() # Prepare the LLM callbacks = [StreamingStdOutCallbackHandler()] From f12ea568e5fe723bfb14d421289f35133d659f6b Mon Sep 17 00:00:00 2001 From: R-Y-M-R <32427668+R-Y-M-R@users.noreply.github.com> Date: Thu, 11 May 2023 10:29:07 -0400 Subject: [PATCH 2/2] Use constants.py file --- chroma_preference.py => constants.py | 0 ingest.py | 4 ++-- privateGPT.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) rename chroma_preference.py => constants.py (100%) diff --git a/chroma_preference.py b/constants.py similarity index 100% rename from chroma_preference.py rename to constants.py diff --git a/ingest.py b/ingest.py index ee900c1..ac1921e 100644 --- a/ingest.py +++ b/ingest.py @@ -3,8 +3,8 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.vectorstores import Chroma from langchain.embeddings import LlamaCppEmbeddings from sys import argv -from chroma_preference import PERSIST_DIRECTORY -from chroma_preference import CHROMA_SETTINGS +from constants import PERSIST_DIRECTORY +from constants import CHROMA_SETTINGS def main(): # Load document and split in chunks diff --git a/privateGPT.py b/privateGPT.py index 7ebaec1..a95613b 100644 --- a/privateGPT.py +++ b/privateGPT.py @@ -3,8 +3,8 @@ from langchain.embeddings import LlamaCppEmbeddings from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler from langchain.vectorstores 
import Chroma from langchain.llms import GPT4All -from chroma_preference import PERSIST_DIRECTORY -from chroma_preference import CHROMA_SETTINGS +from constants import PERSIST_DIRECTORY +from constants import CHROMA_SETTINGS def main(): # Load stored vectorstore