From 918b384e38f34a709c1f96ccfa81bcbb67e10d0e Mon Sep 17 00:00:00 2001
From: R-Y-M-R <32427668+R-Y-M-R@users.noreply.github.com>
Date: Thu, 11 May 2023 09:50:40 -0400
Subject: [PATCH 1/4] Update langchain and llama versions

Bumped versions in requirements.txt, tested OK.

langchain 0.0.165 release: https://github.com/hwchase17/langchain/releases/tag/v0.0.165
llama 0.1.48 release: https://github.com/abetlen/llama-cpp-python/releases/tag/v0.1.48
---
 requirements.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 4c1ce9e..a9b6a77 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
-langchain==0.0.162
+langchain==0.0.165
 pygpt4all==1.1.0
 chromadb==0.3.22
-llama-cpp-python==0.1.47
+llama-cpp-python==0.1.48
 urllib3==1.26.6

From 8c6a81a07fc9c800d53f62a33f5ae3b5247a22a6 Mon Sep 17 00:00:00 2001
From: R-Y-M-R <32427668+R-Y-M-R@users.noreply.github.com>
Date: Thu, 11 May 2023 10:17:18 -0400
Subject: [PATCH 2/4] Fix: Disable Chroma Telemetry

Opts out of anonymized telemetry tracking in Chroma.

See: https://docs.trychroma.com/telemetry
---
 chroma_preference.py | 11 +++++++++++
 ingest.py            |  5 +++--
 privateGPT.py        |  5 +++--
 3 files changed, 17 insertions(+), 4 deletions(-)
 create mode 100644 chroma_preference.py

diff --git a/chroma_preference.py b/chroma_preference.py
new file mode 100644
index 0000000..d5e4383
--- /dev/null
+++ b/chroma_preference.py
@@ -0,0 +1,11 @@
+from chromadb.config import Settings
+
+# Define the folder for storing database
+PERSIST_DIRECTORY = 'db'
+
+# Define the Chroma settings
+CHROMA_SETTINGS = Settings(
+        chroma_db_impl='duckdb+parquet',
+        persist_directory=PERSIST_DIRECTORY,
+        anonymized_telemetry=False
+)
\ No newline at end of file
diff --git a/ingest.py b/ingest.py
index e8b08e6..ee900c1 100644
--- a/ingest.py
+++ b/ingest.py
@@ -3,6 +3,8 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import Chroma
 from langchain.embeddings import LlamaCppEmbeddings
 from sys import argv
+from chroma_preference import PERSIST_DIRECTORY
+from chroma_preference import CHROMA_SETTINGS
 
 def main():
     # Load document and split in chunks
@@ -13,8 +15,7 @@ def main():
     # Create embeddings
     llama = LlamaCppEmbeddings(model_path="./models/ggml-model-q4_0.bin")
     # Create and store locally vectorstore
-    persist_directory = 'db'
-    db = Chroma.from_documents(texts, llama, persist_directory=persist_directory)
+    db = Chroma.from_documents(texts, llama, persist_directory=PERSIST_DIRECTORY, client_settings=CHROMA_SETTINGS)
     db.persist()
     db = None
 
diff --git a/privateGPT.py b/privateGPT.py
index 817a5e3..7ebaec1 100644
--- a/privateGPT.py
+++ b/privateGPT.py
@@ -3,12 +3,13 @@ from langchain.embeddings import LlamaCppEmbeddings
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 from langchain.vectorstores import Chroma
 from langchain.llms import GPT4All
+from chroma_preference import PERSIST_DIRECTORY
+from chroma_preference import CHROMA_SETTINGS
 
 def main():
     # Load stored vectorstore
     llama = LlamaCppEmbeddings(model_path="./models/ggml-model-q4_0.bin")
-    persist_directory = 'db'
-    db = Chroma(persist_directory=persist_directory, embedding_function=llama)
+    db = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=llama, client_settings=CHROMA_SETTINGS)
     retriever = db.as_retriever()
     # Prepare the LLM
     callbacks = [StreamingStdOutCallbackHandler()]

From f12ea568e5fe723bfb14d421289f35133d659f6b Mon Sep 17 00:00:00 2001
From: R-Y-M-R <32427668+R-Y-M-R@users.noreply.github.com>
Date: Thu, 11 May 2023 10:29:07 -0400
Subject: [PATCH 3/4] Use constants.py file

---
 chroma_preference.py => constants.py | 0
 ingest.py                            | 4 ++--
 privateGPT.py                        | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)
 rename chroma_preference.py => constants.py (100%)

diff --git a/chroma_preference.py b/constants.py
similarity index 100%
rename from chroma_preference.py
rename to constants.py
diff --git a/ingest.py b/ingest.py
index ee900c1..ac1921e 100644
--- a/ingest.py
+++ b/ingest.py
@@ -3,8 +3,8 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import Chroma
 from langchain.embeddings import LlamaCppEmbeddings
 from sys import argv
-from chroma_preference import PERSIST_DIRECTORY
-from chroma_preference import CHROMA_SETTINGS
+from constants import PERSIST_DIRECTORY
+from constants import CHROMA_SETTINGS
 
 def main():
     # Load document and split in chunks
diff --git a/privateGPT.py b/privateGPT.py
index 7ebaec1..a95613b 100644
--- a/privateGPT.py
+++ b/privateGPT.py
@@ -3,8 +3,8 @@ from langchain.embeddings import LlamaCppEmbeddings
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 from langchain.vectorstores import Chroma
 from langchain.llms import GPT4All
-from chroma_preference import PERSIST_DIRECTORY
-from chroma_preference import CHROMA_SETTINGS
+from constants import PERSIST_DIRECTORY
+from constants import CHROMA_SETTINGS
 
 def main():
     # Load stored vectorstore

From 85528db7431d57a2bb2ca6e3e8d0fb37a668fd82 Mon Sep 17 00:00:00 2001
From: R-Y-M-R <32427668+R-Y-M-R@users.noreply.github.com>
Date: Thu, 11 May 2023 12:37:00 -0400
Subject: [PATCH 4/4] Update langchain to 0.0.166

Tested.

Release: https://github.com/hwchase17/langchain/releases/tag/v0.0.166
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index a9b6a77..7ba0b52 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-langchain==0.0.165
+langchain==0.0.166
 pygpt4all==1.1.0
 chromadb==0.3.22
 llama-cpp-python==0.1.48
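
For reference, a minimal usage sketch (not part of the patches above): after patch 3, any script in the repo can import the shared constants so its Chroma client opens the same persisted store with anonymized telemetry disabled. The query string and k value below are illustrative assumptions; the model path and db folder match the ones used in the patched scripts.

# sketch.py - minimal sketch, assuming ingest.py has already built the 'db' folder
from langchain.embeddings import LlamaCppEmbeddings
from langchain.vectorstores import Chroma

from constants import PERSIST_DIRECTORY, CHROMA_SETTINGS

# Same embedding model the patched scripts use
llama = LlamaCppEmbeddings(model_path="./models/ggml-model-q4_0.bin")

# Re-open the persisted store; CHROMA_SETTINGS carries anonymized_telemetry=False
db = Chroma(
    persist_directory=PERSIST_DIRECTORY,
    embedding_function=llama,
    client_settings=CHROMA_SETTINGS,
)

# Simple similarity lookup against the ingested documents (query is hypothetical)
for doc in db.similarity_search("What is this document about?", k=2):
    print(doc.page_content[:200])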