Merge pull request #44 from R-Y-M-R/Fix/DisableChromaTelemetry

Disable chroma telemetry. Extract constants.
This commit is contained in:
Iván Martínez 2023-05-11 19:38:43 +02:00 committed by GitHub
commit 56c1be36ad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 17 additions and 4 deletions

11
constants.py Normal file
View File

@@ -0,0 +1,11 @@
from chromadb.config import Settings

# Folder on disk where the vector database is persisted.
PERSIST_DIRECTORY = 'db'

# Shared Chroma client settings: DuckDB+Parquet storage backend,
# persisted to PERSIST_DIRECTORY, with anonymized telemetry opted out.
CHROMA_SETTINGS = Settings(
    chroma_db_impl='duckdb+parquet',
    persist_directory=PERSIST_DIRECTORY,
    anonymized_telemetry=False,
)

View File

@@ -3,6 +3,8 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma from langchain.vectorstores import Chroma
from langchain.embeddings import LlamaCppEmbeddings from langchain.embeddings import LlamaCppEmbeddings
from sys import argv from sys import argv
from constants import PERSIST_DIRECTORY
from constants import CHROMA_SETTINGS
def main(): def main():
# Load document and split in chunks # Load document and split in chunks
@@ -13,8 +15,7 @@ def main():
# Create embeddings # Create embeddings
llama = LlamaCppEmbeddings(model_path="./models/ggml-model-q4_0.bin") llama = LlamaCppEmbeddings(model_path="./models/ggml-model-q4_0.bin")
# Create and store locally vectorstore # Create and store locally vectorstore
persist_directory = 'db' db = Chroma.from_documents(texts, llama, persist_directory=PERSIST_DIRECTORY, client_settings=CHROMA_SETTINGS)
db = Chroma.from_documents(texts, llama, persist_directory=persist_directory)
db.persist() db.persist()
db = None db = None

View File

@@ -3,12 +3,13 @@ from langchain.embeddings import LlamaCppEmbeddings
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.vectorstores import Chroma from langchain.vectorstores import Chroma
from langchain.llms import GPT4All from langchain.llms import GPT4All
from constants import PERSIST_DIRECTORY
from constants import CHROMA_SETTINGS
def main(): def main():
# Load stored vectorstore # Load stored vectorstore
llama = LlamaCppEmbeddings(model_path="./models/ggml-model-q4_0.bin") llama = LlamaCppEmbeddings(model_path="./models/ggml-model-q4_0.bin")
persist_directory = 'db' db = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=llama, client_settings=CHROMA_SETTINGS)
db = Chroma(persist_directory=persist_directory, embedding_function=llama)
retriever = db.as_retriever() retriever = db.as_retriever()
# Prepare the LLM # Prepare the LLM
callbacks = [StreamingStdOutCallbackHandler()] callbacks = [StreamingStdOutCallbackHandler()]