diff --git a/example.env b/example.env
index 149eca2..8290784 100644
--- a/example.env
+++ b/example.env
@@ -1,5 +1,5 @@
 PERSIST_DIRECTORY=db
-LLAMA_EMBEDDINGS_MODEL=models/ggml-model-q4_0.bin
 MODEL_TYPE=GPT4All
 MODEL_PATH=models/ggml-gpt4all-j-v1.3-groovy.bin
+EMBEDDINGS_MODEL_NAME=all-MiniLM-L6-v2
 MODEL_N_CTX=1000
\ No newline at end of file
diff --git a/ingest.py b/ingest.py
index 4c95586..2c70362 100644
--- a/ingest.py
+++ b/ingest.py
@@ -6,7 +6,7 @@ from dotenv import load_dotenv
 from langchain.document_loaders import TextLoader, PDFMinerLoader, CSVLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import Chroma
-from langchain.embeddings import LlamaCppEmbeddings
+from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.docstore.document import Document
 from constants import CHROMA_SETTINGS
 
@@ -38,22 +38,23 @@ def main():
     # Load environment variables
     persist_directory = os.environ.get('PERSIST_DIRECTORY')
     source_directory = os.environ.get('SOURCE_DIRECTORY', 'source_documents')
-    llama_embeddings_model = os.environ.get('LLAMA_EMBEDDINGS_MODEL')
-    model_n_ctx = os.environ.get('MODEL_N_CTX')
+    embeddings_model_name = os.environ.get('EMBEDDINGS_MODEL_NAME')
 
     # Load documents and split in chunks
     print(f"Loading documents from {source_directory}")
+    chunk_size = 500
+    chunk_overlap = 50
     documents = load_documents(source_directory)
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
     texts = text_splitter.split_documents(documents)
     print(f"Loaded {len(documents)} documents from {source_directory}")
-    print(f"Split into {len(texts)} chunks of text (max. 500 tokens each)")
+    print(f"Split into {len(texts)} chunks of text (max. {chunk_size} characters each)")
 
     # Create embeddings
-    llama = LlamaCppEmbeddings(model_path=llama_embeddings_model, n_ctx=model_n_ctx)
+    embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)
 
     # Create and store locally vectorstore
-    db = Chroma.from_documents(texts, llama, persist_directory=persist_directory, client_settings=CHROMA_SETTINGS)
+    db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory, client_settings=CHROMA_SETTINGS)
     db.persist()
     db = None
 
diff --git a/privateGPT.py b/privateGPT.py
index 4c603a2..ae08bb9 100644
--- a/privateGPT.py
+++ b/privateGPT.py
@@ -1,6 +1,6 @@
 from dotenv import load_dotenv
 from langchain.chains import RetrievalQA
-from langchain.embeddings import LlamaCppEmbeddings
+from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 from langchain.vectorstores import Chroma
 from langchain.llms import GPT4All, LlamaCpp
@@ -8,7 +8,7 @@ import os
 
 load_dotenv()
 
-llama_embeddings_model = os.environ.get("LLAMA_EMBEDDINGS_MODEL")
+embeddings_model_name = os.environ.get("EMBEDDINGS_MODEL_NAME")
 persist_directory = os.environ.get('PERSIST_DIRECTORY')
 
 model_type = os.environ.get('MODEL_TYPE')
@@ -18,8 +18,8 @@ model_n_ctx = os.environ.get('MODEL_N_CTX')
 from constants import CHROMA_SETTINGS
 
 def main():
-    llama = LlamaCppEmbeddings(model_path=llama_embeddings_model, n_ctx=model_n_ctx)
-    db = Chroma(persist_directory=persist_directory, embedding_function=llama, client_settings=CHROMA_SETTINGS)
+    embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)
+    db = Chroma(persist_directory=persist_directory, embedding_function=embeddings, client_settings=CHROMA_SETTINGS)
     retriever = db.as_retriever()
     # Prepare the LLM
     callbacks = [StreamingStdOutCallbackHandler()]
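
For reference (not part of the diff): a minimal sketch of how the replacement HuggingFaceEmbeddings path behaves on its own, assuming langchain and sentence-transformers are installed; the model name matches the EMBEDDINGS_MODEL_NAME default set in example.env above, and the query string is only an illustration.

from langchain.embeddings import HuggingFaceEmbeddings

# Downloads the sentence-transformers model on first use, then embeds locally.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# embed_query returns a single vector; embed_documents returns a list of vectors.
vector = embeddings.embed_query("What is privateGPT?")
print(len(vector))  # all-MiniLM-L6-v2 produces 384-dimensional embeddings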