Merge branch 'main' into main

Commit f60dbb520e
constants.py (new file):

@@ -0,0 +1,11 @@
+from chromadb.config import Settings
+
+# Define the folder for storing database
+PERSIST_DIRECTORY = 'db'
+
+# Define the Chroma settings
+CHROMA_SETTINGS = Settings(
+        chroma_db_impl='duckdb+parquet',
+        persist_directory=PERSIST_DIRECTORY,
+        anonymized_telemetry=False
+)
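The new constants module gives ingestion and querying a single source of truth for the Chroma configuration, so the duckdb+parquet backend, the persist path, and the telemetry opt-out cannot drift between scripts. A minimal consumption sketch, assuming the langchain 0.0.166 / chromadb 0.3.22 APIs pinned in requirements.txt (the environment variables mirror the ones the existing scripts already read):

    import os
    from langchain.embeddings import LlamaCppEmbeddings
    from langchain.vectorstores import Chroma
    from constants import PERSIST_DIRECTORY, CHROMA_SETTINGS

    # Every script that opens the store imports the same Settings object,
    # so the backend implementation and on-disk location always match.
    llama = LlamaCppEmbeddings(model_path=os.environ.get('LLAMA_EMBEDDINGS_MODEL'),
                               n_ctx=os.environ.get('MODEL_N_CTX'))
    db = Chroma(persist_directory=PERSIST_DIRECTORY,
                embedding_function=llama,
                client_settings=CHROMA_SETTINGS)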
ingest.py:

@@ -3,6 +3,8 @@ from langchain.document_loaders import TextLoader, PDFMinerLoader, CSVLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import Chroma
 from langchain.embeddings import LlamaCppEmbeddings
+from constants import PERSIST_DIRECTORY
+from constants import CHROMA_SETTINGS

 def main():
     llama_embeddings_model = os.environ.get('LLAMA_EMBEDDINGS_MODEL')
@@ -23,7 +25,7 @@ def main():
     # Create embeddings
     llama = LlamaCppEmbeddings(model_path=llama_embeddings_model, n_ctx=model_n_ctx)
     # Create and store locally vectorstore
-    db = Chroma.from_documents(texts, llama, persist_directory=persist_directory)
+    db = Chroma.from_documents(texts, llama, persist_directory=PERSIST_DIRECTORY, client_settings=CHROMA_SETTINGS)
     db.persist()
     db = None
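The second hunk fixes both halves of the call: the local persist_directory variable is replaced by the shared PERSIST_DIRECTORY constant, and client_settings is passed so the store is written with the same backend configuration it will later be opened with. A compressed ingestion sketch under the same version assumptions ('source.txt' and the chunking parameters are illustrative, not taken from the repository):

    import os
    from langchain.document_loaders import TextLoader
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain.embeddings import LlamaCppEmbeddings
    from langchain.vectorstores import Chroma
    from constants import PERSIST_DIRECTORY, CHROMA_SETTINGS

    documents = TextLoader('source.txt').load()        # hypothetical input file
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    texts = splitter.split_documents(documents)
    llama = LlamaCppEmbeddings(model_path=os.environ.get('LLAMA_EMBEDDINGS_MODEL'))
    db = Chroma.from_documents(texts, llama,
                               persist_directory=PERSIST_DIRECTORY,
                               client_settings=CHROMA_SETTINGS)
    db.persist()   # flush the duckdb+parquet files to disk
    db = None      # drop the reference so the client releases its handles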
privateGPT.py:

@@ -12,10 +12,11 @@ model_type = os.environ.get('MODEL_TYPE')
 model_path = os.environ.get('MODEL_PATH')
 model_n_ctx = os.environ.get('MODEL_N_CTX')

+from constants import CHROMA_SETTINGS
+
 def main():
-    # Load stored vectorstore
     llama = LlamaCppEmbeddings(model_path=llama_embeddings_model, n_ctx=model_n_ctx)
-    db = Chroma(persist_directory=persist_directory, embedding_function=llama)
+    db = Chroma(persist_directory=persist_directory, embedding_function=llama, client_settings=CHROMA_SETTINGS)
     retriever = db.as_retriever()
     # Prepare the LLM
     callbacks = [StreamingStdOutCallbackHandler()]
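On the query side only client_settings is added, so the script opens the store with exactly the configuration it was written with (including the telemetry opt-out) rather than whatever defaults the wrapper would otherwise construct. A minimal retrieval sketch under the same assumptions (the query string is hypothetical):

    import os
    from langchain.embeddings import LlamaCppEmbeddings
    from langchain.vectorstores import Chroma
    from constants import PERSIST_DIRECTORY, CHROMA_SETTINGS

    llama = LlamaCppEmbeddings(model_path=os.environ.get('LLAMA_EMBEDDINGS_MODEL'))
    db = Chroma(persist_directory=PERSIST_DIRECTORY,
                embedding_function=llama,
                client_settings=CHROMA_SETTINGS)
    retriever = db.as_retriever()
    docs = retriever.get_relevant_documents("What is this corpus about?")  # hypothetical query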
requirements.txt:

@@ -1,5 +1,5 @@
-langchain==0.0.162
+langchain==0.0.166
 pygpt4all==1.1.0
 chromadb==0.3.22
-llama-cpp-python==0.1.47
+llama-cpp-python==0.1.48
 urllib3==1.26.6