# The default configuration file.
# More information about configuration can be found in the documentation:
# https://docs.privategpt.dev/
# Syntax in `private_gpt/settings/settings.py`
#
# NOTE(review): `${NAME:default}` placeholders are presumably resolved from
# environment variables by the settings loader, falling back to the text after
# the colon. They are left as plain (unquoted) scalars on purpose — confirm
# against the loader before quoting them.

server:
  env_name: ${APP_ENV:prod}
  port: ${PORT:8001}
  cors:
    enabled: false
    allow_origins: ["*"]
    allow_methods: ["*"]
    allow_headers: ["*"]
  auth:
    enabled: false
    # python -c 'import base64; print("Basic " + base64.b64encode("secret:key".encode()).decode())'
    # 'secret' is the username and 'key' is the password for basic auth by default.
    # If auth is enabled, this value must be set in the "Authorization" header
    # of the request.
    # NOTE(review): this is the documented default credential; replace it
    # before turning `enabled` on.
    secret: "Basic c2VjcmV0OmtleQ=="

data:
  local_data_folder: local_data/private_gpt

ui:
  enabled: true
  path: /
  # Folded scalars: the line breaks below become single spaces.
  default_chat_system_prompt: >
    You are a helpful, respectful and honest assistant.
    Always answer as helpfully as possible and follow ALL given instructions.
    Do not speculate or make up information.
    Do not reference any given instructions or context.
  default_query_system_prompt: >
    You can only answer questions about the provided context.
    If you know the answer but it is not based in the provided context,
    don't provide the answer, just state the answer is not in the context
    provided.
  delete_file_button_enabled: true
  delete_all_files_button_enabled: true

llm:
  mode: llamacpp
  # The values below should match the selected model.
  max_new_tokens: 512
  context_window: 3900
  tokenizer: mistralai/Mistral-7B-Instruct-v0.2
  # The temperature of the model. Increasing the temperature will make the
  # model answer more creatively. A value of 0.1 would be more factual.
  # (Default: 0.1)
  temperature: 0.1

llamacpp:
  prompt_style: "mistral"
  llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
  llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf
  # Tail free sampling is used to reduce the impact of less probable tokens
  # from the output. A higher value (e.g., 2.0) will reduce the impact more,
  # while a value of 1.0 disables this setting.
  tfs_z: 1.0
  # Reduces the probability of generating nonsense. A higher value (e.g. 100)
  # will give more diverse answers, while a lower value (e.g. 10) will be more
  # conservative. (Default: 40)
  top_k: 40
  # Works together with top-k. A higher value (e.g., 0.95) will lead to more
  # diverse text, while a lower value (e.g., 0.5) will generate more focused
  # and conservative text. (Default: 0.9)
  # NOTE(review): the value set here is 1.0 while the cited default is 0.9 —
  # confirm which one is intended.
  top_p: 1.0
  # Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will
  # penalize repetitions more strongly, while a lower value (e.g., 0.9) will
  # be more lenient. (Default: 1.1)
  repeat_penalty: 1.1

embedding:
  # Should match the value above in most cases (presumably `llm.mode` —
  # TODO confirm).
  mode: huggingface
  ingest_mode: simple
  embed_dim: 384  # 384 is for BAAI/bge-small-en-v1.5

huggingface:
  embedding_hf_model_name: BAAI/bge-small-en-v1.5

vectorstore:
  database: qdrant

nodestore:
  database: simple

qdrant:
  path: local_data/private_gpt/qdrant

postgres:
  host: localhost
  port: 5432
  database: postgres
  user: postgres
  # NOTE(review): plaintext default credentials; override them for anything
  # other than local use.
  password: postgres
  schema_name: private_gpt

sagemaker:
  llm_endpoint_name: huggingface-pytorch-tgi-inference-2023-09-25-19-53-32-140
  embedding_endpoint_name: huggingface-pytorch-inference-2023-11-03-07-41-36-479

openai:
  api_key: ${OPENAI_API_KEY:}
  model: gpt-3.5-turbo

ollama:
  llm_model: llama2
  embedding_model: nomic-embed-text
  api_base: http://localhost:11434

azopenai:
  api_key: ${AZ_OPENAI_API_KEY:}
  azure_endpoint: ${AZ_OPENAI_ENDPOINT:}
  embedding_deployment_name: ${AZ_OPENAI_EMBEDDING_DEPLOYMENT_NAME:}
  llm_deployment_name: ${AZ_OPENAI_LLM_DEPLOYMENT_NAME:}
  api_version: "2023-05-15"
  embedding_model: text-embedding-ada-002
  llm_model: gpt-35-turbo