From 2a24cb9e600ead49e2ef616d7f785eaf0613dc0b Mon Sep 17 00:00:00 2001
From: Mayank Chhabra
Date: Fri, 18 Aug 2023 00:18:19 +0700
Subject: [PATCH] Set 70B's GQA to 8

---
 docker-compose-70b.yml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/docker-compose-70b.yml b/docker-compose-70b.yml
index 25f3a42..cee2e8a 100644
--- a/docker-compose-70b.yml
+++ b/docker-compose-70b.yml
@@ -9,6 +9,12 @@ services:
     restart: on-failure
     environment:
       MODEL: '/models/llama-2-70b-chat.bin'
+      # Llama 2 70B's grouping factor is 8 compared to 7B and 13B's 1. Currently,
+      # it's not possible to change this using --n_gqa with llama-cpp-python in
+      # run.sh, so we expose it as an environment variable.
+      # See: https://github.com/abetlen/llama-cpp-python/issues/528
+      # and: https://github.com/facebookresearch/llama/issues/407
+      N_GQA: '8'
 
   llama-gpt-ui:
     image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'