# Docker Compose stack for LlamaGPT: a llama.cpp-based API server plus a web UI.
version: '3.6'

services:
  # CPU inference server exposing an OpenAI-compatible API on port 8000
  # (reached by the UI over the internal compose network; not published to the host).
  llama-gpt-api:
    image: 'ghcr.io/getumbrel/llama-gpt-api-llama-2-7b-chat:latest'
    # Uncomment to build the API image locally instead of pulling it:
    # build:
    #   context: ./api
    #   dockerfile: Dockerfile
    # n_gpu_layers=0 keeps inference fully on CPU.
    # NOTE(review): --n_batch 2096 looks like a typo for 2048 — TODO confirm upstream.
    command: /bin/sh -c "make build && python3 -m llama_cpp.server --n_ctx 4096 --n_threads 8 --n_gpu_layers 0 --n_batch 2096"
    environment:
      MODEL: '/models/llama-2-7b-chat.bin'

  # Web UI; talks to the API service above and waits for it to come up
  # (WAIT_HOSTS/WAIT_TIMEOUT) before starting.
  llama-gpt-ui:
    # image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
    build:
      context: ./ui
      dockerfile: Dockerfile
    ports:
      # Quoted to avoid YAML 1.1 digits-and-colon scalar pitfalls.
      - '3000:3000'
    environment:
      # Placeholder key — the OpenAI-compatible backend is the local API service.
      - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
      - 'OPENAI_API_HOST=http://llama-gpt-api:8000'
      - 'DEFAULT_MODEL=/models/llama-2-7b-chat.bin'
      - 'WAIT_HOSTS=llama-gpt-api:8000'
      - 'WAIT_TIMEOUT=600'