Added max_new_tokens as a config option to llm yaml block (#1317)
* added max_new_tokens as a configuration option to the llm block in settings

* Update fern/docs/pages/manual/settings.mdx

Co-authored-by: lopagela <lpglm@orange.fr>

* Update private_gpt/settings/settings.py

Add default value for max_new_tokens = 256

Co-authored-by: lopagela <lpglm@orange.fr>

* Addressed location of docs comment

* reformatting from running 'make check'

* remove default config value from settings.yaml

---------

Co-authored-by: lopagela <lpglm@orange.fr>
parent baf29f06fa
commit 9c192ddd73
fern/docs/pages/manual/settings.mdx
@@ -89,6 +89,21 @@ Currently, not all the parameters of `llama.cpp` and `llama-cpp-python` are available
 In case you need to customize parameters such as the number of layers loaded into the GPU, you might change
 these at the `llm_component.py` file under the `private_gpt/components/llm/llm_component.py`.
 
+##### Available LLM config options
+
+The `llm` section of the settings allows for the following configurations:
+
+- `mode`: how to run your llm
+- `max_new_tokens`: this lets you configure the number of new tokens the LLM will generate and add to the context window (by default Llama.cpp uses `256`)
+
+Example:
+
+```yaml
+llm:
+  mode: local
+  max_new_tokens: 256
+```
+
 If you are getting an out of memory error, you might also try a smaller model or stick to the proposed
 recommended models, instead of custom tuning the parameters.
 
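For context on how `max_new_tokens` interacts with the hard-coded `context_window` of 3900 (see the `llm_component.py` hunk below), here is a rough sketch of the token budget. The numbers are illustrative only; the exact split depends on the model's tokenizer and on how the framework reserves room for the completion.

```python
# Rough sketch of the token budget implied by these settings (illustrative).
context_window = 3900    # hard-coded in llm_component.py
max_new_tokens = 256     # now configurable via the llm.max_new_tokens setting

# Tokens not reserved for generation remain available for the prompt
# (system prompt + retrieved context + chat history).
prompt_budget = context_window - max_new_tokens
print(f"Roughly {prompt_budget} tokens remain for the prompt")  # ~3644
```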
private_gpt/components/llm/llm_component.py
@@ -31,6 +31,7 @@ class LLMComponent:
                 self.llm = LlamaCPP(
                     model_path=str(models_path / settings.local.llm_hf_model_file),
                     temperature=0.1,
+                    max_new_tokens=settings.llm.max_new_tokens,
                     # llama2 has a context window of 4096 tokens,
                     # but we set it lower to allow for some wiggle room
                     context_window=3900,
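The hunk above is the only wiring needed: the validated setting is forwarded to llama-index's `LlamaCPP` wrapper, which caps how many tokens a single completion may produce. A minimal, self-contained sketch of that construction follows; the model path is hypothetical, and the import path assumes the llama-index version in use around the time of this commit.

```python
from llama_index.llms import LlamaCPP  # import path assumed for llama-index ~0.9

llm = LlamaCPP(
    model_path="models/llama-2-7b-chat.Q4_K_M.gguf",  # hypothetical local GGUF file
    temperature=0.1,
    max_new_tokens=256,     # same value the new setting supplies by default
    context_window=3900,    # matches the hard-coded value in llm_component.py
)

# Each completion is now limited to at most 256 newly generated tokens.
response = llm.complete("Summarize what the llm settings block controls.")
print(response.text)
```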
private_gpt/settings/settings.py
@@ -82,6 +82,10 @@ class DataSettings(BaseModel):
 
 class LLMSettings(BaseModel):
     mode: Literal["local", "openai", "sagemaker", "mock"]
+    max_new_tokens: int = Field(
+        256,
+        description="The maximum number of tokens that the LLM is authorized to generate in one completion.",
+    )
 
 
 class VectorstoreSettings(BaseModel):
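Because the default now lives on the Pydantic field rather than in `settings.yaml`, omitting the key keeps the old behaviour. Below is a small sketch of that fallback, assuming nothing beyond `pydantic`; the `LLMSettings` fields are copied from the hunk above.

```python
from typing import Literal

from pydantic import BaseModel, Field


class LLMSettings(BaseModel):
    mode: Literal["local", "openai", "sagemaker", "mock"]
    max_new_tokens: int = Field(
        256,
        description="The maximum number of tokens that the LLM is authorized "
        "to generate in one completion.",
    )


# Omitting the key falls back to the field default of 256 ...
print(LLMSettings(mode="local").max_new_tokens)  # 256

# ... while a value parsed from settings.yaml overrides it.
print(LLMSettings(**{"mode": "local", "max_new_tokens": 512}).max_new_tokens)  # 512
```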