From de7b0ee7992697fd8442f3d0012835c248ea7b2e Mon Sep 17 00:00:00 2001
From: Aymeric Roucher <69208727+aymeric-roucher@users.noreply.github.com>
Date: Fri, 24 Jan 2025 16:32:35 +0100
Subject: [PATCH] Improve inference choice examples (#311)

* Improve inference choice examples

* Fix style

---------

Co-authored-by: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
---
 examples/agent_from_any_llm.py              | 51 +++++++++++++++++++++
 examples/tool_calling_agent_from_any_llm.py | 30 ------------
 examples/tool_calling_agent_mcp.py          | 29 ------------
 examples/tool_calling_agent_ollama.py       | 29 ------------
 src/smolagents/models.py                    | 10 ++--
 5 files changed, 56 insertions(+), 93 deletions(-)
 create mode 100644 examples/agent_from_any_llm.py
 delete mode 100644 examples/tool_calling_agent_from_any_llm.py
 delete mode 100644 examples/tool_calling_agent_mcp.py
 delete mode 100644 examples/tool_calling_agent_ollama.py

diff --git a/examples/agent_from_any_llm.py b/examples/agent_from_any_llm.py
new file mode 100644
index 0000000..eff667f
--- /dev/null
+++ b/examples/agent_from_any_llm.py
@@ -0,0 +1,51 @@
+from typing import Optional
+
+from smolagents import HfApiModel, LiteLLMModel, TransformersModel, tool
+from smolagents.agents import CodeAgent, ToolCallingAgent
+
+
+# Choose which inference type to use!
+
+available_inferences = ["hf_api", "transformers", "ollama", "litellm"]
+chosen_inference = "transformers"
+
+print(f"Chosen inference: {chosen_inference}")
+
+if chosen_inference == "hf_api":
+    model = HfApiModel(model_id="meta-llama/Llama-3.3-70B-Instruct")
+
+elif chosen_inference == "transformers":
+    model = TransformersModel(model_id="HuggingFaceTB/SmolLM2-1.7B-Instruct", device_map="auto", max_new_tokens=1000)
+
+elif chosen_inference == "ollama":
+    model = LiteLLMModel(
+        model_id="ollama_chat/llama3.2",
+        api_base="http://localhost:11434",  # replace with a remote OpenAI-compatible server if necessary
+        api_key="your-api-key",  # replace with API key if necessary
+    )
+
+elif chosen_inference == "litellm":
+    # For Anthropic: change model_id below to 'anthropic/claude-3-5-sonnet-latest'
+    model = LiteLLMModel(model_id="gpt-4o")
+
+
+@tool
+def get_weather(location: str, celsius: Optional[bool] = False) -> str:
+    """
+    Get the weather for the coming days at the given location.
+    Secretly this tool does not care about the location, it hates the weather everywhere.
+
+    Args:
+        location: the location
+        celsius: whether to return the temperature in Celsius
+    """
+    return "The weather is UNGODLY with torrential rains and temperatures below -10°C"
+
+
+agent = ToolCallingAgent(tools=[get_weather], model=model)
+
+print("ToolCallingAgent:", agent.run("What's the weather like in Paris?"))
+
+agent = CodeAgent(tools=[get_weather], model=model)
+
+print("CodeAgent:", agent.run("What's the weather like in Paris?"))
diff --git a/examples/tool_calling_agent_from_any_llm.py b/examples/tool_calling_agent_from_any_llm.py
deleted file mode 100644
index c9004a4..0000000
--- a/examples/tool_calling_agent_from_any_llm.py
+++ /dev/null
@@ -1,30 +0,0 @@
-from typing import Optional
-
-from smolagents import LiteLLMModel, tool
-from smolagents.agents import ToolCallingAgent
-
-
-# Choose which LLM engine to use!
-# model = HfApiModel(model_id="meta-llama/Llama-3.3-70B-Instruct")
-# model = TransformersModel(model_id="meta-llama/Llama-3.2-2B-Instruct")
-
-# For anthropic: change model_id below to 'anthropic/claude-3-5-sonnet-20240620'
-model = LiteLLMModel(model_id="gpt-4o")
-
-
-@tool
-def get_weather(location: str, celsius: Optional[bool] = False) -> str:
-    """
-    Get weather in the next days at given location.
-    Secretly this tool does not care about the location, it hates the weather everywhere.
-
-    Args:
-        location: the location
-        celsius: the temperature
-    """
-    return "The weather is UNGODLY with torrential rains and temperatures below -10°C"
-
-
-agent = ToolCallingAgent(tools=[get_weather], model=model)
-
-print(agent.run("What's the weather like in Paris?"))
diff --git a/examples/tool_calling_agent_mcp.py b/examples/tool_calling_agent_mcp.py
deleted file mode 100644
index dfc847b..0000000
--- a/examples/tool_calling_agent_mcp.py
+++ /dev/null
@@ -1,29 +0,0 @@
-"""An example of loading a ToolCollection directly from an MCP server.
-
-Requirements: to run this example, you need to have uv installed and in your path in
-order to run the MCP server with uvx see `mcp_server_params` below.
-
-Note this is just a demo MCP server that was implemented for the purpose of this example.
-It only provide a single tool to search amongst pubmed papers abstracts.
-
-Usage:
->>> uv run examples/tool_calling_agent_mcp.py
-"""
-
-import os
-
-from mcp import StdioServerParameters
-
-from smolagents import CodeAgent, HfApiModel, ToolCollection
-
-
-mcp_server_params = StdioServerParameters(
-    command="uvx",
-    args=["--quiet", "pubmedmcp@0.1.3"],
-    env={"UV_PYTHON": "3.12", **os.environ},
-)
-
-with ToolCollection.from_mcp(mcp_server_params) as tool_collection:
-    # print(tool_collection.tools[0](request={"term": "efficient treatment hangover"}))
-    agent = CodeAgent(tools=tool_collection.tools, model=HfApiModel(), max_steps=4)
-    agent.run("Find me one risk associated with drinking alcohol regularly on low doses for humans.")
diff --git a/examples/tool_calling_agent_ollama.py b/examples/tool_calling_agent_ollama.py
deleted file mode 100644
index ceafb57..0000000
--- a/examples/tool_calling_agent_ollama.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from typing import Optional
-
-from smolagents import LiteLLMModel, tool
-from smolagents.agents import ToolCallingAgent
-
-
-model = LiteLLMModel(
-    model_id="ollama_chat/llama3.2",
-    api_base="http://localhost:11434",  # replace with remote open-ai compatible server if necessary
-    api_key="your-api-key",  # replace with API key if necessary
-)
-
-
-@tool
-def get_weather(location: str, celsius: Optional[bool] = False) -> str:
-    """
-    Get weather in the next days at given location.
-    Secretly this tool does not care about the location, it hates the weather everywhere.
-
-    Args:
-        location: the location
-        celsius: the temperature
-    """
-    return "The weather is UNGODLY with torrential rains and temperatures below -10°C"
-
-
-agent = ToolCallingAgent(tools=[get_weather], model=model)
-
-print(agent.run("What's the weather like in Paris?"))
diff --git a/src/smolagents/models.py b/src/smolagents/models.py
index 6ef16e6..9eebf75 100644
--- a/src/smolagents/models.py
+++ b/src/smolagents/models.py
@@ -480,7 +480,6 @@ class TransformersModel(Model):
             messages=messages,
             stop_sequences=stop_sequences,
             grammar=grammar,
-            tools_to_call_from=tools_to_call_from,
             **kwargs,
         )
 
@@ -497,9 +496,6 @@ class TransformersModel(Model):
         if max_new_tokens:
             completion_kwargs["max_new_tokens"] = max_new_tokens
 
-        if stop_sequences:
-            completion_kwargs["stopping_criteria"] = self.make_stopping_criteria(stop_sequences)
-
         if tools_to_call_from is not None:
             prompt_tensor = self.tokenizer.apply_chat_template(
                 messages,
@@ -518,7 +514,11 @@ class TransformersModel(Model):
             prompt_tensor = prompt_tensor.to(self.model.device)
         count_prompt_tokens = prompt_tensor["input_ids"].shape[1]
 
-        out = self.model.generate(**prompt_tensor, **completion_kwargs)
+        out = self.model.generate(
+            **prompt_tensor,
+            stopping_criteria=(self.make_stopping_criteria(stop_sequences) if stop_sequences else None),
+            **completion_kwargs,
+        )
         generated_tokens = out[0, count_prompt_tokens:]
         output = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
         self.last_input_token_count = count_prompt_tokens