diff --git a/README.md b/README.md
index cc0f7b0..b86419b 100644
--- a/README.md
+++ b/README.md
@@ -50,7 +50,7 @@ from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel
 
 agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=HfApiModel())
 
-agent.run("What time would the world's fastest car take to travel from New York to San Francisco?")
+agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?")
 ```
 
 > TODO: Add video
diff --git a/docs/source/guided_tour.md b/docs/source/guided_tour.md
index f965483..e2906b7 100644
--- a/docs/source/guided_tour.md
+++ b/docs/source/guided_tour.md
@@ -23,33 +23,10 @@ In this guided visit, you will learn how to build an agent, how to run it, and h
 
 To initialize a minimal agent, you need at least these two arguments:
 
-- An LLM to power your agent - because the agent is different from a simple LLM, it is a system that uses a LLM as its engine.
+- A text-generation model to power your agent - because the agent is different from a simple LLM, it is a system that uses an LLM as its engine.
 - A list of tools from which the agent can pick tools to execute
 
-For defining your LLM, you can make a `custom_model` method which accepts a list of [messages](./chat_templating) and returns text. This callable also needs to accept a `stop_sequences` argument that indicates when to stop generating.
-
-```python
-from huggingface_hub import login, InferenceClient
-
-login("<YOUR_HUGGINGFACEHUB_API_TOKEN>")
-
-model_id = "meta-llama/Llama-3.3-70B-Instruct"
-
-client = InferenceClient(model=model_id)
-
-def custom_model(messages, stop_sequences=["Task"]) -> str:
-    response = client.chat_completion(messages, stop=stop_sequences, max_tokens=1000)
-    answer = response.choices[0].message.content
-    return answer
-```
-
-You could use any `custom_model` method as long as:
-1. it follows the [messages format](./chat_templating) (`List[Dict[str, str]]`) for its input `messages`, and it returns a `str`.
-2. it stops generating outputs at the sequences passed in the argument `stop_sequences`
-
-Additionally, `custom_model` can also take a `grammar` argument. In the case where you specify a `grammar` upon agent initialization, this argument will be passed to the calls to model, with the `grammar` that you defined upon initialization, to allow [constrained generation](https://huggingface.co/docs/text-generation-inference/conceptual/guidance) in order to force properly-formatted agent outputs.
-
-For convenience, we provide pre-built classes for your model engine:
+For your model, you can use any of these options:
 - [`TransformersModel`] takes a pre-initialized `transformers` pipeline to run inference on your local machine using `transformers`.
 - [`HfApiModel`] leverages a `huggingface_hub.InferenceClient` under the hood.
 - We also provide [`LiteLLMModel`], which lets you call 100+ different models through [LiteLLM](https://docs.litellm.ai/)!
@@ -60,6 +37,11 @@ Once you have these two arguments, `tools` and `model`, you can create an agent
 
 ```python
 from smolagents import CodeAgent, HfApiModel
+from huggingface_hub import login
+
+login("<YOUR_HUGGINGFACEHUB_API_TOKEN>")
+
+model_id = "meta-llama/Llama-3.3-70B-Instruct"
 
 model = HfApiModel(model_id=model_id)
 agent = CodeAgent(tools=[], model=model, add_base_tools=True)
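The three model options the guided tour now lists are interchangeable in the snippet above. As a minimal sketch (not part of this diff), the same agent can run through `LiteLLMModel` instead of `HfApiModel`; the model string is an illustrative placeholder, and it assumes `litellm` is installed and the matching provider API key is set in your environment:

```python
# Sketch: swapping HfApiModel for LiteLLMModel in the minimal agent above.
# "anthropic/claude-3-5-sonnet-latest" is a placeholder; any model string that
# LiteLLM supports should work, provided the corresponding API key is configured.
from smolagents import CodeAgent, LiteLLMModel

model = LiteLLMModel(model_id="anthropic/claude-3-5-sonnet-latest")
agent = CodeAgent(tools=[], model=model, add_base_tools=True)

agent.run("Could you give me the 118th number in the Fibonacci sequence?")
```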
diff --git a/docs/source/reference/agents.md b/docs/source/reference/agents.md
index da51963..cac98aa 100644
--- a/docs/source/reference/agents.md
+++ b/docs/source/reference/agents.md
@@ -57,10 +57,30 @@ We provide two types of agents, based on the main [`Agent`] class.
 
 ## Models
 
-You're free to create and use your own engines to be usable by the Agents framework.
-These engines have the following specification:
-1. Follow the [messages format](../chat_templating.md) for its input (`List[Dict[str, str]]`) and return a string.
-2. Stop generating outputs *before* the sequences passed in the argument `stop_sequences`
+You're free to create and use your own models to power your agent.
+
+You can use any `model` callable for your agent, as long as:
+1. It follows the [messages format](../chat_templating.md) (`List[Dict[str, str]]`) for its input `messages`, and it returns a `str`.
+2. It stops generating outputs *before* the sequences passed in the argument `stop_sequences`
+
+To define your LLM, you can write a `custom_model` callable which accepts a list of [messages](../chat_templating.md) and returns text. This callable also needs to accept a `stop_sequences` argument that indicates when to stop generating.
+
+```python
+from huggingface_hub import login, InferenceClient
+
+login("<YOUR_HUGGINGFACEHUB_API_TOKEN>")
+
+model_id = "meta-llama/Llama-3.3-70B-Instruct"
+
+client = InferenceClient(model=model_id)
+
+def custom_model(messages, stop_sequences=["Task"]) -> str:
+    response = client.chat_completion(messages, stop=stop_sequences, max_tokens=1000)
+    answer = response.choices[0].message.content
+    return answer
+```
+
+Additionally, `custom_model` can take a `grammar` argument: if you specify a `grammar` upon agent initialization, it is passed along on each call to the model, allowing [constrained generation](https://huggingface.co/docs/text-generation-inference/conceptual/guidance) to force properly-formatted agent outputs.
 
 ### TransformersModel
 
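A note on rule 2 above: some backends return the stop sequence as part of the completion, so a custom model callable may need to trim it before returning. A minimal sketch of that trimming, with a hypothetical helper name not taken from this diff:

```python
# Hypothetical helper illustrating rule 2: cut the completion just before the
# earliest stop sequence, so callers never see text at or past a stop marker.
def cut_at_stop_sequences(text: str, stop_sequences: list[str]) -> str:
    cut = len(text)
    for stop in stop_sequences:
        idx = text.find(stop)
        if idx != -1:
            cut = min(cut, idx)
    return text[:cut]


# Example: generation that ran past the "Task" stop sequence gets truncated.
assert cut_at_stop_sequences("Thought: done.\nTask 2:", ["Task"]) == "Thought: done.\n"
```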
diff --git a/examples/e2b_example.py b/examples/e2b_example.py
index 0f3b0e8..049dc15 100644
--- a/examples/e2b_example.py
+++ b/examples/e2b_example.py
@@ -23,7 +23,6 @@ class GetCatImageTool(Tool):
         return Image.open(BytesIO(response.content))
 
 
-LAUNCH_GRADIO = False
 
 get_cat_image = GetCatImageTool()
 
@@ -34,11 +33,11 @@ agent = CodeAgent(
     use_e2b_executor=True
 )
 
-if LAUNCH_GRADIO:
-    from smolagents import GradioUI
+agent.run(
+    "Return me an image of a cat. Directly use the image provided in your state.", additional_args={"cat_image": get_cat_image()}
+)  # Asking the agent to return the image directly from its state tests that additional_args are properly passed to the server.
 
-    GradioUI(agent).launch()
-else:
-    agent.run(
-        "Return me an image of a cat. Directly use the image provided in your state.", additional_args={"cat_image":get_cat_image()}
-    )  # Asking to directly return the image from state tests that additional_args are properly sent to server.
+# Try the agent in a Gradio UI
+from smolagents import GradioUI
+
+GradioUI(agent).launch()
\ No newline at end of file
diff --git a/src/smolagents/default_tools.py b/src/smolagents/default_tools.py
index a9451f6..4c71294 100644
--- a/src/smolagents/default_tools.py
+++ b/src/smolagents/default_tools.py
@@ -153,15 +153,21 @@ class DuckDuckGoSearchTool(Tool):
     }
     output_type = "any"
 
-    def forward(self, query: str) -> list[dict[str, str]]:
+    def __init__(self):
+        super().__init__()
         try:
             from duckduckgo_search import DDGS
         except ImportError:
             raise ImportError(
                 "You must install package `duckduckgo_search` to run this tool: for instance run `pip install duckduckgo-search`."
             )
-        results = DDGS().text(query, max_results=7)
-        return results
+        self.ddgs = DDGS()
+
+
+    def forward(self, query: str) -> str:
+        results = self.ddgs.text(query, max_results=10)
+        postprocessed_results = [f"[{result['title']}]({result['href']})\n{result['body']}" for result in results]
+        return "## Search Results\n\n" + "\n\n".join(postprocessed_results)
 
 
 class GoogleSearchTool(Tool):
@@ -246,7 +252,7 @@ class GoogleSearchTool(Tool):
             )
             web_snippets.append(redacted_version)
 
-        return "## Web Results\n" + "\n\n".join(web_snippets)
+        return "## Search Results\n" + "\n\n".join(web_snippets)
 
 
 class VisitWebpageTool(Tool):
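To see the shape of the reworked `DuckDuckGoSearchTool` output, a quick usage sketch; the query is arbitrary, and it assumes `duckduckgo-search` is installed and the network is reachable:

```python
# Sketch: calling the updated tool directly. Per the diff above, forward() now
# returns one markdown string instead of a raw list of result dicts.
from smolagents import DuckDuckGoSearchTool

search_tool = DuckDuckGoSearchTool()  # the DDGS client is now created here, at init time
print(search_tool.forward("hugging face smolagents"))
# Illustrative shape of the output, not actual results:
# ## Search Results
#
# [Some page title](https://example.com/page)
# First lines of the page snippet...
```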