From 775f6e8d7f6f07f861d7f056cbd92e323ff6cce0 Mon Sep 17 00:00:00 2001 From: Aymeric Date: Tue, 24 Dec 2024 12:18:18 +0100 Subject: [PATCH] Make doc buildable with new names --- docs/README.md | 4 ++-- docs/source/reference/agents.md | 11 ++++------- docs/source/reference/tools.md | 6 +++--- pyproject.toml | 1 + src/smolagents/agents.py | 3 +-- src/smolagents/default_tools.py | 19 ++++++++++++------- src/smolagents/gradio_ui.py | 4 ++-- src/smolagents/tools.py | 31 ++++++++++++++++++++++--------- 8 files changed, 47 insertions(+), 32 deletions(-) diff --git a/docs/README.md b/docs/README.md index 768595a..3c716fa 100644 --- a/docs/README.md +++ b/docs/README.md @@ -47,7 +47,7 @@ Once you have setup the `doc-builder` and additional packages with the pip insta you can generate the documentation by typing the following command: ```bash -doc-builder build agents docs/source/ --build_dir ~/tmp/test-build +doc-builder build smolagents docs/source/ --build_dir ~/tmp/test-build ``` You can adapt the `--build_dir` to set any temporary folder that you prefer. This command will create it and generate @@ -59,7 +59,7 @@ Markdown editor. To preview the docs, run the following command: ```bash -doc-builder preview agents docs/source/ +doc-builder preview smolagents docs/source/ ``` The docs will be viewable at [http://localhost:5173](http://localhost:5173). You can also preview the docs once you diff --git a/docs/source/reference/agents.md b/docs/source/reference/agents.md index 4e82ad8..059c877 100644 --- a/docs/source/reference/agents.md +++ b/docs/source/reference/agents.md @@ -30,21 +30,18 @@ contains the API docs for the underlying classes. Our agents inherit from [`MultiStepAgent`], which means they can act in multiple steps, each step consisting of one thought, then one tool call and execution. Read more in [this conceptual guide](../conceptual_guides/react). We provide two types of agents, based on the main [`Agent`] class. + - [`CodeAgent`] is the default agent, it writes its tool calls in Python code. - [`JsonAgent`] writes its tool calls in JSON. - - [`CodeAgent`] writes its tool calls in Python code. - -### BaseAgent - -[[autodoc]] BaseAgent -### React agents +### Classes of agents [[autodoc]] MultiStepAgent +[[autodoc]] CodeAgent + [[autodoc]] JsonAgent -[[autodoc]] CodeAgent ### ManagedAgent diff --git a/docs/source/reference/tools.md b/docs/source/reference/tools.md index 8f53a16..60d510b 100644 --- a/docs/source/reference/tools.md +++ b/docs/source/reference/tools.md @@ -71,12 +71,12 @@ These types have three specific purposes: ### AgentText -[[autodoc]] agents.types.AgentText +[[autodoc]] smolagents.types.AgentText ### AgentImage -[[autodoc]] agents.types.AgentImage +[[autodoc]] smolagents.types.AgentImage ### AgentAudio -[[autodoc]] agents.types.AgentAudio +[[autodoc]] smolagents.types.AgentAudio diff --git a/pyproject.toml b/pyproject.toml index c792337..e867a76 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ dependencies = [ "duckduckgo-search>=6.3.7", "python-dotenv>=1.0.1", "e2b-code-interpreter>=1.0.3", + "torch>=2.5.1", ] [project.optional-dependencies] diff --git a/src/smolagents/agents.py b/src/smolagents/agents.py index ce178d0..ac2f01d 100644 --- a/src/smolagents/agents.py +++ b/src/smolagents/agents.py @@ -177,6 +177,7 @@ class MultiStepAgent: Agent class that solves the given task step by step, using the ReAct framework: While the objective is not reached, the agent will perform a cycle of action (given by the LLM) and observation (obtained from the environment). """ + def __init__( self, tools: Union[List[Tool], Toolbox], @@ -378,7 +379,6 @@ class MultiStepAgent: ) return rationale.strip(), action.strip() - def provide_final_answer(self, task) -> str: """ This method provides a final answer to the task, based on the logs of the agent's interactions. @@ -1148,7 +1148,6 @@ class ManagedAgent: __all__ = [ "AgentError", - "BaseAgent", "ManagedAgent", "MultiStepAgent", "CodeAgent", diff --git a/src/smolagents/default_tools.py b/src/smolagents/default_tools.py index d820fc0..52b9c09 100644 --- a/src/smolagents/default_tools.py +++ b/src/smolagents/default_tools.py @@ -18,11 +18,13 @@ import json import re from dataclasses import dataclass from typing import Dict -import torch from huggingface_hub import hf_hub_download, list_spaces from transformers.utils import is_offline_mode -from transformers.models.whisper import WhisperProcessor, WhisperForConditionalGeneration +from transformers.models.whisper import ( + WhisperProcessor, + WhisperForConditionalGeneration, +) from .local_python_executor import ( BASE_BUILTIN_MODULES, @@ -136,10 +138,6 @@ class UserInputTool(Tool): user_input = input(f"{question} => ") return user_input -import re - -from .tools import Tool - class DuckDuckGoSearchTool(Tool): name = "web_search" @@ -221,4 +219,11 @@ class SpeechToTextTool(PipelineTool): return self.pre_processor.batch_decode(outputs, skip_special_tokens=True)[0] -__all__ = ["PythonInterpreterTool", "FinalAnswerTool", "UserInputTool", "DuckDuckGoSearchTool", "VisitWebpageTool", "SpeechToTextTool"] +__all__ = [ + "PythonInterpreterTool", + "FinalAnswerTool", + "UserInputTool", + "DuckDuckGoSearchTool", + "VisitWebpageTool", + "SpeechToTextTool", +] diff --git a/src/smolagents/gradio_ui.py b/src/smolagents/gradio_ui.py index 8d99194..4a724db 100644 --- a/src/smolagents/gradio_ui.py +++ b/src/smolagents/gradio_ui.py @@ -15,7 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. from .types import AgentAudio, AgentImage, AgentText, handle_agent_output_types -from .agents import BaseAgent, AgentStep, ActionStep +from .agents import MultiStepAgent, AgentStep, ActionStep import gradio as gr @@ -83,7 +83,7 @@ def stream_to_gradio( class GradioUI: """A one-line interface to launch your agent in Gradio""" - def __init__(self, agent: BaseAgent): + def __init__(self, agent: MultiStepAgent): self.agent = agent def interact_with_agent(self, prompt, messages): diff --git a/src/smolagents/tools.py b/src/smolagents/tools.py index 8472e0b..45b1201 100644 --- a/src/smolagents/tools.py +++ b/src/smolagents/tools.py @@ -20,6 +20,7 @@ import inspect import json import os import tempfile +import torch import textwrap from functools import lru_cache, wraps from pathlib import Path @@ -42,6 +43,7 @@ from transformers.utils import ( is_torch_available, ) from transformers.dynamic_module_utils import get_imports +from transformers import AutoProcessor from .types import ImageType, handle_agent_input_types, handle_agent_output_types from .utils import instance_to_source @@ -753,7 +755,7 @@ def launch_gradio_demo(tool: Tool): TOOL_MAPPING = { "python_interpreter": "PythonInterpreterTool", "web_search": "DuckDuckGoSearchTool", - "transcriber": "SpeechToTextTool" + "transcriber": "SpeechToTextTool", } @@ -1004,8 +1006,6 @@ class Toolbox: toolbox_description += f"\t{tool.name}: {tool.description}\n" return toolbox_description -from transformers import AutoProcessor -from .types import handle_agent_input_types, handle_agent_output_types class PipelineTool(Tool): """ @@ -1073,7 +1073,9 @@ class PipelineTool(Tool): if model is None: if self.default_checkpoint is None: - raise ValueError("This tool does not implement a default checkpoint, you need to pass one.") + raise ValueError( + "This tool does not implement a default checkpoint, you need to pass one." + ) model = self.default_checkpoint if pre_processor is None: pre_processor = model @@ -1098,15 +1100,21 @@ class PipelineTool(Tool): from accelerate import PartialState if isinstance(self.pre_processor, str): - self.pre_processor = self.pre_processor_class.from_pretrained(self.pre_processor, **self.hub_kwargs) + self.pre_processor = self.pre_processor_class.from_pretrained( + self.pre_processor, **self.hub_kwargs + ) if isinstance(self.model, str): - self.model = self.model_class.from_pretrained(self.model, **self.model_kwargs, **self.hub_kwargs) + self.model = self.model_class.from_pretrained( + self.model, **self.model_kwargs, **self.hub_kwargs + ) if self.post_processor is None: self.post_processor = self.pre_processor elif isinstance(self.post_processor, str): - self.post_processor = self.post_processor_class.from_pretrained(self.post_processor, **self.hub_kwargs) + self.post_processor = self.post_processor_class.from_pretrained( + self.post_processor, **self.hub_kwargs + ) if self.device is None: if self.device_map is not None: @@ -1149,8 +1157,12 @@ class PipelineTool(Tool): import torch from accelerate.utils import send_to_device - tensor_inputs = {k: v for k, v in encoded_inputs.items() if isinstance(v, torch.Tensor)} - non_tensor_inputs = {k: v for k, v in encoded_inputs.items() if not isinstance(v, torch.Tensor)} + tensor_inputs = { + k: v for k, v in encoded_inputs.items() if isinstance(v, torch.Tensor) + } + non_tensor_inputs = { + k: v for k, v in encoded_inputs.items() if not isinstance(v, torch.Tensor) + } encoded_inputs = send_to_device(tensor_inputs, self.device) outputs = self.forward({**encoded_inputs, **non_tensor_inputs}) @@ -1159,6 +1171,7 @@ class PipelineTool(Tool): return handle_agent_output_types(decoded_outputs, self.output_type) + __all__ = [ "AUTHORIZED_TYPES", "Tool",