Make doc buildable with new names

Aymeric 2024-12-24 12:18:18 +01:00
parent 021ece34ca
commit 775f6e8d7f
8 changed files with 47 additions and 32 deletions

View File

@@ -47,7 +47,7 @@ Once you have setup the `doc-builder` and additional packages with the pip install
you can generate the documentation by typing the following command:
```bash
-doc-builder build agents docs/source/ --build_dir ~/tmp/test-build
+doc-builder build smolagents docs/source/ --build_dir ~/tmp/test-build
```
You can adapt the `--build_dir` to set any temporary folder that you prefer. This command will create it and generate
@@ -59,7 +59,7 @@ Markdown editor.
To preview the docs, run the following command:
```bash
-doc-builder preview agents docs/source/
+doc-builder preview smolagents docs/source/
```
The docs will be viewable at [http://localhost:5173](http://localhost:5173). You can also preview the docs once you

View File

@@ -30,21 +30,18 @@ contains the API docs for the underlying classes.
Our agents inherit from [`MultiStepAgent`], which means they can act in multiple steps, each step consisting of one thought, then one tool call and execution. Read more in [this conceptual guide](../conceptual_guides/react).
We provide two types of agents, based on the main [`Agent`] class.
-- [`CodeAgent`] is the default agent, it writes its tool calls in Python code.
-- [`JsonAgent`] writes its tool calls in JSON.
+- [`CodeAgent`] writes its tool calls in Python code.
-### BaseAgent
-[[autodoc]] BaseAgent
-### React agents
+### Classes of agents
[[autodoc]] MultiStepAgent
-[[autodoc]] CodeAgent
-[[autodoc]] JsonAgent
+[[autodoc]] CodeAgent
### ManagedAgent
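For orientation on the renamed classes: a minimal usage sketch of `CodeAgent` with the `DuckDuckGoSearchTool` exported later in this commit. Only the class names and the `tools` parameter are visible in this diff; the `HfApiModel` wrapper and the `run()` entry point are assumptions drawn from the package's README conventions.

```python
# Minimal sketch (assumed API): run the default code-writing agent with web search.
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel  # HfApiModel is assumed

# CodeAgent writes its tool calls as Python code, per the bullet above.
agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=HfApiModel())  # model= is assumed
agent.run("How long is the Pont des Arts in meters?")  # run() is assumed
```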

View File

@@ -71,12 +71,12 @@ These types have three specific purposes:
### AgentText
-[[autodoc]] agents.types.AgentText
+[[autodoc]] smolagents.types.AgentText
### AgentImage
-[[autodoc]] agents.types.AgentImage
+[[autodoc]] smolagents.types.AgentImage
### AgentAudio
-[[autodoc]] agents.types.AgentAudio
+[[autodoc]] smolagents.types.AgentAudio
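A short sketch of how these wrapper types are typically used. The constructors and helpers below are assumptions (this diff only shows the autodoc entries): `AgentText` presumably behaves like `str`, and `AgentImage` like a PIL image.

```python
# Sketch under assumed constructors: agent output types behave like their
# native counterparts while staying serializable for the agent framework.
from smolagents.types import AgentImage, AgentText

text = AgentText("The bridge is 155 m long.")
assert isinstance(text, str)     # assumed: AgentText subclasses str

image = AgentImage("chart.png")  # assumed: accepts a path, PIL image, or tensor
raw = image.to_raw()             # assumed helper returning the underlying PIL.Image
```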

View File

@@ -22,6 +22,7 @@ dependencies = [
"duckduckgo-search>=6.3.7",
"python-dotenv>=1.0.1",
"e2b-code-interpreter>=1.0.3",
"torch>=2.5.1",
]
[project.optional-dependencies]

View File

@@ -177,6 +177,7 @@ class MultiStepAgent:
Agent class that solves the given task step by step, using the ReAct framework:
While the objective is not reached, the agent will perform a cycle of action (given by the LLM) and observation (obtained from the environment).
"""
def __init__(
self,
tools: Union[List[Tool], Toolbox],
@@ -378,7 +379,6 @@ class MultiStepAgent:
)
return rationale.strip(), action.strip()
def provide_final_answer(self, task) -> str:
"""
This method provides a final answer to the task, based on the logs of the agent's interactions.
@@ -1148,7 +1148,6 @@ class ManagedAgent:
__all__ = [
"AgentError",
"BaseAgent",
"ManagedAgent",
"MultiStepAgent",
"CodeAgent",

View File

@@ -18,11 +18,13 @@ import json
import re
from dataclasses import dataclass
from typing import Dict
import torch
from huggingface_hub import hf_hub_download, list_spaces
from transformers.utils import is_offline_mode
-from transformers.models.whisper import WhisperProcessor, WhisperForConditionalGeneration
+from transformers.models.whisper import (
+WhisperProcessor,
+WhisperForConditionalGeneration,
+)
from .local_python_executor import (
BASE_BUILTIN_MODULES,
@@ -136,10 +138,6 @@ class UserInputTool(Tool):
user_input = input(f"{question} => ")
return user_input
-import re
-from .tools import Tool
class DuckDuckGoSearchTool(Tool):
name = "web_search"
@@ -221,4 +219,11 @@ class SpeechToTextTool(PipelineTool):
return self.pre_processor.batch_decode(outputs, skip_special_tokens=True)[0]
__all__ = ["PythonInterpreterTool", "FinalAnswerTool", "UserInputTool", "DuckDuckGoSearchTool", "VisitWebpageTool", "SpeechToTextTool"]
__all__ = [
"PythonInterpreterTool",
"FinalAnswerTool",
"UserInputTool",
"DuckDuckGoSearchTool",
"VisitWebpageTool",
"SpeechToTextTool",
]

View File

@@ -15,7 +15,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .types import AgentAudio, AgentImage, AgentText, handle_agent_output_types
-from .agents import BaseAgent, AgentStep, ActionStep
+from .agents import MultiStepAgent, AgentStep, ActionStep
import gradio as gr
@@ -83,7 +83,7 @@ def stream_to_gradio(
class GradioUI:
"""A one-line interface to launch your agent in Gradio"""
-def __init__(self, agent: BaseAgent):
+def __init__(self, agent: MultiStepAgent):
self.agent = agent
def interact_with_agent(self, prompt, messages):
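Given the docstring's promise of a one-line interface, usage presumably looks like the sketch below; the `launch()` method and `HfApiModel` are assumptions following Gradio and README conventions, neither is shown in this hunk.

```python
# Sketch (assumed API): wrap any MultiStepAgent in a Gradio chat UI.
from smolagents import CodeAgent, HfApiModel      # HfApiModel is assumed
from smolagents.gradio_ui import GradioUI         # import path assumed from this file

agent = CodeAgent(tools=[], model=HfApiModel())   # model= is assumed
GradioUI(agent).launch()                          # launch() assumed per Gradio convention
```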

View File

@@ -20,6 +20,7 @@ import inspect
import json
import os
import tempfile
+import torch
import textwrap
from functools import lru_cache, wraps
from pathlib import Path
@@ -42,6 +43,7 @@ from transformers.utils import (
is_torch_available,
)
from transformers.dynamic_module_utils import get_imports
+from transformers import AutoProcessor
from .types import ImageType, handle_agent_input_types, handle_agent_output_types
from .utils import instance_to_source
@@ -753,7 +755,7 @@ def launch_gradio_demo(tool: Tool):
TOOL_MAPPING = {
"python_interpreter": "PythonInterpreterTool",
"web_search": "DuckDuckGoSearchTool",
"transcriber": "SpeechToTextTool"
"transcriber": "SpeechToTextTool",
}
@@ -1004,8 +1006,6 @@ class Toolbox:
toolbox_description += f"\t{tool.name}: {tool.description}\n"
return toolbox_description
-from transformers import AutoProcessor
-from .types import handle_agent_input_types, handle_agent_output_types
class PipelineTool(Tool):
"""
@@ -1073,7 +1073,9 @@ class PipelineTool(Tool):
if model is None:
if self.default_checkpoint is None:
raise ValueError("This tool does not implement a default checkpoint, you need to pass one.")
raise ValueError(
"This tool does not implement a default checkpoint, you need to pass one."
)
model = self.default_checkpoint
if pre_processor is None:
pre_processor = model
@@ -1098,15 +1100,21 @@ class PipelineTool(Tool):
from accelerate import PartialState
if isinstance(self.pre_processor, str):
-self.pre_processor = self.pre_processor_class.from_pretrained(self.pre_processor, **self.hub_kwargs)
+self.pre_processor = self.pre_processor_class.from_pretrained(
+self.pre_processor, **self.hub_kwargs
+)
if isinstance(self.model, str):
-self.model = self.model_class.from_pretrained(self.model, **self.model_kwargs, **self.hub_kwargs)
+self.model = self.model_class.from_pretrained(
+self.model, **self.model_kwargs, **self.hub_kwargs
+)
if self.post_processor is None:
self.post_processor = self.pre_processor
elif isinstance(self.post_processor, str):
-self.post_processor = self.post_processor_class.from_pretrained(self.post_processor, **self.hub_kwargs)
+self.post_processor = self.post_processor_class.from_pretrained(
+self.post_processor, **self.hub_kwargs
+)
if self.device is None:
if self.device_map is not None:
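The `setup()` logic above resolves string checkpoints through each `*_class.from_pretrained(...)`. A stripped-down hypothetical subclass illustrating that contract (the import path, the `inputs` schema, and the checkpoint name are all assumptions; compare the real `SpeechToTextTool` earlier in this commit):

```python
# Hypothetical PipelineTool subclass showing the setup() contract above:
# string attributes are turned into real objects via <class>.from_pretrained(...).
from transformers.models.whisper import (
    WhisperForConditionalGeneration,
    WhisperProcessor,
)
from smolagents.tools import PipelineTool  # import path assumed

class MyTranscriberTool(PipelineTool):
    name = "my_transcriber"
    description = "Transcribes an audio file to text."
    inputs = {"audio": {"type": "audio", "description": "Audio to transcribe"}}  # schema assumed
    output_type = "string"

    default_checkpoint = "openai/whisper-tiny"       # placeholder, not the commit's default
    pre_processor_class = WhisperProcessor           # resolved in setup()
    model_class = WhisperForConditionalGeneration    # resolved in setup()

    def encode(self, audio):
        return self.pre_processor(audio, return_tensors="pt")

    def forward(self, inputs):
        return self.model.generate(inputs["input_features"])  # generate() call assumed

    def decode(self, outputs):
        # Mirrors the SpeechToTextTool.decode shown earlier in this commit.
        return self.pre_processor.batch_decode(outputs, skip_special_tokens=True)[0]
```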
@@ -1149,8 +1157,12 @@ class PipelineTool(Tool):
import torch
from accelerate.utils import send_to_device
-tensor_inputs = {k: v for k, v in encoded_inputs.items() if isinstance(v, torch.Tensor)}
-non_tensor_inputs = {k: v for k, v in encoded_inputs.items() if not isinstance(v, torch.Tensor)}
+tensor_inputs = {
+k: v for k, v in encoded_inputs.items() if isinstance(v, torch.Tensor)
+}
+non_tensor_inputs = {
+k: v for k, v in encoded_inputs.items() if not isinstance(v, torch.Tensor)
+}
encoded_inputs = send_to_device(tensor_inputs, self.device)
outputs = self.forward({**encoded_inputs, **non_tensor_inputs})
@@ -1159,6 +1171,7 @@ class PipelineTool(Tool):
return handle_agent_output_types(decoded_outputs, self.output_type)
__all__ = [
"AUTHORIZED_TYPES",
"Tool",