Add examples to use any LLM as engine
parent 8ed03634b0
commit 9232528232
@@ -261,7 +261,7 @@ For maximum flexibility, you can overwrite the whole system prompt template by p
 
 ```python
 from transformers import JsonAgent
-from transformers.agents import PythonInterpreterTool
+from agents import PythonInterpreterTool
 
 agent = JsonAgent(tools=[PythonInterpreterTool()], system_prompt="{your_custom_prompt}")
 ```
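As a quick illustration of the `system_prompt` override shown above, here is a hedged sketch of what a custom prompt might contain. It assumes the `{{tool_descriptions}}` placeholder convention used by the default templates; treat the placeholder name as an assumption, not part of this commit. The imports mirror the snippet above.

```python
from transformers import JsonAgent
from agents import PythonInterpreterTool

# Sketch only: {{tool_descriptions}} is assumed to be substituted with the
# toolbox descriptions at agent init time, following the default templates.
custom_prompt = """You are an assistant that solves tasks using tools.
You have access to the following tools:
{{tool_descriptions}}
Proceed step by step, calling one tool per step."""

agent = JsonAgent(tools=[PythonInterpreterTool()], system_prompt=custom_prompt)
```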
@@ -381,14 +381,14 @@ Multi-agent has been introduced in Microsoft's framework [Autogen](https://huggi
 It simply means having several agents working together to solve your task instead of only one.
 It empirically yields better performance on most benchmarks. The reason for this better performance is conceptually simple: for many tasks, rather than using a do-it-all system, you would prefer to specialize units on sub-tasks. Here, having agents with separate tool sets and memories allows for efficient specialization.
 
-You can easily build hierarchical multi-agent systems with `transformers.agents`.
+You can easily build hierarchical multi-agent systems with `agents`.
 
 To do so, encapsulate the agent in a [`ManagedAgent`] object. This object needs the arguments `agent`, `name`, and `description`, which will then be embedded in the manager agent's system prompt to let it know how to call this managed agent, as we also do for tools.
 
 Here's an example of making an agent that manages a specific web search agent using our [`DuckDuckGoSearchTool`]:
 
 ```py
-from transformers.agents import CodeAgent, HfApiEngine, DuckDuckGoSearchTool, ManagedAgent
+from agents import CodeAgent, HfApiEngine, DuckDuckGoSearchTool, ManagedAgent
 
 llm_engine = HfApiEngine()
 
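The hunk is truncated right after `llm_engine = HfApiEngine()`. A minimal sketch of how the managed-agent example presumably continues, following the `ManagedAgent(agent, name, description)` signature described above (the `managed_agents` parameter on the manager is an assumption here):

```py
# Build a dedicated web-search agent with its own tool set.
web_agent = CodeAgent(tools=[DuckDuckGoSearchTool()], llm_engine=llm_engine)

# Wrap it so a manager agent can call it by name, like a tool.
managed_web_agent = ManagedAgent(
    agent=web_agent,
    name="web_search",
    description="Runs web searches for you. Give it your query as an argument.",
)

# The manager itself needs no tools; it delegates to the managed agent.
manager_agent = CodeAgent(tools=[], llm_engine=llm_engine, managed_agents=[managed_web_agent])
manager_agent.run("Who is the CEO of Hugging Face?")
```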
@@ -19,7 +19,7 @@ There's a world of difference between building an agent that works and one that
 In this guide, we're going to see best practices for building agents.
 
 > [!TIP]
-> If you're new to `transformers.agents`, make sure to first read the [intro to agents](./intro_agents).
+> If you're new to `agents`, make sure to first read the [intro to agents](./intro_agents).
 
 ### The best agentic systems are the simplest: simplify the workflow as much as you can
 
@@ -156,12 +156,12 @@ These types have three specific purposes:
 
 ### AgentText
 
-[[autodoc]] transformers.agents.agent_types.AgentText
+[[autodoc]] agents.types.AgentText
 
 ### AgentImage
 
-[[autodoc]] transformers.agents.agent_types.AgentImage
+[[autodoc]] agents.types.AgentImage
 
 ### AgentAudio
 
-[[autodoc]] transformers.agents.agent_types.AgentAudio
+[[autodoc]] agents.types.AgentAudio
@@ -20,7 +20,7 @@ rendered properly in your Markdown viewer.
 Here, we're going to see advanced tool usage.
 
 > [!TIP]
-> If you're new to `transformers.agents`, make sure to first read the main [agents documentation](./agents).
+> If you're new to `agents`, make sure to first read the main [agents documentation](./agents).
 
 
 ### Directly define a tool by subclassing Tool, and share it on the Hub
@@ -0,0 +1,30 @@
+from agents import OpenAIEngine, AnthropicEngine, HfApiEngine, CodeAgent
+from dotenv import load_dotenv
+
+load_dotenv()
+
+openai_engine = OpenAIEngine(model_name="gpt-4o")
+
+agent = CodeAgent([], llm_engine=openai_engine)
+
+print("\n\n##############")
+print("Running OpenAI agent:")
+agent.run("What is the 10th Fibonacci Number?")
+
+
+anthropic_engine = AnthropicEngine()
+
+agent = CodeAgent([], llm_engine=anthropic_engine)
+
+print("\n\n##############")
+print("Running Anthropic agent:")
+agent.run("What is the 10th Fibonacci Number?")
+
+# Here, the token stored in the HF_TOKEN environment variable needs the scopes 'Make calls to the serverless Inference API' and 'Read access to contents of all public gated repos you can access'.
+llama_engine = HfApiEngine(model="meta-llama/Llama-3.3-70B-Instruct")
+
+agent = CodeAgent([], llm_engine=llama_engine)
+
+print("\n\n##############")
+print("Running Llama3.3-70B agent:")
+agent.run("What is the 10th Fibonacci Number?")
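For this script, the `.env` file read by `load_dotenv()` would presumably provide the keys the engines look up via `os.getenv`; an illustrative layout (variable names taken from the engine code, values are placeholders):

```
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
HF_TOKEN=hf_...
```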
@@ -22,3 +22,8 @@ dependencies = [
     "duckduckgo-search>=6.3.7",
     "python-dotenv>=1.0.1"
 ]
+
+[project.optional-dependencies]
+dev = [
+    "anthropic",
+]
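With this extra declared, the Anthropic SDK is presumably pulled in with something like `pip install -e ".[dev]"` rather than with the base install, matching the lazy `from anthropic import ...` inside `AnthropicEngine` below.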
@@ -26,9 +26,17 @@ from transformers.utils.import_utils import define_import_structure
 
 if TYPE_CHECKING:
     from .agents import *
-    from .llm_engine import *
+    from .default_tools import *
+    from .gradio_ui import *
+    from .llm_engines import *
+    from .local_python_executor import *
     from .monitoring import *
+    from .prompts import *
+    from .search import *
     from .tools import *
+    from .types import *
+    from .utils import *
+
 else:
     import sys
 
@@ -22,9 +22,9 @@ from rich.syntax import Syntax
 from transformers.utils import is_torch_available
 
 from .utils import console, parse_code_blob, parse_json_tool_call, truncate_content
-from .agent_types import AgentAudio, AgentImage
+from .types import AgentAudio, AgentImage
 from .default_tools import BASE_PYTHON_TOOLS, FinalAnswerTool
-from .llm_engine import HfApiEngine, MessageRole
+from .llm_engines import HfApiEngine, MessageRole
 from .monitoring import Monitor
 from .prompts import (
     CODE_SYSTEM_PROMPT,
@@ -492,7 +492,7 @@ class ReactAgent(BaseAgent):
 
         Example:
         ```py
-        from transformers.agents import CodeAgent
+        from agents import CodeAgent
         agent = CodeAgent(tools=[])
         agent.run("What is the result of 2 power 3.7384?")
         ```
@@ -811,7 +811,7 @@ class JsonAgent(ReactAgent):
             )
             log_entry.llm_output = llm_output
         except Exception as e:
-            raise AgentGenerationError(f"Error in generating llm output: {e}.")
+            raise AgentGenerationError(f"Error in generating llm_engine output: {e}.")
 
         if self.verbose:
             console.rule("[italic]Output message of the LLM:")
@@ -944,7 +944,7 @@ class CodeAgent(ReactAgent):
             )
             log_entry.llm_output = llm_output
         except Exception as e:
-            raise AgentGenerationError(f"Error in generating llm output: {e}.")
+            raise AgentGenerationError(f"Error in generating llm_engine output: {e}.")
 
         if self.verbose:
             console.rule("[italic]Output message of the LLM:")
@@ -1074,4 +1074,4 @@ And even if your task resolution is not successful, please return as much contex
         else:
             return output
 
-__all__ = ["BaseAgent", "ManagedAgent", "ReactAgent", "CodeAgent", "JsonAgent", "Toolbox"]
+__all__ = ["AgentError", "BaseAgent", "ManagedAgent", "ReactAgent", "CodeAgent", "JsonAgent", "Toolbox"]
@@ -14,7 +14,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .agent_types import AgentAudio, AgentImage, AgentText
+from .types import AgentAudio, AgentImage, AgentText
 from .agents import BaseAgent, AgentStep, ActionStep
 import gradio as gr
 
@@ -22,10 +22,20 @@ from huggingface_hub import InferenceClient
 
 from transformers import AutoTokenizer, Pipeline
 import logging
+import os
+from openai import OpenAI
 
 logger = logging.getLogger(__name__)
 
+DEFAULT_JSONAGENT_REGEX_GRAMMAR = {
+    "type": "regex",
+    "value": 'Thought: .+?\\nAction:\\n\\{\\n\\s{4}"action":\\s"[^"\\n]+",\\n\\s{4}"action_input":\\s"[^"\\n]+"\\n\\}\\n<end_action>',
+}
+
+DEFAULT_CODEAGENT_REGEX_GRAMMAR = {
+    "type": "regex",
+    "value": "Thought: .+?\\nCode:\\n```(?:py|python)?\\n(?:.|\\s)+?\\n```<end_action>",
+}
 
 class MessageRole(str, Enum):
     USER = "user"
@@ -38,6 +48,13 @@ class MessageRole(str, Enum):
     def roles(cls):
         return [r.value for r in cls]
 
+openai_role_conversions = {
+    MessageRole.TOOL_RESPONSE: MessageRole.USER,
+}
+
+llama_role_conversions = {
+    MessageRole.TOOL_RESPONSE: MessageRole.USER,
+}
 
 def get_clean_message_list(
     message_list: List[Dict[str, str]], role_conversions: Dict[str, str] = {}
@@ -73,11 +90,6 @@ def get_clean_message_list(
     return final_message_list
 
 
-llama_role_conversions = {
-    MessageRole.TOOL_RESPONSE: MessageRole.USER,
-}
-
-
 class HfEngine:
     def __init__(self, model_id: Optional[str] = None):
         self.last_input_token_count = None
@@ -106,6 +118,7 @@ class HfEngine:
         messages: List[Dict[str, str]],
         stop_sequences: Optional[List[str]] = None,
         grammar: Optional[str] = None,
+        max_tokens: int = 1500,
     ):
         raise NotImplementedError
 
@@ -114,6 +127,7 @@ class HfEngine:
         messages: List[Dict[str, str]],
         stop_sequences: Optional[List[str]] = None,
         grammar: Optional[str] = None,
+        max_tokens: int = 1500,
     ) -> str:
         """Process the input messages and return the model's response.
 
@@ -133,7 +147,7 @@
         Example:
             ```python
             >>> engine = HfApiEngine(
-            ...     model="meta-llama/Meta-Llama-3.1-8B-Instruct",
+            ...     model="Qwen/Qwen2.5-Coder-32B-Instruct",
             ...     token="your_hf_token_here",
             ...     max_tokens=2000
             ... )
@@ -149,7 +163,7 @@ class HfEngine:
             )
         if stop_sequences is None:
             stop_sequences = []
-        response = self.generate(messages, stop_sequences, grammar)
+        response = self.generate(messages, stop_sequences, grammar, max_tokens)
         self.last_input_token_count = len(
             self.tokenizer.apply_chat_template(messages, tokenize=True)
         )
@@ -168,11 +182,12 @@ class HfApiEngine(HfEngine):
     This engine allows you to communicate with Hugging Face's models using the Inference API. It can be used in serverless mode or with a dedicated endpoint, supporting features like stop sequences and grammar customization.
 
     Parameters:
-        model (`str`, *optional*, defaults to `"meta-llama/Meta-Llama-3.1-8B-Instruct"`):
+        model (`str`, *optional*, defaults to `"Qwen/Qwen2.5-Coder-32B-Instruct"`):
             The Hugging Face model ID to be used for inference. This can be a path or model identifier from the Hugging Face model hub.
         token (`str`, *optional*):
-            Token used by the Hugging Face API for authentication.
-            If not provided, the class will use the token stored in the Hugging Face CLI configuration.
+            Token used by the Hugging Face API for authentication. This token needs to be authorized for 'Make calls to the serverless Inference API'.
+            If the model is gated (like the Llama-3 models), the token also needs 'Read access to contents of all public gated repos you can access'.
+            If not provided, the class will try the 'HF_TOKEN' environment variable, else fall back to the token stored in the Hugging Face CLI configuration.
         max_tokens (`int`, *optional*, defaults to 1500):
             The maximum number of tokens allowed in the output.
         timeout (`int`, *optional*, defaults to 120):
@@ -185,21 +200,22 @@ class HfApiEngine(HfEngine):
     def __init__(
         self,
-        model: str = "meta-llama/Meta-Llama-3.1-8B-Instruct",
+        model: str = "Qwen/Qwen2.5-Coder-32B-Instruct",
         token: Optional[str] = None,
-        max_tokens: Optional[int] = 1500,
         timeout: Optional[int] = 120,
     ):
         super().__init__(model_id=model)
         self.model = model
+        if token is None:
+            token = os.getenv("HF_TOKEN")
         self.client = InferenceClient(self.model, token=token, timeout=timeout)
-        self.max_tokens = max_tokens
 
     def generate(
         self,
         messages: List[Dict[str, str]],
         stop_sequences: Optional[List[str]] = None,
         grammar: Optional[str] = None,
+        max_tokens: int = 1500,
     ) -> str:
         # Get clean message list
         messages = get_clean_message_list(
@@ -211,12 +227,12 @@ class HfApiEngine(HfEngine):
             response = self.client.chat_completion(
                 messages,
                 stop=stop_sequences,
-                max_tokens=self.max_tokens,
                 response_format=grammar,
+                max_tokens=max_tokens,
             )
         else:
             response = self.client.chat_completion(
-                messages, stop=stop_sequences, max_tokens=self.max_tokens
+                messages, stop=stop_sequences, max_tokens=max_tokens
             )
 
         response = response.choices[0].message.content
@@ -235,7 +251,7 @@ class TransformersEngine(HfEngine):
         messages: List[Dict[str, str]],
         stop_sequences: Optional[List[str]] = None,
         grammar: Optional[str] = None,
-        max_length: int = 1500,
+        max_tokens: int = 1500,
     ) -> str:
         # Get clean message list
         messages = get_clean_message_list(
@@ -251,7 +267,7 @@ class TransformersEngine(HfEngine):
         output = self.pipeline(
             messages,
             stop_strings=stop_strings,
-            max_length=max_length,
+            max_length=max_tokens,
             tokenizer=self.pipeline.tokenizer,
         )
 
@@ -259,14 +275,95 @@
         return response
 
 
-DEFAULT_JSONAGENT_REGEX_GRAMMAR = {
-    "type": "regex",
-    "value": 'Thought: .+?\\nAction:\\n\\{\\n\\s{4}"action":\\s"[^"\\n]+",\\n\\s{4}"action_input":\\s"[^"\\n]+"\\n\\}\\n<end_action>',
-}
-
-DEFAULT_CODEAGENT_REGEX_GRAMMAR = {
-    "type": "regex",
-    "value": "Thought: .+?\\nCode:\\n```(?:py|python)?\\n(?:.|\\s)+?\\n```<end_action>",
-}
-
-__all__ = ["MessageRole", "llama_role_conversions", "get_clean_message_list", "HfEngine", "TransformersEngine", "HfApiEngine"]
+class OpenAIEngine:
+    def __init__(self, model_name: Optional[str] = None, api_key: Optional[str] = None, base_url: Optional[str] = None):
+        """Creates an LLM engine that follows the OpenAI format.
+
+        Args:
+            model_name (`str`, *optional*): the model name to use.
+            api_key (`str`, *optional*): your API key.
+            base_url (`str`, *optional*): the URL to use if using a different inference service than OpenAI, for instance "https://api-inference.huggingface.co/v1/".
+        """
+        if model_name is None:
+            model_name = "gpt-4o"
+        if api_key is None:
+            api_key = os.getenv("OPENAI_API_KEY")
+        self.model_name = model_name
+        self.client = OpenAI(
+            base_url=base_url,
+            api_key=api_key,
+        )
+
+    def __call__(
+        self,
+        messages: List[Dict[str, str]],
+        stop_sequences: Optional[List[str]] = None,
+        grammar: Optional[str] = None,
+        max_tokens: int = 1500,
+    ) -> str:
+        messages = get_clean_message_list(messages, role_conversions=openai_role_conversions)
+
+        response = self.client.chat.completions.create(
+            model=self.model_name,
+            messages=messages,
+            stop=stop_sequences,
+            temperature=0.5,
+            max_tokens=max_tokens,
+        )
+        return response.choices[0].message.content
+
+
+class AnthropicEngine:
+    def __init__(self, model_name="claude-3-5-sonnet-20240620", use_bedrock=False):
+        from anthropic import Anthropic, AnthropicBedrock
+
+        self.model_name = model_name
+        if use_bedrock:
+            self.model_name = "anthropic.claude-3-5-sonnet-20240620-v1:0"
+            self.client = AnthropicBedrock(
+                aws_access_key=os.getenv("AWS_BEDROCK_ID"),
+                aws_secret_key=os.getenv("AWS_BEDROCK_KEY"),
+                aws_region="us-east-1",
+            )
+        else:
+            self.client = Anthropic(
+                api_key=os.getenv("ANTHROPIC_API_KEY"),
+            )
+
+    def __call__(
+        self,
+        messages: List[Dict[str, str]],
+        stop_sequences: Optional[List[str]] = None,
+        grammar: Optional[str] = None,
+        max_tokens: int = 1500,
+    ) -> str:
+        messages = get_clean_message_list(messages, role_conversions=openai_role_conversions)
+        index_system_message, system_prompt = None, None
+        for index, message in enumerate(messages):
+            if message["role"] == MessageRole.SYSTEM:
+                index_system_message = index
+                system_prompt = message["content"]
+        if system_prompt is None:
+            raise Exception("No system prompt found!")
+
+        filtered_messages = [message for i, message in enumerate(messages) if i != index_system_message]
+        if len(filtered_messages) == 0:
+            raise Exception(f"No user message found in: {messages}")
+
+        response = self.client.messages.create(
+            model=self.model_name,
+            system=system_prompt,
+            messages=filtered_messages,
+            stop_sequences=stop_sequences,
+            temperature=0.5,
+            max_tokens=max_tokens,
+        )
+        full_response_text = ""
+        for content_block in response.content:
+            if content_block.type == "text":
+                full_response_text += content_block.text
+        return full_response_text
+
+
+__all__ = ["MessageRole", "llama_role_conversions", "get_clean_message_list", "HfEngine", "TransformersEngine", "HfApiEngine", "OpenAIEngine", "AnthropicEngine"]
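Since `OpenAIEngine` accepts a `base_url`, it can presumably target any OpenAI-compatible server, not just OpenAI itself; a minimal sketch under that assumption (the endpoint URL comes from the docstring above, while the model name and token variable are illustrative):

```python
import os
from agents import CodeAgent, OpenAIEngine

# Illustrative: point the OpenAI-format client at an OpenAI-compatible
# endpoint. Model name and token variable are assumptions, not part of
# the commit.
engine = OpenAIEngine(
    model_name="meta-llama/Llama-3.3-70B-Instruct",
    api_key=os.getenv("HF_TOKEN"),
    base_url="https://api-inference.huggingface.co/v1/",
)

agent = CodeAgent([], llm_engine=engine)
agent.run("What is the 10th Fibonacci number?")
```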
@@ -231,7 +231,6 @@ def evaluate_class_def(class_def, state, static_tools, custom_tools):
 
 
 def evaluate_augassign(expression, state, static_tools, custom_tools):
-    # Helper function to get current value and set new value based on the target type
     def get_current_value(target):
         if isinstance(target, ast.Name):
             return state.get(target.id, 0)
@@ -254,7 +253,6 @@ def evaluate_augassign(expression, state, static_tools, custom_tools):
     current_value = get_current_value(expression.target)
     value_to_add = evaluate_ast(expression.value, state, static_tools, custom_tools)
 
-    # Determine the operation and apply it
     if isinstance(expression.op, ast.Add):
         if isinstance(current_value, list):
             if not isinstance(value_to_add, list):
@@ -51,7 +51,7 @@ from transformers.utils import (
     is_torch_available,
     is_vision_available,
 )
-from .agent_types import ImageType, handle_agent_inputs, handle_agent_outputs
+from .types import ImageType, handle_agent_inputs, handle_agent_outputs
 import logging
 
 logger = logging.getLogger(__name__)
@@ -928,8 +928,8 @@ def load_tool(task_or_repo_id, model_repo_id=None, token=None, **kwargs):
     """
     if task_or_repo_id in TOOL_MAPPING:
         tool_class_name = TOOL_MAPPING[task_or_repo_id]
-        main_module = importlib.import_module("transformers")
-        tools_module = main_module.agents
+        main_module = importlib.import_module("agents")
+        tools_module = main_module
         tool_class = getattr(tools_module, tool_class_name)
         return tool_class(model_repo_id, token=token, **kwargs)
     else:
@@ -20,7 +20,7 @@ import pytest
 
 from pathlib import Path
 
-from agents.agent_types import AgentText
+from agents.types import AgentText
 from agents.agents import (
     AgentMaxIterationsError,
     ManagedAgent,
@@ -20,8 +20,8 @@ import numpy as np
 from PIL import Image
 
 from transformers import is_torch_available
-from transformers.agents.agent_types import AGENT_TYPE_MAPPING
-from transformers.agents.default_tools import FinalAnswerTool
+from agents.types import AGENT_TYPE_MAPPING
+from agents.default_tools import FinalAnswerTool
 from transformers.testing_utils import get_tests_dir, require_torch
 
 from .test_tools_common import ToolTesterMixin
@@ -15,9 +15,7 @@
 
 import unittest
 
-from transformers.agents.agent_types import AgentImage
-from transformers.agents.agents import AgentError, CodeAgent, JsonAgent
-from transformers.agents.monitoring import stream_to_gradio
+from agents import AgentImage, AgentError, CodeAgent, JsonAgent, stream_to_gradio
 
 
 class MonitoringTester(unittest.TestCase):
@@ -122,7 +120,7 @@ final_answer('This is the final answer.')
         # Use stream_to_gradio to capture the output
         outputs = list(stream_to_gradio(agent, task="Test task", test_mode=True))
 
-        self.assertEqual(len(outputs), 3)
+        self.assertEqual(len(outputs), 4)
         final_message = outputs[-1]
         self.assertEqual(final_message.role, "assistant")
         self.assertIn("This is the final answer.", final_message.content)
@@ -149,7 +147,7 @@ final_answer('This is the final answer.')
             )
         )
 
-        self.assertEqual(len(outputs), 2)
+        self.assertEqual(len(outputs), 3)
         final_message = outputs[-1]
         self.assertEqual(final_message.role, "assistant")
         self.assertIsInstance(final_message.content, dict)
@@ -169,7 +167,7 @@ final_answer('This is the final answer.')
         # Use stream_to_gradio to capture the output
         outputs = list(stream_to_gradio(agent, task="Test task", test_mode=True))
 
-        self.assertEqual(len(outputs), 3)
+        self.assertEqual(len(outputs), 5)
         final_message = outputs[-1]
         self.assertEqual(final_message.role, "assistant")
         self.assertIn("Simulated agent error", final_message.content)
@@ -18,10 +18,10 @@ import unittest
 import numpy as np
 import pytest
 
-from transformers import load_tool
-from transformers.agents.agent_types import AGENT_TYPE_MAPPING
-from transformers.agents.default_tools import BASE_PYTHON_TOOLS
-from transformers.agents.python_interpreter import (
+from agents import load_tool
+from agents.types import AGENT_TYPE_MAPPING
+from agents.default_tools import BASE_PYTHON_TOOLS
+from agents.local_python_executor import (
     InterpreterError,
     evaluate_python_code,
 )
@@ -51,6 +51,7 @@ class PythonInterpreterToolTester(unittest.TestCase, ToolTesterMixin):
         inputs = ["2 * 2"]
         output = self.tool(*inputs)
         output_type = AGENT_TYPE_MAPPING[self.tool.output_type]
+        print("OKK", type(output), output_type, AGENT_TYPE_MAPPING)
         self.assertTrue(isinstance(output, output_type))
 
     def test_agent_types_inputs(self):
@@ -15,7 +15,7 @@
 
 import unittest
 
-from transformers import load_tool
+from agents import load_tool
 
 from .test_tools_common import ToolTesterMixin
 
@@ -20,13 +20,13 @@ import numpy as np
 import pytest
 
 from transformers import is_torch_available, is_vision_available
-from transformers.agents.agent_types import (
+from agents.types import (
     AGENT_TYPE_MAPPING,
     AgentAudio,
     AgentImage,
     AgentText,
 )
-from transformers.agents.tools import Tool, tool
+from agents.tools import Tool, tool
 from transformers.testing_utils import get_tests_dir
 
 
@@ -18,7 +18,7 @@ import unittest
 import uuid
 from pathlib import Path
 
-from transformers.agents.agent_types import AgentAudio, AgentImage, AgentText
+from agents.types import AgentAudio, AgentImage, AgentText
 from transformers.testing_utils import (
     get_tests_dir,
     require_soundfile,