#!/usr/bin/env python # coding=utf-8 # Copyright 2024 The HuggingFace Inc. team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import json import logging import re import time from typing import Any, Callable, Dict, List, Optional, Tuple, Union import rich from rich import markdown as rich_markdown from transformers.utils import is_torch_available import logging from .utils import console from .agent_types import AgentAudio, AgentImage from .default_tools import BASE_PYTHON_TOOLS, FinalAnswerTool, setup_default_tools from .llm_engine import HfApiEngine, MessageRole from .monitoring import Monitor from .prompts import ( DEFAULT_CODE_SYSTEM_PROMPT, DEFAULT_REACT_CODE_SYSTEM_PROMPT, DEFAULT_REACT_JSON_SYSTEM_PROMPT, PLAN_UPDATE_FINAL_PLAN_REDACTION, PROMPTS_FOR_INITIAL_PLAN, PROMPTS_FOR_PLAN_UPDATE, SUPPORTED_PLAN_TYPES, SYSTEM_PROMPT_FACTS, SYSTEM_PROMPT_FACTS_UPDATE, USER_PROMPT_FACTS_UPDATE, ) from .python_interpreter import LIST_SAFE_MODULES, evaluate_python_code from .tools import ( DEFAULT_TOOL_DESCRIPTION_TEMPLATE, Tool, get_tool_description_with_args, load_tool, ) def parse_json_blob(json_blob: str) -> Dict[str, str]: try: first_accolade_index = json_blob.find("{") last_accolade_index = [a.start() for a in list(re.finditer("}", json_blob))][-1] json_blob = json_blob[first_accolade_index : last_accolade_index + 1].replace('\\"', "'") json_data = json.loads(json_blob, strict=False) return json_data except json.JSONDecodeError as e: place = e.pos if json_blob[place - 1 : place + 2] == "},\n": raise ValueError( "JSON is invalid: you probably tried to provide multiple tool calls in one action. PROVIDE ONLY ONE TOOL CALL." ) raise ValueError( f"The JSON blob you used is invalid due to the following error: {e}.\n" f"JSON blob was: {json_blob}, decoding failed on that specific part of the blob:\n" f"'{json_blob[place-4:place+5]}'." ) except Exception as e: raise ValueError(f"Error in parsing the JSON blob: {e}") def parse_code_blob(code_blob: str) -> str: try: pattern = r"```(?:py|python)?\n(.*?)\n```" match = re.search(pattern, code_blob, re.DOTALL) return match.group(1).strip() except Exception as e: raise ValueError( f""" The code blob you used is invalid: due to the following error: {e} This means that the regex pattern {pattern} was not respected: make sure to include code with the correct pattern, for instance: Thoughts: Your thoughts Code: ```py # Your python code here ```""" ) def parse_json_tool_call(json_blob: str) -> Tuple[str, Dict[str, str]]: json_blob = json_blob.replace("```json", "").replace("```", "") tool_call = parse_json_blob(json_blob) if "action" in tool_call and "action_input" in tool_call: return tool_call["action"], tool_call["action_input"] elif "action" in tool_call: return tool_call["action"], None else: missing_keys = [key for key in ['action', 'action_input'] if key not in tool_call] error_msg = f"Missing keys: {missing_keys} in blob {tool_call}" console.print(f"[bold red]{error_msg}[/bold red]") raise ValueError(error_msg) def parse_text_tool_call(text: str) -> Tuple[str, Union[str, Dict[str, str]]]: """ Expects a text in the format: 'Action:', 'Action input:', 'Observation:'. 'Action input:' contains a json string with input arguments. """ try: if "Observation:" in text: text = text.split("Observation:")[0] if "Action:" in text: text = text.split("Action:")[1] tool_name, tool_input = text.split("Action input:") if "{" in tool_input: tool_input = parse_json_blob(tool_input) else: tool_input = tool_input.strip().replace('"', "") return tool_name.strip().replace('"', "").replace("\\", ""), tool_input except Exception as e: raise ValueError( f"Error in parsing the text tool call: {e}. Be sure to provide the correct format. DO NOT repeat your previous incorrect tool call." ) def to_text(input: Union[List[Dict[str, str]], Dict[str, str], str]) -> str: if isinstance(input, list): return "\n".join([m["content"] for m in input]) elif isinstance(input, dict): return input["content"] else: return input HUGGINGFACE_DEFAULT_TOOLS = {} _tools_are_initialized = False class Toolbox: """ The toolbox contains all tools that the agent can perform operations with, as well as a few methods to manage them. Args: tools (`List[Tool]`): The list of tools to instantiate the toolbox with add_base_tools (`bool`, defaults to `False`, *optional*, defaults to `False`): Whether to add the tools available within `transformers` to the toolbox. """ def __init__(self, tools: List[Tool], add_base_tools: bool = False): self._tools = {tool.name: tool for tool in tools} if add_base_tools: self.add_base_tools() # self._load_tools_if_needed() def add_base_tools(self, add_python_interpreter: bool = False): global _tools_are_initialized global HUGGINGFACE_DEFAULT_TOOLS if not _tools_are_initialized: HUGGINGFACE_DEFAULT_TOOLS = setup_default_tools() _tools_are_initialized = True for tool in HUGGINGFACE_DEFAULT_TOOLS.values(): if tool.name != "python_interpreter" or add_python_interpreter: self.add_tool(tool) # self._load_tools_if_needed() @property def tools(self) -> Dict[str, Tool]: """Get all tools currently in the toolbox""" return self._tools def show_tool_descriptions(self, tool_description_template: str = None) -> str: """ Returns the description of all tools in the toolbox Args: tool_description_template (`str`, *optional*): The template to use to describe the tools. If not provided, the default template will be used. """ return "\n".join( [get_tool_description_with_args(tool, tool_description_template) for tool in self._tools.values()] ) def add_tool(self, tool: Tool): """ Adds a tool to the toolbox Args: tool (`Tool`): The tool to add to the toolbox. """ if tool.name in self._tools: raise KeyError(f"Error: tool '{tool.name}' already exists in the toolbox.") self._tools[tool.name] = tool def remove_tool(self, tool_name: str): """ Removes a tool from the toolbox Args: tool_name (`str`): The tool to remove from the toolbox. """ if tool_name not in self._tools: raise KeyError( f"Error: tool {tool_name} not found in toolbox for removal, should be instead one of {list(self._tools.keys())}." ) del self._tools[tool_name] def update_tool(self, tool: Tool): """ Updates a tool in the toolbox according to its name. Args: tool (`Tool`): The tool to update to the toolbox. """ if tool.name not in self._tools: raise KeyError( f"Error: tool {tool.name} not found in toolbox for update, should be instead one of {list(self._tools.keys())}." ) self._tools[tool.name] = tool def clear_toolbox(self): """Clears the toolbox""" self._tools = {} # def _load_tools_if_needed(self): # for name, tool in self._tools.items(): # if not isinstance(tool, Tool): # task_or_repo_id = tool.task if tool.repo_id is None else tool.repo_id # self._tools[name] = load_tool(task_or_repo_id) def __repr__(self): toolbox_description = "Toolbox contents:\n" for tool in self._tools.values(): toolbox_description += f"\t{tool.name}: {tool.description}\n" return toolbox_description class AgentError(Exception): """Base class for other agent-related exceptions""" def __init__(self, message): super().__init__(message) self.message = message console.print(f"[bold red]{message}[/bold red]") class AgentParsingError(AgentError): """Exception raised for errors in parsing in the agent""" pass class AgentExecutionError(AgentError): """Exception raised for errors in execution in the agent""" pass class AgentMaxIterationsError(AgentError): """Exception raised for errors in execution in the agent""" pass class AgentGenerationError(AgentError): """Exception raised for errors in generation in the agent""" pass def format_prompt_with_tools(toolbox: Toolbox, prompt_template: str, tool_description_template: str) -> str: tool_descriptions = toolbox.show_tool_descriptions(tool_description_template) prompt = prompt_template.replace("<>", tool_descriptions) if "<>" in prompt: tool_names = [f"'{tool_name}'" for tool_name in toolbox.tools.keys()] prompt = prompt.replace("<>", ", ".join(tool_names)) return prompt def show_agents_descriptions(managed_agents: list): managed_agents_descriptions = """ You can also give requests to team members. Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'request', a long string explaning your request. Given that this team member is a real human, you should be very verbose in your request. Here is a list of the team members that you can call:""" for agent in managed_agents.values(): managed_agents_descriptions += f"\n- {agent.name}: {agent.description}" return managed_agents_descriptions def format_prompt_with_managed_agents_descriptions(prompt_template, managed_agents=None) -> str: if managed_agents is not None: return prompt_template.replace("<>", show_agents_descriptions(managed_agents)) else: return prompt_template.replace("<>", "") def format_prompt_with_imports(prompt_template: str, authorized_imports: List[str]) -> str: if "<>" not in prompt_template: raise AgentError("Tag '<>' should be provided in the prompt.") return prompt_template.replace("<>", str(authorized_imports)) class Agent: def __init__( self, tools: Union[List[Tool], Toolbox], llm_engine: Callable = None, system_prompt: Optional[str] = None, tool_description_template: Optional[str] = None, additional_args: Dict = {}, max_iterations: int = 6, tool_parser: Optional[Callable] = None, add_base_tools: bool = False, verbose: bool = False, grammar: Optional[Dict[str, str]] = None, managed_agents: Optional[List] = None, step_callbacks: Optional[List[Callable]] = None, monitor_metrics: bool = True, ): if system_prompt is None: system_prompt = DEFAULT_REACT_CODE_SYSTEM_PROMPT if tool_parser is None: tool_parser = parse_json_tool_call self.agent_name = self.__class__.__name__ self.llm_engine = llm_engine self.system_prompt_template = system_prompt self.tool_description_template = ( tool_description_template if tool_description_template else DEFAULT_TOOL_DESCRIPTION_TEMPLATE ) self.additional_args = additional_args self.max_iterations = max_iterations self.tool_parser = tool_parser self.grammar = grammar self.managed_agents = None if managed_agents is not None: self.managed_agents = {agent.name: agent for agent in managed_agents} if isinstance(tools, Toolbox): self._toolbox = tools if add_base_tools: if not is_torch_available(): raise ImportError("Using the base tools requires torch to be installed.") self._toolbox.add_base_tools(add_python_interpreter=(self.__class__ == ReactJsonAgent)) else: self._toolbox = Toolbox(tools, add_base_tools=add_base_tools) self._toolbox.add_tool(FinalAnswerTool()) self.system_prompt = format_prompt_with_tools( self._toolbox, self.system_prompt_template, self.tool_description_template ) self.system_prompt = format_prompt_with_managed_agents_descriptions(self.system_prompt, self.managed_agents) self.prompt = None self.logs = [] self.task = None self.verbose = verbose # Initialize step callbacks self.step_callbacks = step_callbacks if step_callbacks is not None else [] # Initialize Monitor if monitor_metrics is True self.monitor = None if monitor_metrics: self.monitor = Monitor(self.llm_engine) self.step_callbacks.append(self.monitor.update_metrics) @property def toolbox(self) -> Toolbox: """Get the toolbox currently available to the agent""" return self._toolbox def initialize_for_run(self): self.token_count = 0 self.system_prompt = format_prompt_with_tools( self._toolbox, self.system_prompt_template, self.tool_description_template, ) self.system_prompt = format_prompt_with_managed_agents_descriptions(self.system_prompt, self.managed_agents) if hasattr(self, "authorized_imports"): self.system_prompt = format_prompt_with_imports( self.system_prompt, list(set(LIST_SAFE_MODULES) | set(self.authorized_imports)) ) self.logs = [{"system_prompt": self.system_prompt, "task": self.task}] console.rule("New task", characters='=') console.print(self.task) def write_inner_memory_from_logs(self, summary_mode: Optional[bool] = False) -> List[Dict[str, str]]: """ Reads past llm_outputs, actions, and observations or errors from the logs into a series of messages that can be used as input to the LLM. """ prompt_message = {"role": MessageRole.SYSTEM, "content": self.logs[0]["system_prompt"]} task_message = { "role": MessageRole.USER, "content": "Task: " + self.logs[0]["task"], } if summary_mode: memory = [task_message] else: memory = [prompt_message, task_message] for i, step_log in enumerate(self.logs[1:]): if "llm_output" in step_log and not summary_mode: thought_message = {"role": MessageRole.ASSISTANT, "content": step_log["llm_output"].strip()} memory.append(thought_message) if "facts" in step_log: thought_message = { "role": MessageRole.ASSISTANT, "content": "[FACTS LIST]:\n" + step_log["facts"].strip(), } memory.append(thought_message) if "plan" in step_log and not summary_mode: thought_message = {"role": MessageRole.ASSISTANT, "content": "[PLAN]:\n" + step_log["plan"].strip()} memory.append(thought_message) if "tool_call" in step_log and summary_mode: tool_call_message = { "role": MessageRole.ASSISTANT, "content": f"[STEP {i} TOOL CALL]: " + str(step_log["tool_call"]).strip(), } memory.append(tool_call_message) if "task" in step_log: tool_call_message = { "role": MessageRole.USER, "content": "New task:\n" + step_log["task"], } memory.append(tool_call_message) if "error" in step_log or "observation" in step_log: if "error" in step_log: message_content = ( f"[OUTPUT OF STEP {i}] -> Error:\n" + str(step_log["error"]) + "\nNow let's retry: take care not to repeat previous errors! If you have retried several times, try a completely different approach.\n" ) elif "observation" in step_log: message_content = f"[OUTPUT OF STEP {i}] -> Observation:\n{step_log['observation']}" tool_response_message = {"role": MessageRole.TOOL_RESPONSE, "content": message_content} memory.append(tool_response_message) return memory def get_succinct_logs(self): return [{key: value for key, value in log.items() if key != "agent_memory"} for log in self.logs] def extract_action(self, llm_output: str, split_token: str) -> str: """ Parse action from the LLM output Args: llm_output (`str`): Output of the LLM split_token (`str`): Separator for the action. Should match the example in the system prompt. """ try: split = llm_output.split(split_token) rationale, action = ( split[-2], split[-1], ) # NOTE: using indexes starting from the end solves for when you have more than one split_token in the output except Exception as e: raise AgentParsingError( f"Error: No '{split_token}' token provided in your output.\nYour output:\n{llm_output}\n. Be sure to include an action, prefaced with '{split_token}'!" ) return rationale.strip(), action.strip() def execute_tool_call(self, tool_name: str, arguments: Dict[str, str]) -> Any: """ Execute tool with the provided input and returns the result. This method replaces arguments with the actual values from the state if they refer to state variables. Args: tool_name (`str`): Name of the Tool to execute (should be one from self.toolbox). arguments (Dict[str, str]): Arguments passed to the Tool. """ available_tools = self.toolbox.tools if self.managed_agents is not None: available_tools = {**available_tools, **self.managed_agents} if tool_name not in available_tools: error_msg = f"Error: unknown tool {tool_name}, should be instead one of {list(available_tools.keys())}." console.print(f"[bold red]{error_msg}") raise AgentExecutionError(error_msg) try: if isinstance(arguments, str): observation = available_tools[tool_name](arguments) elif isinstance(arguments, dict): for key, value in arguments.items(): if isinstance(value, str) and value in self.state: arguments[key] = self.state[value] observation = available_tools[tool_name](**arguments) else: error_msg = f"Arguments passed to tool should be a dict or string: got a {type(arguments)}." console.print(f"[bold red]{error_msg}") raise AgentExecutionError(error_msg) return observation except Exception as e: if tool_name in self.toolbox.tools: tool_description = get_tool_description_with_args(available_tools[tool_name]) error_msg = ( f"Error in tool call execution: {e}\nYou should only use this tool with a correct input.\n" f"As a reminder, this tool's description is the following:\n{tool_description}" ) console.print(f"[bold red]{error_msg}") raise AgentExecutionError(error_msg) elif tool_name in self.managed_agents: error_msg = ( f"Error in calling team member: {e}\nYou should only ask this team member with a correct request.\n" f"As a reminder, this team member's description is the following:\n{available_tools[tool_name]}" ) console.print(f"[bold red]{error_msg}") raise AgentExecutionError(error_msg) def log_rationale_code_action(self, rationale: str, code_action: str) -> None: if self.verbose: console.rule("Agent thoughts") console.print(rationale) console.rule("Agent is executing the code below:", align="left") console.print(code_action) console.rule("", align="left") def run(self, **kwargs): """To be implemented in the child class""" raise NotImplementedError class CodeAgent(Agent): """ A class for an agent that solves the given task using a single block of code. It plans all its actions, then executes all in one shot. """ def __init__( self, tools: List[Tool], llm_engine: Optional[Callable] = None, system_prompt: Optional[str] = None, tool_description_template: Optional[str] = None, grammar: Optional[Dict[str, str]] = None, additional_authorized_imports: Optional[List[str]] = None, **kwargs, ): if llm_engine is None: llm_engine = HfApiEngine() if system_prompt is None: system_prompt = DEFAULT_CODE_SYSTEM_PROMPT if tool_description_template is None: tool_description_template = DEFAULT_TOOL_DESCRIPTION_TEMPLATE super().__init__( tools=tools, llm_engine=llm_engine, system_prompt=system_prompt, tool_description_template=tool_description_template, grammar=grammar, **kwargs, ) self.python_evaluator = evaluate_python_code self.additional_authorized_imports = additional_authorized_imports if additional_authorized_imports else [] self.authorized_imports = list(set(LIST_SAFE_MODULES) | set(self.additional_authorized_imports)) self.system_prompt = self.system_prompt.replace("<>", str(self.authorized_imports)) def parse_code_blob(self, result: str) -> str: """ Override this method if you want to change the way the code is cleaned in the `run` method. """ return parse_code_blob(result) def run(self, task: str, return_generated_code: bool = False, **kwargs): """ Runs the agent for the given task. Args: task (`str`): The task to perform return_generated_code (`bool`, *optional*, defaults to `False`): Whether to return the generated code instead of running it kwargs (additional keyword arguments, *optional*): Any keyword argument to send to the agent when evaluating the code. Example: ```py from transformers.agents import CodeAgent agent = CodeAgent(tools=[]) agent.run("What is the result of 2 power 3.7384?") ``` """ self.task = task if len(kwargs) > 0: self.task += f"\nYou have been provided with these initial arguments: {str(kwargs)}." self.state = kwargs.copy() self.initialize_for_run() # Run LLM prompt_message = {"role": MessageRole.SYSTEM, "content": self.system_prompt} task_message = { "role": MessageRole.USER, "content": "Task: " + self.task, } self.prompt = [prompt_message, task_message] if self.verbose: console.rule("Executing with this prompt") console.print(self.prompt) additional_args = {"grammar": self.grammar} if self.grammar is not None else {} llm_output = self.llm_engine(self.prompt, stop_sequences=[""], **additional_args) # Parse try: rationale, code_action = self.extract_action(llm_output=llm_output, split_token="Code:") except Exception as e: if self.verbose: console.print( f"Error in extracting action, trying to parse the whole output as code. Error trace: {e}" ) rationale, code_action = "", llm_output try: code_action = self.parse_code_blob(code_action) except Exception as e: error_msg = f"Error in code parsing: {e}. Be sure to provide correct code" console.print(f"[bold red]{error_msg}[/bold red]") return error_msg # Execute self.log_rationale_code_action(rationale, code_action) try: available_tools = {**BASE_PYTHON_TOOLS.copy(), **self.toolbox.tools} output = self.python_evaluator( code_action, static_tools=available_tools, custom_tools={}, state=self.state, authorized_imports=self.authorized_imports, ) if self.verbose: console.print(self.state["print_outputs"]) return output except Exception as e: error_msg = f"Error in execution: {e}. Be sure to provide correct code." console.print(f"[bold red]{error_msg}[/bold red]") return error_msg class ReactAgent(Agent): """ This agent that solves the given task step by step, using the ReAct framework: While the objective is not reached, the agent will perform a cycle of thinking and acting. The action will be parsed from the LLM output: it consists in calls to tools from the toolbox, with arguments chosen by the LLM engine. """ def __init__( self, tools: List[Tool], llm_engine: Optional[Callable] = None, system_prompt: Optional[str] = None, tool_description_template: Optional[str] = None, grammar: Optional[Dict[str, str]] = None, plan_type: Optional[str] = None, planning_interval: Optional[int] = None, **kwargs, ): if llm_engine is None: llm_engine = HfApiEngine() if system_prompt is None: system_prompt = DEFAULT_REACT_CODE_SYSTEM_PROMPT if tool_description_template is None: tool_description_template = DEFAULT_TOOL_DESCRIPTION_TEMPLATE if plan_type is None: plan_type = SUPPORTED_PLAN_TYPES[0] else: assert plan_type in SUPPORTED_PLAN_TYPES, f"plan type {plan_type} is not supported" super().__init__( tools=tools, llm_engine=llm_engine, system_prompt=system_prompt, tool_description_template=tool_description_template, grammar=grammar, **kwargs, ) self.planning_interval = planning_interval self.plan_type = plan_type def provide_final_answer(self, task) -> str: """ This method provides a final answer to the task, based on the logs of the agent's interactions. """ self.prompt = [ { "role": MessageRole.SYSTEM, "content": "An agent tried to answer a user query but it got stuck and failed to do so. You are tasked with providing an answer instead. Here is the agent's memory:", } ] self.prompt += self.write_inner_memory_from_logs()[1:] self.prompt += [ { "role": MessageRole.USER, "content": f"Based on the above, please provide an answer to the following user request:\n{task}", } ] try: return self.llm_engine(self.prompt) except Exception as e: error_msg = f"Error in generating final LLM output: {e}." console.print(f"[bold red]{error_msg}[/bold red]") return error_msg def run(self, task: str, stream: bool = False, reset: bool = True, **kwargs): """ Runs the agent for the given task. Args: task (`str`): The task to perform Example: ```py from transformers.agents import ReactCodeAgent agent = ReactCodeAgent(tools=[]) agent.run("What is the result of 2 power 3.7384?") ``` """ self.task = task if len(kwargs) > 0: self.task += f"\nYou have been provided with these initial arguments: {str(kwargs)}." self.state = kwargs.copy() if reset: self.initialize_for_run() else: self.logs.append({"task": task}) if stream: return self.stream_run(task) else: return self.direct_run(task) def stream_run(self, task: str): """ Runs the agent in streaming mode, yielding steps as they are executed: should be launched only in the `run` method. """ final_answer = None iteration = 0 while final_answer is None and iteration < self.max_iterations: step_start_time = time.time() step_log_entry = {"iteration": iteration, "start_time": step_start_time} try: self.step(step_log_entry) if "final_answer" in step_log_entry: final_answer = step_log_entry["final_answer"] except AgentError as e: step_log_entry["error"] = e finally: step_end_time = time.time() step_log_entry["step_end_time"] = step_end_time step_log_entry["step_duration"] = step_end_time - step_start_time self.logs.append(step_log_entry) for callback in self.step_callbacks: callback(step_log_entry) iteration += 1 yield step_log_entry if final_answer is None and iteration == self.max_iterations: error_message = "Reached max iterations." final_step_log = {"error": AgentMaxIterationsError(error_message)} self.logs.append(final_step_log) console.print(f"[bold red]{error_message}") final_answer = self.provide_final_answer(task) final_step_log["final_answer"] = final_answer final_step_log["step_duration"] = 0 for callback in self.step_callbacks: callback(final_step_log) yield final_step_log yield final_answer def direct_run(self, task: str): """ Runs the agent in direct mode, returning outputs only at the end: should be launched only in the `run` method. """ final_answer = None iteration = 0 while final_answer is None and iteration < self.max_iterations: step_start_time = time.time() step_log_entry = {"iteration": iteration, "start_time": step_start_time} try: if self.planning_interval is not None and iteration % self.planning_interval == 0: self.planning_step(task, is_first_step=(iteration == 0), iteration=iteration) self.step(step_log_entry) if "final_answer" in step_log_entry: final_answer = step_log_entry["final_answer"] except AgentError as e: step_log_entry["error"] = e finally: step_end_time = time.time() step_log_entry["step_end_time"] = step_end_time step_log_entry["step_duration"] = step_end_time - step_start_time self.logs.append(step_log_entry) for callback in self.step_callbacks: callback(step_log_entry) iteration += 1 if final_answer is None and iteration == self.max_iterations: error_message = "Reached max iterations." final_step_log = {"error": AgentMaxIterationsError(error_message)} self.logs.append(final_step_log) console.print(f"[bold red]{error_message}") final_answer = self.provide_final_answer(task) final_step_log["final_answer"] = final_answer final_step_log["step_duration"] = 0 for callback in self.step_callbacks: callback(final_step_log) return final_answer def planning_step(self, task, is_first_step: bool = False, iteration: int = None): """ Used periodically by the agent to plan the next steps to reach the objective. Args: task (`str`): The task to perform is_first_step (`bool`): If this step is not the first one, the plan should be an update over a previous plan. iteration (`int`): The number of the current step, used as an indication for the LLM. """ if is_first_step: message_prompt_facts = {"role": MessageRole.SYSTEM, "content": SYSTEM_PROMPT_FACTS} message_prompt_task = { "role": MessageRole.USER, "content": f"""Here is the task: ``` {task} ``` Now begin!""", } answer_facts = self.llm_engine([message_prompt_facts, message_prompt_task]) message_system_prompt_plan = { "role": MessageRole.SYSTEM, "content": PROMPTS_FOR_INITIAL_PLAN[self.plan_type]["system"], } message_user_prompt_plan = { "role": MessageRole.USER, "content": PROMPTS_FOR_INITIAL_PLAN[self.plan_type]["user"].format( task=task, tool_descriptions=self._toolbox.show_tool_descriptions(self.tool_description_template), managed_agents_descriptions=( show_agents_descriptions(self.managed_agents) if self.managed_agents is not None else "" ), answer_facts=answer_facts, ), } answer_plan = self.llm_engine( [message_system_prompt_plan, message_user_prompt_plan], stop_sequences=[""] ) final_plan_redaction = f"""Here is the plan of action that I will follow to solve the task: ``` {answer_plan} ```""" final_facts_redaction = f"""Here are the facts that I know so far: ``` {answer_facts} ```""".strip() self.logs.append({"plan": final_plan_redaction, "facts": final_facts_redaction}) console.rule("[orange]Initial plan") console.print(final_plan_redaction) else: # update plan agent_memory = self.write_inner_memory_from_logs( summary_mode=False ) # This will not log the plan but will log facts # Redact updated facts facts_update_system_prompt = { "role": MessageRole.SYSTEM, "content": SYSTEM_PROMPT_FACTS_UPDATE, } facts_update_message = { "role": MessageRole.USER, "content": USER_PROMPT_FACTS_UPDATE, } facts_update = self.llm_engine([facts_update_system_prompt] + agent_memory + [facts_update_message]) # Redact updated plan plan_update_message = { "role": MessageRole.SYSTEM, "content": PROMPTS_FOR_PLAN_UPDATE[self.plan_type]["system"].format(task=task), } plan_update_message_user = { "role": MessageRole.USER, "content": PROMPTS_FOR_PLAN_UPDATE[self.plan_type]["user"].format( task=task, tool_descriptions=self._toolbox.show_tool_descriptions(self.tool_description_template), managed_agents_descriptions=( show_agents_descriptions(self.managed_agents) if self.managed_agents is not None else "" ), facts_update=facts_update, remaining_steps=(self.max_iterations - iteration), ), } plan_update = self.llm_engine( [plan_update_message] + agent_memory + [plan_update_message_user], stop_sequences=[""] ) # Log final facts and plan final_plan_redaction = PLAN_UPDATE_FINAL_PLAN_REDACTION.format(task=task, plan_update=plan_update) final_facts_redaction = f"""Here is the updated list of the facts that I know: ``` {facts_update} ```""" self.logs.append({"plan": final_plan_redaction, "facts": final_facts_redaction}) console.rule("Updated plan") console.print(final_plan_redaction) class ReactJsonAgent(ReactAgent): """ This agent that solves the given task step by step, using the ReAct framework: While the objective is not reached, the agent will perform a cycle of thinking and acting. The tool calls will be formulated by the LLM in JSON format, then parsed and executed. """ def __init__( self, tools: List[Tool], llm_engine: Optional[Callable] = None, system_prompt: Optional[str] = None, tool_description_template: Optional[str] = None, grammar: Optional[Dict[str, str]] = None, planning_interval: Optional[int] = None, **kwargs, ): if llm_engine is None: llm_engine = HfApiEngine() if system_prompt is None: system_prompt = DEFAULT_REACT_JSON_SYSTEM_PROMPT if tool_description_template is None: tool_description_template = DEFAULT_TOOL_DESCRIPTION_TEMPLATE super().__init__( tools=tools, llm_engine=llm_engine, system_prompt=system_prompt, tool_description_template=tool_description_template, grammar=grammar, planning_interval=planning_interval, **kwargs, ) def step(self, log_entry: Dict[str, Any]): """ Perform one step in the ReAct framework: the agent thinks, acts, and observes the result. The errors are raised here, they are caught and logged in the run() method. """ agent_memory = self.write_inner_memory_from_logs() self.prompt = agent_memory console.rule("New step") # Add new step in logs log_entry["agent_memory"] = agent_memory.copy() if self.verbose: console.rule("Calling LLM with this last message:") console.print(self.prompt[-1]) try: additional_args = {"grammar": self.grammar} if self.grammar is not None else {} llm_output = self.llm_engine( self.prompt, stop_sequences=["", "Observation:"], **additional_args ) except Exception as e: raise AgentGenerationError(f"Error in generating llm output: {e}.") console.rule("===== Output message of the LLM: =====") console.print(llm_output) log_entry["llm_output"] = llm_output # Parse console.rule("===== Extracting action =====") rationale, action = self.extract_action(llm_output=llm_output, split_token="Action:") try: tool_name, arguments = self.tool_parser(action) except Exception as e: raise AgentParsingError(f"Could not parse the given action: {e}.") log_entry["rationale"] = rationale log_entry["tool_call"] = {"tool_name": tool_name, "tool_arguments": arguments} # Execute console.print("=== Agent thoughts:") console.print(rationale) console.print(f">>> Calling tool: '{tool_name}' with arguments: {arguments}") if tool_name == "final_answer": if isinstance(arguments, dict): if "answer" in arguments: answer = arguments["answer"] if ( isinstance(answer, str) and answer in self.state.keys() ): # if the answer is a state variable, return the value answer = self.state[answer] else: answer = arguments else: answer = arguments log_entry["final_answer"] = answer return answer else: if arguments is None: arguments = {} observation = self.execute_tool_call(tool_name, arguments) observation_type = type(observation) if observation_type in [AgentImage, AgentAudio]: if observation_type == AgentImage: observation_name = "image.png" elif observation_type == AgentAudio: observation_name = "audio.mp3" # TODO: observation naming could allow for different names of same type self.state[observation_name] = observation updated_information = f"Stored '{observation_name}' in memory." else: updated_information = str(observation).strip() log_entry["observation"] = updated_information return log_entry class ReactCodeAgent(ReactAgent): """ This agent that solves the given task step by step, using the ReAct framework: While the objective is not reached, the agent will perform a cycle of thinking and acting. The tool calls will be formulated by the LLM in code format, then parsed and executed. """ def __init__( self, tools: List[Tool], llm_engine: Optional[Callable] = None, system_prompt: Optional[str] = None, tool_description_template: Optional[str] = None, grammar: Optional[Dict[str, str]] = None, additional_authorized_imports: Optional[List[str]] = None, planning_interval: Optional[int] = None, **kwargs, ): if llm_engine is None: llm_engine = HfApiEngine() if system_prompt is None: system_prompt = DEFAULT_REACT_CODE_SYSTEM_PROMPT if tool_description_template is None: tool_description_template = DEFAULT_TOOL_DESCRIPTION_TEMPLATE super().__init__( tools=tools, llm_engine=llm_engine, system_prompt=system_prompt, tool_description_template=tool_description_template, grammar=grammar, planning_interval=planning_interval, **kwargs, ) self.python_evaluator = evaluate_python_code self.additional_authorized_imports = additional_authorized_imports if additional_authorized_imports else [] self.authorized_imports = list(set(LIST_SAFE_MODULES) | set(self.additional_authorized_imports)) self.system_prompt = self.system_prompt.replace("<>", str(self.authorized_imports)) self.custom_tools = {} def step(self, log_entry: Dict[str, Any]): """ Perform one step in the ReAct framework: the agent thinks, acts, and observes the result. The errors are raised here, they are caught and logged in the run() method. """ agent_memory = self.write_inner_memory_from_logs() self.prompt = agent_memory.copy() console.rule("New step") # Add new step in logs log_entry["agent_memory"] = agent_memory.copy() if self.verbose: console.print("===== Calling LLM with these last messages: =====") console.print(self.prompt[-2:]) try: additional_args = {"grammar": self.grammar} if self.grammar is not None else {} llm_output = self.llm_engine( self.prompt, stop_sequences=["", "Observation:"], **additional_args ) except Exception as e: raise AgentGenerationError(f"Error in generating llm output: {e}.") if self.verbose: console.rule("Output message of the LLM:") console.print(llm_output) log_entry["llm_output"] = llm_output # Parse try: rationale, raw_code_action = self.extract_action(llm_output=llm_output, split_token="Code:") except Exception as e: console.print(f"Error in extracting action, trying to parse the whole output. Error trace: {e}") rationale, raw_code_action = llm_output, llm_output try: code_action = parse_code_blob(raw_code_action) except Exception as e: error_msg = f"Error in code parsing: {e}. Make sure to provide correct code" raise AgentParsingError(error_msg) log_entry["rationale"] = rationale log_entry["tool_call"] = {"tool_name": "code interpreter", "tool_arguments": code_action} # Execute self.log_rationale_code_action(rationale, code_action) try: static_tools = { **BASE_PYTHON_TOOLS.copy(), **self.toolbox.tools, } if self.managed_agents is not None: static_tools = {**static_tools, **self.managed_agents} result = self.python_evaluator( code_action, static_tools=static_tools, custom_tools=self.custom_tools, state=self.state, authorized_imports=self.authorized_imports, ) console.print("Print outputs:") console.print(self.state["print_outputs"]) observation = "Print outputs:\n" + self.state["print_outputs"] if result is not None: console.print("Last output from code snippet:") console.print(str(result)) observation += "Last output from code snippet:\n" + str(result)[:100000] log_entry["observation"] = observation except Exception as e: error_msg = f"Code execution failed due to the following error:\n{str(e)}" if "'dict' object has no attribute 'read'" in str(e): error_msg += "\nYou get this error because you passed a dict as input for one of the arguments instead of a string." raise AgentExecutionError(error_msg) for line in code_action.split("\n"): if line[: len("final_answer")] == "final_answer": console.print("Final answer:") console.print(f"[bold]{result}") log_entry["final_answer"] = result return result LENGTH_TRUNCATE_REPORTS = 1000 class ManagedAgent: def __init__(self, agent, name, description, additional_prompting=None, provide_run_summary=False): self.agent = agent self.name = name self.description = description self.additional_prompting = additional_prompting self.provide_run_summary = provide_run_summary def write_full_task(self, task): full_task = f"""You're a helpful agent named '{self.name}'. You have been submitted this task by your manager. --- Task: {task} --- You're helping your manager solve a wider task: so make sure to not provide a one-line answer, but give as much information as possible so that they have a clear understanding of the answer. Your final_answer WILL HAVE to contain these parts: ### 1. Task outcome (short version): ### 2. Task outcome (extremely detailed version): ### 3. Additional context (if relevant): Put all these in your final_answer tool, everything that you do not pass as an argument to final_answer will be lost. And even if your task resolution is not successful, please return as much context as possible, so that your manager can act upon this feedback. <>""" if self.additional_prompting: full_task = full_task.replace("\n<>", self.additional_prompting).strip() else: full_task = full_task.replace("\n<>", "").strip() return full_task def __call__(self, request, **kwargs): full_task = self.write_full_task(request) output = self.agent.run(full_task, **kwargs) if self.provide_run_summary: answer = f"Here is the final answer from your managed agent '{self.name}':\n" answer += str(output) answer += f"\n\nFor more detail, find below a summary of this agent's work:\nSUMMARY OF WORK FROM AGENT '{self.name}':\n" for message in self.agent.write_inner_memory_from_logs(summary_mode=True): content = message["content"] if len(str(content)) < LENGTH_TRUNCATE_REPORTS or "[FACTS LIST]" in str(content): answer += "\n" + str(content) + "\n---" else: answer += ( "\n" + str(content)[:LENGTH_TRUNCATE_REPORTS] + "\n(...Step was truncated because too long)...\n---" ) answer += f"\nEND OF SUMMARY OF WORK FROM AGENT '{self.name}'." return answer else: return output