Fix `vlm_web_browser.py` example (#410)
* Update `vlm_web_browser.py` example Fixed a typo which meant that the images were never being remove for lean processing --------- Co-authored-by: Aymeric <aymeric.roucher@gmail.com>
This commit is contained in:
parent
dcbbe448af
commit
c0abd2134e
|
@ -72,22 +72,24 @@ else:
|
||||||
|
|
||||||
|
|
||||||
# Prepare callback
|
# Prepare callback
|
||||||
def save_screenshot(step_log: ActionStep, agent: CodeAgent) -> None:
|
def save_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None:
|
||||||
sleep(1.0) # Let JavaScript animations happen before taking the screenshot
|
sleep(1.0) # Let JavaScript animations happen before taking the screenshot
|
||||||
driver = helium.get_driver()
|
driver = helium.get_driver()
|
||||||
current_step = step_log.step_number
|
current_step = memory_step.step_number
|
||||||
if driver is not None:
|
if driver is not None:
|
||||||
for step_logs in agent.logs: # Remove previous screenshots from logs for lean processing
|
for previous_memory_step in agent.memory.steps: # Remove previous screenshots from logs for lean processing
|
||||||
if isinstance(step_log, ActionStep) and step_log.step_number <= current_step - 2:
|
if isinstance(previous_memory_step, ActionStep) and previous_memory_step.step_number <= current_step - 2:
|
||||||
step_logs.observations_images = None
|
previous_memory_step.observations_images = None
|
||||||
png_bytes = driver.get_screenshot_as_png()
|
png_bytes = driver.get_screenshot_as_png()
|
||||||
image = Image.open(BytesIO(png_bytes))
|
image = Image.open(BytesIO(png_bytes))
|
||||||
print(f"Captured a browser screenshot: {image.size} pixels")
|
print(f"Captured a browser screenshot: {image.size} pixels")
|
||||||
step_log.observations_images = [image.copy()] # Create a copy to ensure it persists, important!
|
memory_step.observations_images = [image.copy()] # Create a copy to ensure it persists, important!
|
||||||
|
|
||||||
# Update observations with current URL
|
# Update observations with current URL
|
||||||
url_info = f"Current url: {driver.current_url}"
|
url_info = f"Current url: {driver.current_url}"
|
||||||
step_log.observations = url_info if step_logs.observations is None else step_log.observations + "\n" + url_info
|
memory_step.observations = (
|
||||||
|
url_info if memory_step.observations is None else memory_step.observations + "\n" + url_info
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -225,8 +225,8 @@ class MultiStepAgent:
|
||||||
the LLM.
|
the LLM.
|
||||||
"""
|
"""
|
||||||
messages = self.memory.system_prompt.to_messages(summary_mode=summary_mode)
|
messages = self.memory.system_prompt.to_messages(summary_mode=summary_mode)
|
||||||
for step_log in self.memory.steps:
|
for memory_step in self.memory.steps:
|
||||||
messages.extend(step_log.to_messages(summary_mode=summary_mode))
|
messages.extend(memory_step.to_messages(summary_mode=summary_mode))
|
||||||
return messages
|
return messages
|
||||||
|
|
||||||
def extract_action(self, model_output: str, split_token: str) -> Tuple[str, str]:
|
def extract_action(self, model_output: str, split_token: str) -> Tuple[str, str]:
|
||||||
|
@ -413,12 +413,12 @@ You have been provided with these additional arguments, that you can access usin
|
||||||
self.memory.steps.append(TaskStep(task=self.task, task_images=images))
|
self.memory.steps.append(TaskStep(task=self.task, task_images=images))
|
||||||
if single_step:
|
if single_step:
|
||||||
step_start_time = time.time()
|
step_start_time = time.time()
|
||||||
step_log = ActionStep(start_time=step_start_time, observations_images=images)
|
memory_step = ActionStep(start_time=step_start_time, observations_images=images)
|
||||||
step_log.end_time = time.time()
|
memory_step.end_time = time.time()
|
||||||
step_log.duration = step_log.end_time - step_start_time
|
memory_step.duration = memory_step.end_time - step_start_time
|
||||||
|
|
||||||
# Run the agent's step
|
# Run the agent's step
|
||||||
result = self.step(step_log)
|
result = self.step(memory_step)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
if stream:
|
if stream:
|
||||||
|
@ -439,7 +439,7 @@ You have been provided with these additional arguments, that you can access usin
|
||||||
self.step_number = 0
|
self.step_number = 0
|
||||||
while final_answer is None and self.step_number < self.max_steps:
|
while final_answer is None and self.step_number < self.max_steps:
|
||||||
step_start_time = time.time()
|
step_start_time = time.time()
|
||||||
step_log = ActionStep(
|
memory_step = ActionStep(
|
||||||
step_number=self.step_number,
|
step_number=self.step_number,
|
||||||
start_time=step_start_time,
|
start_time=step_start_time,
|
||||||
observations_images=images,
|
observations_images=images,
|
||||||
|
@ -461,41 +461,40 @@ You have been provided with these additional arguments, that you can access usin
|
||||||
)
|
)
|
||||||
|
|
||||||
# Run one step!
|
# Run one step!
|
||||||
final_answer = self.step(step_log)
|
final_answer = self.step(memory_step)
|
||||||
except AgentError as e:
|
except AgentError as e:
|
||||||
step_log.error = e
|
memory_step.error = e
|
||||||
finally:
|
finally:
|
||||||
step_log.end_time = time.time()
|
memory_step.end_time = time.time()
|
||||||
step_log.duration = step_log.end_time - step_start_time
|
memory_step.duration = memory_step.end_time - step_start_time
|
||||||
self.memory.steps.append(step_log)
|
self.memory.steps.append(memory_step)
|
||||||
for callback in self.step_callbacks:
|
for callback in self.step_callbacks:
|
||||||
# For compatibility with old callbacks that don't take the agent as an argument
|
# For compatibility with old callbacks that don't take the agent as an argument
|
||||||
if len(inspect.signature(callback).parameters) == 1:
|
if len(inspect.signature(callback).parameters) == 1:
|
||||||
callback(step_log)
|
callback(memory_step)
|
||||||
else:
|
else:
|
||||||
callback(step_log=step_log, agent=self)
|
callback(memory_step, agent=self)
|
||||||
self.step_number += 1
|
self.step_number += 1
|
||||||
yield step_log
|
yield memory_step
|
||||||
|
|
||||||
if final_answer is None and self.step_number == self.max_steps:
|
if final_answer is None and self.step_number == self.max_steps:
|
||||||
error_message = "Reached max steps."
|
error_message = "Reached max steps."
|
||||||
final_answer = self.provide_final_answer(task, images)
|
final_answer = self.provide_final_answer(task, images)
|
||||||
final_step_log = ActionStep(
|
final_memory_step = ActionStep(
|
||||||
step_number=self.step_number, error=AgentMaxStepsError(error_message, self.logger)
|
step_number=self.step_number, error=AgentMaxStepsError(error_message, self.logger)
|
||||||
)
|
)
|
||||||
self.logger.log(final_step_log)
|
final_memory_step = ActionStep(error=AgentMaxStepsError(error_message, self.logger))
|
||||||
final_step_log = ActionStep(error=AgentMaxStepsError(error_message, self.logger))
|
final_memory_step.action_output = final_answer
|
||||||
final_step_log.action_output = final_answer
|
final_memory_step.end_time = time.time()
|
||||||
final_step_log.end_time = time.time()
|
final_memory_step.duration = memory_step.end_time - step_start_time
|
||||||
final_step_log.duration = step_log.end_time - step_start_time
|
self.memory.steps.append(final_memory_step)
|
||||||
self.memory.steps.append(final_step_log)
|
|
||||||
for callback in self.step_callbacks:
|
for callback in self.step_callbacks:
|
||||||
# For compatibility with old callbacks that don't take the agent as an argument
|
# For compatibility with old callbacks that don't take the agent as an argument
|
||||||
if len(inspect.signature(callback).parameters) == 1:
|
if len(inspect.signature(callback).parameters) == 1:
|
||||||
callback(final_step_log)
|
callback(final_memory_step)
|
||||||
else:
|
else:
|
||||||
callback(step_log=final_step_log, agent=self)
|
callback(final_memory_step, agent=self)
|
||||||
yield final_step_log
|
yield final_memory_step
|
||||||
|
|
||||||
yield handle_agent_output_types(final_answer)
|
yield handle_agent_output_types(final_answer)
|
||||||
|
|
||||||
|
|
|
@ -105,7 +105,8 @@ class ActionStep(MemoryStep):
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
tool_response_message = Message(
|
tool_response_message = Message(
|
||||||
role=MessageRole.TOOL_RESPONSE, content=f"Call id: {self.tool_calls[0].id}\n{message_content}"
|
role=MessageRole.TOOL_RESPONSE,
|
||||||
|
content=[{"type": "text", "text": f"Call id: {self.tool_calls[0].id}\n{message_content}"}],
|
||||||
)
|
)
|
||||||
|
|
||||||
messages.append(tool_response_message)
|
messages.append(tool_response_message)
|
||||||
|
@ -114,7 +115,12 @@ class ActionStep(MemoryStep):
|
||||||
messages.append(
|
messages.append(
|
||||||
Message(
|
Message(
|
||||||
role=MessageRole.TOOL_RESPONSE,
|
role=MessageRole.TOOL_RESPONSE,
|
||||||
content=f"Call id: {self.tool_calls[0].id}\nObservation:\n{self.observations}",
|
content=[
|
||||||
|
{
|
||||||
|
"type": "text",
|
||||||
|
"text": f"Call id: {self.tool_calls[0].id}\nObservation:\n{self.observations}",
|
||||||
|
}
|
||||||
|
],
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
if self.observations_images:
|
if self.observations_images:
|
||||||
|
|
Loading…
Reference in New Issue