Fix `vlm_web_browser.py` example (#410)
* Update `vlm_web_browser.py` example. Fixed a typo which meant that the images were never being removed for lean processing. --------- Co-authored-by: Aymeric <aymeric.roucher@gmail.com>
This commit is contained in:
		
							parent
							
								
									dcbbe448af
								
							
						
					
					
						commit
						c0abd2134e
					
				|  | @ -72,22 +72,24 @@ else: | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # Prepare callback | # Prepare callback | ||||||
| def save_screenshot(step_log: ActionStep, agent: CodeAgent) -> None: | def save_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None: | ||||||
|     sleep(1.0)  # Let JavaScript animations happen before taking the screenshot |     sleep(1.0)  # Let JavaScript animations happen before taking the screenshot | ||||||
|     driver = helium.get_driver() |     driver = helium.get_driver() | ||||||
|     current_step = step_log.step_number |     current_step = memory_step.step_number | ||||||
|     if driver is not None: |     if driver is not None: | ||||||
|         for step_logs in agent.logs:  # Remove previous screenshots from logs for lean processing |         for previous_memory_step in agent.memory.steps:  # Remove previous screenshots from logs for lean processing | ||||||
|             if isinstance(step_log, ActionStep) and step_log.step_number <= current_step - 2: |             if isinstance(previous_memory_step, ActionStep) and previous_memory_step.step_number <= current_step - 2: | ||||||
|                 step_logs.observations_images = None |                 previous_memory_step.observations_images = None | ||||||
|         png_bytes = driver.get_screenshot_as_png() |         png_bytes = driver.get_screenshot_as_png() | ||||||
|         image = Image.open(BytesIO(png_bytes)) |         image = Image.open(BytesIO(png_bytes)) | ||||||
|         print(f"Captured a browser screenshot: {image.size} pixels") |         print(f"Captured a browser screenshot: {image.size} pixels") | ||||||
|         step_log.observations_images = [image.copy()]  # Create a copy to ensure it persists, important! |         memory_step.observations_images = [image.copy()]  # Create a copy to ensure it persists, important! | ||||||
| 
 | 
 | ||||||
|     # Update observations with current URL |     # Update observations with current URL | ||||||
|     url_info = f"Current url: {driver.current_url}" |     url_info = f"Current url: {driver.current_url}" | ||||||
|     step_log.observations = url_info if step_logs.observations is None else step_log.observations + "\n" + url_info |     memory_step.observations = ( | ||||||
|  |         url_info if memory_step.observations is None else memory_step.observations + "\n" + url_info | ||||||
|  |     ) | ||||||
|     return |     return | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -225,8 +225,8 @@ class MultiStepAgent: | ||||||
|         the LLM. |         the LLM. | ||||||
|         """ |         """ | ||||||
|         messages = self.memory.system_prompt.to_messages(summary_mode=summary_mode) |         messages = self.memory.system_prompt.to_messages(summary_mode=summary_mode) | ||||||
|         for step_log in self.memory.steps: |         for memory_step in self.memory.steps: | ||||||
|             messages.extend(step_log.to_messages(summary_mode=summary_mode)) |             messages.extend(memory_step.to_messages(summary_mode=summary_mode)) | ||||||
|         return messages |         return messages | ||||||
| 
 | 
 | ||||||
|     def extract_action(self, model_output: str, split_token: str) -> Tuple[str, str]: |     def extract_action(self, model_output: str, split_token: str) -> Tuple[str, str]: | ||||||
|  | @ -413,12 +413,12 @@ You have been provided with these additional arguments, that you can access usin | ||||||
|         self.memory.steps.append(TaskStep(task=self.task, task_images=images)) |         self.memory.steps.append(TaskStep(task=self.task, task_images=images)) | ||||||
|         if single_step: |         if single_step: | ||||||
|             step_start_time = time.time() |             step_start_time = time.time() | ||||||
|             step_log = ActionStep(start_time=step_start_time, observations_images=images) |             memory_step = ActionStep(start_time=step_start_time, observations_images=images) | ||||||
|             step_log.end_time = time.time() |             memory_step.end_time = time.time() | ||||||
|             step_log.duration = step_log.end_time - step_start_time |             memory_step.duration = memory_step.end_time - step_start_time | ||||||
| 
 | 
 | ||||||
|             # Run the agent's step |             # Run the agent's step | ||||||
|             result = self.step(step_log) |             result = self.step(memory_step) | ||||||
|             return result |             return result | ||||||
| 
 | 
 | ||||||
|         if stream: |         if stream: | ||||||
|  | @ -439,7 +439,7 @@ You have been provided with these additional arguments, that you can access usin | ||||||
|         self.step_number = 0 |         self.step_number = 0 | ||||||
|         while final_answer is None and self.step_number < self.max_steps: |         while final_answer is None and self.step_number < self.max_steps: | ||||||
|             step_start_time = time.time() |             step_start_time = time.time() | ||||||
|             step_log = ActionStep( |             memory_step = ActionStep( | ||||||
|                 step_number=self.step_number, |                 step_number=self.step_number, | ||||||
|                 start_time=step_start_time, |                 start_time=step_start_time, | ||||||
|                 observations_images=images, |                 observations_images=images, | ||||||
|  | @ -461,41 +461,40 @@ You have been provided with these additional arguments, that you can access usin | ||||||
|                 ) |                 ) | ||||||
| 
 | 
 | ||||||
|                 # Run one step! |                 # Run one step! | ||||||
|                 final_answer = self.step(step_log) |                 final_answer = self.step(memory_step) | ||||||
|             except AgentError as e: |             except AgentError as e: | ||||||
|                 step_log.error = e |                 memory_step.error = e | ||||||
|             finally: |             finally: | ||||||
|                 step_log.end_time = time.time() |                 memory_step.end_time = time.time() | ||||||
|                 step_log.duration = step_log.end_time - step_start_time |                 memory_step.duration = memory_step.end_time - step_start_time | ||||||
|                 self.memory.steps.append(step_log) |                 self.memory.steps.append(memory_step) | ||||||
|                 for callback in self.step_callbacks: |                 for callback in self.step_callbacks: | ||||||
|                     # For compatibility with old callbacks that don't take the agent as an argument |                     # For compatibility with old callbacks that don't take the agent as an argument | ||||||
|                     if len(inspect.signature(callback).parameters) == 1: |                     if len(inspect.signature(callback).parameters) == 1: | ||||||
|                         callback(step_log) |                         callback(memory_step) | ||||||
|                     else: |                     else: | ||||||
|                         callback(step_log=step_log, agent=self) |                         callback(memory_step, agent=self) | ||||||
|                 self.step_number += 1 |                 self.step_number += 1 | ||||||
|                 yield step_log |                 yield memory_step | ||||||
| 
 | 
 | ||||||
|         if final_answer is None and self.step_number == self.max_steps: |         if final_answer is None and self.step_number == self.max_steps: | ||||||
|             error_message = "Reached max steps." |             error_message = "Reached max steps." | ||||||
|             final_answer = self.provide_final_answer(task, images) |             final_answer = self.provide_final_answer(task, images) | ||||||
|             final_step_log = ActionStep( |             final_memory_step = ActionStep( | ||||||
|                 step_number=self.step_number, error=AgentMaxStepsError(error_message, self.logger) |                 step_number=self.step_number, error=AgentMaxStepsError(error_message, self.logger) | ||||||
|             ) |             ) | ||||||
|             self.logger.log(final_step_log) |             final_memory_step = ActionStep(error=AgentMaxStepsError(error_message, self.logger)) | ||||||
|             final_step_log = ActionStep(error=AgentMaxStepsError(error_message, self.logger)) |             final_memory_step.action_output = final_answer | ||||||
|             final_step_log.action_output = final_answer |             final_memory_step.end_time = time.time() | ||||||
|             final_step_log.end_time = time.time() |             final_memory_step.duration = memory_step.end_time - step_start_time | ||||||
|             final_step_log.duration = step_log.end_time - step_start_time |             self.memory.steps.append(final_memory_step) | ||||||
|             self.memory.steps.append(final_step_log) |  | ||||||
|             for callback in self.step_callbacks: |             for callback in self.step_callbacks: | ||||||
|                 # For compatibility with old callbacks that don't take the agent as an argument |                 # For compatibility with old callbacks that don't take the agent as an argument | ||||||
|                 if len(inspect.signature(callback).parameters) == 1: |                 if len(inspect.signature(callback).parameters) == 1: | ||||||
|                     callback(final_step_log) |                     callback(final_memory_step) | ||||||
|                 else: |                 else: | ||||||
|                     callback(step_log=final_step_log, agent=self) |                     callback(final_memory_step, agent=self) | ||||||
|             yield final_step_log |             yield final_memory_step | ||||||
| 
 | 
 | ||||||
|         yield handle_agent_output_types(final_answer) |         yield handle_agent_output_types(final_answer) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -105,7 +105,8 @@ class ActionStep(MemoryStep): | ||||||
|                 ) |                 ) | ||||||
|             else: |             else: | ||||||
|                 tool_response_message = Message( |                 tool_response_message = Message( | ||||||
|                     role=MessageRole.TOOL_RESPONSE, content=f"Call id: {self.tool_calls[0].id}\n{message_content}" |                     role=MessageRole.TOOL_RESPONSE, | ||||||
|  |                     content=[{"type": "text", "text": f"Call id: {self.tool_calls[0].id}\n{message_content}"}], | ||||||
|                 ) |                 ) | ||||||
| 
 | 
 | ||||||
|             messages.append(tool_response_message) |             messages.append(tool_response_message) | ||||||
|  | @ -114,7 +115,12 @@ class ActionStep(MemoryStep): | ||||||
|                 messages.append( |                 messages.append( | ||||||
|                     Message( |                     Message( | ||||||
|                         role=MessageRole.TOOL_RESPONSE, |                         role=MessageRole.TOOL_RESPONSE, | ||||||
|                         content=f"Call id: {self.tool_calls[0].id}\nObservation:\n{self.observations}", |                         content=[ | ||||||
|  |                             { | ||||||
|  |                                 "type": "text", | ||||||
|  |                                 "text": f"Call id: {self.tool_calls[0].id}\nObservation:\n{self.observations}", | ||||||
|  |                             } | ||||||
|  |                         ], | ||||||
|                     ) |                     ) | ||||||
|                 ) |                 ) | ||||||
|         if self.observations_images: |         if self.observations_images: | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue