diff --git a/examples/benchmark.ipynb b/examples/benchmark.ipynb
index 8b49b0a..fb65d3b 100644
--- a/examples/benchmark.ipynb
+++ b/examples/benchmark.ipynb
@@ -253,11 +253,11 @@
     "\n",
     "            if is_vanilla_llm:\n",
     "                llm = agent\n",
-    "                answer = llm([{\"role\": \"user\", \"content\": question}])\n",
-    "                token_count = llm.last_input_token_count + llm.last_output_token_count\n",
-    "                intermediate_steps = []\n",
+    "                answer = str(llm([{\"role\": \"user\", \"content\": question}]))\n",
+    "                token_count = {\"input\": llm.last_input_token_count, \"output\": llm.last_output_token_count}\n",
+    "                intermediate_steps = str([])\n",
     "            else:\n",
-    "                answer = agent.run(question)\n",
+    "                answer = str(agent.run(question))\n",
     "                token_count = agent.monitor.get_total_token_counts()\n",
     "                intermediate_steps = str(agent.logs)\n",
     "                # Remove memory from logs to make them more compact.\n",