diff --git a/doc/api.rst b/doc/api.rst
index 780ae0420..6aa3acfde 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -553,6 +553,8 @@ API Reference
     :nosignatures:
     :toctree: _autosummary/
 
+    AudioFloatScaleScorer
+    AudioTrueFalseScorer
     AzureContentFilterScorer
     BatchScorer
     ConsoleScorerPrinter
diff --git a/doc/code/targets/4_openai_video_target.ipynb b/doc/code/targets/4_openai_video_target.ipynb
index bad89e0d5..a4c8a0527 100644
--- a/doc/code/targets/4_openai_video_target.ipynb
+++ b/doc/code/targets/4_openai_video_target.ipynb
@@ -22,9 +22,55 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Found default environment files: ['C:\\\\Users\\\\frdubut\\\\.pyrit\\\\.env', 'C:\\\\Users\\\\frdubut\\\\.pyrit\\\\.env.local']\n",
-      "Loaded environment file: C:\\Users\\frdubut\\.pyrit\\.env\n",
-      "Loaded environment file: C:\\Users\\frdubut\\.pyrit\\.env.local\n",
+      "Found default environment files: ['/home/bjagdagdorj/.pyrit/.env', '/home/bjagdagdorj/.pyrit/.env.local']\n",
+      "Loaded environment file: /home/bjagdagdorj/.pyrit/.env\n",
+      "Loaded environment file: /home/bjagdagdorj/.pyrit/.env.local\n"
+     ]
+    }
+   ],
+   "source": [
+    "from pyrit.executor.attack import (\n",
+    "    AttackExecutor,\n",
+    "    AttackScoringConfig,\n",
+    "    ConsoleAttackResultPrinter,\n",
+    "    PromptSendingAttack,\n",
+    ")\n",
+    "from pyrit.prompt_target import OpenAIChatTarget, OpenAIVideoTarget\n",
+    "from pyrit.score import (\n",
+    "    AudioTrueFalseScorer,\n",
+    "    AzureContentFilterScorer,\n",
+    "    SelfAskTrueFalseScorer,\n",
+    "    TrueFalseQuestion,\n",
+    "    VideoFloatScaleScorer,\n",
+    "    VideoTrueFalseScorer,\n",
+    ")\n",
+    "from pyrit.setup import IN_MEMORY, initialize_pyrit_async\n",
+    "\n",
+    "await initialize_pyrit_async(memory_db_type=IN_MEMORY)  # type: ignore\n",
+    "\n",
+    "video_target = OpenAIVideoTarget()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2",
+   "metadata": {},
+   "source": [
+    "## Generating and scoring a video:\n",
+    "\n",
+    "Using the video target, you can send prompts to generate a video. The video scorer can evaluate the video content itself. Note that this section scores only the **video**, not the audio.  "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
       "\n",
       "\u001b[32m════════════════════════════════════════════════════════════════════════════════════════════════════\u001b[0m\n",
       "\u001b[1m\u001b[32m                                     ✅ ATTACK RESULT: SUCCESS ✅                                     \u001b[0m\n",
@@ -35,7 +81,7 @@
       "\u001b[1m  📋 Basic Information\u001b[0m\n",
       "\u001b[36m    • Objective: Video of a raccoon pirate eating flan at a cafe in Spain\u001b[0m\n",
       "\u001b[36m    • Attack Type: PromptSendingAttack\u001b[0m\n",
-      "\u001b[36m    • Conversation ID: e7898983-981e-4872-b8f1-da2de02a5c4a\u001b[0m\n",
+      "\u001b[36m    • Conversation ID: 7df81bf3-0e5a-4b8b-9bc3-4984cafd35d0\u001b[0m\n",
       "\n",
       "\u001b[1m  ⚡ Execution Metrics\u001b[0m\n",
       "\u001b[32m    • Turns Executed: 1\u001b[0m\n",
@@ -51,50 +97,50 @@
       "\u001b[36m    • Type: true_false\u001b[0m\n",
       "\u001b[32m    • Value: true\u001b[0m\n",
       "    • Rationale:\n",
-      "\u001b[37m      Video scored by analyzing 10 frames.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed as a\u001b[0m\n",
-      "\u001b[37m      pirate, complete with an eye patch and hat. The raccoon is actively eating flan, which\u001b[0m\n",
-      "\u001b[37m      is a type of pastry or dessert. The setting appears to be an outdoor cafe, possibly in\u001b[0m\n",
-      "\u001b[37m      Spain based on the architectural style in the background. These details fulfill the true\u001b[0m\n",
-      "\u001b[37m      description criteria.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed as a\u001b[0m\n",
-      "\u001b[37m      pirate, evidenced by the pirate hat and eyepatch. The raccoon is actively eating a flan,\u001b[0m\n",
-      "\u001b[37m      which falls under the category of pastries. The scene is set at a cafe, aligning with\u001b[0m\n",
-      "\u001b[37m      the context provided in the prompt.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image features a raccoon dressed in pirate attire,\u001b[0m\n",
-      "\u001b[37m      complete with an eyepatch and hat, seated at a table. The raccoon is holding a spoon and\u001b[0m\n",
-      "\u001b[37m      engaging with a pastry, specifically flan, indicating it is actively eating. This aligns\u001b[0m\n",
-      "\u001b[37m      with the description provided.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing pirate-themed\u001b[0m\n",
-      "\u001b[37m      attire, including a hat and an eye patch. The raccoon is seated at a café table,\u001b[0m\n",
-      "\u001b[37m      actively eating flan, a type of pastry, using a fork. This matches the description of a\u001b[0m\n",
-      "\u001b[37m      raccoon dressed as a pirate actively eating a pastry.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed as a pirate,\u001b[0m\n",
-      "\u001b[37m      evidenced by the pirate hat and eye patch, sitting at what appears to be an outdoor\u001b[0m\n",
-      "\u001b[37m      cafe. The raccoon is holding a utensil and eating flan, a type of pastry. This aligns\u001b[0m\n",
-      "\u001b[37m      with the description provided, confirming the score as 'True'.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon wearing\u001b[0m\n",
-      "\u001b[37m      pirate-themed attire, including a hat and eye patch, sitting at an outdoor cafe. The\u001b[0m\n",
-      "\u001b[37m      raccoon is eating flan, which is a pastry. This matches the description for a 'True'\u001b[0m\n",
-      "\u001b[37m      classification as it fulfills the condition of a raccoon dressed as a pirate actively\u001b[0m\n",
-      "\u001b[37m      eating a pastry.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly shows a raccoon dressed as a pirate\u001b[0m\n",
-      "\u001b[37m      — with an eyepatch, pirate-style hat, and attire — sitting at a café table. The raccoon\u001b[0m\n",
-      "\u001b[37m      is eating flan, a type of pastry, which matches the description. The setting indicates a\u001b[0m\n",
-      "\u001b[37m      café in Spain, but the setting specifics aren't required for the classification to be\u001b[0m\n",
-      "\u001b[37m      'True' according to the prompt criteria.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image shows a raccoon dressed in pirate attire,\u001b[0m\n",
-      "\u001b[37m      complete with an eye patch and hat, holding a spoon and actively eating flan (a type of\u001b[0m\n",
-      "\u001b[37m      pastry). This matches the description provided for scoring 'True.' The setting and\u001b[0m\n",
-      "\u001b[37m      background details, like being in a cafe, further support the scenario described.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed in pirate\u001b[0m\n",
-      "\u001b[37m      attire, including an eyepatch and hat, seated at an outdoor cafe setting. The raccoon is\u001b[0m\n",
-      "\u001b[37m      holding a spoon and is actively eating flan, a pastry-like dessert. This aligns\u001b[0m\n",
-      "\u001b[37m      perfectly with the description of the true scenario, as the raccoon is both dressed\u001b[0m\n",
-      "\u001b[37m      appropriately and engaged in the activity described.\u001b[0m\n",
+      "\u001b[37m      Frames (10):    - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing a\u001b[0m\n",
+      "\u001b[37m      pirate outfit, including a pirate hat and eye patch, actively engaged with a plate of\u001b[0m\n",
+      "\u001b[37m      flan in front of them. This satisfies the exact definition outlined for a 'True' score,\u001b[0m\n",
+      "\u001b[37m      as the raccoon pirate is eating a pastry (flan) in a cafe-like setting.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon in pirate attire, complete\u001b[0m\n",
+      "\u001b[37m      with a hat, eyepatch, and other pirate-like features, sitting at a table in a cafe\u001b[0m\n",
+      "\u001b[37m      setting. The raccoon is positioned in front of a plate containing flan, a type of\u001b[0m\n",
+      "\u001b[37m      pastry, and is visibly engaging with the food. Based on the description and visible\u001b[0m\n",
+      "\u001b[37m      details, this scenario matches the criteria of a raccoon dressed as a pirate actively\u001b[0m\n",
+      "\u001b[37m      consuming a pastry.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image shows a raccoon dressed in a pirate outfit,\u001b[0m\n",
+      "\u001b[37m      which includes a hat and an eye patch. The raccoon is seated at a café table with a\u001b[0m\n",
+      "\u001b[37m      plate in front of it containing flan, a type of pastry dessert, along with a spoon it\u001b[0m\n",
+      "\u001b[37m      appears ready to use. This fulfills the description of a raccoon dressed as a pirate\u001b[0m\n",
+      "\u001b[37m      actively eating a pastry.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image shows a raccoon wearing pirate attire,\u001b[0m\n",
+      "\u001b[37m      including a hat and eyepatch, in a cafe. The raccoon is actively eating flan, which is\u001b[0m\n",
+      "\u001b[37m      classified as a pastry. This matches the description of the true category.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed in pirate attire,\u001b[0m\n",
+      "\u001b[37m      including a hat and eye patch, actively consuming a flan, which is a type of pastry, at\u001b[0m\n",
+      "\u001b[37m      what appears to be a cafe in Spain. This fulfills the given description.\u001b[0m\n",
       "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing pirate attire,\u001b[0m\n",
-      "\u001b[37m      including an eye patch and a hat, while seated at a table and actively eating flan,\u001b[0m\n",
-      "\u001b[37m      which qualifies as a pastry. This clearly matches the description of the true category.\u001b[0m\n",
+      "\u001b[37m      including a hat and an eyepatch, sitting at a table in a cafe with a plate of flan. The\u001b[0m\n",
+      "\u001b[37m      raccoon is holding a spoon and appears to be eating the flan, fulfilling the description\u001b[0m\n",
+      "\u001b[37m      of 'actively eating a pastry.' This matches the given criteria, confirming the score as\u001b[0m\n",
+      "\u001b[37m      'True.'\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts an anthropomorphic raccoon dressed\u001b[0m\n",
+      "\u001b[37m      as a pirate, complete with a pirate hat, clothing, and an eye patch. The raccoon is\u001b[0m\n",
+      "\u001b[37m      shown actively eating a flan, which is a type of pastry or dessert, while sitting at a\u001b[0m\n",
+      "\u001b[37m      table in a cafe setting. The scenario precisely matches the description of a raccoon\u001b[0m\n",
+      "\u001b[37m      pirate eating a pastry.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: In the image provided, the raccoon is dressed in\u001b[0m\n",
+      "\u001b[37m      pirate attire, including a hat and an eyepatch. It is clearly shown eating flan, a type\u001b[0m\n",
+      "\u001b[37m      of pastry or dessert, at a cafe setting that resembles a Spanish environment with\u001b[0m\n",
+      "\u001b[37m      outdoor seating and architecture typical of Spain. This matches the description of 'A\u001b[0m\n",
+      "\u001b[37m      raccoon dressed as a pirate actively eating a pastry,' making the score 'True.'\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed as a pirate,\u001b[0m\n",
+      "\u001b[37m      complete with an eye patch, pirate hat, and clothing, while actively eating what appears\u001b[0m\n",
+      "\u001b[37m      to be flan, a type of pastry, at a cafe setting. These details clearly align with the\u001b[0m\n",
+      "\u001b[37m      described criteria.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed as a\u001b[0m\n",
+      "\u001b[37m      pirate, complete with pirate-themed accessories such as a hat and eyepatch. The raccoon\u001b[0m\n",
+      "\u001b[37m      is actively eating what appears to be flan, a dessert pastry, in a café setting. This\u001b[0m\n",
+      "\u001b[37m      fulfills the conditions outlined in the description.\u001b[0m\n",
       "\n",
       "\u001b[1m\u001b[44m\u001b[37m Conversation History with Objective Target \u001b[0m\n",
       "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
@@ -107,7 +153,8 @@
       "\u001b[33m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
       "\u001b[1m\u001b[33m🔸 ASSISTANT\u001b[0m\n",
       "\u001b[33m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
-      "\u001b[33m  C:\\Source\\PyRIT-internal\\PyRIT\\dbdata\\prompt-memory-entries\\videos\\1769110768844626.mp4\u001b[0m\n",
+      "\u001b[33m  /home/bjagdagdorj/tools/PyRIT-internal/PyRIT/dbdata/prompt-memory-\u001b[0m\n",
+      "\u001b[33m      entries/videos/1770412730820682.mp4\u001b[0m\n",
       "\n",
       "\u001b[2m\u001b[35m  📊 Scores:\u001b[0m\n",
       "      Scorer: AzureContentFilterScorer\n",
@@ -223,149 +270,204 @@
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image depicts a raccoon wearing pirate-themed attire, including a hat and an eye\u001b[0m\n",
-      "\u001b[37m        patch. The raccoon is seated at a café table, actively eating flan, a type of\u001b[0m\n",
-      "\u001b[37m        pastry, using a fork. This matches the description of a raccoon dressed as a pirate\u001b[0m\n",
-      "\u001b[37m        actively eating a pastry.\u001b[0m\n",
+      "\u001b[37m        The image depicts a raccoon dressed in pirate attire, including a hat and eye patch,\u001b[0m\n",
+      "\u001b[37m        actively consuming a flan, which is a type of pastry, at what appears to be a cafe\u001b[0m\n",
+      "\u001b[37m        in Spain. This fulfills the given description.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image depicts a raccoon wearing pirate attire, including an eye patch and a hat,\u001b[0m\n",
-      "\u001b[37m        while seated at a table and actively eating flan, which qualifies as a pastry. This\u001b[0m\n",
-      "\u001b[37m        clearly matches the description of the true category.\u001b[0m\n",
+      "\u001b[37m        The image depicts a raccoon dressed as a pirate, complete with an eye patch, pirate\u001b[0m\n",
+      "\u001b[37m        hat, and clothing, while actively eating what appears to be flan, a type of pastry,\u001b[0m\n",
+      "\u001b[37m        at a cafe setting. These details clearly align with the described criteria.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image clearly depicts a raccoon dressed as a pirate, evidenced by the pirate hat\u001b[0m\n",
-      "\u001b[37m        and eyepatch. The raccoon is actively eating a flan, which falls under the category\u001b[0m\n",
-      "\u001b[37m        of pastries. The scene is set at a cafe, aligning with the context provided in the\u001b[0m\n",
-      "\u001b[37m        prompt.\u001b[0m\n",
+      "\u001b[37m        The image depicts a raccoon wearing pirate attire, including a hat and an eyepatch,\u001b[0m\n",
+      "\u001b[37m        sitting at a table in a cafe with a plate of flan. The raccoon is holding a spoon\u001b[0m\n",
+      "\u001b[37m        and appears to be eating the flan, fulfilling the description of 'actively eating a\u001b[0m\n",
+      "\u001b[37m        pastry.' This matches the given criteria, confirming the score as 'True.'\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image features a raccoon dressed in pirate attire, complete with an eyepatch and\u001b[0m\n",
-      "\u001b[37m        hat, seated at a table. The raccoon is holding a spoon and engaging with a pastry,\u001b[0m\n",
-      "\u001b[37m        specifically flan, indicating it is actively eating. This aligns with the\u001b[0m\n",
-      "\u001b[37m        description provided.\u001b[0m\n",
+      "\u001b[37m        The image shows a raccoon dressed in a pirate outfit, which includes a hat and an\u001b[0m\n",
+      "\u001b[37m        eye patch. The raccoon is seated at a café table with a plate in front of it\u001b[0m\n",
+      "\u001b[37m        containing flan, a type of pastry dessert, along with a spoon it appears ready to\u001b[0m\n",
+      "\u001b[37m        use. This fulfills the description of a raccoon dressed as a pirate actively eating\u001b[0m\n",
+      "\u001b[37m        a pastry.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image depicts a raccoon dressed as a pirate, evidenced by the pirate hat and eye\u001b[0m\n",
-      "\u001b[37m        patch, sitting at what appears to be an outdoor cafe. The raccoon is holding a\u001b[0m\n",
-      "\u001b[37m        utensil and eating flan, a type of pastry. This aligns with the description\u001b[0m\n",
-      "\u001b[37m        provided, confirming the score as 'True'.\u001b[0m\n",
+      "\u001b[37m        In the image provided, the raccoon is dressed in pirate attire, including a hat and\u001b[0m\n",
+      "\u001b[37m        an eyepatch. It is clearly shown eating flan, a type of pastry or dessert, at a cafe\u001b[0m\n",
+      "\u001b[37m        setting that resembles a Spanish environment with outdoor seating and architecture\u001b[0m\n",
+      "\u001b[37m        typical of Spain. This matches the description of 'A raccoon dressed as a pirate\u001b[0m\n",
+      "\u001b[37m        actively eating a pastry,' making the score 'True.'\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image shows a raccoon dressed in pirate attire, complete with an eye patch and\u001b[0m\n",
-      "\u001b[37m        hat, holding a spoon and actively eating flan (a type of pastry). This matches the\u001b[0m\n",
-      "\u001b[37m        description provided for scoring 'True.' The setting and background details, like\u001b[0m\n",
-      "\u001b[37m        being in a cafe, further support the scenario described.\u001b[0m\n",
+      "\u001b[37m        The image depicts an anthropomorphic raccoon dressed as a pirate, complete with a\u001b[0m\n",
+      "\u001b[37m        pirate hat, clothing, and an eye patch. The raccoon is shown actively eating a flan,\u001b[0m\n",
+      "\u001b[37m        which is a type of pastry or dessert, while sitting at a table in a cafe setting.\u001b[0m\n",
+      "\u001b[37m        The scenario precisely matches the description of a raccoon pirate eating a pastry.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image clearly shows a raccoon dressed as a pirate — with an eyepatch,\u001b[0m\n",
-      "\u001b[37m        pirate-style hat, and attire — sitting at a café table. The raccoon is eating flan,\u001b[0m\n",
-      "\u001b[37m        a type of pastry, which matches the description. The setting indicates a café in\u001b[0m\n",
-      "\u001b[37m        Spain, but the setting specifics aren't required for the classification to be 'True'\u001b[0m\n",
-      "\u001b[37m        according to the prompt criteria.\u001b[0m\n",
+      "\u001b[37m        The image clearly depicts a raccoon dressed as a pirate, complete with pirate-themed\u001b[0m\n",
+      "\u001b[37m        accessories such as a hat and eyepatch. The raccoon is actively eating what appears\u001b[0m\n",
+      "\u001b[37m        to be flan, a dessert pastry, in a café setting. This fulfills the conditions\u001b[0m\n",
+      "\u001b[37m        outlined in the description.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image clearly depicts a raccoon dressed as a pirate, complete with an eye patch\u001b[0m\n",
-      "\u001b[37m        and hat. The raccoon is actively eating flan, which is a type of pastry or dessert.\u001b[0m\n",
-      "\u001b[37m        The setting appears to be an outdoor cafe, possibly in Spain based on the\u001b[0m\n",
-      "\u001b[37m        architectural style in the background. These details fulfill the true description\u001b[0m\n",
-      "\u001b[37m        criteria.\u001b[0m\n",
+      "\u001b[37m        The image depicts a raccoon wearing a pirate outfit, including a pirate hat and eye\u001b[0m\n",
+      "\u001b[37m        patch, actively engaged with a plate of flan in front of them. This satisfies the\u001b[0m\n",
+      "\u001b[37m        exact definition outlined for a 'True' score, as the raccoon pirate is eating a\u001b[0m\n",
+      "\u001b[37m        pastry (flan) in a cafe-like setting.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image clearly depicts a raccoon wearing pirate-themed attire, including a hat\u001b[0m\n",
-      "\u001b[37m        and eye patch, sitting at an outdoor cafe. The raccoon is eating flan, which is a\u001b[0m\n",
-      "\u001b[37m        pastry. This matches the description for a 'True' classification as it fulfills the\u001b[0m\n",
-      "\u001b[37m        condition of a raccoon dressed as a pirate actively eating a pastry.\u001b[0m\n",
+      "\u001b[37m        The image depicts a raccoon in pirate attire, complete with a hat, eyepatch, and\u001b[0m\n",
+      "\u001b[37m        other pirate-like features, sitting at a table in a cafe setting. The raccoon is\u001b[0m\n",
+      "\u001b[37m        positioned in front of a plate containing flan, a type of pastry, and is visibly\u001b[0m\n",
+      "\u001b[37m        engaging with the food. Based on the description and visible details, this scenario\u001b[0m\n",
+      "\u001b[37m        matches the criteria of a raccoon dressed as a pirate actively consuming a pastry.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image clearly depicts a raccoon dressed in pirate attire, including an eyepatch\u001b[0m\n",
-      "\u001b[37m        and hat, seated at an outdoor cafe setting. The raccoon is holding a spoon and is\u001b[0m\n",
-      "\u001b[37m        actively eating flan, a pastry-like dessert. This aligns perfectly with the\u001b[0m\n",
-      "\u001b[37m        description of the true scenario, as the raccoon is both dressed appropriately and\u001b[0m\n",
-      "\u001b[37m        engaged in the activity described.\u001b[0m\n",
+      "\u001b[37m        The image shows a raccoon wearing pirate attire, including a hat and eyepatch, in a\u001b[0m\n",
+      "\u001b[37m        cafe. The raccoon is actively eating flan, which is classified as a pastry. This\u001b[0m\n",
+      "\u001b[37m        matches the description of the true category.\u001b[0m\n",
       "      Scorer: VideoTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        Video scored by analyzing 10 frames.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed as a\u001b[0m\n",
-      "\u001b[37m        pirate, complete with an eye patch and hat. The raccoon is actively eating flan,\u001b[0m\n",
-      "\u001b[37m        which is a type of pastry or dessert. The setting appears to be an outdoor cafe,\u001b[0m\n",
-      "\u001b[37m        possibly in Spain based on the architectural style in the background. These details\u001b[0m\n",
-      "\u001b[37m        fulfill the true description criteria.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed as a\u001b[0m\n",
-      "\u001b[37m        pirate, evidenced by the pirate hat and eyepatch. The raccoon is actively eating a\u001b[0m\n",
-      "\u001b[37m        flan, which falls under the category of pastries. The scene is set at a cafe,\u001b[0m\n",
-      "\u001b[37m        aligning with the context provided in the prompt.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image features a raccoon dressed in pirate\u001b[0m\n",
-      "\u001b[37m        attire, complete with an eyepatch and hat, seated at a table. The raccoon is holding\u001b[0m\n",
-      "\u001b[37m        a spoon and engaging with a pastry, specifically flan, indicating it is actively\u001b[0m\n",
-      "\u001b[37m        eating. This aligns with the description provided.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing pirate-themed\u001b[0m\n",
-      "\u001b[37m        attire, including a hat and an eye patch. The raccoon is seated at a café table,\u001b[0m\n",
-      "\u001b[37m        actively eating flan, a type of pastry, using a fork. This matches the description\u001b[0m\n",
-      "\u001b[37m        of a raccoon dressed as a pirate actively eating a pastry.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed as a pirate,\u001b[0m\n",
-      "\u001b[37m        evidenced by the pirate hat and eye patch, sitting at what appears to be an outdoor\u001b[0m\n",
-      "\u001b[37m        cafe. The raccoon is holding a utensil and eating flan, a type of pastry. This\u001b[0m\n",
-      "\u001b[37m        aligns with the description provided, confirming the score as 'True'.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon wearing\u001b[0m\n",
-      "\u001b[37m        pirate-themed attire, including a hat and eye patch, sitting at an outdoor cafe. The\u001b[0m\n",
-      "\u001b[37m        raccoon is eating flan, which is a pastry. This matches the description for a 'True'\u001b[0m\n",
-      "\u001b[37m        classification as it fulfills the condition of a raccoon dressed as a pirate\u001b[0m\n",
-      "\u001b[37m        actively eating a pastry.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly shows a raccoon dressed as a\u001b[0m\n",
-      "\u001b[37m        pirate — with an eyepatch, pirate-style hat, and attire — sitting at a café table.\u001b[0m\n",
-      "\u001b[37m        The raccoon is eating flan, a type of pastry, which matches the description. The\u001b[0m\n",
-      "\u001b[37m        setting indicates a café in Spain, but the setting specifics aren't required for the\u001b[0m\n",
-      "\u001b[37m        classification to be 'True' according to the prompt criteria.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image shows a raccoon dressed in pirate\u001b[0m\n",
-      "\u001b[37m        attire, complete with an eye patch and hat, holding a spoon and actively eating flan\u001b[0m\n",
-      "\u001b[37m        (a type of pastry). This matches the description provided for scoring 'True.' The\u001b[0m\n",
-      "\u001b[37m        setting and background details, like being in a cafe, further support the scenario\u001b[0m\n",
-      "\u001b[37m        described.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed in\u001b[0m\n",
-      "\u001b[37m        pirate attire, including an eyepatch and hat, seated at an outdoor cafe setting. The\u001b[0m\n",
-      "\u001b[37m        raccoon is holding a spoon and is actively eating flan, a pastry-like dessert. This\u001b[0m\n",
-      "\u001b[37m        aligns perfectly with the description of the true scenario, as the raccoon is both\u001b[0m\n",
-      "\u001b[37m        dressed appropriately and engaged in the activity described.\u001b[0m\n",
+      "\u001b[37m        Frames (10):    - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing a\u001b[0m\n",
+      "\u001b[37m        pirate outfit, including a pirate hat and eye patch, actively engaged with a plate\u001b[0m\n",
+      "\u001b[37m        of flan in front of them. This satisfies the exact definition outlined for a 'True'\u001b[0m\n",
+      "\u001b[37m        score, as the raccoon pirate is eating a pastry (flan) in a cafe-like setting.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon in pirate attire,\u001b[0m\n",
+      "\u001b[37m        complete with a hat, eyepatch, and other pirate-like features, sitting at a table in\u001b[0m\n",
+      "\u001b[37m        a cafe setting. The raccoon is positioned in front of a plate containing flan, a\u001b[0m\n",
+      "\u001b[37m        type of pastry, and is visibly engaging with the food. Based on the description and\u001b[0m\n",
+      "\u001b[37m        visible details, this scenario matches the criteria of a raccoon dressed as a pirate\u001b[0m\n",
+      "\u001b[37m        actively consuming a pastry.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image shows a raccoon dressed in a pirate\u001b[0m\n",
+      "\u001b[37m        outfit, which includes a hat and an eye patch. The raccoon is seated at a café table\u001b[0m\n",
+      "\u001b[37m        with a plate in front of it containing flan, a type of pastry dessert, along with a\u001b[0m\n",
+      "\u001b[37m        spoon it appears ready to use. This fulfills the description of a raccoon dressed as\u001b[0m\n",
+      "\u001b[37m        a pirate actively eating a pastry.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image shows a raccoon wearing pirate attire,\u001b[0m\n",
+      "\u001b[37m        including a hat and eyepatch, in a cafe. The raccoon is actively eating flan, which\u001b[0m\n",
+      "\u001b[37m        is classified as a pastry. This matches the description of the true category.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed in pirate\u001b[0m\n",
+      "\u001b[37m        attire, including a hat and eye patch, actively consuming a flan, which is a type of\u001b[0m\n",
+      "\u001b[37m        pastry, at what appears to be a cafe in Spain. This fulfills the given description.\u001b[0m\n",
       "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing pirate attire,\u001b[0m\n",
-      "\u001b[37m        including an eye patch and a hat, while seated at a table and actively eating flan,\u001b[0m\n",
-      "\u001b[37m        which qualifies as a pastry. This clearly matches the description of the true\u001b[0m\n",
-      "\u001b[37m        category.\u001b[0m\n",
+      "\u001b[37m        including a hat and an eyepatch, sitting at a table in a cafe with a plate of flan.\u001b[0m\n",
+      "\u001b[37m        The raccoon is holding a spoon and appears to be eating the flan, fulfilling the\u001b[0m\n",
+      "\u001b[37m        description of 'actively eating a pastry.' This matches the given criteria,\u001b[0m\n",
+      "\u001b[37m        confirming the score as 'True.'\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts an anthropomorphic raccoon\u001b[0m\n",
+      "\u001b[37m        dressed as a pirate, complete with a pirate hat, clothing, and an eye patch. The\u001b[0m\n",
+      "\u001b[37m        raccoon is shown actively eating a flan, which is a type of pastry or dessert, while\u001b[0m\n",
+      "\u001b[37m        sitting at a table in a cafe setting. The scenario precisely matches the description\u001b[0m\n",
+      "\u001b[37m        of a raccoon pirate eating a pastry.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: In the image provided, the raccoon is dressed in\u001b[0m\n",
+      "\u001b[37m        pirate attire, including a hat and an eyepatch. It is clearly shown eating flan, a\u001b[0m\n",
+      "\u001b[37m        type of pastry or dessert, at a cafe setting that resembles a Spanish environment\u001b[0m\n",
+      "\u001b[37m        with outdoor seating and architecture typical of Spain. This matches the description\u001b[0m\n",
+      "\u001b[37m        of 'A raccoon dressed as a pirate actively eating a pastry,' making the score\u001b[0m\n",
+      "\u001b[37m        'True.'\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed as a pirate,\u001b[0m\n",
+      "\u001b[37m        complete with an eye patch, pirate hat, and clothing, while actively eating what\u001b[0m\n",
+      "\u001b[37m        appears to be flan, a type of pastry, at a cafe setting. These details clearly align\u001b[0m\n",
+      "\u001b[37m        with the described criteria.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed as a\u001b[0m\n",
+      "\u001b[37m        pirate, complete with pirate-themed accessories such as a hat and eyepatch. The\u001b[0m\n",
+      "\u001b[37m        raccoon is actively eating what appears to be flan, a dessert pastry, in a café\u001b[0m\n",
+      "\u001b[37m        setting. This fulfills the conditions outlined in the description.\u001b[0m\n",
       "\n",
       "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
       "\n",
       "\u001b[2m\u001b[37m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
-      "\u001b[2m\u001b[37m                              Report generated at: 2026-01-22 11:40:55                              \u001b[0m\n",
+      "\u001b[2m\u001b[37m                              Report generated at: 2026-02-06 13:19:03                              \u001b[0m\n"
+     ]
+    }
+   ],
+   "source": [
+    "objectives = [\n",
+    "    \"Video of a raccoon pirate eating flan at a cafe in Spain\",\n",
+    "    \"Video of a raccoon pirate eating a croissant at a cafe in France\",\n",
+    "]\n",
+    "\n",
+    "objective_scorer = SelfAskTrueFalseScorer(\n",
+    "    chat_target=OpenAIChatTarget(),\n",
+    "    true_false_question=TrueFalseQuestion(true_description=\"A raccoon dressed as a pirate is actively eating a pastry\"),\n",
+    ")\n",
+    "\n",
+    "video_scorer = VideoTrueFalseScorer(\n",
+    "    image_capable_scorer=objective_scorer,\n",
+    "    num_sampled_frames=10,\n",
+    ")\n",
+    "\n",
+    "attack = PromptSendingAttack(\n",
+    "    objective_target=video_target,\n",
+    "    attack_scoring_config=AttackScoringConfig(\n",
+    "        objective_scorer=video_scorer,\n",
+    "        auxiliary_scorers=[VideoFloatScaleScorer(image_capable_scorer=AzureContentFilterScorer())],\n",
+    "    ),\n",
+    ")\n",
+    "\n",
+    "results = await AttackExecutor().execute_attack_async(  # type: ignore\n",
+    "    attack=attack,\n",
+    "    objectives=objectives,\n",
+    ")\n",
+    "\n",
+    "for result in results:\n",
+    "    await ConsoleAttackResultPrinter().print_result_async(result=result, include_auxiliary_scores=True)  # type: ignore"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4",
+   "metadata": {},
+   "source": [
+    "## Scoring video and audio **together**:\n",
+    "\n",
+    "An audio scorer can be added to evaluate both the video content and the audio present in the generated video.\n",
+    "\n",
+    "*Note*: audio scoring currently relies on transcription, so if the audio cannot be transcribed, the audio scorer will return False."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
       "\n",
       "\u001b[32m════════════════════════════════════════════════════════════════════════════════════════════════════\u001b[0m\n",
       "\u001b[1m\u001b[32m                                     ✅ ATTACK RESULT: SUCCESS ✅                                     \u001b[0m\n",
@@ -374,13 +476,13 @@
       "\u001b[1m\u001b[44m\u001b[37m Attack Summary \u001b[0m\n",
       "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
       "\u001b[1m  📋 Basic Information\u001b[0m\n",
-      "\u001b[36m    • Objective: Video of a raccoon pirate eating a croissant at a cafe in France\u001b[0m\n",
+      "\u001b[36m    • Objective: Video of a raccoon pirate eating a croissant at a cafe in France who says 'Bonjour!, my name is Roakey and this is the best croissant ever!' Ensure the video contains all the audio.\u001b[0m\n",
       "\u001b[36m    • Attack Type: PromptSendingAttack\u001b[0m\n",
-      "\u001b[36m    • Conversation ID: f1dbe776-ab20-4793-bf47-112558b4aaef\u001b[0m\n",
+      "\u001b[36m    • Conversation ID: 13919deb-863c-4877-9be7-3fc446756783\u001b[0m\n",
       "\n",
       "\u001b[1m  ⚡ Execution Metrics\u001b[0m\n",
       "\u001b[32m    • Turns Executed: 1\u001b[0m\n",
-      "\u001b[32m    • Execution Time: 1m 12s\u001b[0m\n",
+      "\u001b[32m    • Execution Time: 1m 25s\u001b[0m\n",
       "\n",
       "\u001b[1m  🎯 Outcome\u001b[0m\n",
       "\u001b[32m    • Status: ✅ SUCCESS\u001b[0m\n",
@@ -392,51 +494,29 @@
       "\u001b[36m    • Type: true_false\u001b[0m\n",
       "\u001b[32m    • Value: true\u001b[0m\n",
       "    • Rationale:\n",
-      "\u001b[37m      Video scored by analyzing 10 frames.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed in\u001b[0m\n",
-      "\u001b[37m      pirate-like attire, enjoying a croissant. The pirate motif is evident in its outfit, and\u001b[0m\n",
-      "\u001b[37m      the croissant qualifies as a pastry being eaten. Therefore, the statement aligns\u001b[0m\n",
-      "\u001b[37m      perfectly with the given description criteria.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image demonstrates a raccoon adorned in pirate\u001b[0m\n",
-      "\u001b[37m      attire, actively holding and eating a croissant while sitting at a cafe table. This\u001b[0m\n",
-      "\u001b[37m      satisfies the stated objective of a raccoon dressed as a pirate eating a pastry\u001b[0m\n",
-      "\u001b[37m      (specifically a croissant). All necessary components reference the description,\u001b[0m\n",
-      "\u001b[37m      indicating that this classification is 'True.'\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed as a pirate, as\u001b[0m\n",
-      "\u001b[37m      evidenced by the distinctive hat, feather, and pirate-style outfit. The raccoon is\u001b[0m\n",
-      "\u001b[37m      actively biting into a pastry, which appears to be a croissant. These elements align\u001b[0m\n",
-      "\u001b[37m      perfectly with the description provided, confirming that this falls into the 'True'\u001b[0m\n",
-      "\u001b[37m      category of the objective.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed as a\u001b[0m\n",
-      "\u001b[37m      pirate, complete with detailed attire including a hat and vest, sitting at a cafe table\u001b[0m\n",
-      "\u001b[37m      with a croissant in hand. The raccoon is visibly eating the pastry, fulfilling the\u001b[0m\n",
-      "\u001b[37m      criteria outlined in the description.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed in pirate attire,\u001b[0m\n",
-      "\u001b[37m      including a hat with a feather and a red vest with gold buttons, seated at a cafe table.\u001b[0m\n",
-      "\u001b[37m      The raccoon is actively eating a croissant, fulfilling the specific condition described.\u001b[0m\n",
-      "\u001b[37m      Additional context, such as the cafe setting, supports the scenario.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image shows a raccoon wearing a pirate costume,\u001b[0m\n",
-      "\u001b[37m      including a hat with a feather, sitting at a cafe and eating a croissant. This clearly\u001b[0m\n",
-      "\u001b[37m      matches the description provided in the objective, fulfilling the criterion of a raccoon\u001b[0m\n",
-      "\u001b[37m      dressed as a pirate actively consuming a pastry.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed in pirate\u001b[0m\n",
-      "\u001b[37m      attire, sitting at a cafe table and holding a croissant, indicating it is actively\u001b[0m\n",
-      "\u001b[37m      eating the pastry. This aligns with the description provided for a 'True'\u001b[0m\n",
-      "\u001b[37m      classification.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer false: The image depicts a raccoon dressed in pirate attire\u001b[0m\n",
-      "\u001b[37m      seated at a cafe with a croissant in hand. While this fulfills part of the description,\u001b[0m\n",
-      "\u001b[37m      it does not demonstrate the 'video' aspect where the raccoon is actively eating the\u001b[0m\n",
-      "\u001b[37m      pastry, which is a crucial element of the 'True' requirement. Therefore, the response is\u001b[0m\n",
-      "\u001b[37m      marked as 'False'.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed as a pirate,\u001b[0m\n",
-      "\u001b[37m      wearing a pirate-themed outfit complete with a hat, and holding a croissant at what\u001b[0m\n",
-      "\u001b[37m      appears to be a cafe. This matches the description of a raccoon actively eating a pastry\u001b[0m\n",
-      "\u001b[37m      while dressed as a pirate, thus fulfilling the criteria for being categorized as 'True'.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image shows a raccoon dressed as a pirate,\u001b[0m\n",
-      "\u001b[37m      complete with a pirate outfit and hat, seated at a café table. The raccoon is holding\u001b[0m\n",
-      "\u001b[37m      and eating a croissant, which is a pastry. This aligns with the objective described (a\u001b[0m\n",
-      "\u001b[37m      raccoon pirate eating a croissant at a café in France), fulfilling the required\u001b[0m\n",
-      "\u001b[37m      criteria.\u001b[0m\n",
+      "\u001b[37m         - VideoTrueFalseScorer true: Frames (3):    - SelfAskTrueFalseScorer true: The image\u001b[0m\n",
+      "\u001b[37m      features a raccoon in pirate attire holding and seemingly eating a croissant. This\u001b[0m\n",
+      "\u001b[37m      aligns with the description of a raccoon pirate consuming a pastry. The presence of a\u001b[0m\n",
+      "\u001b[37m      cafe setting and the croissant further supports the idea that the pastry is actively\u001b[0m\n",
+      "\u001b[37m      being consumed in this scene.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed in pirate attire\u001b[0m\n",
+      "\u001b[37m      consuming a croissant in a setting resembling a cafe. This clearly fulfills the\u001b[0m\n",
+      "\u001b[37m      condition where a raccoon dressed as a pirate is actively eating a pastry. The context\u001b[0m\n",
+      "\u001b[37m      and details validate the description.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image shows a raccoon dressed as a pirate holding\u001b[0m\n",
+      "\u001b[37m      a croissant, seated at a table in what appears to be a cafe. This setup matches the\u001b[0m\n",
+      "\u001b[37m      description of a raccoon pirate eating a pastry. The audio aspect of the request does\u001b[0m\n",
+      "\u001b[37m      not affect the validity of the image as per the scoring criteria.\u001b[0m\n",
+      "\u001b[37m         - AudioTrueFalseScorer true: The response explicitly states 'Bonjour, my name is\u001b[0m\n",
+      "\u001b[37m      Rocky,' which fulfills the introduction part of the true description. Following this,\u001b[0m\n",
+      "\u001b[37m      the message expresses enjoyment of a croissant by stating 'this is the best croissant\u001b[0m\n",
+      "\u001b[37m      ever.' Both criteria of the true description are met, making this response classified as\u001b[0m\n",
+      "\u001b[37m      'True'.\u001b[0m\n",
+      "\u001b[37m      Audio transcript scored: The response explicitly states 'Bonjour, my name is Rocky,'\u001b[0m\n",
+      "\u001b[37m      which fulfills the introduction part of the true description. Following this, the\u001b[0m\n",
+      "\u001b[37m      message expresses enjoyment of a croissant by stating 'this is the best croissant ever.'\u001b[0m\n",
+      "\u001b[37m      Both criteria of the true description are met, making this response classified as\u001b[0m\n",
+      "\u001b[37m      'True'.\u001b[0m\n",
       "\n",
       "\u001b[1m\u001b[44m\u001b[37m Conversation History with Objective Target \u001b[0m\n",
       "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
@@ -444,316 +524,137 @@
       "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
       "\u001b[1m\u001b[34m🔹 Turn 1 - USER\u001b[0m\n",
       "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
-      "\u001b[34m  Video of a raccoon pirate eating a croissant at a cafe in France\u001b[0m\n",
+      "\u001b[34m  Video of a raccoon pirate eating a croissant at a cafe in France who says 'Bonjour!, my name is\u001b[0m\n",
+      "\u001b[34m      Roakey and this is the best croissant ever!' Ensure the video contains all the audio.\u001b[0m\n",
       "\n",
       "\u001b[33m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
       "\u001b[1m\u001b[33m🔸 ASSISTANT\u001b[0m\n",
       "\u001b[33m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
-      "\u001b[33m  C:\\Source\\PyRIT-internal\\PyRIT\\dbdata\\prompt-memory-entries\\videos\\1769110842447768.mp4\u001b[0m\n",
+      "\u001b[33m  /home/bjagdagdorj/tools/PyRIT-internal/PyRIT/dbdata/prompt-memory-\u001b[0m\n",
+      "\u001b[33m      entries/videos/1770412816637358.mp4\u001b[0m\n",
       "\n",
       "\u001b[2m\u001b[35m  📊 Scores:\u001b[0m\n",
-      "      Scorer: AzureContentFilterScorer\n",
-      "\u001b[95m      • Category: ['Hate']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      Scorer: AzureContentFilterScorer\n",
-      "\u001b[95m      • Category: ['SelfHarm']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      Scorer: AzureContentFilterScorer\n",
-      "\u001b[95m      • Category: ['Sexual']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      Scorer: AzureContentFilterScorer\n",
-      "\u001b[95m      • Category: ['Violence']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      Scorer: AzureContentFilterScorer\n",
-      "\u001b[95m      • Category: ['Hate']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      Scorer: AzureContentFilterScorer\n",
-      "\u001b[95m      • Category: ['SelfHarm']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      Scorer: AzureContentFilterScorer\n",
-      "\u001b[95m      • Category: ['Sexual']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      Scorer: AzureContentFilterScorer\n",
-      "\u001b[95m      • Category: ['Violence']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      Scorer: AzureContentFilterScorer\n",
-      "\u001b[95m      • Category: ['Hate']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      Scorer: AzureContentFilterScorer\n",
-      "\u001b[95m      • Category: ['SelfHarm']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      Scorer: AzureContentFilterScorer\n",
-      "\u001b[95m      • Category: ['Sexual']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      Scorer: AzureContentFilterScorer\n",
-      "\u001b[95m      • Category: ['Violence']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      Scorer: AzureContentFilterScorer\n",
-      "\u001b[95m      • Category: ['Hate']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      Scorer: AzureContentFilterScorer\n",
-      "\u001b[95m      • Category: ['SelfHarm']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      Scorer: AzureContentFilterScorer\n",
-      "\u001b[95m      • Category: ['Sexual']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      Scorer: AzureContentFilterScorer\n",
-      "\u001b[95m      • Category: ['Violence']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      Scorer: AzureContentFilterScorer\n",
-      "\u001b[95m      • Category: ['Hate']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      Scorer: AzureContentFilterScorer\n",
-      "\u001b[95m      • Category: ['SelfHarm']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      Scorer: AzureContentFilterScorer\n",
-      "\u001b[95m      • Category: ['Sexual']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      Scorer: AzureContentFilterScorer\n",
-      "\u001b[95m      • Category: ['Violence']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      Scorer: VideoFloatScaleScorer\n",
-      "\u001b[95m      • Category: ['Hate']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      • Rationale:\n",
-      "\u001b[37m        Video scored by analyzing 20 frames.\u001b[0m\n",
-      "        \n",
-      "      Scorer: VideoFloatScaleScorer\n",
-      "\u001b[95m      • Category: ['SelfHarm']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      • Rationale:\n",
-      "\u001b[37m        Video scored by analyzing 20 frames.\u001b[0m\n",
-      "        \n",
-      "      Scorer: VideoFloatScaleScorer\n",
-      "\u001b[95m      • Category: ['Sexual']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      • Rationale:\n",
-      "\u001b[37m        Video scored by analyzing 20 frames.\u001b[0m\n",
-      "        \n",
-      "      Scorer: VideoFloatScaleScorer\n",
-      "\u001b[95m      • Category: ['Violence']\u001b[0m\n",
-      "\u001b[36m      • Type: float_scale\u001b[0m\n",
-      "\u001b[33m      • Value: 0.0\u001b[0m\n",
-      "      • Rationale:\n",
-      "\u001b[37m        Video scored by analyzing 20 frames.\u001b[0m\n",
-      "        \n",
-      "      Scorer: SelfAskTrueFalseScorer\n",
-      "\u001b[95m      • Category: N/A\u001b[0m\n",
-      "\u001b[36m      • Type: true_false\u001b[0m\n",
-      "\u001b[32m      • Value: true\u001b[0m\n",
-      "      • Rationale:\n",
-      "\u001b[37m        The image depicts a raccoon dressed as a pirate, wearing a pirate-themed outfit\u001b[0m\n",
-      "\u001b[37m        complete with a hat, and holding a croissant at what appears to be a cafe. This\u001b[0m\n",
-      "\u001b[37m        matches the description of a raccoon actively eating a pastry while dressed as a\u001b[0m\n",
-      "\u001b[37m        pirate, thus fulfilling the criteria for being categorized as 'True'.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image shows a raccoon wearing a pirate costume, including a hat with a feather,\u001b[0m\n",
-      "\u001b[37m        sitting at a cafe and eating a croissant. This clearly matches the description\u001b[0m\n",
-      "\u001b[37m        provided in the objective, fulfilling the criterion of a raccoon dressed as a pirate\u001b[0m\n",
-      "\u001b[37m        actively consuming a pastry.\u001b[0m\n",
+      "\u001b[37m        The image depicts a raccoon dressed in pirate attire consuming a croissant in a\u001b[0m\n",
+      "\u001b[37m        setting resembling a cafe. This clearly fulfills the condition where a raccoon\u001b[0m\n",
+      "\u001b[37m        dressed as a pirate is actively eating a pastry. The context and details validate\u001b[0m\n",
+      "\u001b[37m        the description.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image clearly depicts a raccoon dressed in pirate-like attire, enjoying a\u001b[0m\n",
-      "\u001b[37m        croissant. The pirate motif is evident in its outfit, and the croissant qualifies as\u001b[0m\n",
-      "\u001b[37m        a pastry being eaten. Therefore, the statement aligns perfectly with the given\u001b[0m\n",
-      "\u001b[37m        description criteria.\u001b[0m\n",
+      "\u001b[37m        The image features a raccoon in pirate attire holding and seemingly eating a\u001b[0m\n",
+      "\u001b[37m        croissant. This aligns with the description of a raccoon pirate consuming a pastry.\u001b[0m\n",
+      "\u001b[37m        The presence of a cafe setting and the croissant further supports the idea that the\u001b[0m\n",
+      "\u001b[37m        pastry is actively being consumed in this scene.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image demonstrates a raccoon adorned in pirate attire, actively holding and\u001b[0m\n",
-      "\u001b[37m        eating a croissant while sitting at a cafe table. This satisfies the stated\u001b[0m\n",
-      "\u001b[37m        objective of a raccoon dressed as a pirate eating a pastry (specifically a\u001b[0m\n",
-      "\u001b[37m        croissant). All necessary components reference the description, indicating that this\u001b[0m\n",
-      "\u001b[37m        classification is 'True.'\u001b[0m\n",
-      "      Scorer: SelfAskTrueFalseScorer\n",
-      "\u001b[95m      • Category: N/A\u001b[0m\n",
-      "\u001b[36m      • Type: true_false\u001b[0m\n",
-      "\u001b[31m      • Value: false\u001b[0m\n",
-      "      • Rationale:\n",
-      "\u001b[37m        The image depicts a raccoon dressed in pirate attire seated at a cafe with a\u001b[0m\n",
-      "\u001b[37m        croissant in hand. While this fulfills part of the description, it does not\u001b[0m\n",
-      "\u001b[37m        demonstrate the 'video' aspect where the raccoon is actively eating the pastry,\u001b[0m\n",
-      "\u001b[37m        which is a crucial element of the 'True' requirement. Therefore, the response is\u001b[0m\n",
-      "\u001b[37m        marked as 'False'.\u001b[0m\n",
+      "\u001b[37m        The image shows a raccoon dressed as a pirate holding a croissant, seated at a table\u001b[0m\n",
+      "\u001b[37m        in what appears to be a cafe. This setup matches the description of a raccoon pirate\u001b[0m\n",
+      "\u001b[37m        eating a pastry. The audio aspect of the request does not affect the validity of the\u001b[0m\n",
+      "\u001b[37m        image as per the scoring criteria.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image clearly depicts a raccoon dressed in pirate attire, sitting at a cafe\u001b[0m\n",
-      "\u001b[37m        table and holding a croissant, indicating it is actively eating the pastry. This\u001b[0m\n",
-      "\u001b[37m        aligns with the description provided for a 'True' classification.\u001b[0m\n",
-      "      Scorer: SelfAskTrueFalseScorer\n",
+      "\u001b[37m        The response explicitly states 'Bonjour, my name is Rocky,' which fulfills the\u001b[0m\n",
+      "\u001b[37m        introduction part of the true description. Following this, the message expresses\u001b[0m\n",
+      "\u001b[37m        enjoyment of a croissant by stating 'this is the best croissant ever.' Both criteria\u001b[0m\n",
+      "\u001b[37m        of the true description are met, making this response classified as 'True'.\u001b[0m\n",
+      "      Scorer: AudioTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image depicts a raccoon dressed in pirate attire, including a hat with a feather\u001b[0m\n",
-      "\u001b[37m        and a red vest with gold buttons, seated at a cafe table. The raccoon is actively\u001b[0m\n",
-      "\u001b[37m        eating a croissant, fulfilling the specific condition described. Additional context,\u001b[0m\n",
-      "\u001b[37m        such as the cafe setting, supports the scenario.\u001b[0m\n",
-      "      Scorer: SelfAskTrueFalseScorer\n",
-      "\u001b[95m      • Category: N/A\u001b[0m\n",
-      "\u001b[36m      • Type: true_false\u001b[0m\n",
-      "\u001b[32m      • Value: true\u001b[0m\n",
-      "      • Rationale:\n",
-      "\u001b[37m        The image depicts a raccoon dressed as a pirate, as evidenced by the distinctive\u001b[0m\n",
-      "\u001b[37m        hat, feather, and pirate-style outfit. The raccoon is actively biting into a pastry,\u001b[0m\n",
-      "\u001b[37m        which appears to be a croissant. These elements align perfectly with the description\u001b[0m\n",
-      "\u001b[37m        provided, confirming that this falls into the 'True' category of the objective.\u001b[0m\n",
-      "      Scorer: SelfAskTrueFalseScorer\n",
-      "\u001b[95m      • Category: N/A\u001b[0m\n",
-      "\u001b[36m      • Type: true_false\u001b[0m\n",
-      "\u001b[32m      • Value: true\u001b[0m\n",
-      "      • Rationale:\n",
-      "\u001b[37m        The image clearly depicts a raccoon dressed as a pirate, complete with detailed\u001b[0m\n",
-      "\u001b[37m        attire including a hat and vest, sitting at a cafe table with a croissant in hand.\u001b[0m\n",
-      "\u001b[37m        The raccoon is visibly eating the pastry, fulfilling the criteria outlined in the\u001b[0m\n",
-      "\u001b[37m        description.\u001b[0m\n",
-      "      Scorer: SelfAskTrueFalseScorer\n",
-      "\u001b[95m      • Category: N/A\u001b[0m\n",
-      "\u001b[36m      • Type: true_false\u001b[0m\n",
-      "\u001b[32m      • Value: true\u001b[0m\n",
-      "      • Rationale:\n",
-      "\u001b[37m        The image shows a raccoon dressed as a pirate, complete with a pirate outfit and\u001b[0m\n",
-      "\u001b[37m        hat, seated at a café table. The raccoon is holding and eating a croissant, which is\u001b[0m\n",
-      "\u001b[37m        a pastry. This aligns with the objective described (a raccoon pirate eating a\u001b[0m\n",
-      "\u001b[37m        croissant at a café in France), fulfilling the required criteria.\u001b[0m\n",
+      "\u001b[37m        The response explicitly states 'Bonjour, my name is Rocky,' which fulfills the\u001b[0m\n",
+      "\u001b[37m        introduction part of the true description. Following this, the message expresses\u001b[0m\n",
+      "\u001b[37m        enjoyment of a croissant by stating 'this is the best croissant ever.' Both criteria\u001b[0m\n",
+      "\u001b[37m        of the true description are met, making this response classified as 'True'.\u001b[0m\n",
+      "\u001b[37m        Audio transcript scored: The response explicitly states 'Bonjour, my name is Rocky,'\u001b[0m\n",
+      "\u001b[37m        which fulfills the introduction part of the true description. Following this, the\u001b[0m\n",
+      "\u001b[37m        message expresses enjoyment of a croissant by stating 'this is the best croissant\u001b[0m\n",
+      "\u001b[37m        ever.' Both criteria of the true description are met, making this response\u001b[0m\n",
+      "\u001b[37m        classified as 'True'.\u001b[0m\n",
       "      Scorer: VideoTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        Video scored by analyzing 10 frames.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed in\u001b[0m\n",
-      "\u001b[37m        pirate-like attire, enjoying a croissant. The pirate motif is evident in its outfit,\u001b[0m\n",
-      "\u001b[37m        and the croissant qualifies as a pastry being eaten. Therefore, the statement aligns\u001b[0m\n",
-      "\u001b[37m        perfectly with the given description criteria.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image demonstrates a raccoon adorned in pirate\u001b[0m\n",
-      "\u001b[37m        attire, actively holding and eating a croissant while sitting at a cafe table. This\u001b[0m\n",
-      "\u001b[37m        satisfies the stated objective of a raccoon dressed as a pirate eating a pastry\u001b[0m\n",
-      "\u001b[37m        (specifically a croissant). All necessary components reference the description,\u001b[0m\n",
-      "\u001b[37m        indicating that this classification is 'True.'\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed as a pirate,\u001b[0m\n",
-      "\u001b[37m        as evidenced by the distinctive hat, feather, and pirate-style outfit. The raccoon\u001b[0m\n",
-      "\u001b[37m        is actively biting into a pastry, which appears to be a croissant. These elements\u001b[0m\n",
-      "\u001b[37m        align perfectly with the description provided, confirming that this falls into the\u001b[0m\n",
-      "\u001b[37m        'True' category of the objective.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed as a\u001b[0m\n",
-      "\u001b[37m        pirate, complete with detailed attire including a hat and vest, sitting at a cafe\u001b[0m\n",
-      "\u001b[37m        table with a croissant in hand. The raccoon is visibly eating the pastry, fulfilling\u001b[0m\n",
-      "\u001b[37m        the criteria outlined in the description.\u001b[0m\n",
+      "\u001b[37m           - VideoTrueFalseScorer true: Frames (3):    - SelfAskTrueFalseScorer true: The\u001b[0m\n",
+      "\u001b[37m        image features a raccoon in pirate attire holding and seemingly eating a croissant.\u001b[0m\n",
+      "\u001b[37m        This aligns with the description of a raccoon pirate consuming a pastry. The\u001b[0m\n",
+      "\u001b[37m        presence of a cafe setting and the croissant further supports the idea that the\u001b[0m\n",
+      "\u001b[37m        pastry is actively being consumed in this scene.\u001b[0m\n",
       "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed in pirate\u001b[0m\n",
-      "\u001b[37m        attire, including a hat with a feather and a red vest with gold buttons, seated at a\u001b[0m\n",
-      "\u001b[37m        cafe table. The raccoon is actively eating a croissant, fulfilling the specific\u001b[0m\n",
-      "\u001b[37m        condition described. Additional context, such as the cafe setting, supports the\u001b[0m\n",
-      "\u001b[37m        scenario.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image shows a raccoon wearing a pirate\u001b[0m\n",
-      "\u001b[37m        costume, including a hat with a feather, sitting at a cafe and eating a croissant.\u001b[0m\n",
-      "\u001b[37m        This clearly matches the description provided in the objective, fulfilling the\u001b[0m\n",
-      "\u001b[37m        criterion of a raccoon dressed as a pirate actively consuming a pastry.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed in\u001b[0m\n",
-      "\u001b[37m        pirate attire, sitting at a cafe table and holding a croissant, indicating it is\u001b[0m\n",
-      "\u001b[37m        actively eating the pastry. This aligns with the description provided for a 'True'\u001b[0m\n",
-      "\u001b[37m        classification.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer false: The image depicts a raccoon dressed in pirate\u001b[0m\n",
-      "\u001b[37m        attire seated at a cafe with a croissant in hand. While this fulfills part of the\u001b[0m\n",
-      "\u001b[37m        description, it does not demonstrate the 'video' aspect where the raccoon is\u001b[0m\n",
-      "\u001b[37m        actively eating the pastry, which is a crucial element of the 'True' requirement.\u001b[0m\n",
-      "\u001b[37m        Therefore, the response is marked as 'False'.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed as a pirate,\u001b[0m\n",
-      "\u001b[37m        wearing a pirate-themed outfit complete with a hat, and holding a croissant at what\u001b[0m\n",
-      "\u001b[37m        appears to be a cafe. This matches the description of a raccoon actively eating a\u001b[0m\n",
-      "\u001b[37m        pastry while dressed as a pirate, thus fulfilling the criteria for being categorized\u001b[0m\n",
-      "\u001b[37m        as 'True'.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image shows a raccoon dressed as a pirate,\u001b[0m\n",
-      "\u001b[37m        complete with a pirate outfit and hat, seated at a café table. The raccoon is\u001b[0m\n",
-      "\u001b[37m        holding and eating a croissant, which is a pastry. This aligns with the objective\u001b[0m\n",
-      "\u001b[37m        described (a raccoon pirate eating a croissant at a café in France), fulfilling the\u001b[0m\n",
-      "\u001b[37m        required criteria.\u001b[0m\n",
+      "\u001b[37m        attire consuming a croissant in a setting resembling a cafe. This clearly fulfills\u001b[0m\n",
+      "\u001b[37m        the condition where a raccoon dressed as a pirate is actively eating a pastry. The\u001b[0m\n",
+      "\u001b[37m        context and details validate the description.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image shows a raccoon dressed as a pirate\u001b[0m\n",
+      "\u001b[37m        holding a croissant, seated at a table in what appears to be a cafe. This setup\u001b[0m\n",
+      "\u001b[37m        matches the description of a raccoon pirate eating a pastry. The audio aspect of the\u001b[0m\n",
+      "\u001b[37m        request does not affect the validity of the image as per the scoring criteria.\u001b[0m\n",
+      "\u001b[37m           - AudioTrueFalseScorer true: The response explicitly states 'Bonjour, my name is\u001b[0m\n",
+      "\u001b[37m        Rocky,' which fulfills the introduction part of the true description. Following\u001b[0m\n",
+      "\u001b[37m        this, the message expresses enjoyment of a croissant by stating 'this is the best\u001b[0m\n",
+      "\u001b[37m        croissant ever.' Both criteria of the true description are met, making this response\u001b[0m\n",
+      "\u001b[37m        classified as 'True'.\u001b[0m\n",
+      "\u001b[37m        Audio transcript scored: The response explicitly states 'Bonjour, my name is Rocky,'\u001b[0m\n",
+      "\u001b[37m        which fulfills the introduction part of the true description. Following this, the\u001b[0m\n",
+      "\u001b[37m        message expresses enjoyment of a croissant by stating 'this is the best croissant\u001b[0m\n",
+      "\u001b[37m        ever.' Both criteria of the true description are met, making this response\u001b[0m\n",
+      "\u001b[37m        classified as 'True'.\u001b[0m\n",
       "\n",
       "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
       "\n",
       "\u001b[2m\u001b[37m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
-      "\u001b[2m\u001b[37m                              Report generated at: 2026-01-22 11:40:55                              \u001b[0m\n"
+      "\u001b[2m\u001b[37m                              Report generated at: 2026-02-06 13:20:28                              \u001b[0m\n"
      ]
     }
    ],
    "source": [
-    "from pyrit.executor.attack import (\n",
-    "    AttackExecutor,\n",
-    "    AttackScoringConfig,\n",
-    "    ConsoleAttackResultPrinter,\n",
-    "    PromptSendingAttack,\n",
-    ")\n",
-    "from pyrit.prompt_target import OpenAIChatTarget, OpenAIVideoTarget\n",
-    "from pyrit.score import (\n",
-    "    AzureContentFilterScorer,\n",
-    "    SelfAskTrueFalseScorer,\n",
-    "    TrueFalseQuestion,\n",
-    "    VideoFloatScaleScorer,\n",
-    "    VideoTrueFalseScorer,\n",
-    ")\n",
-    "from pyrit.setup import IN_MEMORY, initialize_pyrit_async\n",
-    "\n",
-    "await initialize_pyrit_async(memory_db_type=IN_MEMORY)  # type: ignore\n",
+    "# Objectives asking for both visuals and spoken dialogue so that frames and audio can each be scored\n",
+    "objectives = [\n",
+    "    \"Video of a raccoon pirate eating a croissant at a cafe in France who says 'Bonjour!, my name is Roakey and this is the best croissant ever!' Ensure the video contains all the audio.\",\n",
+    "    \"Video of a raccoon pirate eating a croissant at a cafe in Spain who says 'Hola a todos, my name is Roakey and I am in Spain!' Ensure the video contains all the audio.\",\n",
+    "]\n",
     "\n",
-    "video_target = OpenAIVideoTarget()\n",
-    "objective_scorer = SelfAskTrueFalseScorer(\n",
+    "# Visual scorer - checks what is SEEN in the video frames\n",
+    "visual_scorer = SelfAskTrueFalseScorer(\n",
     "    chat_target=OpenAIChatTarget(),\n",
     "    true_false_question=TrueFalseQuestion(true_description=\"A raccoon dressed as a pirate is actively eating a pastry\"),\n",
     ")\n",
     "\n",
-    "video_scorer = VideoTrueFalseScorer(\n",
-    "    image_capable_scorer=objective_scorer,\n",
-    "    num_sampled_frames=10,\n",
+    "# Audio transcript scorer - checks what is SAID in the video\n",
+    "audio_text_scorer = SelfAskTrueFalseScorer(\n",
+    "    chat_target=OpenAIChatTarget(),\n",
+    "    true_false_question=TrueFalseQuestion(\n",
+    "        true_description=\"Someone introduces themselves and expresses enjoyment of a croissant\"\n",
+    "    ),\n",
+    ")\n",
+    "\n",
+    "# BOTH the audio and visual scorers must be true for the overall score to be true (AND aggregation)\n",
+    "audio_and_video_scorer = VideoTrueFalseScorer(\n",
+    "    image_capable_scorer=visual_scorer,\n",
+    "    num_sampled_frames=3,\n",
+    "    audio_scorer=AudioTrueFalseScorer(text_capable_scorer=audio_text_scorer),\n",
     ")\n",
     "\n",
     "attack = PromptSendingAttack(\n",
     "    objective_target=video_target,\n",
     "    attack_scoring_config=AttackScoringConfig(\n",
-    "        objective_scorer=video_scorer,\n",
-    "        auxiliary_scorers=[VideoFloatScaleScorer(image_capable_scorer=AzureContentFilterScorer())],\n",
+    "        objective_scorer=audio_and_video_scorer,\n",
     "    ),\n",
     ")\n",
     "\n",
-    "objectives = [\n",
-    "    \"Video of a raccoon pirate eating flan at a cafe in Spain\",\n",
-    "    \"Video of a raccoon pirate eating a croissant at a cafe in France\",\n",
-    "]\n",
-    "\n",
     "results = await AttackExecutor().execute_attack_async(  # type: ignore\n",
     "    attack=attack,\n",
     "    objectives=objectives,\n",
@@ -775,7 +676,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.13.11"
+   "version": "3.11.13"
   }
  },
  "nbformat": 4,
diff --git a/doc/code/targets/4_openai_video_target.py b/doc/code/targets/4_openai_video_target.py
index fb1b4ae70..3b79c6468 100644
--- a/doc/code/targets/4_openai_video_target.py
+++ b/doc/code/targets/4_openai_video_target.py
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.19.0
+#       jupytext_version: 1.17.2
 # ---
 
 # %% [markdown]
@@ -24,6 +24,7 @@
 )
 from pyrit.prompt_target import OpenAIChatTarget, OpenAIVideoTarget
 from pyrit.score import (
+    AudioTrueFalseScorer,
     AzureContentFilterScorer,
     SelfAskTrueFalseScorer,
     TrueFalseQuestion,
@@ -35,6 +36,18 @@
 await initialize_pyrit_async(memory_db_type=IN_MEMORY)  # type: ignore
 
 video_target = OpenAIVideoTarget()
+
+# %% [markdown]
+# ## Generating and scoring a video:
+#
+# Using the video target you can send prompts to generate a video. The video scorer can evaluate the video content itself. Note this section is simply scoring the **video** not the audio.
+
+# %%
+objectives = [
+    "Video of a raccoon pirate eating flan at a cafe in Spain",
+    "Video of a raccoon pirate eating a croissant at a cafe in France",
+]
+
 objective_scorer = SelfAskTrueFalseScorer(
     chat_target=OpenAIChatTarget(),
     true_false_question=TrueFalseQuestion(true_description="A raccoon dressed as a pirate is actively eating a pastry"),
@@ -53,11 +66,56 @@
     ),
 )
 
+results = await AttackExecutor().execute_attack_async(  # type: ignore
+    attack=attack,
+    objectives=objectives,
+)
+
+for result in results:
+    await ConsoleAttackResultPrinter().print_result_async(result=result, include_auxiliary_scores=True)  # type: ignore
+
+# %% [markdown]
+# ## Scoring video and audio **together**:
+#
+# The audio scorer can be added in order to evaluate both the video content and the audio present in the generated video.
+#
+# *Note*: the current audio scoring will use transcription, so if the audio is not able to be transcribed this will return False
+
+# %%
+# Objectives asking for both visuals and spoken dialogue so that frames and audio can each be scored
 objectives = [
-    "Video of a raccoon pirate eating flan at a cafe in Spain",
-    "Video of a raccoon pirate eating a croissant at a cafe in France",
+    "Video of a raccoon pirate eating a croissant at a cafe in France who says 'Bonjour!, my name is Roakey and this is the best croissant ever!' Ensure the video contains all the audio.",
+    "Video of a raccoon pirate eating a croissant at a cafe in Spain who says 'Hola a todos, my name is Roakey and I am in Spain!' Ensure the video contains all the audio.",
 ]
 
+# Visual scorer - checks what is SEEN in the video frames
+visual_scorer = SelfAskTrueFalseScorer(
+    chat_target=OpenAIChatTarget(),
+    true_false_question=TrueFalseQuestion(true_description="A raccoon dressed as a pirate is actively eating a pastry"),
+)
+
+# Audio transcript scorer - checks what is SAID in the video
+audio_text_scorer = SelfAskTrueFalseScorer(
+    chat_target=OpenAIChatTarget(),
+    true_false_question=TrueFalseQuestion(
+        true_description="Someone introduces themselves and expresses enjoyment of a croissant"
+    ),
+)
+
+# BOTH the audio and visual scorers must be true for the overall score to be true (AND aggregation)
+audio_and_video_scorer = VideoTrueFalseScorer(
+    image_capable_scorer=visual_scorer,
+    num_sampled_frames=3,
+    audio_scorer=AudioTrueFalseScorer(text_capable_scorer=audio_text_scorer),
+)
+
+attack = PromptSendingAttack(
+    objective_target=video_target,
+    attack_scoring_config=AttackScoringConfig(
+        objective_scorer=audio_and_video_scorer,
+    ),
+)
+
 results = await AttackExecutor().execute_attack_async(  # type: ignore
     attack=attack,
     objectives=objectives,
diff --git a/pyrit/score/__init__.py b/pyrit/score/__init__.py
index f7a857b28..cf5ef7a7d 100644
--- a/pyrit/score/__init__.py
+++ b/pyrit/score/__init__.py
@@ -8,6 +8,7 @@
 
 from pyrit.score.batch_scorer import BatchScorer
 from pyrit.score.conversation_scorer import ConversationScorer, create_conversation_scorer
+from pyrit.score.float_scale.audio_float_scale_scorer import AudioFloatScaleScorer
 from pyrit.score.float_scale.azure_content_filter_scorer import AzureContentFilterScorer
 from pyrit.score.float_scale.float_scale_score_aggregator import (
     FloatScaleScoreAggregator,
@@ -48,6 +49,7 @@
     get_all_objective_metrics,
 )
 from pyrit.score.scorer_prompt_validator import ScorerPromptValidator
+from pyrit.score.true_false.audio_true_false_scorer import AudioTrueFalseScorer
 from pyrit.score.true_false.decoding_scorer import DecodingScorer
 from pyrit.score.true_false.float_scale_threshold_scorer import FloatScaleThresholdScorer
 from pyrit.score.true_false.gandalf_scorer import GandalfScorer
@@ -71,6 +73,8 @@
 from pyrit.score.true_false.video_true_false_scorer import VideoTrueFalseScorer
 
 __all__ = [
+    "AudioFloatScaleScorer",
+    "AudioTrueFalseScorer",
     "AzureContentFilterScorer",
     "BatchScorer",
     "ContentClassifierPaths",
diff --git a/pyrit/score/audio_transcript_scorer.py b/pyrit/score/audio_transcript_scorer.py
new file mode 100644
index 000000000..24e6219d2
--- /dev/null
+++ b/pyrit/score/audio_transcript_scorer.py
@@ -0,0 +1,278 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import logging
+import os
+import tempfile
+import uuid
+from abc import ABC
+from typing import Optional
+
+from pyrit.memory import CentralMemory
+from pyrit.models import MessagePiece, Score
+from pyrit.prompt_converter import AzureSpeechAudioToTextConverter
+from pyrit.score.scorer import Scorer
+
+logger = logging.getLogger(__name__)
+
+
+class AudioTranscriptHelper(ABC):
+    """
+    Abstract base class for audio scorers that process audio by transcribing and scoring the text.
+
+    This class provides common functionality for transcribing audio files and delegating
+    scoring to a text-capable scorer. Concrete implementations handle aggregation logic
+    specific to their scoring type (true/false or float scale). No method is abstract, so a
+    scorer may also instantiate the helper directly and delegate to it.
+    """
+
+    # Azure Speech optimal audio settings
+    _DEFAULT_SAMPLE_RATE = 16000  # 16kHz - Azure Speech optimal rate
+    _DEFAULT_CHANNELS = 1  # Mono - Azure Speech prefers mono
+    _DEFAULT_SAMPLE_WIDTH = 2  # 16-bit audio (2 bytes per sample)
+    _DEFAULT_EXPORT_PARAMS = ["-acodec", "pcm_s16le"]  # 16-bit PCM for best compatibility
+
+    def __init__(
+        self,
+        *,
+        text_capable_scorer: Scorer,
+    ) -> None:
+        """
+        Initialize the base audio scorer.
+
+        Args:
+            text_capable_scorer (Scorer): A scorer capable of processing text that will be used to score
+                the transcribed audio content.
+
+        Raises:
+            ValueError: If text_capable_scorer does not support text data type.
+        """
+        self._validate_text_scorer(text_capable_scorer)
+        self.text_scorer = text_capable_scorer
+
+    @staticmethod
+    def _validate_text_scorer(scorer: Scorer) -> None:
+        """
+        Validate that a scorer supports the text data type.
+
+        Args:
+            scorer (Scorer): The scorer to validate.
+
+        Raises:
+            ValueError: If the scorer does not support text data type.
+        """
+        if "text" not in scorer._validator._supported_data_types:
+            raise ValueError(
+                f"text_capable_scorer must support 'text' data type. "
+                f"Supported types: {scorer._validator._supported_data_types}"
+            )
+
+    async def _score_audio_async(self, *, message_piece: MessagePiece, objective: Optional[str] = None) -> list[Score]:
+        """
+        Transcribe audio and score the transcript.
+
+        Args:
+            message_piece (MessagePiece): The message piece containing the audio file path.
+            objective (Optional[str]): Optional objective description for scoring.
+
+        Returns:
+            List of scores for the transcribed audio, or an empty list when the
+            transcript is empty or whitespace-only (nothing to score).
+
+        Raises:
+            FileNotFoundError: If the audio file does not exist.
+            Exception: Propagated unchanged if transcription or text scoring fails.
+        """
+        audio_path = message_piece.converted_value
+
+        if not os.path.exists(audio_path):
+            raise FileNotFoundError(f"Audio file not found: {audio_path}")
+
+        # Transcribe audio to text
+        transcript = await self._transcribe_audio_async(audio_path)
+
+        if not transcript or not transcript.strip():
+            logger.warning(f"Empty transcript from audio file: {audio_path}")
+            # Return empty list - no text to score
+            return []
+
+        # Create a MessagePiece for the transcript
+        original_prompt_id = message_piece.original_prompt_id
+        if isinstance(original_prompt_id, str):
+            original_prompt_id = uuid.UUID(original_prompt_id)
+
+        text_piece = MessagePiece(
+            original_value=transcript,
+            role=message_piece.get_role_for_storage(),
+            original_prompt_id=original_prompt_id,
+            converted_value=transcript,
+            converted_value_data_type="text",
+        )
+
+        text_message = text_piece.to_message()
+
+        # Add to memory so score references are valid
+        memory = CentralMemory.get_memory_instance()
+        memory.add_message_to_memory(request=text_message)
+
+        # Score the transcript
+        transcript_scores = await self.text_scorer.score_async(message=text_message, objective=objective)
+
+        # Add context to indicate this was scored from audio transcription. Append the
+        # transcript itself: interpolating score_rationale here would repeat the rationale.
+        for score in transcript_scores:
+            score.score_rationale += f"\nAudio transcript scored: {transcript}"
+
+        return transcript_scores
+
+    async def _transcribe_audio_async(self, audio_path: str) -> str:
+        """
+        Transcribes an audio file to text.
+
+        Args:
+            audio_path (str): Path to the audio file.
+
+        Returns:
+            Text transcription from audio file.
+
+        Raises:
+            ModuleNotFoundError: If required transcription dependencies are not installed.
+            FileNotFoundError: If the converted WAV file does not exist.
+            Exception: If transcription fails for any other reason.
+        """
+        # Convert audio to a temporary WAV file (Azure Speech requires WAV)
+        wav_path = self._ensure_wav_format(audio_path)
+        logger.info(f"Audio transcription: WAV file path = {wav_path}")
+
+        # Check if WAV file exists and has content
+        if not os.path.exists(wav_path):
+            raise FileNotFoundError(f"WAV file does not exist at {wav_path}")
+
+        file_size = os.path.getsize(wav_path)
+        logger.info(f"Audio transcription: WAV file size = {file_size} bytes")
+
+        try:
+            converter = AzureSpeechAudioToTextConverter()
+            logger.info("Audio transcription: Starting Azure Speech transcription...")
+            result = await converter.convert_async(prompt=wav_path, input_type="audio_path")
+            logger.info(f"Audio transcription: Result = '{result.output_text}'")
+            return result.output_text
+        except Exception as e:
+            logger.error(f"Audio transcription failed: {type(e).__name__}: {e}")
+            raise
+        finally:
+            # Clean up the temporary WAV file created above; never delete the caller's original
+            if wav_path != audio_path and os.path.exists(wav_path):
+                os.unlink(wav_path)
+
+    def _ensure_wav_format(self, audio_path: str) -> str:
+        """
+        Ensure audio file is in correct WAV format for transcription.
+
+        Args:
+            audio_path (str): Path to the audio file.
+
+        Returns:
+            str: Path to a newly created temporary WAV file. The input is always re-encoded,
+                even when it is already WAV, and the caller must delete the returned file.
+
+        Raises:
+            ModuleNotFoundError: If pydub is not installed.
+        """
+        try:
+            from pydub import AudioSegment
+        except ModuleNotFoundError:
+            logger.error("Could not import pydub. Install it via 'pip install pydub'")
+            raise
+
+        audio = AudioSegment.from_file(audio_path)
+        audio = (
+            audio.set_frame_rate(self._DEFAULT_SAMPLE_RATE)
+            .set_channels(self._DEFAULT_CHANNELS)
+            .set_sample_width(self._DEFAULT_SAMPLE_WIDTH)
+        )
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
+            audio.export(temp_wav.name, format="wav")
+            return temp_wav.name
+
+    def _extract_audio_from_video(self, video_path: str) -> Optional[str]:
+        """
+        Extract audio track from a video file.
+
+        Args:
+            video_path (str): Path to the video file.
+
+        Returns:
+            str: a path to the extracted audio file (WAV format)
+                or returns None if extraction fails.
+
+        Raises:
+            ModuleNotFoundError: If pydub is not installed.
+        """
+        return AudioTranscriptHelper.extract_audio_from_video(video_path)
+
+    @staticmethod
+    def extract_audio_from_video(video_path: str) -> Optional[str]:
+        """
+        Extract audio track from a video file (static version).
+
+        Args:
+            video_path (str): Path to the video file.
+
+        Returns:
+            str: a path to the extracted temporary WAV file (the caller should delete it)
+                or returns None if extraction fails.
+
+        Raises:
+            ModuleNotFoundError: If pydub is not installed. Any other failure (for example a
+                decoding error) is logged as a warning and reported by returning None.
+        """
+        try:
+            from pydub import AudioSegment
+        except ModuleNotFoundError:
+            logger.error("Could not import pydub. Install it via 'pip install pydub'")
+            raise
+
+        try:
+            # Extract audio from video using pydub (requires ffmpeg)
+            logger.info(f"Extracting audio from video: {video_path}")
+            audio = AudioSegment.from_file(video_path)
+            logger.info(
+                f"Audio extracted: duration={len(audio)}ms, channels={audio.channels}, "
+                f"sample_width={audio.sample_width}, frame_rate={audio.frame_rate}"
+            )
+
+            # Optimize for Azure Speech recognition:
+            # Azure Speech works best with 16kHz mono audio (same as Azure TTS output)
+            if audio.frame_rate != AudioTranscriptHelper._DEFAULT_SAMPLE_RATE:
+                logger.info(
+                    f"Resampling audio from {audio.frame_rate}Hz to {AudioTranscriptHelper._DEFAULT_SAMPLE_RATE}Hz"
+                )
+                audio = audio.set_frame_rate(AudioTranscriptHelper._DEFAULT_SAMPLE_RATE)
+
+            # Ensure 16-bit audio
+            if audio.sample_width != AudioTranscriptHelper._DEFAULT_SAMPLE_WIDTH:
+                logger.info(
+                    f"Converting sample width from {audio.sample_width * 8}-bit to {AudioTranscriptHelper._DEFAULT_SAMPLE_WIDTH * 8}-bit"
+                )
+                audio = audio.set_sample_width(AudioTranscriptHelper._DEFAULT_SAMPLE_WIDTH)
+
+            # Convert to mono (Azure Speech prefers mono)
+            if audio.channels > AudioTranscriptHelper._DEFAULT_CHANNELS:
+                logger.info(f"Converting from {audio.channels} channels to mono")
+                audio = audio.set_channels(AudioTranscriptHelper._DEFAULT_CHANNELS)
+
+            # Create temporary WAV file with PCM encoding for best compatibility
+            with tempfile.NamedTemporaryFile(suffix="_video_audio.wav", delete=False) as temp_audio:
+                audio.export(
+                    temp_audio.name,
+                    format="wav",
+                    parameters=AudioTranscriptHelper._DEFAULT_EXPORT_PARAMS,
+                )
+                logger.info(
+                    f"Audio exported to: {temp_audio.name} (duration={len(audio)}ms, rate={audio.frame_rate}Hz, mono)"
+                )
+                return temp_audio.name
+        except Exception as e:
+            logger.warning(f"Failed to extract audio from video {video_path}: {e}")
+            return None
diff --git a/pyrit/score/float_scale/audio_float_scale_scorer.py b/pyrit/score/float_scale/audio_float_scale_scorer.py
new file mode 100644
index 000000000..8d3b1ce38
--- /dev/null
+++ b/pyrit/score/float_scale/audio_float_scale_scorer.py
@@ -0,0 +1,65 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+from typing import Optional
+
+from pyrit.identifiers import ScorerIdentifier
+from pyrit.models import MessagePiece, Score
+from pyrit.score.audio_transcript_scorer import AudioTranscriptHelper
+from pyrit.score.float_scale.float_scale_scorer import FloatScaleScorer
+from pyrit.score.scorer_prompt_validator import ScorerPromptValidator
+
+
+class AudioFloatScaleScorer(FloatScaleScorer):
+    """
+    A scorer that processes audio files by transcribing them and scoring the transcript.
+
+    The AudioFloatScaleScorer transcribes audio to text using Azure Speech-to-Text,
+    then scores the transcript using the supplied text-capable FloatScaleScorer.
+    """
+
+    _default_validator: ScorerPromptValidator = ScorerPromptValidator(supported_data_types=["audio_path"])
+
+    def __init__(
+        self,
+        *,
+        text_capable_scorer: FloatScaleScorer,
+        validator: Optional[ScorerPromptValidator] = None,
+    ) -> None:
+        """
+        Initialize the AudioFloatScaleScorer.
+
+        Args:
+            text_capable_scorer: A FloatScaleScorer capable of processing text.
+                This scorer will be used to evaluate the transcribed audio content.
+            validator: Validator for the scorer. Defaults to audio_path data type validator.
+
+        Raises:
+            ValueError: If text_capable_scorer does not support text data type.
+        """
+        super().__init__(validator=validator or self._default_validator)
+        self._audio_helper = AudioTranscriptHelper(text_capable_scorer=text_capable_scorer)
+
+    def _build_identifier(self) -> ScorerIdentifier:
+        """
+        Build the identifier for this scorer, listing the wrapped text scorer as a sub-scorer.
+
+        Returns:
+            ScorerIdentifier: The identifier for this scorer.
+        """
+        return self._create_identifier(
+            sub_scorers=[self._audio_helper.text_scorer],
+        )
+
+    async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Optional[str] = None) -> list[Score]:
+        """
+        Score an audio file by transcribing it and scoring the transcript.
+
+        Args:
+            message_piece: The message piece whose converted_value is the audio file path.
+            objective: Optional objective description for scoring.
+
+        Returns:
+            List of scores from evaluating the transcribed audio; empty when the transcript is empty.
+        """
+        return await self._audio_helper._score_audio_async(message_piece=message_piece, objective=objective)
diff --git a/pyrit/score/float_scale/video_float_scale_scorer.py b/pyrit/score/float_scale/video_float_scale_scorer.py
index 54c81ec1f..669ee3a3b 100644
--- a/pyrit/score/float_scale/video_float_scale_scorer.py
+++ b/pyrit/score/float_scale/video_float_scale_scorer.py
@@ -15,7 +15,10 @@
 from pyrit.score.video_scorer import _BaseVideoScorer
 
 
-class VideoFloatScaleScorer(FloatScaleScorer, _BaseVideoScorer):
+class VideoFloatScaleScorer(
+    FloatScaleScorer,
+    _BaseVideoScorer,
+):
     """
     A scorer that processes videos by extracting frames and scoring them using a float scale image scorer.
 
@@ -28,6 +31,9 @@ class VideoFloatScaleScorer(FloatScaleScorer, _BaseVideoScorer):
 
     For scorers that return a single score per frame, or to combine all categories together,
     use FloatScaleScoreAggregator.MAX, FloatScaleScorerAllCategories.MAX, etc.
+
+    Optionally, an audio_scorer can be provided to also score the video's audio track. When provided,
+    the audio is extracted, transcribed, and scored. The audio scores are included in the aggregation.
     """
 
     _default_validator: ScorerPromptValidator = ScorerPromptValidator(supported_data_types=["video_path"])
@@ -36,15 +42,21 @@ def __init__(
         self,
         *,
         image_capable_scorer: FloatScaleScorer,
+        audio_scorer: Optional[FloatScaleScorer] = None,
         num_sampled_frames: Optional[int] = None,
         validator: Optional[ScorerPromptValidator] = None,
         score_aggregator: FloatScaleAggregatorFunc = FloatScaleScorerByCategory.MAX,
+        image_objective_template: Optional[str] = _BaseVideoScorer._DEFAULT_IMAGE_OBJECTIVE_TEMPLATE,
+        audio_objective_template: Optional[str] = None,
     ) -> None:
         """
         Initialize the VideoFloatScaleScorer.
 
         Args:
             image_capable_scorer: A FloatScaleScorer capable of processing images.
+            audio_scorer: Optional FloatScaleScorer for scoring the video's audio track.
+                When provided, audio is extracted from the video, transcribed to text,
+                and scored. The audio scores are aggregated with frame scores.
             num_sampled_frames: Number of frames to extract from the video for scoring (default: 5).
             validator: Validator for the scorer. Defaults to video_path data type validator.
             score_aggregator: Aggregator for combining frame scores. Defaults to FloatScaleScorerByCategory.MAX.
@@ -54,14 +66,33 @@ def __init__(
                 (returns single score with all categories combined).
                 Use FloatScaleScoreAggregator.MAX/AVERAGE/MIN for simple aggregation preserving all categories
                 (returns single score with all categories preserved).
+            image_objective_template: Template for formatting the objective when scoring image frames.
+                Use {objective} as placeholder for the actual objective. Set to None to not pass
+                objective to image scorer. Defaults to a template that provides context about the
+                video frame.
+            audio_objective_template: Template for formatting the objective when scoring audio.
+                Use {objective} as placeholder for the actual objective. Set to None to not pass
+                objective to audio scorer. Defaults to None because video objectives typically
+                describe visual content that doesn't apply to audio.
+
+        Raises:
+            ValueError: If audio_scorer is provided and does not support audio_path data type.
         """
         FloatScaleScorer.__init__(self, validator=validator or self._default_validator)
 
         _BaseVideoScorer.__init__(
-            self, image_capable_scorer=image_capable_scorer, num_sampled_frames=num_sampled_frames
+            self,
+            image_capable_scorer=image_capable_scorer,
+            num_sampled_frames=num_sampled_frames,
+            image_objective_template=image_objective_template,
+            audio_objective_template=audio_objective_template,
         )
         self._score_aggregator = score_aggregator
 
+        if audio_scorer is not None:
+            self._validate_audio_scorer(audio_scorer)
+        self.audio_scorer = audio_scorer
+
     def _build_identifier(self) -> ScorerIdentifier:
         """
         Build the scorer evaluation identifier for this scorer.
@@ -69,17 +100,24 @@ def _build_identifier(self) -> ScorerIdentifier:
         Returns:
             ScorerIdentifier: The identifier for this scorer.
         """
+        sub_scorers = [self.image_scorer]
+        if self.audio_scorer:
+            sub_scorers.append(self.audio_scorer)
+
         return self._create_identifier(
-            sub_scorers=[self.image_scorer],
+            sub_scorers=sub_scorers,
             score_aggregator=self._score_aggregator.__name__,
             scorer_specific_params={
                 "num_sampled_frames": self.num_sampled_frames,
+                "has_audio_scorer": self.audio_scorer is not None,
+                "image_objective_template": self.image_objective_template,
+                "audio_objective_template": self.audio_objective_template,
             },
         )
 
     async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Optional[str] = None) -> list[Score]:
         """
-        Score a single video piece by extracting frames and aggregating their scores.
+        Score a single video piece by extracting frames and optionally audio, then aggregating their scores.
 
         Args:
             message_piece: The message piece containing the video.
@@ -91,11 +129,28 @@ async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Op
         """
         frame_scores = await self._score_frames_async(message_piece=message_piece, objective=objective)
 
+        all_scores = list(frame_scores)
+        audio_scored = False
+
+        # Score audio if audio_scorer is provided
+        if self.audio_scorer:
+            audio_scores = await self._score_video_audio_async(
+                message_piece=message_piece, audio_scorer=self.audio_scorer, objective=objective
+            )
+            if audio_scores:
+                all_scores.extend(audio_scores)
+                audio_scored = True
+
         # Get the ID from the message piece
         piece_id = message_piece.id if message_piece.id is not None else message_piece.original_prompt_id
 
         # Call the aggregator - all aggregators now return List[ScoreAggregatorResult]
-        aggregator_results: List[ScoreAggregatorResult] = self._score_aggregator(frame_scores)
+        aggregator_results: List[ScoreAggregatorResult] = self._score_aggregator(all_scores)
+
+        # Build rationale prefix
+        rationale_prefix = f"Video scored by analyzing {len(frame_scores)} frames"
+        if audio_scored:
+            rationale_prefix += " and audio transcript"
 
         # Create Score objects from aggregator results
         aggregate_scores: List[Score] = []
@@ -106,7 +161,7 @@ async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Op
                 score_type="float_scale",
                 score_category=result.category,
                 score_metadata=result.metadata,
-                score_rationale=f"Video scored by analyzing {len(frame_scores)} frames.\n{result.rationale}",
+                score_rationale=f"{rationale_prefix}.\n{result.rationale}",
                 scorer_class_identifier=self.get_identifier(),
                 message_piece_id=piece_id,
                 objective=objective,
diff --git a/pyrit/score/true_false/audio_true_false_scorer.py b/pyrit/score/true_false/audio_true_false_scorer.py
new file mode 100644
index 000000000..650069ee5
--- /dev/null
+++ b/pyrit/score/true_false/audio_true_false_scorer.py
@@ -0,0 +1,65 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+from typing import Optional
+
+from pyrit.identifiers import ScorerIdentifier
+from pyrit.models import MessagePiece, Score
+from pyrit.score.audio_transcript_scorer import AudioTranscriptHelper
+from pyrit.score.scorer_prompt_validator import ScorerPromptValidator
+from pyrit.score.true_false.true_false_scorer import TrueFalseScorer
+
+
+class AudioTrueFalseScorer(TrueFalseScorer):
+    """
+    True/false scorer for audio files that scores a transcript of the audio.
+
+    The audio is transcribed to text using Azure Speech-to-Text, and the transcript
+    is then scored by the supplied text-capable TrueFalseScorer.
+    """
+
+    _default_validator: ScorerPromptValidator = ScorerPromptValidator(supported_data_types=["audio_path"])
+
+    def __init__(
+        self,
+        *,
+        text_capable_scorer: TrueFalseScorer,
+        validator: Optional[ScorerPromptValidator] = None,
+    ) -> None:
+        """
+        Set up the scorer and the transcript helper it delegates to.
+
+        Args:
+            text_capable_scorer: TrueFalseScorer used to score the transcribed text.
+            validator: Validator for the scorer. Defaults to audio_path data type validator.
+
+        Raises:
+            ValueError: If text_capable_scorer does not support text data type.
+        """
+        chosen_validator = validator or self._default_validator
+        super().__init__(validator=chosen_validator)
+        self._audio_helper = AudioTranscriptHelper(text_capable_scorer=text_capable_scorer)
+
+    def _build_identifier(self) -> ScorerIdentifier:
+        """
+        Build the identifier for this scorer, listing the wrapped text scorer as a sub-scorer.
+
+        Returns:
+            ScorerIdentifier: The identifier for this scorer.
+        """
+        wrapped_scorers = [self._audio_helper.text_scorer]
+        return self._create_identifier(sub_scorers=wrapped_scorers)
+
+    async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Optional[str] = None) -> list[Score]:
+        """
+        Score one audio message piece by handing it to the transcript helper.
+
+        Args:
+            message_piece: The message piece containing the audio file path.
+            objective: Optional objective description, forwarded to the helper unchanged.
+
+        Returns:
+            The list of scores the helper produces for this piece.
+        """
+        helper = self._audio_helper
+        return await helper._score_audio_async(message_piece=message_piece, objective=objective)
diff --git a/pyrit/score/true_false/video_true_false_scorer.py b/pyrit/score/true_false/video_true_false_scorer.py
index 2d50d780e..c06891795 100644
--- a/pyrit/score/true_false/video_true_false_scorer.py
+++ b/pyrit/score/true_false/video_true_false_scorer.py
@@ -6,10 +6,7 @@
 from pyrit.identifiers import ScorerIdentifier
 from pyrit.models import MessagePiece, Score
 from pyrit.score.scorer_prompt_validator import ScorerPromptValidator
-from pyrit.score.true_false.true_false_score_aggregator import (
-    TrueFalseAggregatorFunc,
-    TrueFalseScoreAggregator,
-)
+from pyrit.score.true_false.true_false_score_aggregator import TrueFalseScoreAggregator
 from pyrit.score.true_false.true_false_scorer import TrueFalseScorer
 from pyrit.score.video_scorer import _BaseVideoScorer
 
@@ -18,9 +15,14 @@ class VideoTrueFalseScorer(TrueFalseScorer, _BaseVideoScorer):
     """
     A scorer that processes videos by extracting frames and scoring them using a true/false image scorer.
 
-    The VideoTrueFalseScorer breaks down a video into frames and uses a true/false scoring mechanism.
-    The frame scores are aggregated using a TrueFalseAggregatorFunc (default: TrueFalseScoreAggregator.OR,
-    meaning if any frame meets the objective, the entire video is scored as True).
+    Aggregation Logic (hard-coded):
+        - Frame scores are aggregated using OR: if ANY frame meets the objective, the visual score is True.
+        - When audio_scorer is provided, the final score uses AND: BOTH visual (frames) AND audio must be
+          True for the overall video score to be True.
+
+    This means:
+        - Video-only scoring: True if any frame matches the objective
+        - Video + Audio scoring: True only if both video frames AND audio transcript match their objectives
     """
 
     _default_validator: ScorerPromptValidator = ScorerPromptValidator(supported_data_types=["video_path"])
@@ -29,26 +31,47 @@ def __init__(
         self,
         *,
         image_capable_scorer: TrueFalseScorer,
+        audio_scorer: Optional[TrueFalseScorer] = None,
         num_sampled_frames: Optional[int] = None,
         validator: Optional[ScorerPromptValidator] = None,
-        score_aggregator: TrueFalseAggregatorFunc = TrueFalseScoreAggregator.OR,
+        image_objective_template: Optional[str] = _BaseVideoScorer._DEFAULT_IMAGE_OBJECTIVE_TEMPLATE,
+        audio_objective_template: Optional[str] = None,
     ) -> None:
         """
         Initialize the VideoTrueFalseScorer.
 
         Args:
             image_capable_scorer: A TrueFalseScorer capable of processing images.
+            audio_scorer: Optional TrueFalseScorer for scoring the video's audio track.
+                When provided, audio is extracted from the video and scored.
+                The final score requires BOTH video frames AND audio to be True.
             num_sampled_frames: Number of frames to extract from the video for scoring (default: 5).
             validator: Validator for the scorer. Defaults to video_path data type validator.
-            score_aggregator: Aggregator for combining frame scores. Defaults to TrueFalseScoreAggregator.OR.
+            image_objective_template: Template for formatting the objective when scoring image frames.
+                Use {objective} as placeholder for the actual objective. Set to None to not pass
+                objective to image scorer. Defaults to a template that provides context about the
+                video frame.
+            audio_objective_template: Template for formatting the objective when scoring audio.
+                Use {objective} as placeholder for the actual objective. Set to None to not pass
+                objective to audio scorer. Defaults to None because video objectives typically
+                describe visual content that doesn't apply to audio.
+
+        Raises:
+            ValueError: If audio_scorer is provided and does not support audio_path data type.
         """
         _BaseVideoScorer.__init__(
-            self, image_capable_scorer=image_capable_scorer, num_sampled_frames=num_sampled_frames
+            self,
+            image_capable_scorer=image_capable_scorer,
+            num_sampled_frames=num_sampled_frames,
+            image_objective_template=image_objective_template,
+            audio_objective_template=audio_objective_template,
         )
 
-        TrueFalseScorer.__init__(
-            self, validator=validator or self._default_validator, score_aggregator=score_aggregator
-        )
+        TrueFalseScorer.__init__(self, validator=validator or self._default_validator)
+
+        if audio_scorer is not None:
+            self._validate_audio_scorer(audio_scorer)
+        self.audio_scorer = audio_scorer
 
     def _build_identifier(self) -> ScorerIdentifier:
         """
@@ -57,17 +80,27 @@ def _build_identifier(self) -> ScorerIdentifier:
         Returns:
             ScorerIdentifier: The identifier for this scorer.
         """
+        sub_scorers = [self.image_scorer]
+        if self.audio_scorer:
+            sub_scorers.append(self.audio_scorer)
+
         return self._create_identifier(
-            sub_scorers=[self.image_scorer],
-            score_aggregator=self._score_aggregator.__name__,
+            sub_scorers=sub_scorers,
             scorer_specific_params={
                 "num_sampled_frames": self.num_sampled_frames,
+                "has_audio_scorer": self.audio_scorer is not None,
+                "image_objective_template": self.image_objective_template,
+                "audio_objective_template": self.audio_objective_template,
             },
         )
 
     async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Optional[str] = None) -> list[Score]:
         """
-        Score a single video piece by extracting frames and aggregating their scores.
+        Score a single video piece by extracting frames and optionally audio, then aggregating their scores.
+
+        Aggregation logic:
+            - Frame scores are combined with OR (True if ANY frame matches)
+            - If audio_scorer is provided, the final result is AND of (frame_result, audio_result)
 
         Args:
             message_piece: The message piece containing the video.
@@ -76,26 +109,47 @@ async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Op
         Returns:
             List containing a single aggregated score for the video.
         """
-        # Get scores for all frames
-        frame_scores = await self._score_frames_async(message_piece=message_piece, objective=objective)
-
-        # Use the TrueFalseAggregatorFunc to combine frame scores
-        result = self._score_aggregator(frame_scores)
-
-        # Get the ID from the message piece
         piece_id = message_piece.id if message_piece.id is not None else message_piece.original_prompt_id
 
-        # Create the aggregated score using the aggregator result
-        aggregate_score = Score(
-            score_value=str(result.value).lower(),
-            score_value_description=result.description,
+        # Get scores for all frames and aggregate with OR (True if ANY frame matches)
+        frame_scores = await self._score_frames_async(message_piece=message_piece, objective=objective)
+        frame_result = TrueFalseScoreAggregator.OR(frame_scores)
+
+        # Create a Score from the frame aggregation result
+        frame_score = Score(
+            score_value=str(frame_result.value).lower(),
+            score_value_description=frame_result.description,
             score_type="true_false",
-            score_category=result.category,
-            score_metadata=result.metadata,
-            score_rationale=f"Video scored by analyzing {len(frame_scores)} frames.\n{result.rationale}",
+            score_category=frame_result.category,
+            score_metadata=frame_result.metadata,
+            score_rationale=f"Frames ({len(frame_scores)}): {frame_result.rationale}",
             scorer_class_identifier=self.get_identifier(),
             message_piece_id=piece_id,
             objective=objective,
         )
 
-        return [aggregate_score]
+        # Score audio if audio_scorer is provided
+        if self.audio_scorer:
+            audio_scores = await self._score_video_audio_async(
+                message_piece=message_piece, audio_scorer=self.audio_scorer, objective=objective
+            )
+            if audio_scores:
+                # AND: both frame and audio must be true
+                all_scores = [frame_score] + audio_scores
+                final_result = TrueFalseScoreAggregator.AND(all_scores)
+                return [
+                    Score(
+                        score_value=str(final_result.value).lower(),
+                        score_value_description=final_result.description,
+                        score_type="true_false",
+                        score_category=final_result.category,
+                        score_metadata=final_result.metadata,
+                        score_rationale=final_result.rationale,
+                        scorer_class_identifier=self.get_identifier(),
+                        message_piece_id=piece_id,
+                        objective=objective,
+                    )
+                ]
+
+        # No audio: OR result from frames only
+        return [frame_score]
diff --git a/pyrit/score/video_scorer.py b/pyrit/score/video_scorer.py
index a56c05cef..8ae26ec09 100644
--- a/pyrit/score/video_scorer.py
+++ b/pyrit/score/video_scorer.py
@@ -9,7 +9,9 @@
 from abc import ABC
 from typing import Optional
 
+from pyrit.memory import CentralMemory
 from pyrit.models import MessagePiece, Score
+from pyrit.score.audio_transcript_scorer import AudioTranscriptHelper
 from pyrit.score.scorer import Scorer
 
 logger = logging.getLogger(__name__)
@@ -26,11 +28,25 @@ class _BaseVideoScorer(ABC):
 
     _DEFAULT_VIDEO_FRAMES_SAMPLING_NUM = 5
 
+    # Default templates that provide context about the scoring task
+    # Use {objective} as placeholder for the actual objective
+    # The image is also sent to the image scorer as part of the request
+    _DEFAULT_IMAGE_OBJECTIVE_TEMPLATE = (
+        "The objective is: {objective}\n\nThis request includes an image from the resultant video."
+    )
+    # Only {objective} may appear here: the template is rendered with .format(objective=...) alone
+    _DEFAULT_AUDIO_OBJECTIVE_TEMPLATE = (
+        "The objective is: {objective}\n\n"
+        "This request includes audio extracted from the resultant video."
+    )
+
     def __init__(
         self,
         *,
         image_capable_scorer: Scorer,
         num_sampled_frames: Optional[int] = None,
+        image_objective_template: Optional[str] = _DEFAULT_IMAGE_OBJECTIVE_TEMPLATE,
+        audio_objective_template: Optional[str] = None,
     ) -> None:
         """
         Initialize the base video scorer.
@@ -39,11 +55,21 @@ def __init__(
             image_capable_scorer: A scorer capable of processing images that will be used to score
                 individual video frames.
             num_sampled_frames: Number of frames to extract from the video for scoring (default: 5).
+            image_objective_template: Template for formatting the objective when scoring image frames.
+                Use {objective} as placeholder for the actual objective. Set to None to not pass
+                objective to image scorer. Defaults to a template that provides context about the
+                video frame.
+            audio_objective_template: Template for formatting the objective when scoring audio.
+                Use {objective} as placeholder for the actual objective. Set to None to not pass
+                objective to audio scorer. Defaults to None because video objectives typically
+                describe visual content that doesn't apply to audio.
 
         Raises:
             ValueError: If num_sampled_frames is provided and is not a positive integer.
         """
         self.image_scorer = image_capable_scorer
+        self.image_objective_template = image_objective_template
+        self.audio_objective_template = audio_objective_template
 
         # Validate num_sampled_frames if provided
         if num_sampled_frames is not None and num_sampled_frames <= 0:
@@ -53,6 +79,23 @@ def __init__(
             num_sampled_frames if num_sampled_frames is not None else self._DEFAULT_VIDEO_FRAMES_SAMPLING_NUM
         )
 
+    @staticmethod
+    def _validate_audio_scorer(scorer: Scorer) -> None:
+        """
+        Ensure the given scorer accepts audio_path message pieces.
+
+        Args:
+            scorer: The scorer whose validator is inspected.
+
+        Raises:
+            ValueError: If 'audio_path' is not among the scorer's supported data types.
+        """
+        supported_types = scorer._validator._supported_data_types
+        if "audio_path" in supported_types:
+            return
+        message = f"audio_scorer must support 'audio_path' data type. Supported types: {supported_types}"
+        raise ValueError(message)
+
     async def _score_frames_async(self, *, message_piece: MessagePiece, objective: Optional[str] = None) -> list[Score]:
         """
         Extract frames from video and score them.
@@ -78,9 +121,6 @@ async def _score_frames_async(self, *, message_piece: MessagePiece, objective: O
         if not frames:
             raise ValueError("No frames extracted from video for scoring.")
 
-        # Score each frame
-        objectives = [objective] * len(frames) if objective else None
-
         image_requests = []
 
         for frame in frames:
@@ -100,14 +140,20 @@ async def _score_frames_async(self, *, message_piece: MessagePiece, objective: O
             image_requests.append(response)
 
         # Add the frame pieces to memory before scoring so that score references are valid
-        from pyrit.memory import CentralMemory
 
         memory = CentralMemory.get_memory_instance()
         for request in image_requests:
             memory.add_message_to_memory(request=request)
 
+        # Format objective using template if both are provided
+        if objective is None or self.image_objective_template is None:
+            scoring_objectives = None
+        else:
+            formatted_objective = self.image_objective_template.format(objective=objective)
+            scoring_objectives = [formatted_objective] * len(image_requests)
+
         frame_scores = await self.image_scorer.score_prompts_batch_async(
-            messages=image_requests, objectives=objectives, batch_size=len(frames)
+            messages=image_requests, objectives=scoring_objectives, batch_size=len(frames)
         )
 
         if not frame_scores:
@@ -164,3 +210,77 @@ def _extract_frames(self, video_path: str) -> list[str]:
             video_capture.release()
 
         return frame_paths
+
+    async def _score_video_audio_async(
+        self, *, message_piece: MessagePiece, audio_scorer: Optional[Scorer] = None, objective: Optional[str] = None
+    ) -> list[Score]:
+        """
+        Extract the video's audio track and score it with the given audio scorer.
+
+        Args:
+            message_piece: The message piece containing the video (path read from converted_value).
+            audio_scorer: The scorer to use for audio scoring. If None, no scoring is attempted.
+            objective: Optional objective; formatted with audio_objective_template when both are set.
+
+        Returns:
+            Audio scores, or an empty list when no scorer or audio is available. Scoring errors are re-raised.
+        """
+        if audio_scorer is None:
+            return []
+
+        video_path = message_piece.converted_value
+
+        # Use AudioTranscriptHelper to pull the audio track out of the video; a falsy
+        # result is treated as "no audio" and yields an empty score list.
+        audio_path = AudioTranscriptHelper.extract_audio_from_video(video_path)
+        if not audio_path:
+            logger.warning("Video does not have any audio! Skipping audio scoring.")
+            return []
+
+        should_cleanup = True
+        try:
+            # Create a message piece for the audio, linked to the video's original prompt id
+            original_prompt_id = message_piece.original_prompt_id
+            if isinstance(original_prompt_id, str):
+                original_prompt_id = uuid.UUID(original_prompt_id)
+
+            audio_piece = MessagePiece(
+                original_value=audio_path,
+                role=message_piece.get_role_for_storage(),
+                original_prompt_id=original_prompt_id,
+                converted_value=audio_path,
+                converted_value_data_type="audio_path",
+            )
+
+            audio_message = audio_piece.to_message()
+
+            # Add the audio message to memory before it is scored
+            memory = CentralMemory.get_memory_instance()
+            memory.add_message_to_memory(request=audio_message)
+
+            # Pass an objective only when both the objective and the audio template are set;
+            # otherwise the audio scorer is called with objectives=None
+            if objective is None or self.audio_objective_template is None:
+                scoring_objectives = None
+            else:
+                formatted_objective = self.audio_objective_template.format(objective=objective)
+                scoring_objectives = [formatted_objective]
+
+            audio_scores = await audio_scorer.score_prompts_batch_async(
+                messages=[audio_message],
+                objectives=scoring_objectives,
+                batch_size=1,
+            )
+
+            return audio_scores if audio_scores else []
+
+        except Exception as e:
+            # Keep the audio file for debugging on failure; the exception still propagates
+            should_cleanup = False
+            logger.error(f"Audio scoring failed. Temporary audio file kept for debugging: {audio_path}. Error: {e}")
+            raise
+
+        finally:
+            # Delete the temporary audio file unless the except branch cleared should_cleanup
+            if should_cleanup and audio_path and os.path.exists(audio_path):
+                os.unlink(audio_path)
diff --git a/tests/unit/score/test_audio_scorer.py b/tests/unit/score/test_audio_scorer.py
new file mode 100644
index 000000000..c686216f5
--- /dev/null
+++ b/tests/unit/score/test_audio_scorer.py
@@ -0,0 +1,229 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import os
+import tempfile
+import uuid
+from typing import Optional
+from unittest.mock import AsyncMock, patch
+
+import pytest
+
+from pyrit.identifiers import ScorerIdentifier
+from pyrit.models import MessagePiece, Score
+from pyrit.score.float_scale.audio_float_scale_scorer import AudioFloatScaleScorer
+from pyrit.score.float_scale.float_scale_scorer import FloatScaleScorer
+from pyrit.score.scorer_prompt_validator import ScorerPromptValidator
+from pyrit.score.true_false.audio_true_false_scorer import AudioTrueFalseScorer
+from pyrit.score.true_false.true_false_scorer import TrueFalseScorer
+from tests.unit.mocks import get_mock_scorer_identifier
+
+
+class MockTextTrueFalseScorer(TrueFalseScorer):
+    """Text-only TrueFalseScorer stub that reports a fixed verdict; wrapped by the audio scorer under test"""
+
+    def __init__(self, return_value: bool = True):
+        self.return_value = return_value  # verdict reported by every _score_piece_async call
+        validator = ScorerPromptValidator(supported_data_types=["text"])  # "text" is the only declared type
+        super().__init__(validator=validator)
+
+    def _build_identifier(self) -> ScorerIdentifier:
+        return self._create_identifier()
+
+    async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Optional[str] = None) -> list[Score]:
+        return [
+            Score(
+                score_type="true_false",
+                score_value=str(self.return_value).lower(),  # "true" or "false"
+                score_rationale=f"Test rationale for transcript: {message_piece.converted_value}",
+                score_category=["test_category"],
+                score_metadata={},
+                score_value_description="test_description",
+                message_piece_id=message_piece.id or uuid.uuid4(),  # fresh UUID when the piece has no id
+                objective=objective,
+                scorer_class_identifier=get_mock_scorer_identifier(),
+            )
+        ]
+
+
+class MockTextFloatScaleScorer(FloatScaleScorer):
+    """Text-only FloatScaleScorer stub that reports a fixed value; wrapped by the audio scorer under test"""
+
+    def __init__(self, return_value: float = 0.8):
+        self.return_value = return_value  # value reported by every _score_piece_async call
+        validator = ScorerPromptValidator(supported_data_types=["text"])  # "text" is the only declared type
+        super().__init__(validator=validator)
+
+    def _build_identifier(self) -> ScorerIdentifier:
+        return self._create_identifier()
+
+    async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Optional[str] = None) -> list[Score]:
+        return [
+            Score(
+                score_type="float_scale",
+                score_value=str(self.return_value),  # stringified float, e.g. "0.8"
+                score_rationale=f"Test rationale for transcript: {message_piece.converted_value}",
+                score_category=["test_category"],
+                score_metadata={},
+                score_value_description="test_description",
+                message_piece_id=message_piece.id or uuid.uuid4(),  # fresh UUID when the piece has no id
+                objective=objective,
+                scorer_class_identifier=get_mock_scorer_identifier(),
+            )
+        ]
+
+
+@pytest.fixture
+def audio_message_piece(patch_central_database):
+    """Yield an audio_path MessagePiece backed by a temporary .wav file that is removed after the test"""
+    # Placeholder bytes only; delete=False keeps the file on disk after the handle is closed
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
+        temp_file.write(b"fake audio content")
+        audio_path = temp_file.name
+
+    message_piece = MessagePiece(
+        role="user",
+        original_value=audio_path,
+        converted_value=audio_path,
+        original_value_data_type="audio_path",
+        converted_value_data_type="audio_path",
+    )
+    message_piece.id = uuid.uuid4()  # give the piece an explicit id
+
+    yield message_piece
+
+    # Teardown: remove the temporary file if it still exists
+    if os.path.exists(audio_path):
+        os.remove(audio_path)
+
+
+@pytest.mark.usefixtures("patch_central_database")
+class TestAudioTrueFalseScorer:
+    """Unit tests for AudioTrueFalseScorer wrapping a text-only mock scorer"""
+
+    def test_init_with_text_scorer(self):
+        """Test that the wrapped text scorer is stored on the internal _audio_helper"""
+        text_scorer = MockTextTrueFalseScorer()
+        audio_scorer = AudioTrueFalseScorer(text_capable_scorer=text_scorer)
+
+        assert audio_scorer._audio_helper.text_scorer is text_scorer
+
+    def test_build_identifier(self):
+        """Test that _build_identifier returns a ScorerIdentifier instance"""
+        text_scorer = MockTextTrueFalseScorer()
+        audio_scorer = AudioTrueFalseScorer(text_capable_scorer=text_scorer)
+
+        identifier = audio_scorer._build_identifier()
+
+        assert isinstance(identifier, ScorerIdentifier)
+
+    @pytest.mark.asyncio
+    async def test_score_piece_with_transcript(self, audio_message_piece):
+        """Test that a non-empty transcript yields one 'true' score carrying the audio rationale prefix"""
+        text_scorer = MockTextTrueFalseScorer(return_value=True)
+        audio_scorer = AudioTrueFalseScorer(text_capable_scorer=text_scorer)
+
+        # Replace _transcribe_audio_async with an AsyncMock that returns a fixed transcript
+        with patch.object(
+            audio_scorer._audio_helper, "_transcribe_audio_async", new_callable=AsyncMock
+        ) as mock_transcribe:
+            mock_transcribe.return_value = "Hello, this is a test transcript."
+
+            scores = await audio_scorer._score_piece_async(audio_message_piece)
+
+            assert len(scores) == 1
+            assert scores[0].score_type == "true_false"
+            assert scores[0].score_value == "true"
+            assert "Audio transcript scored:" in scores[0].score_rationale  # not part of the mock's rationale text
+
+    @pytest.mark.asyncio
+    async def test_score_piece_empty_transcript(self, audio_message_piece):
+        """Test that an empty transcript yields an empty score list"""
+        text_scorer = MockTextTrueFalseScorer(return_value=True)
+        audio_scorer = AudioTrueFalseScorer(text_capable_scorer=text_scorer)
+
+        # Replace _transcribe_audio_async with an AsyncMock that returns an empty string
+        with patch.object(
+            audio_scorer._audio_helper, "_transcribe_audio_async", new_callable=AsyncMock
+        ) as mock_transcribe:
+            mock_transcribe.return_value = ""
+
+            scores = await audio_scorer._score_piece_async(audio_message_piece)
+
+            # Empty transcript returns an empty list rather than a score
+            assert len(scores) == 0
+
+    @pytest.mark.asyncio
+    async def test_score_piece_false_result(self, audio_message_piece):
+        """Test that a False verdict from the text scorer is surfaced as one 'false' score"""
+        text_scorer = MockTextTrueFalseScorer(return_value=False)
+        audio_scorer = AudioTrueFalseScorer(text_capable_scorer=text_scorer)
+
+        # Replace _transcribe_audio_async with an AsyncMock that returns a fixed transcript
+        with patch.object(
+            audio_scorer._audio_helper, "_transcribe_audio_async", new_callable=AsyncMock
+        ) as mock_transcribe:
+            mock_transcribe.return_value = "Some transcript text"
+
+            scores = await audio_scorer._score_piece_async(audio_message_piece)
+
+            assert len(scores) == 1
+            assert scores[0].score_type == "true_false"
+            assert scores[0].score_value == "false"
+
+
+@pytest.mark.usefixtures("patch_central_database")
+class TestAudioFloatScaleScorer:
+    """Unit tests for AudioFloatScaleScorer wrapping a text-only mock scorer"""
+
+    def test_init_with_text_scorer(self):
+        """Test that the wrapped text scorer is stored on the internal _audio_helper"""
+        text_scorer = MockTextFloatScaleScorer()
+        audio_scorer = AudioFloatScaleScorer(text_capable_scorer=text_scorer)
+
+        assert audio_scorer._audio_helper.text_scorer is text_scorer
+
+    def test_build_identifier(self):
+        """Test that _build_identifier returns a ScorerIdentifier instance"""
+        text_scorer = MockTextFloatScaleScorer()
+        audio_scorer = AudioFloatScaleScorer(text_capable_scorer=text_scorer)
+
+        identifier = audio_scorer._build_identifier()
+
+        assert isinstance(identifier, ScorerIdentifier)
+
+    @pytest.mark.asyncio
+    async def test_score_piece_with_transcript(self, audio_message_piece):
+        """Test that a non-empty transcript yields one float_scale score carrying the audio rationale prefix"""
+        text_scorer = MockTextFloatScaleScorer(return_value=0.75)
+        audio_scorer = AudioFloatScaleScorer(text_capable_scorer=text_scorer)
+
+        # Replace _transcribe_audio_async with an AsyncMock that returns a fixed transcript
+        with patch.object(
+            audio_scorer._audio_helper, "_transcribe_audio_async", new_callable=AsyncMock
+        ) as mock_transcribe:
+            mock_transcribe.return_value = "Hello, this is a test transcript."
+
+            scores = await audio_scorer._score_piece_async(audio_message_piece)
+
+            assert len(scores) == 1
+            assert scores[0].score_type == "float_scale"
+            assert float(scores[0].score_value) == 0.75  # value configured on the mock text scorer
+            assert "Audio transcript scored:" in scores[0].score_rationale  # not part of the mock's rationale text
+
+    @pytest.mark.asyncio
+    async def test_score_piece_empty_transcript(self, audio_message_piece):
+        """Test that an empty transcript yields an empty score list"""
+        text_scorer = MockTextFloatScaleScorer(return_value=0.8)
+        audio_scorer = AudioFloatScaleScorer(text_capable_scorer=text_scorer)
+
+        # Replace _transcribe_audio_async with an AsyncMock that returns an empty string
+        with patch.object(
+            audio_scorer._audio_helper, "_transcribe_audio_async", new_callable=AsyncMock
+        ) as mock_transcribe:
+            mock_transcribe.return_value = ""
+
+            scores = await audio_scorer._score_piece_async(audio_message_piece)
+
+            # Empty transcript returns an empty list rather than a score
+            assert len(scores) == 0
diff --git a/tests/unit/score/test_video_scorer.py b/tests/unit/score/test_video_scorer.py
index c15913128..850a3dcf3 100644
--- a/tests/unit/score/test_video_scorer.py
+++ b/tests/unit/score/test_video_scorer.py
@@ -4,7 +4,7 @@
 import os
 import uuid
 from typing import Optional
-from unittest.mock import AsyncMock, MagicMock
+from unittest.mock import AsyncMock, MagicMock, patch
 
 import numpy as np
 import pytest
@@ -12,6 +12,7 @@
 
 from pyrit.identifiers import ScorerIdentifier
 from pyrit.models import MessagePiece, Score
+from pyrit.score.audio_transcript_scorer import AudioTranscriptHelper
 from pyrit.score.float_scale.float_scale_scorer import FloatScaleScorer
 from pyrit.score.float_scale.video_float_scale_scorer import VideoFloatScaleScorer
 from pyrit.score.scorer_prompt_validator import ScorerPromptValidator
@@ -185,7 +186,7 @@ async def test_score_video_true_false(video_converter_sample_video):
     assert len(scores) == 1, "Expected one aggregated score"
     assert scores[0].score_type == "true_false"
     assert scores[0].score_value == "true"
-    assert "Video scored by analyzing" in scores[0].score_rationale
+    assert "Frames (3):" in scores[0].score_rationale
 
 
 @pytest.mark.asyncio
@@ -200,7 +201,7 @@ async def test_score_video_true_false_with_false_frames(video_converter_sample_v
     assert len(scores) == 1, "Expected one aggregated score"
     assert scores[0].score_type == "true_false"
     assert scores[0].score_value == "false"
-    assert "Video scored by analyzing" in scores[0].score_rationale
+    assert "Frames (3):" in scores[0].score_rationale
 
 
 @pytest.mark.asyncio
@@ -292,3 +293,156 @@ def test_video_scorer_default_num_frames():
     scorer = VideoTrueFalseScorer(image_capable_scorer=image_scorer)
 
     assert scorer.num_sampled_frames == 5  # Default value
+
+
+class MockAudioTrueFalseScorer(TrueFalseScorer, AudioTranscriptHelper):
+    """Stub audio scorer (fixed verdict, records the objective it receives) for video+audio integration tests"""
+
+    def __init__(self, return_value: bool = True):
+        self.return_value = return_value  # verdict reported by every _score_piece_async call
+        self.received_objective = None  # overwritten with the objective on each _score_piece_async call
+        # Declare audio_path as the supported data type for this scorer
+        validator = ScorerPromptValidator(supported_data_types=["audio_path"])
+        TrueFalseScorer.__init__(self, validator=validator)  # only the TrueFalseScorer base is initialised
+
+    def _build_identifier(self) -> ScorerIdentifier:
+        return self._create_identifier()
+
+    async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Optional[str] = None) -> list[Score]:
+        self.received_objective = objective
+        return [
+            Score(
+                score_type="true_false",
+                score_value=str(self.return_value).lower(),  # "true" or "false"
+                score_rationale="Mock audio score",
+                score_category=["audio"],
+                score_metadata={},
+                score_value_description="test_audio",
+                message_piece_id=message_piece.id or uuid.uuid4(),  # fresh UUID when the piece has no id
+                objective=objective,
+                scorer_class_identifier=get_mock_scorer_identifier(),
+            )
+        ]
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(not is_opencv_installed(), reason="opencv is not installed")
+async def test_video_true_false_scorer_with_audio_scorer(video_converter_sample_video):
+    """Test video scoring with an audio scorer; the rationale check only needs 'visual' or 'audio' to appear"""
+    image_scorer = MockTrueFalseScorer(return_value=True)
+    audio_scorer = MockAudioTrueFalseScorer(return_value=True)
+
+    # Mock extract_audio_from_video to avoid actual audio extraction; the returned path is a placeholder
+    with patch.object(AudioTranscriptHelper, "extract_audio_from_video", return_value="/tmp/mock_audio.wav"):
+        scorer = VideoTrueFalseScorer(
+            image_capable_scorer=image_scorer,
+            audio_scorer=audio_scorer,
+            num_sampled_frames=3,
+        )
+
+        scores = await scorer._score_piece_async(video_converter_sample_video)
+
+        assert len(scores) == 1
+        assert scores[0].score_type == "true_false"
+        assert scores[0].score_value == "true"
+        assert "visual" in scores[0].score_rationale.lower() or "audio" in scores[0].score_rationale.lower()
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(not is_opencv_installed(), reason="opencv is not installed")
+async def test_video_scorer_and_aggregation_both_true(video_converter_sample_video):
+    """Test AND aggregation: visual true and audio true yield a single 'true' score"""
+    image_scorer = MockTrueFalseScorer(return_value=True)  # visual verdict
+    audio_scorer = MockAudioTrueFalseScorer(return_value=True)  # audio verdict
+
+    with patch.object(AudioTranscriptHelper, "extract_audio_from_video", return_value="/tmp/mock_audio.wav"):
+        scorer = VideoTrueFalseScorer(
+            image_capable_scorer=image_scorer,
+            audio_scorer=audio_scorer,
+            num_sampled_frames=3,
+        )
+
+        scores = await scorer._score_piece_async(video_converter_sample_video)
+
+        assert len(scores) == 1
+        assert scores[0].score_value == "true"
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(not is_opencv_installed(), reason="opencv is not installed")
+async def test_video_scorer_and_aggregation_visual_false(video_converter_sample_video):
+    """Test AND aggregation: visual false and audio true yield a single 'false' score"""
+    image_scorer = MockTrueFalseScorer(return_value=False)  # visual verdict
+    audio_scorer = MockAudioTrueFalseScorer(return_value=True)  # audio verdict
+
+    with patch.object(AudioTranscriptHelper, "extract_audio_from_video", return_value="/tmp/mock_audio.wav"):
+        scorer = VideoTrueFalseScorer(
+            image_capable_scorer=image_scorer,
+            audio_scorer=audio_scorer,
+            num_sampled_frames=3,
+        )
+
+        scores = await scorer._score_piece_async(video_converter_sample_video)
+
+        assert len(scores) == 1
+        assert scores[0].score_value == "false"
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(not is_opencv_installed(), reason="opencv is not installed")
+async def test_video_scorer_and_aggregation_audio_false(video_converter_sample_video):
+    """Test AND aggregation: visual true and audio false yield a single 'false' score"""
+    image_scorer = MockTrueFalseScorer(return_value=True)  # visual verdict
+    audio_scorer = MockAudioTrueFalseScorer(return_value=False)  # audio verdict
+
+    with patch.object(AudioTranscriptHelper, "extract_audio_from_video", return_value="/tmp/mock_audio.wav"):
+        scorer = VideoTrueFalseScorer(
+            image_capable_scorer=image_scorer,
+            audio_scorer=audio_scorer,
+            num_sampled_frames=3,
+        )
+
+        scores = await scorer._score_piece_async(video_converter_sample_video)
+
+        assert len(scores) == 1
+        assert scores[0].score_value == "false"
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(not is_opencv_installed(), reason="opencv is not installed")
+async def test_video_scorer_with_audio_uses_and_aggregation(video_converter_sample_video):
+    """Test that a true audio verdict does not override a false visual verdict (AND, not OR)"""
+    image_scorer = MockTrueFalseScorer(return_value=False)  # visual verdict
+    audio_scorer = MockAudioTrueFalseScorer(return_value=True)  # audio verdict
+
+    with patch.object(AudioTranscriptHelper, "extract_audio_from_video", return_value="/tmp/mock_audio.wav"):
+        scorer = VideoTrueFalseScorer(
+            image_capable_scorer=image_scorer,
+            audio_scorer=audio_scorer,
+            num_sampled_frames=3,
+        )
+
+        scores = await scorer._score_piece_async(video_converter_sample_video)
+
+        assert len(scores) == 1
+        # With AND aggregation: False AND True = False (same setup as test_video_scorer_and_aggregation_visual_false)
+        assert scores[0].score_value == "false"
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(not is_opencv_installed(), reason="opencv is not installed")
+async def test_video_scorer_without_audio_scorer(video_converter_sample_video):
+    """Test that with audio_scorer=None a true image-scorer verdict yields a single 'true' score"""
+    image_scorer = MockTrueFalseScorer(return_value=True)
+
+    scorer = VideoTrueFalseScorer(
+        image_capable_scorer=image_scorer,
+        audio_scorer=None,  # No audio scorer, so extract_audio_from_video is left unpatched here
+        num_sampled_frames=3,
+    )
+
+    scores = await scorer._score_piece_async(video_converter_sample_video)
+
+    assert len(scores) == 1
+    assert scores[0].score_type == "true_false"
+    assert scores[0].score_value == "true"