Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,8 @@ API Reference
:nosignatures:
:toctree: _autosummary/

AudioFloatScaleScorer
AudioTrueFalseScorer
AzureContentFilterScorer
BatchScorer
ConsoleScorerPrinter
Expand Down
777 changes: 339 additions & 438 deletions doc/code/targets/4_openai_video_target.ipynb

Large diffs are not rendered by default.

64 changes: 61 additions & 3 deletions doc/code/targets/4_openai_video_target.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.19.0
# jupytext_version: 1.17.2
# ---

# %% [markdown]
Expand All @@ -24,6 +24,7 @@
)
from pyrit.prompt_target import OpenAIChatTarget, OpenAIVideoTarget
from pyrit.score import (
AudioTrueFalseScorer,
AzureContentFilterScorer,
SelfAskTrueFalseScorer,
TrueFalseQuestion,
Expand All @@ -35,6 +36,18 @@
await initialize_pyrit_async(memory_db_type=IN_MEMORY) # type: ignore

video_target = OpenAIVideoTarget()

# %% [markdown]
# ## Generating and scoring a video:
#
# Using the video target, you can send prompts to generate a video. The video scorer can then evaluate the video content itself. Note that this section scores only the **video**, not the audio.

# %%
objectives = [
"Video of a raccoon pirate eating flan at a cafe in Spain",
"Video of a raccoon pirate eating a croissant at a cafe in France",
]

objective_scorer = SelfAskTrueFalseScorer(
chat_target=OpenAIChatTarget(),
true_false_question=TrueFalseQuestion(true_description="A raccoon dressed as a pirate is actively eating a pastry"),
Expand All @@ -53,11 +66,56 @@
),
)

results = await AttackExecutor().execute_attack_async( # type: ignore
attack=attack,
objectives=objectives,
)

for result in results:
await ConsoleAttackResultPrinter().print_result_async(result=result, include_auxiliary_scores=True) # type: ignore

# %% [markdown]
# ## Scoring video and audio **together**:
#
# The audio scorer can be added in order to evaluate both the video content and the audio present in the generated video.
#
# *Note*: audio scoring currently relies on transcription, so if the audio cannot be transcribed, the score will be False.

# %%
# Objectives to generate: the first two describe only visuals, while the last two also request spoken dialogue (what is HEARD/SAID in the video)
objectives = [
"Video of a raccoon pirate eating flan at a cafe in Spain",
"Video of a raccoon pirate eating a croissant at a cafe in France",
"Video of a raccoon pirate eating a croissant at a cafe in France who says 'Bonjour!, my name is Roakey and this is the best croissant ever!' Ensure the video contains all the audio.",
"Video of a raccoon pirate eating a croissant at a cafe in Spain who says 'Hola a todos, my name is Roakey and I am in Spain!' Ensure the video contains all the audio.",
]

# Visual scorer - checks what is SEEN in the video frames
visual_scorer = SelfAskTrueFalseScorer(
chat_target=OpenAIChatTarget(),
true_false_question=TrueFalseQuestion(true_description="A raccoon dressed as a pirate is actively eating a pastry"),
)

# Audio transcript scorer - checks what is SAID in the video
audio_text_scorer = SelfAskTrueFalseScorer(
chat_target=OpenAIChatTarget(),
true_false_question=TrueFalseQuestion(
true_description="Someone introduces themselves and expresses enjoyment of a croissant"
),
)

# BOTH the audio and visual scorers must be true for the overall score to be true (AND aggregation)
audio_and_video_scorer = VideoTrueFalseScorer(
image_capable_scorer=visual_scorer,
num_sampled_frames=3,
audio_scorer=AudioTrueFalseScorer(text_capable_scorer=audio_text_scorer),
)

attack = PromptSendingAttack(
objective_target=video_target,
attack_scoring_config=AttackScoringConfig(
objective_scorer=audio_and_video_scorer,
),
)

results = await AttackExecutor().execute_attack_async( # type: ignore
attack=attack,
objectives=objectives,
Expand Down
4 changes: 4 additions & 0 deletions pyrit/score/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from pyrit.score.batch_scorer import BatchScorer
from pyrit.score.conversation_scorer import ConversationScorer, create_conversation_scorer
from pyrit.score.float_scale.audio_float_scale_scorer import AudioFloatScaleScorer
from pyrit.score.float_scale.azure_content_filter_scorer import AzureContentFilterScorer
from pyrit.score.float_scale.float_scale_score_aggregator import (
FloatScaleScoreAggregator,
Expand Down Expand Up @@ -48,6 +49,7 @@
get_all_objective_metrics,
)
from pyrit.score.scorer_prompt_validator import ScorerPromptValidator
from pyrit.score.true_false.audio_true_false_scorer import AudioTrueFalseScorer
from pyrit.score.true_false.decoding_scorer import DecodingScorer
from pyrit.score.true_false.float_scale_threshold_scorer import FloatScaleThresholdScorer
from pyrit.score.true_false.gandalf_scorer import GandalfScorer
Expand All @@ -71,6 +73,8 @@
from pyrit.score.true_false.video_true_false_scorer import VideoTrueFalseScorer

__all__ = [
"AudioFloatScaleScorer",
"AudioTrueFalseScorer",
"AzureContentFilterScorer",
"BatchScorer",
"ContentClassifierPaths",
Expand Down
Loading