diff --git a/packages/optimization/src/ldai_optimization/client.py b/packages/optimization/src/ldai_optimization/client.py index 6ebd8bb..f78c2a7 100644 --- a/packages/optimization/src/ldai_optimization/client.py +++ b/packages/optimization/src/ldai_optimization/client.py @@ -21,6 +21,11 @@ OptimizationOptions, ToolDefinition, ) +from ldai_optimization.prompts import ( + build_message_history_text, + build_new_variation_prompt, + build_reasoning_history, +) from ldai_optimization.util import ( await_if_needed, create_evaluation_tool, @@ -179,39 +184,6 @@ def _judge_config( """ return self._ldClient.judge_config(judge_key, context, default, variables) - def _build_message_history_text( - self, - input_text: str, - reasoning_history: str, - current_user_input: str, - ) -> str: - """ - Build a formatted message-history string for use as a judge template variable. - - Combines the current instructions (system text), the conversation turns - recorded in self._history, the current turn's user question, and the - accumulated reasoning/score history. - - :param input_text: Current system instructions (may be empty string) - :param reasoning_history: Pre-formatted string from _build_reasoning_history - :param current_user_input: The user question for the turn being evaluated. - Must be passed explicitly because the current turn is not yet in - self._history when the judge runs. 
- :return: Combined string to substitute into the judge's message_history variable - """ - turn_messages = [] - for ctx in self._history: - if ctx.user_input: - turn_messages.append(f"User: {ctx.user_input}") - if ctx.completion_response: - turn_messages.append(f"Assistant: {ctx.completion_response}") - - # Include the current turn's question so judges see what was actually asked - turn_messages.append(f"User: {current_user_input}") - - sections = [input_text, "\n".join(turn_messages), reasoning_history] - return "\n\n".join(s for s in sections if s) - def _serialize_scores( self, judge_results: Dict[str, JudgeResult] ) -> Dict[str, Any]: @@ -349,7 +321,7 @@ async def _call_judges( resolved_agent_tools: List[ToolDefinition] = agent_tools or [] logger.info("[Iteration %d] -> Executing evaluation...", iteration) - reasoning_history = self._build_reasoning_history() + reasoning_history = build_reasoning_history(self._history) judge_results: Dict[str, JudgeResult] = {} judge_count = len(self._options.judges) @@ -449,8 +421,8 @@ async def _evaluate_config_judge( # Config-type judge: fetch judge config on-demand from LaunchDarkly SDK input_text = self._current_instructions or "" # Combine current instructions, history, and current question for message_history - message_history_text = self._build_message_history_text( - input_text, reasoning_history, current_user_input=user_input + message_history_text = build_message_history_text( + self._history, input_text, reasoning_history, user_input ) # Merge agent variables so the judge's LD-managed instructions can reference @@ -619,8 +591,8 @@ async def _evaluate_acceptance_judge( resolved_agent_tools = agent_tools or [] # Build message history including the current user question - message_history_text = self._build_message_history_text( - "", reasoning_history, current_user_input=user_input + message_history_text = build_message_history_text( + self._history, "", reasoning_history, user_input ) # Build instructions for the 
judge @@ -746,342 +718,6 @@ async def optimize_from_options( agent_config = await self._get_agent_config(agent_key, context) return await self._run_optimization(agent_config, options) - def _build_reasoning_history(self) -> str: - """ - Build a formatted string of reasoning from previous iterations. - - :return: Formatted string containing reasoning history - """ - if not self._history: - return "" - - reasoning_parts = [] - for i, prev_ctx in enumerate(self._history, 1): - if prev_ctx.scores: - reasoning_parts.append(f"## Iteration {i} Judge Evaluations:") - for judge_key, result in prev_ctx.scores.items(): - reasoning_parts.append(f"- {judge_key}: Score {result.score}") - if result.rationale: - reasoning_parts.append(f" Reasoning: {result.rationale}") - reasoning_parts.append("") - - return "\n".join(reasoning_parts) - - def _build_new_variation_prompt(self, history: List[OptimizationContext]) -> str: - """ - Build the LLM prompt for generating an improved agent configuration. - - Constructs a detailed instruction string based on the full optimization - history, including all previous configurations, completion results, and - judge scores. When history is empty (first variation attempt), asks the - LLM to improve the current config without evaluation feedback. - - :param history: All previous OptimizationContexts, oldest first. Empty on the first attempt. 
- :return: The assembled prompt string - """ - sections = [ - self._new_variation_prompt_preamble(), - self._new_variation_prompt_acceptance_criteria(), - self._new_variation_prompt_configuration(history), - self._new_variation_prompt_feedback(history), - self._new_variation_prompt_improvement_instructions(history), - ] - - built_prompt = "\n\n".join(s for s in sections if s) - return built_prompt - - def _new_variation_prompt_preamble(self) -> str: - """Static opening section for the variation generation prompt.""" - return "\n".join( - [ - "You are an assistant that helps improve agent configurations through iterative optimization.", - "", - "Your task is to generate improved agent instructions and parameters based on the feedback provided.", - "The feedback you provide should guide the LLM to improve the agent instructions " - "for all possible use cases, not one concrete case.", - "For example, if the feedback is that the agent is not returning the correct records, " - "you should improve the agent instructions to return the correct records for all possible use cases. " - "Not just the one concrete case that was provided in the feedback.", - "When changing the instructions, keep the original intent in mind " - "when it comes to things like the use of variables and placeholders.", - "If the original instructions were to use a placeholder like {{id}}, " - "you should keep the placeholder in the new instructions, not replace it with the actual value. " - "This is the case for all parameterized values (all parameters should appear in each new variation).", - "Pay particular attention to the instructions regarding tools and the rules for variables.", - ] - ) - - def _new_variation_prompt_acceptance_criteria(self) -> str: - """ - Acceptance criteria section of the variation prompt. - - Collects every acceptance statement defined across all judges and renders - them as an emphatic block so the LLM understands exactly what the improved - configuration must achieve. 
Returns an empty string when no judges carry - acceptance statements (e.g. all judges are config-key-only judges). - """ - if not self._options.judges: - return "" - - statements = [ - (key, judge.acceptance_statement) - for key, judge in self._options.judges.items() - if judge.acceptance_statement - ] - - if not statements: - return "" - - lines = [ - "## *** ACCEPTANCE CRITERIA (MUST BE MET) ***", - "The improved configuration MUST produce responses that satisfy ALL of the following criteria.", - "These criteria are non-negotiable — every generated variation will be evaluated against them.", - "", - ] - for key, statement in statements: - lines.append(f"- [{key}] {statement}") - - lines += [ - "", - "When writing new instructions, explicitly address each criterion above.", - "Do not sacrifice any criterion in favour of another.", - ] - - return "\n".join(lines) - - def _new_variation_prompt_configuration( - self, history: List[OptimizationContext] - ) -> str: - """ - Configuration section of the variation prompt. - - Shows the most recent iteration's model, instructions, parameters, - user input, and completion response when history is available, or the - current instance state on the first attempt. 
- """ - if history: - previous_ctx = history[-1] - lines = [ - "## Most Recent Configuration:", - f"Model: {previous_ctx.current_model}", - f"Instructions: {previous_ctx.current_instructions}", - f"Parameters: {previous_ctx.current_parameters}", - "", - "## Most Recent Result:", - ] - if previous_ctx.user_input: - lines.append(f"User question: {previous_ctx.user_input}") - lines.append(f"Agent response: {previous_ctx.completion_response}") - return "\n".join(lines) - else: - return "\n".join( - [ - "## Current Configuration:", - f"Model: {self._current_model}", - f"Instructions: {self._current_instructions}", - f"Parameters: {self._current_parameters}", - ] - ) - - def _new_variation_prompt_feedback(self, history: List[OptimizationContext]) -> str: - """ - Evaluation feedback section of the variation prompt. - - Renders all previous iterations' scores in chronological order so the - LLM can observe trends across the full optimization run. Returns an - empty string when no history exists or no iteration has scores, so it - is filtered out of the assembled prompt entirely. 
- """ - iterations_with_scores = [ctx for ctx in history if ctx.scores] - if not iterations_with_scores: - return "" - - lines = ["## Evaluation History:"] - for ctx in iterations_with_scores: - lines.append(f"\n### Iteration {ctx.iteration}:") - if ctx.user_input: - lines.append(f"User question: {ctx.user_input}") - for judge_key, result in ctx.scores.items(): - optimization_judge = ( - self._options.judges.get(judge_key) - if self._options.judges - else None - ) - if optimization_judge: - score = result.score - if optimization_judge.threshold is not None: - passed = score >= optimization_judge.threshold - status = "PASSED" if passed else "FAILED" - feedback_line = ( - f"- {judge_key}: Score {score:.3f}" - f" (threshold: {optimization_judge.threshold}) - {status}" - ) - else: - passed = score >= 1.0 - status = "PASSED" if passed else "FAILED" - feedback_line = f"- {judge_key}: {status}" - if result.rationale: - feedback_line += f"\n Reasoning: {result.rationale}" - lines.append(feedback_line) - return "\n".join(lines) - - def _new_variation_prompt_improvement_instructions( - self, history: List[OptimizationContext] - ) -> str: - """ - Improvement instructions section of the variation prompt. - - Includes model-choice guidance, prompt variable rules, and the required - output format schema. When history is non-empty, adds feedback-driven - improvement directives. - """ - model_instructions = "\n".join( - [ - "You may also choose to change the model if you believe that the current model is " - "not performing well or a different model would be better suited for the task. " - f"Here are the models you may choose from: {self._options.model_choices}. 
" - "You must always return a model property, even if it's the same as the current model.", - "When suggesting a new model, you should provide a rationale for why you believe " - "the new model would be better suited for the task.", - ] - ) - - # Collect unique variable keys across all variable_choices entries - variable_keys: set = set() - for choice in self._options.variable_choices: - variable_keys.update(choice.keys()) - placeholder_list = ", ".join(f"{{{{{k}}}}}" for k in sorted(variable_keys)) - - variable_instructions = "\n".join( - [ - "## Prompt Variables:", - "These variables are substituted into the instructions at call time using {{variable_name}} syntax.", - "Rules:", - "- If the {{variable_name}} placeholder is not present in the current instructions, " - "you should include it where logically appropriate.", - "Here are the original instructions so that you can see how the " - "placeholders are used and which are available:", - "\nSTART:" "\n" + self._initial_instructions + "\n", - "\nEND OF ORIGINAL INSTRUCTIONS\n", - f"The following prompt variables are available and are the only " - f"variables that should be used: {placeholder_list}", - "Here is an example of a good response if an {{id}} placeholder is available: " - "'Select records matching id {{id}}'", - "Here is an example of a bad response if an {{id}} placeholder is available: " - "'Select records matching id 1232'", - "Here is an example of a good response if a {{resource_id}} and {{resource_type}} " - "placeholder are available: " - "'Select records matching id {{resource_id}} and type {{resource_type}}'", - "Here is an example of a bad response if a {{resource_id}} and {{resource_type}} " - "placeholder are available: " - "'Select records matching id 1232 and type {{resource_type}}'", - "Here is another example of a bad response if a {{resource_id}} and {{resource_type}} " - "placeholder are available: " - "'Select records matching id {{resource_id}} and type resource-123'", - ] - ) - - 
tool_instructions = "\n".join( - [ - "## Tool Format:", - 'If the current configuration includes tools, you MUST return them ' - 'unchanged in current_parameters["tools"].', - "Do NOT include internal framework tools such as the evaluation tool or structured output tool.", - "Each tool must follow this exact format:", - "{", - ' "name": "tool-name",', - ' "type": "function",', - ' "description": "What the tool does",', - ' "parameters": {', - ' "type": "object",', - ' "properties": {', - ' "param_name": {', - ' "type": "type of the input parameter",', - ' "description": "Description of the parameter"', - " }", - " },", - ' "required": ["param_name"],', - ' "additionalProperties": false', - " }", - "}", - "Example:", - "{", - ' "name": "user-preferences-lookup",', - ' "type": "function",', - ' "description": "Looks up user preferences by ID",', - ' "parameters": {', - ' "type": "object",', - ' "properties": {', - ' "user_id": {', - ' "type": "string",', - ' "description": "The user id"', - " }", - " },", - ' "required": ["user_id"],', - ' "additionalProperties": false', - " }", - "}", - ] - ) - - parameters_instructions = "\n".join( - [ - "Return these values in a JSON object with the following keys: " - "current_instructions, current_parameters, and model.", - "Example:", - "{", - ' "current_instructions": "...', - ' "current_parameters": {', - ' "...": "..."', - " },", - ' "model": "gpt-4o"', - "}", - "Parameters should only be things that are directly parseable by an LLM call, " - "for example, temperature, max_tokens, etc.", - "Do not include any other parameters that are not directly parseable by an LLM call. 
" - "If you want to provide instruction for tone or other attributes, " - "provide them directly in the instructions.", - ] - ) - - if history: - return "\n".join( - [ - "## Improvement Instructions:", - "Based on the evaluation history above, generate improved agent instructions and parameters.", - "Focus on addressing the areas where the evaluation failed or scored below threshold.", - "The new configuration should aim to improve the agent's performance on the evaluation criteria.", - model_instructions, - "", - variable_instructions, - "", - tool_instructions, - "", - "Return the improved configuration in a structured format that can be parsed to update:", - "1. The agent instructions (current_instructions)", - "2. The agent parameters (current_parameters)", - "3. The model (model) - you must always return a model, " - "even if it's the same as the current model.", - "4. You should return the tools the user has defined, as-is, on the new parameters. " - "Do not modify them, but make sure you do not include internal tools like " - "the evaluation tool or structured output tool.", - parameters_instructions, - ] - ) - else: - return "\n".join( - [ - "Generate an improved version of this configuration.", - model_instructions, - "", - variable_instructions, - "", - tool_instructions, - "", - parameters_instructions, - ] - ) - def _apply_new_variation_response( self, response_data: Dict[str, Any], @@ -1198,7 +834,16 @@ async def _generate_new_variation( ) self._safe_status_update("generating variation", status_ctx, iteration) - instructions = self._build_new_variation_prompt(self._history) + instructions = build_new_variation_prompt( + self._history, + self._options.judges, + self._current_model, + self._current_instructions, + self._current_parameters, + self._options.model_choices, + self._options.variable_choices, + self._initial_instructions, + ) # Create a flat history list (without nested history) to avoid exponential growth flat_history = 
[prev_ctx.copy_without_history() for prev_ctx in self._history] diff --git a/packages/optimization/src/ldai_optimization/prompts.py b/packages/optimization/src/ldai_optimization/prompts.py new file mode 100644 index 0000000..556b661 --- /dev/null +++ b/packages/optimization/src/ldai_optimization/prompts.py @@ -0,0 +1,427 @@ +"""Prompt-building functions for LaunchDarkly AI optimization.""" + +from typing import Any, Dict, List, Optional + +from ldai_optimization.dataclasses import ( + OptimizationContext, + OptimizationJudge, +) + + +def build_message_history_text( + history: List[OptimizationContext], + input_text: str, + reasoning_history: str, + current_user_input: str, +) -> str: + """ + Build a formatted message-history string for use as a judge template variable. + + Combines the current instructions (system text), the conversation turns + recorded in history, the current turn's user question, and the accumulated + reasoning/score history. + + :param history: All previous OptimizationContexts, oldest first + :param input_text: Current system instructions (may be empty string) + :param reasoning_history: Pre-formatted string from build_reasoning_history + :param current_user_input: The user question for the turn being evaluated. + Must be passed explicitly because the current turn is not yet in + history when the judge runs. 
+ :return: Combined string to substitute into the judge's message_history variable + """ + turn_messages = [] + for ctx in history: + if ctx.user_input: + turn_messages.append(f"User: {ctx.user_input}") + if ctx.completion_response: + turn_messages.append(f"Assistant: {ctx.completion_response}") + + # Include the current turn's question so judges see what was actually asked + turn_messages.append(f"User: {current_user_input}") + + parts = [] + if input_text: + parts.append(f"System: {input_text}") + if turn_messages: + parts.append("\n".join(turn_messages)) + if reasoning_history: + parts.append(f"Evaluation history:\n{reasoning_history}") + + return "\n\n".join(parts) + + +def build_reasoning_history(history: List[OptimizationContext]) -> str: + """ + Build a formatted string of reasoning from previous iterations. + + :param history: All previous OptimizationContexts, oldest first + :return: Formatted string containing reasoning history + """ + if not history: + return "" + + reasoning_parts = [] + for i, prev_ctx in enumerate(history, 1): + if prev_ctx.scores: + reasoning_parts.append(f"## Iteration {i} Judge Evaluations:") + for judge_key, result in prev_ctx.scores.items(): + reasoning_parts.append(f"- {judge_key}: Score {result.score}") + if result.rationale: + reasoning_parts.append(f" Reasoning: {result.rationale}") + reasoning_parts.append("") + + return "\n".join(reasoning_parts) + + +def build_new_variation_prompt( + history: List[OptimizationContext], + judges: Optional[Dict[str, OptimizationJudge]], + current_model: Optional[str], + current_instructions: str, + current_parameters: Dict[str, Any], + model_choices: List[str], + variable_choices: List[Dict[str, Any]], + initial_instructions: str, +) -> str: + """ + Build the LLM prompt for generating an improved agent configuration. + + Constructs a detailed instruction string based on the full optimization + history, including all previous configurations, completion results, and + judge scores. 
When history is empty (first variation attempt), asks the + LLM to improve the current config without evaluation feedback. + + :param history: All previous OptimizationContexts, oldest first. Empty on the first attempt. + :param judges: Judge configuration dict from OptimizationOptions + :param current_model: The model currently in use + :param current_instructions: The current agent instructions template + :param current_parameters: The current model parameters dict + :param model_choices: List of model IDs the LLM may select from + :param variable_choices: List of variable dicts (used to derive placeholder names) + :param initial_instructions: The original unmodified instructions template + :return: The assembled prompt string + """ + sections = [ + variation_prompt_preamble(), + variation_prompt_acceptance_criteria(judges), + variation_prompt_configuration( + history, current_model, current_instructions, current_parameters + ), + variation_prompt_feedback(history, judges), + variation_prompt_improvement_instructions( + history, model_choices, variable_choices, initial_instructions + ), + ] + + return "\n\n".join(s for s in sections if s) + + +def variation_prompt_preamble() -> str: + """Static opening section for the variation generation prompt.""" + return "\n".join( + [ + "You are an assistant that helps improve agent configurations through iterative optimization.", + "", + "Your task is to generate improved agent instructions and parameters based on the feedback provided.", + "The feedback you provide should guide the LLM to improve the agent instructions " + "for all possible use cases, not one concrete case.", + "For example, if the feedback is that the agent is not returning the correct records, " + "you should improve the agent instructions to return the correct records for all possible use cases. 
"
+            "Not just the one concrete case that was provided in the feedback.",
+            "When changing the instructions, keep the original intent in mind "
+            "when it comes to things like the use of variables and placeholders.",
+            "If the original instructions were to use a placeholder like {{id}}, "
+            "you should keep the placeholder in the new instructions, not replace it with the actual value. "
+            "This is the case for all parameterized values (all parameters should appear in each new variation).",
+            "Pay particular attention to the instructions regarding tools and the rules for variables.",
+        ]
+    )
+
+
+def variation_prompt_acceptance_criteria(
+    judges: Optional[Dict[str, OptimizationJudge]],
+) -> str:
+    """
+    Acceptance criteria section of the variation prompt.
+
+    Collects every acceptance statement defined across all judges and renders
+    them as an emphatic block so the LLM understands exactly what the improved
+    configuration must achieve. Returns an empty string when no judges carry
+    acceptance statements (e.g. all judges are config-key-only judges).
+    """
+    if not judges:
+        return ""
+
+    statements = [
+        (key, judge.acceptance_statement)
+        for key, judge in judges.items()
+        if judge.acceptance_statement
+    ]
+
+    if not statements:
+        return ""
+
+    lines = [
+        "## *** ACCEPTANCE CRITERIA (MUST BE MET) ***",
+        "The improved configuration MUST produce responses that satisfy ALL of the following criteria.",
+        "These criteria are non-negotiable — every generated variation will be evaluated against them.",
+        "All variables must be used in the new instructions.",
+ "", + ] + for key, statement in statements: + lines.append(f"- [{key}] {statement}") + + lines += [ + "", + "When writing new instructions, explicitly address each criterion above.", + "Do not sacrifice any criterion in favour of another.", + ] + + return "\n".join(lines) + + +def variation_prompt_configuration( + history: List[OptimizationContext], + current_model: Optional[str], + current_instructions: str, + current_parameters: Dict[str, Any], +) -> str: + """ + Configuration section of the variation prompt. + + Shows the most recent iteration's model, instructions, parameters, + user input, and completion response when history is available, or the + current state on the first attempt. + """ + if history: + previous_ctx = history[-1] + lines = [ + "## Most Recent Configuration:", + f"Model: {previous_ctx.current_model}", + f"Instructions: {previous_ctx.current_instructions}", + f"Parameters: {previous_ctx.current_parameters}", + "", + "## Most Recent Result:", + ] + if previous_ctx.user_input: + lines.append(f"User question: {previous_ctx.user_input}") + lines.append(f"Agent response: {previous_ctx.completion_response}") + return "\n".join(lines) + else: + return "\n".join( + [ + "## Current Configuration:", + f"Model: {current_model}", + f"Instructions: {current_instructions}", + f"Parameters: {current_parameters}", + ] + ) + + +def variation_prompt_feedback( + history: List[OptimizationContext], + judges: Optional[Dict[str, OptimizationJudge]], +) -> str: + """ + Evaluation feedback section of the variation prompt. + + Renders all previous iterations' scores in chronological order so the + LLM can observe trends across the full optimization run. Returns an + empty string when no history exists or no iteration has scores, so it + is filtered out of the assembled prompt entirely. 
+ """ + iterations_with_scores = [ctx for ctx in history if ctx.scores] + if not iterations_with_scores: + return "" + + lines = ["## Evaluation History:"] + for ctx in iterations_with_scores: + lines.append(f"\n### Iteration {ctx.iteration}:") + if ctx.user_input: + lines.append(f"User question: {ctx.user_input}") + for judge_key, result in ctx.scores.items(): + optimization_judge = judges.get(judge_key) if judges else None + if optimization_judge: + score = result.score + if optimization_judge.threshold is not None: + passed = score >= optimization_judge.threshold + status = "PASSED" if passed else "FAILED" + feedback_line = ( + f"- {judge_key}: Score {score:.3f}" + f" (threshold: {optimization_judge.threshold}) - {status}" + ) + else: + passed = score >= 1.0 + status = "PASSED" if passed else "FAILED" + feedback_line = f"- {judge_key}: {status}" + if result.rationale: + feedback_line += f"\n Reasoning: {result.rationale}" + lines.append(feedback_line) + return "\n".join(lines) + + +def variation_prompt_improvement_instructions( + history: List[OptimizationContext], + model_choices: List[str], + variable_choices: List[Dict[str, Any]], + initial_instructions: str, +) -> str: + """ + Improvement instructions section of the variation prompt. + + Includes model-choice guidance, prompt variable rules, and the required + output format schema. When history is non-empty, adds feedback-driven + improvement directives. + """ + model_instructions = "\n".join( + [ + "You may also choose to change the model if you believe that the current model is " + "not performing well or a different model would be better suited for the task. " + f"Here are the models you may choose from: {model_choices}. 
" + "You must always return a model property, even if it's the same as the current model.", + "When suggesting a new model, you should provide a rationale for why you believe " + "the new model would be better suited for the task.", + ] + ) + + # Collect unique variable keys across all variable_choices entries + variable_keys: set = set() + for choice in variable_choices: + variable_keys.update(choice.keys()) + placeholder_list = ", ".join(f"{{{{{k}}}}}" for k in sorted(variable_keys)) + + variable_instructions = "\n".join( + [ + "## Prompt Variables:", + "These variables are substituted into the instructions at call time using {{variable_name}} syntax.", + "Rules:", + "- If the {{variable_name}} placeholder is not present in the current instructions, " + "you should include it where logically appropriate.", + "Here are the original instructions so that you can see how the " + "placeholders are used and which are available:", + "\nSTART:" "\n" + initial_instructions + "\n", + "\nEND OF ORIGINAL INSTRUCTIONS\n", + "The following prompt variables are available and are the only " + f"variables that should be used: {placeholder_list}", + "Here is an example of a good response if an {{id}} placeholder is available: " + "'Select records matching id {{id}}'", + "Here is an example of a bad response if an {{id}} placeholder is available: " + "'Select records matching id 1232'", + "Here is an example of a good response if a {{resource_id}} and {{resource_type}} " + "placeholder are available: " + "'Select records matching id {{resource_id}} and type {{resource_type}}'", + "Here is an example of a bad response if a {{resource_id}} and {{resource_type}} " + "placeholder are available: " + "'Select records matching id 1232 and type {{resource_type}}'", + "Here is another example of a bad response if a {{resource_id}} and {{resource_type}} " + "placeholder are available: " + "'Select records matching id {{resource_id}} and type {{resource-123}}'", + "The above example is 
incorrect because the resource-123 is not a valid variable name.", + "To fix the above example, you would instead use {{resource_type}} and {{resource_id}}", + ] + ) + + tool_instructions = "\n".join( + [ + "## Tool Format:", + 'If the current configuration includes tools, you MUST return them ' + 'unchanged in current_parameters["tools"].', + "Do NOT include internal framework tools such as the evaluation tool or structured output tool.", + "Each tool must follow this exact format:", + "{", + ' "name": "tool-name",', + ' "type": "function",', + ' "description": "What the tool does",', + ' "parameters": {', + ' "type": "object",', + ' "properties": {', + ' "param_name": {', + ' "type": "type of the input parameter",', + ' "description": "Description of the parameter"', + " }", + " },", + ' "required": ["param_name"],', + ' "additionalProperties": false', + " }", + "}", + "Example:", + "{", + ' "name": "user-preferences-lookup",', + ' "type": "function",', + ' "description": "Looks up user preferences by ID",', + ' "parameters": {', + ' "type": "object",', + ' "properties": {', + ' "user_id": {', + ' "type": "string",', + ' "description": "The user id"', + " }", + " },", + ' "required": ["user_id"],', + ' "additionalProperties": false', + " }", + "}", + "", + "Always call the return_improved_configuration tool to format the response.", + "Return the response as-is from the return_improved_configuration tool,", + "do not modify it in any way.", + ] + ) + + parameters_instructions = "\n".join( + [ + "Return these values in a JSON object with the following keys: " + "current_instructions, current_parameters, and model.", + "Example:", + "{", + ' "current_instructions": "...', + ' "current_parameters": {', + ' "...": "..."', + " },", + ' "model": "gpt-4o"', + "}", + "Parameters should only be things that are directly parseable by an LLM call, " + "for example, temperature, max_tokens, etc.", + "Do not include any other parameters that are not directly parseable by an 
LLM call. " + "If you want to provide instruction for tone or other attributes, " + "provide them directly in the instructions.", + ] + ) + + if history: + return "\n".join( + [ + "## Improvement Instructions:", + "Based on the evaluation history above, generate improved agent instructions and parameters.", + "Focus on addressing the areas where the evaluation failed or scored below threshold.", + "The new configuration should aim to improve the agent's performance on the evaluation criteria.", + model_instructions, + "", + variable_instructions, + "", + tool_instructions, + "", + "Return the improved configuration in a structured format that can be parsed to update:", + "1. The agent instructions (current_instructions)", + "2. The agent parameters (current_parameters)", + "3. The model (model) - you must always return a model, " + "even if it's the same as the current model.", + "4. You should return the tools the user has defined, as-is, on the new parameters. " + "Do not modify them, but make sure you do not include internal tools like " + "the evaluation tool or structured output tool.", + parameters_instructions, + ] + ) + else: + return "\n".join( + [ + "Generate an improved version of this configuration.", + model_instructions, + "", + variable_instructions, + "", + tool_instructions, + "", + parameters_instructions, + ] + ) diff --git a/packages/optimization/src/ldai_optimization/util.py b/packages/optimization/src/ldai_optimization/util.py index 7996182..2882c87 100644 --- a/packages/optimization/src/ldai_optimization/util.py +++ b/packages/optimization/src/ldai_optimization/util.py @@ -257,7 +257,8 @@ def extract_json_from_response(response_str: str) -> Dict[str, Any]: ) raise ValueError( "Failed to parse structured output from variation generation. " - "Expected JSON object with 'current_instructions', 'current_parameters', and 'model' fields." + "Expected JSON object with 'current_instructions', 'current_parameters', and 'model' fields. 
" + f"Response length: {len(response_str)}" ) return response_data diff --git a/packages/optimization/tests/test_client.py b/packages/optimization/tests/test_client.py index b580524..7ccb406 100644 --- a/packages/optimization/tests/test_client.py +++ b/packages/optimization/tests/test_client.py @@ -19,6 +19,10 @@ OptimizationOptions, ToolDefinition, ) +from ldai_optimization.prompts import ( + build_new_variation_prompt, + variation_prompt_acceptance_criteria, +) from ldai_optimization.util import ( create_evaluation_tool, create_variation_tool, @@ -949,60 +953,56 @@ async def test_status_update_callback_called_at_each_stage(self): class TestVariationPromptAcceptanceCriteria: - def setup_method(self): - self.client = _make_client() - agent_config = _make_agent_config() - self.client._agent_key = "test-agent" - self.client._agent_config = agent_config - self.client._initial_instructions = AGENT_INSTRUCTIONS - self.client._initialize_class_members_from_config(agent_config) - - def _set_judges(self, judges): - self.client._options = _make_options(judges=judges) - def test_includes_acceptance_statement_in_section(self): - self._set_judges({ + judges = { "quality": OptimizationJudge( threshold=0.8, acceptance_statement="Responses must be concise and factual.", ) - }) - section = self.client._new_variation_prompt_acceptance_criteria() + } + section = variation_prompt_acceptance_criteria(judges) assert "Responses must be concise and factual." in section assert "quality" in section def test_labels_all_judges(self): - self._set_judges({ + judges = { "a": OptimizationJudge(threshold=0.8, acceptance_statement="Must be brief."), "b": OptimizationJudge(threshold=0.9, acceptance_statement="Must cite sources."), - }) - section = self.client._new_variation_prompt_acceptance_criteria() + } + section = variation_prompt_acceptance_criteria(judges) assert "[a]" in section assert "[b]" in section assert "Must be brief." in section assert "Must cite sources." 
in section def test_returns_empty_string_when_no_acceptance_statements(self): - self._set_judges({ + judges = { "ld-judge": OptimizationJudge(threshold=0.8, judge_key="some-ld-key"), - }) - section = self.client._new_variation_prompt_acceptance_criteria() + } + section = variation_prompt_acceptance_criteria(judges) assert section == "" def test_returns_empty_string_with_no_judges(self): - options = MagicMock() - options.judges = None - self.client._options = options - section = self.client._new_variation_prompt_acceptance_criteria() + section = variation_prompt_acceptance_criteria(None) assert section == "" def test_section_appears_in_full_prompt(self): - self._set_judges({ + judges = { "accuracy": OptimizationJudge( threshold=0.8, acceptance_statement="Facts only.", ) - }) - prompt = self.client._build_new_variation_prompt([]) + } + options = _make_options(judges=judges) + prompt = build_new_variation_prompt( + history=[], + judges=judges, + current_model="gpt-4o", + current_instructions=AGENT_INSTRUCTIONS, + current_parameters={}, + model_choices=options.model_choices, + variable_choices=options.variable_choices, + initial_instructions=AGENT_INSTRUCTIONS, + ) assert "Facts only." in prompt assert "ACCEPTANCE CRITERIA" in prompt