@@ -357,22 +357,88 @@ def test_evaluation_metric_resource_name(client):
357357 """Tests with a metric resource name in types.Metric."""
358358 client ._api_client ._http_options .api_version = "v1beta1"
359359 client ._api_client ._http_options .base_url = (
360- "https://us-central1-staging-aiplatform.sandbox.googleapis.com/"
360+ "https://us-central1-autopush-aiplatform.sandbox.googleapis.com/"
361+ )
362+ tone_check_metric = types .LLMMetric (
363+ name = "tone_check" ,
364+ prompt_template = """
365+ # Instruction
366+ You are a professional writing evaluator. Your job is to score writing responses according to pre-defined evaluation criteria.
367+
368+ # Criteria
369+ Analyze the tone of the response based on these two criteria:
370+ 1. Professionalism: The response should use appropriate language and maintain a business-like demeanor.
371+ 2. Empathy: The response should acknowledge the user's feelings and show understanding.
372+
373+ # Input
374+ Prompt: {agent_data.turns[0].events[0]}
375+ Response: {agent_data.turns[0].events[1]}
376+
377+ # Output Format
378+ Respond in a JSON format with the following schema:
379+ {
380+ "type": "OBJECT",
381+ "properties": {
382+ "score": {"type": "NUMBER"},
383+ "explanation": {"type": "STRING"},
384+ },
385+ "required": ["score", "explanation"],
386+ }
387+ Return the JSON format output in a string representation of a Python dictionary directly, without strings like '```json' or '```'.
388+
389+ The output would include the following fields:
390+ score: based on your evaluation, the score should be a number based on the rating rubrics.
391+ explanation: your explanation for the score rating, in one line.
392+
393+ ## Example Output Format:
394+ {"score" : -1, "explanation": "Here is the reason that the response is given a score of -1 based on the rating rubric."}
395+ {"score" : 3, "explanation": "Here is the reason that the response is given a score of 3 based on the rating rubric."}
396+ {"score" : 0, "explanation": "Here is the reason that the response is given a score of 0 based on the rating rubric."}
397+ {"score" : 5, "explanation": "Here is the reason that the response is given a score of 5 based on the rating rubric."}
398+ """ ,
361399 )
362400 metric_resource_name = client .evals .create_evaluation_metric (
363- display_name = "test_metric" ,
364- description = "test_description" ,
365- metric = types .RubricMetric .GENERAL_QUALITY ,
401+ metric = tone_check_metric ,
366402 )
367403 assert isinstance (metric_resource_name , str )
368404 assert re .match (
369405 r"^projects/[^/]+/locations/[^/]+/evaluationMetrics/[^/]+$" ,
370406 metric_resource_name ,
371407 )
408+ agent_data = types .evals .AgentData (
409+ turns = [
410+ types .evals .ConversationTurn (
411+ turn_index = 0 ,
412+ events = [
413+ types .evals .AgentEvent (
414+ author = "user" ,
415+ content = genai_types .Content (
416+ role = "user" ,
417+ parts = [
418+ genai_types .Part (
419+ text = ("Write a simple story about a dinosaur" )
420+ )
421+ ],
422+ ),
423+ ),
424+ types .evals .AgentEvent (
425+ author = "model" ,
426+ content = genai_types .Content (
427+ role = "model" ,
428+ parts = [
429+ genai_types .Part (
430+ text = "Once upon a time, there was a T-Rex named Rexy."
431+ )
432+ ],
433+ ),
434+ ),
435+ ],
436+ ),
437+ ],
438+ )
372439 byor_df = pd .DataFrame (
373440 {
374- "prompt" : ["Write a simple story about a dinosaur" ],
375- "response" : ["Once upon a time, there was a T-Rex named Rexy." ],
441+ "agent_data" : [agent_data ],
376442 }
377443 )
378444 metric = types .Metric (