4 changes: 1 addition & 3 deletions evaluation/auto_evaluation/eval_main.py
@@ -43,8 +43,7 @@ def __init__(self, base_url: str, dataset: str, reranker_base_url: str = ""):
         self.qns = preprocess.read_data(self.dataset)
         self.eval_model = GeminiModel(
             model_name="gemini-2.5-pro",
-            project=os.getenv("GOOGLE_PROJECT_ID", ""),
-            location=os.getenv("GOOGLE_CLOUD_LOCATION", "us-central1"),
+            api_key=os.getenv("GOOGLE_API_KEY"),
         )
         self.log_dir = "logs"
         os.makedirs(self.log_dir, exist_ok=True)
@@ -108,7 +107,6 @@ def evaluate(self, retriever: str, limit: int | None = None):
             retrieval_tcs.append(retrieval_tc)
             response_times.append(response_time)
 
-        # parallel evaluate
         evaluate(
             test_cases=retrieval_tcs,
             metrics=[precision, recall, hallucination],
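Note: with this change the evaluator's GeminiModel is configured from GOOGLE_API_KEY alone; GOOGLE_PROJECT_ID and GOOGLE_CLOUD_LOCATION are no longer read here. A minimal sanity check before constructing the evaluator could look like the sketch below (illustrative, not part of the diff):

import os

# Illustrative guard: the constructor above now reads only GOOGLE_API_KEY.
if not os.getenv("GOOGLE_API_KEY"):
    raise RuntimeError("GOOGLE_API_KEY must be set before building the evaluator")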
91 changes: 91 additions & 0 deletions evaluation/auto_evaluation/src/models/gemini.py
@@ -0,0 +1,91 @@
"""
Custom DeepEvalLLM wrapper using Google Gemini API (google.genai).
"""

import os
from typing import Any, Optional, Type

from google import genai
from google.genai import types
from deepeval.models.base_model import DeepEvalBaseLLM
from pydantic import BaseModel


class Response(BaseModel):
content: str


class GoogleGeminiLangChain(DeepEvalBaseLLM):
"""Class that implements Google Gemini API for DeepEval"""

def __init__(self, model_name, *args, **kwargs):
self.client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
super().__init__(model_name, *args, **kwargs)

def load_model(self, *args, **kwargs):
return self.client.models

def generate(self, prompt: str, schema: Optional[Type[BaseModel]] = None) -> Any:
if schema is not None:
response = self.client.models.generate_content(
model=self.model_name,
contents=prompt,
config=types.GenerateContentConfig(
response_mime_type="application/json",
response_schema=schema,
),
)
return response.parsed, 0
else:
response = self.client.models.generate_content(
model=self.model_name,
contents=prompt,
)
return response.text, 0

async def a_generate(
self, prompt: str, schema: Optional[Type[BaseModel]] = None
) -> Any:
if schema is not None:
response = await self.client.aio.models.generate_content(
model=self.model_name,
contents=prompt,
config=types.GenerateContentConfig(
response_mime_type="application/json",
response_schema=schema,
),
)
return response.parsed, 0
else:
response = await self.client.aio.models.generate_content(
model=self.model_name,
contents=prompt,
)
return response.text, 0

def get_model_name(self):
return self.model_name or "model-not-specified"


def main():
model = GoogleGeminiLangChain(model_name="gemini-2.5-pro")
prompt = "Write me a joke"
print(f"Prompt: {prompt}")
response = model.generate(prompt, schema=Response)
print(f"Response: {response}")


async def main_async():
model = GoogleGeminiLangChain(model_name="gemini-2.5-pro")
prompt = "Write me a joke"
print(f"Prompt: {prompt}")
response = await model.a_generate(prompt, schema=Response)
print(f"Response: {response}")


if __name__ == "__main__":
import asyncio
from dotenv import load_dotenv

load_dotenv()
asyncio.run(main_async())
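For readers following along, here is a rough sketch of how a DeepEvalBaseLLM wrapper like this one is typically handed to a DeepEval metric. The metric choice and test-case values are illustrative and not part of the diff; the PR itself wires the model into precision/recall/hallucination metrics in eval_main.py.

# Hedged usage sketch: assumes GOOGLE_API_KEY is set and deepeval is installed.
from deepeval import evaluate
from deepeval.metrics import AnswerRelevancyMetric
from deepeval.test_case import LLMTestCase

eval_model = GoogleGeminiLangChain(model_name="gemini-2.5-pro")
metric = AnswerRelevancyMetric(model=eval_model, threshold=0.7)  # custom model passed via `model=`
test_case = LLMTestCase(
    input="What is retrieval-augmented generation?",
    actual_output="RAG augments an LLM prompt with documents fetched by a retriever.",
)
evaluate(test_cases=[test_case], metrics=[metric])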
100 changes: 0 additions & 100 deletions evaluation/auto_evaluation/src/models/vertex_ai.py

This file was deleted.

10 changes: 4 additions & 6 deletions evaluation/script_based_evaluation/config/config.py
@@ -8,13 +8,11 @@ def load_environment(env_path: str):
         raise FileNotFoundError(f"The specified .env file does not exist at {env_path}")
     load_dotenv(env_path, override=True)
     config = dotenv_values(env_path)
-    google_creds: Optional[str] = config.get("GOOGLE_APPLICATION_CREDENTIALS")
-    if google_creds is not None:
-        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = google_creds
+    api_key: Optional[str] = config.get("GOOGLE_API_KEY")
+    if api_key is not None:
+        os.environ["GOOGLE_API_KEY"] = api_key
     else:
-        raise KeyError(
-            "GOOGLE_APPLICATION_CREDENTIALS not found in .env file or is None"
-        )
+        raise KeyError("GOOGLE_API_KEY not found in .env file or is None")
 
 
 def get_config() -> dict[str, str]:
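In practice the script-based evaluation's .env now only needs the Gemini API key rather than a service-account credentials path. A minimal sketch of the new flow, assuming the .env contents shown in the comment and a module path inferred from the file layout:

# .env (illustrative):
#   GOOGLE_API_KEY=your-gemini-api-key
from script_based_evaluation.config.config import load_environment

load_environment(".env")  # copies GOOGLE_API_KEY into os.environ, or raises KeyError if it is missing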
4 changes: 0 additions & 4 deletions evaluation/script_based_evaluation/main.py
@@ -11,7 +11,6 @@
 from openai import OpenAI
 from tqdm import tqdm
 from typing import Any
-import vertexai
 
 
 def get_accuracy_value(response_text: str, ground_truth: str, query_text: str) -> str:
@@ -256,9 +255,6 @@ def main():
 
     client = OpenAI(api_key=api_key)
 
-    # Initialize Vertex AI
-    vertexai.init()
-
     # Input and Output Files
     input_file = "data/data.csv"
     output_file = f"{os.path.splitext(input_file)[0]}_result.csv"
121 changes: 49 additions & 72 deletions evaluation/script_based_evaluation/models/gemini_model.py
@@ -1,101 +1,78 @@
+import os
 import time
 import sys
 import traceback
-import vertexai.preview.generative_models as genai
-from vertexai.generative_models import (
-    HarmCategory,
-    HarmBlockThreshold,
-    SafetySetting,
-)
+from google import genai
+from google.genai import types
 from script_based_evaluation.utils.logging_utils import log_error
 
+_client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
 
-def base_gemini_1_5_flash(query: str) -> tuple[str, float]:
-    safety_config = [
-        SafetySetting(
-            category=HarmCategory.HARM_CATEGORY_HATE_SPEECH,
-            threshold=HarmBlockThreshold.BLOCK_NONE,
-        ),
-        SafetySetting(
-            category=HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
-            threshold=HarmBlockThreshold.BLOCK_NONE,
-        ),
-        SafetySetting(
-            category=HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
-            threshold=HarmBlockThreshold.BLOCK_NONE,
-        ),
-        SafetySetting(
-            category=HarmCategory.HARM_CATEGORY_HARASSMENT,
-            threshold=HarmBlockThreshold.BLOCK_NONE,
-        ),
-    ]
+_safety_config = [
+    types.SafetySetting(
+        category=types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
+        threshold=types.HarmBlockThreshold.BLOCK_NONE,
+    ),
+    types.SafetySetting(
+        category=types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
+        threshold=types.HarmBlockThreshold.BLOCK_NONE,
+    ),
+    types.SafetySetting(
+        category=types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
+        threshold=types.HarmBlockThreshold.BLOCK_NONE,
+    ),
+    types.SafetySetting(
+        category=types.HarmCategory.HARM_CATEGORY_HARASSMENT,
+        threshold=types.HarmBlockThreshold.BLOCK_NONE,
+    ),
+]
+
+
+def base_gemini_flash(query: str) -> tuple[str, float]:
     while True:
         try:
-            model = genai.GenerativeModel("gemini-2.0-flash")
             start_time = time.time()
-            query = " " + query
-            response = model.generate_content(query, safety_settings=safety_config)
+            response = _client.models.generate_content(
+                model="gemini-2.0-flash",
+                contents=" " + query,
+                config=types.GenerateContentConfig(
+                    safety_settings=_safety_config,
+                ),
+            )
             end_time = time.time()
-            response_time = (end_time - start_time) * 1000  # Convert to milliseconds
-            return response.text, response_time
+            response_time = (end_time - start_time) * 1000
+            return response.text or "", response_time
         except Exception as e:
             if "429" in str(e) or "RESOURCE_EXHAUSTED" in str(e):
-                print("Rate limit exceeded, sleeping for 10 seconds")
-                time.sleep(10)
+                print("Rate limit exceeded, sleeping for 60 seconds")
+                time.sleep(60)
             else:
-                error_message = f"Error in base_gemini_1_5_flash: {str(e)}"
-                error_details = traceback.format_exc()
-                log_error(error_message, error_details)
-                print(
-                    "An error occurred while sending request to Gemini. Check error_log.txt for details."
+                log_error(
+                    f"Error in base_gemini_flash: {str(e)}", traceback.format_exc()
                 )
                 sys.exit(1)
 
 
-def base_gemini_1_5_pro(query: str) -> tuple[str, float]:
-    safety_config = [
-        SafetySetting(
-            category=HarmCategory.HARM_CATEGORY_HATE_SPEECH,
-            threshold=HarmBlockThreshold.BLOCK_NONE,
-        ),
-        SafetySetting(
-            category=HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
-            threshold=HarmBlockThreshold.BLOCK_NONE,
-        ),
-        SafetySetting(
-            category=HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
-            threshold=HarmBlockThreshold.BLOCK_NONE,
-        ),
-        SafetySetting(
-            category=HarmCategory.HARM_CATEGORY_HARASSMENT,
-            threshold=HarmBlockThreshold.BLOCK_NONE,
-        ),
-    ]
+def base_gemini_pro(query: str) -> tuple[str, float]:
     while True:
         try:
-            model = genai.GenerativeModel("gemini-2.5-pro")
             start_time = time.time()
-            query = " " + query
-            response = model.generate_content(
-                query,
-                safety_settings=safety_config,
-                generation_config=genai.GenerationConfig(
+            response = _client.models.generate_content(
+                model="gemini-2.5-pro",
+                contents=" " + query,
+                config=types.GenerateContentConfig(
+                    safety_settings=_safety_config,
                     max_output_tokens=2000,
                     temperature=0.0,
                 ),
             )
             end_time = time.time()
-            response_time = (end_time - start_time) * 1000  # Convert to milliseconds
-            return response.text, response_time
+            response_time = (end_time - start_time) * 1000
+            return response.text or "", response_time
         except Exception as e:
             if "429" in str(e) or "RESOURCE_EXHAUSTED" in str(e):
-                print("Rate limit exceeded, sleeping for 10 seconds")
-                time.sleep(10)
+                print("Rate limit exceeded, sleeping for 60 seconds")
+                time.sleep(60)
             else:
-                error_message = f"Error in base_gemini_1_5_flash: {str(e)}"
-                error_details = traceback.format_exc()
-                log_error(error_message, error_details)
-                print(
-                    "An error occurred while sending request to Gemini. Check error_log.txt for details."
-                )
+                log_error(f"Error in base_gemini_pro: {str(e)}", traceback.format_exc())
                 sys.exit(1)
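Since the refactor creates the google.genai client at module import time, GOOGLE_API_KEY has to be present in the environment before this module is imported. A small hedged usage sketch follows; the prompt text is illustrative and the module path is assumed from the repository layout:

import os
from dotenv import load_dotenv

load_dotenv()  # must run before the import below, which builds the module-level client
assert os.getenv("GOOGLE_API_KEY"), "GOOGLE_API_KEY is required"

from script_based_evaluation.models.gemini_model import base_gemini_flash

answer, latency_ms = base_gemini_flash("Summarize what this evaluation script does.")
print(f"gemini-2.0-flash answered in {latency_ms:.0f} ms: {answer}")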