Skip to content

Commit e5efdb8

Browse files
committed
[feat] add guardrails with llama guard 3
1 parent 3cb6bac commit e5efdb8

File tree

7 files changed

+89
-10
lines changed

7 files changed

+89
-10
lines changed

src/api/controllers/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
from .chat import new_message as chat_new_message
2+
from .guardrails import Guardrail
23

3-
__all__ = ["chat_new_message"]
4+
__all__ = ["chat_new_message", "Guardrail"]

src/api/controllers/guardrails.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
from fastapi import HTTPException, Request
2+
3+
from src.api.models import APIRequest
4+
from src.services.llama_guard import LlamaGuard
5+
from src.infrastructure.database import (
6+
MongoDB,
7+
get_user_details,
8+
block_user
9+
)
10+
11+
12+
class Guardrail:
    """FastAPI dependency that screens every incoming chat request.

    Two checks run per request:
      1. Reject requests from users already flagged as blocked in the DB.
      2. If a Llama Guard instance is configured on the app, classify the
         message and block the user when the content is unsafe.
    """

    def __call__(self, api_request: APIRequest, req: Request) -> None:
        # Fix: consult the block list FIRST — an already-blocked user must
        # be rejected without spending an LLM call on moderation.
        self.check_user(api_request.user_id, req.app.database)
        # `req.app.llama_guard` is falsy when the guard model is unavailable,
        # in which case moderation is skipped gracefully.
        if req.app.llama_guard:
            self.llama_guard_layer(
                api_request.message,
                req.app.llama_guard,
                api_request.user_id,
                req.app.database
            )

    def llama_guard_layer(
        self,
        message: str,
        llama_guard: LlamaGuard,
        user_id: str,
        db: MongoDB
    ) -> None:
        """Classify *message* with Llama Guard; block the user on violation.

        Raises:
            HTTPException: 400 when the content is flagged as unsafe
                (the user is blocked in the database as a side effect).
        """
        response = llama_guard(message)
        # `llama_guard(...)` returns True for "safe"; a falsy result means
        # the content violated policy.
        if not response:
            _ = block_user(user_id, db)

            # NOTE(review): 403 Forbidden may fit better than 400 here —
            # kept as 400 to preserve the existing API contract.
            raise HTTPException(
                status_code=400,
                detail=f"""
                O conteúdo fornecido viola as políticas da plataforma.
                O seu usuário foi bloqueado.

                Retorno do LLAMA GUARD: {response}
                """
            )

    def check_user(self, user_id: str, db: MongoDB) -> None:
        """Raise HTTP 400 if the user is flagged as blocked in the database."""
        user_details = get_user_details(user_id, db)
        if user_details and user_details.get("blocked"):
            raise HTTPException(
                status_code=400,
                detail="""
                O usuário está bloqueado devido a violação de políticas.
                """
            )

    # More Guardrails could be added here.

src/api/routes/chat.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
from fastapi import APIRouter, status, Request, Depends
22

33
from src.api.models import APIResponse, APIRequest
4-
from src.api.controllers import chat_new_message
4+
from src.api.controllers import chat_new_message, Guardrail
55

66

77
router = APIRouter(
88
prefix="/chat",
99
tags=["chat"],
10-
# dependencies=[Depends(validate_user)]
10+
dependencies=[
11+
Depends(Guardrail())
12+
]
1113
)
1214

1315

src/infrastructure/config/settings.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ class Settings(BaseSettings):
2525
MODEL_TEMPERATURE: float = 0.2
2626
MODEL_API_KEY: str = ''
2727

28+
# LlamaGuard
29+
LLAMA_GUARD_MODEL: str = "llama-guard3"
30+
2831
class Config:
2932
env_file = ".env"
3033
extra = "ignore"

src/main.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,19 @@
11
from fastapi import FastAPI
22

33
from src.infrastructure.database import MongoDB
4-
from src.api.routes import chat_router
5-
from src.infrastructure.config import settings
64
from src.infrastructure.config.llm import LLM
5+
from src.services.llama_guard import LlamaGuard
6+
7+
from src.api.routes import chat_router
78

89

910
def create_app():
1011
app = FastAPI()
1112

1213
# defining API variables
13-
app.database = MongoDB(db_name=settings.MONGO_DB)
14-
app.llm = LLM(model_name=settings.MODEL)
15-
16-
# app.vector_store = ChromaDB()
17-
# app.llm = choose_model(model_name=settings.MODEL)
14+
app.database = MongoDB()
15+
app.llm = LLM()
16+
app.llama_guard = LlamaGuard()
1817

1918
# including routes
2019
app.include_router(chat_router)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .llama_guard import LlamaGuard
2+
3+
__all__ = ["LlamaGuard"]
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from langchain_ollama.llms import OllamaLLM
2+
from src.infrastructure.config import settings
3+
4+
5+
class LlamaGuard:
    """Thin wrapper around an Ollama-served Llama Guard moderation model.

    The instance is falsy (via ``__bool__``) when the underlying model
    could not be initialised, so callers can write
    ``if app.llama_guard:`` to skip moderation gracefully.
    """

    def __init__(self) -> None:
        try:
            self.llm = OllamaLLM(
                model=settings.LLAMA_GUARD_MODEL,
                base_url=settings.MODEL_URL,
            )
        except Exception:
            # Fix: the original bare ``except: return None`` left the
            # instance without an ``llm`` attribute while the object stayed
            # truthy, so the caller's `if app.llama_guard:` guard always
            # passed and later calls crashed with AttributeError. Record
            # the failure explicitly instead.
            self.llm = None

    def __bool__(self) -> bool:
        # Falsy when the model is unavailable — lets callers disable the guard.
        return self.llm is not None

    def __call__(self, message: str) -> bool:
        """Return True when Llama Guard classifies *message* as safe."""
        response = self.llm.invoke(message)
        # Llama Guard 3 replies "safe" or "unsafe\n<category>"; strip
        # surrounding whitespace/newlines before comparing.
        return response.strip() == "safe"

0 commit comments

Comments
 (0)