-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: main.py
More file actions
79 lines (64 loc) · 2.35 KB
/
main.py
File metadata and controls
79 lines (64 loc) · 2.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
'''
Main application for dynamic LLM routing using LangGraph
'''
import os
import sys
import logging
# add core and config to path for imports
sys.path.insert(0,os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from core import SemanticCache
from core import Router
from config import *
from config.logger_config import setup_logger
# Model configuration comes from `from config import *` above (MODELS_CONFIG).
# NOTE(review): the original `MODELS_CONFIG = MODELS_CONFIG` self-assignment was
# a no-op — the name is already bound by the star import (the assignment would
# otherwise raise NameError) — so it has been removed.

# Module logger; handlers/formatting are attached by setup_logger() in main().
logger = logging.getLogger("llm_router.main")
def process_query(query: str, router: Router) -> None:
    """Route a single query through *router* and log the outcome.

    Logs the classification and selected model on a cache miss, any error
    reported by the router, and the LLM response when one is present.
    """
    banner = "=" * 50
    logger.info("%s", banner)
    logger.info("Processing query")

    # Delegate routing (cache lookup, classification, model call) to the router.
    result = router.route(query)

    if result.get("cache_hit"):
        logger.info("Cache hit")
    else:
        logger.info("Classification: %s", result.get("classification"))
        logger.info("Selected model: %s", result.get("selected_model"))

    error = result.get("error")
    if error:
        logger.error("Error: %s", error)

    response = result.get("llm_response")
    if response:
        rule = "-" * 50
        logger.info("Response from %s", result.get("used_model", "unknown model"))
        logger.info("%s", rule)
        logger.info("%s", response)
        logger.info("%s", rule)
def main():
    """Wire up the routing components and run the example queries."""
    setup_logger("llm_router")

    # Build the collaborators the router needs.
    # NOTE(review): Classifier and LLMClient are not visibly imported at the top
    # of this file — presumably they arrive via `from config import *`; confirm.
    cache = SemanticCache(default_ttl=600)  # entries expire after 10 minutes
    classifier = Classifier()
    llm_client = LLMClient(MODELS_CONFIG)

    # Each tier entry is a sequence whose second element is the model name.
    tier_models = {
        tier: [entry[1] for entry in MODELS_CONFIG[tier]]
        for tier in ("tier1", "tier2", "tier3")
    }

    router = Router(
        models_config=tier_models,
        cache=cache,
        classifier=classifier,
        llm_client=llm_client,
        max_retries=3,
    )

    # Example queries spanning trivial lookup to multi-step planning.
    queries = [
        "What is the capital of Ghana?",
        "Explain quantum computing in simple terms.",
        "Create code for a simple weather application that takes a city name and displays the current temperature by calling a weather API.",
        "Develop a multi-step plan to reduce carbon emissions in a mid-sized city, considering economic, social, and political factors.",
    ]

    for q in queries:
        process_query(q, router)
# Run the example queries only when executed as a script, not on import.
if __name__ == "__main__":
    main()