Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions evaluators/contrib/budget/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Budget Evaluator

Cumulative LLM cost and token budget tracking for agent-control.
47 changes: 47 additions & 0 deletions evaluators/contrib/budget/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Packaging metadata for the budget evaluator distribution.
[project]
name = "agent-control-evaluator-budget"
version = "0.1.0"
description = "Budget evaluator for agent-control -- cumulative LLM cost and token tracking"
readme = "README.md"
requires-python = ">=3.12"
license = { text = "Apache-2.0" }
authors = [{ name = "Agent Control Team" }]
dependencies = [
    "agent-control-evaluators>=3.0.0",
    "agent-control-models>=3.0.0",
]

# Extras installable via `pip install <pkg>[dev]`.
[project.optional-dependencies]
dev = [
    "pytest>=8.0.0",
    "pytest-asyncio>=0.23.0",
    "ruff>=0.1.0",
    "mypy>=1.8.0",
]

# Registers BudgetEvaluator under the name "budget" in the
# "agent_control.evaluators" entry-point group.
[project.entry-points."agent_control.evaluators"]
budget = "agent_control_evaluator_budget.budget:BudgetEvaluator"

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# The importable package lives under src/ (src layout).
[tool.hatch.build.targets.wheel]
packages = ["src/agent_control_evaluator_budget"]

# Lint target matches the requires-python floor declared above.
[tool.ruff]
line-length = 100
target-version = "py312"

# E = pycodestyle errors, F = pyflakes, I = import sorting.
[tool.ruff.lint]
select = ["E", "F", "I"]

# Resolve the sibling agent-control packages from local paths (editable)
# instead of a package index when using uv.
[tool.uv.sources]
agent-control-evaluators = { path = "../../builtin", editable = true }
agent-control-models = { path = "../../../models", editable = true }

# NOTE(review): the minimum versions here (pytest>=9.0.2, pytest-asyncio>=1.3.0)
# differ from [project.optional-dependencies].dev (pytest>=8.0.0,
# pytest-asyncio>=0.23.0) -- confirm which floor is intended.
[dependency-groups]
dev = [
    "pytest>=9.0.2",
    "pytest-asyncio>=1.3.0",
]
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""Budget evaluator for per-agent LLM cost and token tracking."""

from agent_control_evaluator_budget.budget.config import BudgetEvaluatorConfig
from agent_control_evaluator_budget.budget.evaluator import (
BudgetEvaluator,
clear_budget_stores,
)
from agent_control_evaluator_budget.budget.memory_store import InMemoryBudgetStore
from agent_control_evaluator_budget.budget.store import BudgetSnapshot, BudgetStore

# Public API of this package: every name listed here is re-exported from the
# submodules imported above, so callers can import them from the package root.
__all__ = [
    "BudgetEvaluator",
    "BudgetEvaluatorConfig",
    "BudgetSnapshot",
    "BudgetStore",
    "InMemoryBudgetStore",
    "clear_budget_stores",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
"""Configuration for the budget evaluator."""

from __future__ import annotations

from agent_control_evaluators._base import EvaluatorConfig
from pydantic import Field, field_validator, model_validator

# ---------------------------------------------------------------------------
# Common accumulation-window lengths, expressed in seconds
# ---------------------------------------------------------------------------

WINDOW_HOURLY = 60 * 60
WINDOW_DAILY = 24 * WINDOW_HOURLY
WINDOW_WEEKLY = 7 * WINDOW_DAILY
WINDOW_MONTHLY = 30 * WINDOW_DAILY  # a "month" is fixed at 30 days


class BudgetLimitRule(EvaluatorConfig):
    """One ceiling on spend and/or token usage.

    A rule pairs a set of scope dimensions with an optional time window
    and caps what may accumulate inside it. Several rules may match the
    same step; the evaluator checks every one and triggers on the first
    breach.

    Cost caps are expressed in USD minor units (cents). Token caps are
    tracked separately from cost caps.

    Attributes:
        scope: Fixed dimension values a step must match for the rule to
            apply. An empty dict makes the rule global.
            Examples:
                {"agent": "summarizer"} -- per-agent limit
                {"agent": "summarizer", "channel": "slack"} -- agent+channel limit
        group_by: Name of a dimension to partition the budget by. When
            set (e.g. "user_id"), each distinct value of that dimension
            gets its own independent budget. None means one shared budget.
        window_seconds: Length of the accumulation window in seconds.
            None means usage accumulates forever with no reset. The
            WINDOW_* constants provide common values.
        limit: Spend ceiling for the window, in cents (USD).
            None leaves cost uncapped.
        limit_tokens: Token ceiling for the window. None leaves tokens
            uncapped.

    Raises:
        ValueError: During validation, if neither ``limit`` nor
            ``limit_tokens`` is set, or if ``limit``, ``limit_tokens`` or
            ``window_seconds`` is set to a value that is not strictly
            positive.
    """

    scope: dict[str, str] = Field(default_factory=dict)
    group_by: str | None = None
    window_seconds: int | None = None
    limit: int | None = None
    limit_tokens: int | None = None

    @model_validator(mode="after")
    def at_least_one_limit(self) -> "BudgetLimitRule":
        """Reject rules that cap neither cost nor tokens."""
        has_cap = self.limit is not None or self.limit_tokens is not None
        if not has_cap:
            raise ValueError("At least one of limit or limit_tokens must be set")
        return self

    @field_validator("limit")
    @classmethod
    def validate_limit(cls, v: int | None) -> int | None:
        """Allow None (uncapped) or a strictly positive cost ceiling."""
        if v is None or v > 0:
            return v
        raise ValueError("limit must be a positive integer")

    @field_validator("limit_tokens")
    @classmethod
    def validate_limit_tokens(cls, v: int | None) -> int | None:
        """Allow None (uncapped) or a strictly positive token ceiling."""
        if v is None or v > 0:
            return v
        raise ValueError("limit_tokens must be positive")

    @field_validator("window_seconds")
    @classmethod
    def validate_window_seconds(cls, v: int | None) -> int | None:
        """Allow None (no reset) or a strictly positive window length."""
        if v is None or v > 0:
            return v
        raise ValueError("window_seconds must be positive")


class BudgetEvaluatorConfig(EvaluatorConfig):
    """Top-level settings for the budget evaluator.

    Attributes:
        limits: The budget limit rules to enforce; at least one is
            required. Each rule is checked independently.
        pricing: Optional pricing table keyed by model name. Each entry
            maps to per-1K-token rates in cents, used to derive cost from
            token counts and the model name.
        token_path: Dot-notation path to the token usage inside the step
            data (e.g. "usage.total_tokens"). When None, the standard
            fields are consulted instead (input_tokens, output_tokens,
            total_tokens, usage).
        model_path: Dot-notation path to the model name, used for the
            pricing lookup.
        metadata_paths: Maps a metadata field name to the dot-notation
            path in the step data it is read from. Used to extract scope
            dimensions (channel, user_id, etc).
    """

    limits: list[BudgetLimitRule] = Field(..., min_length=1)
    pricing: dict[str, dict[str, float]] | None = Field(default=None)
    token_path: str | None = Field(default=None)
    model_path: str | None = Field(default=None)
    metadata_paths: dict[str, str] = Field(default_factory=dict)
Loading