agentcontrol · amabito · Mar 31, 2026 · Apr 2, 2026
diff --git a/evaluators/contrib/budget/README.md b/evaluators/contrib/budget/README.md
@@ -0,0 +1,3 @@
+# Budget Evaluator
+
+Cumulative LLM cost and token budget tracking for agent-control.
diff --git a/evaluators/contrib/budget/pyproject.toml b/evaluators/contrib/budget/pyproject.toml
@@ -0,0 +1,47 @@
+[project]  # Distribution metadata for the budget evaluator package.
+name = "agent-control-evaluator-budget"
+version = "0.1.0"
+description = "Budget evaluator for agent-control -- cumulative LLM cost and token tracking"
+readme = "README.md"
+requires-python = ">=3.12"
+license = { text = "Apache-2.0" }
+authors = [{ name = "Agent Control Team" }]
+dependencies = [
+    "agent-control-evaluators>=3.0.0",
+    "agent-control-models>=3.0.0",
+]
+
+[project.optional-dependencies]
+dev = [  # NOTE(review): pytest / pytest-asyncio floors here are lower than in [dependency-groups].dev below -- confirm which set is intended.
+    "pytest>=8.0.0",
+    "pytest-asyncio>=0.23.0",
+    "ruff>=0.1.0",
+    "mypy>=1.8.0",
+]
+
+[project.entry-points."agent_control.evaluators"]  # Entry-point group this evaluator is registered under.
+budget = "agent_control_evaluator_budget.budget:BudgetEvaluator"  # Entry name "budget" -> the BudgetEvaluator class.
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/agent_control_evaluator_budget"]  # src layout: the importable package lives under src/.
+
+[tool.ruff]
+line-length = 100
+target-version = "py312"  # Matches requires-python = ">=3.12" above.
+
+[tool.ruff.lint]
+select = ["E", "F", "I"]
+
+[tool.uv.sources]  # Point both agent-control dependencies at local editable paths.
+agent-control-evaluators = { path = "../../builtin", editable = true }
+agent-control-models = { path = "../../../models", editable = true }
+
+[dependency-groups]
+dev = [  # NOTE(review): overlaps the "dev" extra above but with higher minimum versions -- confirm the duplication is deliberate.
+    "pytest>=9.0.2",
+    "pytest-asyncio>=1.3.0",
+]
diff --git a/evaluators/contrib/budget/src/agent_control_evaluator_budget/__init__.py b/evaluators/contrib/budget/src/agent_control_evaluator_budget/__init__.py
diff --git a/evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/__init__.py b/evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/__init__.py
@@ -0,0 +1,18 @@
+"""Budget evaluator for per-agent LLM cost and token tracking."""
+
+from agent_control_evaluator_budget.budget.config import BudgetEvaluatorConfig
+from agent_control_evaluator_budget.budget.evaluator import (
+    BudgetEvaluator,
+    clear_budget_stores,
+)
+from agent_control_evaluator_budget.budget.memory_store import InMemoryBudgetStore
+from agent_control_evaluator_budget.budget.store import BudgetSnapshot, BudgetStore
+
+__all__ = [  # Public names of this package; each one is imported from a submodule above.
+    "BudgetEvaluator",
+    "BudgetEvaluatorConfig",
+    "BudgetSnapshot",
+    "BudgetStore",
+    "InMemoryBudgetStore",
+    "clear_budget_stores",
+]
diff --git a/evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/config.py b/evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/config.py
@@ -0,0 +1,98 @@
+"""Configuration for the budget evaluator."""
+
+from __future__ import annotations
+
+from agent_control_evaluators._base import EvaluatorConfig
+from pydantic import Field, field_validator, model_validator
+
+# ---------------------------------------------------------------------------
+# Window convenience constants (seconds)
+# ---------------------------------------------------------------------------
+
+WINDOW_HOURLY = 3600
+WINDOW_DAILY = 86400
+WINDOW_WEEKLY = 604800
+WINDOW_MONTHLY = 2592000  # 30 days
+
+
+class BudgetLimitRule(EvaluatorConfig):
+    """A single budget limit rule.
+
+    Each rule defines a ceiling for a combination of scope dimensions
+    and time window. Multiple rules can apply to the same step -- the
+    evaluator checks all of them and triggers on the first breach.
+
+    Cost limits are denominated in USD minor units (cents). Token limits
+    are orthogonal and tracked independently.
+
+    Attributes:
+        scope: Static scope dimensions that must match for this rule
+            to apply. Empty dict = global rule.
+            Examples:
+                {"agent": "summarizer"} -- per-agent limit
+                {"agent": "summarizer", "channel": "slack"} -- agent+channel limit
+        group_by: If set, the limit is applied independently for each
+            unique value of this dimension. e.g. group_by="user_id" means
+            each user gets their own budget. None = shared/global limit.
+        window_seconds: Time window for accumulation in seconds.
+            None = cumulative (no reset). See WINDOW_* constants.
+        limit: Maximum spend in the window, in cents (USD).
+            None = uncapped on cost.
+        limit_tokens: Maximum tokens in the window. None = uncapped.
+    """
+
+    scope: dict[str, str] = Field(default_factory=dict)
+    group_by: str | None = None
+    window_seconds: int | None = None
+    limit: int | None = None
+    limit_tokens: int | None = None
+
+    @model_validator(mode="after")
+    def at_least_one_limit(self) -> "BudgetLimitRule":
+        if self.limit is None and self.limit_tokens is None:
+            raise ValueError("At least one of limit or limit_tokens must be set")
+        return self
+
+    @field_validator("limit")
+    @classmethod
+    def validate_limit(cls, v: int | None) -> int | None:
+        if v is not None and v <= 0:
+            raise ValueError("limit must be a positive integer")
+        return v
+
+    @field_validator("limit_tokens")
+    @classmethod
+    def validate_limit_tokens(cls, v: int | None) -> int | None:
+        if v is not None and v <= 0:
+            raise ValueError("limit_tokens must be positive")
+        return v
+
+    @field_validator("window_seconds")
+    @classmethod
+    def validate_window_seconds(cls, v: int | None) -> int | None:
+        if v is not None and v <= 0:
+            raise ValueError("window_seconds must be positive")
+        return v
+
+
+class BudgetEvaluatorConfig(EvaluatorConfig):
+    """Configuration for the budget evaluator.
+
+    Attributes:
+        limits: List of budget limit rules. Each is checked independently.
+        pricing: Optional model pricing table. Maps model name to per-1K
+            token rates in cents. Used to derive cost from token counts
+            and model name.
+        token_path: Dot-notation path to extract token usage from step
+            data (e.g. "usage.total_tokens"). If None, looks for standard
+            fields (input_tokens, output_tokens, total_tokens, usage).
+        model_path: Dot-notation path to extract model name (for pricing lookup).
+        metadata_paths: Mapping of metadata field name to dot-notation path
+            in step data. Used to extract scope dimensions (channel, user_id, etc).
+    """
+
+    limits: list[BudgetLimitRule] = Field(min_length=1)
+    pricing: dict[str, dict[str, float]] | None = None
+    token_path: str | None = None
+    model_path: str | None = None
+    metadata_paths: dict[str, str] = Field(default_factory=dict)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		# Budget Evaluator

		Cumulative LLM cost and token budget tracking for agent-control.