From 28c47c43c446daab2d3a20d2520f969e6bec4607 Mon Sep 17 00:00:00 2001 From: Juan Manuel Rodriguez Defago Date: Mon, 16 Mar 2026 10:59:48 -0300 Subject: [PATCH 01/17] feat: revised MVP scope --- docs/mvp-scope.md | 331 ++++++++++++++++ plans/implementation-backlog.md | 79 +++- plans/mvp-implementation-backlog.md | 561 ++++++++++++++++++++++++++++ 3 files changed, 970 insertions(+), 1 deletion(-) create mode 100644 docs/mvp-scope.md create mode 100644 plans/mvp-implementation-backlog.md diff --git a/docs/mvp-scope.md b/docs/mvp-scope.md new file mode 100644 index 0000000..0fcbc2d --- /dev/null +++ b/docs/mvp-scope.md @@ -0,0 +1,331 @@ +# Substreams Data Service MVP Scope + +Drafted: 2026-03-12 + +## Purpose + +This document defines the target MVP for Substreams Data Service (SDS). + +It is intended to be the stable source of truth for: + +- engineering scope +- product and operational scope +- architectural decisions and their rationale +- MVP acceptance scenarios +- explicit non-goals and open questions + +It is not a task tracker. Detailed current-state assessment and implementation tracking should live in a separate document. + +## Audience + +This document is written for: + +- SDS engineers +- product and planning stakeholders +- external collaborators such as StreamingFast + +## MVP Definition + +The SDS MVP is a usable end-to-end payment-enabled Substreams stack, not just a local demo. It must support real provider discovery, real consumer and provider integration paths, paid streaming with provider-authoritative byte metering, live low-funds handling, durable provider-side payment state, manual operator-driven funding and settlement workflows, and a production-oriented transport/security posture. + +The MVP may intentionally simplify parts of the system where doing so materially reduces implementation complexity without invalidating the architecture. 
In particular, the MVP may assume session-local funding logic and defer correct payer-level aggregate exposure handling across concurrent streams. + +## Current Status Summary + +As of 2026-03-12, the repo already contains important parts of the MVP foundation: + +- working Horizon V2 / TAP signing, verification, and aggregation +- deterministic local chain/contracts and integration coverage +- consumer sidecar and provider gateway RPC surfaces +- sidecar-to-gateway session start and bidirectional payment session flow +- provider-side Firehose plugin services (`auth`, `session`, `usage`) +- a development/demo stack and sink wrapper + +However, the current repo does not yet constitute the MVP. Major remaining gaps include: + +- standalone oracle/discovery component +- real production-path provider and consumer integration completion +- provider-side durable persistence for accepted RAV and collection state +- low-funds stop/pause behavior during live streaming +- operator funding and settlement CLI flows +- authenticated admin/operator surfaces +- finalization of several protocol decisions called out below as open questions + +See `plans/mvp-gap-analysis.md` for a detailed status map. + +## Goals + +- Deliver a full SDS stack that can be used against a real provider deployment, initially expected to be StreamingFast. +- Make the consumer sidecar the mandatory client-side integration component. +- Use a standalone oracle service for provider discovery, while still supporting direct provider configuration as fallback. +- Use provider-authoritative byte metering as the billing source of truth. +- Support reconnect/resume behavior without making consumer-local persistence mandatory. +- Preserve accepted RAV and settlement-relevant state durably on the provider side. +- Support manual operator-driven funding and collection workflows through CLI tooling. +- Use TLS by default outside local/dev usage. 
+ +## Non-Goals + +- Correct aggregate funding/exposure handling across multiple concurrent streams for the same payer. +- Blocking concurrent streams at runtime. +- Permissionless oracle provider sourcing from on-chain registry data. +- Wallet-connected end-user funding UI. +- Automated/background settlement collection. +- Rich provider ranking or QoS-based oracle selection. +- Full observability hardening with finalized metrics/tracing strategy. + +## Key Workflows + +### 1. Discover Provider + +- The consumer sidecar queries a standalone oracle service. +- The oracle receives the requested chain/network context. +- The oracle returns: + - the eligible provider set + - a recommended provider choice +- The consumer sidecar uses the recommended provider by default. +- Direct provider configuration remains a supported fallback path. + +### 2. Initialize or Reconnect a Paid Session + +- The consumer sidecar initiates the provider handshake. +- The provider responds with either: + - a fresh zero-value RAV for a new handshake, or + - the latest known resumable RAV for a reconnect +- The provider remains the authoritative side for accepted payment state. +- Recovery should be folded into the initial handshake rather than introduced as a separate recovery endpoint. + +### 3. Stream and Update Payment State + +- The real provider integration path meters streamed bytes using the provider-side metering plugin path. +- The provider is authoritative for billable usage. +- The consumer sidecar participates in payment/session control but is not authoritative for billed byte usage. +- While streaming: + - provider reports/payment state advances + - RAVs are requested and updated as needed + - low-funds conditions can be surfaced during the live stream +- For MVP, low-funds decisions are session-local, not payer-global across concurrent streams. + +### 4. Fund or Top Up Escrow + +- Funding is an operator/developer workflow, not an end-user wallet UI. 
+- CLI tooling should make approve/deposit/top-up simple enough for MVP operations. +- The system should surface when additional funding is needed, but the act of funding remains external to the core runtime path. + +### 5. Collect Accepted RAVs On-Chain + +- The provider side stores accepted collectible RAV state durably. +- Operator CLI tooling queries the provider for settlement-relevant RAV data. +- The CLI crafts, signs, and submits the `collect()` transaction locally. +- The settlement signing key remains outside the provider sidecar. +- Collection state should distinguish between at least: + - `collectible` + - `collect_pending` + - `collected` + +## Major MVP Decisions + +| Decision | MVP Choice | Short Rationale | +| --- | --- | --- | +| Consumer integration | Consumer sidecar is mandatory | SDS is a full stack, not a loose protocol suggestion | +| Provider discovery | Standalone oracle component | Discovery is a real product component and should exist independently even if initial logic is simple | +| Oracle selection logic | Whitelist plus simple selection among eligible providers | Good enough for MVP while preserving future ranking logic | +| Oracle response shape | Return eligible providers plus a recommended provider | Keeps default client behavior simple while preserving future flexibility | +| Direct provider connection | Supported as fallback | Useful bridge from current implementation and operational fallback | +| Funding model | Session-local low-funds logic | Avoids premature distributed liability accounting for concurrent streams | +| Concurrent streams | Documented limitation, not blocked | Simpler MVP with explicit limitation instead of partial enforcement | +| Billing unit | Streamed bytes | Aligns with provider-authoritative metering path | +| Funding UX | CLI/operator-driven | Avoids premature UI scope | +| Collection execution | CLI signs and submits locally | Keeps settlement key custody outside provider sidecar | +| Provider payment 
state | Durable persistence required | Losing accepted RAV state is unacceptable | +| Consumer persistence | Not required for MVP | Better to recover from provider-authoritative state during handshake | +| Recovery shape | Part of initial handshake | Avoids separate recovery API surface | +| Security posture | TLS by default outside local/dev | Better security without forcing heavy hardening | +| Admin/operator actions | Require authentication | Oracle governance and provider operations should not be effectively public | +| Real integration | Real provider and consumer paths are mandatory | Local demo flow is insufficient for MVP | +| Validation scope | One real provider environment is enough for MVP acceptance | Narrow operational validation is acceptable if architecture stays generic | + +## Why Multi-Stream Support Is Deferred + +Correct multi-stream support is more than summing usage across sessions. + +If a single payer can run multiple streams from different machines, correct funding control requires: + +- a provider-authoritative global liability ledger keyed by payer or payment identity +- durable shared state across provider instances +- session liveness and stale-session cleanup +- race-safe exposure accounting when streams start concurrently +- clear rules for pending requested RAVs, accepted RAVs, and unaggregated usage +- restart and resume semantics that avoid duplicated or lost liability + +That is a materially larger distributed-state problem than the session-local MVP design. The MVP therefore documents concurrent streams as a known limitation for funding-control correctness and does not attempt to enforce or fully solve them. 
+ +## Component Deliverables + +### Oracle + +- Standalone service and deployment unit +- Manually managed provider whitelist +- Provider selection based on minimal metadata, at least: + - endpoint information + - chain/network eligibility + - possibly pricing metadata +- Returns eligible providers plus one recommended provider +- Administrative/governance actions require authentication + +### Consumer Sidecar + +- Mandatory client-side SDS integration component +- Supports oracle-backed discovery and direct provider fallback +- Performs session initialization with provider +- Participates in reconnect flow where provider may return fresh or latest known RAV +- Maintains payment/session coordination during streaming +- Works with the real client integration path, not only demo wrappers +- Does not require durable local persistence for MVP + +### Provider Gateway / Provider Integration + +- Real integration into the provider path is mandatory +- Validates payment/session state for real streaming traffic +- Uses provider-authoritative byte metering from plugin/integration path +- Drives RAV request/response flow +- Handles live low-funds conditions during streaming +- Persists accepted RAV and settlement-relevant state durably +- Exposes authenticated operator/admin surfaces for inspection and settlement data retrieval + +### Provider State Storage + +- Durable persistence for accepted RAV state +- Durable persistence for settlement-relevant metadata and collection lifecycle state +- Durable persistence should survive provider restarts +- Storage model should support: + - latest accepted collectible state + - session/runtime state + - collection status tracking + +### CLI / Operator Tooling + +- Funding flows: + - approve + - deposit + - top up +- Settlement flows: + - inspect collectible state + - fetch settlement-relevant RAV data + - craft and submit `collect()` transaction locally + - inspect or retry pending collection attempts +- Tooling should be sufficient 
for operators without requiring a dedicated UI + +### Security and Admin Surfaces + +- TLS enabled by default for non-dev usage +- Plaintext allowed only for local/dev/demo workflows +- Authenticated admin/operator actions for: + - oracle management + - provider inspection + - collection-data retrieval +- Final auth mechanism remains an implementation choice + +### Observability + +- Sufficient operational visibility for MVP +- Structured logs and status/inspection tools are required +- Richer metrics/tracing strategy remains open + +## Operational Deliverables + +- Providers can restart without losing accepted collectible RAV state +- Operators can inspect session/payment/collection state +- Operators can fund and top up escrow through CLI workflows +- Operators can perform manual on-chain collection through CLI workflows +- The system can surface low-funds conditions during active streams +- Recovery/reconnect behavior is defined well enough for operators to understand expected runtime behavior + +## Acceptance Scenarios + +The scenarios below are the primary definition of done for the MVP. + +### A. Discovery to Paid Streaming + +- Consumer sidecar queries the oracle for a required chain/network +- Oracle returns eligible providers plus a recommended choice +- Consumer sidecar uses the recommended provider +- Provider handshake succeeds +- Real streaming begins through the production integration path +- Byte metering occurs on the provider side +- Payment state advances correctly during streaming + +### B. Reconnect and Resume + +- An active SDS session is interrupted +- Consumer sidecar reconnects through the normal handshake path +- Provider responds with the appropriate fresh or resumable RAV state +- Streaming resumes without losing the authoritative accepted payment state + +### C. 
Low Funds During Streaming + +- Streaming starts with initially sufficient funds +- Usage progresses until provider-side session-local funding logic determines funds are too low +- Provider surfaces the low-funds condition during the live stream +- The client path receives and reacts to the stop/pause decision correctly + +### D. Provider Restart Without Losing Collectible State + +- Provider accepts at least one updated RAV +- Provider process restarts +- Accepted collectible RAV state remains available after restart +- Operator can still inspect and use that state for settlement + +### E. Manual Funding Flow + +- Operator can approve token spend and deposit/top up escrow through CLI tooling +- The resulting on-chain funding state is usable by SDS runtime flows + +### F. Manual Collection Flow + +- Provider exposes settlement-relevant accepted RAV data +- CLI fetches that data +- CLI crafts, signs, and submits the `collect()` transaction locally +- Collection can be retried safely if needed +- Provider-side collection state can distinguish pending vs completed status + +### G. 
Secure Deployment Posture + +- Non-dev deployments use TLS by default +- Operator/admin actions are authenticated +- Local/dev/demo workflows may still use simpler transport settings explicitly + +## Known Limitations for MVP + +- Funding-control correctness is session-local, not payer-global across concurrent streams +- Concurrent streams are not blocked, only documented as a limitation +- Funding remains an operator/developer workflow rather than end-user wallet UI +- Collection remains operator-driven rather than automatic +- Oracle provider set is manually curated rather than permissionless +- Observability scope is intentionally basic + +## Post-MVP Follow-Ups + +- Correct multi-stream aggregate exposure handling +- Permissionless oracle sourcing from the Substreams Data Service contract registry +- Richer oracle metadata and provider ranking +- Automated/background collection using a separate settlement agent +- Better consumer recovery semantics if needed beyond handshake-based recovery +- Better funding UX, including possible wallet-connected UI +- Stronger observability and operational tooling + +## Open Questions + +- Should chain/network be derived automatically from the Substreams package, or supplied explicitly to the oracle query path? +- What is the pricing authority contract between oracle metadata and provider handshake responses? +- What is the exact canonical payment identity and `collection_id` reuse policy for fresh workloads versus reconnects? +- How much of the reconnect/recovery state should be keyed by session versus on-chain payment identity? +- Should simple observability for MVP include metrics endpoints, or are structured logs plus inspection/status tooling sufficient? +- What exact authentication mechanism should protect provider and oracle admin/operator surfaces? 
+ +## References + +- `docs/phase1-sidecar-spec.md` +- `plans/implementation-backlog.md` +- `plans/component-task-breakdown.md` +- `README.md` diff --git a/plans/implementation-backlog.md b/plans/implementation-backlog.md index 281bb61..4c4ee3f 100644 --- a/plans/implementation-backlog.md +++ b/plans/implementation-backlog.md @@ -1,6 +1,6 @@ # Substreams Data Service — Implementation Backlog -_Last updated: 2026-03-04_ +_Last updated: 2026-03-09_ This repo already contains a working **Horizon V2 (TAP) signing/verification core** (`horizon/`) and a **development environment + integration tests** (`horizon/devenv/`, `test/integration/`). @@ -58,6 +58,83 @@ See also: `docs/agent-workflow.md` for the step-by-step implementation/verificat --- +## Recommended Next Path (2026-03-09) + +This section captures the current implementation analysis after the demo wiring work (`sds demo setup`, `.reflex.stack`, `devel/sds_sink`, provider-authoritative pricing, on-chain signer authorization, and sidecar↔sidecar `PaymentSession` flow). + +### What is already solid enough + +- The happy-path protocol is now demonstrable: + - consumer `Init` ↔ provider `StartSession` + - shared session IDs + session resumption + - bidirectional `PaymentSession` + - provider-driven `rav_request` + - provider-authoritative cost computation + - consumer `EndSession` propagating session close + - on-chain signer authorization via `isAuthorized` +- The repo also has a realistic manual stack: + - `sds devenv` + - `sds demo setup` + - `reflex -c .reflex.stack` + - `./devel/sds_sink run ...` + +### Highest-priority implementation sequence + +1. **Provider exposure control first** (`SDS-022` -> `SDS-016`) + - Implement aggregate outstanding-liability tracking per payer/collection across concurrent sessions. + - Use that ledger plus escrow balance checks to drive `need_more_funds` and provider `Continue` / `Pause` / `Stop` decisions. + - Rationale: this is the main missing product-safety loop. 
Without it, the provider can still over-serve even though the happy path works. + +2. **Make the real streaming demo obey stop decisions** (`SDS-038`) + - `sds sink run` should terminate promptly when `ReportUsage` returns `should_continue=false`, surface the stop reason, and still best-effort `EndSession`. + - Rationale: once provider exposure control exists, the primary demo must prove that the client path actually honors those decisions. + +3. **Stabilize RAV issuance policy** (`SDS-020`) + - Add deterministic signing thresholds (value/time/provider-request) instead of signing on every usage report. + - Rationale: this is needed before real load and before upstream integrations rely on the current behavior. + +4. **Freeze the wire contract before deeper integration** (`SDS-008` + `SDS-028`) + - Define the `metadata` schema and the client↔provider payment/header contract together. + - This should explicitly cover: + - canonical serialized payment material, + - session binding, + - signature encoding, + - any metadata used for replay protection / request correlation. + - Rationale: the older phase 1 spec still reflects an earlier control-plane shape; the current repo has moved toward `x-sds-rav` headers and SDS/firecore plugin integration, so the protocol contract needs to be frozen before cross-repo work deepens. + +5. **Then wire into the real client/provider paths** (`SDS-029` + `SDS-030`) + - Provider side: the live tier1/provider path must validate payment on connect, report metering from the live stream, and act on `Continue` / `Pause` / `Stop`. + - Consumer side: the real substreams client path must call `Init` / `ReportUsage` / `EndSession` and keep payment state current without relying on the demo wrapper. + - Rationale: this is the point where the project stops being a demo scaffold and becomes the actual product integration. + +6. 
**Only after that, finalize settlement workflow** (`SDS-021`) + - Decide who triggers `collect()` and how operators run it (admin RPC, daemon, or external workflow). + - Rationale: important for monetization, but downstream of correct off-chain liability and stream-control behavior. + +7. **Finish production hardening after protocol semantics are stable** (`SDS-024`, `SDS-023`, `SDS-025`, `SDS-026`, `SDS-027`) + - Persistence / restart semantics first. + - TTL / cleanup next. + - Then transport security, observability, and abuse protection. + - Rationale: all are required for production, but they should land after the economic/control semantics stop moving. + +### Additional tasks not yet tracked explicitly + +- **Define exact live-stream metering semantics.** + - We have chosen provider-authoritative cost calculation, but we still need a crisp contract for what exact usage signals are billed in the real provider integration path. + - This likely belongs under `SDS-019`/`SDS-028`, but it is important enough to call out explicitly before deeper tier1/client integration. + +- **Refresh the architecture/protocol docs once the wire contract is frozen.** + - The phase 1 spec remains useful for the big picture, but parts of it no longer match the current implementation direction (notably the API-key-oriented flow versus the current sidecar + header + firecore/plugin path). + - This should happen once `SDS-008`/`SDS-028` are settled, so future work is guided by the current architecture rather than the older draft. + +### Not the highest leverage right now + +- `SDS-039` is partially addressed already by README/demo documentation; the remaining value is mostly in a clearer runtime preflight/failure mode. +- `SDS-032` and `SDS-033` are useful cleanup, but they do not materially move protocol completeness or product readiness. +- Multi-provider/oracle work from the older spec should remain deferred until the single-provider path is complete and robust. 
+ +--- + ## Status Tracker Status values: diff --git a/plans/mvp-implementation-backlog.md b/plans/mvp-implementation-backlog.md new file mode 100644 index 0000000..780b7b8 --- /dev/null +++ b/plans/mvp-implementation-backlog.md @@ -0,0 +1,561 @@ +# Substreams Data Service — MVP Implementation Backlog + +_Last updated: 2026-03-13_ + +This document translates `docs/mvp-scope.md` into concrete implementation tasks for the MVP. + +It is intentionally separate from `plans/implementation-backlog.md`. + +Rationale for the split: + +- `plans/implementation-backlog.md` reflects the earlier implementation sequence and still contains useful historical context +- this document reflects the revised MVP scope agreed after the MVP rescoping work +- the MVP scope is now broader than the original sidecar-centric backlog and includes new deliverables such as the oracle, operator tooling, and provider-side persistence/settlement workflows + +This document is a scope-aligned execution backlog, not a priority list. + +## How To Use This Document + +- Use `docs/mvp-scope.md` as the stable target-state definition. +- Use `plans/mvp-gap-analysis.md` for current-state assessment. +- Use this file to define the concrete MVP implementation work that remains. 
+ +Each task includes: + +- **Context**: why the task exists +- **Assumptions**: design assumptions or unresolved questions that affect the task definition +- **Done when**: objective completion criteria +- **Verify**: how to corroborate the behavior + +The status tracker below also includes: + +- **Depends on**: tasks that should be frozen or completed first so downstream work does not build on moving semantics +- **Scenarios**: acceptance scenarios from `docs/mvp-scope.md` (`A` through `G`) that the task materially contributes to + +Unless otherwise scoped, the baseline validation for code changes remains: + +- `go test ./...` +- `go vet ./...` +- `gofmt` on changed Go files + +## Assumptions Register + +These assumptions are referenced by task ID so it is clear where unresolved decisions still matter. + +- `A1` Chain/network discovery input is still open. + - MVP work should support an explicit chain/network input path now. + - Automatic derivation from the Substreams package remains optional/open. + +- `A2` Pricing authority between oracle metadata and provider handshake is still open. + - MVP work should avoid hard-coding a final authority rule unless/until aligned with StreamingFast. + +- `A3` Canonical payment identity and `collection_id` reuse semantics are still open. + - MVP work should isolate persistence and reconnect logic behind a model that can evolve without a full rewrite. + +- `A4` Observability scope beyond logs/status tooling is still open. + - MVP work should implement structured logging and inspection/status surfaces without forcing a final metrics/tracing backend choice. + +- `A5` Admin/operator authentication mechanism is still open. + - MVP work should require authentication and keep the implementation pluggable enough to avoid boxing in the final auth choice. + +- `A6` MVP funding-control logic is intentionally session-local. + - Do not require aggregate concurrent-stream liability tracking to complete MVP. 
+ +## Status Values + +- `not_started` +- `in_progress` +- `blocked` +- `open_question` +- `done` +- `deferred` + +`open_question` tasks still need a concrete output: + +- a documented decision, narrowed contract, or explicit recorded deferral that downstream implementation tasks can reference + +## Status Tracker + +| ID | Status | Area | Assumptions | Depends on | Scenarios | Task | +| --- | --- | --- | --- | --- | --- | --- | +| MVP-001 | `open_question` | protocol | `A2` | none | `A` | Freeze the pricing exposure contract between oracle metadata and provider handshake | +| MVP-002 | `not_started` | protocol | `A1`, `A3` | `MVP-027` | `B` | Freeze reconnect handshake semantics so provider can return fresh or latest-known resumable RAV during normal session init | +| MVP-003 | `not_started` | protocol | `A3` | `MVP-027` | `B`, `D`, `F` | Define the durable provider-side payment and settlement data model | +| MVP-004 | `not_started` | protocol | none | none | `A`, `B`, `C` | Define and document the byte-billing and payment/header contract used in the real runtime path | +| MVP-005 | `not_started` | oracle | `A1`, `A2`, `A5` | `MVP-033` | `A` | Implement a standalone oracle service with manual whitelist and recommended-provider response | +| MVP-006 | `not_started` | oracle | `A5` | `MVP-028` | `A`, `G` | Add authenticated oracle administration for whitelist and provider metadata management | +| MVP-007 | `not_started` | consumer | `A1`, `A2` | `MVP-001`, `MVP-005`, `MVP-033` | `A` | Integrate consumer sidecar with oracle discovery while preserving direct-provider fallback | +| MVP-008 | `not_started` | provider-state | `A3`, `A6` | `MVP-003` | `B`, `D`, `F` | Add durable provider storage for accepted RAV, session state, and collection lifecycle state | +| MVP-009 | `not_started` | provider-state | `A3` | `MVP-003`, `MVP-029` | `D`, `F` | Expose provider inspection and settlement-data retrieval APIs for accepted/collectible RAV state | +| MVP-010 | `not_started` | 
funding-control | `A6` | `MVP-004` | `C` | Implement session-local low-funds detection and provider Continue/Pause/Stop decisions during streaming | +| MVP-011 | `not_started` | funding-control | `A6` | `MVP-010` | `C` | Propagate provider stop/pause decisions through consumer sidecar into the real client path | +| MVP-012 | `not_started` | funding-control | none | `MVP-004` | `A`, `C` | Add deterministic RAV issuance thresholds suitable for real runtime behavior | +| MVP-013 | `not_started` | consumer | `A3` | `MVP-002`, `MVP-008` | `B` | Implement provider-authoritative reconnect/resume in the normal handshake path | +| MVP-014 | `not_started` | provider-integration | none | `MVP-004` | `A` | Integrate provider gateway validation into the real provider streaming path | +| MVP-015 | `not_started` | provider-integration | none | `MVP-004`, `MVP-014` | `A`, `C` | Wire real byte metering from the provider/plugin path into gateway payment state | +| MVP-016 | `not_started` | provider-integration | `A6` | `MVP-010`, `MVP-014` | `C` | Enforce gateway Continue/Pause/Stop decisions in the live provider stream lifecycle | +| MVP-017 | `not_started` | consumer-integration | `A1` | `MVP-007`, `MVP-011`, `MVP-033` | `A`, `C` | Integrate the real consumer/client path with consumer sidecar init, usage reporting, and session end | +| MVP-018 | `not_started` | tooling | none | `MVP-032` | `E` | Implement operator funding CLI flows for approve/deposit/top-up beyond local demo assumptions | +| MVP-019 | `not_started` | tooling | `A3`, `A5` | `MVP-009`, `MVP-022` | `D`, `F` | Implement provider inspection CLI flows for collectible/accepted RAV data | +| MVP-020 | `not_started` | tooling | `A3` | `MVP-009`, `MVP-029` | `F` | Implement manual collection CLI flow that crafts/signs/submits collect transactions locally | +| MVP-021 | `not_started` | security | `A5` | none | `G` | Make TLS the default non-dev runtime posture for oracle, sidecar, and provider integration paths | +| MVP-022 
| `not_started` | security | `A5` | `MVP-009`, `MVP-028` | `D`, `F`, `G` | Add authentication and authorization to provider admin/operator APIs | +| MVP-023 | `open_question` | observability | `A4` | none | `A`, `B`, `C`, `D`, `F`, `G` | Define the final MVP observability floor beyond structured logs and status tooling | +| MVP-024 | `not_started` | observability | `A4` | `MVP-023` | `B`, `C`, `D`, `F`, `G` | Implement basic operator-facing inspection/status surfaces and log correlation | +| MVP-025 | `not_started` | validation | none | none | `A`, `B`, `C`, `D`, `E`, `F`, `G` | Add MVP acceptance coverage for the primary end-to-end scenarios in docs/tests/manual verification | +| MVP-026 | `not_started` | docs | `A1`, `A2`, `A3`, `A4`, `A5` | `MVP-001`, `MVP-002`, `MVP-003`, `MVP-004`, `MVP-023`, `MVP-027`, `MVP-028`, `MVP-033` | `A`, `B`, `C`, `D`, `E`, `F`, `G` | Refresh protocol/runtime docs so they match the MVP architecture and explicit open questions | +| MVP-027 | `open_question` | protocol | `A3` | none | `B`, `D`, `F` | Freeze canonical payment identity, `collection_id` reuse, and session-vs-payment keying semantics | +| MVP-028 | `open_question` | security | `A5` | none | `G` | Define the MVP authentication and authorization contract for oracle and provider operator surfaces | +| MVP-029 | `not_started` | provider-state | `A3` | `MVP-003`, `MVP-027` | `D`, `F` | Implement provider collection lifecycle transitions and update surfaces for `collectible`, `collect_pending`, `collected`, and retryable collection state | +| MVP-030 | `not_started` | provider-integration | none | `MVP-014`, `MVP-017` | `A`, `G` | Add runtime compatibility and preflight checks for real provider/plugin deployments | +| MVP-031 | `not_started` | runtime-payment | none | `MVP-004`, `MVP-012`, `MVP-014`, `MVP-017` | `A`, `C` | Wire the live PaymentSession and RAV-control loop into the real client/provider runtime path | +| MVP-032 | `not_started` | operations | `A3`, `A4`, `A5` | 
`MVP-003`, `MVP-008`, `MVP-010`, `MVP-022` | `B`, `C`, `D`, `F`, `G` | Expose operator runtime/session/payment inspection APIs and CLI/status flows | +| MVP-033 | `open_question` | protocol | `A1` | none | `A` | Freeze the chain/network discovery input contract across client, sidecar, and oracle | + +## Protocol and Contract Tasks + +- [ ] MVP-001 Freeze the pricing exposure contract between oracle metadata and provider handshake. + - Context: + - The MVP scope expects pricing to likely appear in both places, but pricing authority is still open. + - Real consumer/oracle/provider integration should not proceed on hand-wavy assumptions here. + - Assumptions: + - `A2` + - Done when: + - The intended relationship between oracle pricing metadata and provider handshake pricing is documented. + - The implementation path does not rely on contradictory authority assumptions across components. + - Verify: + - Update `docs/mvp-scope.md` open question if unresolved, or close it if decided. + - Add or update integration/manual verification notes for whichever pricing source is actually consumed at runtime. + +- [ ] MVP-033 Freeze the chain/network discovery input contract across client, sidecar, and oracle. + - Context: + - The MVP requires oracle-backed provider discovery keyed by chain/network context, but the source of that context is still open. + - Leaving this only as an assumption risks incompatible implementations across the real client path, sidecar API, and oracle API. + - Assumptions: + - `A1` + - Done when: + - The repo defines the canonical chain/network identifier shape used by the oracle query path. + - It is explicit whether the real client must supply chain/network directly, whether the sidecar may derive it, and what fallback behavior is allowed when derivation is unavailable. + - Validation and error behavior are documented for missing, invalid, or unsupported chain/network inputs. + - MVP-005, MVP-007, and MVP-017 all point to the same contract. 
+ - Verify: + - Update `docs/mvp-scope.md` open question if unresolved, or close/narrow it if decided. + - Add contract-level tests or documented manual verification for valid, missing, and unsupported chain/network inputs. + +- [ ] MVP-002 Freeze reconnect handshake semantics so provider can return fresh or latest-known resumable RAV during normal session init. + - Context: + - The current repo supports resume when the caller already has `existing_rav`, but the MVP requires provider-authoritative reconnect behavior in the handshake. + - Assumptions: + - `A1` + - `A3` + - Done when: + - Consumer init has a documented reconnect story. + - Provider can distinguish fresh handshake from reconnect/resume during the normal init flow. + - Provider returns either a zero-value/fresh RAV or the latest resumable RAV according to the chosen semantics. + - Verify: + - Add an integration test that reconnects without relying solely on consumer-local in-memory session state. + +- [ ] MVP-027 Freeze canonical payment identity, `collection_id` reuse, and session-vs-payment keying semantics. + - Context: + - Reconnect, durable provider state, inspection APIs, and manual collection all depend on a stable answer for what identity ties those records together. + - Leaving this implicit risks implementing mutually incompatible storage and API shapes across provider, consumer, and tooling code. + - Assumptions: + - `A3` + - Done when: + - The repo documents the canonical payment identity used across runtime, persistence, and settlement flows. + - The rules for `collection_id` reuse versus minting a new `collection_id` are explicit for fresh sessions, reconnects, and retryable collection flows. + - It is clear which state is keyed by session identifier versus payment identity. + - Verify: + - Update `docs/mvp-scope.md` open questions if unresolved, or close/narrow them if decided. 
+ - Confirm MVP-002, MVP-003, MVP-008, MVP-013, MVP-019, and MVP-020 all reference the same identity semantics without contradiction. + +- [ ] MVP-003 Define the durable provider-side payment and settlement data model. + - Context: + - Provider persistence is MVP-critical, but the canonical durable model still needs to support both runtime session state and settlement state. + - Assumptions: + - `A3` + - Done when: + - The provider-side durable record types are documented. + - The model supports accepted RAV state, runtime session association, and collection lifecycle state. + - The model is structured so the unresolved `collection_id` semantics do not force a rewrite later. + - Verify: + - Document the schema/record model in a repo plan or doc. + - Confirm every persistence-related task below maps cleanly to the model. + +- [ ] MVP-004 Define and document the byte-billing and payment/header contract used in the real runtime path. + - Context: + - The MVP now explicitly requires real provider and consumer integrations, so the runtime payment/header contract must be frozen enough for those paths. + - Assumptions: + - none + - Done when: + - The document explains how the real provider path receives/validates payment material. + - Billable usage is defined as provider-authoritative streamed bytes. + - Header/payment material, signature encoding, and session binding expectations are documented. + - Verify: + - Update the relevant docs and ensure implementation tasks that depend on the wire contract can point to a stable reference. + +## Oracle Tasks + +- [ ] MVP-005 Implement a standalone oracle service with manual whitelist and recommended-provider response. + - Context: + - The oracle is now a mandatory MVP component, even though the initial logic is intentionally simple. + - Assumptions: + - `A1` + - `A2` + - `A5` + - Done when: + - A standalone oracle component exists. + - It can serve a manually curated provider set. 
+ - It returns eligible providers plus a recommended provider for a requested chain/network. + - The oracle request/response contract is documented and stable enough for the consumer sidecar to integrate against without provider-specific assumptions. + - Each provider record includes the minimum metadata required for MVP routing and connection setup, at least provider identity, endpoint/transport details, and chain/network eligibility. + - Recommendation behavior is deterministic for the same request and whitelist state. + - If pricing metadata is returned before pricing authority is fully frozen, the response documents that status clearly so the consumer does not treat advisory metadata as final authority by accident. + - Verify: + - Add tests for whitelist lookup and provider recommendation behavior. + - Add API contract coverage for request validation and response shape. + - Add a manual smoke flow that exercises oracle -> consumer sidecar -> provider selection. + +- [ ] MVP-006 Add authenticated oracle administration for whitelist and provider metadata management. + - Context: + - Oracle governance actions must require authentication in MVP. + - Assumptions: + - `A5` + - Done when: + - Oracle whitelist/provider metadata changes require authenticated operator access. + - The implementation does not rely on an open admin surface. + - Verify: + - Add tests for unauthenticated rejection and authenticated success on admin actions. + +## Consumer Tasks + +- [ ] MVP-007 Integrate consumer sidecar with oracle discovery while preserving direct-provider fallback. + - Context: + - Consumer sidecar is the mandatory client-side integration point and must support oracle-driven default behavior. + - Assumptions: + - `A1` + - `A2` + - Done when: + - Consumer sidecar can query the oracle and choose the recommended provider. + - Direct provider configuration still works as a fallback. + - The two flows share the same downstream session-init/payment behavior. 
+ - Verify: + - Add tests or manual smoke steps for both oracle-backed and direct-provider flows. + +- [ ] MVP-013 Implement provider-authoritative reconnect/resume in the normal handshake path. + - Context: + - Consumer persistence is intentionally not required for MVP, so the reconnect story must not depend entirely on consumer-local state. + - Assumptions: + - `A3` + - Done when: + - Consumer reconnect flow can recover via provider-authoritative handshake behavior. + - The sidecar can handle the provider returning either fresh or resumable state. + - Verify: + - Add an integration scenario that disconnects and reconnects through the normal init flow and resumes against provider state. + +## Provider State and Settlement Tasks + +- [ ] MVP-008 Add durable provider storage for accepted RAV, session state, and collection lifecycle state. + - Context: + - Provider-side accepted payment state must survive restart for MVP. + - Assumptions: + - `A3` + - `A6` + - Done when: + - Provider restart does not lose accepted collectible RAV state. + - Collection lifecycle state persists across restart. + - Runtime session state and settlement state are both recoverable enough for MVP behavior. + - Verify: + - Add a restart-focused integration or persistence test that validates accepted state survives process restart. + +- [ ] MVP-009 Expose provider inspection and settlement-data retrieval APIs for accepted/collectible RAV state. + - Context: + - CLI inspection and manual collection require a provider-side way to retrieve settlement-relevant data. + - Assumptions: + - `A3` + - Done when: + - Provider exposes APIs for listing and fetching accepted/collectible payment state. + - The returned data is sufficient for operator inspection and CLI-based collection. + - The API shape is stable enough for MVP-019 and MVP-020 to build on it without provider-specific ad hoc reads. + - Verify: + - Add integration tests for listing and fetching settlement-relevant accepted state. 
+ +- [ ] MVP-029 Implement provider collection lifecycle transitions and update surfaces for `collectible`, `collect_pending`, `collected`, and retryable collection state. + - Context: + - The MVP requires provider-visible collection lifecycle state, but inspection APIs and CLI submission are not sufficient unless something owns the transitions between those states. + - The provider needs a consistent way to track in-flight collection attempts, safe retries, and completed collection outcomes. + - Assumptions: + - `A3` + - Done when: + - Provider persistence supports the required collection lifecycle states and transition rules. + - There is a defined provider-side update path for marking collection attempts pending, completed, or retryable after an on-chain submission outcome. + - Retry behavior is documented so the CLI can interact with provider state idempotently. + - Verify: + - Add integration or persistence tests that cover `collectible` -> `collect_pending` -> `collected` and a retryable failure path. + +## Funding Control and Runtime Payment Tasks + +- [ ] MVP-010 Implement session-local low-funds detection and provider Continue/Pause/Stop decisions during streaming. + - Context: + - The MVP requires low-funds handling during active streaming, but only on a session-local basis. + - Assumptions: + - `A6` + - Done when: + - Provider can compare session-local exposure against available funding. + - Provider emits the appropriate control/funding messages during active streams. + - Low-funds behavior includes a structured operator-usable reason and enough funding state to explain why streaming was paused or stopped. + - The low-funds signal is stable enough for operator tooling and client-side messaging to consume consistently. + - Verify: + - Add an integration test with intentionally low funding that reaches a stop/pause or low-funds condition during streaming. 
+ - Confirm the surfaced low-funds state includes a machine-readable reason and actionable funding context. + +- [ ] MVP-011 Propagate provider stop/pause decisions through consumer sidecar into the real client path. + - Context: + - Low-funds logic is incomplete until the client path actually obeys it. + - Assumptions: + - `A6` + - Done when: + - Consumer sidecar converts provider control/funding messages into client-visible stop/pause behavior. + - Real client integration honors those decisions. + - Verify: + - Add integration/manual verification showing the real client path stops or pauses when provider requires it. + +- [ ] MVP-012 Add deterministic RAV issuance thresholds suitable for real runtime behavior. + - Context: + - The current "sign on every report" behavior is not a good real-runtime policy. + - Assumptions: + - none + - Done when: + - RAV issuance is controlled by explicit policy such as value/time/provider-request thresholds. + - Threshold behavior is documented and tested. + - Verify: + - Add tests that show repeated usage does not force a signature on every report unless policy requires it. + +## Real Provider and Consumer Integration Tasks + +- [ ] MVP-014 Integrate provider gateway validation into the real provider streaming path. + - Context: + - MVP requires real provider-path integration, not just demo harness behavior. + - Assumptions: + - none + - Done when: + - The real provider path validates payment/session state through SDS integration before or during stream setup as required by the chosen runtime contract. + - Verify: + - Add a real-path integration test or manual verification against the production-like provider path. + +- [ ] MVP-015 Wire real byte metering from the provider/plugin path into gateway payment state. + - Context: + - Billable usage for MVP is authoritative streamed bytes from provider-side metering. 
+ - Assumptions: + - none + - Done when: + - Real provider-side byte metering feeds the payment state used for billing/RAV progression. + - The runtime path does not rely on consumer-reported bytes as the billing source of truth. + - Verify: + - Add tests or manual instrumentation evidence showing the live provider path updates billing state from metered bytes. + +- [ ] MVP-016 Enforce gateway Continue/Pause/Stop decisions in the live provider stream lifecycle. + - Context: + - Provider-side control logic is incomplete if the live provider stream does not obey it. + - Assumptions: + - `A6` + - Done when: + - The real provider path can enforce SDS control decisions during live streaming. + - Verify: + - Add manual or automated verification where the provider stops or pauses the live stream based on gateway control decisions. + +- [ ] MVP-017 Integrate the real consumer/client path with consumer sidecar init, usage reporting, and session end. + - Context: + - The consumer sidecar is mandatory in the MVP architecture, but the real client path still needs to use it end to end. + - This task covers lifecycle entry/exit integration; the long-lived payment-session control loop is tracked separately in MVP-031. + - Assumptions: + - `A1` + - Done when: + - The real client path uses consumer sidecar init before streaming. + - It reports usage/end-of-session through the sidecar. + - It participates in oracle-backed discovery or direct fallback according to configuration. + - Verify: + - Add a real-path integration or manual scenario covering init -> stream -> usage -> end-session. + +- [ ] MVP-031 Wire the live PaymentSession and RAV-control loop into the real client/provider runtime path. + - Context: + - MVP requires payment state to keep advancing during real streaming, not only during local/demo harness flows. 
+ - The real integration is incomplete until provider-driven RAV requests and funding/control messages flow through the same production path used by the live stream. + - Assumptions: + - none + - Done when: + - The real client/provider integration keeps the long-lived SDS payment-session control loop active alongside the live stream. + - Provider-driven RAV requests, acknowledgements, and funding/control messages flow through the production runtime path rather than only through demo wrappers. + - Payment state advancement during streaming uses the same runtime path that real deployments will use. + - Verify: + - Add a real-path integration or documented manual verification showing stream start, at least one provider-driven payment/RAV update during live streaming, and synchronized session state until normal end or stop/pause. + +## Operator Tooling Tasks + +- [ ] MVP-018 Implement operator funding CLI flows for approve/deposit/top-up beyond local demo assumptions. + - Context: + - Funding is an MVP operator workflow, but current tooling is still demo-oriented. + - Assumptions: + - none + - Done when: + - CLI commands exist for approve/deposit/top-up in a provider-operator/payer-operator workflow. + - The commands are not limited to local deterministic devenv assumptions. + - The documented operator flow links funding actions to the low-funds or runtime inspection surfaces so an operator can move from a funding-related stop condition to topping up the correct escrow without ad hoc investigation. + - Verify: + - Add command-level tests where practical and document a manual funding flow that works against a non-demo configuration. + +- [ ] MVP-019 Implement provider inspection CLI flows for collectible/accepted RAV data. + - Context: + - Operators need to inspect what can be collected before settlement. + - Assumptions: + - `A3` + - `A5` + - Done when: + - CLI can retrieve and display accepted/collectible payment state from the provider. 
+ - It supports the collection lifecycle states needed for MVP operations. + - Verify: + - Add manual smoke coverage for inspecting accepted and `collect_pending` state. + +- [ ] MVP-020 Implement manual collection CLI flow that crafts/signs/submits collect transactions locally. + - Context: + - Settlement keys should stay outside the provider sidecar. + - Assumptions: + - `A3` + - Done when: + - CLI fetches settlement-relevant data from provider. + - CLI crafts and signs the collect transaction locally. + - CLI can retry safely when collection is pending or needs to be re-attempted. + - Verify: + - Add a manual or automated integration scenario that retrieves collectible state and completes a collect transaction. + +## Security, Runtime Compatibility, and Observability Tasks + +- [ ] MVP-028 Define the MVP authentication and authorization contract for oracle and provider operator surfaces. + - Context: + - The MVP requires authenticated operator/admin actions, but the exact auth mechanism remains open. + - Oracle and provider surfaces should not drift into incompatible auth behavior without an explicit contract. + - Assumptions: + - `A5` + - Done when: + - The repo documents the MVP authn/authz approach for oracle and provider operator/admin surfaces. + - It is clear which endpoints/actions require operator privileges and which identities or credentials satisfy that requirement. + - MVP-006 and MVP-022 can implement the same contract rather than inventing separate security behavior. + - Verify: + - Update `docs/mvp-scope.md` open question if unresolved, or close/narrow it if decided. + - Confirm oracle and provider admin task definitions point to the same auth contract. + +- [ ] MVP-021 Make TLS the default non-dev runtime posture for oracle, sidecar, and provider integration paths. + - Context: + - The MVP requires real transport security without forcing a perfect production-hardening story. 
+ - Assumptions: + - `A5` + - Done when: + - Non-dev/runtime docs and defaults use TLS for oracle, consumer sidecar, and provider integration surfaces. + - Plaintext behavior is clearly scoped to local/dev/demo usage. + - Oracle administration and provider/operator traffic do not rely on plaintext-by-default behavior outside explicitly dev-scoped workflows. + - Verify: + - Add validation or smoke coverage for TLS-enabled startup and client connectivity across oracle and sidecar/provider paths. + +- [ ] MVP-022 Add authentication and authorization to provider admin/operator APIs. + - Context: + - Provider-side operator actions must not rely on open or anonymous admin APIs. + - This task is about protecting provider operator surfaces, not defining the inspection/retrieval API shape itself. + - Assumptions: + - `A5` + - Done when: + - Provider inspection and settlement-retrieval APIs require authentication and authorization according to the shared MVP contract. + - The implementation rejects unauthenticated or unauthorized access to operator-only provider actions. + - The authentication requirement is documented and enforced in tests where practical. + - Verify: + - Add tests for authenticated success and unauthenticated rejection. + +## Runtime Compatibility Tasks + +- [ ] MVP-030 Add runtime compatibility and preflight checks for real provider/plugin deployments. + - Context: + - The MVP definition requires a real provider deployment path, not only a local happy-path demo. + - Reproducible real-path validation is weaker if the repo does not explicitly check the runtime compatibility assumptions required by the `sds://` provider/plugin integration path. + - Assumptions: + - none + - Done when: + - The repo identifies at least one named real-provider target environment for MVP acceptance and documents the required runtime compatibility constraints clearly enough for operators to validate before rollout. 
+ - The required runtime versions, plugin compatibility assumptions, and non-demo configuration prerequisites for that environment are documented. + - Startup or preflight checks fail fast when the provider/plugin environment is incompatible with the required SDS runtime contract. + - Verify: + - Add a startup/preflight validation test or a documented manual verification flow that demonstrates clear failure modes for unsupported runtime combinations. + - Document a reproducible preflight or smoke checklist for the named real-provider environment. + +- [ ] MVP-023 Define the final MVP observability floor beyond structured logs and status tooling. + - Context: + - MVP requires operational visibility, but metrics/tracing depth is still open. + - Assumptions: + - `A4` + - Done when: + - The repo has a documented observability floor for MVP. + - It is clear whether metrics endpoints are part of MVP or not. + - Verify: + - Update `docs/mvp-scope.md` and remove or narrow the open question if a decision is made. + +- [ ] MVP-024 Implement basic operator-facing inspection/status surfaces and log correlation. + - Context: + - Even if metrics remain open, operators need enough visibility to debug runtime/payment issues. + - Assumptions: + - `A4` + - Done when: + - Logs provide enough correlation to understand session/payment events. + - Provider/operator tooling exposes basic status views and correlation aids without assuming a finalized metrics/tracing backend. + - This task complements MVP-032 rather than replacing concrete runtime/session/payment inspection APIs. + - Verify: + - Manual verification that operators can inspect and reason about low-funds, reconnect, and collection flows without code-level debugging. + +- [ ] MVP-032 Expose operator runtime/session/payment inspection APIs and CLI/status flows. + - Context: + - The MVP scope requires operators to inspect session, payment, and collection state, not only settlement-ready collectible records. 
+ - Reconnect debugging, low-funds handling, and restart validation are weaker if operators must infer runtime state from raw logs or direct datastore access. + - Assumptions: + - `A3` + - `A4` + - `A5` + - Done when: + - The provider exposes authenticated runtime/status APIs for active or recent sessions, payment state, latest accepted/requested RAV context, and current low-funds/control state where applicable. + - Operator-facing CLI or status tooling can retrieve and display that runtime state without direct backend/database access. + - Low-funds inspection includes enough actionable information for an operator to understand whether additional escrow funding is required and why. + - Operators can inspect enough runtime/session/payment detail to understand reconnect, low-funds, and post-restart behavior without relying solely on logs. + - Verify: + - Add manual or integration coverage for inspecting an active or recently interrupted session, a low-funds session, and persisted post-restart payment state. + +## Validation and Documentation Tasks + +- [ ] MVP-025 Add MVP acceptance coverage for the primary end-to-end scenarios in docs/tests/manual verification. + - Context: + - The MVP scope makes scenarios the primary definition of done. + - Assumptions: + - none + - Done when: + - The key scenarios from `docs/mvp-scope.md` are covered by tests, reproducible manual flows, or both. + - The repo identifies which scenarios are validated locally versus against a named real-provider environment. + - At least scenarios `A`, `B`, `C`, and `G` have a defined validation path against a real-provider environment rather than relying only on local demo coverage. + - The repo clearly states how each acceptance scenario is validated. + - Verify: + - Update the scenario matrix or equivalent test/docs references for each acceptance scenario, including environment, validation method, and source of truth for the result. 
+ +- [ ] MVP-026 Refresh protocol/runtime docs so they match the MVP architecture and explicit open questions. + - Context: + - The phase 1 spec remains useful but no longer matches the MVP architecture in several important ways. + - Assumptions: + - `A1` + - `A2` + - `A3` + - `A4` + - `A5` + - Done when: + - The repo documentation reflects the MVP architecture rather than the older API-key-centric/control-plane assumptions. + - Open questions are called out explicitly rather than being hidden in outdated text. + - Verify: + - Review the updated docs against `docs/mvp-scope.md` and confirm there are no major contradictions. + +## Notes on Scope Boundaries + +- This backlog intentionally does **not** make aggregate multi-stream payer-level liability tracking an MVP requirement. +- It also does **not** make wallet-based funding UI or automated collection an MVP requirement. +- If future work needs those features, it should be tracked separately as post-MVP scope unless the MVP definition changes again. From bbd855c0555f156009edac9a8010173f8795330b Mon Sep 17 00:00:00 2001 From: Juan Manuel Rodriguez Defago Date: Tue, 17 Mar 2026 02:54:43 -0300 Subject: [PATCH 02/17] docs: align agent workflow with MVP planning Update the live agent workflow to use the MVP implementation backlog and clarify how agents should handle MVP-scoped work. 
- add MVP planning references to AGENTS.md - expand docs/agent-workflow.md with task classification, scope control, docs-only validation, open-question corroboration, and multi-agent coordination guidance - add plans/mvp-gap-analysis.md as the current-state assessment companion to the MVP scope and backlog --- AGENTS.md | 9 ++ docs/agent-workflow.md | 99 +++++++++++++- plans/mvp-gap-analysis.md | 278 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 382 insertions(+), 4 deletions(-) create mode 100644 plans/mvp-gap-analysis.md diff --git a/AGENTS.md b/AGENTS.md index 7cb9a23..fc4a886 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -113,6 +113,15 @@ grt, err := sds.NewGRT() sds.MustNewGRT() ``` +## MVP Planning References + +For MVP-scoped work: + +- Use `docs/mvp-scope.md` as the target-state definition. +- Use `plans/mvp-gap-analysis.md` for current-state assessment. +- Use `plans/mvp-implementation-backlog.md` as the active execution backlog. +- Treat `plans/implementation-backlog.md` as historical context unless explicitly requested. + ## Notes - All builds must pass before committing diff --git a/docs/agent-workflow.md b/docs/agent-workflow.md index a516d0a..7524720 100644 --- a/docs/agent-workflow.md +++ b/docs/agent-workflow.md @@ -3,23 +3,79 @@ This repo contains two complementary process documents: - `AGENTS.md`: repo-specific rules (commands, Go conventions, CLI flag validation patterns). -- `plans/implementation-backlog.md`: the task list with per-task **Done when** + **Verify** criteria and a status table. +- `plans/mvp-implementation-backlog.md`: the active MVP task list with per-task **Done when** + **Verify** criteria and a status table. This document defines the **step-by-step workflow** to implement backlog tasks consistently (human or LLM). +Note on the backlog switch: + +- `plans/mvp-implementation-backlog.md` is now the active implementation backlog for MVP-scoped work. 
+- `plans/implementation-backlog.md` remains useful historical context, but agents should not treat it as the primary task source for current MVP execution. + +## Scope of This Workflow + +This workflow defines the default execution behavior for MVP-scoped work. + +Authoritative planning documents for MVP work: + +- `docs/mvp-scope.md`: target-state definition +- `plans/mvp-gap-analysis.md`: current-state assessment +- `plans/mvp-implementation-backlog.md`: active execution backlog + +Historical context only: + +- `plans/implementation-backlog.md` + +Agents should treat `plans/mvp-implementation-backlog.md` as the primary task source unless the user explicitly asks for historical or pre-MVP planning context. + --- ## 1) Pick a Task -- Choose an `SDS-###` item from `plans/implementation-backlog.md`. +- Choose an `MVP-###` item from `plans/mvp-implementation-backlog.md`. - Read its **Context**, **Done when**, and **Verify** sections. - If the task requires cross-repo coordination, confirm owners and dependencies first. --- +## 1.5) Classify the Task Before Acting + +Before making changes, determine which kind of backlog item you are working on: + +- **Implementation task**: + - goal is code and behavior change + - complete the smallest change that satisfies **Done when** +- **Open-question task**: + - goal is a documented decision, narrowed contract, or explicit recorded deferral + - do not silently choose an implementation-specific interpretation in code +- **Validation/review task**: + - goal is to verify, review, or corroborate behavior against the backlog and current code + - do not broaden into implementation unless explicitly asked + +For every task, first extract and restate: + +- task ID +- dependencies +- assumptions +- **Done when** +- **Verify** +- relevant MVP scenarios + +Keep this restatement brief and execution-oriented. It should be a short working summary, not a long analysis section. 
+ +If a dependency is still unresolved and the current task cannot be completed safely without inventing semantics, mark the task `blocked` instead of coding around it. + +When an `open_question` task needs information beyond the repo: + +- use repo context first +- when external research is available, prefer primary sources +- record whether the result is a final decision, a narrowed contract, or a recommendation awaiting confirmation + +--- + ## 2) Update Status First -In `plans/implementation-backlog.md`: +In `plans/mvp-implementation-backlog.md`: - Set the chosen task `Status` to `in_progress` in the Status Tracker table. - If you cannot proceed, set `blocked` and add a short reason in the task’s section. @@ -32,6 +88,15 @@ In `plans/implementation-backlog.md`: - Prefer fixing root causes over adding workarounds. - Keep changes narrowly scoped to the selected task (avoid drive-by refactors). +### Scope Control Rules + +- Prefer one `MVP-###` task per implementation pass unless the backlog already makes a tightly-coupled dependency unavoidable. +- Small supporting edits are acceptable when they are strictly necessary to satisfy the selected task’s **Done when** or **Verify** criteria. +- Do not solve adjacent backlog items unless they are strictly required to satisfy the selected task’s **Done when** criteria. +- Do not absorb a second meaningful backlog item just because it touches the same files or component. +- Do not introduce new abstractions, helpers, configuration, or refactors unless they are necessary for the selected task. +- If you discover a missing prerequisite or unresolved contract, stop, document it, and update the backlog/task state instead of embedding an implicit decision in code. + Before writing code, run these quick checks: - **Domain type check**: is there already a repo-level type/helper for this domain (`sds.GRT`, address/signature helpers, etc.)? 
@@ -67,6 +132,11 @@ Follow `AGENTS.md` guidance: - `go test ./...` - `go vet ./...` +For docs-only, planning-only, or other non-code tasks: + +- run the task-specific verification that actually applies +- do not treat `gofmt`, `go test ./...`, or `go vet ./...` as mandatory unless code changed + If validation fails: - Fix the failure if it’s caused by your changes. @@ -80,11 +150,18 @@ If validation fails: - Confirm the expected outcomes (return codes, error codes, logs, state changes). - If “Verify” is missing or insufficient, update the backlog entry to make it reproducible. +If the task is an `open_question` item, corroboration should include the concrete output artifact: + +- updated docs or contract text +- narrowed decision record +- explicit deferral recorded in the backlog or docs +- downstream task references updated to point at that output when needed + --- ## 7) Mark Done -In `plans/implementation-backlog.md`: +In `plans/mvp-implementation-backlog.md`: - Set `Status` to `done` in the Status Tracker table. - Tick the task checkbox in its detailed section. 
@@ -99,6 +176,20 @@ In `plans/implementation-backlog.md`: --- +## 8.5) Multi-Agent Coordination + +When using multiple agents in parallel: + +- assign one primary `MVP-###` task per agent +- use separate git worktrees when parallel agents may edit code concurrently +- avoid overlapping file ownership unless one agent is review-only +- assign one owner for updates to shared planning/status documents when multiple agents are active +- only the assigned document owner should update shared status tables unless explicitly coordinated otherwise +- prefer splitting by dependency boundary or component boundary, not by arbitrary file count +- if a task changes shared contracts or protobufs, finish and merge that work before starting dependent implementation tasks + +--- + ## 9) Incorporate Review Learnings If you are implementing or revisiting code after reviewer feedback: diff --git a/plans/mvp-gap-analysis.md b/plans/mvp-gap-analysis.md new file mode 100644 index 0000000..583d00d --- /dev/null +++ b/plans/mvp-gap-analysis.md @@ -0,0 +1,278 @@ +# MVP Gap Analysis + +Drafted: 2026-03-12 + +This document maps the current repository state against the MVP defined in `docs/mvp-scope.md`. + +Unlike the MVP scope document, this file is expected to change frequently. + +Status values used here: + +- `implemented` +- `partial` +- `missing` +- `open_question` + +## Summary + +The repository already has a strong technical foundation: + +- Horizon V2 / TAP signing, verification, and aggregation are implemented and tested +- local chain/contracts and integration tests are in place +- consumer sidecar and provider gateway exist +- sidecar-to-gateway session start and payment-session flow exist +- provider-side plugin services exist for auth, session, and usage + +The main MVP gaps are not the cryptographic/payment core. 
They are the surrounding system capabilities required to make SDS a usable product stack: + +- standalone oracle/discovery component +- real provider and consumer production-path integration completion +- provider-side durable payment/collection persistence +- live low-funds enforcement in the real stream path +- funding and settlement CLI workflows +- authenticated admin/operator surfaces + +## Acceptance Scenario Status + +| Scenario | Status | Notes | +| --- | --- | --- | +| Discovery to paid streaming | `partial` | Paid session flow exists, but standalone oracle is missing and real production-path integration is not complete | +| Reconnect and resume | `partial` | Resume with `existing_rav` exists; provider-authoritative recovery during normal handshake is not finalized | +| Low funds during streaming | `missing` | Session-local low-funds decisions during active streaming are still backlog work | +| Provider restart without losing collectible state | `missing` | Provider accepted RAV state is still in-memory today | +| Manual funding flow | `partial` | Local/demo helper exists via `sds demo setup`, but general MVP funding CLI workflow is not implemented | +| Manual collection flow | `missing` | No MVP settlement inspection/collection CLI flow yet | +| Secure deployment posture | `partial` | TLS hooks exist, but admin authentication and final secure operational surfaces are not complete | + +## Component Status + +### Core Payment / Horizon + +Status: `implemented` + +Evidence: + +- `horizon/` +- `test/integration/rav_test.go` +- `test/integration/collect_test.go` +- `test/integration/authorization_test.go` + +Notes: + +- This area is already strong enough to support the rest of MVP work. 
+ +### Consumer Sidecar RPC Surface + +Status: `partial` + +Evidence: + +- `consumer/sidecar/sidecar.go` +- `consumer/sidecar/handler_init.go` +- `consumer/sidecar/handler_report_usage.go` +- `consumer/sidecar/handler_end_session.go` + +What already exists: + +- session init +- usage reporting +- end session +- payment-session loop wiring to provider gateway +- existing-RAV-based resumption + +What is still missing for MVP: + +- finalized provider-authoritative reconnect flow in the normal handshake +- completion of real client integration path +- finalized handling around low-funds stop/pause in real usage path + +### Provider Gateway RPC Surface + +Status: `partial` + +Evidence: + +- `provider/gateway/gateway.go` +- `provider/gateway/handler_start_session.go` +- `provider/gateway/handler_payment_session.go` +- `provider/gateway/handler_submit_rav.go` +- `provider/gateway/handler_get_session_status.go` + +What already exists: + +- session start +- bidirectional payment session +- RAV validation and authorization checks +- basic session status inspection + +What is still missing for MVP: + +- durable accepted-RAV persistence +- collection lifecycle state +- low-funds logic during active streaming +- authenticated admin/operator surfaces + +### Provider Plugin Services + +Status: `partial` + +Evidence: + +- `provider/auth/service.go` +- `provider/session/service.go` +- `provider/usage/service.go` +- `provider/plugin/` + +What already exists: + +- auth, session, and usage services for `sds://` +- provider-authoritative metering path foundation + +What is still missing for MVP: + +- full real provider path integration and validation against production-like usage +- finalized byte-billing semantics in the complete runtime path +- stop/pause behavior enforced in the live stream path + +### Oracle + +Status: `missing` + +What MVP requires: + +- standalone service +- manual whitelist +- eligible provider set plus recommended provider response +- authenticated 
admin/governance actions + +### Provider Persistence + +Status: `missing` + +Current state: + +- provider repository is in-memory +- accepted RAV/session state is lost on restart + +Evidence: + +- `provider/repository/inmemory.go` +- `provider/repository/repository.go` + +What MVP requires: + +- durable provider-side state for accepted collectible RAVs +- settlement lifecycle state +- persistence across restarts + +### Funding CLI + +Status: `partial` + +Current state: + +- local/demo funding helper exists + +Evidence: + +- `cmd/sds/demo_setup.go` + +What MVP requires: + +- operator-oriented approve/deposit/top-up workflow beyond local demo assumptions + +### Settlement / Collection CLI + +Status: `missing` + +What MVP requires: + +- inspect collectible accepted RAV data +- fetch settlement-relevant data from provider +- craft/sign/submit `collect()` transaction locally +- retry-safe operator workflow + +### Transport Security + +Status: `partial` + +Evidence: + +- `cmd/sds/provider_gateway.go` +- `cmd/sds/consumer_sidecar.go` +- `sidecar/server_transport.go` +- `provider/plugin/plugin.go` + +What already exists: + +- plaintext vs TLS transport configuration paths + +What is still missing for MVP: + +- finalized secure deployment defaults and operational guidance +- authenticated admin/operator surfaces + +### Observability + +Status: `partial` + +What already exists: + +- structured logging +- health endpoints +- status inspection basics + +What is still missing for MVP: + +- final MVP decision on metrics endpoints +- better operator-facing inspection for payment/collection state + +## Backlog Alignment + +The largest currently tracked backlog items that still map directly to MVP are: + +- `SDS-008` Define and document `metadata` schema + encoding +- `SDS-016` Implement `NeedMoreFunds` loop + Continue/Stop/Pause +- `SDS-020` Add signing thresholds +- `SDS-021` Decide/implement on-chain collection workflow +- `SDS-022` Track outstanding RAVs across concurrent 
streams + - note: full aggregate concurrent-stream correctness is no longer assumed to be MVP-critical +- `SDS-024` Add durable state storage +- `SDS-025` Add transport security + authn/authz +- `SDS-026` Add observability +- `SDS-028` Define payment header format +- `SDS-029` Integrate provider gateway into tier1 provider +- `SDS-030` Integrate consumer sidecar into substreams client +- `SDS-038` Make `sds sink run` the primary end-to-end demo (STOP-aware) +- `SDS-039` Document/enforce required firehose-core version for `sds://` plugins + +Additional MVP work not yet clearly represented as a complete deliverable set in the existing backlog: + +- standalone oracle component +- authenticated provider/oracle admin surfaces +- operator-oriented funding CLI +- operator-oriented settlement inspection and collection CLI +- provider-authoritative reconnect flow folded into the normal handshake + +## Open Questions Carrying Risk + +These are not implementation gaps yet, but unresolved design points that could change scope or interfaces: + +- chain/network derivation from package vs explicit input +- pricing authority between oracle metadata and provider handshake +- canonical payment identity and `collection_id` reuse semantics +- metrics endpoints vs logs-plus-status-only for MVP observability +- exact admin authentication mechanism + +## Recommended Usage + +Use `docs/mvp-scope.md` as the stable target-state reference. + +Use this file to: + +- assess current progress +- identify MVP gaps +- map backlog work to the target MVP +- keep implementation status current without rewriting the MVP scope itself + +Use `plans/mvp-implementation-backlog.md` as the concrete task backlog aligned to the revised MVP scope. 
From 550432dcb0f7b46605fe99b4ab685243bb9ae6b8 Mon Sep 17 00:00:00 2001 From: Juan Manuel Rodriguez Defago Date: Wed, 18 Mar 2026 02:03:06 -0300 Subject: [PATCH 03/17] docs: record MVP-033 network contract Document the resolved chain/network discovery contract for MVP and add a sequencing guide derived from the MVP backlog. - mark MVP-033 as done and record the chosen package-derived network behavior in plans/mvp-implementation-backlog.md - narrow the corresponding open question in docs/mvp-scope.md - add docs/mvp-implementation-sequencing.md to describe dependency-driven implementation order and parallel work lanes --- docs/mvp-implementation-sequencing.md | 324 ++++++++++++++++++++++++++ docs/mvp-scope.md | 8 +- plans/mvp-implementation-backlog.md | 27 ++- 3 files changed, 349 insertions(+), 10 deletions(-) create mode 100644 docs/mvp-implementation-sequencing.md diff --git a/docs/mvp-implementation-sequencing.md b/docs/mvp-implementation-sequencing.md new file mode 100644 index 0000000..551f5e2 --- /dev/null +++ b/docs/mvp-implementation-sequencing.md @@ -0,0 +1,324 @@ +# MVP Implementation Sequencing + +This document derives a recommended implementation sequence from `plans/mvp-implementation-backlog.md`. + +It is not a replacement for the backlog. + +Use it to: + +- understand which tasks are true prerequisites for others +- identify which work can proceed in parallel +- avoid prompting agents to implement downstream tasks before the required contracts are stable enough + +Use `docs/mvp-scope.md` as the target-state definition and `plans/mvp-implementation-backlog.md` as the source of truth for task definitions, dependencies, and status. + +## How To Read This Document + +This is a dependency-driven sequencing guide, not a strict linear priority list. + +The MVP backlog is a DAG with multiple work lanes. 
Some tasks are: + +- **hard blockers**: downstream implementation should not proceed until they are resolved or narrowed enough +- **soft blockers**: they affect a lane, but some limited work can still proceed under the documented assumptions +- **parallelizable**: they can be worked on independently once their local prerequisites are stable enough + +This document combines: + +- explicit task dependencies from `plans/mvp-implementation-backlog.md` +- execution judgment about which tasks are safest or most useful to do earlier within those dependency constraints + +When this document recommends an order that is not strictly enforced by the backlog, treat it as advisory rather than mandatory. + +For `open_question` tasks, “done” means producing a concrete output that downstream work can reference: + +- a documented decision +- a narrowed contract +- or an explicit recorded deferral + +## Sequencing Principles + +1. Freeze shared contracts before asking agents to implement multiple dependent tasks. +2. Unlock work by lane once the minimum required contracts for that lane are stable enough. +3. Do not wait for every open question to be fully closed before starting all implementation. +4. Do not start downstream implementation by embedding an implicit answer to an unresolved contract question. +5. Prefer prompting one `MVP-###` task at a time unless the supporting change is strictly required by that task’s `Done when` or `Verify`. + +## Phase 0: Shared Contract And Decision Gate + +These tasks define semantics or interfaces that multiple lanes depend on. + +The grouping is recommended because these tasks have broad downstream impact. It is not a claim that every one of them must be fully closed before any implementation begins. + +### Hard Blockers + +- `MVP-004` Define and document the byte-billing and payment/header contract used in the real runtime path + - Blocks most runtime payment, provider integration, and client integration work. 
+- `MVP-027` Freeze canonical payment identity, `collection_id` reuse, and session-vs-payment keying semantics + - Blocks reconnect, provider state, settlement lifecycle, and operator retrieval/collection work. +- `MVP-028` Define the MVP authentication and authorization contract for oracle and provider operator surfaces + - Blocks authenticated admin/operator implementation for oracle and provider APIs. + +### Soft Blockers + +- `MVP-001` Freeze the pricing exposure contract between oracle metadata and provider handshake + - Primarily blocks discovery/oracle integration where pricing semantics could drift. + - Some oracle work can proceed if pricing remains explicitly non-authoritative or advisory. +- `MVP-023` Define the final MVP observability floor beyond structured logs and status tooling + - Primarily blocks final observability closure, not all operator visibility work. + +### Guidance + +- Start MVP execution by resolving as many hard blockers as possible. +- Do not require all soft blockers to be fully closed before any implementation begins. +- For `MVP-001` and `MVP-023`, allow limited implementation so long as the current assumptions remain explicit and no irreversible semantics are baked into code. +- `MVP-033` is already resolved enough for downstream discovery and client-integration work to rely on its contract. + +## Phase 1: Lane Unlocks + +Once enough of the Phase 0 gate is stable, work can proceed in parallel lanes. + +The lane ordering below respects explicit backlog dependencies. Ordering within a lane is recommended unless the backlog dependency graph makes it mandatory. + +### Lane A: Discovery And Consumer Entry + +Minimum prerequisites: + +- `MVP-033` resolved enough for implementation +- `MVP-001` stable enough for the chosen oracle/pricing exposure behavior + +Recommended sequence: + +1. `MVP-005` Implement a standalone oracle service with manual whitelist and recommended-provider response +2. 
`MVP-007` Integrate consumer sidecar with oracle discovery while preserving direct-provider fallback +3. `MVP-017` Integrate the real consumer/client path with consumer sidecar init, usage reporting, and session end +4. `MVP-030` Add runtime compatibility and preflight checks for real provider/plugin deployments + +Notes: + +- `MVP-033` is resolved for MVP with the following contract: + - consumer sidecar derives network from the Substreams package by default + - if a package/module resolves a specific `networks` entry, that takes precedence over top-level `network` + - explicit input remains supported only as fallback when package derivation is unavailable + - mismatch between explicit and package-derived values fails fast after normalization + - missing usable network also fails fast +- `MVP-017` also depends on `MVP-011`, so only the entry/lifecycle portion should move first. +- `MVP-030` is late in the lane because it depends on real-path integration existing. + +### Lane B: Runtime Payment And Stream Control + +Minimum prerequisites: + +- `MVP-004` + +Recommended sequence: + +1. `MVP-010` Implement session-local low-funds detection and provider Continue/Pause/Stop decisions during streaming +2. `MVP-012` Add deterministic RAV issuance thresholds suitable for real runtime behavior +3. `MVP-014` Integrate provider gateway validation into the real provider streaming path +4. `MVP-015` Wire real byte metering from the provider/plugin path into gateway payment state +5. `MVP-011` Propagate provider stop/pause decisions through consumer sidecar into the real client path +6. `MVP-016` Enforce gateway Continue/Pause/Stop decisions in the live provider stream lifecycle +7. `MVP-031` Wire the live PaymentSession and RAV-control loop into the real client/provider runtime path + +Notes: + +- `MVP-010` and `MVP-014` are the main foundations in this lane. 
+- `MVP-031` is effectively the capstone runtime-payment task because it depends on real provider and consumer integration plus thresholding.
+
+### Lane C: Provider State, Settlement, And Operator Retrieval
+
+Minimum prerequisites:
+
+- `MVP-027`
+
+Recommended sequence:
+
+1. `MVP-003` Define the durable provider-side payment and settlement data model
+2. `MVP-008` Add durable provider storage for accepted RAV, session state, and collection lifecycle state
+3. `MVP-029` Implement provider collection lifecycle transitions and update surfaces for `collectible`, `collect_pending`, `collected`, and retryable collection state
+4. `MVP-009` Expose provider inspection and settlement-data retrieval APIs for accepted/collectible RAV state
+5. `MVP-022` Add authentication and authorization to provider admin/operator APIs
+6. `MVP-019` Implement provider inspection CLI flows for collectible/accepted RAV data
+7. `MVP-020` Implement manual collection CLI flow that crafts/signs/submits collect transactions locally
+8. `MVP-032` Expose operator runtime/session/payment inspection APIs and CLI/status flows
+9. `MVP-018` Implement operator funding CLI flows for approve/deposit/top-up beyond local demo assumptions
+
+Notes:
+
+- `MVP-008` and `MVP-029` can begin in parallel once `MVP-003` and `MVP-027` are stable enough.
+- `MVP-009` depends on `MVP-029`, so this part of the sequence is required by the backlog rather than just recommended.
+- `MVP-018` comes late because the current backlog explicitly ties it to operator runtime/low-funds inspection surfaces.
+
+### Lane D: Reconnect And Resume
+
+Minimum prerequisites:
+
+- `MVP-027`
+
+Recommended sequence:
+
+1. `MVP-002` Freeze reconnect handshake semantics so provider can return fresh or latest-known resumable RAV during normal session init
+2. Confirm the `MVP-008` durable provider state work is stable enough for reconnect behavior
+3. 
`MVP-013` Implement provider-authoritative reconnect/resume in the normal handshake path + +Notes: + +- This lane depends on both protocol and persistence work. +- Reconnect should not be treated as complete until it is proven against provider-authoritative durable state, not just consumer-local memory. + +### Lane E: Security And Deployment + +Minimum prerequisites: + +- `MVP-028` for authenticated operator/admin surfaces + +Recommended sequence: + +1. `MVP-021` Make TLS the default non-dev runtime posture for oracle, sidecar, and provider integration paths +2. `MVP-006` Add authenticated oracle administration for whitelist and provider metadata management +3. `MVP-022` Add authentication and authorization to provider admin/operator APIs +4. `MVP-030` Add runtime compatibility and preflight checks for real provider/plugin deployments + +Notes: + +- `MVP-021` can proceed relatively early even though it has no hard dependency on `MVP-028`. +- `MVP-030` overlaps discovery and runtime work and should land once the real deployment path is concrete enough to validate. + +### Lane F: Observability, Validation, And Docs + +Minimum prerequisites: + +- `MVP-023` for final observability scope + +Recommended sequence: + +1. `MVP-024` Implement basic operator-facing inspection/status surfaces and log correlation +2. `MVP-025` Add MVP acceptance coverage for the primary end-to-end scenarios in docs/tests/manual verification +3. `MVP-026` Refresh protocol/runtime docs so they match the MVP architecture and explicit open questions + +Notes: + +- `MVP-024` can begin in a limited way before `MVP-023` is fully closed if it stays within the current “basic visibility” assumption. +- `MVP-025` should be updated incrementally throughout implementation, but its final closure belongs near the end. +- `MVP-026` should be completed after the key open-question outputs it depends on are stable. 
+ +## Suggested Implementation Phases + +This is the most practical high-level order to use when prompting agents. + +It is a recommended rollout sequence, not a canonical priority order embedded in the backlog itself. + +### Phase 0: Resolve Or Narrow Shared Contracts + +- `MVP-004` +- `MVP-027` +- `MVP-028` +- `MVP-001` +- `MVP-023` + +Already resolved: + +- `MVP-033` + +### Phase 1: Start The First Implementable Lanes + +- Discovery foundation: + - `MVP-005` + - `MVP-007` +- Runtime foundation: + - `MVP-010` + - `MVP-012` + - `MVP-014` +- Provider state foundation: + - `MVP-003` + - `MVP-008` + - `MVP-029` +- Security foundation: + - `MVP-021` + +### Phase 2: Integrate Runtime And Retrieval Paths + +- `MVP-015` +- `MVP-011` +- `MVP-016` +- `MVP-017` +- `MVP-009` +- `MVP-022` +- `MVP-002` + +### Phase 3: Complete Reconnect, Runtime Control, And Operator Flows + +- `MVP-013` +- `MVP-031` +- `MVP-006` +- `MVP-019` +- `MVP-020` +- `MVP-032` +- `MVP-018` +- `MVP-030` + +### Phase 4: Finalize Visibility, Acceptance, And Documentation + +- `MVP-024` +- `MVP-025` +- `MVP-026` + +## Tasks That Can Safely Start Before Every Open Question Is Closed + +These are useful to know when sequencing agent work. + +This section is interpretive guidance based on the assumptions register and dependency graph. It is not a direct restatement of the backlog. + +### Safe To Start Early + +- `MVP-021` + - TLS default posture is broadly independent of most unresolved protocol questions. +- `MVP-024` + - Basic log correlation and status surfaces can begin before observability scope is finalized. +- `MVP-025` + - Acceptance coverage scaffolding can be built incrementally while implementation proceeds. + +### Safe To Start If Assumptions Remain Explicit + +- `MVP-005` + - Can begin before `MVP-001` is fully closed if pricing authority remains clearly non-final in the API/implementation. 
+- `MVP-003` + - Some schema design can begin while `MVP-027` is being narrowed, but it should not be treated as finalized until identity semantics are stable. +- `MVP-024` + - Can proceed in a reduced/basic form before `MVP-023` is fully closed. + +### Should Usually Wait + +- `MVP-007` + - Should wait until the chain/network and pricing exposure contracts are stable enough. +- `MVP-013` + - Should wait until reconnect semantics and durable provider state are both stable enough. +- `MVP-019` and `MVP-020` + - Should wait until retrieval APIs, auth, and collection lifecycle semantics are in place. + +## Prompting Guidance For Sequenced Work + +When prompting an agent, reference both the task and its place in the sequencing. + +Recommended pattern: + +1. State the current phase or lane. +2. State the exact `MVP-###` task. +3. Name the resolved prerequisites the agent is allowed to rely on. +4. Name any unresolved upstream questions the agent must not answer implicitly in code. + +Example: + +```text +We are currently in Phase 1, Runtime foundation. +Implement MVP-010 only. +You may rely on MVP-004 as the frozen runtime billing/payment contract. +Do not broaden into MVP-011 or MVP-016 except for strictly necessary supporting edits. +If you find that MVP-010 still requires unresolved semantics beyond MVP-004, mark it blocked instead of choosing an implicit contract in code. +``` + +## Notes + +- This document derives sequence from the current dependency structure in `plans/mvp-implementation-backlog.md`. +- If task dependencies change, this document should be updated to match. +- When the backlog and this document disagree, the backlog is the source of truth. diff --git a/docs/mvp-scope.md b/docs/mvp-scope.md index 0fcbc2d..9317a29 100644 --- a/docs/mvp-scope.md +++ b/docs/mvp-scope.md @@ -316,7 +316,13 @@ The scenarios below are the primary definition of done for the MVP. 
## Open Questions -- Should chain/network be derived automatically from the Substreams package, or supplied explicitly to the oracle query path? +- The chain/network discovery input contract is narrowed for MVP: + - consumer sidecar derives network from the Substreams package by default + - if a package/module resolves a specific `networks` entry, that takes precedence over top-level `network` + - explicit user-supplied network input remains supported only as fallback when package derivation is unavailable + - if both explicit input and package-derived network exist and differ after normalization, the request fails fast + - if neither source yields a usable network, the request fails fast + - SDS uses the same canonical network keys as the Graph networks registry for MVP, with repo-owned/pinned mappings rather than live runtime registry lookups - What is the pricing authority contract between oracle metadata and provider handshake responses? - What is the exact canonical payment identity and `collection_id` reuse policy for fresh workloads versus reconnects? - How much of the reconnect/recovery state should be keyed by session versus on-chain payment identity? diff --git a/plans/mvp-implementation-backlog.md b/plans/mvp-implementation-backlog.md index 780b7b8..e82abc4 100644 --- a/plans/mvp-implementation-backlog.md +++ b/plans/mvp-implementation-backlog.md @@ -42,9 +42,13 @@ Unless otherwise scoped, the baseline validation for code changes remains: These assumptions are referenced by task ID so it is clear where unresolved decisions still matter. -- `A1` Chain/network discovery input is still open. - - MVP work should support an explicit chain/network input path now. - - Automatic derivation from the Substreams package remains optional/open. +- `A1` Chain/network discovery input is narrowed for MVP, but implementation details still matter. + - Consumer sidecar derives network from the Substreams package by default. 
+ - If a package/module resolves a specific `networks` entry, that takes precedence over top-level `network`. + - Explicit input remains supported as fallback when package derivation is unavailable. + - If both explicit input and derived package network exist and differ after normalization, fail fast. + - If neither source yields a usable network, fail fast. + - SDS should use repo-owned/pinned mappings to the Graph networks registry keys for MVP rather than live runtime registry lookups. - `A2` Pricing authority between oracle metadata and provider handshake is still open. - MVP work should avoid hard-coding a final authority rule unless/until aligned with StreamingFast. @@ -110,7 +114,7 @@ These assumptions are referenced by task ID so it is clear where unresolved deci | MVP-030 | `not_started` | provider-integration | none | `MVP-014`, `MVP-017` | `A`, `G` | Add runtime compatibility and preflight checks for real provider/plugin deployments | | MVP-031 | `not_started` | runtime-payment | none | `MVP-004`, `MVP-012`, `MVP-014`, `MVP-017` | `A`, `C` | Wire the live PaymentSession and RAV-control loop into the real client/provider runtime path | | MVP-032 | `not_started` | operations | `A3`, `A4`, `A5` | `MVP-003`, `MVP-008`, `MVP-010`, `MVP-022` | `B`, `C`, `D`, `F`, `G` | Expose operator runtime/session/payment inspection APIs and CLI/status flows | -| MVP-033 | `open_question` | protocol | `A1` | none | `A` | Freeze the chain/network discovery input contract across client, sidecar, and oracle | +| MVP-033 | `done` | protocol | `A1` | none | `A` | Freeze the chain/network discovery input contract across client, sidecar, and oracle | ## Protocol and Contract Tasks @@ -127,7 +131,7 @@ These assumptions are referenced by task ID so it is clear where unresolved deci - Update `docs/mvp-scope.md` open question if unresolved, or close it if decided. - Add or update integration/manual verification notes for whichever pricing source is actually consumed at runtime. 
-- [ ] MVP-033 Freeze the chain/network discovery input contract across client, sidecar, and oracle. +- [x] MVP-033 Freeze the chain/network discovery input contract across client, sidecar, and oracle. - Context: - The MVP requires oracle-backed provider discovery keyed by chain/network context, but the source of that context is still open. - Leaving this only as an assumption risks incompatible implementations across the real client path, sidecar API, and oracle API. @@ -135,12 +139,17 @@ These assumptions are referenced by task ID so it is clear where unresolved deci - `A1` - Done when: - The repo defines the canonical chain/network identifier shape used by the oracle query path. - - It is explicit whether the real client must supply chain/network directly, whether the sidecar may derive it, and what fallback behavior is allowed when derivation is unavailable. - - Validation and error behavior are documented for missing, invalid, or unsupported chain/network inputs. + - Consumer sidecar derives network from the Substreams package by default. + - If a package/module resolves a specific `networks` entry, that takes precedence over top-level `network`. + - Explicit input remains supported only as fallback when package derivation is unavailable. + - If both explicit input and package-derived network exist and differ after normalization, the request fails fast. + - If neither source yields a usable network, the request fails fast. + - SDS uses the same canonical network keys as the Graph networks registry for MVP, with repo-owned/pinned mappings rather than live runtime registry lookups. + - Consumer sidecar owns derivation, normalization, validation, and conflict detection. - MVP-005, MVP-007, and MVP-017 all point to the same contract. - Verify: - - Update `docs/mvp-scope.md` open question if unresolved, or close/narrow it if decided. - - Add contract-level tests or documented manual verification for valid, missing, and unsupported chain/network inputs. 
+ - Update `docs/mvp-scope.md` open question to reflect the narrowed contract. + - Add contract-level tests or documented manual verification for package-derived network success, explicit-input fallback, mismatch rejection, and missing-network rejection. - [ ] MVP-002 Freeze reconnect handshake semantics so provider can return fresh or latest-known resumable RAV during normal session init. - Context: From 5d18b9f304d41a1488db30f913642523d3901ddf Mon Sep 17 00:00:00 2001 From: Juan Manuel Rodriguez Defago Date: Mon, 23 Mar 2026 01:39:13 -0300 Subject: [PATCH 04/17] devenv: make demo-ready state the default - prepare escrow, provision, provider registration, and demo signer authorization during shared devenv startup - simplify sds demo setup into demo-state verification plus env export only - update integration helpers/tests for the new default bootstrap behavior - switch .reflex to the deterministic demo signer and remove the obsolete .reflex.stack workflow --- .reflex | 2 +- .reflex.stack | 39 ---- README.md | 22 +- cmd/sds/demo_setup.go | 237 +++----------------- cmd/sds/devenv.go | 1 + horizon/devenv/connect.go | 38 ++++ horizon/devenv/defaults.go | 59 +++++ horizon/devenv/devenv.go | 33 ++- horizon/devenv/helpers.go | 266 +++++++++++++++++++---- test/integration/authorization_test.go | 20 -- test/integration/setup_test.go | 8 + test/integration/substreams_flow_test.go | 20 +- 12 files changed, 412 insertions(+), 333 deletions(-) delete mode 100644 .reflex.stack create mode 100644 horizon/devenv/connect.go create mode 100644 horizon/devenv/defaults.go diff --git a/.reflex b/.reflex index 2062f08..57bdec7 100644 --- a/.reflex +++ b/.reflex @@ -13,7 +13,7 @@ DLOG=".*=debug" ./devel/sds consumer sidecar \ --grpc-listen-addr=:9002 \ --plaintext \ - --signer-private-key=0xe4c2694501255921b6588519cfd36d4e86ddc4ce19ab1bc91d9c58057c040304 \ + --signer-private-key=0x0bba7d355d1750fce9756af7887e826e8071a56d9d8e327f546b1f34c78f9281 \ 
--collector-address=0x1d01649b4f94722b55b5c3b3e10fe26cd90c1ba9' -r "\.(go|sql)$" -s -R "devel/.*" -- sh -c \ diff --git a/.reflex.stack b/.reflex.stack deleted file mode 100644 index a77cbd1..0000000 --- a/.reflex.stack +++ /dev/null @@ -1,39 +0,0 @@ --r "\.(go|sql)$" -s -R "devel/.*" -- sh -c \ - 'test -f ./devel/.demo.env || { echo "Missing ./devel/.demo.env. Run ./devel/sds demo setup first."; exit 1; } && \ - . ./devel/.demo.env && \ - : "${SDS_DEMO_COLLECTOR_ADDRESS:?Missing SDS_DEMO_COLLECTOR_ADDRESS in ./devel/.demo.env}" && \ - : "${SDS_DEMO_SIGNER_PRIVATE_KEY:?Missing SDS_DEMO_SIGNER_PRIVATE_KEY in ./devel/.demo.env}" && \ - CONSUMER_LISTEN_ADDR="${SDS_DEMO_CONSUMER_LISTEN_ADDR:-:9002}" && \ - echo "Starting Consumer Sidecar ..." && \ - DLOG=".*=debug" ./devel/sds consumer sidecar \ - --grpc-listen-addr="$CONSUMER_LISTEN_ADDR" \ - --plaintext \ - --signer-private-key="$SDS_DEMO_SIGNER_PRIVATE_KEY" \ - --collector-address="$SDS_DEMO_COLLECTOR_ADDRESS"' - --r "\.(go|sql)$" -s -R "devel/.*" -- sh -c \ - 'test -f ./devel/.demo.env || { echo "Missing ./devel/.demo.env. Run ./devel/sds demo setup first."; exit 1; } && \ - . ./devel/.demo.env && \ - : "${SDS_DEMO_SERVICE_PROVIDER_ADDRESS:?Missing SDS_DEMO_SERVICE_PROVIDER_ADDRESS in ./devel/.demo.env}" && \ - : "${SDS_DEMO_COLLECTOR_ADDRESS:?Missing SDS_DEMO_COLLECTOR_ADDRESS in ./devel/.demo.env}" && \ - : "${SDS_DEMO_ESCROW_ADDRESS:?Missing SDS_DEMO_ESCROW_ADDRESS in ./devel/.demo.env}" && \ - : "${SDS_DEMO_RPC_ENDPOINT:?Missing SDS_DEMO_RPC_ENDPOINT in ./devel/.demo.env}" && \ - PROVIDER_LISTEN_ADDR="${SDS_DEMO_PROVIDER_LISTEN_ADDR:-:9001}" && \ - echo "Starting Provider Gateway ..." 
&& \ - touch ./devel/.provider-gateway && \ - DLOG=".*=debug" ./devel/sds provider gateway \ - --grpc-listen-addr="$PROVIDER_LISTEN_ADDR" \ - --plaintext \ - --service-provider="$SDS_DEMO_SERVICE_PROVIDER_ADDRESS" \ - --collector-address="$SDS_DEMO_COLLECTOR_ADDRESS" \ - --escrow-address="$SDS_DEMO_ESCROW_ADDRESS" \ - --rpc-endpoint="$SDS_DEMO_RPC_ENDPOINT"' - --r "(\.provider-gateway|firecore\.config\.yaml)$" -s -R "devel/.*" -- sh -c \ - 'echo "Restarting firehose-core instance (5s delay for provider gateway startup)..." && \ - sleep 5 && \ - rm -rf ./devel/.firehose && \ - echo "Starting Firehose Core" && \ - echo " Substreams: sds sink -e https://localhost:10016 --insecure" && \ - echo "" && \ - PATH="$(go env GOPATH)/bin:$PATH" DLOG="${DLOG:-error}" firecore -c devel/firecore.config.yaml -d ./devel/.firehose start' diff --git a/README.md b/README.md index 16fff2c..7a3c1eb 100644 --- a/README.md +++ b/README.md @@ -33,17 +33,7 @@ Now `sds` invokes `devel/sds` directly. Use [reflex](https://github.com/cespare/ reflex -c .reflex ``` -Both reflex configs pass `--plaintext` explicitly for the local/demo sidecar↔gateway path. Outside local/demo usage, configure TLS certificate/key files instead of relying on plaintext defaults. - -To keep on-chain state stable while restarting the rest of the stack, run `devenv` separately and use the stack-only reflex config: - -```bash -./devel/sds devenv -./devel/sds demo setup # writes devel/.demo.env required by `.reflex.stack` -reflex -c .reflex.stack -``` - -`.reflex.stack` now fails fast if `devel/.demo.env` is missing or does not contain the required demo variables. +The default `.reflex` flow uses the deterministic demo signer that `sds devenv` authorizes automatically. It also passes `--plaintext` explicitly for the local/demo sidecar↔gateway path. Outside local/demo usage, configure TLS certificate/key files instead of relying on plaintext defaults. 
We have `devel/sds_sink` helper that can be used to sink in data service mode (invokes `sds sink ...` configured for development environment): @@ -58,6 +48,13 @@ sds_sink run common@v0.1.0 map_clocks -s -1 The `sds devenv` command starts an Anvil node and deploys Graph Protocol contracts (requires Docker). It deploys the original `PaymentsEscrow`, `GraphPayments`, and `GraphTallyCollector` contracts, plus `SubstreamsDataService` and various mock contracts (GRTToken, Controller, Staking, etc.) for testing. Integration tests use the same devenv via testcontainers. +After deployment, `devenv` also prepares the default local demo state: + +- payer escrow funded for the default service provider +- data service provision minimum set to `0` +- default service provider provisioned and registered +- deterministic demo signer authorized for the default payer + ```bash sds devenv # Prints contract addresses and test accounts ``` @@ -115,6 +112,9 @@ Test accounts (10 ETH + 10,000 GRT each): | User1 | `0x90353af8461a969e755ef1e1dbadb9415ae5cb6e` | `0xdd02564c0e9836fb570322be23f8355761d4d04ebccdc53f4f53325227680a9f` | | User2 | `0x9585430b90248cd82cb71d5098ac3f747f89793b` | `0xbc3def46fab7929038dfb0df7e0168cba60d3384aceabf85e23e5e0ff90c8fe3` | | User3 | `0x37305c711d52007a2bcfb33b37015f1d0e9ab339` | `0x7acd0f26d5be968f73ca8f2198fa52cc595650f8d5819ee9122fe90329847c48` | +| Demo Signer | `0x82b6f0bbbab50f0ddc249e5ff60c6dc64d55340e` | `0x0bba7d355d1750fce9756af7887e826e8071a56d9d8e327f546b1f34c78f9281` | + +`sds demo setup` no longer mutates the chain. It verifies the default demo-ready state from `sds devenv` and writes `devel/.demo.env` for any manual env-driven workflows, but it is no longer required for the default `.reflex` flow. 
### Running Tests diff --git a/cmd/sds/demo_setup.go b/cmd/sds/demo_setup.go index 4270173..b3ee330 100644 --- a/cmd/sds/demo_setup.go +++ b/cmd/sds/demo_setup.go @@ -1,62 +1,35 @@ package main import ( - "context" - "encoding/hex" "fmt" - "math/big" "os" "path/filepath" "sort" "strings" - "time" - sds "github.com/graphprotocol/substreams-data-service" - "github.com/graphprotocol/substreams-data-service/contracts/artifacts" "github.com/graphprotocol/substreams-data-service/horizon/devenv" "github.com/spf13/cobra" "github.com/spf13/pflag" "github.com/streamingfast/cli" . "github.com/streamingfast/cli" "github.com/streamingfast/cli/sflags" - "github.com/streamingfast/eth-go" - "github.com/streamingfast/eth-go/rpc" ) var demoSetupCmd = Command( runDemoSetup, "setup", - "Prepare on-chain demo state for a running `sds devenv`", + "Verify demo-ready devenv state and write demo env vars", Description(` - Connects to an already-running local devenv chain and prepares the on-chain state - needed to run a manual sidecar demo: - - mint/approve/deposit escrow funds - - set provision + register service provider - - authorize a signer on-chain + Connects to an already-running local devenv chain, verifies that the default + demo-ready on-chain state is present, and writes the environment variables + required by the local demo stack. - This is intended for local development only. + This command does not mutate chain state. `), Flags(func(flags *pflag.FlagSet) { flags.String("rpc-endpoint", "http://localhost:58545", "Ethereum RPC endpoint (from `sds devenv` output)") flags.Uint64("chain-id", 1337, "Chain ID for the local devenv chain") - - // Contract addresses (defaults match current deterministic devenv). 
- flags.String("collector-address", "0x1d01649b4f94722b55b5c3b3e10fe26cd90c1ba9", "GraphTallyCollector contract address") - flags.String("escrow-address", "0xfc7487a37ca8eac2e64cba61277aa109e9b8631e", "PaymentsEscrow contract address") - flags.String("data-service-address", "0x37478fd2f5845e3664fe4155d74c00e1a4e7a5e2", "SubstreamsDataService contract address") - flags.String("staking-address", "0x32f01bc7a55d437b7a8354621a9486b9be08a3bb", "MockStaking contract address") - flags.String("grt-token-address", "0xfa7a048544f86c11206afd89b40bc987e464cb58", "MockGRTToken contract address") - - // Keys (defaults match current deterministic devenv). - flags.String("deployer-private-key", "0x1aa5d8f9a42ba0b9439c7034d24e93619f67af22a9ab15be9e4ce7eadddb5143", "Deployer private key (hex)") - flags.String("payer-private-key", "0xe4c2694501255921b6588519cfd36d4e86ddc4ce19ab1bc91d9c58057c040304", "Payer private key (hex)") - flags.String("service-provider-private-key", "0x41942233cf1d78b6e3262f1806f8da36aafa24a941031aad8e056a1d34640f8d", "Service provider private key (hex)") - - flags.String("signer-private-key", "", "Signer private key to authorize (hex). If empty, a new key is generated and printed") flags.String("env-file", "devel/.demo.env", "Write an env file for `.reflex.stack` (set to empty to disable)") - - flags.String("escrow-amount-grt", "10000", "Escrow amount to deposit in GRT (decimal, e.g. 10000)") - flags.String("provision-amount-grt", "1000", "Provision amount in GRT (decimal, e.g. 
1000)") }), ) @@ -65,182 +38,59 @@ func runDemoSetup(cmd *cobra.Command, args []string) error { rpcEndpoint := strings.TrimSpace(sflags.MustGetString(cmd, "rpc-endpoint")) chainID := sflags.MustGetUint64(cmd, "chain-id") - - collectorHex := sflags.MustGetString(cmd, "collector-address") - escrowHex := sflags.MustGetString(cmd, "escrow-address") - dataServiceHex := sflags.MustGetString(cmd, "data-service-address") - stakingHex := sflags.MustGetString(cmd, "staking-address") - tokenHex := sflags.MustGetString(cmd, "grt-token-address") - - deployerKeyHex := sflags.MustGetString(cmd, "deployer-private-key") - payerKeyHex := sflags.MustGetString(cmd, "payer-private-key") - serviceProviderKeyHex := sflags.MustGetString(cmd, "service-provider-private-key") - signerKeyHex := strings.TrimSpace(sflags.MustGetString(cmd, "signer-private-key")) envFilePath := strings.TrimSpace(sflags.MustGetString(cmd, "env-file")) - escrowAmountStr := sflags.MustGetString(cmd, "escrow-amount-grt") - provisionAmountStr := sflags.MustGetString(cmd, "provision-amount-grt") - cli.Ensure(rpcEndpoint != "", " is required") - collectorAddr, err := eth.NewAddress(collectorHex) - cli.NoError(err, "invalid %q", collectorHex) - escrowAddr, err := eth.NewAddress(escrowHex) - cli.NoError(err, "invalid %q", escrowHex) - dataServiceAddr, err := eth.NewAddress(dataServiceHex) - cli.NoError(err, "invalid %q", dataServiceHex) - stakingAddr, err := eth.NewAddress(stakingHex) - cli.NoError(err, "invalid %q", stakingHex) - tokenAddr, err := eth.NewAddress(tokenHex) - cli.NoError(err, "invalid %q", tokenHex) - - deployerKey, err := eth.NewPrivateKey(deployerKeyHex) - cli.NoError(err, "invalid %q", deployerKeyHex) - payerKey, err := eth.NewPrivateKey(payerKeyHex) - cli.NoError(err, "invalid %q", payerKeyHex) - serviceProviderKey, err := eth.NewPrivateKey(serviceProviderKeyHex) - cli.NoError(err, "invalid %q", serviceProviderKeyHex) - - var signerKey *eth.PrivateKey - if signerKeyHex != "" { - signerKey, err = 
eth.NewPrivateKey(signerKeyHex) - cli.NoError(err, "invalid %q", signerKeyHex) - } else { - signerKey, err = eth.NewRandomPrivateKey() - cli.NoError(err, "unable to generate random signer key") - } - - escrowAmount, err := sds.ParseGRT(escrowAmountStr) - cli.NoError(err, "invalid %q", escrowAmountStr) - provisionAmount, err := sds.ParseGRT(provisionAmountStr) - cli.NoError(err, "invalid %q", provisionAmountStr) - - rpcClient := rpc.NewClient(rpcEndpoint) - serviceProviderAddr := serviceProviderKey.PublicKey().Address() - payerAddr := payerKey.PublicKey().Address() - signerAddr := signerKey.PublicKey().Address() - - // Load ABIs from embedded artifacts. - tokenABI, err := artifacts.LoadABI("MockGRTToken") - cli.NoError(err, "unable to load MockGRTToken ABI") - escrowABI, err := artifacts.LoadABI("PaymentsEscrow") - cli.NoError(err, "unable to load PaymentsEscrow ABI") - collectorABI, err := artifacts.LoadABI("GraphTallyCollector") - cli.NoError(err, "unable to load GraphTallyCollector ABI") - dataServiceABI, err := artifacts.LoadABI("SubstreamsDataService") - cli.NoError(err, "unable to load SubstreamsDataService ABI") - stakingABI, err := artifacts.LoadABI("MockStaking") - cli.NoError(err, "unable to load MockStaking ABI") - - tokenContract := &devenv.Contract{Address: tokenAddr, ABI: tokenABI} - escrowContract := &devenv.Contract{Address: escrowAddr, ABI: escrowABI} - collectorContract := &devenv.Contract{Address: collectorAddr, ABI: collectorABI} - dataServiceContract := &devenv.Contract{Address: dataServiceAddr, ABI: dataServiceABI} - stakingContract := &devenv.Contract{Address: stakingAddr, ABI: stakingABI} - - // Mint GRT to payer. - { - data, err := tokenContract.CallData("mint", payerAddr, escrowAmount.BigInt()) - cli.NoError(err, "unable to encode MockGRTToken.mint") - cli.NoError(devenv.SendTransaction(ctx, rpcClient, deployerKey, chainID, &tokenContract.Address, sds.ZeroGRT().BigInt(), data), "minting GRT") - } - - // Approve escrow to spend GRT. 
- { - data, err := tokenContract.CallData("approve", escrowContract.Address, escrowAmount.BigInt()) - cli.NoError(err, "unable to encode MockGRTToken.approve") - cli.NoError(devenv.SendTransaction(ctx, rpcClient, payerKey, chainID, &tokenContract.Address, sds.ZeroGRT().BigInt(), data), "approving escrow spend") - } - - // Deposit into escrow for collector+serviceProvider. - { - data, err := escrowContract.CallData("deposit", collectorContract.Address, serviceProviderAddr, escrowAmount.BigInt()) - cli.NoError(err, "unable to encode PaymentsEscrow.deposit") - cli.NoError(devenv.SendTransaction(ctx, rpcClient, payerKey, chainID, &escrowContract.Address, sds.ZeroGRT().BigInt(), data), "depositing escrow") - } - - // Set provision tokens range (min=0 for demo). - { - data, err := dataServiceContract.CallData("setProvisionTokensRange", sds.ZeroGRT().BigInt()) - cli.NoError(err, "unable to encode SubstreamsDataService.setProvisionTokensRange") - cli.NoError(devenv.SendTransaction(ctx, rpcClient, deployerKey, chainID, &dataServiceContract.Address, sds.ZeroGRT().BigInt(), data), "setting provision tokens range") - } - - // Set provision for service provider. - { - data, err := stakingContract.CallData("setProvision", serviceProviderAddr, dataServiceAddr, provisionAmount.BigInt(), uint32(0), uint64(0)) - cli.NoError(err, "unable to encode MockStaking.setProvision") - cli.NoError(devenv.SendTransaction(ctx, rpcClient, deployerKey, chainID, &stakingContract.Address, sds.ZeroGRT().BigInt(), data), "setting provision") - } - - // Register service provider. 
- { - registerData := make([]byte, 32) - copy(registerData[12:], serviceProviderAddr[:]) + env := devenv.Connect(ctx, rpcEndpoint, chainID) - data, err := dataServiceContract.CallData("register", serviceProviderAddr, registerData) - cli.NoError(err, "unable to encode SubstreamsDataService.register") - cli.NoError(devenv.SendTransaction(ctx, rpcClient, serviceProviderKey, chainID, &dataServiceContract.Address, sds.ZeroGRT().BigInt(), data), "registering service provider") + if err := env.VerifyDefaultDemoState(devenv.DefaultTestSetupConfig()); err != nil { + return fmt.Errorf("demo-ready devenv state is not available at %s: %w\nrun `sds devenv` and wait for it to finish bootstrapping before re-running `sds demo setup`", rpcEndpoint, err) } - // Authorize signer. - { - proofDeadline := uint64(time.Now().Add(1 * time.Hour).Unix()) - proof, err := devenv.GenerateSignerProof(chainID, collectorAddr, proofDeadline, payerAddr, signerKey) - cli.NoError(err, "unable to generate signer proof") - - data, err := collectorContract.CallData("authorizeSigner", signerAddr, new(big.Int).SetUint64(proofDeadline), proof) - cli.NoError(err, "unable to encode GraphTallyCollector.authorizeSigner") - cli.NoError(devenv.SendTransaction(ctx, rpcClient, payerKey, chainID, &collectorContract.Address, sds.ZeroGRT().BigInt(), data), "authorizing signer") - } - - authorized, err := isAuthorized(ctx, rpcClient, collectorContract, payerAddr, signerAddr) - cli.NoError(err, "unable to verify isAuthorized") - cli.Ensure(authorized, "expected signer %s to be authorized for payer %s", signerAddr.Pretty(), payerAddr.Pretty()) - if envFilePath != "" { if err := writeDemoEnvFile(envFilePath, map[string]string{ "SDS_DEMO_CHAIN_ID": fmt.Sprintf("%d", chainID), "SDS_DEMO_RPC_ENDPOINT": rpcEndpoint, - "SDS_DEMO_COLLECTOR_ADDRESS": collectorAddr.Pretty(), - "SDS_DEMO_ESCROW_ADDRESS": escrowAddr.Pretty(), - "SDS_DEMO_DATA_SERVICE_ADDRESS": dataServiceAddr.Pretty(), - "SDS_DEMO_SERVICE_PROVIDER_ADDRESS": 
serviceProviderAddr.Pretty(), - "SDS_DEMO_PAYER_ADDRESS": payerAddr.Pretty(), - "SDS_DEMO_SIGNER_PRIVATE_KEY": "0x" + signerKey.String(), - "SDS_DEMO_SIGNER_ADDRESS": signerAddr.Pretty(), + "SDS_DEMO_COLLECTOR_ADDRESS": env.Collector.Address.Pretty(), + "SDS_DEMO_ESCROW_ADDRESS": env.Escrow.Address.Pretty(), + "SDS_DEMO_DATA_SERVICE_ADDRESS": env.DataService.Address.Pretty(), + "SDS_DEMO_SERVICE_PROVIDER_ADDRESS": env.ServiceProvider.Address.Pretty(), + "SDS_DEMO_PAYER_ADDRESS": env.Payer.Address.Pretty(), + "SDS_DEMO_SIGNER_PRIVATE_KEY": "0x" + env.DemoSigner.PrivateKey.String(), + "SDS_DEMO_SIGNER_ADDRESS": env.DemoSigner.Address.Pretty(), }); err != nil { return err } - fmt.Printf("Wrote demo env file for `.reflex.stack`: %s\n\n", envFilePath) + fmt.Printf("Verified demo-ready chain state and wrote demo env file: %s\n\n", envFilePath) } - fmt.Printf("\nDemo state prepared successfully.\n\n") - fmt.Printf("AUTHORIZED SIGNER:\n") - fmt.Printf(" Signer address: %s\n", signerAddr.Pretty()) - fmt.Printf(" Signer private key: 0x%s\n", signerKey.String()) + fmt.Printf("\nDemo-ready state verified successfully.\n\n") + fmt.Printf("AUTHORIZED DEMO SIGNER:\n") + fmt.Printf(" Signer address: %s\n", env.DemoSigner.Address.Pretty()) + fmt.Printf(" Signer private key: 0x%s\n", env.DemoSigner.PrivateKey.String()) fmt.Printf("\n") fmt.Printf("START COMMANDS:\n") fmt.Printf(" Provider gateway:\n") fmt.Printf(" sds provider gateway --service-provider %s --collector-address %s --escrow-address %s --rpc-endpoint %s\n", - serviceProviderAddr.Pretty(), - collectorAddr.Pretty(), - escrowAddr.Pretty(), + env.ServiceProvider.Address.Pretty(), + env.Collector.Address.Pretty(), + env.Escrow.Address.Pretty(), rpcEndpoint, ) fmt.Printf("\n") fmt.Printf(" Consumer sidecar:\n") fmt.Printf(" sds consumer sidecar --signer-private-key 0x%s --collector-address %s\n", - signerKey.String(), - collectorAddr.Pretty(), + env.DemoSigner.PrivateKey.String(), + env.Collector.Address.Pretty(), ) 
fmt.Printf("\n") fmt.Printf(" Demo flow:\n") fmt.Printf(" sds demo flow --payer-address %s --receiver-address %s --data-service-address %s --provider-endpoint http://localhost:9001 --consumer-sidecar-addr http://localhost:9002\n", - payerAddr.Pretty(), - serviceProviderAddr.Pretty(), - dataServiceAddr.Pretty(), + env.Payer.Address.Pretty(), + env.ServiceProvider.Address.Pretty(), + env.DataService.Address.Pretty(), ) return nil @@ -249,7 +99,7 @@ func runDemoSetup(cmd *cobra.Command, args []string) error { func writeDemoEnvFile(path string, exports map[string]string) error { var b strings.Builder b.WriteString("# Generated by `sds demo setup`\n") - b.WriteString("# Source this file to align `.reflex.stack` with the authorized signer.\n") + b.WriteString("# Source this file to align `.reflex.stack` with the deterministic demo signer.\n") keys := make([]string, 0, len(exports)) for k := range exports { @@ -275,32 +125,3 @@ func writeDemoEnvFile(path string, exports map[string]string) error { } return nil } - -func isAuthorized(ctx context.Context, rpcClient *rpc.Client, collector *devenv.Contract, payer, signer eth.Address) (bool, error) { - fn := collector.ABI.FindFunctionByName("isAuthorized") - if fn == nil { - return false, fmt.Errorf("isAuthorized function not found in ABI") - } - - data, err := fn.NewCall(payer, signer).Encode() - if err != nil { - return false, fmt.Errorf("encoding isAuthorized call: %w", err) - } - - params := rpc.CallParams{To: collector.Address, Data: data} - resultHex, err := rpcClient.Call(ctx, params) - if err != nil { - return false, fmt.Errorf("calling isAuthorized: %w", err) - } - - resultHex = strings.TrimPrefix(resultHex, "0x") - out, err := hex.DecodeString(resultHex) - if err != nil { - return false, fmt.Errorf("decoding isAuthorized result: %w", err) - } - if len(out) != 32 { - return false, fmt.Errorf("unexpected isAuthorized result length: %d", len(out)) - } - - return out[31] == 1, nil -} diff --git a/cmd/sds/devenv.go 
b/cmd/sds/devenv.go index 17d6fd3..d16b640 100644 --- a/cmd/sds/devenv.go +++ b/cmd/sds/devenv.go @@ -31,6 +31,7 @@ var devenvCmd = Command( - GraphPayments: Original payment distribution contract - GraphTallyCollector: Original RAV verification contract - SubstreamsDataService: Data service contract + - Default demo-ready chain state (escrow, provision, registration, signer auth) Press Ctrl+C to shut down the environment. `), diff --git a/horizon/devenv/connect.go b/horizon/devenv/connect.go new file mode 100644 index 0000000..eac9ddb --- /dev/null +++ b/horizon/devenv/connect.go @@ -0,0 +1,38 @@ +package devenv + +import ( + "context" + + "github.com/streamingfast/eth-go" + "github.com/streamingfast/eth-go/rpc" +) + +// Connect creates a detached Env view over a running deterministic devenv chain. +// It does not start or own any container lifecycle. +func Connect(ctx context.Context, rpcURL string, chainID uint64) *Env { + ctx, cancel := context.WithCancel(ctx) + accounts := DefaultAccounts() + contracts := DefaultContractAddresses() + + return &Env{ + ctx: ctx, + cancel: cancel, + rpcClient: rpc.NewClient(rpcURL), + RPCURL: rpcURL, + ChainID: chainID, + GRTToken: &Contract{Address: contracts.GRTToken, ABI: mustLoadContract("MockGRTToken").ABI}, + Controller: &Contract{Address: eth.Address{}, ABI: mustLoadContract("MockController").ABI}, + Staking: &Contract{Address: contracts.Staking, ABI: mustLoadContract("MockStaking").ABI}, + Escrow: &Contract{Address: contracts.Escrow, ABI: mustLoadContract("PaymentsEscrow").ABI}, + GraphPayments: &Contract{Address: eth.Address{}, ABI: mustLoadContract("GraphPayments").ABI}, + Collector: &Contract{Address: contracts.Collector, ABI: mustLoadContract("GraphTallyCollector").ABI}, + DataService: &Contract{Address: contracts.DataService, ABI: mustLoadContract("SubstreamsDataService").ABI}, + Deployer: accounts.Deployer, + ServiceProvider: accounts.ServiceProvider, + Payer: accounts.Payer, + User1: accounts.User1, + User2: 
accounts.User2, + User3: accounts.User3, + DemoSigner: accounts.DemoSigner, + } +} diff --git a/horizon/devenv/defaults.go b/horizon/devenv/defaults.go new file mode 100644 index 0000000..2683d86 --- /dev/null +++ b/horizon/devenv/defaults.go @@ -0,0 +1,59 @@ +package devenv + +import "github.com/streamingfast/eth-go" + +const ( + DefaultDeployerPrivateKeyHex = "1aa5d8f9a42ba0b9439c7034d24e93619f67af22a9ab15be9e4ce7eadddb5143" + DefaultServiceProviderPrivateKeyHex = "41942233cf1d78b6e3262f1806f8da36aafa24a941031aad8e056a1d34640f8d" + DefaultPayerPrivateKeyHex = "e4c2694501255921b6588519cfd36d4e86ddc4ce19ab1bc91d9c58057c040304" + DefaultUser1PrivateKeyHex = "dd02564c0e9836fb570322be23f8355761d4d04ebccdc53f4f53325227680a9f" + DefaultUser2PrivateKeyHex = "bc3def46fab7929038dfb0df7e0168cba60d3384aceabf85e23e5e0ff90c8fe3" + DefaultUser3PrivateKeyHex = "7acd0f26d5be968f73ca8f2198fa52cc595650f8d5819ee9122fe90329847c48" + DefaultDemoSignerPrivateKeyHex = "0bba7d355d1750fce9756af7887e826e8071a56d9d8e327f546b1f34c78f9281" + + DefaultGraphTallyCollectorAddressHex = "0x1d01649b4f94722b55b5c3b3e10fe26cd90c1ba9" + DefaultPaymentsEscrowAddressHex = "0xfc7487a37ca8eac2e64cba61277aa109e9b8631e" + DefaultSubstreamsDataServiceHex = "0x37478fd2f5845e3664fe4155d74c00e1a4e7a5e2" + DefaultMockGRTTokenAddressHex = "0xfa7a048544f86c11206afd89b40bc987e464cb58" + DefaultMockStakingAddressHex = "0x32f01bc7a55d437b7a8354621a9486b9be08a3bb" +) + +type DeterministicAccounts struct { + Deployer Account + ServiceProvider Account + Payer Account + User1 Account + User2 Account + User3 Account + DemoSigner Account +} + +type DeterministicContracts struct { + Collector eth.Address + Escrow eth.Address + DataService eth.Address + GRTToken eth.Address + Staking eth.Address +} + +func DefaultAccounts() DeterministicAccounts { + return DeterministicAccounts{ + Deployer: mustAccountFromHex(DefaultDeployerPrivateKeyHex), + ServiceProvider: mustAccountFromHex(DefaultServiceProviderPrivateKeyHex), + Payer: 
mustAccountFromHex(DefaultPayerPrivateKeyHex), + User1: mustAccountFromHex(DefaultUser1PrivateKeyHex), + User2: mustAccountFromHex(DefaultUser2PrivateKeyHex), + User3: mustAccountFromHex(DefaultUser3PrivateKeyHex), + DemoSigner: mustAccountFromHex(DefaultDemoSignerPrivateKeyHex), + } +} + +func DefaultContractAddresses() DeterministicContracts { + return DeterministicContracts{ + Collector: eth.MustNewAddress(DefaultGraphTallyCollectorAddressHex), + Escrow: eth.MustNewAddress(DefaultPaymentsEscrowAddressHex), + DataService: eth.MustNewAddress(DefaultSubstreamsDataServiceHex), + GRTToken: eth.MustNewAddress(DefaultMockGRTTokenAddressHex), + Staking: eth.MustNewAddress(DefaultMockStakingAddressHex), + } +} diff --git a/horizon/devenv/devenv.go b/horizon/devenv/devenv.go index 8a0d46f..4ecd5c3 100644 --- a/horizon/devenv/devenv.go +++ b/horizon/devenv/devenv.go @@ -44,6 +44,7 @@ type Env struct { User1 Account User2 Account User3 Account + DemoSigner Account } var ( @@ -191,12 +192,14 @@ func start(ctx context.Context, opts ...Option) (*Env, error) { // Create test accounts (deterministic keys for reproducibility) report("Creating test accounts...") - deployer := mustAccountFromHex("1aa5d8f9a42ba0b9439c7034d24e93619f67af22a9ab15be9e4ce7eadddb5143") - serviceProvider := mustAccountFromHex("41942233cf1d78b6e3262f1806f8da36aafa24a941031aad8e056a1d34640f8d") - payer := mustAccountFromHex("e4c2694501255921b6588519cfd36d4e86ddc4ce19ab1bc91d9c58057c040304") - user1 := mustAccountFromHex("dd02564c0e9836fb570322be23f8355761d4d04ebccdc53f4f53325227680a9f") - user2 := mustAccountFromHex("bc3def46fab7929038dfb0df7e0168cba60d3384aceabf85e23e5e0ff90c8fe3") - user3 := mustAccountFromHex("7acd0f26d5be968f73ca8f2198fa52cc595650f8d5819ee9122fe90329847c48") + accountsConfig := DefaultAccounts() + deployer := accountsConfig.Deployer + serviceProvider := accountsConfig.ServiceProvider + payer := accountsConfig.Payer + user1 := accountsConfig.User1 + user2 := accountsConfig.User2 + user3 
:= accountsConfig.User3 + demoSigner := accountsConfig.DemoSigner // Fund all test accounts from dev account (10 ETH each) report("Funding test accounts...") @@ -210,6 +213,7 @@ func start(ctx context.Context, opts ...Option) (*Env, error) { "user1": user1.Address, "user2": user2.Address, "user3": user3.Address, + "demo_signer": demoSigner.Address, } { if err := fundFromDevAccount(ctx, rpcClient, devAccount, addr, fundAmount); err != nil { zlog.Error("failed to fund account", zap.String("name", name), zap.Error(err)) @@ -249,6 +253,7 @@ func start(ctx context.Context, opts ...Option) (*Env, error) { User1: user1, User2: user2, User3: user3, + DemoSigner: demoSigner, } // Mint GRT to all test accounts @@ -260,6 +265,7 @@ func start(ctx context.Context, opts ...Option) (*Env, error) { "user1": user1.Address, "user2": user2.Address, "user3": user3.Address, + "demo_signer": demoSigner.Address, } { if err := env.MintGRT(addr, config.EscrowAmount); err != nil { env.cleanup() @@ -267,6 +273,15 @@ func start(ctx context.Context, opts ...Option) (*Env, error) { } } + report("Preparing default demo-ready chain state...") + if err := env.PrepareDefaultDemoState(&TestSetupConfig{ + EscrowAmount: new(big.Int).Set(config.EscrowAmount), + ProvisionAmount: new(big.Int).Set(config.ProvisionAmount), + }); err != nil { + env.cleanup() + return nil, fmt.Errorf("preparing default demo-ready state: %w", err) + } + report("Development environment ready") return env, nil @@ -529,6 +544,12 @@ func (env *Env) PrintInfo(w io.Writer) { fmt.Fprintf(w, " User1: %s (0x%s)\n", env.User1.Address.Pretty(), env.User1.PrivateKey.String()) fmt.Fprintf(w, " User2: %s (0x%s)\n", env.User2.Address.Pretty(), env.User2.PrivateKey.String()) fmt.Fprintf(w, " User3: %s (0x%s)\n", env.User3.Address.Pretty(), env.User3.PrivateKey.String()) + fmt.Fprintf(w, " Demo Signer: %s (0x%s)\n", env.DemoSigner.Address.Pretty(), env.DemoSigner.PrivateKey.String()) + fmt.Fprintf(w, "\n") + fmt.Fprintf(w, "DEFAULT DEMO 
STATE:\n") + fmt.Fprintf(w, " Payer -> Provider escrow funded and ready\n") + fmt.Fprintf(w, " Service provider provisioned and registered\n") + fmt.Fprintf(w, " Demo signer authorized for payer\n") fmt.Fprintf(w, "\n") fmt.Fprintf(w, "============================================================\n") } diff --git a/horizon/devenv/helpers.go b/horizon/devenv/helpers.go index 620e7b6..34331a7 100644 --- a/horizon/devenv/helpers.go +++ b/horizon/devenv/helpers.go @@ -5,6 +5,7 @@ import ( "encoding/hex" "fmt" "math/big" + "reflect" "strings" "time" @@ -128,33 +129,48 @@ func (env *Env) MintGRT(to eth.Address, amount *big.Int) error { return SendTransaction(env.ctx, env.rpcClient, env.Deployer.PrivateKey, env.ChainID, &env.GRTToken.Address, big.NewInt(0), data) } -// ApproveGRT approves the escrow contract to spend GRT (from Payer account) -func (env *Env) ApproveGRT(amount *big.Int) error { +// ApproveGRTFrom approves the escrow contract to spend GRT from the provided payer account. +func (env *Env) ApproveGRTFrom(payer Account, amount *big.Int) error { data, err := env.GRTToken.CallData("approve", env.Escrow.Address, amount) if err != nil { return err } - return SendTransaction(env.ctx, env.rpcClient, env.Payer.PrivateKey, env.ChainID, &env.GRTToken.Address, big.NewInt(0), data) + return SendTransaction(env.ctx, env.rpcClient, payer.PrivateKey, env.ChainID, &env.GRTToken.Address, big.NewInt(0), data) } -// DepositEscrow deposits GRT into escrow (from Payer to Collector for ServiceProvider) -func (env *Env) DepositEscrow(amount *big.Int) error { - data, err := env.Escrow.CallData("deposit", env.Collector.Address, env.ServiceProvider.Address, amount) +// ApproveGRT approves the escrow contract to spend GRT (from Payer account) +func (env *Env) ApproveGRT(amount *big.Int) error { + return env.ApproveGRTFrom(env.Payer, amount) +} + +// DepositEscrowFor deposits GRT into escrow from a payer to the collector for a service provider. 
+func (env *Env) DepositEscrowFor(payer Account, serviceProvider eth.Address, amount *big.Int) error { + data, err := env.Escrow.CallData("deposit", env.Collector.Address, serviceProvider, amount) if err != nil { return err } - return SendTransaction(env.ctx, env.rpcClient, env.Payer.PrivateKey, env.ChainID, &env.Escrow.Address, big.NewInt(0), data) + return SendTransaction(env.ctx, env.rpcClient, payer.PrivateKey, env.ChainID, &env.Escrow.Address, big.NewInt(0), data) } -// SetProvision sets provision tokens for service provider -func (env *Env) SetProvision(tokens *big.Int, maxVerifierCut uint32, thawingPeriod uint64) error { - data, err := env.Staking.CallData("setProvision", env.ServiceProvider.Address, env.DataService.Address, tokens, maxVerifierCut, thawingPeriod) +// DepositEscrow deposits GRT into escrow (from Payer to Collector for ServiceProvider) +func (env *Env) DepositEscrow(amount *big.Int) error { + return env.DepositEscrowFor(env.Payer, env.ServiceProvider.Address, amount) +} + +// SetProvisionFor sets provision tokens for the selected service provider. +func (env *Env) SetProvisionFor(serviceProvider eth.Address, tokens *big.Int, maxVerifierCut uint32, thawingPeriod uint64) error { + data, err := env.Staking.CallData("setProvision", serviceProvider, env.DataService.Address, tokens, maxVerifierCut, thawingPeriod) if err != nil { return err } return SendTransaction(env.ctx, env.rpcClient, env.Deployer.PrivateKey, env.ChainID, &env.Staking.Address, big.NewInt(0), data) } +// SetProvision sets provision tokens for the default service provider. 
+func (env *Env) SetProvision(tokens *big.Int, maxVerifierCut uint32, thawingPeriod uint64) error { + return env.SetProvisionFor(env.ServiceProvider.Address, tokens, maxVerifierCut, thawingPeriod) +} + // SetProvisionTokensRange sets the minimum provision tokens for the data service func (env *Env) SetProvisionTokensRange(minimumProvisionTokens *big.Int) error { data, err := env.DataService.CallData("setProvisionTokensRange", minimumProvisionTokens) @@ -164,27 +180,32 @@ func (env *Env) SetProvisionTokensRange(minimumProvisionTokens *big.Int) error { return SendTransaction(env.ctx, env.rpcClient, env.Deployer.PrivateKey, env.ChainID, &env.DataService.Address, big.NewInt(0), data) } -// RegisterServiceProvider registers the service provider with the data service -func (env *Env) RegisterServiceProvider() error { +// RegisterServiceProviderAccount registers the service provider with the data service. +func (env *Env) RegisterServiceProviderAccount(serviceProvider Account) error { // Encode the paymentsDestination as the data parameter (abi.encode(address)) registerData := make([]byte, 32) - copy(registerData[12:], env.ServiceProvider.Address[:]) + copy(registerData[12:], serviceProvider.Address[:]) - data, err := env.DataService.CallData("register", env.ServiceProvider.Address, registerData) + data, err := env.DataService.CallData("register", serviceProvider.Address, registerData) if err != nil { return err } - return SendTransaction(env.ctx, env.rpcClient, env.ServiceProvider.PrivateKey, env.ChainID, &env.DataService.Address, big.NewInt(0), data) + return SendTransaction(env.ctx, env.rpcClient, serviceProvider.PrivateKey, env.ChainID, &env.DataService.Address, big.NewInt(0), data) } -// AuthorizeSigner authorizes a signer key to sign RAVs for the payer -func (env *Env) AuthorizeSigner(signerKey *eth.PrivateKey) error { +// RegisterServiceProvider registers the default service provider with the data service. 
+func (env *Env) RegisterServiceProvider() error { + return env.RegisterServiceProviderAccount(env.ServiceProvider) +} + +// AuthorizeSignerFor authorizes a signer key to sign RAVs for the provided payer. +func (env *Env) AuthorizeSignerFor(payer Account, signerKey *eth.PrivateKey) error { signerAddr := signerKey.PublicKey().Address() // Generate proof with deadline 1 hour in the future proofDeadline := uint64(time.Now().Add(1 * time.Hour).Unix()) - proof, err := GenerateSignerProof(env.ChainID, env.Collector.Address, proofDeadline, env.Payer.Address, signerKey) + proof, err := GenerateSignerProof(env.ChainID, env.Collector.Address, proofDeadline, payer.Address, signerKey) if err != nil { return fmt.Errorf("generating signer proof: %w", err) } @@ -195,7 +216,12 @@ func (env *Env) AuthorizeSigner(signerKey *eth.PrivateKey) error { return fmt.Errorf("encoding authorizeSigner call: %w", err) } - return SendTransaction(env.ctx, env.rpcClient, env.Payer.PrivateKey, env.ChainID, &env.Collector.Address, big.NewInt(0), data) + return SendTransaction(env.ctx, env.rpcClient, payer.PrivateKey, env.ChainID, &env.Collector.Address, big.NewInt(0), data) +} + +// AuthorizeSigner authorizes a signer key to sign RAVs for the default payer. 
+func (env *Env) AuthorizeSigner(signerKey *eth.PrivateKey) error { + return env.AuthorizeSignerFor(env.Payer, signerKey) } // ThawSigner initiates thawing for a signer @@ -274,43 +300,83 @@ type TestSetupResult struct { SignerAddr eth.Address } -// SetupTestWithSigner performs common test setup: fund escrow, set provision, register, and authorize signer -func (env *Env) SetupTestWithSigner(config *TestSetupConfig) (*TestSetupResult, error) { +func cloneTestSetupConfig(config *TestSetupConfig) *TestSetupConfig { if config == nil { config = DefaultTestSetupConfig() } - // Mint GRT to payer - if err := env.MintGRT(env.Payer.Address, config.EscrowAmount); err != nil { - return nil, fmt.Errorf("minting GRT: %w", err) + return &TestSetupConfig{ + EscrowAmount: new(big.Int).Set(config.EscrowAmount), + ProvisionAmount: new(big.Int).Set(config.ProvisionAmount), + } +} + +func sameTestSetupConfig(a, b *TestSetupConfig) bool { + if a == nil || b == nil { + return a == nil && b == nil } - // Approve escrow to spend GRT - if err := env.ApproveGRT(config.EscrowAmount); err != nil { - return nil, fmt.Errorf("approving GRT: %w", err) + return reflect.DeepEqual(a.EscrowAmount, b.EscrowAmount) && reflect.DeepEqual(a.ProvisionAmount, b.ProvisionAmount) +} + +// SetupPaymentParticipants prepares escrow/provision/registration for the supplied payer and service provider. 
+func (env *Env) SetupPaymentParticipants(payer, serviceProvider Account, config *TestSetupConfig) error { + config = cloneTestSetupConfig(config) + + if err := env.MintGRT(payer.Address, config.EscrowAmount); err != nil { + return fmt.Errorf("minting GRT: %w", err) } - // Deposit to escrow - if err := env.DepositEscrow(config.EscrowAmount); err != nil { - return nil, fmt.Errorf("depositing to escrow: %w", err) + if err := env.ApproveGRTFrom(payer, config.EscrowAmount); err != nil { + return fmt.Errorf("approving GRT: %w", err) + } + + if err := env.DepositEscrowFor(payer, serviceProvider.Address, config.EscrowAmount); err != nil { + return fmt.Errorf("depositing to escrow: %w", err) } - // Set provision tokens range (min = 0 for testing) if err := env.SetProvisionTokensRange(big.NewInt(0)); err != nil { - return nil, fmt.Errorf("setting provision tokens range: %w", err) + return fmt.Errorf("setting provision tokens range: %w", err) + } + + if err := env.SetProvisionFor(serviceProvider.Address, config.ProvisionAmount, 0, 0); err != nil { + return fmt.Errorf("setting provision: %w", err) + } + + if err := env.RegisterServiceProviderAccount(serviceProvider); err != nil { + return fmt.Errorf("registering with data service: %w", err) + } + + return nil +} + +// SetupCustomPaymentParticipantsWithSigner prepares a custom payer/provider pair and authorizes a fresh signer for it. 
+func (env *Env) SetupCustomPaymentParticipantsWithSigner(payer, serviceProvider Account, config *TestSetupConfig) (*TestSetupResult, error) { + if err := env.SetupPaymentParticipants(payer, serviceProvider, config); err != nil { + return nil, err + } + + signerKey, err := eth.NewRandomPrivateKey() + if err != nil { + return nil, fmt.Errorf("creating signer key: %w", err) } - // Set provision for service provider - if err := env.SetProvision(config.ProvisionAmount, 0, 0); err != nil { - return nil, fmt.Errorf("setting provision: %w", err) + if err := env.AuthorizeSignerFor(payer, signerKey); err != nil { + return nil, fmt.Errorf("authorizing signer: %w", err) } - // Register service provider with data service - if err := env.RegisterServiceProvider(); err != nil { - return nil, fmt.Errorf("registering with data service: %w", err) + return &TestSetupResult{ + SignerKey: signerKey, + SignerAddr: signerKey.PublicKey().Address(), + }, nil +} + +// SetupTestWithSigner authorizes a fresh signer against the default demo-ready payer state. +func (env *Env) SetupTestWithSigner(config *TestSetupConfig) (*TestSetupResult, error) { + if config != nil && !sameTestSetupConfig(config, DefaultTestSetupConfig()) { + return nil, fmt.Errorf("custom test setup must use SetupCustomPaymentParticipantsWithSigner") } - // Create and authorize signer signerKey, err := eth.NewRandomPrivateKey() if err != nil { return nil, fmt.Errorf("creating signer key: %w", err) @@ -352,3 +418,125 @@ func (env *Env) GetEscrowBalance(payer, receiver eth.Address) (*big.Int, error) return new(big.Int).SetBytes(result), nil } + +// GetProvisionTokensRange returns the current provision token range configured on the data service. 
+func (env *Env) GetProvisionTokensRange() (*big.Int, *big.Int, error) { + data, err := env.DataService.CallData("getProvisionTokensRange") + if err != nil { + return nil, nil, fmt.Errorf("encoding getProvisionTokensRange call: %w", err) + } + + result, err := env.CallContract(env.DataService.Address, data) + if err != nil { + return nil, nil, fmt.Errorf("calling getProvisionTokensRange: %w", err) + } + if len(result) != 64 { + return nil, nil, fmt.Errorf("unexpected getProvisionTokensRange result length: %d", len(result)) + } + + return new(big.Int).SetBytes(result[:32]), new(big.Int).SetBytes(result[32:]), nil +} + +// IsServiceProviderRegistered reports whether the provider is registered in the data service. +func (env *Env) IsServiceProviderRegistered(serviceProvider eth.Address) (bool, error) { + data, err := env.DataService.CallData("isRegistered", serviceProvider) + if err != nil { + return false, fmt.Errorf("encoding isRegistered call: %w", err) + } + + result, err := env.CallContract(env.DataService.Address, data) + if err != nil { + return false, fmt.Errorf("calling isRegistered: %w", err) + } + if len(result) != 32 { + return false, fmt.Errorf("unexpected isRegistered result length: %d", len(result)) + } + + return result[31] == 1, nil +} + +// GetProviderTokensAvailable returns the provision available for a provider and data service. 
+func (env *Env) GetProviderTokensAvailable(serviceProvider, dataService eth.Address) (*big.Int, error) { + data, err := env.Staking.CallData("getProviderTokensAvailable", serviceProvider, dataService) + if err != nil { + return nil, fmt.Errorf("encoding getProviderTokensAvailable call: %w", err) + } + + result, err := env.CallContract(env.Staking.Address, data) + if err != nil { + return nil, fmt.Errorf("calling getProviderTokensAvailable: %w", err) + } + if len(result) != 32 { + return nil, fmt.Errorf("unexpected getProviderTokensAvailable result length: %d", len(result)) + } + + return new(big.Int).SetBytes(result), nil +} + +// PrepareDefaultDemoState applies the default payer/provider setup and authorizes the deterministic demo signer. +func (env *Env) PrepareDefaultDemoState(config *TestSetupConfig) error { + config = cloneTestSetupConfig(config) + + if err := env.SetupPaymentParticipants(env.Payer, env.ServiceProvider, config); err != nil { + return err + } + + if err := env.AuthorizeSigner(env.DemoSigner.PrivateKey); err != nil { + return fmt.Errorf("authorizing deterministic demo signer: %w", err) + } + + if err := env.VerifyDefaultDemoState(config); err != nil { + return err + } + + return nil +} + +// VerifyDefaultDemoState confirms the deterministic payer/provider demo state is ready. 
+func (env *Env) VerifyDefaultDemoState(config *TestSetupConfig) error { + config = cloneTestSetupConfig(config) + + escrowBalance, err := env.GetEscrowBalance(env.Payer.Address, env.ServiceProvider.Address) + if err != nil { + return err + } + if escrowBalance.Cmp(config.EscrowAmount) < 0 { + return fmt.Errorf("expected escrow balance >= %s for payer %s and provider %s, got %s", + config.EscrowAmount.String(), env.Payer.Address.Pretty(), env.ServiceProvider.Address.Pretty(), escrowBalance.String()) + } + + minProvision, _, err := env.GetProvisionTokensRange() + if err != nil { + return err + } + if minProvision.Cmp(big.NewInt(0)) != 0 { + return fmt.Errorf("expected minimum provision tokens to be 0, got %s", minProvision.String()) + } + + provision, err := env.GetProviderTokensAvailable(env.ServiceProvider.Address, env.DataService.Address) + if err != nil { + return err + } + if provision.Cmp(config.ProvisionAmount) < 0 { + return fmt.Errorf("expected provision >= %s for provider %s, got %s", + config.ProvisionAmount.String(), env.ServiceProvider.Address.Pretty(), provision.String()) + } + + registered, err := env.IsServiceProviderRegistered(env.ServiceProvider.Address) + if err != nil { + return err + } + if !registered { + return fmt.Errorf("service provider %s is not registered in SubstreamsDataService", env.ServiceProvider.Address.Pretty()) + } + + authorized, err := env.IsAuthorized(env.Payer.Address, env.DemoSigner.Address) + if err != nil { + return err + } + if !authorized { + return fmt.Errorf("demo signer %s is not authorized for payer %s", env.DemoSigner.Address.Pretty(), env.Payer.Address.Pretty()) + } + + return nil +} diff --git a/test/integration/authorization_test.go b/test/integration/authorization_test.go index 05b4f53..e86d32d 100644 --- a/test/integration/authorization_test.go +++ b/test/integration/authorization_test.go @@ -16,16 +16,6 @@ func TestAuthorizeSignerFlow(t *testing.T) { env := SetupEnv(t) zlog.Info("starting 
TestAuthorizeSignerFlow", zap.Uint64("chain_id", env.ChainID)) - // Setup escrow, provision, and create a signer key (but don't authorize it yet - we test that below) - config := DefaultTestSetupConfig() - - require.NoError(t, callMintGRT(env, env.Payer.Address, config.EscrowAmount), "Failed to mint GRT") - require.NoError(t, callApproveGRT(env, config.EscrowAmount), "Failed to approve GRT") - require.NoError(t, callDepositEscrow(env, config.EscrowAmount), "Failed to deposit to escrow") - require.NoError(t, callSetProvisionTokensRange(env, big.NewInt(0)), "Failed to set provision tokens range") - require.NoError(t, callSetProvision(env, config.ProvisionAmount, 0, 0), "Failed to set provision") - require.NoError(t, callRegisterWithDataService(env), "Failed to register with data service") - // Create a signer key (different from payer) - we'll authorize it manually for this test signerKey, err := eth.NewRandomPrivateKey() require.NoError(t, err) @@ -94,16 +84,6 @@ func TestUnauthorizedSignerFails(t *testing.T) { env := SetupEnv(t) zlog.Info("starting TestUnauthorizedSignerFails", zap.Uint64("chain_id", env.ChainID)) - // Setup escrow and provision (but don't authorize a signer) - config := DefaultTestSetupConfig() - - require.NoError(t, callMintGRT(env, env.Payer.Address, config.EscrowAmount), "Failed to mint GRT") - require.NoError(t, callApproveGRT(env, config.EscrowAmount), "Failed to approve GRT") - require.NoError(t, callDepositEscrow(env, config.EscrowAmount), "Failed to deposit to escrow") - require.NoError(t, callSetProvisionTokensRange(env, big.NewInt(0)), "Failed to set provision tokens range") - require.NoError(t, callSetProvision(env, config.ProvisionAmount, 0, 0), "Failed to set provision") - require.NoError(t, callRegisterWithDataService(env), "Failed to register with data service") - // Create an unauthorized signer key (intentionally not calling callAuthorizeSigner) unauthorizedKey, err := eth.NewRandomPrivateKey() require.NoError(t, err) diff --git 
a/test/integration/setup_test.go b/test/integration/setup_test.go index 3c726e1..47f9370 100644 --- a/test/integration/setup_test.go +++ b/test/integration/setup_test.go @@ -61,6 +61,14 @@ func SetupTestWithSigner(t *testing.T, env *TestEnv, config *TestSetupConfig) *T return result } +// SetupCustomTestWithSigner prepares a custom payer/provider pair and returns a fresh authorized signer for it. +func SetupCustomTestWithSigner(t *testing.T, env *TestEnv, payer, serviceProvider Account, config *TestSetupConfig) *TestSetupResult { + t.Helper() + result, err := env.SetupCustomPaymentParticipantsWithSigner(payer, serviceProvider, config) + require.NoError(t, err, "Failed to setup custom test with signer") + return result +} + // mustNewCollectionID creates a CollectionID from a hex string or panics func mustNewCollectionID(hexStr string) horizon.CollectionID { return devenv.MustNewCollectionID(hexStr) diff --git a/test/integration/substreams_flow_test.go b/test/integration/substreams_flow_test.go index c236c28..a79f5b6 100644 --- a/test/integration/substreams_flow_test.go +++ b/test/integration/substreams_flow_test.go @@ -518,7 +518,7 @@ func TestSubstreamsNetworkPaymentsFlow(t *testing.T) { // Setup escrow, provision, register, and authorize signer config := DefaultTestSetupConfig() - setup := SetupTestWithSigner(t, env, config) + setup := SetupTestWithSigner(t, env, nil) signerKey := setup.SignerKey signerAddr := setup.SignerAddr @@ -707,7 +707,9 @@ func TestSubstreamsFlowWithInsufficientEscrow(t *testing.T) { EscrowAmount: smallEscrow, ProvisionAmount: DefaultTestSetupConfig().ProvisionAmount, } - setup := SetupTestWithSigner(t, env, config) + payer := env.User1 + serviceProvider := env.User2 + setup := SetupCustomTestWithSigner(t, env, payer, serviceProvider, config) signerKey := setup.SignerKey signerAddr := setup.SignerAddr @@ -719,8 +721,8 @@ func TestSubstreamsFlowWithInsufficientEscrow(t *testing.T) { "ConsumerSidecar", domain, signerKey, - env.Payer.Address, 
- env.ServiceProvider.Address, + payer.Address, + serviceProvider.Address, env.DataService.Address, collectionID, ) @@ -732,8 +734,8 @@ func TestSubstreamsFlowWithInsufficientEscrow(t *testing.T) { collectionID, smallEscrow, // Small escrow env, - env.Payer.Address, - env.ServiceProvider.PrivateKey, + payer.Address, + serviceProvider.PrivateKey, ) substreamsClient := NewSubstreamsClient("SubstreamsClient", consumerSidecar) @@ -742,9 +744,9 @@ func TestSubstreamsFlowWithInsufficientEscrow(t *testing.T) { "BlockProvider", providerGateway, env.DataService.Address, - env.ServiceProvider.PrivateKey, + serviceProvider.PrivateKey, env.Collector.Address, - env.ServiceProvider.Address, + serviceProvider.Address, ) // Start session @@ -803,7 +805,7 @@ func TestSubstreamsFlowMultipleRAVRequests(t *testing.T) { // Setup escrow, provision, register, and authorize signer config := DefaultTestSetupConfig() - setup := SetupTestWithSigner(t, env, config) + setup := SetupTestWithSigner(t, env, nil) signerKey := setup.SignerKey signerAddr := setup.SignerAddr From 799f001354dc403ee6b476d3e3355781afb02b22 Mon Sep 17 00:00:00 2001 From: Juan Manuel Rodriguez Defago Date: Wed, 25 Mar 2026 01:16:08 -0300 Subject: [PATCH 05/17] docs: realign MVP planning docs Rewrite the MVP scope, gap analysis, and implementation backlog so they reflect the agreed architecture and the current repo state. 
- make the consumer sidecar the primary SDS-facing boundary - align discovery, pricing, and session semantics with the revised MVP decisions - update gap and backlog status to reflect recent provider runtime and persistence work - narrow remaining open questions to auth and observability --- docs/mvp-scope.md | 160 +++++++----- plans/mvp-gap-analysis.md | 245 +++++++++++------ plans/mvp-implementation-backlog.md | 392 ++++++++++++++-------------- 3 files changed, 467 insertions(+), 330 deletions(-) diff --git a/docs/mvp-scope.md b/docs/mvp-scope.md index 9317a29..3d0717f 100644 --- a/docs/mvp-scope.md +++ b/docs/mvp-scope.md @@ -1,6 +1,7 @@ # Substreams Data Service MVP Scope -Drafted: 2026-03-12 +Drafted: 2026-03-12 +Revised: 2026-03-24 ## Purpose @@ -12,9 +13,9 @@ It is intended to be the stable source of truth for: - product and operational scope - architectural decisions and their rationale - MVP acceptance scenarios -- explicit non-goals and open questions +- explicit non-goals and remaining open questions -It is not a task tracker. Detailed current-state assessment and implementation tracking should live in a separate document. +It is not a task tracker. Detailed current-state assessment and implementation tracking should live in separate planning documents. ## Audience @@ -28,11 +29,17 @@ This document is written for: The SDS MVP is a usable end-to-end payment-enabled Substreams stack, not just a local demo. It must support real provider discovery, real consumer and provider integration paths, paid streaming with provider-authoritative byte metering, live low-funds handling, durable provider-side payment state, manual operator-driven funding and settlement workflows, and a production-oriented transport/security posture. -The MVP may intentionally simplify parts of the system where doing so materially reduces implementation complexity without invalidating the architecture. 
In particular, the MVP may assume session-local funding logic and defer correct payer-level aggregate exposure handling across concurrent streams. +The MVP should preserve Substreams-compatible data-plane usage as much as possible. Existing users should be able to point Substreams tooling at a consumer sidecar endpoint and use it like a normal Substreams endpoint, while SDS-specific discovery, payment, and provider coordination happen behind that boundary. + +The MVP may intentionally simplify parts of the system where doing so materially reduces implementation complexity without invalidating the architecture. In particular, the MVP may assume: + +- session-local funding logic rather than payer-global aggregate exposure control across concurrent streams +- oracle-authoritative pricing across a curated provider set +- fresh SDS payment sessions for new requests or reconnects rather than payment-session continuation ## Current Status Summary -As of 2026-03-12, the repo already contains important parts of the MVP foundation: +As of 2026-03-24, the repo already contains important parts of the MVP foundation: - working Horizon V2 / TAP signing, verification, and aggregation - deterministic local chain/contracts and integration coverage @@ -44,23 +51,23 @@ As of 2026-03-12, the repo already contains important parts of the MVP foundatio However, the current repo does not yet constitute the MVP. 
Major remaining gaps include: - standalone oracle/discovery component -- real production-path provider and consumer integration completion +- consumer-side endpoint compatibility that hides SDS control flow behind a Substreams-compatible ingress - provider-side durable persistence for accepted RAV and collection state - low-funds stop/pause behavior during live streaming - operator funding and settlement CLI flows - authenticated admin/operator surfaces -- finalization of several protocol decisions called out below as open questions +- finalization of observability scope See `plans/mvp-gap-analysis.md` for a detailed status map. ## Goals - Deliver a full SDS stack that can be used against a real provider deployment, initially expected to be StreamingFast. -- Make the consumer sidecar the mandatory client-side integration component. +- Make the consumer sidecar the mandatory client-side SDS integration component and primary user entrypoint. +- Preserve backwards-compatible Substreams data-plane interaction semantics through the consumer sidecar. - Use a standalone oracle service for provider discovery, while still supporting direct provider configuration as fallback. - Use provider-authoritative byte metering as the billing source of truth. -- Support reconnect/resume behavior without making consumer-local persistence mandatory. -- Preserve accepted RAV and settlement-relevant state durably on the provider side. +- Use provider-authoritative accepted payment state and durable provider-side settlement state. - Support manual operator-driven funding and collection workflows through CLI tooling. - Use TLS by default outside local/dev usage. @@ -69,6 +76,8 @@ See `plans/mvp-gap-analysis.md` for a detailed status map. - Correct aggregate funding/exposure handling across multiple concurrent streams for the same payer. - Blocking concurrent streams at runtime. - Permissionless oracle provider sourcing from on-chain registry data. 
+- Provider-specific or negotiated pricing during MVP session handshake. +- Payment-session continuation or RAV-lineage reuse across reconnects. - Wallet-connected end-user funding UI. - Automated/background settlement collection. - Rich provider ranking or QoS-based oracle selection. @@ -78,72 +87,95 @@ See `plans/mvp-gap-analysis.md` for a detailed status map. ### 1. Discover Provider +- The consumer sidecar is the default SDS-facing entrypoint for the user. +- The consumer sidecar derives the requested network from the Substreams package by default. - The consumer sidecar queries a standalone oracle service. - The oracle receives the requested chain/network context. - The oracle returns: - the eligible provider set - a recommended provider choice + - the provider control-plane endpoint for the selected provider - The consumer sidecar uses the recommended provider by default. -- Direct provider configuration remains a supported fallback path. +- Direct provider configuration remains a supported fallback or override path. +- The oracle is not required to fully resolve the data-plane endpoint used for streaming. + +MVP network-discovery contract: -### 2. Initialize or Reconnect a Paid Session +- The consumer sidecar derives network from the Substreams package by default. +- If a package or module resolves a specific `networks` entry, that takes precedence over top-level `network`. +- Explicit user-supplied network input remains supported only as fallback when package derivation is unavailable. +- If both explicit input and package-derived network exist and differ after normalization, the request fails fast. +- If neither source yields a usable network, the request fails fast. +- SDS uses the same canonical network keys as the Graph networks registry for MVP, with repo-owned or pinned mappings rather than live runtime registry lookups. -- The consumer sidecar initiates the provider handshake. 
-- The provider responds with either: - - a fresh zero-value RAV for a new handshake, or - - the latest known resumable RAV for a reconnect +### 2. Initialize a Paid Session + +- The consumer sidecar initiates the provider handshake using the selected provider control-plane endpoint. +- Every new request or connection creates a fresh SDS payment session. +- The provider returns session-specific information needed to begin streaming, including the data-plane endpoint for that session. - The provider remains the authoritative side for accepted payment state. -- Recovery should be folded into the initial handshake rather than introduced as a separate recovery endpoint. +- The handshake is not a pricing negotiation step for MVP. +- Pricing is already fixed by the oracle for the curated MVP provider set. ### 3. Stream and Update Payment State - The real provider integration path meters streamed bytes using the provider-side metering plugin path. -- The provider is authoritative for billable usage. -- The consumer sidecar participates in payment/session control but is not authoritative for billed byte usage. +- The provider is authoritative for billable usage and low-funds decisions during live sessions. +- The consumer sidecar coordinates payment/session control while preserving normal Substreams-style usage at its user-facing endpoint. - While streaming: - - provider reports/payment state advances + - provider-authoritative usage advances - RAVs are requested and updated as needed + - accepted payment state advances on the provider side - low-funds conditions can be surfaced during the live stream - For MVP, low-funds decisions are session-local, not payer-global across concurrent streams. ### 4. Fund or Top Up Escrow - Funding is an operator/developer workflow, not an end-user wallet UI. -- CLI tooling should make approve/deposit/top-up simple enough for MVP operations. 
-- The system should surface when additional funding is needed, but the act of funding remains external to the core runtime path. +- CLI tooling should make approve, deposit, and top-up flows simple enough for MVP operations. +- The consumer sidecar may provide lightweight advisory checks, for example: + - warning if observed on-chain balance is below a coarse threshold + - surfacing provider-reported low-funds signals more clearly + - providing rough cost estimates based on oracle pricing +- The consumer sidecar is not authoritative for funding sufficiency. +- The act of funding remains external to the core runtime path. ### 5. Collect Accepted RAVs On-Chain - The provider side stores accepted collectible RAV state durably. - Operator CLI tooling queries the provider for settlement-relevant RAV data. - The CLI crafts, signs, and submits the `collect()` transaction locally. -- The settlement signing key remains outside the provider sidecar. +- The settlement signing key remains outside the provider gateway. - Collection state should distinguish between at least: - `collectible` - `collect_pending` - `collected` +- Interrupted or restarted streams may create multiple independent collectible session records for the same payer. 
## Major MVP Decisions | Decision | MVP Choice | Short Rationale | | --- | --- | --- | -| Consumer integration | Consumer sidecar is mandatory | SDS is a full stack, not a loose protocol suggestion | -| Provider discovery | Standalone oracle component | Discovery is a real product component and should exist independently even if initial logic is simple | +| Consumer integration | Consumer sidecar is mandatory and acts as the SDS-facing Substreams-compatible endpoint/proxy | Preserve the familiar endpoint-driven data-plane workflow while hiding SDS coordination | +| Provider discovery | Standalone oracle component is the default path | Discovery is a real product component and should exist independently even if initial logic is simple | | Oracle selection logic | Whitelist plus simple selection among eligible providers | Good enough for MVP while preserving future ranking logic | -| Oracle response shape | Return eligible providers plus a recommended provider | Keeps default client behavior simple while preserving future flexibility | -| Direct provider connection | Supported as fallback | Useful bridge from current implementation and operational fallback | +| Oracle response shape | Return eligible providers, a recommended provider, and the selected provider control-plane endpoint | The oracle chooses who to talk to; the provider handshake resolves where to stream | +| Direct provider connection | Supported as fallback/override | Useful bridge from current implementation and operational fallback | +| Pricing authority | Oracle-authoritative pricing across the curated MVP provider set | Predictable pricing and simpler consumer/provider behavior while providers are manually curated | +| Billing unit | Streamed bytes | Aligns with provider-authoritative metering path | | Funding model | Session-local low-funds logic | Avoids premature distributed liability accounting for concurrent streams | +| Funding UX | CLI/operator-driven with only lightweight consumer-side 
advisory guidance | Keeps MVP simple without pretending the consumer knows provider-side liability | | Concurrent streams | Documented limitation, not blocked | Simpler MVP with explicit limitation instead of partial enforcement | -| Billing unit | Streamed bytes | Aligns with provider-authoritative metering path | -| Funding UX | CLI/operator-driven | Avoids premature UI scope | -| Collection execution | CLI signs and submits locally | Keeps settlement key custody outside provider sidecar | +| Collection execution | CLI signs and submits locally | Keeps settlement key custody outside provider-side runtime | | Provider payment state | Durable persistence required | Losing accepted RAV state is unacceptable | -| Consumer persistence | Not required for MVP | Better to recover from provider-authoritative state during handshake | -| Recovery shape | Part of initial handshake | Avoids separate recovery API surface | +| Consumer persistence | Durable local payment-session persistence is not required for MVP | New requests create fresh sessions instead of relying on payment-session recovery | +| Recovery shape | Fresh payment session per new request/connection | Avoids identity/recovery complexity around RAV reuse and session continuation | +| Provider control plane | Provider gateway is the authoritative provider-side SDS boundary | Gives the provider side a clear public control-plane authority | | Security posture | TLS by default outside local/dev | Better security without forcing heavy hardening | | Admin/operator actions | Require authentication | Oracle governance and provider operations should not be effectively public | | Real integration | Real provider and consumer paths are mandatory | Local demo flow is insufficient for MVP | | Validation scope | One real provider environment is enough for MVP acceptance | Narrow operational validation is acceptable if architecture stays generic | +| Compatibility constraint | Preserve backwards-compatible data-plane 
interaction semantics | SDS may add management workflows, but the data-plane experience should remain familiar | ## Why Multi-Stream Support Is Deferred @@ -156,9 +188,9 @@ If a single payer can run multiple streams from different machines, correct fund - session liveness and stale-session cleanup - race-safe exposure accounting when streams start concurrently - clear rules for pending requested RAVs, accepted RAVs, and unaggregated usage -- restart and resume semantics that avoid duplicated or lost liability +- restart and reconnect semantics that avoid duplicated or lost liability -That is a materially larger distributed-state problem than the session-local MVP design. The MVP therefore documents concurrent streams as a known limitation for funding-control correctness and does not attempt to enforce or fully solve them. +That is a materially larger distributed-state problem than the session-local MVP design. The MVP therefore documents concurrent streams as a known limitation and does not attempt to enforce or fully solve them. 
## Component Deliverables @@ -166,32 +198,37 @@ That is a materially larger distributed-state problem than the session-local MVP - Standalone service and deployment unit - Manually managed provider whitelist +- Canonical MVP pricing for the curated provider set - Provider selection based on minimal metadata, at least: - - endpoint information + - control-plane endpoint information - chain/network eligibility - - possibly pricing metadata - Returns eligible providers plus one recommended provider +- Returns the selected provider control-plane endpoint, not the final streaming endpoint - Administrative/governance actions require authentication ### Consumer Sidecar - Mandatory client-side SDS integration component +- Primary user-facing SDS boundary +- Presents a Substreams-compatible endpoint/proxy for normal data-plane usage - Supports oracle-backed discovery and direct provider fallback -- Performs session initialization with provider -- Participates in reconnect flow where provider may return fresh or latest known RAV +- Performs session initialization with the provider control plane +- Receives the provider data-plane endpoint during session handshake - Maintains payment/session coordination during streaming - Works with the real client integration path, not only demo wrappers -- Does not require durable local persistence for MVP +- Does not require durable local payment-session persistence for MVP ### Provider Gateway / Provider Integration +- Provider gateway is the public SDS control plane for providers - Real integration into the provider path is mandatory - Validates payment/session state for real streaming traffic -- Uses provider-authoritative byte metering from plugin/integration path +- Uses provider-authoritative byte metering from the plugin/integration path - Drives RAV request/response flow - Handles live low-funds conditions during streaming - Persists accepted RAV and settlement-relevant state durably - Exposes authenticated operator/admin surfaces 
for inspection and settlement data retrieval +- May rely on separate internal plugin/runtime components behind the public gateway boundary ### Provider State Storage @@ -215,6 +252,7 @@ That is a materially larger distributed-state problem than the session-local MVP - craft and submit `collect()` transaction locally - inspect or retry pending collection attempts - Tooling should be sufficient for operators without requiring a dedicated UI +- CLI/operator tooling remains the execution surface for funding and settlement actions ### Security and Admin Surfaces @@ -224,6 +262,8 @@ That is a materially larger distributed-state problem than the session-local MVP - oracle management - provider inspection - collection-data retrieval +- Public vs private provider services may be separated for security and operational reasons +- That public/private split is not the main consumer-facing architecture contract - Final auth mechanism remains an implementation choice ### Observability @@ -239,7 +279,7 @@ That is a materially larger distributed-state problem than the session-local MVP - Operators can fund and top up escrow through CLI workflows - Operators can perform manual on-chain collection through CLI workflows - The system can surface low-funds conditions during active streams -- Recovery/reconnect behavior is defined well enough for operators to understand expected runtime behavior +- Runtime behavior is documented clearly enough for operators to understand what happens when a stream is interrupted and restarted ## Acceptance Scenarios @@ -249,18 +289,22 @@ The scenarios below are the primary definition of done for the MVP. 
- Consumer sidecar queries the oracle for a required chain/network
- Oracle returns eligible providers plus a recommended choice
-- Consumer sidecar uses the recommended provider
+- Oracle returns the provider control-plane endpoint for the selected provider
+- Consumer sidecar uses the recommended provider by default
- Provider handshake succeeds
+- Provider returns the session-specific data-plane endpoint
- Real streaming begins through the production integration path
- Byte metering occurs on the provider side
- Payment state advances correctly during streaming

-### B. Reconnect and Resume
+### B. Fresh Session After Interruption

-- An active SDS session is interrupted
-- Consumer sidecar reconnects through the normal handshake path
-- Provider responds with the appropriate fresh or resumable RAV state
-- Streaming resumes without losing the authoritative accepted payment state
+- An SDS-backed stream is interrupted
+- A later request is made through the normal consumer-side flow
+- The consumer sidecar performs normal discovery or uses an explicit provider override
+- The provider handshake creates a fresh SDS payment session
+- The new request does not reuse prior payment-session identity or RAV lineage
+- Any Substreams cursor or start-block continuation is handled as normal data-plane behavior rather than SDS-specific payment-session recovery

### C. Low Funds During Streaming

@@ -268,6 +312,7 @@ The scenarios below are the primary definition of done for the MVP.

- Usage progresses until provider-side session-local funding logic determines funds are too low
- Provider surfaces the low-funds condition during the live stream
- The client path receives and reacts to the stop/pause decision correctly
+- Any consumer-side warnings or balance checks remain advisory rather than authoritative

### D. Provider Restart Without Losing Collectible State

@@ -280,6 +325,7 @@ The scenarios below are the primary definition of done for the MVP. 
- Operator can approve token spend and deposit/top up escrow through CLI tooling - The resulting on-chain funding state is usable by SDS runtime flows +- Consumer-side advisory checks may help surface obviously low balances, but runtime sufficiency still depends on provider-side behavior ### F. Manual Collection Flow @@ -302,6 +348,7 @@ The scenarios below are the primary definition of done for the MVP. - Funding remains an operator/developer workflow rather than end-user wallet UI - Collection remains operator-driven rather than automatic - Oracle provider set is manually curated rather than permissionless +- Payment-session continuation across reconnects is intentionally deferred - Observability scope is intentionally basic ## Post-MVP Follow-Ups @@ -309,29 +356,20 @@ The scenarios below are the primary definition of done for the MVP. - Correct multi-stream aggregate exposure handling - Permissionless oracle sourcing from the Substreams Data Service contract registry - Richer oracle metadata and provider ranking +- Dynamic or provider-specific pricing and the corresponding oracle selection logic +- True payment-session continuation and recovery semantics if later required - Automated/background collection using a separate settlement agent -- Better consumer recovery semantics if needed beyond handshake-based recovery - Better funding UX, including possible wallet-connected UI - Stronger observability and operational tooling ## Open Questions -- The chain/network discovery input contract is narrowed for MVP: - - consumer sidecar derives network from the Substreams package by default - - if a package/module resolves a specific `networks` entry, that takes precedence over top-level `network` - - explicit user-supplied network input remains supported only as fallback when package derivation is unavailable - - if both explicit input and package-derived network exist and differ after normalization, the request fails fast - - if neither source yields a usable network, 
the request fails fast - - SDS uses the same canonical network keys as the Graph networks registry for MVP, with repo-owned/pinned mappings rather than live runtime registry lookups -- What is the pricing authority contract between oracle metadata and provider handshake responses? -- What is the exact canonical payment identity and `collection_id` reuse policy for fresh workloads versus reconnects? -- How much of the reconnect/recovery state should be keyed by session versus on-chain payment identity? - Should simple observability for MVP include metrics endpoints, or are structured logs plus inspection/status tooling sufficient? - What exact authentication mechanism should protect provider and oracle admin/operator surfaces? ## References -- `docs/phase1-sidecar-spec.md` -- `plans/implementation-backlog.md` -- `plans/component-task-breakdown.md` +- `plans/mvp-gap-analysis.md` +- `plans/mvp-implementation-backlog.md` +- `docs/mvp-implementation-sequencing.md` - `README.md` diff --git a/plans/mvp-gap-analysis.md b/plans/mvp-gap-analysis.md index 583d00d..4d7ef9a 100644 --- a/plans/mvp-gap-analysis.md +++ b/plans/mvp-gap-analysis.md @@ -1,9 +1,16 @@ # MVP Gap Analysis -Drafted: 2026-03-12 +Drafted: 2026-03-12 +Revised: 2026-03-25 This document maps the current repository state against the MVP defined in `docs/mvp-scope.md`. +It reflects: + +- the 2026-03-24 MVP scope rewrite +- the current `plans/mvp-implementation-backlog.md` +- provider/runtime work that landed in StreamingFast commits `5ffca3d` through `1416020` + Unlike the MVP scope document, this file is expected to change frequently. 
Status values used here: @@ -15,34 +22,42 @@ Status values used here: ## Summary -The repository already has a strong technical foundation: +The repository still has a strong technical foundation: - Horizon V2 / TAP signing, verification, and aggregation are implemented and tested - local chain/contracts and integration tests are in place -- consumer sidecar and provider gateway exist +- consumer sidecar and provider-side payment/session surfaces exist - sidecar-to-gateway session start and payment-session flow exist - provider-side plugin services exist for auth, session, and usage -The main MVP gaps are not the cryptographic/payment core. They are the surrounding system capabilities required to make SDS a usable product stack: +Provider-side runtime foundations are materially stronger than before: + +- DSN-backed repository selection now exists +- PostgreSQL-backed provider persistence foundation now exists +- the provider runtime is now shaped as a public Payment Gateway plus a private Plugin Gateway +- firecore/plugin integration scaffolding is stronger than it was when this document was first drafted + +The biggest remaining MVP gaps are now: - standalone oracle/discovery component -- real provider and consumer production-path integration completion -- provider-side durable payment/collection persistence -- live low-funds enforcement in the real stream path -- funding and settlement CLI workflows +- consumer-side Substreams-compatible endpoint/proxy behavior +- provider collection lifecycle persistence and inspection/collection APIs +- low-funds enforcement in the real live stream path +- operator funding and collection tooling - authenticated admin/operator surfaces +- finalized observability floor ## Acceptance Scenario Status | Scenario | Status | Notes | | --- | --- | --- | -| Discovery to paid streaming | `partial` | Paid session flow exists, but standalone oracle is missing and real production-path integration is not complete | -| Reconnect and resume | 
`partial` | Resume with `existing_rav` exists; provider-authoritative recovery during normal handshake is not finalized | -| Low funds during streaming | `missing` | Session-local low-funds decisions during active streaming are still backlog work | -| Provider restart without losing collectible state | `missing` | Provider accepted RAV state is still in-memory today | -| Manual funding flow | `partial` | Local/demo helper exists via `sds demo setup`, but general MVP funding CLI workflow is not implemented | -| Manual collection flow | `missing` | No MVP settlement inspection/collection CLI flow yet | -| Secure deployment posture | `partial` | TLS hooks exist, but admin authentication and final secure operational surfaces are not complete | +| A. Discovery to paid streaming | `partial` | Paid session flow and provider runtime foundations exist, but the standalone oracle is still missing and the consumer sidecar is not yet the Substreams-compatible ingress described by the scope | +| B. Fresh session after interruption | `partial` | Fresh-session semantics are now the MVP target, but current code still carries `existing_rav` and split-endpoint assumptions that do not fully match the revised design | +| C. Low funds during streaming | `missing` | Session-local low-funds handling in the real live stream path is still backlog work | +| D. Provider restart without losing collectible state | `partial` | Provider persistence is no longer purely in-memory because PostgreSQL support exists, but collectible/collection lifecycle tracking is still incomplete | +| E. Manual funding flow | `partial` | Demo-oriented setup/funding helpers exist, but real operator-grade funding CLI flows do not | +| F. Manual collection flow | `missing` | RAV tooling exists, but provider-backed settlement inspection and collect workflow are not implemented | +| G. 
Secure deployment posture | `partial` | TLS hooks and provider public/private split exist, but authenticated admin/operator surfaces remain unfinished | ## Component Status @@ -59,9 +74,9 @@ Evidence: Notes: -- This area is already strong enough to support the rest of MVP work. +- This area remains strong enough to support the rest of the MVP work. -### Consumer Sidecar RPC Surface +### Consumer Sidecar Status: `partial` @@ -78,15 +93,15 @@ What already exists: - usage reporting - end session - payment-session loop wiring to provider gateway -- existing-RAV-based resumption What is still missing for MVP: -- finalized provider-authoritative reconnect flow in the normal handshake -- completion of real client integration path -- finalized handling around low-funds stop/pause in real usage path +- the API still expects explicit `gateway_endpoint` and `substreams_endpoint` +- the API still carries `existing_rav`, which reflects older resume-oriented assumptions +- the real user-facing integration is still wrapper-centric rather than endpoint-centric +- finalized low-funds stop/pause handling in the real usage path -### Provider Gateway RPC Surface +### Provider Gateway Status: `partial` @@ -100,16 +115,18 @@ Evidence: What already exists: +- public payment gateway - session start - bidirectional payment session - RAV validation and authorization checks -- basic session status inspection +- basic runtime/session status inspection +- repository-backed session state foundation What is still missing for MVP: -- durable accepted-RAV persistence +- provider-returned data-plane endpoint semantics in the current public contract - collection lifecycle state -- low-funds logic during active streaming +- live low-funds logic during active streaming - authenticated admin/operator surfaces ### Provider Plugin Services @@ -121,18 +138,20 @@ Evidence: - `provider/auth/service.go` - `provider/session/service.go` - `provider/usage/service.go` -- `provider/plugin/` +- 
`provider/plugin/gateway.go` What already exists: +- private plugin gateway - auth, session, and usage services for `sds://` +- typed session ID propagation through plugin/runtime requests - provider-authoritative metering path foundation What is still missing for MVP: -- full real provider path integration and validation against production-like usage -- finalized byte-billing semantics in the complete runtime path -- stop/pause behavior enforced in the live stream path +- full live-provider-path acceptance in production-like usage +- finalized byte-billing/runtime contract documentation +- live stop/pause behavior enforced in the provider stream lifecycle ### Oracle @@ -142,28 +161,51 @@ What MVP requires: - standalone service - manual whitelist +- canonical pricing for the curated provider set - eligible provider set plus recommended provider response +- selected provider control-plane endpoint return - authenticated admin/governance actions ### Provider Persistence -Status: `missing` +Status: `partial` Current state: -- provider repository is in-memory -- accepted RAV/session state is lost on restart +- provider persistence is no longer only in-memory +- PostgreSQL repository support exists +- the provider gateway can instantiate repositories via DSN +- migrations and repository tests exist Evidence: -- `provider/repository/inmemory.go` -- `provider/repository/repository.go` +- `provider/gateway/repository.go` +- `provider/repository/psql/` +- `provider/gateway/REPOSITORY.md` -What MVP requires: +What is still missing for MVP: + +- explicit collection lifecycle persistence +- provider-backed collectible/collect_pending/collected tracking +- acceptance-level proof for the full restart/collectible scenario + +### Consumer Data-Plane Compatibility + +Status: `missing` -- durable provider-side state for accepted collectible RAVs -- settlement lifecycle state -- persistence across restarts +Evidence: + +- 
`proto/graph/substreams/data_service/consumer/v1/consumer.proto` +- `cmd/sds/impl/sink_run.go` + +Current state: + +- the consumer sidecar is still used through SDS-specific RPC plus wrapper orchestration +- `sds sink run` is the closest real-path integration today + +What is still missing for MVP: + +- a Substreams-compatible consumer-side endpoint/proxy that hides SDS discovery/session/payment coordination behind the data-plane ingress ### Funding CLI @@ -171,13 +213,13 @@ Status: `partial` Current state: -- local/demo funding helper exists +- local/demo funding setup exists Evidence: - `cmd/sds/demo_setup.go` -What MVP requires: +What is still missing for MVP: - operator-oriented approve/deposit/top-up workflow beyond local demo assumptions @@ -185,10 +227,18 @@ What MVP requires: Status: `missing` -What MVP requires: +Current state: -- inspect collectible accepted RAV data -- fetch settlement-relevant data from provider +- local RAV creation/inspection tooling exists, but it is not provider-backed settlement tooling + +Evidence: + +- `cmd/sds/tools_rav.go` + +What is still missing for MVP: + +- inspect collectible accepted RAV data from the provider +- fetch settlement-relevant data from the provider - craft/sign/submit `collect()` transaction locally - retry-safe operator workflow @@ -198,19 +248,21 @@ Status: `partial` Evidence: -- `cmd/sds/provider_gateway.go` +- `cmd/sds/impl/provider_gateway.go` - `cmd/sds/consumer_sidecar.go` - `sidecar/server_transport.go` -- `provider/plugin/plugin.go` +- `provider/plugin/gateway.go` What already exists: - plaintext vs TLS transport configuration paths +- provider public/private network split for payment gateway vs plugin gateway What is still missing for MVP: -- finalized secure deployment defaults and operational guidance +- finalized secure deployment defaults across all relevant surfaces - authenticated admin/operator surfaces +- validated TLS-by-default posture for the full MVP deployment shape ### Observability @@ 
-220,49 +272,86 @@ What already exists: - structured logging - health endpoints -- status inspection basics +- basic runtime/status inspection What is still missing for MVP: - final MVP decision on metrics endpoints -- better operator-facing inspection for payment/collection state +- better operator-facing inspection for payment, runtime, and collection state + +## Current Implementation Highlights + +The most important recent status changes versus the original draft are: + +- Provider persistence should no longer be treated as fully missing. + - The repo now includes PostgreSQL-backed repository code, DSN-based selection, migrations, and tests. +- Provider runtime shape is more concrete than before. + - The repo now explicitly separates a public Payment Gateway from a private Plugin Gateway. +- Real-path integration scaffolding is stronger. + - The repo now includes stronger firecore/plugin integration setup and a `TestFirecore` scaffold, even though that path is not yet MVP-complete. +- Consumer-side MVP UX is still notably behind the revised scope. + - The code still reflects a control-plane RPC plus wrapper model rather than the endpoint/proxy boundary the scope now requires. 
## Backlog Alignment -The largest currently tracked backlog items that still map directly to MVP are: - -- `SDS-008` Define and document `metadata` schema + encoding -- `SDS-016` Implement `NeedMoreFunds` loop + Continue/Stop/Pause -- `SDS-020` Add signing thresholds -- `SDS-021` Decide/implement on-chain collection workflow -- `SDS-022` Track outstanding RAVs across concurrent streams - - note: full aggregate concurrent-stream correctness is no longer assumed to be MVP-critical -- `SDS-024` Add durable state storage -- `SDS-025` Add transport security + authn/authz -- `SDS-026` Add observability -- `SDS-028` Define payment header format -- `SDS-029` Integrate provider gateway into tier1 provider -- `SDS-030` Integrate consumer sidecar into substreams client -- `SDS-038` Make `sds sink run` the primary end-to-end demo (STOP-aware) -- `SDS-039` Document/enforce required firehose-core version for `sds://` plugins - -Additional MVP work not yet clearly represented as a complete deliverable set in the existing backlog: - -- standalone oracle component -- authenticated provider/oracle admin surfaces -- operator-oriented funding CLI -- operator-oriented settlement inspection and collection CLI -- provider-authoritative reconnect flow folded into the normal handshake +The current MVP gaps now align with the rewritten MVP backlog as follows. 
+ +Oracle and consumer ingress: + +- `MVP-005` +- `MVP-007` +- `MVP-017` + +Provider runtime and payment control: + +- `MVP-010` +- `MVP-011` +- `MVP-012` +- `MVP-014` +- `MVP-015` +- `MVP-016` +- `MVP-031` + +Persistence and settlement: + +- `MVP-003` +- `MVP-008` +- `MVP-009` +- `MVP-029` + +Tooling and operations: + +- `MVP-018` +- `MVP-019` +- `MVP-020` +- `MVP-032` + +Security and observability: + +- `MVP-021` +- `MVP-022` +- `MVP-023` +- `MVP-024` +- `MVP-028` +- `MVP-030` + +Validation and docs: + +- `MVP-025` +- `MVP-026` + +The gap analysis and the backlog now agree that: + +- pricing authority is resolved for MVP +- reconnect/payment-session reuse is not an MVP target +- the remaining open questions are observability and auth only ## Open Questions Carrying Risk -These are not implementation gaps yet, but unresolved design points that could change scope or interfaces: +These are no longer architecture-blocking for the main SDS flow, but they do still block clean closure of the security/admin and observability parts of MVP. -- chain/network derivation from package vs explicit input -- pricing authority between oracle metadata and provider handshake -- canonical payment identity and `collection_id` reuse semantics - metrics endpoints vs logs-plus-status-only for MVP observability -- exact admin authentication mechanism +- exact admin/operator authentication mechanism ## Recommended Usage @@ -272,7 +361,7 @@ Use this file to: - assess current progress - identify MVP gaps -- map backlog work to the target MVP +- map current repo status to the MVP target - keep implementation status current without rewriting the MVP scope itself Use `plans/mvp-implementation-backlog.md` as the concrete task backlog aligned to the revised MVP scope. 
diff --git a/plans/mvp-implementation-backlog.md b/plans/mvp-implementation-backlog.md index e82abc4..7fde5ab 100644 --- a/plans/mvp-implementation-backlog.md +++ b/plans/mvp-implementation-backlog.md @@ -1,36 +1,36 @@ # Substreams Data Service — MVP Implementation Backlog -_Last updated: 2026-03-13_ +_Last updated: 2026-03-24_ -This document translates `docs/mvp-scope.md` into concrete implementation tasks for the MVP. +This document translates [docs/mvp-scope.md](../docs/mvp-scope.md) into concrete implementation tasks for the MVP. It is intentionally separate from `plans/implementation-backlog.md`. Rationale for the split: - `plans/implementation-backlog.md` reflects the earlier implementation sequence and still contains useful historical context -- this document reflects the revised MVP scope agreed after the MVP rescoping work -- the MVP scope is now broader than the original sidecar-centric backlog and includes new deliverables such as the oracle, operator tooling, and provider-side persistence/settlement workflows +- this document reflects the revised MVP scope agreed after the 2026-03-24 architecture rewrite +- the backlog now also incorporates provider/runtime work that landed separately in StreamingFast commits `5ffca3d` through `1416020` This document is a scope-aligned execution backlog, not a priority list. ## How To Use This Document -- Use `docs/mvp-scope.md` as the stable target-state definition. +- Use [docs/mvp-scope.md](../docs/mvp-scope.md) as the stable target-state definition. - Use `plans/mvp-gap-analysis.md` for current-state assessment. - Use this file to define the concrete MVP implementation work that remains. 
Each task includes: - **Context**: why the task exists -- **Assumptions**: design assumptions or unresolved questions that affect the task definition +- **Assumptions**: scope-aligned assumptions that shape the task - **Done when**: objective completion criteria - **Verify**: how to corroborate the behavior The status tracker below also includes: - **Depends on**: tasks that should be frozen or completed first so downstream work does not build on moving semantics -- **Scenarios**: acceptance scenarios from `docs/mvp-scope.md` (`A` through `G`) that the task materially contributes to +- **Scenarios**: acceptance scenarios from [docs/mvp-scope.md](../docs/mvp-scope.md) (`A` through `G`) that the task materially contributes to Unless otherwise scoped, the baseline validation for code changes remains: @@ -38,11 +38,13 @@ Unless otherwise scoped, the baseline validation for code changes remains: - `go vet ./...` - `gofmt` on changed Go files +Recent provider persistence and integration scaffolding landed outside the original backlog sequencing. The tracker below treats that work as existing foundation and updates task status accordingly. + ## Assumptions Register -These assumptions are referenced by task ID so it is clear where unresolved decisions still matter. +These assumptions are referenced by task ID so it is clear which scope decisions or remaining open questions still matter. -- `A1` Chain/network discovery input is narrowed for MVP, but implementation details still matter. +- `A1` Chain/network discovery input is frozen for MVP. - Consumer sidecar derives network from the Substreams package by default. - If a package/module resolves a specific `networks` entry, that takes precedence over top-level `network`. - Explicit input remains supported as fallback when package derivation is unavailable. @@ -50,11 +52,15 @@ These assumptions are referenced by task ID so it is clear where unresolved deci - If neither source yields a usable network, fail fast. 
- SDS should use repo-owned/pinned mappings to the Graph networks registry keys for MVP rather than live runtime registry lookups. -- `A2` Pricing authority between oracle metadata and provider handshake is still open. - - MVP work should avoid hard-coding a final authority rule unless/until aligned with StreamingFast. +- `A2` Pricing is oracle-authoritative for MVP. + - The oracle returns canonical pricing for the curated provider set. + - The provider handshake is not a price negotiation step in normal operation. + - The consumer should not discover price disagreement only after connecting to a selected provider. -- `A3` Canonical payment identity and `collection_id` reuse semantics are still open. - - MVP work should isolate persistence and reconnect logic behind a model that can evolve without a full rewrite. +- `A3` Every new request/connection creates a fresh SDS payment session for MVP. + - No resumable payment-session semantics are required for MVP. + - No RAV or payment-session reuse occurs across reconnects. + - Any Substreams cursor/block continuation remains a normal data-plane concern, not an SDS payment-session recovery flow. - `A4` Observability scope beyond logs/status tooling is still open. - MVP work should implement structured logging and inspection/status surfaces without forcing a final metrics/tracing backend choice. 
@@ -82,148 +88,141 @@ These assumptions are referenced by task ID so it is clear where unresolved deci | ID | Status | Area | Assumptions | Depends on | Scenarios | Task | | --- | --- | --- | --- | --- | --- | --- | -| MVP-001 | `open_question` | protocol | `A2` | none | `A` | Freeze the pricing exposure contract between oracle metadata and provider handshake | -| MVP-002 | `not_started` | protocol | `A1`, `A3` | `MVP-027` | `B` | Freeze reconnect handshake semantics so provider can return fresh or latest-known resumable RAV during normal session init | -| MVP-003 | `not_started` | protocol | `A3` | `MVP-027` | `B`, `D`, `F` | Define the durable provider-side payment and settlement data model | -| MVP-004 | `not_started` | protocol | none | none | `A`, `B`, `C` | Define and document the byte-billing and payment/header contract used in the real runtime path | -| MVP-005 | `not_started` | oracle | `A1`, `A2`, `A5` | `MVP-033` | `A` | Implement a standalone oracle service with manual whitelist and recommended-provider response | +| MVP-001 | `done` | protocol | `A2` | none | `A` | Freeze the oracle-authoritative MVP pricing contract across oracle, consumer, and provider flows | +| MVP-002 | `done` | protocol | `A2`, `A3` | `MVP-033` | `A`, `B` | Freeze fresh-session init semantics and provider-returned data-plane endpoint behavior | +| MVP-003 | `in_progress` | protocol | `A3`, `A6` | `MVP-027` | `D`, `F` | Define and document the provider-side runtime persistence model and its boundary with settlement lifecycle tracking | +| MVP-004 | `in_progress` | protocol | `A2`, `A3` | none | `A`, `C` | Define and document the real runtime payment contract used by the public payment gateway, private plugin gateway, and consumer/provider payment loop | +| MVP-005 | `not_started` | oracle | `A1`, `A2`, `A5` | `MVP-033` | `A` | Implement a standalone oracle service with manual whitelist, canonical pricing, recommended-provider response, and control-plane endpoint return | | MVP-006 
| `not_started` | oracle | `A5` | `MVP-028` | `A`, `G` | Add authenticated oracle administration for whitelist and provider metadata management | -| MVP-007 | `not_started` | consumer | `A1`, `A2` | `MVP-001`, `MVP-005`, `MVP-033` | `A` | Integrate consumer sidecar with oracle discovery while preserving direct-provider fallback | -| MVP-008 | `not_started` | provider-state | `A3`, `A6` | `MVP-003` | `B`, `D`, `F` | Add durable provider storage for accepted RAV, session state, and collection lifecycle state | -| MVP-009 | `not_started` | provider-state | `A3` | `MVP-003`, `MVP-029` | `D`, `F` | Expose provider inspection and settlement-data retrieval APIs for accepted/collectible RAV state | +| MVP-007 | `not_started` | consumer | `A1`, `A2`, `A3` | `MVP-005`, `MVP-033` | `A` | Integrate consumer sidecar with oracle discovery while preserving direct-provider fallback and provider-returned data-plane resolution | +| MVP-008 | `in_progress` | provider-state | `A3`, `A6` | `MVP-003` | `D`, `F` | Complete durable provider runtime storage for sessions, usage, and accepted RAV state, distinct from collection lifecycle tracking | +| MVP-009 | `not_started` | provider-state | `A3`, `A5` | `MVP-003`, `MVP-022`, `MVP-029` | `D`, `F` | Expose provider inspection and settlement-data retrieval APIs for accepted and collectible RAV state | | MVP-010 | `not_started` | funding-control | `A6` | `MVP-004` | `C` | Implement session-local low-funds detection and provider Continue/Pause/Stop decisions during streaming | | MVP-011 | `not_started` | funding-control | `A6` | `MVP-010` | `C` | Propagate provider stop/pause decisions through consumer sidecar into the real client path | | MVP-012 | `not_started` | funding-control | none | `MVP-004` | `A`, `C` | Add deterministic RAV issuance thresholds suitable for real runtime behavior | -| MVP-013 | `not_started` | consumer | `A3` | `MVP-002`, `MVP-008` | `B` | Implement provider-authoritative reconnect/resume in the normal handshake path | 
-| MVP-014 | `not_started` | provider-integration | none | `MVP-004` | `A` | Integrate provider gateway validation into the real provider streaming path | -| MVP-015 | `not_started` | provider-integration | none | `MVP-004`, `MVP-014` | `A`, `C` | Wire real byte metering from the provider/plugin path into gateway payment state | +| MVP-013 | `deferred` | consumer | `A3` | none | none | Post-MVP only: implement true provider-authoritative payment-session reconnect/resume semantics | +| MVP-014 | `in_progress` | provider-integration | `A3` | `MVP-004` | `A` | Integrate the public Payment Gateway and private Plugin Gateway into the real provider streaming path | +| MVP-015 | `in_progress` | provider-integration | `A3` | `MVP-004`, `MVP-014` | `A`, `C` | Wire real byte metering and session correlation from the plugin path into the payment-state repository used by the gateway | | MVP-016 | `not_started` | provider-integration | `A6` | `MVP-010`, `MVP-014` | `C` | Enforce gateway Continue/Pause/Stop decisions in the live provider stream lifecycle | -| MVP-017 | `not_started` | consumer-integration | `A1` | `MVP-007`, `MVP-011`, `MVP-033` | `A`, `C` | Integrate the real consumer/client path with consumer sidecar init, usage reporting, and session end | +| MVP-017 | `not_started` | consumer-integration | `A1`, `A2`, `A3` | `MVP-007`, `MVP-011`, `MVP-033` | `A`, `C` | Implement the consumer sidecar as a Substreams-compatible endpoint/proxy rather than only a wrapper-controlled lifecycle service | | MVP-018 | `not_started` | tooling | none | `MVP-032` | `E` | Implement operator funding CLI flows for approve/deposit/top-up beyond local demo assumptions | -| MVP-019 | `not_started` | tooling | `A3`, `A5` | `MVP-009`, `MVP-022` | `D`, `F` | Implement provider inspection CLI flows for collectible/accepted RAV data | -| MVP-020 | `not_started` | tooling | `A3` | `MVP-009`, `MVP-029` | `F` | Implement manual collection CLI flow that crafts/signs/submits collect transactions 
locally | +| MVP-019 | `not_started` | tooling | `A5` | `MVP-009`, `MVP-022` | `D`, `F` | Implement provider inspection CLI flows for accepted and collectible RAV data | +| MVP-020 | `not_started` | tooling | `A5` | `MVP-009`, `MVP-022`, `MVP-029` | `F` | Implement manual collection CLI flow that fetches provider settlement state and crafts/signs/submits collect transactions locally | | MVP-021 | `not_started` | security | `A5` | none | `G` | Make TLS the default non-dev runtime posture for oracle, sidecar, and provider integration paths | | MVP-022 | `not_started` | security | `A5` | `MVP-009`, `MVP-028` | `D`, `F`, `G` | Add authentication and authorization to provider admin/operator APIs | -| MVP-023 | `open_question` | observability | `A4` | none | `A`, `B`, `C`, `D`, `F`, `G` | Define the final MVP observability floor beyond structured logs and status tooling | -| MVP-024 | `not_started` | observability | `A4` | `MVP-023` | `B`, `C`, `D`, `F`, `G` | Implement basic operator-facing inspection/status surfaces and log correlation | -| MVP-025 | `not_started` | validation | none | none | `A`, `B`, `C`, `D`, `E`, `F`, `G` | Add MVP acceptance coverage for the primary end-to-end scenarios in docs/tests/manual verification | -| MVP-026 | `not_started` | docs | `A1`, `A2`, `A3`, `A4`, `A5` | `MVP-001`, `MVP-002`, `MVP-003`, `MVP-004`, `MVP-023`, `MVP-027`, `MVP-028`, `MVP-033` | `A`, `B`, `C`, `D`, `E`, `F`, `G` | Refresh protocol/runtime docs so they match the MVP architecture and explicit open questions | -| MVP-027 | `open_question` | protocol | `A3` | none | `B`, `D`, `F` | Freeze canonical payment identity, `collection_id` reuse, and session-vs-payment keying semantics | +| MVP-023 | `open_question` | observability | `A4` | none | `A`, `C`, `D`, `F`, `G` | Define the final MVP observability floor beyond structured logs and status tooling | +| MVP-024 | `not_started` | observability | `A4` | `MVP-023` | `C`, `D`, `F`, `G` | Implement basic operator-facing 
inspection/status surfaces and log correlation | +| MVP-025 | `in_progress` | validation | none | none | `A`, `B`, `C`, `D`, `E`, `F`, `G` | Add MVP acceptance coverage for the primary end-to-end scenarios in docs/tests/manual verification | +| MVP-026 | `in_progress` | docs | `A1`, `A4`, `A5` | `MVP-023`, `MVP-028`, `MVP-033` | `A`, `B`, `C`, `D`, `E`, `F`, `G` | Refresh protocol/runtime docs so they match the revised MVP architecture and remaining open questions | +| MVP-027 | `done` | protocol | `A3` | none | `B`, `D`, `F` | Freeze MVP payment/session identity semantics for fresh sessions and non-reused collection/payment lineage | | MVP-028 | `open_question` | security | `A5` | none | `G` | Define the MVP authentication and authorization contract for oracle and provider operator surfaces | -| MVP-029 | `not_started` | provider-state | `A3` | `MVP-003`, `MVP-027` | `D`, `F` | Implement provider collection lifecycle transitions and update surfaces for `collectible`, `collect_pending`, `collected`, and retryable collection state | -| MVP-030 | `not_started` | provider-integration | none | `MVP-014`, `MVP-017` | `A`, `G` | Add runtime compatibility and preflight checks for real provider/plugin deployments | -| MVP-031 | `not_started` | runtime-payment | none | `MVP-004`, `MVP-012`, `MVP-014`, `MVP-017` | `A`, `C` | Wire the live PaymentSession and RAV-control loop into the real client/provider runtime path | -| MVP-032 | `not_started` | operations | `A3`, `A4`, `A5` | `MVP-003`, `MVP-008`, `MVP-010`, `MVP-022` | `B`, `C`, `D`, `F`, `G` | Expose operator runtime/session/payment inspection APIs and CLI/status flows | +| MVP-029 | `not_started` | provider-state | `A3`, `A5` | `MVP-003`, `MVP-022` | `D`, `F` | Implement provider collection lifecycle transitions and update surfaces for `collectible`, `collect_pending`, `collected`, and retryable collection state | +| MVP-030 | `in_progress` | provider-integration | `A5` | `MVP-014`, `MVP-017` | `A`, `G` | Add runtime 
compatibility and preflight checks for real provider/plugin deployments | +| MVP-031 | `not_started` | runtime-payment | `A2`, `A3` | `MVP-004`, `MVP-012`, `MVP-014`, `MVP-017` | `A`, `C` | Wire the long-lived payment-control loop behind the consumer-sidecar ingress path used by real runtime traffic | +| MVP-032 | `not_started` | operations | `A4`, `A5`, `A6` | `MVP-008`, `MVP-010`, `MVP-022` | `C`, `D`, `F`, `G` | Expose operator runtime/session/payment inspection APIs and CLI/status flows | | MVP-033 | `done` | protocol | `A1` | none | `A` | Freeze the chain/network discovery input contract across client, sidecar, and oracle | ## Protocol and Contract Tasks -- [ ] MVP-001 Freeze the pricing exposure contract between oracle metadata and provider handshake. +- [x] MVP-001 Freeze the oracle-authoritative MVP pricing contract across oracle, consumer, and provider flows. - Context: - - The MVP scope expects pricing to likely appear in both places, but pricing authority is still open. - - Real consumer/oracle/provider integration should not proceed on hand-wavy assumptions here. + - The revised MVP scope fixes pricing at the oracle layer for the curated provider set. + - The normal consumer/provider handshake is no longer a pricing negotiation step. - Assumptions: - `A2` - Done when: - - The intended relationship between oracle pricing metadata and provider handshake pricing is documented. - - The implementation path does not rely on contradictory authority assumptions across components. + - The repo documents oracle-authoritative pricing for MVP. + - Consumer and provider tasks no longer assume provider-side price negotiation in normal operation. + - Oracle and handshake wording are consistent across scope and backlog. - Verify: - - Update `docs/mvp-scope.md` open question if unresolved, or close it if decided. - - Add or update integration/manual verification notes for whichever pricing source is actually consumed at runtime. 
+ - Review [docs/mvp-scope.md](../docs/mvp-scope.md) and confirm there is no conflicting pricing authority language. -- [x] MVP-033 Freeze the chain/network discovery input contract across client, sidecar, and oracle. +- [x] MVP-002 Freeze fresh-session init semantics and provider-returned data-plane endpoint behavior. - Context: - - The MVP requires oracle-backed provider discovery keyed by chain/network context, but the source of that context is still open. - - Leaving this only as an assumption risks incompatible implementations across the real client path, sidecar API, and oracle API. + - The revised MVP scope no longer includes resumable payment-session behavior. + - The provider handshake, not the oracle, owns session-specific data-plane endpoint resolution. - Assumptions: - - `A1` + - `A2` + - `A3` - Done when: - - The repo defines the canonical chain/network identifier shape used by the oracle query path. - - Consumer sidecar derives network from the Substreams package by default. - - If a package/module resolves a specific `networks` entry, that takes precedence over top-level `network`. - - Explicit input remains supported only as fallback when package derivation is unavailable. - - If both explicit input and package-derived network exist and differ after normalization, the request fails fast. - - If neither source yields a usable network, the request fails fast. - - SDS uses the same canonical network keys as the Graph networks registry for MVP, with repo-owned/pinned mappings rather than live runtime registry lookups. - - Consumer sidecar owns derivation, normalization, validation, and conflict detection. - - MVP-005, MVP-007, and MVP-017 all point to the same contract. + - The repo documents that every new request/connection creates a fresh SDS payment session. + - The provider handshake is described as returning the data-plane endpoint. + - No task still assumes latest-known resumable RAV behavior during normal init. 
- Verify: - - Update `docs/mvp-scope.md` open question to reflect the narrowed contract. - - Add contract-level tests or documented manual verification for package-derived network success, explicit-input fallback, mismatch rejection, and missing-network rejection. + - Review [docs/mvp-scope.md](../docs/mvp-scope.md) and confirm the workflow and decisions table match this contract. -- [ ] MVP-002 Freeze reconnect handshake semantics so provider can return fresh or latest-known resumable RAV during normal session init. +- [x] MVP-033 Freeze the chain/network discovery input contract across client, sidecar, and oracle. - Context: - - The current repo supports resume when the caller already has `existing_rav`, but the MVP requires provider-authoritative reconnect behavior in the handshake. + - Oracle-backed provider discovery depends on a stable chain/network contract. - Assumptions: - `A1` - - `A3` - Done when: - - Consumer init has a documented reconnect story. - - Provider can distinguish fresh handshake from reconnect/resume during the normal init flow. - - Provider returns either a zero-value/fresh RAV or the latest resumable RAV according to the chosen semantics. + - The repo defines the canonical chain/network identifier shape used by the oracle query path. + - Consumer sidecar owns derivation, normalization, validation, and conflict detection. + - Oracle and consumer tasks point to the same frozen contract. - Verify: - - Add an integration test that reconnects without relying solely on consumer-local in-memory session state. + - Review [docs/mvp-scope.md](../docs/mvp-scope.md) and confirm the network-discovery contract is present in the main workflow text rather than left as an open question. -- [ ] MVP-027 Freeze canonical payment identity, `collection_id` reuse, and session-vs-payment keying semantics. +- [x] MVP-027 Freeze MVP payment/session identity semantics for fresh sessions and non-reused collection/payment lineage. 
- Context: - - Reconnect, durable provider state, inspection APIs, and manual collection all depend on a stable answer for what identity ties those records together. - - Leaving this implicit risks implementing mutually incompatible storage and API shapes across provider, consumer, and tooling code. + - The revised MVP scope intentionally avoids reconnect/payment identity reuse. - Assumptions: - `A3` - Done when: - - The repo documents the canonical payment identity used across runtime, persistence, and settlement flows. - - The rules for `collection_id` reuse versus minting a new `collection_id` are explicit for fresh sessions, reconnects, and retryable collection flows. - - It is clear which state is keyed by session identifier versus payment identity. + - The repo documents that reconnects create new SDS payment sessions rather than reusing prior payment lineage. + - Collection/payment identity reuse is no longer an MVP open question. - Verify: - - Update `docs/mvp-scope.md` open questions if unresolved, or close/narrow them if decided. - - Confirm MVP-002, MVP-003, MVP-008, MVP-013, MVP-019, and MVP-020 all reference the same identity semantics without contradiction. + - Review [docs/mvp-scope.md](../docs/mvp-scope.md) and confirm the reconnect scenario and major decisions table match this rule. -- [ ] MVP-003 Define the durable provider-side payment and settlement data model. +- [ ] MVP-003 Define and document the provider-side runtime persistence model and its boundary with settlement lifecycle tracking. - Context: - - Provider persistence is MVP-critical, but the canonical durable model still needs to support both runtime session state and settlement state. + - StreamingFast landed the shared repository model, PostgreSQL schema, and DSN-backed repository instantiation. + - The remaining work is to make the runtime-versus-settlement boundary explicit in the MVP backlog and docs. 
- Assumptions: - `A3` + - `A6` - Done when: - - The provider-side durable record types are documented. - - The model supports accepted RAV state, runtime session association, and collection lifecycle state. - - The model is structured so the unresolved `collection_id` semantics do not force a rewrite later. + - The backlog and docs clearly separate runtime/session persistence from collection lifecycle persistence. + - The provider-side durable model is described in terms of sessions, workers, usage, current accepted RAV state, and separate collection lifecycle tracking. + - Downstream tasks no longer assume reconnect-driven reuse semantics. - Verify: - - Document the schema/record model in a repo plan or doc. - - Confirm every persistence-related task below maps cleanly to the model. + - Review [provider/repository/repository.go](../provider/repository/repository.go) and [provider/gateway/REPOSITORY.md](../provider/gateway/REPOSITORY.md) against backlog task wording. -- [ ] MVP-004 Define and document the byte-billing and payment/header contract used in the real runtime path. +- [ ] MVP-004 Define and document the real runtime payment contract used by the public payment gateway, private plugin gateway, and consumer/provider payment loop. - Context: - - The MVP now explicitly requires real provider and consumer integrations, so the runtime payment/header contract must be frozen enough for those paths. + - The runtime shape changed materially in the recent commit range. + - The current repo now has: + - a public Payment Gateway + - a private Plugin Gateway + - typed plugin session IDs + - shared repository-backed runtime state - Assumptions: - - none + - `A2` + - `A3` - Done when: - - The document explains how the real provider path receives/validates payment material. - - Billable usage is defined as provider-authoritative streamed bytes. - - Header/payment material, signature encoding, and session binding expectations are documented. 
+ - The runtime contract is documented in terms of the actual provider shape now in repo. + - Plugin session/usage correlation is described using typed protobuf fields rather than old implicit header flow. + - Consumer/provider payment-loop expectations are documented without revive/resume assumptions. - Verify: - - Update the relevant docs and ensure implementation tasks that depend on the wire contract can point to a stable reference. + - Review the backlog wording against [cmd/sds/impl/provider_gateway.go](../cmd/sds/impl/provider_gateway.go), [provider/plugin/gateway.go](../provider/plugin/gateway.go), and the plugin protobufs. ## Oracle Tasks -- [ ] MVP-005 Implement a standalone oracle service with manual whitelist and recommended-provider response. +- [ ] MVP-005 Implement a standalone oracle service with manual whitelist, canonical pricing, recommended-provider response, and control-plane endpoint return. - Context: - - The oracle is now a mandatory MVP component, even though the initial logic is intentionally simple. + - The oracle is a mandatory MVP component. + - The revised scope fixes both the selection default and the pricing authority model. - Assumptions: - `A1` - `A2` - `A5` - Done when: - A standalone oracle component exists. - - It can serve a manually curated provider set. - - It returns eligible providers plus a recommended provider for a requested chain/network. - - The oracle request/response contract is documented and stable enough for the consumer sidecar to integrate against without provider-specific assumptions. - - Each provider record includes the minimum metadata required for MVP routing and connection setup, at least provider identity, endpoint/transport details, and chain/network eligibility. - - Recommendation behavior is deterministic for the same request and whitelist state. 
- - If pricing metadata is returned before pricing authority is fully frozen, the response documents that status clearly so the consumer does not treat advisory metadata as final authority by accident. + - It serves a manually curated provider set. + - It returns eligible providers, a recommended provider, canonical pricing, and the selected provider control-plane endpoint for a requested chain/network. + - The oracle is not required to resolve the final stream endpoint up front. - Verify: - - Add tests for whitelist lookup and provider recommendation behavior. - - Add API contract coverage for request validation and response shape. - - Add a manual smoke flow that exercises oracle -> consumer sidecar -> provider selection. + - Add tests for whitelist lookup, response validation, and deterministic recommendation behavior. - [ ] MVP-006 Add authenticated oracle administration for whitelist and provider metadata management. - Context: @@ -238,69 +237,78 @@ These assumptions are referenced by task ID so it is clear where unresolved deci ## Consumer Tasks -- [ ] MVP-007 Integrate consumer sidecar with oracle discovery while preserving direct-provider fallback. +- [ ] MVP-007 Integrate consumer sidecar with oracle discovery while preserving direct-provider fallback and provider-returned data-plane resolution. - Context: - Consumer sidecar is the mandatory client-side integration point and must support oracle-driven default behavior. - Assumptions: - `A1` - `A2` + - `A3` - Done when: - Consumer sidecar can query the oracle and choose the recommended provider. - - Direct provider configuration still works as a fallback. - - The two flows share the same downstream session-init/payment behavior. + - Direct provider configuration still works as a fallback/override. + - The consumer/provider flow uses the provider control-plane endpoint from the oracle and receives the data-plane endpoint during provider handshake. 
- Verify: - - Add tests or manual smoke steps for both oracle-backed and direct-provider flows. + - Add tests or documented smoke steps for both oracle-backed and direct-provider flows. -- [ ] MVP-013 Implement provider-authoritative reconnect/resume in the normal handshake path. +- [ ] MVP-013 Post-MVP only: implement true provider-authoritative payment-session reconnect/resume semantics. - Context: - - Consumer persistence is intentionally not required for MVP, so the reconnect story must not depend entirely on consumer-local state. + - The revised MVP scope explicitly defers payment-session continuation across reconnects. + - This item remains only as a post-MVP placeholder so the historical requirement is not lost. - Assumptions: - `A3` - Done when: - - Consumer reconnect flow can recover via provider-authoritative handshake behavior. - - The sidecar can handle the provider returning either fresh or resumable state. + - This item is not part of MVP delivery. - Verify: - - Add an integration scenario that disconnects and reconnects through the normal init flow and resumes against provider state. + - Confirm it is not referenced by current MVP acceptance scenarios. ## Provider State and Settlement Tasks -- [ ] MVP-008 Add durable provider storage for accepted RAV, session state, and collection lifecycle state. +- [ ] MVP-008 Complete durable provider runtime storage for sessions, usage, and accepted RAV state, distinct from collection lifecycle tracking. - Context: - - Provider-side accepted payment state must survive restart for MVP. + - StreamingFast landed: + - PostgreSQL repository foundation + - DSN-based repository selection + - gateway integration with that repository + - repository test coverage + - Remaining work is to close the gap between existing runtime persistence and the MVP durability scenarios. - Assumptions: - `A3` - `A6` - Done when: - - Provider restart does not lose accepted collectible RAV state. 
- - Collection lifecycle state persists across restart. - - Runtime session state and settlement state are both recoverable enough for MVP behavior. + - Provider restart does not lose session/runtime state required by the gateway and plugin path. + - Accepted RAV state needed for post-restart inspection and settlement survives restart in the durable backend. + - The task no longer includes collection lifecycle state, which remains tracked under MVP-029. - Verify: - - Add a restart-focused integration or persistence test that validates accepted state survives process restart. + - Add or unskip a restart-focused integration or persistence test that validates accepted state survives process restart using the durable repository path. -- [ ] MVP-009 Expose provider inspection and settlement-data retrieval APIs for accepted/collectible RAV state. +- [ ] MVP-009 Expose provider inspection and settlement-data retrieval APIs for accepted and collectible RAV state. - Context: - CLI inspection and manual collection require a provider-side way to retrieve settlement-relevant data. + - Current `GetSessionStatus` is useful runtime scaffolding, but it does not provide sufficient settlement inspection coverage. - Assumptions: - `A3` + - `A5` - Done when: - - Provider exposes APIs for listing and fetching accepted/collectible payment state. + - Provider exposes APIs for listing and fetching accepted and collectible payment state. - The returned data is sufficient for operator inspection and CLI-based collection. - - The API shape is stable enough for MVP-019 and MVP-020 to build on it without provider-specific ad hoc reads. + - The API shape is stable enough for MVP-019 and MVP-020 without direct backend reads. - Verify: - - Add integration tests for listing and fetching settlement-relevant accepted state. + - Add integration coverage for listing and fetching settlement-relevant accepted state. 
- [ ] MVP-029 Implement provider collection lifecycle transitions and update surfaces for `collectible`, `collect_pending`, `collected`, and retryable collection state. - Context: - - The MVP requires provider-visible collection lifecycle state, but inspection APIs and CLI submission are not sufficient unless something owns the transitions between those states. - - The provider needs a consistent way to track in-flight collection attempts, safe retries, and completed collection outcomes. + - The revised scope keeps collection lifecycle tracking as explicit provider-side work. + - The recent persistence work did not complete this lifecycle. - Assumptions: - `A3` + - `A5` - Done when: - Provider persistence supports the required collection lifecycle states and transition rules. - - There is a defined provider-side update path for marking collection attempts pending, completed, or retryable after an on-chain submission outcome. - - Retry behavior is documented so the CLI can interact with provider state idempotently. + - There is a defined provider-side update path for marking collection attempts pending, completed, or retryable. + - Retry behavior is documented so CLI flows can be idempotent. - Verify: - - Add integration or persistence tests that cover `collectible` -> `collect_pending` -> `collected` and a retryable failure path. + - Add persistence or integration tests that cover `collectible` -> `collect_pending` -> `collected` and a retryable failure path. ## Funding Control and Runtime Payment Tasks @@ -312,11 +320,9 @@ These assumptions are referenced by task ID so it is clear where unresolved deci - Done when: - Provider can compare session-local exposure against available funding. - Provider emits the appropriate control/funding messages during active streams. - - Low-funds behavior includes a structured operator-usable reason and enough funding state to explain why streaming was paused or stopped. 
- - The low-funds signal is stable enough for operator tooling and client-side messaging to consume consistently. + - Low-funds behavior includes enough machine-readable state for operator tooling and client-side messaging. - Verify: - - Add an integration test with intentionally low funding that reaches a stop/pause or low-funds condition during streaming. - - Confirm the surfaced low-funds state includes a machine-readable reason and actionable funding context. + - Add an integration test with intentionally low funding that reaches a stop/pause condition during streaming. - [ ] MVP-011 Propagate provider stop/pause decisions through consumer sidecar into the real client path. - Context: @@ -327,11 +333,11 @@ These assumptions are referenced by task ID so it is clear where unresolved deci - Consumer sidecar converts provider control/funding messages into client-visible stop/pause behavior. - Real client integration honors those decisions. - Verify: - - Add integration/manual verification showing the real client path stops or pauses when provider requires it. + - Add integration/manual verification showing the real client path stops or pauses when the provider requires it. - [ ] MVP-012 Add deterministic RAV issuance thresholds suitable for real runtime behavior. - Context: - - The current "sign on every report" behavior is not a good real-runtime policy. + - The current runtime/payment loop foundation exists, but the real-runtime issuance policy still needs to be made explicit. - Assumptions: - none - Done when: @@ -342,26 +348,31 @@ These assumptions are referenced by task ID so it is clear where unresolved deci ## Real Provider and Consumer Integration Tasks -- [ ] MVP-014 Integrate provider gateway validation into the real provider streaming path. +- [ ] MVP-014 Integrate the public Payment Gateway and private Plugin Gateway into the real provider streaming path. - Context: - - MVP requires real provider-path integration, not just demo harness behavior. 
+ - The recent commit range established the provider-side dual-gateway shape and the shared repository wiring. + - The backlog should now treat that as the concrete provider integration target. - Assumptions: - - none + - `A3` - Done when: - - The real provider path validates payment/session state through SDS integration before or during stream setup as required by the chosen runtime contract. + - The real provider path validates payment/session state through the public Payment Gateway. + - Firehose-core plugin traffic goes through the private Plugin Gateway. + - Both paths share the same authoritative provider-side repository state. - Verify: - - Add a real-path integration test or manual verification against the production-like provider path. + - Add a real-path integration test or manual verification against the current provider shape. -- [ ] MVP-015 Wire real byte metering from the provider/plugin path into gateway payment state. +- [ ] MVP-015 Wire real byte metering and session correlation from the plugin path into the payment-state repository used by the gateway. - Context: - - Billable usage for MVP is authoritative streamed bytes from provider-side metering. + - The recent commit range fixed session ID propagation and pushed more correlation through typed plugin fields and shared repository state. + - The remaining work is to validate the billing and payment-state behavior at acceptance level. - Assumptions: - - none + - `A3` - Done when: - - Real provider-side byte metering feeds the payment state used for billing/RAV progression. + - Real provider-side byte metering feeds the repository state used for payment progression. + - Session correlation is stable across auth, session, usage, and gateway-side payment state. - The runtime path does not rely on consumer-reported bytes as the billing source of truth. - Verify: - - Add tests or manual instrumentation evidence showing the live provider path updates billing state from metered bytes. 
+ - Add tests or manual instrumentation evidence showing live provider/plugin activity updates the payment-state repository consistently. - [ ] MVP-016 Enforce gateway Continue/Pause/Stop decisions in the live provider stream lifecycle. - Context: @@ -373,65 +384,69 @@ These assumptions are referenced by task ID so it is clear where unresolved deci - Verify: - Add manual or automated verification where the provider stops or pauses the live stream based on gateway control decisions. -- [ ] MVP-017 Integrate the real consumer/client path with consumer sidecar init, usage reporting, and session end. +- [ ] MVP-017 Implement the consumer sidecar as a Substreams-compatible endpoint/proxy rather than only a wrapper-controlled lifecycle service. - Context: - - The consumer sidecar is mandatory in the MVP architecture, but the real client path still needs to use it end to end. - - This task covers lifecycle entry/exit integration; the long-lived payment-session control loop is tracked separately in MVP-031. + - The revised MVP scope elevates the consumer sidecar from helper service to user-facing SDS boundary. + - Existing `sds sink run` integration is useful foundation, but it does not satisfy the data-plane compatibility goal by itself. - Assumptions: - `A1` + - `A2` + - `A3` - Done when: - - The real client path uses consumer sidecar init before streaming. - - It reports usage/end-of-session through the sidecar. - - It participates in oracle-backed discovery or direct fallback according to configuration. + - Existing Substreams tooling can point at the consumer sidecar endpoint for the normal data-plane path. + - The consumer sidecar hides oracle lookup, provider session init, and payment coordination behind that ingress. + - Wrapper-specific orchestration is no longer the only real integration path. - Verify: - - Add a real-path integration or manual scenario covering init -> stream -> usage -> end-session. 
+ - Add a real-path integration or documented manual scenario that runs through the consumer sidecar endpoint rather than only `sds sink run`. -- [ ] MVP-031 Wire the live PaymentSession and RAV-control loop into the real client/provider runtime path. +- [ ] MVP-031 Wire the long-lived payment-control loop behind the consumer-sidecar ingress path used by real runtime traffic. - Context: - - MVP requires payment state to keep advancing during real streaming, not only during local/demo harness flows. - - The real integration is incomplete until provider-driven RAV requests and funding/control messages flow through the same production path used by the live stream. + - MVP requires payment state to keep advancing during real streaming, not only through wrapper-driven flows. + - The loop should ultimately sit behind the same user-facing sidecar ingress the client uses. - Assumptions: - - none + - `A2` + - `A3` - Done when: - - The real client/provider integration keeps the long-lived SDS payment-session control loop active alongside the live stream. - - Provider-driven RAV requests, acknowledgements, and funding/control messages flow through the production runtime path rather than only through demo wrappers. - - Payment state advancement during streaming uses the same runtime path that real deployments will use. + - The real client/provider integration keeps the SDS payment-control loop active alongside the live stream behind the consumer-sidecar ingress path. + - Provider-driven RAV requests, acknowledgements, and control messages flow through the production runtime path rather than only through wrapper commands. - Verify: - - Add a real-path integration or documented manual verification showing stream start, at least one provider-driven payment/RAV update during live streaming, and synchronized session state until normal end or stop/pause. 
+ - Add a real-path integration or documented manual verification showing stream start, at least one provider-driven payment update during live streaming, and synchronized session state until normal end or stop/pause. ## Operator Tooling Tasks - [ ] MVP-018 Implement operator funding CLI flows for approve/deposit/top-up beyond local demo assumptions. - Context: - Funding is an MVP operator workflow, but current tooling is still demo-oriented. + - `sds tools rav` is useful support tooling, not a substitute for escrow funding flows. - Assumptions: - none - Done when: - - CLI commands exist for approve/deposit/top-up in a provider-operator/payer-operator workflow. + - CLI commands exist for approve/deposit/top-up in a provider-operator or payer-operator workflow. - The commands are not limited to local deterministic devenv assumptions. - - The documented operator flow links funding actions to the low-funds or runtime inspection surfaces so an operator can move from a funding-related stop condition to topping up the correct escrow without ad hoc investigation. + - The documented operator flow links funding actions to low-funds/runtime inspection surfaces. - Verify: - Add command-level tests where practical and document a manual funding flow that works against a non-demo configuration. -- [ ] MVP-019 Implement provider inspection CLI flows for collectible/accepted RAV data. +- [ ] MVP-019 Implement provider inspection CLI flows for accepted and collectible RAV data. - Context: - Operators need to inspect what can be collected before settlement. + - `sds tools rav` inspection is local protobuf inspection, not provider-backed operator inspection. - Assumptions: - - `A3` - `A5` - Done when: - - CLI can retrieve and display accepted/collectible payment state from the provider. - - It supports the collection lifecycle states needed for MVP operations. + - CLI can retrieve and display accepted and collectible payment state from the provider. 
+ - It supports the lifecycle states needed for MVP operations. - Verify: - Add manual smoke coverage for inspecting accepted and `collect_pending` state. -- [ ] MVP-020 Implement manual collection CLI flow that crafts/signs/submits collect transactions locally. +- [ ] MVP-020 Implement manual collection CLI flow that fetches provider settlement state and crafts/signs/submits collect transactions locally. - Context: - - Settlement keys should stay outside the provider sidecar. + - Settlement keys should stay outside the provider runtime. + - Existing RAV tooling is helpful support, but it does not yet implement the provider-backed settlement flow required by MVP. - Assumptions: - - `A3` + - `A5` - Done when: - - CLI fetches settlement-relevant data from provider. + - CLI fetches settlement-relevant data from the provider. - CLI crafts and signs the collect transaction locally. - CLI can retry safely when collection is pending or needs to be re-attempted. - Verify: @@ -441,16 +456,14 @@ These assumptions are referenced by task ID so it is clear where unresolved deci - [ ] MVP-028 Define the MVP authentication and authorization contract for oracle and provider operator surfaces. - Context: - - The MVP requires authenticated operator/admin actions, but the exact auth mechanism remains open. - - Oracle and provider surfaces should not drift into incompatible auth behavior without an explicit contract. + - The only real architecture-level open questions still left in scope are authn/authz and observability depth. - Assumptions: - `A5` - Done when: - The repo documents the MVP authn/authz approach for oracle and provider operator/admin surfaces. - - It is clear which endpoints/actions require operator privileges and which identities or credentials satisfy that requirement. + - It is clear which endpoints/actions require operator privileges and which credentials satisfy that requirement. 
- MVP-006 and MVP-022 can implement the same contract rather than inventing separate security behavior. - Verify: - - Update `docs/mvp-scope.md` open question if unresolved, or close/narrow it if decided. - Confirm oracle and provider admin task definitions point to the same auth contract. - [ ] MVP-021 Make TLS the default non-dev runtime posture for oracle, sidecar, and provider integration paths. @@ -461,20 +474,18 @@ These assumptions are referenced by task ID so it is clear where unresolved deci - Done when: - Non-dev/runtime docs and defaults use TLS for oracle, consumer sidecar, and provider integration surfaces. - Plaintext behavior is clearly scoped to local/dev/demo usage. - - Oracle administration and provider/operator traffic do not rely on plaintext-by-default behavior outside explicitly dev-scoped workflows. + - Operator/admin traffic does not rely on plaintext-by-default behavior outside explicitly dev-scoped workflows. - Verify: - Add validation or smoke coverage for TLS-enabled startup and client connectivity across oracle and sidecar/provider paths. - [ ] MVP-022 Add authentication and authorization to provider admin/operator APIs. - Context: - Provider-side operator actions must not rely on open or anonymous admin APIs. - - This task is about protecting provider operator surfaces, not defining the inspection/retrieval API shape itself. - Assumptions: - `A5` - Done when: - Provider inspection and settlement-retrieval APIs require authentication and authorization according to the shared MVP contract. - The implementation rejects unauthenticated or unauthorized access to operator-only provider actions. - - The authentication requirement is documented and enforced in tests where practical. - Verify: - Add tests for authenticated success and unauthenticated rejection. 
@@ -482,17 +493,16 @@ These assumptions are referenced by task ID so it is clear where unresolved deci - [ ] MVP-030 Add runtime compatibility and preflight checks for real provider/plugin deployments. - Context: - - The MVP definition requires a real provider deployment path, not only a local happy-path demo. - - Reproducible real-path validation is weaker if the repo does not explicitly check the runtime compatibility assumptions required by the `sds://` provider/plugin integration path. + - Recent README, config, and firecore test scaffolding identify the target runtime more clearly. + - The repo still lacks proper enforced preflight validation for that deployment shape. - Assumptions: - - none + - `A5` - Done when: - The repo identifies at least one named real-provider target environment for MVP acceptance and documents the required runtime compatibility constraints clearly enough for operators to validate before rollout. - The required runtime versions, plugin compatibility assumptions, and non-demo configuration prerequisites for that environment are documented. - Startup or preflight checks fail fast when the provider/plugin environment is incompatible with the required SDS runtime contract. - Verify: - Add a startup/preflight validation test or a documented manual verification flow that demonstrates clear failure modes for unsupported runtime combinations. - - Document a reproducible preflight or smoke checklist for the named real-provider environment. - [ ] MVP-023 Define the final MVP observability floor beyond structured logs and status tooling. - Context: @@ -503,7 +513,7 @@ These assumptions are referenced by task ID so it is clear where unresolved deci - The repo has a documented observability floor for MVP. - It is clear whether metrics endpoints are part of MVP or not. - Verify: - - Update `docs/mvp-scope.md` and remove or narrow the open question if a decision is made. 
+ - Update [docs/mvp-scope.md](../docs/mvp-scope.md) and narrow the open question if a decision is made. - [ ] MVP-024 Implement basic operator-facing inspection/status surfaces and log correlation. - Context: @@ -515,21 +525,19 @@ These assumptions are referenced by task ID so it is clear where unresolved deci - Provider/operator tooling exposes basic status views and correlation aids without assuming a finalized metrics/tracing backend. - This task complements MVP-032 rather than replacing concrete runtime/session/payment inspection APIs. - Verify: - - Manual verification that operators can inspect and reason about low-funds, reconnect, and collection flows without code-level debugging. + - Manual verification that operators can inspect and reason about low-funds, restart, and collection flows without code-level debugging. - [ ] MVP-032 Expose operator runtime/session/payment inspection APIs and CLI/status flows. - Context: - - The MVP scope requires operators to inspect session, payment, and collection state, not only settlement-ready collectible records. - - Reconnect debugging, low-funds handling, and restart validation are weaker if operators must infer runtime state from raw logs or direct datastore access. + - The MVP scope requires operators to inspect session, payment, and collection state, not only settlement-ready records. - Assumptions: - - `A3` - `A4` - `A5` + - `A6` - Done when: - The provider exposes authenticated runtime/status APIs for active or recent sessions, payment state, latest accepted/requested RAV context, and current low-funds/control state where applicable. - Operator-facing CLI or status tooling can retrieve and display that runtime state without direct backend/database access. - Low-funds inspection includes enough actionable information for an operator to understand whether additional escrow funding is required and why. 
- - Operators can inspect enough runtime/session/payment detail to understand reconnect, low-funds, and post-restart behavior without relying solely on logs. - Verify: - Add manual or integration coverage for inspecting an active or recently interrupted session, a low-funds session, and persisted post-restart payment state. @@ -538,33 +546,35 @@ These assumptions are referenced by task ID so it is clear where unresolved deci - [ ] MVP-025 Add MVP acceptance coverage for the primary end-to-end scenarios in docs/tests/manual verification. - Context: - The MVP scope makes scenarios the primary definition of done. + - The recent commit range added real-path integration scaffolding, including `TestFirecore`, but it is not yet enough to close the scenario matrix. - Assumptions: - none - Done when: - - The key scenarios from `docs/mvp-scope.md` are covered by tests, reproducible manual flows, or both. + - The key scenarios from [docs/mvp-scope.md](../docs/mvp-scope.md) are covered by tests, reproducible manual flows, or both. - The repo identifies which scenarios are validated locally versus against a named real-provider environment. - - At least scenarios `A`, `B`, `C`, and `G` have a defined validation path against a real-provider environment rather than relying only on local demo coverage. - - The repo clearly states how each acceptance scenario is validated. + - At least scenarios `A`, `C`, and `G` have a defined validation path against a real-provider environment rather than relying only on local demo coverage. + - Scenario `B` is validated according to the fresh-session-after-interruption semantics in the revised scope, not resume semantics. - Verify: - Update the scenario matrix or equivalent test/docs references for each acceptance scenario, including environment, validation method, and source of truth for the result. -- [ ] MVP-026 Refresh protocol/runtime docs so they match the MVP architecture and explicit open questions. 
+- [ ] MVP-026 Refresh protocol/runtime docs so they match the revised MVP architecture and remaining open questions. - Context: - - The phase 1 spec remains useful but no longer matches the MVP architecture in several important ways. + - [docs/mvp-scope.md](../docs/mvp-scope.md) has been updated. + - The rest of the documentation set and backlog still needs to catch up to that architecture and to the recent provider-side implementation changes. - Assumptions: - `A1` - - `A2` - - `A3` - `A4` - `A5` - Done when: - - The repo documentation reflects the MVP architecture rather than the older API-key-centric/control-plane assumptions. - - Open questions are called out explicitly rather than being hidden in outdated text. + - The repo documentation reflects the revised MVP architecture rather than the older reconnect/pricing assumptions. + - Remaining open questions are limited to auth and observability rather than already-resolved scope decisions. + - Docs that describe provider runtime shape match the current public Payment Gateway plus private Plugin Gateway model. - Verify: - - Review the updated docs against `docs/mvp-scope.md` and confirm there are no major contradictions. + - Review the updated docs against [docs/mvp-scope.md](../docs/mvp-scope.md) and confirm there are no major contradictions. ## Notes on Scope Boundaries - This backlog intentionally does **not** make aggregate multi-stream payer-level liability tracking an MVP requirement. - It also does **not** make wallet-based funding UI or automated collection an MVP requirement. -- If future work needs those features, it should be tracked separately as post-MVP scope unless the MVP definition changes again. +- It does **not** make payment-session continuation across reconnects an MVP requirement. +- Supporting utilities such as `sds tools rav`, GRT/pricing refactors, and similar groundwork should be treated as helpful context unless they directly close an MVP acceptance task. 
From 73fab75abbd0539736777d83cb93a49e40ed01ac Mon Sep 17 00:00:00 2001 From: Juan Manuel Rodriguez Defago Date: Thu, 26 Mar 2026 23:24:51 -0300 Subject: [PATCH 06/17] Align runtime payment contract and fix validation stability - align the consumer/provider runtime contract around control-plane init, provider-returned data-plane endpoints, and fresh-session semantics - update sink/demo flows, generated protobufs, and integration coverage to match the revised MVP-004 behavior - make psql tests repo-portable, make integration devenv avoid fixed-port collisions, and refresh MVP backlog/gap-analysis status --- cmd/sds/demo_flow.go | 9 +- cmd/sds/demo_setup.go | 4 +- cmd/sds/impl/provider_gateway.go | 47 +++-- cmd/sds/impl/sink_run.go | 108 ++++++----- cmd/sds/sink.go | 2 +- consumer/sidecar/handler_init.go | 172 ++++++++---------- consumer/sidecar/sidecar.go | 3 - devel/sds_sink | 2 +- .../data_service/consumer/v1/consumer.pb.go | 99 +++++----- .../data_service/provider/v1/gateway.pb.go | 21 ++- plans/mvp-gap-analysis.md | 33 +++- plans/mvp-implementation-backlog.md | 35 +++- .../data_service/consumer/v1/consumer.proto | 14 +- .../data_service/provider/v1/gateway.proto | 5 +- provider/gateway/gateway.go | 49 ++--- provider/gateway/handler_start_session.go | 15 +- provider/repository/psql/database_test.go | 4 +- provider/repository/psql/migrations_path.go | 37 ++++ test/integration/firecore_test.go | 5 +- test/integration/main_test.go | 41 +++-- .../payment_session_binding_test.go | 16 +- .../payment_session_close_propagation_test.go | 15 +- .../payment_session_consumer_wiring_test.go | 20 +- .../payment_session_rav_request_test.go | 18 +- .../payment_session_underpay_test.go | 18 +- .../integration/provider_gateway_auth_test.go | 15 +- test/integration/sidecar_test.go | 94 ++++++---- 27 files changed, 524 insertions(+), 377 deletions(-) create mode 100644 provider/repository/psql/migrations_path.go diff --git a/cmd/sds/demo_flow.go b/cmd/sds/demo_flow.go index 
b0af17c..c53a744 100644 --- a/cmd/sds/demo_flow.go +++ b/cmd/sds/demo_flow.go @@ -37,7 +37,7 @@ var demoFlowCmd = Command( Flags(func(flags *pflag.FlagSet) { flags.String("consumer-sidecar-addr", "http://localhost:9002", "Consumer sidecar address") flags.String("provider-sidecar-addr", "http://localhost:9001", "Provider gateway address (used for status checks)") - flags.String("provider-endpoint", "http://localhost:9001", "Provider gateway endpoint to pass to consumer Init (PaymentGatewayService)") + flags.String("provider-control-plane-endpoint", "http://localhost:9001", "Provider control-plane endpoint to pass to consumer Init (PaymentGatewayService)") flags.String("payer-address", "", "Payer address (required)") flags.String("receiver-address", "", "Receiver/service provider address (required)") @@ -55,7 +55,7 @@ func runDemoFlow(cmd *cobra.Command, args []string) error { consumerSidecarAddr := strings.TrimSpace(sflags.MustGetString(cmd, "consumer-sidecar-addr")) providerSidecarAddr := strings.TrimSpace(sflags.MustGetString(cmd, "provider-sidecar-addr")) - providerEndpoint := strings.TrimSpace(sflags.MustGetString(cmd, "provider-endpoint")) + providerControlPlaneEndpoint := strings.TrimSpace(sflags.MustGetString(cmd, "provider-control-plane-endpoint")) payerHex := sflags.MustGetString(cmd, "payer-address") receiverHex := sflags.MustGetString(cmd, "receiver-address") @@ -68,7 +68,7 @@ func runDemoFlow(cmd *cobra.Command, args []string) error { cli.Ensure(consumerSidecarAddr != "", " is required") cli.Ensure(providerSidecarAddr != "", " is required") - cli.Ensure(providerEndpoint != "", " is required") + cli.Ensure(providerControlPlaneEndpoint != "", " is required") cli.Ensure(payerHex != "", " is required") payer, err := eth.NewAddress(payerHex) @@ -94,7 +94,7 @@ func runDemoFlow(cmd *cobra.Command, args []string) error { Receiver: commonv1.AddressFromEth(receiver), DataService: commonv1.AddressFromEth(dataService), }, - GatewayEndpoint: providerEndpoint, + 
ProviderControlPlaneEndpoint: providerControlPlaneEndpoint, })) cli.NoError(err, "consumer Init failed") @@ -102,6 +102,7 @@ func runDemoFlow(cmd *cobra.Command, args []string) error { cli.Ensure(sessionID != "", "consumer Init returned an empty session_id") fmt.Printf(" session_id: %s\n", sessionID) + fmt.Printf(" data_plane_endpoint: %s\n", initResp.Msg.GetDataPlaneEndpoint()) fmt.Printf("\nStep 2: ReportUsage loop\n") var totalBlocksSent uint64 diff --git a/cmd/sds/demo_setup.go b/cmd/sds/demo_setup.go index b3ee330..68ad5c5 100644 --- a/cmd/sds/demo_setup.go +++ b/cmd/sds/demo_setup.go @@ -73,7 +73,7 @@ func runDemoSetup(cmd *cobra.Command, args []string) error { fmt.Printf("START COMMANDS:\n") fmt.Printf(" Provider gateway:\n") - fmt.Printf(" sds provider gateway --service-provider %s --collector-address %s --escrow-address %s --rpc-endpoint %s\n", + fmt.Printf(" sds provider gateway --service-provider %s --collector-address %s --escrow-address %s --rpc-endpoint %s --data-plane-endpoint localhost:10016\n", env.ServiceProvider.Address.Pretty(), env.Collector.Address.Pretty(), env.Escrow.Address.Pretty(), @@ -87,7 +87,7 @@ func runDemoSetup(cmd *cobra.Command, args []string) error { ) fmt.Printf("\n") fmt.Printf(" Demo flow:\n") - fmt.Printf(" sds demo flow --payer-address %s --receiver-address %s --data-service-address %s --provider-endpoint http://localhost:9001 --consumer-sidecar-addr http://localhost:9002\n", + fmt.Printf(" sds demo flow --payer-address %s --receiver-address %s --data-service-address %s --provider-control-plane-endpoint http://localhost:9001 --consumer-sidecar-addr http://localhost:9002\n", env.Payer.Address.Pretty(), env.ServiceProvider.Address.Pretty(), env.DataService.Address.Pretty(), diff --git a/cmd/sds/impl/provider_gateway.go b/cmd/sds/impl/provider_gateway.go index 060af56..651e182 100644 --- a/cmd/sds/impl/provider_gateway.go +++ b/cmd/sds/impl/provider_gateway.go @@ -2,6 +2,7 @@ package impl import ( "context" + "fmt" "time" 
"github.com/graphprotocol/substreams-data-service/horizon" @@ -74,6 +75,7 @@ var ProviderGatewayCommand = Command( flags.String("collector-address", "", "Collector contract address for EIP-712 domain (required)") flags.String("escrow-address", "", "PaymentsEscrow contract address for balance queries (required)") flags.String("rpc-endpoint", "", "Ethereum RPC endpoint for on-chain queries (required)") + flags.String("data-plane-endpoint", "", "Session-specific Substreams data-plane endpoint advertised by the provider handshake (required)") flags.String("pricing-config", "", "Path to pricing configuration YAML file (uses defaults if not provided)") flags.Bool("plaintext", false, "Serve plaintext h2c instead of TLS (local/demo only)") flags.String("tls-cert-file", "", "Path to the TLS certificate PEM file") @@ -97,10 +99,15 @@ func StartProviderGateway( collectorAddr eth.Address, escrowAddr eth.Address, rpcEndpoint string, + dataPlaneEndpoint string, repositoryDSN string, transportConfig sidecarlib.ServerTransportConfig, pricingConfig *sidecarlib.PricingConfig, ) (*ProviderGateways, error) { + if dataPlaneEndpoint == "" { + return nil, fmt.Errorf(" is required") + } + // Create repository from DSN (shared between both gateways) repo, err := gateway.NewRepositoryFromDSN(ctx, repositoryDSN, providerLog) if err != nil { @@ -111,15 +118,16 @@ func StartProviderGateway( // Create Payment Gateway paymentConfig := &gateway.Config{ - ListenAddr: paymentListenAddr, - ServiceProvider: serviceProviderAddr, - Domain: domain, - CollectorAddr: collectorAddr, - EscrowAddr: escrowAddr, - RPCEndpoint: rpcEndpoint, - PricingConfig: pricingConfig, - Repository: repo, - TransportConfig: transportConfig, + ListenAddr: paymentListenAddr, + ServiceProvider: serviceProviderAddr, + Domain: domain, + CollectorAddr: collectorAddr, + EscrowAddr: escrowAddr, + RPCEndpoint: rpcEndpoint, + PricingConfig: pricingConfig, + DataPlaneEndpoint: dataPlaneEndpoint, + Repository: repo, + TransportConfig: 
transportConfig, } paymentGateway := gateway.New(paymentConfig, providerLog) @@ -160,6 +168,7 @@ func runProviderGateway(cmd *cobra.Command, args []string) error { collectorHex := sflags.MustGetString(cmd, "collector-address") escrowHex := sflags.MustGetString(cmd, "escrow-address") rpcEndpoint := sflags.MustGetString(cmd, "rpc-endpoint") + dataPlaneEndpoint := sflags.MustGetString(cmd, "data-plane-endpoint") pricingConfigPath := sflags.MustGetString(cmd, "pricing-config") plaintext := sflags.MustGetBool(cmd, "plaintext") tlsCertFile := sflags.MustGetString(cmd, "tls-cert-file") @@ -179,6 +188,7 @@ func runProviderGateway(cmd *cobra.Command, args []string) error { cli.NoError(err, "invalid %q", escrowHex) cli.Ensure(rpcEndpoint != "", " is required") + cli.Ensure(dataPlaneEndpoint != "", " is required") transportConfig := sidecarlib.ServerTransportConfig{ Plaintext: plaintext, @@ -204,15 +214,16 @@ func runProviderGateway(cmd *cobra.Command, args []string) error { // Create Payment Gateway paymentConfig := &gateway.Config{ - ListenAddr: paymentListenAddr, - ServiceProvider: serviceProviderAddr, - Domain: domain, - CollectorAddr: collectorAddr, - EscrowAddr: escrowAddr, - RPCEndpoint: rpcEndpoint, - PricingConfig: pricingConfig, - TransportConfig: transportConfig, - Repository: repo, + ListenAddr: paymentListenAddr, + ServiceProvider: serviceProviderAddr, + Domain: domain, + CollectorAddr: collectorAddr, + EscrowAddr: escrowAddr, + RPCEndpoint: rpcEndpoint, + PricingConfig: pricingConfig, + DataPlaneEndpoint: dataPlaneEndpoint, + TransportConfig: transportConfig, + Repository: repo, } paymentGateway := gateway.New(paymentConfig, providerLog) diff --git a/cmd/sds/impl/sink_run.go b/cmd/sds/impl/sink_run.go index 10cdcbd..77cdec2 100644 --- a/cmd/sds/impl/sink_run.go +++ b/cmd/sds/impl/sink_run.go @@ -14,6 +14,7 @@ import ( commonv1 "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/common/v1" consumerv1 
"github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/consumer/v1" "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/consumer/v1/consumerv1connect" + sidecarlib "github.com/graphprotocol/substreams-data-service/sidecar" "github.com/spf13/cobra" "github.com/spf13/pflag" "github.com/streamingfast/cli" @@ -77,7 +78,7 @@ func runSinkRun(cmd *cobra.Command, args []string) error { // Payment flags consumerSidecarAddr := sflags.MustGetString(cmd, "consumer-sidecar-addr") - gatewayEndpoint := sflags.MustGetString(cmd, "gateway-endpoint") + providerControlPlaneEndpoint := sflags.MustGetString(cmd, "provider-control-plane-endpoint") payerHex := sflags.MustGetString(cmd, "payer-address") receiverHex := sflags.MustGetString(cmd, "receiver-address") dataServiceHex := sflags.MustGetString(cmd, "data-service-address") @@ -118,17 +119,12 @@ func runSinkRun(cmd *cobra.Command, args []string) error { sinkerConfig.Mode = sink.SubstreamsModeDevelopment } - // Create the sinker from config - sinker, err := sink.NewFromConfig(sinkerConfig) - cli.NoError(err, "unable to create sinker") - wrapper := newPaymentWrapper( consumerSidecarAddr, payer, receiver, dataService, - gatewayEndpoint, - sinkerConfig.ClientConfig, + providerControlPlaneEndpoint, reportInterval, sinkLog, ) @@ -136,14 +132,21 @@ func runSinkRun(cmd *cobra.Command, args []string) error { app := cli.NewApplication(cmd.Context()) // Initialize payment session and get the RAV for authentication - paymentRAV, err := wrapper.init(app.Context()) + initResult, err := wrapper.init(app.Context()) if err != nil { return fmt.Errorf("failed to initialize payment session: %w", err) } + sinkerConfig.ClientConfig = newClientConfigForDataPlaneEndpoint(sinkerConfig.ClientConfig, initResult.DataPlaneEndpoint) + + // Create the sinker from config after the provider handshake so the real data-plane + // endpoint uses the provider-returned session-specific value. 
+ sinker, err := sink.NewFromConfig(sinkerConfig) + cli.NoError(err, "unable to create sinker") + // Add the RAV header for authentication with the Substreams endpoint - if paymentRAV != nil { - ravHeader, err := encodeRAVHeader(paymentRAV) + if initResult.PaymentRAV != nil { + ravHeader, err := encodeRAVHeader(initResult.PaymentRAV) if err != nil { return fmt.Errorf("failed to encode RAV header: %w", err) } @@ -201,14 +204,13 @@ func handleBlockUndoSignal(ctx context.Context, undoSignal *pbsubstreamsrpc.Bloc // paymentWrapper wraps the sink with payment session management type paymentWrapper struct { - sidecarClient consumerv1connect.ConsumerSidecarServiceClient - payer eth.Address - receiver eth.Address - dataService eth.Address - gatewayEndpoint string // Provider gateway for payment session management - substreamsEndpoint string // Substreams endpoint for data streaming - reportInterval time.Duration - logger *zap.Logger + sidecarClient consumerv1connect.ConsumerSidecarServiceClient + payer eth.Address + receiver eth.Address + dataService eth.Address + providerControlPlaneEndpoint string + reportInterval time.Duration + logger *zap.Logger sessionID string usageTracker *sds.UsageTracker @@ -222,43 +224,31 @@ type paymentWrapper struct { func newPaymentWrapper( sidecarAddr string, payer, receiver, dataService eth.Address, - gatewayEndpoint string, - clientConfig *client.SubstreamsClientConfig, + providerControlPlaneEndpoint string, reportInterval time.Duration, logger *zap.Logger, ) *paymentWrapper { - // Build substreams endpoint URL from client config - scheme := "https" - if clientConfig.PlainText() { - scheme = "http" - } - substreamsEndpoint := fmt.Sprintf("%s://%s", scheme, clientConfig.Endpoint()) - if clientConfig.Insecure() { - substreamsEndpoint += "?insecure=true" - } - - // If gateway endpoint is not specified, default to substreams endpoint - if gatewayEndpoint == "" { - gatewayEndpoint = substreamsEndpoint - } - priceConverter := 
sds.NewStaticPriceConverter(0.15) // Default: 1 GRT = $0.15 return &paymentWrapper{ - sidecarClient: consumerv1connect.NewConsumerSidecarServiceClient(http.DefaultClient, sidecarAddr), - payer: payer, - receiver: receiver, - dataService: dataService, - gatewayEndpoint: gatewayEndpoint, - substreamsEndpoint: substreamsEndpoint, - reportInterval: reportInterval, - logger: logger, - usageTracker: sds.NewUsageTracker(priceConverter), - priceConverter: priceConverter, + sidecarClient: consumerv1connect.NewConsumerSidecarServiceClient(http.DefaultClient, sidecarAddr), + payer: payer, + receiver: receiver, + dataService: dataService, + providerControlPlaneEndpoint: providerControlPlaneEndpoint, + reportInterval: reportInterval, + logger: logger, + usageTracker: sds.NewUsageTracker(priceConverter), + priceConverter: priceConverter, } } -func (w *paymentWrapper) init(ctx context.Context) (*commonv1.SignedRAV, error) { +type paymentInitResult struct { + PaymentRAV *commonv1.SignedRAV + DataPlaneEndpoint string +} + +func (w *paymentWrapper) init(ctx context.Context) (*paymentInitResult, error) { fmt.Fprintln(os.Stderr, "Initializing payment session...") resp, err := w.sidecarClient.Init(ctx, connect.NewRequest(&consumerv1.InitRequest{ @@ -267,15 +257,19 @@ func (w *paymentWrapper) init(ctx context.Context) (*commonv1.SignedRAV, error) Receiver: commonv1.AddressFromEth(w.receiver), DataService: commonv1.AddressFromEth(w.dataService), }, - GatewayEndpoint: w.gatewayEndpoint, - SubstreamsEndpoint: w.substreamsEndpoint, + ProviderControlPlaneEndpoint: w.providerControlPlaneEndpoint, })) if err != nil { return nil, fmt.Errorf("init payment session: %w", err) } + if resp.Msg.GetDataPlaneEndpoint() == "" { + return nil, fmt.Errorf("provider returned an empty data-plane endpoint") + } + w.sessionID = resp.Msg.Session.SessionId fmt.Fprintf(os.Stderr, "Session initialized: %s\n", w.sessionID) + fmt.Fprintf(os.Stderr, "Data plane endpoint: %s\n", resp.Msg.GetDataPlaneEndpoint()) // 
Extract pricing config from session info if resp.Msg.Session.PricingConfig != nil { @@ -290,7 +284,23 @@ func (w *paymentWrapper) init(ctx context.Context) (*commonv1.SignedRAV, error) } fmt.Fprintln(os.Stderr) - return resp.Msg.PaymentRav, nil + return &paymentInitResult{ + PaymentRAV: resp.Msg.PaymentRav, + DataPlaneEndpoint: resp.Msg.GetDataPlaneEndpoint(), + }, nil +} + +func newClientConfigForDataPlaneEndpoint(existing *client.SubstreamsClientConfig, dataPlaneEndpoint string) *client.SubstreamsClientConfig { + parsedEndpoint := sidecarlib.ParseEndpoint(dataPlaneEndpoint) + return client.NewSubstreamsClientConfig(client.SubstreamsClientConfigOptions{ + Endpoint: parsedEndpoint.URL, + AuthToken: existing.AuthToken(), + AuthType: existing.AuthType(), + Insecure: parsedEndpoint.Insecure, + PlainText: parsedEndpoint.Plaintext, + Agent: existing.Agent(), + ForceProtocolVersion: existing.ForceProtocolVersion(), + }) } func (w *paymentWrapper) end(ctx context.Context) error { diff --git a/cmd/sds/sink.go b/cmd/sds/sink.go index cacf183..d764a05 100644 --- a/cmd/sds/sink.go +++ b/cmd/sds/sink.go @@ -14,7 +14,7 @@ var sinkGroup = Group( PersistentFlags(func(flags *pflag.FlagSet) { flags.String("consumer-sidecar-addr", "http://localhost:9002", "Consumer sidecar address") - flags.String("gateway-endpoint", "", "Provider gateway endpoint for payment session (e.g., 'https://gateway:9001?insecure=true'). 
If empty, uses the substreams endpoint.") + flags.String("provider-control-plane-endpoint", "", "Provider control-plane endpoint for payment session management (e.g., 'https://gateway:9001?insecure=true')") flags.String("payer-address", "", "Payer address (required)") flags.String("receiver-address", "", "Receiver/service provider address (required)") flags.String("data-service-address", "", "Data service contract address (required)") diff --git a/consumer/sidecar/handler_init.go b/consumer/sidecar/handler_init.go index ce2e825..67f8fd5 100644 --- a/consumer/sidecar/handler_init.go +++ b/consumer/sidecar/handler_init.go @@ -25,8 +25,7 @@ func (s *Sidecar) Init( req *connect.Request[consumerv1.InitRequest], ) (*connect.Response[consumerv1.InitResponse], error) { s.logger.Info("init called", - zap.String("gateway_endpoint", req.Msg.GatewayEndpoint), - zap.String("substreams_endpoint", req.Msg.SubstreamsEndpoint), + zap.String("provider_control_plane_endpoint", req.Msg.ProviderControlPlaneEndpoint), ) // Extract escrow account details @@ -56,122 +55,94 @@ func (s *Sidecar) Init( return nil, connect.NewError(connect.CodeInvalidArgument, fmt.Errorf("invalid : %w", err)) } - // Check if we have an existing RAV to continue from - var existingRAV *horizon.SignedRAV - if req.Msg.ExistingRav != nil { - existingRAV, err = sidecar.ProtoSignedRAVToHorizon(req.Msg.ExistingRav) - if err != nil { - return nil, connect.NewError(connect.CodeInvalidArgument, fmt.Errorf("invalid : %w", err)) - } - if existingRAV != nil && existingRAV.Message != nil { - if !sidecar.AddressesEqual(existingRAV.Message.Payer, payer) { - return nil, connect.NewError(connect.CodeInvalidArgument, fmt.Errorf(" %s does not match %s", existingRAV.Message.Payer.Pretty(), payer.Pretty())) - } - if !sidecar.AddressesEqual(existingRAV.Message.ServiceProvider, receiver) { - return nil, connect.NewError(connect.CodeInvalidArgument, fmt.Errorf(" %s does not match %s", existingRAV.Message.ServiceProvider.Pretty(), 
receiver.Pretty())) - } - if !sidecar.AddressesEqual(existingRAV.Message.DataService, dataService) { - return nil, connect.NewError(connect.CodeInvalidArgument, fmt.Errorf(" %s does not match %s", existingRAV.Message.DataService.Pretty(), dataService.Pretty())) - } - } + providerControlPlaneEndpoint := strings.TrimSpace(req.Msg.ProviderControlPlaneEndpoint) + if providerControlPlaneEndpoint == "" { + return nil, connect.NewError(connect.CodeInvalidArgument, errors.New(" is required")) } - // Create initial RAV (can be zero-value for new sessions) - var initialRAV *horizon.SignedRAV - - if existingRAV != nil { - // Use the existing RAV - initialRAV = existingRAV - } else { - // Create a zero-value RAV for new sessions - // This establishes the session parameters without committing to any value - var collectionID horizon.CollectionID - // Collection ID can be derived from session or left empty for now - - initialRAV, err = s.signRAV( - collectionID, - payer, - dataService, - receiver, - uint64(time.Now().UnixNano()), - big.NewInt(0), // Zero value - nil, // No metadata yet - ) - if err != nil { - s.logger.Error("failed to sign initial RAV", zap.Error(err)) - return nil, connect.NewError(connect.CodeInternal, err) - } + // Create a zero-value RAV for a fresh session. MVP init no longer accepts resume input. 
+ var collectionID horizon.CollectionID + initialRAV, err := s.signRAV( + collectionID, + payer, + dataService, + receiver, + uint64(time.Now().UnixNano()), + big.NewInt(0), + nil, + ) + if err != nil { + s.logger.Error("failed to sign initial RAV", zap.Error(err)) + return nil, connect.NewError(connect.CodeInternal, err) } - parsedEndpoint := sidecar.ParseEndpoint(req.Msg.GatewayEndpoint) - var sessionID string - var providerPricingConfig *sidecar.PricingConfig - if parsedEndpoint.URL != "" { - gatewayClient := providerv1connect.NewPaymentGatewayServiceClient(parsedEndpoint.HTTPClient(), parsedEndpoint.URL) - gatewayResp, err := gatewayClient.StartSession(ctx, connect.NewRequest(&providerv1.StartSessionRequest{ - EscrowAccount: ea, - InitialRav: sidecar.HorizonSignedRAVToProto(initialRAV), - })) - if err != nil { - return nil, connect.NewError(connect.CodeUnavailable, err) - } + parsedEndpoint := sidecar.ParseEndpoint(providerControlPlaneEndpoint) + if parsedEndpoint.URL == "" { + return nil, connect.NewError(connect.CodeInvalidArgument, errors.New("invalid ")) + } - if !gatewayResp.Msg.Accepted { - reason := strings.TrimSpace(gatewayResp.Msg.RejectionReason) - if reason == "" { - reason = "provider rejected session" - } - return nil, connect.NewError(connect.CodeFailedPrecondition, errors.New(reason)) - } + gatewayClient := providerv1connect.NewPaymentGatewayServiceClient(parsedEndpoint.HTTPClient(), parsedEndpoint.URL) + gatewayResp, err := gatewayClient.StartSession(ctx, connect.NewRequest(&providerv1.StartSessionRequest{ + EscrowAccount: ea, + InitialRav: sidecar.HorizonSignedRAVToProto(initialRAV), + })) + if err != nil { + return nil, connect.NewError(connect.CodeUnavailable, err) + } - sessionID = strings.TrimSpace(gatewayResp.Msg.SessionId) - if sessionID == "" { - return nil, connect.NewError(connect.CodeInternal, errors.New("provider returned an empty session id")) + if !gatewayResp.Msg.Accepted { + reason := 
strings.TrimSpace(gatewayResp.Msg.RejectionReason) + if reason == "" { + reason = "provider rejected session" } + return nil, connect.NewError(connect.CodeFailedPrecondition, errors.New(reason)) + } - s.logger.Info("provider session started", - zap.String("gateway_endpoint", parsedEndpoint.URL), - zap.String("provider_session_id", sessionID), - ) + sessionID := strings.TrimSpace(gatewayResp.Msg.SessionId) + if sessionID == "" { + return nil, connect.NewError(connect.CodeInternal, errors.New("provider returned an empty session id")) + } - // Store pricing config from provider - if gatewayResp.Msg.PricingConfig != nil { - providerPricingConfig = gatewayResp.Msg.PricingConfig.ToNative() - s.logger.Debug("received pricing config from provider", - zap.Stringer("price_per_block", &providerPricingConfig.PricePerBlock), - zap.Stringer("price_per_byte", &providerPricingConfig.PricePerByte), - ) - } + dataPlaneEndpoint := strings.TrimSpace(gatewayResp.Msg.DataPlaneEndpoint) + if dataPlaneEndpoint == "" { + return nil, connect.NewError(connect.CodeInternal, errors.New("provider returned an empty data-plane endpoint")) + } - if gatewayResp.Msg.UseRav != nil { - useRAV, err := sidecar.ProtoSignedRAVToHorizon(gatewayResp.Msg.UseRav) - if err != nil { - return nil, connect.NewError(connect.CodeInternal, fmt.Errorf("invalid received from provider gateway: %w", err)) - } - if useRAV != nil { - initialRAV = useRAV - } - } + s.logger.Info("provider session started", + zap.String("provider_control_plane_endpoint", parsedEndpoint.URL), + zap.String("provider_session_id", sessionID), + zap.String("data_plane_endpoint", dataPlaneEndpoint), + ) + + var providerPricingConfig *sidecar.PricingConfig + if gatewayResp.Msg.PricingConfig != nil { + providerPricingConfig = gatewayResp.Msg.PricingConfig.ToNative() + s.logger.Debug("received confirmatory pricing config from provider", + zap.Stringer("price_per_block", &providerPricingConfig.PricePerBlock), + zap.Stringer("price_per_byte", 
&providerPricingConfig.PricePerByte), + ) } - var session *sidecar.Session - if sessionID != "" { - session, err = s.sessions.CreateWithID(sessionID, payer, receiver, dataService) + if gatewayResp.Msg.UseRav != nil { + useRAV, err := sidecar.ProtoSignedRAVToHorizon(gatewayResp.Msg.UseRav) if err != nil { - return nil, connect.NewError(connect.CodeInternal, fmt.Errorf("unable to create session: %w", err)) + return nil, connect.NewError(connect.CodeInternal, fmt.Errorf("invalid received from provider gateway: %w", err)) + } + if useRAV != nil { + initialRAV = useRAV } - } else { - session = s.sessions.Create(payer, receiver, dataService) - sessionID = session.ID + } + + session, err := s.sessions.CreateWithID(sessionID, payer, receiver, dataService) + if err != nil { + return nil, connect.NewError(connect.CodeInternal, fmt.Errorf("unable to create session: %w", err)) } session.SetRAV(initialRAV) if providerPricingConfig != nil { session.SetPricingConfig(providerPricingConfig) } - if parsedEndpoint.URL != "" { - s.paymentSessions.SetEndpoint(sessionID, parsedEndpoint.URL) - } + s.paymentSessions.SetEndpoint(sessionID, parsedEndpoint.URL) s.logger.Debug("created session", zap.String("session_id", session.ID), @@ -181,8 +152,9 @@ func (s *Sidecar) Init( ) response := &consumerv1.InitResponse{ - Session: session.ToSessionInfo(), - PaymentRav: sidecar.HorizonSignedRAVToProto(initialRAV), + Session: session.ToSessionInfo(), + PaymentRav: sidecar.HorizonSignedRAVToProto(initialRAV), + DataPlaneEndpoint: dataPlaneEndpoint, } s.logger.Info("Init completed", diff --git a/consumer/sidecar/sidecar.go b/consumer/sidecar/sidecar.go index 59071a1..bb0a9cb 100644 --- a/consumer/sidecar/sidecar.go +++ b/consumer/sidecar/sidecar.go @@ -37,9 +37,6 @@ type Sidecar struct { paymentSessionRoundtripTimeout time.Duration transportConfig sidecar.ServerTransportConfig - - // Provider gateway endpoint (set during Init) - // In production, this would be dynamically determined } type Config 
struct { diff --git a/devel/sds_sink b/devel/sds_sink index 6eb9c3c..e57c2e1 100755 --- a/devel/sds_sink +++ b/devel/sds_sink @@ -12,7 +12,7 @@ main() { --receiver-address=0xa6f1845e54b1d6a95319251f1ca775b4ad406cdf \ --data-service-address=0x37478fd2f5845e3664fe4155d74c00e1a4e7a5e2 \ --endpoint=localhost:10016 --insecure \ - --gateway-endpoint="http://localhost:9001" \ + --provider-control-plane-endpoint="http://localhost:9001" \ "$@" } diff --git a/pb/graph/substreams/data_service/consumer/v1/consumer.pb.go b/pb/graph/substreams/data_service/consumer/v1/consumer.pb.go index 554e12a..bf35833 100644 --- a/pb/graph/substreams/data_service/consumer/v1/consumer.pb.go +++ b/pb/graph/substreams/data_service/consumer/v1/consumer.pb.go @@ -26,16 +26,12 @@ type InitRequest struct { state protoimpl.MessageState `protogen:"open.v1"` // The escrow account to use for funding this session EscrowAccount *v1.EscrowAccount `protobuf:"bytes,1,opt,name=escrow_account,json=escrowAccount,proto3" json:"escrow_account,omitempty"` - // The provider gateway endpoint for payment session management (e.g., "https://gateway.provider.com:9001") + // Direct provider override used before oracle discovery is implemented. + // This is the provider control-plane endpoint used for session/payment management. // Supports ?insecure=true query parameter for self-signed certificates. - GatewayEndpoint string `protobuf:"bytes,2,opt,name=gateway_endpoint,json=gatewayEndpoint,proto3" json:"gateway_endpoint,omitempty"` - // The Substreams endpoint for data streaming (e.g., "substreams.provider.com:10015") - // This is where the sink will connect to stream data. 
- SubstreamsEndpoint string `protobuf:"bytes,4,opt,name=substreams_endpoint,json=substreamsEndpoint,proto3" json:"substreams_endpoint,omitempty"` - // Optional: existing RAV to continue from (for session resumption) - ExistingRav *v1.SignedRAV `protobuf:"bytes,3,opt,name=existing_rav,json=existingRav,proto3" json:"existing_rav,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + ProviderControlPlaneEndpoint string `protobuf:"bytes,2,opt,name=provider_control_plane_endpoint,json=providerControlPlaneEndpoint,proto3" json:"provider_control_plane_endpoint,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *InitRequest) Reset() { @@ -75,35 +71,23 @@ func (x *InitRequest) GetEscrowAccount() *v1.EscrowAccount { return nil } -func (x *InitRequest) GetGatewayEndpoint() string { - if x != nil { - return x.GatewayEndpoint - } - return "" -} - -func (x *InitRequest) GetSubstreamsEndpoint() string { +func (x *InitRequest) GetProviderControlPlaneEndpoint() string { if x != nil { - return x.SubstreamsEndpoint + return x.ProviderControlPlaneEndpoint } return "" } -func (x *InitRequest) GetExistingRav() *v1.SignedRAV { - if x != nil { - return x.ExistingRav - } - return nil -} - type InitResponse struct { state protoimpl.MessageState `protogen:"open.v1"` // The session information including the RAV to use Session *v1.SessionInfo `protobuf:"bytes,1,opt,name=session,proto3" json:"session,omitempty"` // The RAV to include in the payment header when connecting to provider - PaymentRav *v1.SignedRAV `protobuf:"bytes,2,opt,name=payment_rav,json=paymentRav,proto3" json:"payment_rav,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + PaymentRav *v1.SignedRAV `protobuf:"bytes,2,opt,name=payment_rav,json=paymentRav,proto3" json:"payment_rav,omitempty"` + // The session-specific data-plane endpoint returned by the provider handshake. 
+ DataPlaneEndpoint string `protobuf:"bytes,3,opt,name=data_plane_endpoint,json=dataPlaneEndpoint,proto3" json:"data_plane_endpoint,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *InitResponse) Reset() { @@ -150,6 +134,13 @@ func (x *InitResponse) GetPaymentRav() *v1.SignedRAV { return nil } +func (x *InitResponse) GetDataPlaneEndpoint() string { + if x != nil { + return x.DataPlaneEndpoint + } + return "" +} + type ReportUsageRequest struct { state protoimpl.MessageState `protogen:"open.v1"` // The session ID @@ -379,16 +370,15 @@ var File_graph_substreams_data_service_consumer_v1_consumer_proto protoreflect.F const file_graph_substreams_data_service_consumer_v1_consumer_proto_rawDesc = "" + "\n" + - "8graph/substreams/data_service/consumer/v1/consumer.proto\x12)graph.substreams.data_service.consumer.v1\x1a3graph/substreams/data_service/common/v1/types.proto\"\x9f\x02\n" + + "8graph/substreams/data_service/consumer/v1/consumer.proto\x12)graph.substreams.data_service.consumer.v1\x1a3graph/substreams/data_service/common/v1/types.proto\"\xbf\x01\n" + "\vInitRequest\x12]\n" + - "\x0eescrow_account\x18\x01 \x01(\v26.graph.substreams.data_service.common.v1.EscrowAccountR\rescrowAccount\x12)\n" + - "\x10gateway_endpoint\x18\x02 \x01(\tR\x0fgatewayEndpoint\x12/\n" + - "\x13substreams_endpoint\x18\x04 \x01(\tR\x12substreamsEndpoint\x12U\n" + - "\fexisting_rav\x18\x03 \x01(\v22.graph.substreams.data_service.common.v1.SignedRAVR\vexistingRav\"\xb3\x01\n" + + "\x0eescrow_account\x18\x01 \x01(\v26.graph.substreams.data_service.common.v1.EscrowAccountR\rescrowAccount\x12E\n" + + "\x1fprovider_control_plane_endpoint\x18\x02 \x01(\tR\x1cproviderControlPlaneEndpointJ\x04\b\x03\x10\x04J\x04\b\x04\x10\x05\"\xe3\x01\n" + "\fInitResponse\x12N\n" + "\asession\x18\x01 \x01(\v24.graph.substreams.data_service.common.v1.SessionInfoR\asession\x12S\n" + "\vpayment_rav\x18\x02 \x01(\v22.graph.substreams.data_service.common.v1.SignedRAVR\n" + - 
"paymentRav\"y\n" + + "paymentRav\x12.\n" + + "\x13data_plane_endpoint\x18\x03 \x01(\tR\x11dataPlaneEndpoint\"y\n" + "\x12ReportUsageRequest\x12\x1d\n" + "\n" + "session_id\x18\x01 \x01(\tR\tsessionId\x12D\n" + @@ -436,31 +426,30 @@ var file_graph_substreams_data_service_consumer_v1_consumer_proto_goTypes = []an (*EndSessionRequest)(nil), // 4: graph.substreams.data_service.consumer.v1.EndSessionRequest (*EndSessionResponse)(nil), // 5: graph.substreams.data_service.consumer.v1.EndSessionResponse (*v1.EscrowAccount)(nil), // 6: graph.substreams.data_service.common.v1.EscrowAccount - (*v1.SignedRAV)(nil), // 7: graph.substreams.data_service.common.v1.SignedRAV - (*v1.SessionInfo)(nil), // 8: graph.substreams.data_service.common.v1.SessionInfo + (*v1.SessionInfo)(nil), // 7: graph.substreams.data_service.common.v1.SessionInfo + (*v1.SignedRAV)(nil), // 8: graph.substreams.data_service.common.v1.SignedRAV (*v1.Usage)(nil), // 9: graph.substreams.data_service.common.v1.Usage } var file_graph_substreams_data_service_consumer_v1_consumer_proto_depIdxs = []int32{ 6, // 0: graph.substreams.data_service.consumer.v1.InitRequest.escrow_account:type_name -> graph.substreams.data_service.common.v1.EscrowAccount - 7, // 1: graph.substreams.data_service.consumer.v1.InitRequest.existing_rav:type_name -> graph.substreams.data_service.common.v1.SignedRAV - 8, // 2: graph.substreams.data_service.consumer.v1.InitResponse.session:type_name -> graph.substreams.data_service.common.v1.SessionInfo - 7, // 3: graph.substreams.data_service.consumer.v1.InitResponse.payment_rav:type_name -> graph.substreams.data_service.common.v1.SignedRAV - 9, // 4: graph.substreams.data_service.consumer.v1.ReportUsageRequest.usage:type_name -> graph.substreams.data_service.common.v1.Usage - 7, // 5: graph.substreams.data_service.consumer.v1.ReportUsageResponse.updated_rav:type_name -> graph.substreams.data_service.common.v1.SignedRAV - 9, // 6: 
graph.substreams.data_service.consumer.v1.EndSessionRequest.final_usage:type_name -> graph.substreams.data_service.common.v1.Usage - 7, // 7: graph.substreams.data_service.consumer.v1.EndSessionResponse.final_rav:type_name -> graph.substreams.data_service.common.v1.SignedRAV - 9, // 8: graph.substreams.data_service.consumer.v1.EndSessionResponse.total_usage:type_name -> graph.substreams.data_service.common.v1.Usage - 0, // 9: graph.substreams.data_service.consumer.v1.ConsumerSidecarService.Init:input_type -> graph.substreams.data_service.consumer.v1.InitRequest - 2, // 10: graph.substreams.data_service.consumer.v1.ConsumerSidecarService.ReportUsage:input_type -> graph.substreams.data_service.consumer.v1.ReportUsageRequest - 4, // 11: graph.substreams.data_service.consumer.v1.ConsumerSidecarService.EndSession:input_type -> graph.substreams.data_service.consumer.v1.EndSessionRequest - 1, // 12: graph.substreams.data_service.consumer.v1.ConsumerSidecarService.Init:output_type -> graph.substreams.data_service.consumer.v1.InitResponse - 3, // 13: graph.substreams.data_service.consumer.v1.ConsumerSidecarService.ReportUsage:output_type -> graph.substreams.data_service.consumer.v1.ReportUsageResponse - 5, // 14: graph.substreams.data_service.consumer.v1.ConsumerSidecarService.EndSession:output_type -> graph.substreams.data_service.consumer.v1.EndSessionResponse - 12, // [12:15] is the sub-list for method output_type - 9, // [9:12] is the sub-list for method input_type - 9, // [9:9] is the sub-list for extension type_name - 9, // [9:9] is the sub-list for extension extendee - 0, // [0:9] is the sub-list for field type_name + 7, // 1: graph.substreams.data_service.consumer.v1.InitResponse.session:type_name -> graph.substreams.data_service.common.v1.SessionInfo + 8, // 2: graph.substreams.data_service.consumer.v1.InitResponse.payment_rav:type_name -> graph.substreams.data_service.common.v1.SignedRAV + 9, // 3: 
graph.substreams.data_service.consumer.v1.ReportUsageRequest.usage:type_name -> graph.substreams.data_service.common.v1.Usage + 8, // 4: graph.substreams.data_service.consumer.v1.ReportUsageResponse.updated_rav:type_name -> graph.substreams.data_service.common.v1.SignedRAV + 9, // 5: graph.substreams.data_service.consumer.v1.EndSessionRequest.final_usage:type_name -> graph.substreams.data_service.common.v1.Usage + 8, // 6: graph.substreams.data_service.consumer.v1.EndSessionResponse.final_rav:type_name -> graph.substreams.data_service.common.v1.SignedRAV + 9, // 7: graph.substreams.data_service.consumer.v1.EndSessionResponse.total_usage:type_name -> graph.substreams.data_service.common.v1.Usage + 0, // 8: graph.substreams.data_service.consumer.v1.ConsumerSidecarService.Init:input_type -> graph.substreams.data_service.consumer.v1.InitRequest + 2, // 9: graph.substreams.data_service.consumer.v1.ConsumerSidecarService.ReportUsage:input_type -> graph.substreams.data_service.consumer.v1.ReportUsageRequest + 4, // 10: graph.substreams.data_service.consumer.v1.ConsumerSidecarService.EndSession:input_type -> graph.substreams.data_service.consumer.v1.EndSessionRequest + 1, // 11: graph.substreams.data_service.consumer.v1.ConsumerSidecarService.Init:output_type -> graph.substreams.data_service.consumer.v1.InitResponse + 3, // 12: graph.substreams.data_service.consumer.v1.ConsumerSidecarService.ReportUsage:output_type -> graph.substreams.data_service.consumer.v1.ReportUsageResponse + 5, // 13: graph.substreams.data_service.consumer.v1.ConsumerSidecarService.EndSession:output_type -> graph.substreams.data_service.consumer.v1.EndSessionResponse + 11, // [11:14] is the sub-list for method output_type + 8, // [8:11] is the sub-list for method input_type + 8, // [8:8] is the sub-list for extension type_name + 8, // [8:8] is the sub-list for extension extendee + 0, // [0:8] is the sub-list for field type_name } func init() { 
file_graph_substreams_data_service_consumer_v1_consumer_proto_init() } diff --git a/pb/graph/substreams/data_service/provider/v1/gateway.pb.go b/pb/graph/substreams/data_service/provider/v1/gateway.pb.go index 3f50d33..67d17aa 100644 --- a/pb/graph/substreams/data_service/provider/v1/gateway.pb.go +++ b/pb/graph/substreams/data_service/provider/v1/gateway.pb.go @@ -241,10 +241,13 @@ type StartSessionResponse struct { Accepted bool `protobuf:"varint,3,opt,name=accepted,proto3" json:"accepted,omitempty"` // If not accepted, the reason for rejection RejectionReason string `protobuf:"bytes,4,opt,name=rejection_reason,json=rejectionReason,proto3" json:"rejection_reason,omitempty"` - // The provider's pricing configuration for this session + // The provider's pricing configuration for this session. + // For MVP this is confirmatory/observational only, not a negotiation surface. PricingConfig *v1.PricingConfig `protobuf:"bytes,5,opt,name=pricing_config,json=pricingConfig,proto3" json:"pricing_config,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + // The session-specific data-plane endpoint for Substreams traffic. 
+ DataPlaneEndpoint string `protobuf:"bytes,6,opt,name=data_plane_endpoint,json=dataPlaneEndpoint,proto3" json:"data_plane_endpoint,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *StartSessionResponse) Reset() { @@ -312,6 +315,13 @@ func (x *StartSessionResponse) GetPricingConfig() *v1.PricingConfig { return nil } +func (x *StartSessionResponse) GetDataPlaneEndpoint() string { + if x != nil { + return x.DataPlaneEndpoint + } + return "" +} + type SubmitRAVRequest struct { state protoimpl.MessageState `protogen:"open.v1"` // The session ID @@ -1007,14 +1017,15 @@ const file_graph_substreams_data_service_provider_v1_gateway_proto_rawDesc = "" "\x13StartSessionRequest\x12]\n" + "\x0eescrow_account\x18\x01 \x01(\v26.graph.substreams.data_service.common.v1.EscrowAccountR\rescrowAccount\x12S\n" + "\vinitial_rav\x18\x02 \x01(\v22.graph.substreams.data_service.common.v1.SignedRAVR\n" + - "initialRav\"\xa8\x02\n" + + "initialRav\"\xd8\x02\n" + "\x14StartSessionResponse\x12\x1d\n" + "\n" + "session_id\x18\x01 \x01(\tR\tsessionId\x12K\n" + "\ause_rav\x18\x02 \x01(\v22.graph.substreams.data_service.common.v1.SignedRAVR\x06useRav\x12\x1a\n" + "\baccepted\x18\x03 \x01(\bR\baccepted\x12)\n" + "\x10rejection_reason\x18\x04 \x01(\tR\x0frejectionReason\x12]\n" + - "\x0epricing_config\x18\x05 \x01(\v26.graph.substreams.data_service.common.v1.PricingConfigR\rpricingConfig\"\xca\x01\n" + + "\x0epricing_config\x18\x05 \x01(\v26.graph.substreams.data_service.common.v1.PricingConfigR\rpricingConfig\x12.\n" + + "\x13data_plane_endpoint\x18\x06 \x01(\tR\x11dataPlaneEndpoint\"\xca\x01\n" + "\x10SubmitRAVRequest\x12\x1d\n" + "\n" + "session_id\x18\x01 \x01(\tR\tsessionId\x12Q\n" + diff --git a/plans/mvp-gap-analysis.md b/plans/mvp-gap-analysis.md index 4d7ef9a..5e14346 100644 --- a/plans/mvp-gap-analysis.md +++ b/plans/mvp-gap-analysis.md @@ -1,7 +1,7 @@ # MVP Gap Analysis Drafted: 2026-03-12 -Revised: 2026-03-25 +Revised: 2026-03-26 This 
document maps the current repository state against the MVP defined in `docs/mvp-scope.md`. @@ -37,6 +37,12 @@ Provider-side runtime foundations are materially stronger than before: - the provider runtime is now shaped as a public Payment Gateway plus a private Plugin Gateway - firecore/plugin integration scaffolding is stronger than it was when this document was first drafted +Validation infrastructure is also healthier than before: + +- PostgreSQL repository tests no longer rely on an author-specific absolute migration path +- integration bootstrap no longer depends on localhost port `58545` being free in order to start the shared devenv +- `go test ./...` is no longer blocked by those two non-product validation failures + The biggest remaining MVP gaps are now: - standalone oracle/discovery component @@ -52,7 +58,7 @@ The biggest remaining MVP gaps are now: | Scenario | Status | Notes | | --- | --- | --- | | A. Discovery to paid streaming | `partial` | Paid session flow and provider runtime foundations exist, but the standalone oracle is still missing and the consumer sidecar is not yet the Substreams-compatible ingress described by the scope | -| B. Fresh session after interruption | `partial` | Fresh-session semantics are now the MVP target, but current code still carries `existing_rav` and split-endpoint assumptions that do not fully match the revised design | +| B. Fresh session after interruption | `partial` | Fresh-session semantics are implemented in the init contract, but broader real-path interruption validation still remains | | C. Low funds during streaming | `missing` | Session-local low-funds handling in the real live stream path is still backlog work | | D. Provider restart without losing collectible state | `partial` | Provider persistence is no longer purely in-memory because PostgreSQL support exists, but collectible/collection lifecycle tracking is still incomplete | | E. 
Manual funding flow | `partial` | Demo-oriented setup/funding helpers exist, but real operator-grade funding CLI flows do not | @@ -96,8 +102,6 @@ What already exists: What is still missing for MVP: -- the API still expects explicit `gateway_endpoint` and `substreams_endpoint` -- the API still carries `existing_rav`, which reflects older resume-oriented assumptions - the real user-facing integration is still wrapper-centric rather than endpoint-centric - finalized low-funds stop/pause handling in the real usage path @@ -124,7 +128,6 @@ What already exists: What is still missing for MVP: -- provider-returned data-plane endpoint semantics in the current public contract - collection lifecycle state - live low-funds logic during active streaming - authenticated admin/operator surfaces @@ -189,6 +192,10 @@ What is still missing for MVP: - provider-backed collectible/collect_pending/collected tracking - acceptance-level proof for the full restart/collectible scenario +Notes: + +- Repository validation is now portable across checkout paths because PostgreSQL test migrations resolve from repo-local state rather than a machine-specific absolute path. 
+ ### Consumer Data-Plane Compatibility Status: `missing` @@ -207,6 +214,22 @@ What is still missing for MVP: - a Substreams-compatible consumer-side endpoint/proxy that hides SDS discovery/session/payment coordination behind the data-plane ingress +### Validation Infrastructure + +Status: `implemented` + +Evidence: + +- `provider/repository/psql/database_test.go` +- `provider/repository/psql/migrations_path.go` +- `test/integration/main_test.go` + +What already exists: + +- PostgreSQL repository tests resolve migrations from repo-local state +- integration bootstrap selects a safe devenv RPC port instead of assuming fixed host port `58545` +- full-repo validation is no longer blocked by those two environment-specific failures + ### Funding CLI Status: `partial` diff --git a/plans/mvp-implementation-backlog.md b/plans/mvp-implementation-backlog.md index 7fde5ab..361c0ad 100644 --- a/plans/mvp-implementation-backlog.md +++ b/plans/mvp-implementation-backlog.md @@ -91,7 +91,7 @@ These assumptions are referenced by task ID so it is clear which scope decisions | MVP-001 | `done` | protocol | `A2` | none | `A` | Freeze the oracle-authoritative MVP pricing contract across oracle, consumer, and provider flows | | MVP-002 | `done` | protocol | `A2`, `A3` | `MVP-033` | `A`, `B` | Freeze fresh-session init semantics and provider-returned data-plane endpoint behavior | | MVP-003 | `in_progress` | protocol | `A3`, `A6` | `MVP-027` | `D`, `F` | Define and document the provider-side runtime persistence model and its boundary with settlement lifecycle tracking | -| MVP-004 | `in_progress` | protocol | `A2`, `A3` | none | `A`, `C` | Define and document the real runtime payment contract used by the public payment gateway, private plugin gateway, and consumer/provider payment loop | +| MVP-004 | `done` | protocol | `A2`, `A3` | none | `A`, `C` | Define and document the real runtime payment contract used by the public payment gateway, private plugin gateway, and consumer/provider 
payment loop | | MVP-005 | `not_started` | oracle | `A1`, `A2`, `A5` | `MVP-033` | `A` | Implement a standalone oracle service with manual whitelist, canonical pricing, recommended-provider response, and control-plane endpoint return | | MVP-006 | `not_started` | oracle | `A5` | `MVP-028` | `A`, `G` | Add authenticated oracle administration for whitelist and provider metadata management | | MVP-007 | `not_started` | consumer | `A1`, `A2`, `A3` | `MVP-005`, `MVP-033` | `A` | Integrate consumer sidecar with oracle discovery while preserving direct-provider fallback and provider-returned data-plane resolution | @@ -121,6 +121,8 @@ These assumptions are referenced by task ID so it is clear which scope decisions | MVP-031 | `not_started` | runtime-payment | `A2`, `A3` | `MVP-004`, `MVP-012`, `MVP-014`, `MVP-017` | `A`, `C` | Wire the long-lived payment-control loop behind the consumer-sidecar ingress path used by real runtime traffic | | MVP-032 | `not_started` | operations | `A4`, `A5`, `A6` | `MVP-008`, `MVP-010`, `MVP-022` | `C`, `D`, `F`, `G` | Expose operator runtime/session/payment inspection APIs and CLI/status flows | | MVP-033 | `done` | protocol | `A1` | none | `A` | Freeze the chain/network discovery input contract across client, sidecar, and oracle | +| MVP-034 | `done` | validation | none | none | none | Fix repository PostgreSQL tests so migrations resolve from repo-relative state rather than a machine-specific absolute path | +| MVP-035 | `done` | validation | none | none | none | Make integration devenv startup resilient to local fixed-port collisions so the shared test environment is reproducible | ## Protocol and Contract Tasks @@ -188,7 +190,7 @@ These assumptions are referenced by task ID so it is clear which scope decisions - Verify: - Review [provider/repository/repository.go](../provider/repository/repository.go) and [provider/gateway/REPOSITORY.md](../provider/gateway/REPOSITORY.md) against backlog task wording. 
-- [ ] MVP-004 Define and document the real runtime payment contract used by the public payment gateway, private plugin gateway, and consumer/provider payment loop. +- [x] MVP-004 Define and document the real runtime payment contract used by the public payment gateway, private plugin gateway, and consumer/provider payment loop. - Context: - The runtime shape changed materially in the recent commit range. - The current repo now has: @@ -201,6 +203,9 @@ These assumptions are referenced by task ID so it is clear which scope decisions - `A3` - Done when: - The runtime contract is documented in terms of the actual provider shape now in repo. + - Provider handshake returns the session-specific data-plane endpoint used by the runtime path. + - Consumer init takes a single provider control-plane override input rather than client-supplied split stream/control endpoints. + - Pricing exposed in provider handshake remains confirmatory rather than negotiable for MVP. - Plugin session/usage correlation is described using typed protobuf fields rather than old implicit header flow. - Consumer/provider payment-loop expectations are documented without revive/resume assumptions. - Verify: @@ -557,6 +562,32 @@ These assumptions are referenced by task ID so it is clear which scope decisions - Verify: - Update the scenario matrix or equivalent test/docs references for each acceptance scenario, including environment, validation method, and source of truth for the result. +- [x] MVP-034 Fix repository PostgreSQL tests so migrations resolve from repo-relative state rather than a machine-specific absolute path. + - Context: + - `provider/repository/psql/database_test.go` currently points migrations at a machine-specific absolute path. + - This breaks `go test ./...` outside the original author environment and makes validation results unreliable. 
+ - Assumptions: + - none + - Done when: + - PostgreSQL repository tests load migrations from repo-relative state or embedded test-owned migration discovery rather than an author-specific filesystem path. + - The test path works from a clean checkout on another machine and in CI-like environments. + - Full-repo test failures are no longer caused by the current hardcoded migration location. + - Verify: + - Run `go test ./provider/repository/psql/...` from the repo root on a non-author-specific checkout path and confirm migrations apply successfully. + +- [x] MVP-035 Make integration devenv startup resilient to local fixed-port collisions so the shared test environment is reproducible. + - Context: + - The integration stack currently relies on a fixed host RPC port for the local Anvil-based devenv. + - Local port collisions can prevent `test/integration` startup even when the SDS code under test is otherwise correct. + - Assumptions: + - none + - Done when: + - Integration startup no longer depends on a single hardcoded host port being free with no fallback or operator override. + - The devenv/test bootstrap either allocates ports safely, retries with a deterministic alternative strategy, or exposes a clear test/runtime override that the integration harness actually uses. + - Port-allocation failures stop being a common non-product cause of integration test failure. + - Verify: + - Run `go test ./test/integration/...` with the default local port already occupied and confirm startup either succeeds using the supported fallback/override path or fails fast with a clear, actionable configuration message. + - [ ] MVP-026 Refresh protocol/runtime docs so they match the revised MVP architecture and remaining open questions. - Context: - [docs/mvp-scope.md](../docs/mvp-scope.md) has been updated. 
diff --git a/proto/graph/substreams/data_service/consumer/v1/consumer.proto b/proto/graph/substreams/data_service/consumer/v1/consumer.proto index 5ca1b39..2a04834 100644 --- a/proto/graph/substreams/data_service/consumer/v1/consumer.proto +++ b/proto/graph/substreams/data_service/consumer/v1/consumer.proto @@ -26,14 +26,12 @@ service ConsumerSidecarService { message InitRequest { // The escrow account to use for funding this session common.v1.EscrowAccount escrow_account = 1; - // The provider gateway endpoint for payment session management (e.g., "https://gateway.provider.com:9001") + // Direct provider override used before oracle discovery is implemented. + // This is the provider control-plane endpoint used for session/payment management. // Supports ?insecure=true query parameter for self-signed certificates. - string gateway_endpoint = 2; - // The Substreams endpoint for data streaming (e.g., "substreams.provider.com:10015") - // This is where the sink will connect to stream data. - string substreams_endpoint = 4; - // Optional: existing RAV to continue from (for session resumption) - common.v1.SignedRAV existing_rav = 3; + string provider_control_plane_endpoint = 2; + + reserved 3, 4; } message InitResponse { @@ -41,6 +39,8 @@ message InitResponse { common.v1.SessionInfo session = 1; // The RAV to include in the payment header when connecting to provider common.v1.SignedRAV payment_rav = 2; + // The session-specific data-plane endpoint returned by the provider handshake. 
+ string data_plane_endpoint = 3; } message ReportUsageRequest { diff --git a/proto/graph/substreams/data_service/provider/v1/gateway.proto b/proto/graph/substreams/data_service/provider/v1/gateway.proto index 6b31597..54dc21d 100644 --- a/proto/graph/substreams/data_service/provider/v1/gateway.proto +++ b/proto/graph/substreams/data_service/provider/v1/gateway.proto @@ -56,8 +56,11 @@ message StartSessionResponse { bool accepted = 3; // If not accepted, the reason for rejection string rejection_reason = 4; - // The provider's pricing configuration for this session + // The provider's pricing configuration for this session. + // For MVP this is confirmatory/observational only, not a negotiation surface. common.v1.PricingConfig pricing_config = 5; + // The session-specific data-plane endpoint for Substreams traffic. + string data_plane_endpoint = 6; } message SubmitRAVRequest { diff --git a/provider/gateway/gateway.go b/provider/gateway/gateway.go index 89f749d..9ef90f6 100644 --- a/provider/gateway/gateway.go +++ b/provider/gateway/gateway.go @@ -53,8 +53,9 @@ type Gateway struct { collectorQuerier sidecar.CollectorAuthorizer // Pricing configuration - pricingConfig *sidecar.PricingConfig - transportConfig sidecar.ServerTransportConfig + pricingConfig *sidecar.PricingConfig + dataPlaneEndpoint string + transportConfig sidecar.ServerTransportConfig authCache *haxmap.Map[string, authCacheEntry] @@ -63,14 +64,15 @@ type Gateway struct { } type Config struct { - ListenAddr string - ServiceProvider eth.Address - Domain *horizon.Domain - CollectorAddr eth.Address - EscrowAddr eth.Address - RPCEndpoint string - PricingConfig *sidecar.PricingConfig - TransportConfig sidecar.ServerTransportConfig + ListenAddr string + ServiceProvider eth.Address + Domain *horizon.Domain + CollectorAddr eth.Address + EscrowAddr eth.Address + RPCEndpoint string + PricingConfig *sidecar.PricingConfig + DataPlaneEndpoint string + TransportConfig sidecar.ServerTransportConfig // Repository 
provides session/usage state storage. // If nil, an in-memory repository is created. @@ -106,19 +108,20 @@ func New(config *Config, logger *zap.Logger) *Gateway { } return &Gateway{ - Shutter: shutter.New(), - listenAddr: config.ListenAddr, - logger: logger, - serviceProvider: config.ServiceProvider, - domain: config.Domain, - collectorAddr: config.CollectorAddr, - escrowAddr: config.EscrowAddr, - escrowQuerier: escrowQuerier, - collectorQuerier: collectorQuerier, - pricingConfig: pricingConfig, - transportConfig: config.TransportConfig, - authCache: haxmap.New[string, authCacheEntry](), - repo: repo, + Shutter: shutter.New(), + listenAddr: config.ListenAddr, + logger: logger, + serviceProvider: config.ServiceProvider, + domain: config.Domain, + collectorAddr: config.CollectorAddr, + escrowAddr: config.EscrowAddr, + escrowQuerier: escrowQuerier, + collectorQuerier: collectorQuerier, + pricingConfig: pricingConfig, + dataPlaneEndpoint: config.DataPlaneEndpoint, + transportConfig: config.TransportConfig, + authCache: haxmap.New[string, authCacheEntry](), + repo: repo, } } diff --git a/provider/gateway/handler_start_session.go b/provider/gateway/handler_start_session.go index 877e0e0..83fca9b 100644 --- a/provider/gateway/handler_start_session.go +++ b/provider/gateway/handler_start_session.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "strings" "connectrpc.com/connect" "github.com/graphprotocol/substreams-data-service/internal/session" @@ -161,12 +162,18 @@ func (s *Gateway) StartSession( zap.Stringer("payer", payer), ) + dataPlaneEndpoint := strings.TrimSpace(s.dataPlaneEndpoint) + if dataPlaneEndpoint == "" { + return nil, connect.NewError(connect.CodeInternal, errors.New("provider data-plane endpoint is not configured")) + } + // Return the RAV to use (same as initial for now) response := &providerv1.StartSessionResponse{ - SessionId: consumerSession.ID, - UseRav: req.Msg.InitialRav, // Use the same RAV - Accepted: true, - PricingConfig: 
commonv1.PricingConfigFromNative(s.pricingConfig), + SessionId: consumerSession.ID, + UseRav: req.Msg.InitialRav, // Use the same RAV + Accepted: true, + PricingConfig: commonv1.PricingConfigFromNative(s.pricingConfig), + DataPlaneEndpoint: dataPlaneEndpoint, } return connect.NewResponse(response), nil diff --git a/provider/repository/psql/database_test.go b/provider/repository/psql/database_test.go index 296c461..023f3a2 100644 --- a/provider/repository/psql/database_test.go +++ b/provider/repository/psql/database_test.go @@ -56,8 +56,8 @@ func runMigrationsInSchema(t *testing.T, db *sqlx.DB, schema string) { _, err := db.ExecContext(ctx, fmt.Sprintf("SET search_path TO %s", schema)) require.NoError(t, err) - // Use absolute path to migrations - migrationPath := "file:///Users/maoueh/work/sf/substreams-data-service/provider/repository/psql/migrations" + migrationPath, err := MigrationSourceURL() + require.NoError(t, err, "Failed to resolve migration source URL") // Don't use SchemaName in config - let it use search_path instead dbDriver, err := migratepg.WithInstance(db.DB, &migratepg.Config{ diff --git a/provider/repository/psql/migrations_path.go b/provider/repository/psql/migrations_path.go new file mode 100644 index 0000000..44ff514 --- /dev/null +++ b/provider/repository/psql/migrations_path.go @@ -0,0 +1,37 @@ +package psql + +import ( + "fmt" + "net/url" + "path/filepath" + "runtime" +) + +// MigrationDir resolves the repository-local migrations directory for the PostgreSQL repository. 
+func MigrationDir() (string, error) { + _, filename, _, ok := runtime.Caller(0) + if !ok { + return "", fmt.Errorf("resolving migrations directory: runtime caller unavailable") + } + + dir := filepath.Join(filepath.Dir(filename), "migrations") + absDir, err := filepath.Abs(dir) + if err != nil { + return "", fmt.Errorf("resolving absolute migrations directory: %w", err) + } + + return absDir, nil +} + +// MigrationSourceURL returns the file:// source URI expected by golang-migrate. +func MigrationSourceURL() (string, error) { + dir, err := MigrationDir() + if err != nil { + return "", err + } + + return (&url.URL{ + Scheme: "file", + Path: filepath.ToSlash(dir), + }).String(), nil +} diff --git a/test/integration/firecore_test.go b/test/integration/firecore_test.go index fb9d31f..59033eb 100644 --- a/test/integration/firecore_test.go +++ b/test/integration/firecore_test.go @@ -55,6 +55,7 @@ func TestFirecore(t *testing.T) { env.Collector.Address, env.Escrow.Address, env.RPCURL, + "localhost:10016", PostgresTestDSN, sidecarlib.ServerTransportConfig{ Plaintext: true, @@ -85,7 +86,7 @@ func TestFirecore(t *testing.T) { firecoreLog.Info("all infrastructure started successfully", zap.String("substreams_endpoint", substreamsEndpoint), - zap.String("gateway_endpoint", "http://localhost:19001"), + zap.String("provider_control_plane_endpoint", "http://localhost:19001"), ) // Step 4: Start consumer sidecar @@ -346,7 +347,7 @@ func runSDSSink( module, "--endpoint=" + endpoint, "--plaintext", - "--gateway-endpoint=http://localhost:19001", + "--provider-control-plane-endpoint=http://localhost:19001", "--consumer-sidecar-addr=http://localhost:9002", "--payer-address=" + payerAddress, "--receiver-address=" + receiverAddress, diff --git a/test/integration/main_test.go b/test/integration/main_test.go index cf5be81..6b802ee 100644 --- a/test/integration/main_test.go +++ b/test/integration/main_test.go @@ -3,15 +3,16 @@ package integration import ( "context" "fmt" + "net" "os" 
"os/exec" - "path/filepath" "strings" "sync" "testing" "time" "github.com/graphprotocol/substreams-data-service/horizon/devenv" + psqlrepo "github.com/graphprotocol/substreams-data-service/provider/repository/psql" "github.com/streamingfast/logging" "github.com/testcontainers/testcontainers-go" "github.com/testcontainers/testcontainers-go/modules/postgres" @@ -32,6 +33,11 @@ func init() { func TestMain(m *testing.M) { ctx := context.Background() + devenvRPCPort, err := findFreeTCPPort() + if err != nil { + fmt.Fprintf(os.Stderr, "Setup error: failed to allocate free devenv RPC port: %v\n", err) + os.Exit(1) + } // Start both devenv and Postgres in parallel var wg sync.WaitGroup @@ -39,8 +45,8 @@ func TestMain(m *testing.M) { // Start devenv wg.Go(func() { - zlog.Info("starting development environment (anvil + contracts)") - _, err := devenv.Start(ctx) + zlog.Info("starting development environment (anvil + contracts)", zap.Int("rpc_port", devenvRPCPort)) + _, err := devenv.Start(ctx, devenv.WithRPCPort(devenvRPCPort)) if err != nil { errChan <- fmt.Errorf("failed to start devenv: %w", err) return @@ -126,24 +132,16 @@ func sanitizeDSN(dsn string) string { // runMigrations runs database migrations using golang-migrate func runMigrations(dsn string) error { - // Get the absolute path to the migrations directory - cwd, err := os.Getwd() + migrationsPath, err := psqlrepo.MigrationDir() if err != nil { - return fmt.Errorf("failed to get working directory: %w", err) - } - - // Navigate up to the repository root (from test/integration to .) 
- migrationsPath := filepath.Join(cwd, "..", "..", "provider", "repository", "psql", "migrations") - absPath, err := filepath.Abs(migrationsPath) - if err != nil { - return fmt.Errorf("failed to get absolute path for migrations: %w", err) + return fmt.Errorf("failed to resolve migrations directory: %w", err) } // Use golang-migrate to run migrations cmd := exec.Command("go", "run", "-tags", "postgres", "github.com/golang-migrate/migrate/v4/cmd/migrate@latest", "-database", dsn, - "-path", absPath, + "-path", migrationsPath, "up") output, err := cmd.CombinedOutput() @@ -154,3 +152,18 @@ func runMigrations(dsn string) error { zlog.Debug("migration output", zap.String("output", string(output))) return nil } + +func findFreeTCPPort() (int, error) { + listener, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + return 0, fmt.Errorf("listening for free port: %w", err) + } + defer listener.Close() + + addr, ok := listener.Addr().(*net.TCPAddr) + if !ok { + return 0, fmt.Errorf("unexpected listener address type %T", listener.Addr()) + } + + return addr.Port, nil +} diff --git a/test/integration/payment_session_binding_test.go b/test/integration/payment_session_binding_test.go index 66bec11..b52b81c 100644 --- a/test/integration/payment_session_binding_test.go +++ b/test/integration/payment_session_binding_test.go @@ -38,13 +38,14 @@ func TestPaymentSession_BindsToSessionID(t *testing.T) { domain := env.Domain() providerConfig := &providergateway.Config{ - ListenAddr: ":19006", - ServiceProvider: env.ServiceProvider.Address, - Domain: domain, - CollectorAddr: env.Collector.Address, - EscrowAddr: env.Escrow.Address, - RPCEndpoint: env.RPCURL, - TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + ListenAddr: ":19006", + ServiceProvider: env.ServiceProvider.Address, + Domain: domain, + CollectorAddr: env.Collector.Address, + EscrowAddr: env.Escrow.Address, + RPCEndpoint: env.RPCURL, + DataPlaneEndpoint: "substreams.provider.example:443", + 
TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, } providerGateway := providergateway.New(providerConfig, zlog.Named("provider")) go providerGateway.Run() @@ -83,6 +84,7 @@ func TestPaymentSession_BindsToSessionID(t *testing.T) { require.NoError(t, err) require.True(t, startResp.Msg.Accepted, "expected StartSession accepted: %s", startResp.Msg.RejectionReason) require.NotEmpty(t, startResp.Msg.SessionId) + require.Equal(t, "substreams.provider.example:443", startResp.Msg.GetDataPlaneEndpoint()) // Missing session_id should fail with InvalidArgument. streamBad := gatewayClient.PaymentSession(ctx) diff --git a/test/integration/payment_session_close_propagation_test.go b/test/integration/payment_session_close_propagation_test.go index f05738b..0fbb08c 100644 --- a/test/integration/payment_session_close_propagation_test.go +++ b/test/integration/payment_session_close_propagation_test.go @@ -37,12 +37,13 @@ func TestSessionClose_ConsumerEndSession_MakesProviderInactive(t *testing.T) { domain := env.Domain() providerGateway := providergateway.New(&providergateway.Config{ - ListenAddr: ":19016", - ServiceProvider: env.ServiceProvider.Address, - Domain: domain, - CollectorAddr: env.Collector.Address, - EscrowAddr: env.Escrow.Address, - RPCEndpoint: env.RPCURL, + ListenAddr: ":19016", + ServiceProvider: env.ServiceProvider.Address, + Domain: domain, + CollectorAddr: env.Collector.Address, + EscrowAddr: env.Escrow.Address, + RPCEndpoint: env.RPCURL, + DataPlaneEndpoint: "substreams.provider.example:443", PricingConfig: &sidecar.PricingConfig{ PricePerBlock: sds.NewGRTFromUint64(1), PricePerByte: sds.ZeroGRT(), @@ -72,7 +73,7 @@ func TestSessionClose_ConsumerEndSession_MakesProviderInactive(t *testing.T) { Receiver: commonv1.AddressFromEth(env.ServiceProvider.Address), DataService: commonv1.AddressFromEth(env.DataService.Address), }, - GatewayEndpoint: "http://localhost:19016", + ProviderControlPlaneEndpoint: "http://localhost:19016", })) require.NoError(t, 
err) diff --git a/test/integration/payment_session_consumer_wiring_test.go b/test/integration/payment_session_consumer_wiring_test.go index b762c07..6e1b608 100644 --- a/test/integration/payment_session_consumer_wiring_test.go +++ b/test/integration/payment_session_consumer_wiring_test.go @@ -44,14 +44,15 @@ func TestConsumerSidecar_ReportUsage_WiresPaymentSessionLoop(t *testing.T) { } providerGateway := providergateway.New(&providergateway.Config{ - ListenAddr: ":19013", - ServiceProvider: env.ServiceProvider.Address, - Domain: domain, - CollectorAddr: env.Collector.Address, - EscrowAddr: env.Escrow.Address, - RPCEndpoint: env.RPCURL, - PricingConfig: pricingConfig, - TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + ListenAddr: ":19013", + ServiceProvider: env.ServiceProvider.Address, + Domain: domain, + CollectorAddr: env.Collector.Address, + EscrowAddr: env.Escrow.Address, + RPCEndpoint: env.RPCURL, + PricingConfig: pricingConfig, + DataPlaneEndpoint: "substreams.provider.example:443", + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, }, zlog.Named("provider")) go providerGateway.Run() defer providerGateway.Shutdown(nil) @@ -76,9 +77,10 @@ func TestConsumerSidecar_ReportUsage_WiresPaymentSessionLoop(t *testing.T) { Receiver: commonv1.AddressFromEth(env.ServiceProvider.Address), DataService: commonv1.AddressFromEth(env.DataService.Address), }, - GatewayEndpoint: "http://localhost:19013", + ProviderControlPlaneEndpoint: "http://localhost:19013", })) require.NoError(t, err) + require.Equal(t, "substreams.provider.example:443", initResp.Msg.GetDataPlaneEndpoint()) sessionID := initResp.Msg.GetSession().GetSessionId() require.NotEmpty(t, sessionID) diff --git a/test/integration/payment_session_rav_request_test.go b/test/integration/payment_session_rav_request_test.go index 1ad45f7..de5a192 100644 --- a/test/integration/payment_session_rav_request_test.go +++ b/test/integration/payment_session_rav_request_test.go @@ -45,14 +45,15 @@ 
func TestPaymentSession_ProviderRequestsRAVOnUsage(t *testing.T) { } providerConfig := &providergateway.Config{ - ListenAddr: ":19007", - ServiceProvider: env.ServiceProvider.Address, - Domain: domain, - CollectorAddr: env.Collector.Address, - EscrowAddr: env.Escrow.Address, - RPCEndpoint: env.RPCURL, - PricingConfig: pricingConfig, - TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + ListenAddr: ":19007", + ServiceProvider: env.ServiceProvider.Address, + Domain: domain, + CollectorAddr: env.Collector.Address, + EscrowAddr: env.Escrow.Address, + RPCEndpoint: env.RPCURL, + PricingConfig: pricingConfig, + DataPlaneEndpoint: "substreams.provider.example:443", + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, } providerGateway := providergateway.New(providerConfig, zlog.Named("provider")) go providerGateway.Run() @@ -92,6 +93,7 @@ func TestPaymentSession_ProviderRequestsRAVOnUsage(t *testing.T) { require.NoError(t, err) require.True(t, startResp.Msg.Accepted) require.NotEmpty(t, startResp.Msg.SessionId) + require.Equal(t, "substreams.provider.example:443", startResp.Msg.GetDataPlaneEndpoint()) stream := gatewayClient.PaymentSession(ctx) diff --git a/test/integration/payment_session_underpay_test.go b/test/integration/payment_session_underpay_test.go index 5ed0ea3..f6c9df2 100644 --- a/test/integration/payment_session_underpay_test.go +++ b/test/integration/payment_session_underpay_test.go @@ -45,14 +45,15 @@ func TestPaymentSession_RejectsUnderpayingRAV(t *testing.T) { } providerGateway := providergateway.New(&providergateway.Config{ - ListenAddr: ":19014", - ServiceProvider: env.ServiceProvider.Address, - Domain: domain, - CollectorAddr: env.Collector.Address, - EscrowAddr: env.Escrow.Address, - RPCEndpoint: env.RPCURL, - PricingConfig: pricingConfig, - TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + ListenAddr: ":19014", + ServiceProvider: env.ServiceProvider.Address, + Domain: domain, + CollectorAddr: 
env.Collector.Address, + EscrowAddr: env.Escrow.Address, + RPCEndpoint: env.RPCURL, + PricingConfig: pricingConfig, + DataPlaneEndpoint: "substreams.provider.example:443", + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, }, zlog.Named("provider")) go providerGateway.Run() defer providerGateway.Shutdown(nil) @@ -91,6 +92,7 @@ func TestPaymentSession_RejectsUnderpayingRAV(t *testing.T) { require.NoError(t, err) require.True(t, startResp.Msg.Accepted) require.NotEmpty(t, startResp.Msg.SessionId) + require.Equal(t, "substreams.provider.example:443", startResp.Msg.GetDataPlaneEndpoint()) stream := gatewayClient.PaymentSession(ctx) diff --git a/test/integration/provider_gateway_auth_test.go b/test/integration/provider_gateway_auth_test.go index 728b44b..d09ad3f 100644 --- a/test/integration/provider_gateway_auth_test.go +++ b/test/integration/provider_gateway_auth_test.go @@ -38,13 +38,14 @@ func TestPaymentGateway_OnChainAuthorization(t *testing.T) { domain := env.Domain() providerConfig := &providergateway.Config{ - ListenAddr: ":19005", - ServiceProvider: env.ServiceProvider.Address, - Domain: domain, - CollectorAddr: env.Collector.Address, - EscrowAddr: env.Escrow.Address, - RPCEndpoint: env.RPCURL, - TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + ListenAddr: ":19005", + ServiceProvider: env.ServiceProvider.Address, + Domain: domain, + CollectorAddr: env.Collector.Address, + EscrowAddr: env.Escrow.Address, + RPCEndpoint: env.RPCURL, + DataPlaneEndpoint: "substreams.provider.example:443", + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, } providerGateway := providergateway.New(providerConfig, zlog.Named("provider")) go providerGateway.Run() diff --git a/test/integration/sidecar_test.go b/test/integration/sidecar_test.go index d83b805..361e74b 100644 --- a/test/integration/sidecar_test.go +++ b/test/integration/sidecar_test.go @@ -8,7 +8,6 @@ import ( "time" "connectrpc.com/connect" - 
"github.com/streamingfast/eth-go" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -59,13 +58,14 @@ func TestPaymentFlowBasic(t *testing.T) { // Create provider gateway providerConfig := &providergateway.Config{ - ListenAddr: ":19001", - ServiceProvider: env.ServiceProvider.Address, - Domain: domain, - CollectorAddr: env.Collector.Address, - EscrowAddr: env.Escrow.Address, - RPCEndpoint: env.RPCURL, - TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + ListenAddr: ":19001", + ServiceProvider: env.ServiceProvider.Address, + Domain: domain, + CollectorAddr: env.Collector.Address, + EscrowAddr: env.Escrow.Address, + RPCEndpoint: env.RPCURL, + DataPlaneEndpoint: "substreams.provider.example:443", + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, } providerGateway := providergateway.New(providerConfig, zlog.Named("provider")) go providerGateway.Run() @@ -86,12 +86,13 @@ func TestPaymentFlowBasic(t *testing.T) { Receiver: commonv1.AddressFromEth(env.ServiceProvider.Address), DataService: commonv1.AddressFromEth(env.DataService.Address), }, - GatewayEndpoint: "http://localhost:19001", + ProviderControlPlaneEndpoint: "http://localhost:19001", } initResp, err := consumerClient.Init(ctx, connect.NewRequest(initReq)) require.NoError(t, err, "consumer Init failed") require.NotNil(t, initResp.Msg.PaymentRav, "expected payment RAV") require.NotEmpty(t, initResp.Msg.Session.SessionId, "expected session ID") + require.Equal(t, "substreams.provider.example:443", initResp.Msg.GetDataPlaneEndpoint()) consumerSessionID := initResp.Msg.Session.SessionId t.Logf("Consumer session created: %s", consumerSessionID) @@ -139,7 +140,7 @@ func TestPaymentFlowBasic(t *testing.T) { t.Log("Payment flow test completed successfully!") } -func TestInit_ExistingRAV_ResumesPaymentState(t *testing.T) { +func TestInit_CreatesFreshSessionWithoutResumeSemantics(t *testing.T) { if testing.Short() { t.Skip("skipping integration test in short mode") 
} @@ -165,12 +166,13 @@ func TestInit_ExistingRAV_ResumesPaymentState(t *testing.T) { time.Sleep(100 * time.Millisecond) // Wait for server to start providerGateway := providergateway.New(&providergateway.Config{ - ListenAddr: ":19009", - ServiceProvider: env.ServiceProvider.Address, - Domain: domain, - CollectorAddr: env.Collector.Address, - EscrowAddr: env.Escrow.Address, - RPCEndpoint: env.RPCURL, + ListenAddr: ":19009", + ServiceProvider: env.ServiceProvider.Address, + Domain: domain, + CollectorAddr: env.Collector.Address, + EscrowAddr: env.Escrow.Address, + RPCEndpoint: env.RPCURL, + DataPlaneEndpoint: "substreams.provider.example:443", PricingConfig: &sidecar.PricingConfig{ PricePerBlock: sds.NewGRTFromUint64(1), PricePerByte: sds.ZeroGRT(), @@ -190,12 +192,13 @@ func TestInit_ExistingRAV_ResumesPaymentState(t *testing.T) { } initResp, err := consumerClient.Init(ctx, connect.NewRequest(&consumerv1.InitRequest{ - EscrowAccount: escrowAccount, - GatewayEndpoint: "http://localhost:19009", + EscrowAccount: escrowAccount, + ProviderControlPlaneEndpoint: "http://localhost:19009", })) require.NoError(t, err, "consumer Init failed") require.NotNil(t, initResp.Msg.PaymentRav) require.NotEmpty(t, initResp.Msg.Session.GetSessionId()) + require.Equal(t, "substreams.provider.example:443", initResp.Msg.GetDataPlaneEndpoint()) reportResp, err := consumerClient.ReportUsage(ctx, connect.NewRequest(&consumerv1.ReportUsageRequest{ SessionId: initResp.Msg.Session.GetSessionId(), @@ -210,31 +213,56 @@ func TestInit_ExistingRAV_ResumesPaymentState(t *testing.T) { require.NotNil(t, reportResp.Msg.GetUpdatedRav()) require.NotNil(t, reportResp.Msg.GetUpdatedRav().GetRav()) - existingRAV := reportResp.Msg.GetUpdatedRav() - existingValue := existingRAV.GetRav().GetValueAggregate().ToBigInt() - require.Equal(t, 0, existingValue.Cmp(big.NewInt(1))) + firstValue := reportResp.Msg.GetUpdatedRav().GetRav().GetValueAggregate().ToBigInt() + require.Equal(t, 0, firstValue.Cmp(big.NewInt(1))) 
- // Resume by calling Init(existing_rav=...) and assert the returned payment_rav matches the existing state. + // A later Init creates a fresh payment session instead of resuming prior payment lineage. initResp2, err := consumerClient.Init(ctx, connect.NewRequest(&consumerv1.InitRequest{ - EscrowAccount: escrowAccount, - GatewayEndpoint: "http://localhost:19009", - ExistingRav: existingRAV, + EscrowAccount: escrowAccount, + ProviderControlPlaneEndpoint: "http://localhost:19009", })) - require.NoError(t, err, "consumer Init(existing_rav) failed") + require.NoError(t, err, "consumer Init failed") require.NotNil(t, initResp2.Msg.GetPaymentRav()) require.NotNil(t, initResp2.Msg.GetPaymentRav().GetRav()) + require.NotEmpty(t, initResp2.Msg.GetSession().GetSessionId()) + require.NotEqual(t, initResp.Msg.GetSession().GetSessionId(), initResp2.Msg.GetSession().GetSessionId()) + require.Equal(t, "substreams.provider.example:443", initResp2.Msg.GetDataPlaneEndpoint()) + + freshValue := initResp2.Msg.GetPaymentRav().GetRav().GetValueAggregate().ToBigInt() + require.Equal(t, 0, freshValue.Cmp(big.NewInt(0))) +} + +func TestInit_RequiresProviderControlPlaneEndpoint(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test in short mode") + } - resumedValue := initResp2.Msg.GetPaymentRav().GetRav().GetValueAggregate().ToBigInt() - require.Equal(t, 0, resumedValue.Cmp(existingValue)) + ctx := context.Background() + + env := devenv.Get() + require.NotNil(t, env, "devenv not started") + + setup, err := env.SetupTestWithSigner(nil) + require.NoError(t, err) + + consumerSidecar := consumersidecar.New(&consumersidecar.Config{ + ListenAddr: ":19018", + SignerKey: setup.SignerKey, + Domain: env.Domain(), + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + }, zlog.Named("consumer")) + go consumerSidecar.Run() + defer consumerSidecar.Shutdown(nil) + time.Sleep(100 * time.Millisecond) + + consumerClient := 
consumerv1connect.NewConsumerSidecarServiceClient(http.DefaultClient, "http://localhost:19018") - // Invalid resumption should fail clearly. _, err = consumerClient.Init(ctx, connect.NewRequest(&consumerv1.InitRequest{ EscrowAccount: &commonv1.EscrowAccount{ - Payer: commonv1.AddressFromEth(eth.MustNewAddress("0x9999999999999999999999999999999999999999")), - Receiver: escrowAccount.GetReceiver(), - DataService: escrowAccount.GetDataService(), + Payer: commonv1.AddressFromEth(env.Payer.Address), + Receiver: commonv1.AddressFromEth(env.ServiceProvider.Address), + DataService: commonv1.AddressFromEth(env.DataService.Address), }, - ExistingRav: existingRAV, })) require.Error(t, err) require.Equal(t, connect.CodeInvalidArgument, connect.CodeOf(err)) From 2e73c3e7b8e80b01b17176e4dc76a26d16ee8d35 Mon Sep 17 00:00:00 2001 From: Juan Manuel Rodriguez Defago Date: Fri, 27 Mar 2026 00:12:46 -0300 Subject: [PATCH 07/17] docs: define provider persistence boundary for MVP-003 - add a canonical provider persistence boundary doc for runtime vs settlement ownership - mark MVP-003 done and align backlog, sequencing, and gap-analysis wording - clarify in provider gateway docs that collection lifecycle tracking remains downstream work --- docs/mvp-implementation-sequencing.md | 7 ++- docs/provider-persistence-boundary.md | 80 +++++++++++++++++++++++++++ plans/mvp-gap-analysis.md | 3 + plans/mvp-implementation-backlog.md | 6 +- provider/gateway/REPOSITORY.md | 13 +++++ 5 files changed, 103 insertions(+), 6 deletions(-) create mode 100644 docs/provider-persistence-boundary.md diff --git a/docs/mvp-implementation-sequencing.md b/docs/mvp-implementation-sequencing.md index 551f5e2..acdc18b 100644 --- a/docs/mvp-implementation-sequencing.md +++ b/docs/mvp-implementation-sequencing.md @@ -133,8 +133,8 @@ Minimum prerequisites: Recommended sequence: -1. `MVP-003` Define the durable provider-side payment and settlement data model -2. 
`MVP-008` Add durable provider storage for accepted RAV, session state, and collection lifecycle state +1. `MVP-003` Define and document the provider-side runtime persistence boundary and settlement lifecycle ownership +2. `MVP-008` Add durable provider storage for sessions, usage, and latest accepted RAV runtime state 3. `MVP-029` Implement provider collection lifecycle transitions and update surfaces for `collectible`, `collect_pending`, `collected`, and retryable collection state 4. `MVP-009` Expose provider inspection and settlement-data retrieval APIs for accepted/collectible RAV state 5. `MVP-022` Add authentication and authorization to provider admin/operator APIs @@ -146,6 +146,7 @@ Recommended sequence: Notes: - `MVP-008` and `MVP-029` can begin in parallel once `MVP-003` and `MVP-027` are stable enough. +- `MVP-003` should freeze the runtime-versus-settlement boundary before either downstream task broadens its scope. - `MVP-009` depends on `MVP-029`, so this part of the sequence is required by the backlog rather than just recommended. - `MVP-018` comes late because the current backlog explicitly ties it to operator runtime/low-funds inspection surfaces. @@ -283,7 +284,7 @@ This section is interpretive guidance based on the assumptions register and depe - `MVP-005` - Can begin before `MVP-001` is fully closed if pricing authority remains clearly non-final in the API/implementation. - `MVP-003` - - Some schema design can begin while `MVP-027` is being narrowed, but it should not be treated as finalized until identity semantics are stable. + - Runtime-versus-settlement contract documentation can begin while identity semantics are being narrowed, but it should not be treated as final until those semantics are stable. - `MVP-024` - Can proceed in a reduced/basic form before `MVP-023` is fully closed. 
diff --git a/docs/provider-persistence-boundary.md b/docs/provider-persistence-boundary.md new file mode 100644 index 0000000..c6763b6 --- /dev/null +++ b/docs/provider-persistence-boundary.md @@ -0,0 +1,80 @@ +# Provider Persistence Boundary + +This document is the canonical MVP reference for the provider-side persistence boundary introduced by `MVP-003`. + +It defines the separation between: + +- runtime/session persistence owned by the current provider repository model +- settlement/collection lifecycle persistence that remains separate MVP work, primarily `MVP-029` + +Use this document together with: + +- [docs/mvp-scope.md](./mvp-scope.md) for the MVP target state +- [plans/mvp-implementation-backlog.md](../plans/mvp-implementation-backlog.md) for task ownership and dependencies +- [provider/repository/repository.go](../provider/repository/repository.go) for the current repository interface +- [provider/repository/psql/migrations/000001_init_schema.up.sql](../provider/repository/psql/migrations/000001_init_schema.up.sql) for the current durable storage shape + +## Runtime Persistence Model + +The current provider repository model is a runtime/session model. + +For MVP purposes, that model includes: + +- sessions and their lifecycle/status +- workers/connections attached to sessions +- usage events and accumulated usage totals +- quota/runtime coordination state +- the latest accepted RAV snapshot associated with a session + +In the current PostgreSQL implementation, that concrete shape is represented by: + +- `sessions` +- `workers` +- `usage_events` +- `quota_usage` +- `ravs` as a one-to-one latest accepted RAV record keyed by `session_id` + +The `ravs` table should be interpreted as durable runtime state that preserves the latest accepted RAV needed after restart. It is not, by itself, a complete settlement lifecycle model. + +## Settlement Conceptual Model + +Settlement and collection lifecycle tracking is a separate concern from runtime session tracking. 
+ +For MVP, the provider must eventually support durable collection-oriented state for accepted RAVs, including the conceptual lifecycle states: + +- `collectible` +- `collect_pending` +- `collected` +- retryable failure / retryable collection state + +That lifecycle is settlement state, not runtime session state. It exists so operator inspection and manual collection workflows can reason about what is ready to collect, what is in flight, and what has already completed. + +`MVP-003` does not define the concrete persistence schema, repository interface, or API payloads for that lifecycle. That design and implementation belong to downstream tasks, especially `MVP-029`, with retrieval surfaces in `MVP-009`. + +## Boundary Rules + +- Provider restart must preserve accepted RAV state needed for post-restart inspection and settlement. +- Fresh reconnects create new SDS payment sessions and do not reuse prior runtime session identity or payment lineage. +- Runtime session records may reference settlement-relevant accepted state, but they do not define collection progress. +- Client and CLI flows should read settlement-relevant provider state through provider-owned APIs, not by assuming direct database access. +- `MVP-008` extends durable runtime storage around the existing repository model. +- `MVP-029` owns collection lifecycle persistence, transitions, and retry semantics. +- `MVP-009` owns provider retrieval APIs for accepted and collectible settlement-relevant state. + +## Implications For Downstream Tasks + +- `MVP-008` should focus on restart-safe runtime durability for sessions, workers, usage, and the latest accepted RAV state already represented in the current repository model. +- `MVP-008` should not absorb collection lifecycle tracking just because accepted RAV state is also settlement-relevant. +- `MVP-029` should introduce the distinct provider-side persistence/update model needed for collection lifecycle state. 
+- `MVP-019` and `MVP-020` should consume provider-backed settlement retrieval flows after `MVP-009` and `MVP-029`, not direct backend reads. + +## Out Of Scope For MVP-003 + +`MVP-003` does not: + +- add or change protobuf APIs +- add or change repository interfaces +- add or change database schema +- define the final collection lifecycle schema or transitions in implementation detail +- resolve authn/authz for operator/admin surfaces +- define the exact inspection or collection API shape diff --git a/plans/mvp-gap-analysis.md b/plans/mvp-gap-analysis.md index 5e14346..0552ca7 100644 --- a/plans/mvp-gap-analysis.md +++ b/plans/mvp-gap-analysis.md @@ -179,12 +179,14 @@ Current state: - PostgreSQL repository support exists - the provider gateway can instantiate repositories via DSN - migrations and repository tests exist +- the current durable model already covers runtime/session state plus the latest accepted RAV snapshot for a session Evidence: - `provider/gateway/repository.go` - `provider/repository/psql/` - `provider/gateway/REPOSITORY.md` +- `docs/provider-persistence-boundary.md` What is still missing for MVP: @@ -194,6 +196,7 @@ What is still missing for MVP: Notes: +- `MVP-003` now freezes the boundary between runtime/session persistence and later settlement lifecycle tracking so `MVP-008` and `MVP-029` do not overlap semantically. - Repository validation is now portable across checkout paths because PostgreSQL test migrations resolve from repo-local state rather than a machine-specific absolute path. 
### Consumer Data-Plane Compatibility diff --git a/plans/mvp-implementation-backlog.md b/plans/mvp-implementation-backlog.md index 361c0ad..c2de121 100644 --- a/plans/mvp-implementation-backlog.md +++ b/plans/mvp-implementation-backlog.md @@ -90,7 +90,7 @@ These assumptions are referenced by task ID so it is clear which scope decisions | --- | --- | --- | --- | --- | --- | --- | | MVP-001 | `done` | protocol | `A2` | none | `A` | Freeze the oracle-authoritative MVP pricing contract across oracle, consumer, and provider flows | | MVP-002 | `done` | protocol | `A2`, `A3` | `MVP-033` | `A`, `B` | Freeze fresh-session init semantics and provider-returned data-plane endpoint behavior | -| MVP-003 | `in_progress` | protocol | `A3`, `A6` | `MVP-027` | `D`, `F` | Define and document the provider-side runtime persistence model and its boundary with settlement lifecycle tracking | +| MVP-003 | `done` | protocol | `A3`, `A6` | `MVP-027` | `D`, `F` | Define and document the provider-side runtime persistence model and its boundary with settlement lifecycle tracking | | MVP-004 | `done` | protocol | `A2`, `A3` | none | `A`, `C` | Define and document the real runtime payment contract used by the public payment gateway, private plugin gateway, and consumer/provider payment loop | | MVP-005 | `not_started` | oracle | `A1`, `A2`, `A5` | `MVP-033` | `A` | Implement a standalone oracle service with manual whitelist, canonical pricing, recommended-provider response, and control-plane endpoint return | | MVP-006 | `not_started` | oracle | `A5` | `MVP-028` | `A`, `G` | Add authenticated oracle administration for whitelist and provider metadata management | @@ -176,7 +176,7 @@ These assumptions are referenced by task ID so it is clear which scope decisions - Verify: - Review [docs/mvp-scope.md](../docs/mvp-scope.md) and confirm the reconnect scenario and major decisions table match this rule. 
-- [ ] MVP-003 Define and document the provider-side runtime persistence model and its boundary with settlement lifecycle tracking. +- [x] MVP-003 Define and document the provider-side runtime persistence model and its boundary with settlement lifecycle tracking. - Context: - StreamingFast landed the shared repository model, PostgreSQL schema, and DSN-backed repository instantiation. - The remaining work is to make the runtime-versus-settlement boundary explicit in the MVP backlog and docs. @@ -188,7 +188,7 @@ These assumptions are referenced by task ID so it is clear which scope decisions - The provider-side durable model is described in terms of sessions, workers, usage, current accepted RAV state, and separate collection lifecycle tracking. - Downstream tasks no longer assume reconnect-driven reuse semantics. - Verify: - - Review [provider/repository/repository.go](../provider/repository/repository.go) and [provider/gateway/REPOSITORY.md](../provider/gateway/REPOSITORY.md) against backlog task wording. + - Review [docs/provider-persistence-boundary.md](../docs/provider-persistence-boundary.md), [provider/repository/repository.go](../provider/repository/repository.go), and [provider/gateway/REPOSITORY.md](../provider/gateway/REPOSITORY.md) against backlog task wording. - [x] MVP-004 Define and document the real runtime payment contract used by the public payment gateway, private plugin gateway, and consumer/provider payment loop. - Context: diff --git a/provider/gateway/REPOSITORY.md b/provider/gateway/REPOSITORY.md index 54c1ee5..b4b13f3 100644 --- a/provider/gateway/REPOSITORY.md +++ b/provider/gateway/REPOSITORY.md @@ -192,6 +192,19 @@ if err != nil { All repositories implement the `repository.GlobalRepository` interface defined in `provider/repository/repository.go`. +### Persistence Boundary + +The current repository model is the provider runtime/session persistence layer. 
+ +It persists: + +- sessions and workers +- usage events and accumulated usage +- quota/runtime coordination state +- the latest accepted RAV snapshot associated with a session + +It does not yet define the provider collection lifecycle model for `collectible`, `collect_pending`, `collected`, or retryable collection state. That boundary is defined in [docs/provider-persistence-boundary.md](../docs/provider-persistence-boundary.md) and implemented by downstream MVP work rather than this repository overview. + ### Backward Compatibility If no `--repository-dsn` flag is provided, the gateway defaults to `inmemory://`, maintaining backward compatibility with existing deployments. From bd37adf5b298e4862b8f037b77bc6d0314a1cbbc Mon Sep 17 00:00:00 2001 From: Juan Manuel Rodriguez Defago Date: Fri, 27 Mar 2026 00:24:37 -0300 Subject: [PATCH 08/17] feat: updated implementation sequencing doc --- docs/mvp-implementation-sequencing.md | 36 ++++++++++++++------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/docs/mvp-implementation-sequencing.md b/docs/mvp-implementation-sequencing.md index acdc18b..e56da24 100644 --- a/docs/mvp-implementation-sequencing.md +++ b/docs/mvp-implementation-sequencing.md @@ -9,9 +9,12 @@ Use it to: - understand which tasks are true prerequisites for others - identify which work can proceed in parallel - avoid prompting agents to implement downstream tasks before the required contracts are stable enough +- keep implementation sequencing aligned with the current backlog and scope, not an older snapshot Use `docs/mvp-scope.md` as the target-state definition and `plans/mvp-implementation-backlog.md` as the source of truth for task definitions, dependencies, and status. +If `docs/mvp-scope.md`, `plans/mvp-implementation-backlog.md`, or other MVP architecture/planning docs change in a way that affects sequencing, status, or dependencies, this document should be updated in the same change. 
+ ## How To Read This Document This is a dependency-driven sequencing guide, not a strict linear priority list. @@ -150,7 +153,9 @@ Notes: - `MVP-009` depends on `MVP-029`, so this part of the sequence is required by the backlog rather than just recommended. - `MVP-018` comes late because the current backlog explicitly ties it to operator runtime/low-funds inspection surfaces. -### Lane D: Reconnect And Resume +### Lane D: Post-MVP Reconnect And Resume + +This lane is historical context only and is not part of the current MVP rollout. Minimum prerequisites: @@ -158,14 +163,14 @@ Minimum prerequisites: Recommended sequence: -1. `MVP-002` Freeze reconnect handshake semantics so provider can return fresh or latest-known resumable RAV during normal session init -2. `MVP-008` durable provider state work must be stable enough for reconnect behavior -3. `MVP-013` Implement provider-authoritative reconnect/resume in the normal handshake path +1. `MVP-013` Implement provider-authoritative reconnect/resume semantics if reconnect becomes an in-scope post-MVP target +2. Re-evaluate durable state and handshake requirements against the then-current provider runtime before implementation starts Notes: -- This lane depends on both protocol and persistence work. -- Reconnect should not be treated as complete until it is proven against provider-authoritative durable state, not just consumer-local memory. +- `MVP-002` is already resolved for MVP and freezes fresh-session semantics rather than resume behavior. +- `MVP-013` is explicitly deferred in the backlog and should not be used to drive current MVP sequencing. +- Any future reconnect/resume work should be treated as a new planning pass, not as an active MVP lane. 
### Lane E: Security And Deployment @@ -211,14 +216,16 @@ It is a recommended rollout sequence, not a canonical priority order embedded in ### Phase 0: Resolve Or Narrow Shared Contracts -- `MVP-004` -- `MVP-027` - `MVP-028` -- `MVP-001` - `MVP-023` Already resolved: +- `MVP-001` +- `MVP-002` +- `MVP-003` +- `MVP-004` +- `MVP-027` - `MVP-033` ### Phase 1: Start The First Implementable Lanes @@ -231,7 +238,6 @@ Already resolved: - `MVP-012` - `MVP-014` - Provider state foundation: - - `MVP-003` - `MVP-008` - `MVP-029` - Security foundation: @@ -245,11 +251,9 @@ Already resolved: - `MVP-017` - `MVP-009` - `MVP-022` -- `MVP-002` -### Phase 3: Complete Reconnect, Runtime Control, And Operator Flows +### Phase 3: Complete Runtime Control And Operator Flows -- `MVP-013` - `MVP-031` - `MVP-006` - `MVP-019` @@ -283,8 +287,6 @@ This section is interpretive guidance based on the assumptions register and depe - `MVP-005` - Can begin before `MVP-001` is fully closed if pricing authority remains clearly non-final in the API/implementation. -- `MVP-003` - - Runtime-versus-settlement contract documentation can begin while identity semantics are being narrowed, but it should not be treated as final until those semantics are stable. - `MVP-024` - Can proceed in a reduced/basic form before `MVP-023` is fully closed. @@ -292,8 +294,6 @@ This section is interpretive guidance based on the assumptions register and depe - `MVP-007` - Should wait until the chain/network and pricing exposure contracts are stable enough. -- `MVP-013` - - Should wait until reconnect semantics and durable provider state are both stable enough. - `MVP-019` and `MVP-020` - Should wait until retrieval APIs, auth, and collection lifecycle semantics are in place. @@ -321,5 +321,7 @@ If you find that MVP-010 still requires unresolved semantics beyond MVP-004, mar ## Notes - This document derives sequence from the current dependency structure in `plans/mvp-implementation-backlog.md`. 
+- Treat this document as a maintained companion to the backlog, not a one-time planning artifact. +- When MVP status, dependencies, or scope wording changes elsewhere, update this document in the same documentation pass if the sequencing view is affected. - If task dependencies change, this document should be updated to match. - When the backlog and this document disagree, the backlog is the source of truth. From 5ce40d974a528dd3e8ebb98fd775027aab3465c6 Mon Sep 17 00:00:00 2001 From: Juan Manuel Rodriguez Defago Date: Fri, 27 Mar 2026 01:37:29 -0300 Subject: [PATCH 09/17] provider: add MVP-010 low-funds session stop handling - add session-local low-funds assessment, metadata persistence, and terminal NeedMoreFunds handling in PaymentSession - fix PaymentsEscrow balance queries to use the shared ABI-based getBalance encoding - add integration coverage for insufficient funds, exact-balance continue, unknown-balance fail-open, and consumer stop behavior - update MVP scope, backlog, sequencing, and gap-analysis docs to mark MVP-010 complete and freeze stop-only low-funds semantics --- docs/mvp-implementation-sequencing.md | 34 +- docs/mvp-scope.md | 16 +- plans/mvp-gap-analysis.md | 18 +- plans/mvp-implementation-backlog.md | 36 +- provider/gateway/funds.go | 119 ++++++ provider/gateway/handler_payment_session.go | 40 +- sidecar/escrow_querier.go | 34 +- .../payment_session_low_funds_test.go | 365 ++++++++++++++++++ 8 files changed, 611 insertions(+), 51 deletions(-) create mode 100644 provider/gateway/funds.go create mode 100644 test/integration/payment_session_low_funds_test.go diff --git a/docs/mvp-implementation-sequencing.md b/docs/mvp-implementation-sequencing.md index e56da24..1de4aee 100644 --- a/docs/mvp-implementation-sequencing.md +++ b/docs/mvp-implementation-sequencing.md @@ -115,17 +115,27 @@ Minimum prerequisites: Recommended sequence: -1. 
`MVP-010` Implement session-local low-funds detection and provider Continue/Pause/Stop decisions during streaming -2. `MVP-012` Add deterministic RAV issuance thresholds suitable for real runtime behavior -3. `MVP-014` Integrate provider gateway validation into the real provider streaming path -4. `MVP-015` Wire real byte metering from the provider/plugin path into gateway payment state -5. `MVP-011` Propagate provider stop/pause decisions through consumer sidecar into the real client path -6. `MVP-016` Enforce gateway Continue/Pause/Stop decisions in the live provider stream lifecycle -7. `MVP-031` Wire the live PaymentSession and RAV-control loop into the real client/provider runtime path +Completed foundation: + +- `MVP-010` Implement session-local low-funds detection and provider terminal stop behavior during streaming + +Recommended next sequence: + +1. `MVP-012` Add deterministic RAV issuance thresholds suitable for real runtime behavior +2. `MVP-014` Integrate provider gateway validation into the real provider streaming path +3. `MVP-015` Wire real byte metering from the provider/plugin path into gateway payment state +4. `MVP-011` Propagate provider low-funds stop decisions through consumer sidecar into the real client path +5. `MVP-016` Enforce gateway Continue/Stop decisions in the live provider stream lifecycle +6. `MVP-031` Wire the live PaymentSession and RAV-control loop into the real client/provider runtime path Notes: -- `MVP-010` and `MVP-014` are the main foundations in this lane. +- `MVP-010` is now the frozen low-funds foundation for this lane: + - session-local exposure only + - terminal stop on insufficient funds + - fail-open if live escrow balance cannot be determined +- `MVP-014` remains the main integration foundation in this lane. +- `MVP-011` is partially advanced because the current sidecar wrapper path already stops on `NeedMoreFunds`, but the real client-facing ingress path is still unfinished. 
- `MVP-031` is effectively the capstone runtime-payment task because it depends on real provider and consumer integration plus thresholding. ### Lane C: Provider State, Settlement, And Operator Retrieval @@ -225,6 +235,7 @@ Already resolved: - `MVP-002` - `MVP-003` - `MVP-004` +- `MVP-010` - `MVP-027` - `MVP-033` @@ -234,7 +245,6 @@ Already resolved: - `MVP-005` - `MVP-007` - Runtime foundation: - - `MVP-010` - `MVP-012` - `MVP-014` - Provider state foundation: @@ -312,10 +322,10 @@ Example: ```text We are currently in Phase 1, Runtime foundation. -Implement MVP-010 only. -You may rely on MVP-004 as the frozen runtime billing/payment contract. +Implement MVP-012 only. +You may rely on MVP-004 as the frozen runtime billing/payment contract and MVP-010 as the frozen low-funds control contract. Do not broaden into MVP-011 or MVP-016 except for strictly necessary supporting edits. -If you find that MVP-010 still requires unresolved semantics beyond MVP-004, mark it blocked instead of choosing an implicit contract in code. +If you find that MVP-012 still requires unresolved semantics beyond those contracts, mark it blocked instead of choosing an implicit contract in code. ``` ## Notes diff --git a/docs/mvp-scope.md b/docs/mvp-scope.md index 3d0717f..a1b4935 100644 --- a/docs/mvp-scope.md +++ b/docs/mvp-scope.md @@ -1,7 +1,7 @@ # Substreams Data Service MVP Scope Drafted: 2026-03-12 -Revised: 2026-03-24 +Revised: 2026-03-27 ## Purpose @@ -53,7 +53,7 @@ However, the current repo does not yet constitute the MVP. 
Major remaining gaps - standalone oracle/discovery component - consumer-side endpoint compatibility that hides SDS control flow behind a Substreams-compatible ingress - provider-side durable persistence for accepted RAV and collection state -- low-funds stop/pause behavior during live streaming +- full low-funds propagation through the real provider/client streaming path - operator funding and settlement CLI flows - authenticated admin/operator surfaces - finalization of observability scope @@ -127,7 +127,11 @@ MVP network-discovery contract: - RAVs are requested and updated as needed - accepted payment state advances on the provider side - low-funds conditions can be surfaced during the live stream + - insufficient funds terminate the current SDS payment session for MVP rather than pausing it - For MVP, low-funds decisions are session-local, not payer-global across concurrent streams. +- If the provider cannot determine live escrow balance, it does not stop the session solely because funding status is unknown. +- Temporary escrow-RPC failures are not treated as low-funds pause semantics for MVP. +- If stricter handling is needed later, it should be introduced as a separate infrastructure-failure policy, for example bounded retries before a distinct stop behavior, rather than overloading `NeedMoreFunds`. ### 4. 
Fund or Top Up Escrow @@ -163,7 +167,7 @@ MVP network-discovery contract: | Direct provider connection | Supported as fallback/override | Useful bridge from current implementation and operational fallback | | Pricing authority | Oracle-authoritative pricing across the curated MVP provider set | Predictable pricing and simpler consumer/provider behavior while providers are manually curated | | Billing unit | Streamed bytes | Aligns with provider-authoritative metering path | -| Funding model | Session-local low-funds logic | Avoids premature distributed liability accounting for concurrent streams | +| Funding model | Session-local, stop-only low-funds logic | Avoids premature distributed liability accounting for concurrent streams while keeping MVP control behavior simple | | Funding UX | CLI/operator-driven with only lightweight consumer-side advisory guidance | Keeps MVP simple without pretending the consumer knows provider-side liability | | Concurrent streams | Documented limitation, not blocked | Simpler MVP with explicit limitation instead of partial enforcement | | Collection execution | CLI signs and submits locally | Keeps settlement key custody outside provider-side runtime | @@ -226,6 +230,7 @@ That is a materially larger distributed-state problem than the session-local MVP - Uses provider-authoritative byte metering from the plugin/integration path - Drives RAV request/response flow - Handles live low-funds conditions during streaming +- Uses terminal stop behavior rather than pause when session-local funds are insufficient - Persists accepted RAV and settlement-relevant state durably - Exposes authenticated operator/admin surfaces for inspection and settlement data retrieval - May rely on separate internal plugin/runtime components behind the public gateway boundary @@ -310,8 +315,9 @@ The scenarios below are the primary definition of done for the MVP. 
- Streaming starts with initially sufficient funds - Usage progresses until provider-side session-local funding logic determines funds are too low -- Provider surfaces the low-funds condition during the live stream -- The client path receives and reacts to the stop/pause decision correctly +- Provider surfaces the low-funds condition during the live stream and terminates the current SDS payment session +- The client path receives and reacts to the stop decision correctly +- If the provider cannot determine escrow balance, it does not stop solely because balance status is unknown - Any consumer-side warnings or balance checks remain advisory rather than authoritative ### D. Provider Restart Without Losing Collectible State diff --git a/plans/mvp-gap-analysis.md b/plans/mvp-gap-analysis.md index 0552ca7..d0dfb95 100644 --- a/plans/mvp-gap-analysis.md +++ b/plans/mvp-gap-analysis.md @@ -1,7 +1,7 @@ # MVP Gap Analysis Drafted: 2026-03-12 -Revised: 2026-03-26 +Revised: 2026-03-27 This document maps the current repository state against the MVP defined in `docs/mvp-scope.md`. @@ -48,7 +48,7 @@ The biggest remaining MVP gaps are now: - standalone oracle/discovery component - consumer-side Substreams-compatible endpoint/proxy behavior - provider collection lifecycle persistence and inspection/collection APIs -- low-funds enforcement in the real live stream path +- full low-funds propagation through the real provider/client streaming path - operator funding and collection tooling - authenticated admin/operator surfaces - finalized observability floor @@ -59,7 +59,7 @@ The biggest remaining MVP gaps are now: | --- | --- | --- | | A. Discovery to paid streaming | `partial` | Paid session flow and provider runtime foundations exist, but the standalone oracle is still missing and the consumer sidecar is not yet the Substreams-compatible ingress described by the scope | | B. 
Fresh session after interruption | `partial` | Fresh-session semantics are implemented in the init contract, but broader real-path interruption validation still remains | -| C. Low funds during streaming | `missing` | Session-local low-funds handling in the real live stream path is still backlog work | +| C. Low funds during streaming | `partial` | Session-local low-funds stop behavior now exists in the payment-session path, but full real provider/client streaming-path enforcement is still incomplete | | D. Provider restart without losing collectible state | `partial` | Provider persistence is no longer purely in-memory because PostgreSQL support exists, but collectible/collection lifecycle tracking is still incomplete | | E. Manual funding flow | `partial` | Demo-oriented setup/funding helpers exist, but real operator-grade funding CLI flows do not | | F. Manual collection flow | `missing` | RAV tooling exists, but provider-backed settlement inspection and collect workflow are not implemented | @@ -99,11 +99,12 @@ What already exists: - usage reporting - end session - payment-session loop wiring to provider gateway +- `NeedMoreFunds` currently stops the wrapper-oriented `ReportUsage` flow What is still missing for MVP: - the real user-facing integration is still wrapper-centric rather than endpoint-centric -- finalized low-funds stop/pause handling in the real usage path +- full low-funds propagation through the real client-facing ingress path ### Provider Gateway @@ -123,13 +124,16 @@ What already exists: - session start - bidirectional payment session - RAV validation and authorization checks +- session-local low-funds detection during `PaymentSession` usage handling +- terminal `NeedMoreFunds` response plus payment-issue session termination when live escrow is insufficient +- persisted machine-readable funding metadata for `ok`, `insufficient`, and `unknown` session state - basic runtime/session status inspection - repository-backed session state foundation 
What is still missing for MVP: - collection lifecycle state -- live low-funds logic during active streaming +- enforcement of gateway low-funds/control outcomes in the live provider stream lifecycle - authenticated admin/operator surfaces ### Provider Plugin Services @@ -154,7 +158,7 @@ What is still missing for MVP: - full live-provider-path acceptance in production-like usage - finalized byte-billing/runtime contract documentation -- live stop/pause behavior enforced in the provider stream lifecycle +- live stop behavior enforced in the provider stream lifecycle ### Oracle @@ -313,6 +317,8 @@ The most important recent status changes versus the original draft are: - The repo now includes PostgreSQL-backed repository code, DSN-based selection, migrations, and tests. - Provider runtime shape is more concrete than before. - The repo now explicitly separates a public Payment Gateway from a private Plugin Gateway. +- Session-local low-funds handling is no longer fully missing. + - The payment-session path now evaluates projected session-local exposure against live escrow, fails open on unknown balance, and terminates the current session with `NeedMoreFunds` when funds are insufficient. - Real-path integration scaffolding is stronger. - The repo now includes stronger firecore/plugin integration setup and a `TestFirecore` scaffold, even though that path is not yet MVP-complete. - Consumer-side MVP UX is still notably behind the revised scope. diff --git a/plans/mvp-implementation-backlog.md b/plans/mvp-implementation-backlog.md index c2de121..e1e5202 100644 --- a/plans/mvp-implementation-backlog.md +++ b/plans/mvp-implementation-backlog.md @@ -1,6 +1,6 @@ # Substreams Data Service — MVP Implementation Backlog -_Last updated: 2026-03-24_ +_Last updated: 2026-03-27_ This document translates [docs/mvp-scope.md](../docs/mvp-scope.md) into concrete implementation tasks for the MVP. 
@@ -97,13 +97,13 @@ These assumptions are referenced by task ID so it is clear which scope decisions | MVP-007 | `not_started` | consumer | `A1`, `A2`, `A3` | `MVP-005`, `MVP-033` | `A` | Integrate consumer sidecar with oracle discovery while preserving direct-provider fallback and provider-returned data-plane resolution | | MVP-008 | `in_progress` | provider-state | `A3`, `A6` | `MVP-003` | `D`, `F` | Complete durable provider runtime storage for sessions, usage, and accepted RAV state, distinct from collection lifecycle tracking | | MVP-009 | `not_started` | provider-state | `A3`, `A5` | `MVP-003`, `MVP-022`, `MVP-029` | `D`, `F` | Expose provider inspection and settlement-data retrieval APIs for accepted and collectible RAV state | -| MVP-010 | `not_started` | funding-control | `A6` | `MVP-004` | `C` | Implement session-local low-funds detection and provider Continue/Pause/Stop decisions during streaming | -| MVP-011 | `not_started` | funding-control | `A6` | `MVP-010` | `C` | Propagate provider stop/pause decisions through consumer sidecar into the real client path | +| MVP-010 | `done` | funding-control | `A6` | `MVP-004` | `C` | Implement session-local low-funds detection and provider terminal stop behavior during streaming | +| MVP-011 | `in_progress` | funding-control | `A6` | `MVP-010` | `C` | Propagate provider low-funds stop decisions through consumer sidecar into the real client path | | MVP-012 | `not_started` | funding-control | none | `MVP-004` | `A`, `C` | Add deterministic RAV issuance thresholds suitable for real runtime behavior | | MVP-013 | `deferred` | consumer | `A3` | none | none | Post-MVP only: implement true provider-authoritative payment-session reconnect/resume semantics | | MVP-014 | `in_progress` | provider-integration | `A3` | `MVP-004` | `A` | Integrate the public Payment Gateway and private Plugin Gateway into the real provider streaming path | | MVP-015 | `in_progress` | provider-integration | `A3` | `MVP-004`, `MVP-014` | `A`, 
`C` | Wire real byte metering and session correlation from the plugin path into the payment-state repository used by the gateway | -| MVP-016 | `not_started` | provider-integration | `A6` | `MVP-010`, `MVP-014` | `C` | Enforce gateway Continue/Pause/Stop decisions in the live provider stream lifecycle | +| MVP-016 | `not_started` | provider-integration | `A6` | `MVP-010`, `MVP-014` | `C` | Enforce gateway Continue/Stop decisions in the live provider stream lifecycle | | MVP-017 | `not_started` | consumer-integration | `A1`, `A2`, `A3` | `MVP-007`, `MVP-011`, `MVP-033` | `A`, `C` | Implement the consumer sidecar as a Substreams-compatible endpoint/proxy rather than only a wrapper-controlled lifecycle service | | MVP-018 | `not_started` | tooling | none | `MVP-032` | `E` | Implement operator funding CLI flows for approve/deposit/top-up beyond local demo assumptions | | MVP-019 | `not_started` | tooling | `A5` | `MVP-009`, `MVP-022` | `D`, `F` | Implement provider inspection CLI flows for accepted and collectible RAV data | @@ -317,28 +317,31 @@ These assumptions are referenced by task ID so it is clear which scope decisions ## Funding Control and Runtime Payment Tasks -- [ ] MVP-010 Implement session-local low-funds detection and provider Continue/Pause/Stop decisions during streaming. +- [x] MVP-010 Implement session-local low-funds detection and provider terminal stop behavior during streaming. - Context: - The MVP requires low-funds handling during active streaming, but only on a session-local basis. + - The implemented MVP policy is stop-only on insufficient funds, with fail-open behavior when live escrow balance cannot be queried. - Assumptions: - `A6` - Done when: - - Provider can compare session-local exposure against available funding. - - Provider emits the appropriate control/funding messages during active streams. - - Low-funds behavior includes enough machine-readable state for operator tooling and client-side messaging. 
+ - Provider compares projected session-local outstanding exposure against live escrow during `PaymentSession` usage handling. + - If funds are insufficient, provider persists machine-readable funds metadata, terminates the session with `END_REASON_PAYMENT_ISSUE`, and emits `NeedMoreFunds` as the terminal response for that session roundtrip. + - If live escrow balance cannot be determined, provider records `unknown` funding status and continues normal runtime behavior rather than stopping solely on the failed check. + - The MVP does not reinterpret temporary escrow-RPC failures as pause semantics; any future bounded-retry or infrastructure-failure stop policy should remain distinct from `NeedMoreFunds`. - Verify: - - Add an integration test with intentionally low funding that reaches a stop/pause condition during streaming. + - Integration coverage exists for insufficient-funds stop, exact-balance continue, unknown-balance fail-open, and consumer-side stop behavior on `NeedMoreFunds`. -- [ ] MVP-011 Propagate provider stop/pause decisions through consumer sidecar into the real client path. +- [ ] MVP-011 Propagate provider low-funds stop decisions through consumer sidecar into the real client path. - Context: - Low-funds logic is incomplete until the client path actually obeys it. + - The current sidecar already stops the wrapper-oriented `ReportUsage` flow on `NeedMoreFunds`, but the full real ingress path remains downstream work. - Assumptions: - `A6` - Done when: - - Consumer sidecar converts provider control/funding messages into client-visible stop/pause behavior. - - Real client integration honors those decisions. + - Consumer sidecar propagates provider low-funds stop decisions through the real client-facing ingress path, not only the current wrapper-oriented `ReportUsage` flow. + - Real client integration honors those stop decisions and surfaces a clear client-visible reason. 
- Verify: - - Add integration/manual verification showing the real client path stops or pauses when the provider requires it. + - Add integration/manual verification showing the real client path stops when the provider surfaces low funds during live streaming. - [ ] MVP-012 Add deterministic RAV issuance thresholds suitable for real runtime behavior. - Context: @@ -379,15 +382,16 @@ These assumptions are referenced by task ID so it is clear which scope decisions - Verify: - Add tests or manual instrumentation evidence showing live provider/plugin activity updates the payment-state repository consistently. -- [ ] MVP-016 Enforce gateway Continue/Pause/Stop decisions in the live provider stream lifecycle. +- [ ] MVP-016 Enforce gateway Continue/Stop decisions in the live provider stream lifecycle. - Context: - Provider-side control logic is incomplete if the live provider stream does not obey it. - Assumptions: - `A6` - Done when: - The real provider path can enforce SDS control decisions during live streaming. + - Gateway-driven low-funds stop behavior interrupts the live provider stream lifecycle appropriately rather than only ending the control-plane session. - Verify: - - Add manual or automated verification where the provider stops or pauses the live stream based on gateway control decisions. + - Add manual or automated verification where the provider stops the live stream based on gateway control decisions. - [ ] MVP-017 Implement the consumer sidecar as a Substreams-compatible endpoint/proxy rather than only a wrapper-controlled lifecycle service. - Context: @@ -415,7 +419,7 @@ These assumptions are referenced by task ID so it is clear which scope decisions - The real client/provider integration keeps the SDS payment-control loop active alongside the live stream behind the consumer-sidecar ingress path. - Provider-driven RAV requests, acknowledgements, and control messages flow through the production runtime path rather than only through wrapper commands. 
- Verify: - - Add a real-path integration or documented manual verification showing stream start, at least one provider-driven payment update during live streaming, and synchronized session state until normal end or stop/pause. + - Add a real-path integration or documented manual verification showing stream start, at least one provider-driven payment update during live streaming, and synchronized session state until normal end or low-funds stop. ## Operator Tooling Tasks diff --git a/provider/gateway/funds.go b/provider/gateway/funds.go new file mode 100644 index 0000000..394eb15 --- /dev/null +++ b/provider/gateway/funds.go @@ -0,0 +1,119 @@ +package gateway + +import ( + "context" + "math/big" + + commonv1 "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/common/v1" + providerv1 "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/provider/v1" + "github.com/graphprotocol/substreams-data-service/provider/repository" + "github.com/graphprotocol/substreams-data-service/sidecar" +) + +const ( + fundsStatusOK = "ok" + fundsStatusInsufficient = "insufficient" + fundsStatusUnknown = "unknown" + + fundsStatusKey = "funds_status" + fundsEscrowBalanceWeiKey = "funds_escrow_balance_wei" + fundsCurrentOutstandingWeiKey = "funds_current_outstanding_wei" + fundsProjectedOutstandingWeiKey = "funds_projected_outstanding_wei" + fundsMinimumNeededWeiKey = "funds_minimum_needed_wei" + fundsCheckErrorKey = "funds_check_error" +) + +type fundsAssessment struct { + status string + currentOutstanding *big.Int + projectedOutstanding *big.Int + escrowBalance *big.Int + minimumNeeded *big.Int + checkErr error +} + +func (a *fundsAssessment) insufficient() bool { + return a.status == fundsStatusInsufficient +} + +func (a *fundsAssessment) unknown() bool { + return a.status == fundsStatusUnknown +} + +func (s *Gateway) assessSessionFunds(ctx context.Context, session *repository.Session) *fundsAssessment { + currentOutstanding := 
big.NewInt(0) + if session.CurrentRAV != nil && session.CurrentRAV.Message != nil && session.CurrentRAV.Message.ValueAggregate != nil { + currentOutstanding = new(big.Int).Set(session.CurrentRAV.Message.ValueAggregate) + } + + _, _, _, deltaCost := session.UsageDeltaSinceBaseline() + projectedOutstanding := new(big.Int).Add(new(big.Int).Set(currentOutstanding), deltaCost) + + assessment := &fundsAssessment{ + status: fundsStatusUnknown, + currentOutstanding: currentOutstanding, + projectedOutstanding: projectedOutstanding, + minimumNeeded: big.NewInt(0), + } + + escrowBalance, err := s.GetEscrowBalance(ctx, session.Payer) + if err != nil { + assessment.checkErr = err + return assessment + } + if escrowBalance == nil { + return assessment + } + + assessment.escrowBalance = new(big.Int).Set(escrowBalance) + if projectedOutstanding.Cmp(escrowBalance) > 0 { + assessment.status = fundsStatusInsufficient + assessment.minimumNeeded = new(big.Int).Sub(projectedOutstanding, escrowBalance) + return assessment + } + + assessment.status = fundsStatusOK + return assessment +} + +func applyFundsAssessmentMetadata(session *repository.Session, assessment *fundsAssessment) { + if session.Metadata == nil { + session.Metadata = make(map[string]string) + } + + session.Metadata[fundsStatusKey] = assessment.status + session.Metadata[fundsCurrentOutstandingWeiKey] = assessment.currentOutstanding.String() + session.Metadata[fundsProjectedOutstandingWeiKey] = assessment.projectedOutstanding.String() + + if assessment.escrowBalance != nil { + session.Metadata[fundsEscrowBalanceWeiKey] = assessment.escrowBalance.String() + session.Metadata[fundsMinimumNeededWeiKey] = assessment.minimumNeeded.String() + } else { + delete(session.Metadata, fundsEscrowBalanceWeiKey) + delete(session.Metadata, fundsMinimumNeededWeiKey) + } + + if assessment.checkErr != nil { + session.Metadata[fundsCheckErrorKey] = assessment.checkErr.Error() + } else { + delete(session.Metadata, fundsCheckErrorKey) + } +} + 
+func needMoreFundsResponse(session *repository.Session, assessment *fundsAssessment) *providerv1.PaymentSessionResponse { + outstandingRAVs := make([]*commonv1.SignedRAV, 0, 1) + if session.CurrentRAV != nil { + outstandingRAVs = append(outstandingRAVs, sidecar.HorizonSignedRAVToProto(session.CurrentRAV)) + } + + return &providerv1.PaymentSessionResponse{ + Message: &providerv1.PaymentSessionResponse_NeedMoreFunds{ + NeedMoreFunds: &providerv1.NeedMoreFunds{ + OutstandingRavs: outstandingRAVs, + TotalOutstanding: commonv1.GRTFromBigInt(assessment.currentOutstanding), + EscrowBalance: commonv1.GRTFromBigInt(assessment.escrowBalance), + MinimumNeeded: commonv1.GRTFromBigInt(assessment.minimumNeeded), + }, + }, + } +} diff --git a/provider/gateway/handler_payment_session.go b/provider/gateway/handler_payment_session.go index 390bfb8..8b67f91 100644 --- a/provider/gateway/handler_payment_session.go +++ b/provider/gateway/handler_payment_session.go @@ -398,9 +398,45 @@ func (s *Gateway) handleUsageReport( computedCost, ) - // Update the session in the repository + assessment := s.assessSessionFunds(ctx, session) + applyFundsAssessmentMetadata(session, assessment) + + if assessment.unknown() { + if assessment.checkErr != nil { + s.logger.Warn("unable to determine escrow balance during PaymentSession; continuing", + zap.String("session_id", sessionID), + zap.Error(assessment.checkErr), + ) + } else { + s.logger.Warn("escrow balance unavailable during PaymentSession; continuing", + zap.String("session_id", sessionID), + ) + } + } + + if assessment.insufficient() { + session.End(commonv1.EndReason_END_REASON_PAYMENT_ISSUE) + } + + // Update the session in the repository after usage and funds evaluation. 
if err := s.repo.SessionUpdate(ctx, session); err != nil { - s.logger.Warn("failed to update session", zap.String("session_id", sessionID), zap.Error(err)) + s.logger.Warn("failed to update session", + zap.String("session_id", sessionID), + zap.Error(err), + ) + } + + if assessment.insufficient() { + s.logger.Info("stopping session due to insufficient funds", + zap.String("session_id", sessionID), + zap.Stringer("current_outstanding", assessment.currentOutstanding), + zap.Stringer("projected_outstanding", assessment.projectedOutstanding), + zap.Stringer("escrow_balance", assessment.escrowBalance), + zap.Stringer("minimum_needed", assessment.minimumNeeded), + ) + + stream.Send(needMoreFundsResponse(session, assessment)) + return awaitingRAV, true } if !awaitingRAV { diff --git a/sidecar/escrow_querier.go b/sidecar/escrow_querier.go index c53b29c..edcc20f 100644 --- a/sidecar/escrow_querier.go +++ b/sidecar/escrow_querier.go @@ -6,11 +6,27 @@ import ( "fmt" "math/big" "strings" + "sync" + "github.com/graphprotocol/substreams-data-service/contracts/artifacts" "github.com/streamingfast/eth-go" "github.com/streamingfast/eth-go/rpc" ) +var loadPaymentsEscrowGetBalanceFn = sync.OnceValues(func() (*eth.MethodDef, error) { + abi, err := artifacts.LoadABI("PaymentsEscrow") + if err != nil { + return nil, fmt.Errorf("loading PaymentsEscrow ABI: %w", err) + } + + fn := abi.FindFunctionByName("getBalance") + if fn == nil { + return nil, fmt.Errorf("getBalance function not found in PaymentsEscrow ABI") + } + + return fn, nil +}) + // EscrowQuerier provides methods to query the PaymentsEscrow contract type EscrowQuerier struct { rpcClient *rpc.Client @@ -28,17 +44,15 @@ func NewEscrowQuerier(rpcEndpoint string, escrowAddr eth.Address) *EscrowQuerier // GetBalance returns the escrow balance for a payer -> receiver via collector // This calls PaymentsEscrow.getBalance(payer, collector, receiver) func (q *EscrowQuerier) GetBalance(ctx context.Context, payer, collector, receiver 
eth.Address) (*big.Int, error) { - // Build the call data for getBalance(address,address,address) - // Function selector: keccak256("getBalance(address,address,address)")[:4] - // = 0xd6a58fd9 - selector := []byte{0xd6, 0xa5, 0x8f, 0xd9} + fn, err := loadPaymentsEscrowGetBalanceFn() + if err != nil { + return nil, err + } - // ABI encode the parameters (each address is 32 bytes, left-padded) - data := make([]byte, 4+32*3) - copy(data[:4], selector) - copy(data[4+12:4+32], payer[:]) - copy(data[4+32+12:4+64], collector[:]) - copy(data[4+64+12:4+96], receiver[:]) + data, err := fn.NewCall(payer, collector, receiver).Encode() + if err != nil { + return nil, fmt.Errorf("encoding getBalance call: %w", err) + } params := rpc.CallParams{ To: q.escrowAddr, diff --git a/test/integration/payment_session_low_funds_test.go b/test/integration/payment_session_low_funds_test.go new file mode 100644 index 0000000..fb630ab --- /dev/null +++ b/test/integration/payment_session_low_funds_test.go @@ -0,0 +1,365 @@ +package integration + +import ( + "context" + "crypto/tls" + "math/big" + "net" + "net/http" + "testing" + "time" + + "connectrpc.com/connect" + "github.com/streamingfast/eth-go" + "github.com/stretchr/testify/require" + "golang.org/x/net/http2" + + sds "github.com/graphprotocol/substreams-data-service" + consumersidecar "github.com/graphprotocol/substreams-data-service/consumer/sidecar" + "github.com/graphprotocol/substreams-data-service/horizon" + "github.com/graphprotocol/substreams-data-service/horizon/devenv" + commonv1 "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/common/v1" + consumerv1 "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/consumer/v1" + "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/consumer/v1/consumerv1connect" + providerv1 "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/provider/v1" + 
"github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/provider/v1/providerv1connect" + providergateway "github.com/graphprotocol/substreams-data-service/provider/gateway" + "github.com/graphprotocol/substreams-data-service/provider/repository" + "github.com/graphprotocol/substreams-data-service/sidecar" +) + +func TestPaymentSession_StopsOnLowFunds(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test in short mode") + } + + ctx := context.Background() + env := devenv.Get() + require.NotNil(t, env, "devenv not started") + + config := DefaultTestSetupConfig() + config.EscrowAmount = big.NewInt(1) + setup, err := env.SetupCustomPaymentParticipantsWithSigner(env.User1, env.User2, config) + require.NoError(t, err) + + repo := repository.NewInMemoryRepository() + gatewayClient, shutdown := startPaymentGatewayForTest(t, ":19015", &providergateway.Config{ + ListenAddr: ":19015", + ServiceProvider: env.User2.Address, + Domain: env.Domain(), + CollectorAddr: env.Collector.Address, + EscrowAddr: env.Escrow.Address, + RPCEndpoint: env.RPCURL, + PricingConfig: deterministicPricingConfig(), + DataPlaneEndpoint: "substreams.provider.example:443", + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + Repository: repo, + }) + defer shutdown() + + startResp := startGatewaySession(t, ctx, gatewayClient, env.User1.Address, env.User2.Address, env.DataService.Address, setup.SignerKey, env.Domain()) + stream := gatewayClient.PaymentSession(ctx) + + require.NoError(t, stream.Send(&providerv1.PaymentSessionRequest{ + SessionId: startResp.Msg.SessionId, + Message: &providerv1.PaymentSessionRequest_UsageReport{ + UsageReport: &providerv1.UsageReport{ + Usage: &commonv1.Usage{ + BlocksProcessed: 2, + BytesTransferred: 0, + Requests: 1, + }, + }, + }, + })) + + resp, err := stream.Receive() + require.NoError(t, err) + require.NotNil(t, resp.GetNeedMoreFunds(), "expected NeedMoreFunds response") + require.Nil(t, 
resp.GetRavRequest()) + + need := resp.GetNeedMoreFunds() + require.Len(t, need.GetOutstandingRavs(), 1) + require.Equal(t, 0, need.GetTotalOutstanding().ToBigInt().Cmp(big.NewInt(0))) + require.Equal(t, 0, need.GetEscrowBalance().ToBigInt().Cmp(big.NewInt(1))) + require.Equal(t, 0, need.GetMinimumNeeded().ToBigInt().Cmp(big.NewInt(1))) + + session, err := repo.SessionGet(ctx, startResp.Msg.SessionId) + require.NoError(t, err) + require.Equal(t, repository.SessionStatusTerminated, session.Status) + require.Equal(t, commonv1.EndReason_END_REASON_PAYMENT_ISSUE, session.EndReason) + require.NotNil(t, session.EndedAt) + require.NotNil(t, session.CurrentRAV) + require.Equal(t, 0, session.CurrentRAV.Message.ValueAggregate.Cmp(big.NewInt(0))) + require.Equal(t, uint64(0), session.BaselineBlocks) + require.Equal(t, 0, session.BaselineCost.Cmp(big.NewInt(0))) + require.Equal(t, 0, session.TotalCost.Cmp(big.NewInt(2))) + require.Equal(t, "insufficient", session.Metadata["funds_status"]) + require.Equal(t, "0", session.Metadata["funds_current_outstanding_wei"]) + require.Equal(t, "2", session.Metadata["funds_projected_outstanding_wei"]) + require.Equal(t, "1", session.Metadata["funds_escrow_balance_wei"]) + require.Equal(t, "1", session.Metadata["funds_minimum_needed_wei"]) + _, hasError := session.Metadata["funds_check_error"] + require.False(t, hasError) + + require.NoError(t, stream.CloseRequest()) + _ = stream.CloseResponse() +} + +func TestPaymentSession_ExactBalanceContinues(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test in short mode") + } + + ctx := context.Background() + env := devenv.Get() + require.NotNil(t, env, "devenv not started") + + config := DefaultTestSetupConfig() + config.EscrowAmount = big.NewInt(1) + setup, err := env.SetupCustomPaymentParticipantsWithSigner(env.User3, env.ServiceProvider, config) + require.NoError(t, err) + + repo := repository.NewInMemoryRepository() + gatewayClient, shutdown := 
startPaymentGatewayForTest(t, ":19016", &providergateway.Config{ + ListenAddr: ":19016", + ServiceProvider: env.ServiceProvider.Address, + Domain: env.Domain(), + CollectorAddr: env.Collector.Address, + EscrowAddr: env.Escrow.Address, + RPCEndpoint: env.RPCURL, + PricingConfig: deterministicPricingConfig(), + DataPlaneEndpoint: "substreams.provider.example:443", + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + Repository: repo, + }) + defer shutdown() + + startResp := startGatewaySession(t, ctx, gatewayClient, env.User3.Address, env.ServiceProvider.Address, env.DataService.Address, setup.SignerKey, env.Domain()) + stream := gatewayClient.PaymentSession(ctx) + + require.NoError(t, stream.Send(&providerv1.PaymentSessionRequest{ + SessionId: startResp.Msg.SessionId, + Message: &providerv1.PaymentSessionRequest_UsageReport{ + UsageReport: &providerv1.UsageReport{ + Usage: &commonv1.Usage{ + BlocksProcessed: 1, + BytesTransferred: 0, + Requests: 1, + }, + }, + }, + })) + + resp, err := stream.Receive() + require.NoError(t, err) + require.NotNil(t, resp.GetRavRequest(), "expected RAV request when projected outstanding equals escrow") + require.Nil(t, resp.GetNeedMoreFunds()) + + session, err := repo.SessionGet(ctx, startResp.Msg.SessionId) + require.NoError(t, err) + require.True(t, session.IsActive()) + require.Equal(t, "ok", session.Metadata["funds_status"]) + require.Equal(t, "0", session.Metadata["funds_current_outstanding_wei"]) + require.Equal(t, "1", session.Metadata["funds_projected_outstanding_wei"]) + require.Equal(t, "1", session.Metadata["funds_escrow_balance_wei"]) + require.Equal(t, "0", session.Metadata["funds_minimum_needed_wei"]) + + require.NoError(t, stream.CloseRequest()) + _ = stream.CloseResponse() +} + +func TestPaymentSession_FailsOpenWhenEscrowBalanceUnknown(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test in short mode") + } + + ctx := context.Background() + env := devenv.Get() + require.NotNil(t, 
env, "devenv not started") + + repo := repository.NewInMemoryRepository() + gatewayClient, shutdown := startPaymentGatewayForTest(t, ":19017", &providergateway.Config{ + ListenAddr: ":19017", + ServiceProvider: env.ServiceProvider.Address, + Domain: env.Domain(), + CollectorAddr: env.Collector.Address, + EscrowAddr: env.Escrow.Address, + PricingConfig: deterministicPricingConfig(), + DataPlaneEndpoint: "substreams.provider.example:443", + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + Repository: repo, + }) + defer shutdown() + + startResp := startGatewaySession(t, ctx, gatewayClient, env.Payer.Address, env.ServiceProvider.Address, env.DataService.Address, env.Payer.PrivateKey, env.Domain()) + stream := gatewayClient.PaymentSession(ctx) + + require.NoError(t, stream.Send(&providerv1.PaymentSessionRequest{ + SessionId: startResp.Msg.SessionId, + Message: &providerv1.PaymentSessionRequest_UsageReport{ + UsageReport: &providerv1.UsageReport{ + Usage: &commonv1.Usage{ + BlocksProcessed: 1, + BytesTransferred: 0, + Requests: 1, + }, + }, + }, + })) + + resp, err := stream.Receive() + require.NoError(t, err) + require.NotNil(t, resp.GetRavRequest(), "expected fail-open behavior to continue normal flow") + require.Nil(t, resp.GetNeedMoreFunds()) + + session, err := repo.SessionGet(ctx, startResp.Msg.SessionId) + require.NoError(t, err) + require.True(t, session.IsActive()) + require.Equal(t, "unknown", session.Metadata["funds_status"]) + require.Equal(t, "0", session.Metadata["funds_current_outstanding_wei"]) + require.Equal(t, "1", session.Metadata["funds_projected_outstanding_wei"]) + _, hasBalance := session.Metadata["funds_escrow_balance_wei"] + require.False(t, hasBalance) + _, hasNeeded := session.Metadata["funds_minimum_needed_wei"] + require.False(t, hasNeeded) + _, hasError := session.Metadata["funds_check_error"] + require.False(t, hasError) + + require.NoError(t, stream.CloseRequest()) + _ = stream.CloseResponse() +} + +func 
TestConsumerSidecar_ReportUsage_StopsOnLowFunds(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test in short mode") + } + + ctx := context.Background() + env := devenv.Get() + require.NotNil(t, env, "devenv not started") + + config := DefaultTestSetupConfig() + config.EscrowAmount = big.NewInt(1) + setup, err := env.SetupCustomPaymentParticipantsWithSigner(env.User2, env.User3, config) + require.NoError(t, err) + + repo := repository.NewInMemoryRepository() + _, shutdownProvider := startPaymentGatewayForTest(t, ":19018", &providergateway.Config{ + ListenAddr: ":19018", + ServiceProvider: env.User3.Address, + Domain: env.Domain(), + CollectorAddr: env.Collector.Address, + EscrowAddr: env.Escrow.Address, + RPCEndpoint: env.RPCURL, + PricingConfig: deterministicPricingConfig(), + DataPlaneEndpoint: "substreams.provider.example:443", + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + Repository: repo, + }) + defer shutdownProvider() + + consumerSidecar := consumersidecar.New(&consumersidecar.Config{ + ListenAddr: ":19019", + SignerKey: setup.SignerKey, + Domain: env.Domain(), + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + }, zlog.Named("consumer")) + go consumerSidecar.Run() + defer consumerSidecar.Shutdown(nil) + time.Sleep(100 * time.Millisecond) + + consumerClient := consumerv1connect.NewConsumerSidecarServiceClient(http.DefaultClient, "http://localhost:19019") + + initResp, err := consumerClient.Init(ctx, connect.NewRequest(&consumerv1.InitRequest{ + EscrowAccount: &commonv1.EscrowAccount{ + Payer: commonv1.AddressFromEth(env.User2.Address), + Receiver: commonv1.AddressFromEth(env.User3.Address), + DataService: commonv1.AddressFromEth(env.DataService.Address), + }, + ProviderControlPlaneEndpoint: "http://localhost:19018", + })) + require.NoError(t, err) + require.Equal(t, "substreams.provider.example:443", initResp.Msg.GetDataPlaneEndpoint()) + + usageResp, err := consumerClient.ReportUsage(ctx, 
connect.NewRequest(&consumerv1.ReportUsageRequest{ + SessionId: initResp.Msg.GetSession().GetSessionId(), + Usage: &commonv1.Usage{ + BlocksProcessed: 2, + BytesTransferred: 0, + Requests: 1, + }, + })) + require.NoError(t, err) + require.False(t, usageResp.Msg.GetShouldContinue()) + require.Equal(t, "need more funds", usageResp.Msg.GetStopReason()) + require.Nil(t, usageResp.Msg.GetUpdatedRav()) + + session, err := repo.SessionGet(ctx, initResp.Msg.GetSession().GetSessionId()) + require.NoError(t, err) + require.Equal(t, repository.SessionStatusTerminated, session.Status) + require.Equal(t, commonv1.EndReason_END_REASON_PAYMENT_ISSUE, session.EndReason) +} + +func deterministicPricingConfig() *sidecar.PricingConfig { + return &sidecar.PricingConfig{ + PricePerBlock: sds.NewGRTFromUint64(1), + PricePerByte: sds.ZeroGRT(), + } +} + +func startPaymentGatewayForTest(t *testing.T, endpoint string, config *providergateway.Config) (providerv1connect.PaymentGatewayServiceClient, func()) { + t.Helper() + + providerGateway := providergateway.New(config, zlog.Named("provider")) + go providerGateway.Run() + time.Sleep(100 * time.Millisecond) + + h2cClient := &http.Client{ + Transport: &http2.Transport{ + AllowHTTP: true, + DialTLSContext: func(ctx context.Context, network, addr string, _ *tls.Config) (net.Conn, error) { + return (&net.Dialer{}).DialContext(ctx, network, addr) + }, + }, + } + + client := providerv1connect.NewPaymentGatewayServiceClient(h2cClient, "http://localhost"+endpoint, connect.WithGRPC()) + return client, func() { + providerGateway.Shutdown(nil) + } +} + +func startGatewaySession( + t *testing.T, + ctx context.Context, + gatewayClient providerv1connect.PaymentGatewayServiceClient, + payer, serviceProvider, dataService eth.Address, + signerKey *eth.PrivateKey, + domain *horizon.Domain, +) *connect.Response[providerv1.StartSessionResponse] { + t.Helper() + + rav0 := &horizon.RAV{ + Payer: payer, + DataService: dataService, + ServiceProvider: 
serviceProvider, + TimestampNs: uint64(time.Now().UnixNano()), + ValueAggregate: big.NewInt(0), + } + signedRAV0, err := horizon.Sign(domain, rav0, signerKey) + require.NoError(t, err) + + startResp, err := gatewayClient.StartSession(ctx, connect.NewRequest(&providerv1.StartSessionRequest{ + EscrowAccount: &commonv1.EscrowAccount{ + Payer: commonv1.AddressFromEth(payer), + Receiver: commonv1.AddressFromEth(serviceProvider), + DataService: commonv1.AddressFromEth(dataService), + }, + InitialRav: sidecar.HorizonSignedRAVToProto(signedRAV0), + })) + require.NoError(t, err) + require.True(t, startResp.Msg.Accepted) + require.NotEmpty(t, startResp.Msg.SessionId) + return startResp +} From 017a445aace33464ce6843aa3a25dcc9428cc888 Mon Sep 17 00:00:00 2001 From: Juan Manuel Rodriguez Defago Date: Fri, 27 Mar 2026 02:38:50 -0300 Subject: [PATCH 10/17] provider: add deterministic RAV request thresholds - add provider-side cost-based RAV request threshold policy with a 10 GRT fallback - load rav_request_threshold from provider pricing YAML without changing wire pricing contracts - add gateway and consumer integration coverage for below-threshold continue and post-baseline reset behavior - update MVP backlog, sequencing, gap-analysis, and scope docs to mark MVP-012 complete --- cmd/sds/impl/provider_gateway.go | 30 +- docs/mvp-implementation-sequencing.md | 26 +- docs/mvp-scope.md | 1 + plans/mvp-gap-analysis.md | 3 + plans/mvp-implementation-backlog.md | 12 +- provider/gateway/gateway.go | 69 ++-- provider/gateway/handler_payment_session.go | 38 ++- provider/gateway/pricing_config.go | 85 +++++ provider/gateway/pricing_config_test.go | 49 +++ .../payment_session_close_propagation_test.go | 3 +- .../payment_session_consumer_wiring_test.go | 19 +- .../payment_session_low_funds_test.go | 82 ++--- .../payment_session_rav_request_test.go | 19 +- .../payment_session_rav_threshold_test.go | 304 ++++++++++++++++++ .../payment_session_underpay_test.go | 19 +- 
test/integration/sidecar_test.go | 20 +- 16 files changed, 626 insertions(+), 153 deletions(-) create mode 100644 provider/gateway/pricing_config.go create mode 100644 provider/gateway/pricing_config_test.go create mode 100644 test/integration/payment_session_rav_threshold_test.go diff --git a/cmd/sds/impl/provider_gateway.go b/cmd/sds/impl/provider_gateway.go index 651e182..9c2d0da 100644 --- a/cmd/sds/impl/provider_gateway.go +++ b/cmd/sds/impl/provider_gateway.go @@ -66,6 +66,7 @@ var ProviderGatewayCommand = Command( Pricing configuration should be provided via a YAML file with the following format: price_per_block: "0.000001" # Price per processed block in GRT price_per_byte: "0.0000000001" # Price per byte transferred in GRT + rav_request_threshold: "10 GRT" # Optional provider-side threshold for requesting a new RAV `), Flags(func(flags *pflag.FlagSet) { flags.String("grpc-listen-addr", ":9001", "Payment Gateway listen address (PUBLIC - consumer sidecars connect here)") @@ -197,13 +198,11 @@ func runProviderGateway(cmd *cobra.Command, args []string) error { } cli.NoError(transportConfig.Validate("provider gateway"), "invalid transport configuration") - // Load pricing configuration - var pricingConfig *sidecarlib.PricingConfig + // Load provider pricing and RAV request configuration. 
+ providerPricingConfig := gateway.DefaultProviderPricingConfig() if pricingConfigPath != "" { - pricingConfig, err = sidecarlib.LoadPricingConfig(pricingConfigPath) + providerPricingConfig, err = gateway.LoadProviderPricingConfig(pricingConfigPath) cli.NoError(err, "failed to load pricing config from %q", pricingConfigPath) - } else { - pricingConfig = sidecarlib.DefaultPricingConfig() } // Create repository from DSN (shared between both gateways) @@ -214,16 +213,17 @@ func runProviderGateway(cmd *cobra.Command, args []string) error { // Create Payment Gateway paymentConfig := &gateway.Config{ - ListenAddr: paymentListenAddr, - ServiceProvider: serviceProviderAddr, - Domain: domain, - CollectorAddr: collectorAddr, - EscrowAddr: escrowAddr, - RPCEndpoint: rpcEndpoint, - PricingConfig: pricingConfig, - DataPlaneEndpoint: dataPlaneEndpoint, - TransportConfig: transportConfig, - Repository: repo, + ListenAddr: paymentListenAddr, + ServiceProvider: serviceProviderAddr, + Domain: domain, + CollectorAddr: collectorAddr, + EscrowAddr: escrowAddr, + RPCEndpoint: rpcEndpoint, + PricingConfig: providerPricingConfig.ToPricingConfig(), + RAVRequestThreshold: providerPricingConfig.RAVRequestThreshold, + DataPlaneEndpoint: dataPlaneEndpoint, + TransportConfig: transportConfig, + Repository: repo, } paymentGateway := gateway.New(paymentConfig, providerLog) diff --git a/docs/mvp-implementation-sequencing.md b/docs/mvp-implementation-sequencing.md index 1de4aee..3d6cda8 100644 --- a/docs/mvp-implementation-sequencing.md +++ b/docs/mvp-implementation-sequencing.md @@ -118,15 +118,15 @@ Recommended sequence: Completed foundation: - `MVP-010` Implement session-local low-funds detection and provider terminal stop behavior during streaming +- `MVP-012` Add deterministic cost-based RAV issuance thresholds suitable for real runtime behavior Recommended next sequence: -1. `MVP-012` Add deterministic RAV issuance thresholds suitable for real runtime behavior -2. 
`MVP-014` Integrate provider gateway validation into the real provider streaming path -3. `MVP-015` Wire real byte metering from the provider/plugin path into gateway payment state -4. `MVP-011` Propagate provider low-funds stop decisions through consumer sidecar into the real client path -5. `MVP-016` Enforce gateway Continue/Stop decisions in the live provider stream lifecycle -6. `MVP-031` Wire the live PaymentSession and RAV-control loop into the real client/provider runtime path +1. `MVP-014` Integrate provider gateway validation into the real provider streaming path +2. `MVP-015` Wire real byte metering from the provider/plugin path into gateway payment state +3. `MVP-011` Propagate provider low-funds stop decisions through consumer sidecar into the real client path +4. `MVP-016` Enforce gateway Continue/Stop decisions in the live provider stream lifecycle +5. `MVP-031` Wire the live PaymentSession and RAV-control loop into the real client/provider runtime path Notes: @@ -134,6 +134,10 @@ Notes: - session-local exposure only - terminal stop on insufficient funds - fail-open if live escrow balance cannot be determined +- `MVP-012` is now the frozen normal RAV request policy for this lane: + - cost-based only + - compares unbaselined `delta_cost` against a provider-side `rav_request_threshold` + - defaults to `10 GRT` when the provider does not configure a threshold explicitly - `MVP-014` remains the main integration foundation in this lane. - `MVP-011` is partially advanced because the current sidecar wrapper path already stops on `NeedMoreFunds`, but the real client-facing ingress path is still unfinished. - `MVP-031` is effectively the capstone runtime-payment task because it depends on real provider and consumer integration plus thresholding. 
@@ -236,6 +240,7 @@ Already resolved: - `MVP-003` - `MVP-004` - `MVP-010` +- `MVP-012` - `MVP-027` - `MVP-033` @@ -245,7 +250,6 @@ Already resolved: - `MVP-005` - `MVP-007` - Runtime foundation: - - `MVP-012` - `MVP-014` - Provider state foundation: - `MVP-008` @@ -322,10 +326,10 @@ Example: ```text We are currently in Phase 1, Runtime foundation. -Implement MVP-012 only. -You may rely on MVP-004 as the frozen runtime billing/payment contract and MVP-010 as the frozen low-funds control contract. -Do not broaden into MVP-011 or MVP-016 except for strictly necessary supporting edits. -If you find that MVP-012 still requires unresolved semantics beyond those contracts, mark it blocked instead of choosing an implicit contract in code. +Implement MVP-014 only. +You may rely on MVP-004 as the frozen runtime billing/payment contract, MVP-010 as the frozen low-funds control contract, and MVP-012 as the frozen RAV request policy. +Do not broaden into MVP-011, MVP-015, or MVP-016 except for strictly necessary supporting edits. +If you find that MVP-014 still requires unresolved semantics beyond those contracts, mark it blocked instead of choosing an implicit contract in code. 
``` ## Notes diff --git a/docs/mvp-scope.md b/docs/mvp-scope.md index a1b4935..8c0f495 100644 --- a/docs/mvp-scope.md +++ b/docs/mvp-scope.md @@ -125,6 +125,7 @@ MVP network-discovery contract: - While streaming: - provider-authoritative usage advances - RAVs are requested and updated as needed + - provider-driven RAV requests are triggered by a cost-based threshold over unbaselined usage rather than on every usage report - accepted payment state advances on the provider side - low-funds conditions can be surfaced during the live stream - insufficient funds terminate the current SDS payment session for MVP rather than pausing it diff --git a/plans/mvp-gap-analysis.md b/plans/mvp-gap-analysis.md index d0dfb95..d3f3776 100644 --- a/plans/mvp-gap-analysis.md +++ b/plans/mvp-gap-analysis.md @@ -125,6 +125,7 @@ What already exists: - bidirectional payment session - RAV validation and authorization checks - session-local low-funds detection during `PaymentSession` usage handling +- deterministic cost-based RAV request thresholds in the `PaymentSession` path - terminal `NeedMoreFunds` response plus payment-issue session termination when live escrow is insufficient - persisted machine-readable funding metadata for `ok`, `insufficient`, and `unknown` session state - basic runtime/session status inspection @@ -319,6 +320,8 @@ The most important recent status changes versus the original draft are: - The repo now explicitly separates a public Payment Gateway from a private Plugin Gateway. - Session-local low-funds handling is no longer fully missing. - The payment-session path now evaluates projected session-local exposure against live escrow, fails open on unknown balance, and terminates the current session with `NeedMoreFunds` when funds are insufficient. +- Deterministic RAV issuance policy is no longer an open runtime gap. 
+ - The provider now requests new RAVs based on unbaselined `delta_cost` reaching a provider-side `rav_request_threshold`, with a built-in `10 GRT` fallback when not configured. - Real-path integration scaffolding is stronger. - The repo now includes stronger firecore/plugin integration setup and a `TestFirecore` scaffold, even though that path is not yet MVP-complete. - Consumer-side MVP UX is still notably behind the revised scope. diff --git a/plans/mvp-implementation-backlog.md b/plans/mvp-implementation-backlog.md index e1e5202..05fdbb7 100644 --- a/plans/mvp-implementation-backlog.md +++ b/plans/mvp-implementation-backlog.md @@ -99,7 +99,7 @@ These assumptions are referenced by task ID so it is clear which scope decisions | MVP-009 | `not_started` | provider-state | `A3`, `A5` | `MVP-003`, `MVP-022`, `MVP-029` | `D`, `F` | Expose provider inspection and settlement-data retrieval APIs for accepted and collectible RAV state | | MVP-010 | `done` | funding-control | `A6` | `MVP-004` | `C` | Implement session-local low-funds detection and provider terminal stop behavior during streaming | | MVP-011 | `in_progress` | funding-control | `A6` | `MVP-010` | `C` | Propagate provider low-funds stop decisions through consumer sidecar into the real client path | -| MVP-012 | `not_started` | funding-control | none | `MVP-004` | `A`, `C` | Add deterministic RAV issuance thresholds suitable for real runtime behavior | +| MVP-012 | `done` | funding-control | none | `MVP-004` | `A`, `C` | Add deterministic cost-based RAV issuance thresholds suitable for real runtime behavior | | MVP-013 | `deferred` | consumer | `A3` | none | none | Post-MVP only: implement true provider-authoritative payment-session reconnect/resume semantics | | MVP-014 | `in_progress` | provider-integration | `A3` | `MVP-004` | `A` | Integrate the public Payment Gateway and private Plugin Gateway into the real provider streaming path | | MVP-015 | `in_progress` | provider-integration | `A3` | `MVP-004`, 
`MVP-014` | `A`, `C` | Wire real byte metering and session correlation from the plugin path into the payment-state repository used by the gateway | @@ -343,16 +343,18 @@ These assumptions are referenced by task ID so it is clear which scope decisions - Verify: - Add integration/manual verification showing the real client path stops when the provider surfaces low funds during live streaming. -- [ ] MVP-012 Add deterministic RAV issuance thresholds suitable for real runtime behavior. +- [x] MVP-012 Add deterministic cost-based RAV issuance thresholds suitable for real runtime behavior. - Context: - The current runtime/payment loop foundation exists, but the real-runtime issuance policy still needs to be made explicit. - Assumptions: - none - Done when: - - RAV issuance is controlled by explicit policy such as value/time/provider-request thresholds. - - Threshold behavior is documented and tested. + - Provider requests a new RAV only when unbaselined `delta_cost` since the last accepted RAV reaches a deterministic provider-side threshold. + - The threshold is configured through provider pricing YAML as `rav_request_threshold`, with a documented fallback of `10 GRT` when omitted. + - The threshold policy remains provider-internal and is not exposed through shared pricing protobufs or handshake payloads. + - Threshold behavior is covered for below-threshold continue, threshold-triggered request, and post-acceptance baseline reset. - Verify: - - Add tests that show repeated usage does not force a signature on every report unless policy requires it. + - Integration coverage shows repeated usage no longer forces a RAV request on every report and only triggers a request once `delta_cost >= rav_request_threshold`. 
## Real Provider and Consumer Integration Tasks diff --git a/provider/gateway/gateway.go b/provider/gateway/gateway.go index 9ef90f6..921a3f3 100644 --- a/provider/gateway/gateway.go +++ b/provider/gateway/gateway.go @@ -8,6 +8,7 @@ import ( "connectrpc.com/connect" "github.com/alphadose/haxmap" + sds "github.com/graphprotocol/substreams-data-service" "github.com/graphprotocol/substreams-data-service/horizon" "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/provider/v1/providerv1connect" "github.com/graphprotocol/substreams-data-service/provider/repository" @@ -53,9 +54,10 @@ type Gateway struct { collectorQuerier sidecar.CollectorAuthorizer // Pricing configuration - pricingConfig *sidecar.PricingConfig - dataPlaneEndpoint string - transportConfig sidecar.ServerTransportConfig + pricingConfig *sidecar.PricingConfig + ravRequestThreshold *big.Int + dataPlaneEndpoint string + transportConfig sidecar.ServerTransportConfig authCache *haxmap.Map[string, authCacheEntry] @@ -64,15 +66,16 @@ type Gateway struct { } type Config struct { - ListenAddr string - ServiceProvider eth.Address - Domain *horizon.Domain - CollectorAddr eth.Address - EscrowAddr eth.Address - RPCEndpoint string - PricingConfig *sidecar.PricingConfig - DataPlaneEndpoint string - TransportConfig sidecar.ServerTransportConfig + ListenAddr string + ServiceProvider eth.Address + Domain *horizon.Domain + CollectorAddr eth.Address + EscrowAddr eth.Address + RPCEndpoint string + PricingConfig *sidecar.PricingConfig + RAVRequestThreshold sds.GRT + DataPlaneEndpoint string + TransportConfig sidecar.ServerTransportConfig // Repository provides session/usage state storage. // If nil, an in-memory repository is created. 
@@ -99,6 +102,10 @@ func New(config *Config, logger *zap.Logger) *Gateway { if pricingConfig == nil { pricingConfig = sidecar.DefaultPricingConfig() } + ravRequestThreshold := config.RAVRequestThreshold + if ravRequestThreshold.IsZero() { + ravRequestThreshold = DefaultRAVRequestThreshold() + } // Use provided repository or create an in-memory one as fallback repo := config.Repository @@ -108,20 +115,21 @@ func New(config *Config, logger *zap.Logger) *Gateway { } return &Gateway{ - Shutter: shutter.New(), - listenAddr: config.ListenAddr, - logger: logger, - serviceProvider: config.ServiceProvider, - domain: config.Domain, - collectorAddr: config.CollectorAddr, - escrowAddr: config.EscrowAddr, - escrowQuerier: escrowQuerier, - collectorQuerier: collectorQuerier, - pricingConfig: pricingConfig, - dataPlaneEndpoint: config.DataPlaneEndpoint, - transportConfig: config.TransportConfig, - authCache: haxmap.New[string, authCacheEntry](), - repo: repo, + Shutter: shutter.New(), + listenAddr: config.ListenAddr, + logger: logger, + serviceProvider: config.ServiceProvider, + domain: config.Domain, + collectorAddr: config.CollectorAddr, + escrowAddr: config.EscrowAddr, + escrowQuerier: escrowQuerier, + collectorQuerier: collectorQuerier, + pricingConfig: pricingConfig, + ravRequestThreshold: ravRequestThreshold.BigInt(), + dataPlaneEndpoint: config.DataPlaneEndpoint, + transportConfig: config.TransportConfig, + authCache: haxmap.New[string, authCacheEntry](), + repo: repo, } } @@ -148,6 +156,15 @@ func (s *Gateway) SessionCount() int { return s.repo.SessionCount(context.Background()) } +func (s *Gateway) shouldRequestRAV(session *repository.Session) bool { + if session == nil || session.CurrentRAV == nil || s.ravRequestThreshold == nil { + return false + } + + _, _, _, deltaCost := session.UsageDeltaSinceBaseline() + return deltaCost.Cmp(s.ravRequestThreshold) >= 0 +} + func (s *Gateway) Run() { // Connect/HTTP server for Payment Gateway service handlerGetters := 
[]connectrpc.HandlerGetter{ diff --git a/provider/gateway/handler_payment_session.go b/provider/gateway/handler_payment_session.go index 8b67f91..4c4a315 100644 --- a/provider/gateway/handler_payment_session.go +++ b/provider/gateway/handler_payment_session.go @@ -439,29 +439,27 @@ func (s *Gateway) handleUsageReport( return awaitingRAV, true } - if !awaitingRAV { + if !awaitingRAV && s.shouldRequestRAV(session) { blocks, bytes, reqs, deltaCost := session.UsageDeltaSinceBaseline() - if deltaCost.Sign() > 0 { - currentRAV := session.CurrentRAV - if currentRAV != nil { - usage := &commonv1.Usage{ - BlocksProcessed: blocks, - BytesTransferred: bytes, - Requests: reqs, - Cost: commonv1.GRTFromBigInt(deltaCost), - } + currentRAV := session.CurrentRAV + if currentRAV != nil { + usage := &commonv1.Usage{ + BlocksProcessed: blocks, + BytesTransferred: bytes, + Requests: reqs, + Cost: commonv1.GRTFromBigInt(deltaCost), + } - stream.Send(&providerv1.PaymentSessionResponse{ - Message: &providerv1.PaymentSessionResponse_RavRequest{ - RavRequest: &providerv1.RAVRequest{ - CurrentRav: sidecar.HorizonSignedRAVToProto(currentRAV), - Usage: usage, - Deadline: uint64(time.Now().Add(30 * time.Second).Unix()), - }, + stream.Send(&providerv1.PaymentSessionResponse{ + Message: &providerv1.PaymentSessionResponse_RavRequest{ + RavRequest: &providerv1.RAVRequest{ + CurrentRav: sidecar.HorizonSignedRAVToProto(currentRAV), + Usage: usage, + Deadline: uint64(time.Now().Add(30 * time.Second).Unix()), }, - }) - return true, false - } + }, + }) + return true, false } } diff --git a/provider/gateway/pricing_config.go b/provider/gateway/pricing_config.go new file mode 100644 index 0000000..6568358 --- /dev/null +++ b/provider/gateway/pricing_config.go @@ -0,0 +1,85 @@ +package gateway + +import ( + "fmt" + "os" + + sds "github.com/graphprotocol/substreams-data-service" + "github.com/graphprotocol/substreams-data-service/sidecar" + "gopkg.in/yaml.v3" +) + +var defaultRAVRequestThreshold = 
sds.MustNewGRT("10 GRT") + +// ProviderPricingConfig is the provider-gateway runtime pricing policy loaded from YAML. +// It includes the shared billing prices plus provider-only RAV request policy. +type ProviderPricingConfig struct { + PricePerBlock sds.GRT + PricePerByte sds.GRT + RAVRequestThreshold sds.GRT +} + +type providerPricingConfigYAML struct { + PricePerBlock sds.GRT `yaml:"price_per_block"` + PricePerByte sds.GRT `yaml:"price_per_byte"` + RAVRequestThreshold *sds.GRT `yaml:"rav_request_threshold"` +} + +// DefaultRAVRequestThreshold returns the fallback provider-side RAV request threshold. +func DefaultRAVRequestThreshold() sds.GRT { + return defaultRAVRequestThreshold +} + +// DefaultProviderPricingConfig returns the default provider pricing and RAV request policy. +func DefaultProviderPricingConfig() *ProviderPricingConfig { + pricingConfig := sidecar.DefaultPricingConfig() + return &ProviderPricingConfig{ + PricePerBlock: pricingConfig.PricePerBlock, + PricePerByte: pricingConfig.PricePerByte, + RAVRequestThreshold: DefaultRAVRequestThreshold(), + } +} + +// LoadProviderPricingConfig loads provider pricing and runtime request policy from a YAML file. +func LoadProviderPricingConfig(path string) (*ProviderPricingConfig, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("reading pricing config: %w", err) + } + + return ParseProviderPricingConfig(data) +} + +// ParseProviderPricingConfig parses provider pricing and runtime request policy from YAML bytes. 
+func ParseProviderPricingConfig(data []byte) (*ProviderPricingConfig, error) { + var raw providerPricingConfigYAML + if err := yaml.Unmarshal(data, &raw); err != nil { + return nil, fmt.Errorf("parsing pricing config: %w", err) + } + + threshold := DefaultRAVRequestThreshold() + if raw.RAVRequestThreshold != nil { + if raw.RAVRequestThreshold.IsZero() { + return nil, fmt.Errorf("invalid rav_request_threshold: must be greater than zero") + } + threshold = *raw.RAVRequestThreshold + } + + return &ProviderPricingConfig{ + PricePerBlock: raw.PricePerBlock, + PricePerByte: raw.PricePerByte, + RAVRequestThreshold: threshold, + }, nil +} + +// ToPricingConfig returns the shared billing pricing config used by runtime/session code. +func (c *ProviderPricingConfig) ToPricingConfig() *sidecar.PricingConfig { + if c == nil { + return nil + } + + return &sidecar.PricingConfig{ + PricePerBlock: c.PricePerBlock, + PricePerByte: c.PricePerByte, + } +} diff --git a/provider/gateway/pricing_config_test.go b/provider/gateway/pricing_config_test.go new file mode 100644 index 0000000..b5ec5fd --- /dev/null +++ b/provider/gateway/pricing_config_test.go @@ -0,0 +1,49 @@ +package gateway + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestParseProviderPricingConfig_WithThreshold(t *testing.T) { + config, err := ParseProviderPricingConfig([]byte(` +price_per_block: "0.000001 GRT" +price_per_byte: "0.0000000001 GRT" +rav_request_threshold: "2 GRT" +`)) + require.NoError(t, err) + + assert.Equal(t, "0.000001 GRT", config.PricePerBlock.String()) + assert.Equal(t, "0.0000000001 GRT", config.PricePerByte.String()) + assert.Equal(t, "2 GRT", config.RAVRequestThreshold.String()) +} + +func TestParseProviderPricingConfig_DefaultThreshold(t *testing.T) { + config, err := ParseProviderPricingConfig([]byte(` +price_per_block: "0.000001 GRT" +price_per_byte: "0.0000000001 GRT" +`)) + require.NoError(t, err) + + assert.Equal(t, "10 
GRT", config.RAVRequestThreshold.String()) +} + +func TestParseProviderPricingConfig_RejectsZeroThreshold(t *testing.T) { + config, err := ParseProviderPricingConfig([]byte(` +price_per_block: "0.000001 GRT" +price_per_byte: "0.0000000001 GRT" +rav_request_threshold: "0 GRT" +`)) + require.Error(t, err) + assert.Nil(t, config) + assert.Contains(t, err.Error(), "rav_request_threshold") +} + +func TestDefaultProviderPricingConfig(t *testing.T) { + config := DefaultProviderPricingConfig() + + require.NotNil(t, config) + assert.Equal(t, "10 GRT", config.RAVRequestThreshold.String()) +} diff --git a/test/integration/payment_session_close_propagation_test.go b/test/integration/payment_session_close_propagation_test.go index 0fbb08c..f6f6c8d 100644 --- a/test/integration/payment_session_close_propagation_test.go +++ b/test/integration/payment_session_close_propagation_test.go @@ -48,7 +48,8 @@ func TestSessionClose_ConsumerEndSession_MakesProviderInactive(t *testing.T) { PricePerBlock: sds.NewGRTFromUint64(1), PricePerByte: sds.ZeroGRT(), }, - TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + RAVRequestThreshold: sds.NewGRTFromUint64(1), + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, }, zlog.Named("provider")) go providerGateway.Run() defer providerGateway.Shutdown(nil) diff --git a/test/integration/payment_session_consumer_wiring_test.go b/test/integration/payment_session_consumer_wiring_test.go index 6e1b608..e5e9a53 100644 --- a/test/integration/payment_session_consumer_wiring_test.go +++ b/test/integration/payment_session_consumer_wiring_test.go @@ -44,15 +44,16 @@ func TestConsumerSidecar_ReportUsage_WiresPaymentSessionLoop(t *testing.T) { } providerGateway := providergateway.New(&providergateway.Config{ - ListenAddr: ":19013", - ServiceProvider: env.ServiceProvider.Address, - Domain: domain, - CollectorAddr: env.Collector.Address, - EscrowAddr: env.Escrow.Address, - RPCEndpoint: env.RPCURL, - PricingConfig: pricingConfig, - 
DataPlaneEndpoint: "substreams.provider.example:443", - TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + ListenAddr: ":19013", + ServiceProvider: env.ServiceProvider.Address, + Domain: domain, + CollectorAddr: env.Collector.Address, + EscrowAddr: env.Escrow.Address, + RPCEndpoint: env.RPCURL, + PricingConfig: pricingConfig, + RAVRequestThreshold: sds.NewGRTFromUint64(1), + DataPlaneEndpoint: "substreams.provider.example:443", + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, }, zlog.Named("provider")) go providerGateway.Run() defer providerGateway.Shutdown(nil) diff --git a/test/integration/payment_session_low_funds_test.go b/test/integration/payment_session_low_funds_test.go index fb630ab..f168b87 100644 --- a/test/integration/payment_session_low_funds_test.go +++ b/test/integration/payment_session_low_funds_test.go @@ -44,16 +44,17 @@ func TestPaymentSession_StopsOnLowFunds(t *testing.T) { repo := repository.NewInMemoryRepository() gatewayClient, shutdown := startPaymentGatewayForTest(t, ":19015", &providergateway.Config{ - ListenAddr: ":19015", - ServiceProvider: env.User2.Address, - Domain: env.Domain(), - CollectorAddr: env.Collector.Address, - EscrowAddr: env.Escrow.Address, - RPCEndpoint: env.RPCURL, - PricingConfig: deterministicPricingConfig(), - DataPlaneEndpoint: "substreams.provider.example:443", - TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, - Repository: repo, + ListenAddr: ":19015", + ServiceProvider: env.User2.Address, + Domain: env.Domain(), + CollectorAddr: env.Collector.Address, + EscrowAddr: env.Escrow.Address, + RPCEndpoint: env.RPCURL, + PricingConfig: deterministicPricingConfig(), + RAVRequestThreshold: sds.NewGRTFromUint64(1), + DataPlaneEndpoint: "substreams.provider.example:443", + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + Repository: repo, }) defer shutdown() @@ -122,16 +123,17 @@ func TestPaymentSession_ExactBalanceContinues(t *testing.T) { repo := 
repository.NewInMemoryRepository() gatewayClient, shutdown := startPaymentGatewayForTest(t, ":19016", &providergateway.Config{ - ListenAddr: ":19016", - ServiceProvider: env.ServiceProvider.Address, - Domain: env.Domain(), - CollectorAddr: env.Collector.Address, - EscrowAddr: env.Escrow.Address, - RPCEndpoint: env.RPCURL, - PricingConfig: deterministicPricingConfig(), - DataPlaneEndpoint: "substreams.provider.example:443", - TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, - Repository: repo, + ListenAddr: ":19016", + ServiceProvider: env.ServiceProvider.Address, + Domain: env.Domain(), + CollectorAddr: env.Collector.Address, + EscrowAddr: env.Escrow.Address, + RPCEndpoint: env.RPCURL, + PricingConfig: deterministicPricingConfig(), + RAVRequestThreshold: sds.NewGRTFromUint64(1), + DataPlaneEndpoint: "substreams.provider.example:443", + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + Repository: repo, }) defer shutdown() @@ -180,15 +182,16 @@ func TestPaymentSession_FailsOpenWhenEscrowBalanceUnknown(t *testing.T) { repo := repository.NewInMemoryRepository() gatewayClient, shutdown := startPaymentGatewayForTest(t, ":19017", &providergateway.Config{ - ListenAddr: ":19017", - ServiceProvider: env.ServiceProvider.Address, - Domain: env.Domain(), - CollectorAddr: env.Collector.Address, - EscrowAddr: env.Escrow.Address, - PricingConfig: deterministicPricingConfig(), - DataPlaneEndpoint: "substreams.provider.example:443", - TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, - Repository: repo, + ListenAddr: ":19017", + ServiceProvider: env.ServiceProvider.Address, + Domain: env.Domain(), + CollectorAddr: env.Collector.Address, + EscrowAddr: env.Escrow.Address, + PricingConfig: deterministicPricingConfig(), + RAVRequestThreshold: sds.NewGRTFromUint64(1), + DataPlaneEndpoint: "substreams.provider.example:443", + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + Repository: repo, }) defer shutdown() @@ -246,16 
+249,17 @@ func TestConsumerSidecar_ReportUsage_StopsOnLowFunds(t *testing.T) { repo := repository.NewInMemoryRepository() _, shutdownProvider := startPaymentGatewayForTest(t, ":19018", &providergateway.Config{ - ListenAddr: ":19018", - ServiceProvider: env.User3.Address, - Domain: env.Domain(), - CollectorAddr: env.Collector.Address, - EscrowAddr: env.Escrow.Address, - RPCEndpoint: env.RPCURL, - PricingConfig: deterministicPricingConfig(), - DataPlaneEndpoint: "substreams.provider.example:443", - TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, - Repository: repo, + ListenAddr: ":19018", + ServiceProvider: env.User3.Address, + Domain: env.Domain(), + CollectorAddr: env.Collector.Address, + EscrowAddr: env.Escrow.Address, + RPCEndpoint: env.RPCURL, + PricingConfig: deterministicPricingConfig(), + RAVRequestThreshold: sds.NewGRTFromUint64(1), + DataPlaneEndpoint: "substreams.provider.example:443", + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + Repository: repo, }) defer shutdownProvider() diff --git a/test/integration/payment_session_rav_request_test.go b/test/integration/payment_session_rav_request_test.go index de5a192..334bd69 100644 --- a/test/integration/payment_session_rav_request_test.go +++ b/test/integration/payment_session_rav_request_test.go @@ -45,15 +45,16 @@ func TestPaymentSession_ProviderRequestsRAVOnUsage(t *testing.T) { } providerConfig := &providergateway.Config{ - ListenAddr: ":19007", - ServiceProvider: env.ServiceProvider.Address, - Domain: domain, - CollectorAddr: env.Collector.Address, - EscrowAddr: env.Escrow.Address, - RPCEndpoint: env.RPCURL, - PricingConfig: pricingConfig, - DataPlaneEndpoint: "substreams.provider.example:443", - TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + ListenAddr: ":19007", + ServiceProvider: env.ServiceProvider.Address, + Domain: domain, + CollectorAddr: env.Collector.Address, + EscrowAddr: env.Escrow.Address, + RPCEndpoint: env.RPCURL, + PricingConfig: 
pricingConfig, + RAVRequestThreshold: sds.NewGRTFromUint64(1), + DataPlaneEndpoint: "substreams.provider.example:443", + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, } providerGateway := providergateway.New(providerConfig, zlog.Named("provider")) go providerGateway.Run() diff --git a/test/integration/payment_session_rav_threshold_test.go b/test/integration/payment_session_rav_threshold_test.go new file mode 100644 index 0000000..97a9c80 --- /dev/null +++ b/test/integration/payment_session_rav_threshold_test.go @@ -0,0 +1,304 @@ +package integration + +import ( + "context" + "math/big" + "net/http" + "testing" + "time" + + "connectrpc.com/connect" + sds "github.com/graphprotocol/substreams-data-service" + consumersidecar "github.com/graphprotocol/substreams-data-service/consumer/sidecar" + "github.com/graphprotocol/substreams-data-service/horizon" + "github.com/graphprotocol/substreams-data-service/horizon/devenv" + commonv1 "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/common/v1" + consumerv1 "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/consumer/v1" + "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/consumer/v1/consumerv1connect" + providerv1 "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/provider/v1" + providergateway "github.com/graphprotocol/substreams-data-service/provider/gateway" + "github.com/graphprotocol/substreams-data-service/provider/repository" + "github.com/graphprotocol/substreams-data-service/sidecar" + "github.com/stretchr/testify/require" +) + +func TestPaymentSession_BelowThresholdContinuesWithoutRAVRequest(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test in short mode") + } + + ctx := context.Background() + env := devenv.Get() + require.NotNil(t, env, "devenv not started") + + setup, err := env.SetupTestWithSigner(nil) + require.NoError(t, err) + + 
gatewayClient, shutdown := startPaymentGatewayForTest(t, ":19020", &providergateway.Config{ + ListenAddr: ":19020", + ServiceProvider: env.ServiceProvider.Address, + Domain: env.Domain(), + CollectorAddr: env.Collector.Address, + EscrowAddr: env.Escrow.Address, + RPCEndpoint: env.RPCURL, + PricingConfig: deterministicPricingConfig(), + RAVRequestThreshold: sds.NewGRTFromUint64(2), + DataPlaneEndpoint: "substreams.provider.example:443", + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + Repository: repository.NewInMemoryRepository(), + }) + defer shutdown() + + startResp := startGatewaySession(t, ctx, gatewayClient, env.Payer.Address, env.ServiceProvider.Address, env.DataService.Address, setup.SignerKey, env.Domain()) + stream := gatewayClient.PaymentSession(ctx) + + require.NoError(t, stream.Send(&providerv1.PaymentSessionRequest{ + SessionId: startResp.Msg.SessionId, + Message: &providerv1.PaymentSessionRequest_UsageReport{ + UsageReport: &providerv1.UsageReport{ + Usage: &commonv1.Usage{ + BlocksProcessed: 1, + BytesTransferred: 0, + Requests: 1, + }, + }, + }, + })) + + resp, err := stream.Receive() + require.NoError(t, err) + require.Nil(t, resp.GetRavRequest()) + require.NotNil(t, resp.GetSessionControl()) + require.Equal(t, providerv1.SessionControl_ACTION_CONTINUE, resp.GetSessionControl().GetAction()) + + require.NoError(t, stream.CloseRequest()) + _ = stream.CloseResponse() +} + +func TestPaymentSession_AboveThresholdRequestsRAV(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test in short mode") + } + + ctx := context.Background() + env := devenv.Get() + require.NotNil(t, env, "devenv not started") + + setup, err := env.SetupTestWithSigner(nil) + require.NoError(t, err) + + gatewayClient, shutdown := startPaymentGatewayForTest(t, ":19021", &providergateway.Config{ + ListenAddr: ":19021", + ServiceProvider: env.ServiceProvider.Address, + Domain: env.Domain(), + CollectorAddr: env.Collector.Address, + EscrowAddr: 
env.Escrow.Address, + RPCEndpoint: env.RPCURL, + PricingConfig: deterministicPricingConfig(), + RAVRequestThreshold: sds.NewGRTFromUint64(2), + DataPlaneEndpoint: "substreams.provider.example:443", + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + Repository: repository.NewInMemoryRepository(), + }) + defer shutdown() + + startResp := startGatewaySession(t, ctx, gatewayClient, env.Payer.Address, env.ServiceProvider.Address, env.DataService.Address, setup.SignerKey, env.Domain()) + stream := gatewayClient.PaymentSession(ctx) + + require.NoError(t, stream.Send(&providerv1.PaymentSessionRequest{ + SessionId: startResp.Msg.SessionId, + Message: &providerv1.PaymentSessionRequest_UsageReport{ + UsageReport: &providerv1.UsageReport{ + Usage: &commonv1.Usage{ + BlocksProcessed: 3, + BytesTransferred: 0, + Requests: 1, + }, + }, + }, + })) + + resp, err := stream.Receive() + require.NoError(t, err) + require.NotNil(t, resp.GetRavRequest()) + require.Equal(t, 0, resp.GetRavRequest().GetUsage().GetCost().ToBigInt().Cmp(big.NewInt(3))) + + require.NoError(t, stream.CloseRequest()) + _ = stream.CloseResponse() +} + +func TestPaymentSession_AcceptedRAVResetsThresholdWindow(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test in short mode") + } + + ctx := context.Background() + env := devenv.Get() + require.NotNil(t, env, "devenv not started") + + setup, err := env.SetupTestWithSigner(nil) + require.NoError(t, err) + + gatewayClient, shutdown := startPaymentGatewayForTest(t, ":19022", &providergateway.Config{ + ListenAddr: ":19022", + ServiceProvider: env.ServiceProvider.Address, + Domain: env.Domain(), + CollectorAddr: env.Collector.Address, + EscrowAddr: env.Escrow.Address, + RPCEndpoint: env.RPCURL, + PricingConfig: deterministicPricingConfig(), + RAVRequestThreshold: sds.NewGRTFromUint64(2), + DataPlaneEndpoint: "substreams.provider.example:443", + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + Repository: 
repository.NewInMemoryRepository(), + }) + defer shutdown() + + startResp := startGatewaySession(t, ctx, gatewayClient, env.Payer.Address, env.ServiceProvider.Address, env.DataService.Address, setup.SignerKey, env.Domain()) + stream := gatewayClient.PaymentSession(ctx) + + require.NoError(t, stream.Send(&providerv1.PaymentSessionRequest{ + SessionId: startResp.Msg.SessionId, + Message: &providerv1.PaymentSessionRequest_UsageReport{ + UsageReport: &providerv1.UsageReport{ + Usage: &commonv1.Usage{ + BlocksProcessed: 2, + BytesTransferred: 0, + Requests: 1, + }, + }, + }, + })) + + resp1, err := stream.Receive() + require.NoError(t, err) + require.NotNil(t, resp1.GetRavRequest()) + + rav1 := &horizon.RAV{ + Payer: env.Payer.Address, + DataService: env.DataService.Address, + ServiceProvider: env.ServiceProvider.Address, + TimestampNs: uint64(time.Now().UnixNano()), + ValueAggregate: big.NewInt(2), + } + signedRAV1, err := horizon.Sign(env.Domain(), rav1, setup.SignerKey) + require.NoError(t, err) + + require.NoError(t, stream.Send(&providerv1.PaymentSessionRequest{ + SessionId: startResp.Msg.SessionId, + Message: &providerv1.PaymentSessionRequest_RavSubmission{ + RavSubmission: &providerv1.SignedRAVSubmission{ + SignedRav: sidecar.HorizonSignedRAVToProto(signedRAV1), + Usage: resp1.GetRavRequest().GetUsage(), + }, + }, + })) + + resp2, err := stream.Receive() + require.NoError(t, err) + require.NotNil(t, resp2.GetSessionControl()) + require.Equal(t, providerv1.SessionControl_ACTION_CONTINUE, resp2.GetSessionControl().GetAction()) + + require.NoError(t, stream.Send(&providerv1.PaymentSessionRequest{ + SessionId: startResp.Msg.SessionId, + Message: &providerv1.PaymentSessionRequest_UsageReport{ + UsageReport: &providerv1.UsageReport{ + Usage: &commonv1.Usage{ + BlocksProcessed: 1, + BytesTransferred: 0, + Requests: 1, + }, + }, + }, + })) + + resp3, err := stream.Receive() + require.NoError(t, err) + require.Nil(t, resp3.GetRavRequest()) + require.NotNil(t, 
resp3.GetSessionControl()) + require.Equal(t, providerv1.SessionControl_ACTION_CONTINUE, resp3.GetSessionControl().GetAction()) + + require.NoError(t, stream.Send(&providerv1.PaymentSessionRequest{ + SessionId: startResp.Msg.SessionId, + Message: &providerv1.PaymentSessionRequest_UsageReport{ + UsageReport: &providerv1.UsageReport{ + Usage: &commonv1.Usage{ + BlocksProcessed: 1, + BytesTransferred: 0, + Requests: 1, + }, + }, + }, + })) + + resp4, err := stream.Receive() + require.NoError(t, err) + require.NotNil(t, resp4.GetRavRequest()) + require.Equal(t, 0, resp4.GetRavRequest().GetUsage().GetCost().ToBigInt().Cmp(big.NewInt(2))) + + require.NoError(t, stream.CloseRequest()) + _ = stream.CloseResponse() +} + +func TestConsumerSidecar_ReportUsage_BelowThresholdContinuesWithoutUpdatedRAV(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test in short mode") + } + + ctx := context.Background() + env := devenv.Get() + require.NotNil(t, env, "devenv not started") + + setup, err := env.SetupTestWithSigner(nil) + require.NoError(t, err) + + _, shutdownProvider := startPaymentGatewayForTest(t, ":19023", &providergateway.Config{ + ListenAddr: ":19023", + ServiceProvider: env.ServiceProvider.Address, + Domain: env.Domain(), + CollectorAddr: env.Collector.Address, + EscrowAddr: env.Escrow.Address, + RPCEndpoint: env.RPCURL, + PricingConfig: deterministicPricingConfig(), + RAVRequestThreshold: sds.NewGRTFromUint64(2), + DataPlaneEndpoint: "substreams.provider.example:443", + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + Repository: repository.NewInMemoryRepository(), + }) + defer shutdownProvider() + + consumerSidecar := consumersidecar.New(&consumersidecar.Config{ + ListenAddr: ":19024", + SignerKey: setup.SignerKey, + Domain: env.Domain(), + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + }, zlog.Named("consumer")) + go consumerSidecar.Run() + defer consumerSidecar.Shutdown(nil) + time.Sleep(100 * 
time.Millisecond) + + consumerClient := consumerv1connect.NewConsumerSidecarServiceClient(http.DefaultClient, "http://localhost:19024") + + initResp, err := consumerClient.Init(ctx, connect.NewRequest(&consumerv1.InitRequest{ + EscrowAccount: &commonv1.EscrowAccount{ + Payer: commonv1.AddressFromEth(env.Payer.Address), + Receiver: commonv1.AddressFromEth(env.ServiceProvider.Address), + DataService: commonv1.AddressFromEth(env.DataService.Address), + }, + ProviderControlPlaneEndpoint: "http://localhost:19023", + })) + require.NoError(t, err) + + usageResp, err := consumerClient.ReportUsage(ctx, connect.NewRequest(&consumerv1.ReportUsageRequest{ + SessionId: initResp.Msg.GetSession().GetSessionId(), + Usage: &commonv1.Usage{ + BlocksProcessed: 1, + BytesTransferred: 0, + Requests: 1, + }, + })) + require.NoError(t, err) + require.True(t, usageResp.Msg.GetShouldContinue()) + require.Empty(t, usageResp.Msg.GetStopReason()) + require.Nil(t, usageResp.Msg.GetUpdatedRav()) +} diff --git a/test/integration/payment_session_underpay_test.go b/test/integration/payment_session_underpay_test.go index f6c9df2..895c056 100644 --- a/test/integration/payment_session_underpay_test.go +++ b/test/integration/payment_session_underpay_test.go @@ -45,15 +45,16 @@ func TestPaymentSession_RejectsUnderpayingRAV(t *testing.T) { } providerGateway := providergateway.New(&providergateway.Config{ - ListenAddr: ":19014", - ServiceProvider: env.ServiceProvider.Address, - Domain: domain, - CollectorAddr: env.Collector.Address, - EscrowAddr: env.Escrow.Address, - RPCEndpoint: env.RPCURL, - PricingConfig: pricingConfig, - DataPlaneEndpoint: "substreams.provider.example:443", - TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + ListenAddr: ":19014", + ServiceProvider: env.ServiceProvider.Address, + Domain: domain, + CollectorAddr: env.Collector.Address, + EscrowAddr: env.Escrow.Address, + RPCEndpoint: env.RPCURL, + PricingConfig: pricingConfig, + RAVRequestThreshold: 
sds.NewGRTFromUint64(1), + DataPlaneEndpoint: "substreams.provider.example:443", + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, }, zlog.Named("provider")) go providerGateway.Run() defer providerGateway.Shutdown(nil) diff --git a/test/integration/sidecar_test.go b/test/integration/sidecar_test.go index 361e74b..2c19441 100644 --- a/test/integration/sidecar_test.go +++ b/test/integration/sidecar_test.go @@ -58,14 +58,15 @@ func TestPaymentFlowBasic(t *testing.T) { // Create provider gateway providerConfig := &providergateway.Config{ - ListenAddr: ":19001", - ServiceProvider: env.ServiceProvider.Address, - Domain: domain, - CollectorAddr: env.Collector.Address, - EscrowAddr: env.Escrow.Address, - RPCEndpoint: env.RPCURL, - DataPlaneEndpoint: "substreams.provider.example:443", - TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + ListenAddr: ":19001", + ServiceProvider: env.ServiceProvider.Address, + Domain: domain, + CollectorAddr: env.Collector.Address, + EscrowAddr: env.Escrow.Address, + RPCEndpoint: env.RPCURL, + RAVRequestThreshold: sds.NewGRTFromUint64(1), + DataPlaneEndpoint: "substreams.provider.example:443", + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, } providerGateway := providergateway.New(providerConfig, zlog.Named("provider")) go providerGateway.Run() @@ -177,7 +178,8 @@ func TestInit_CreatesFreshSessionWithoutResumeSemantics(t *testing.T) { PricePerBlock: sds.NewGRTFromUint64(1), PricePerByte: sds.ZeroGRT(), }, - TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, + RAVRequestThreshold: sds.NewGRTFromUint64(1), + TransportConfig: sidecar.ServerTransportConfig{Plaintext: true}, }, zlog.Named("provider")) go providerGateway.Run() defer providerGateway.Shutdown(nil) From 9dbb3220808151190c989c49c9623939ab514f4a Mon Sep 17 00:00:00 2001 From: Juan Manuel Rodriguez Defago Date: Fri, 27 Mar 2026 03:09:08 -0300 Subject: [PATCH 11/17] Define MVP provider operator auth contract - add a shared 
operator auth helper with bearer-token role checks and unit tests - document the MVP provider operator auth contract and narrow oracle whitelist management to internal governance - update MVP scope, backlog, sequencing, and gap analysis to mark MVP-028 resolved and align follow-on tasks --- docs/mvp-implementation-sequencing.md | 8 +- docs/mvp-scope.md | 13 +++- docs/operator-auth.md | 65 ++++++++++++++++ internal/operatorauth/operatorauth.go | 87 ++++++++++++++++++++++ internal/operatorauth/operatorauth_test.go | 72 ++++++++++++++++++ plans/mvp-gap-analysis.md | 5 +- plans/mvp-implementation-backlog.md | 35 +++++---- 7 files changed, 258 insertions(+), 27 deletions(-) create mode 100644 docs/operator-auth.md create mode 100644 internal/operatorauth/operatorauth.go create mode 100644 internal/operatorauth/operatorauth_test.go diff --git a/docs/mvp-implementation-sequencing.md b/docs/mvp-implementation-sequencing.md index 3d6cda8..1cbd470 100644 --- a/docs/mvp-implementation-sequencing.md +++ b/docs/mvp-implementation-sequencing.md @@ -58,8 +58,8 @@ The grouping is recommended because these tasks have broad downstream impact. It - Blocks most runtime payment, provider integration, and client integration work. - `MVP-027` Freeze canonical payment identity, `collection_id` reuse, and session-vs-payment keying semantics - Blocks reconnect, provider state, settlement lifecycle, and operator retrieval/collection work. -- `MVP-028` Define the MVP authentication and authorization contract for oracle and provider operator surfaces - - Blocks authenticated admin/operator implementation for oracle and provider APIs. +- `MVP-028` Define the MVP authentication and authorization contract for provider operator APIs and future oracle admin surfaces + - Blocks authenticated provider operator/admin implementation and any future public oracle admin API. ### Soft Blockers @@ -195,7 +195,7 @@ Minimum prerequisites: Recommended sequence: 1. 
`MVP-021` Make TLS the default non-dev runtime posture for oracle, sidecar, and provider integration paths -2. `MVP-006` Add authenticated oracle administration for whitelist and provider metadata management +2. `MVP-006` Add admin-only oracle whitelist/provider metadata management workflow for the curated MVP provider set 3. `MVP-022` Add authentication and authorization to provider admin/operator APIs 4. `MVP-030` Add runtime compatibility and preflight checks for real provider/plugin deployments @@ -230,7 +230,6 @@ It is a recommended rollout sequence, not a canonical priority order embedded in ### Phase 0: Resolve Or Narrow Shared Contracts -- `MVP-028` - `MVP-023` Already resolved: @@ -241,6 +240,7 @@ Already resolved: - `MVP-004` - `MVP-010` - `MVP-012` +- `MVP-028` - `MVP-027` - `MVP-033` diff --git a/docs/mvp-scope.md b/docs/mvp-scope.md index 8c0f495..888664b 100644 --- a/docs/mvp-scope.md +++ b/docs/mvp-scope.md @@ -209,7 +209,7 @@ That is a materially larger distributed-state problem than the session-local MVP - chain/network eligibility - Returns eligible providers plus one recommended provider - Returns the selected provider control-plane endpoint, not the final streaming endpoint -- Administrative/governance actions require authentication +- Whitelist/provider metadata governance is admin/council-only for MVP and may remain deployment-managed internal config rather than a public management API ### Consumer Sidecar @@ -265,12 +265,17 @@ That is a materially larger distributed-state problem than the session-local MVP - TLS enabled by default for non-dev usage - Plaintext allowed only for local/dev/demo workflows - Authenticated admin/operator actions for: - - oracle management - provider inspection - collection-data retrieval +- Oracle whitelist/provider metadata changes are admin/council-only for MVP and may remain internal deployment/config workflows +- Protected provider operator/admin APIs use `Authorization: Bearer ` +- MVP roles are: + - 
`operator.read` for inspection and retrieval + - `admin.write` for mutating operator/admin actions +- `admin.write` also satisfies `operator.read` - Public vs private provider services may be separated for security and operational reasons - That public/private split is not the main consumer-facing architecture contract -- Final auth mechanism remains an implementation choice +- If a public oracle admin API is added later, it should reuse the same bearer-token role contract ### Observability @@ -372,11 +377,11 @@ The scenarios below are the primary definition of done for the MVP. ## Open Questions - Should simple observability for MVP include metrics endpoints, or are structured logs plus inspection/status tooling sufficient? -- What exact authentication mechanism should protect provider and oracle admin/operator surfaces? ## References - `plans/mvp-gap-analysis.md` - `plans/mvp-implementation-backlog.md` +- `docs/operator-auth.md` - `docs/mvp-implementation-sequencing.md` - `README.md` diff --git a/docs/operator-auth.md b/docs/operator-auth.md new file mode 100644 index 0000000..4003f16 --- /dev/null +++ b/docs/operator-auth.md @@ -0,0 +1,65 @@ +# MVP Operator Authentication Contract + +This document is the canonical MVP reference for `MVP-028`. + +It defines the authentication and authorization contract for provider operator APIs and the expected treatment of oracle governance during MVP. + +## Scope + +This contract applies to provider-side operator/admin surfaces, including: + +- runtime/session/payment inspection +- low-funds and status inspection +- settlement-relevant accepted and collectible RAV retrieval +- collection lifecycle inspection +- future mutating provider operator/admin actions + +For MVP, the current `GetSessionStatus` endpoint should be treated as an `operator.read` surface. 
+
+This contract does not apply to the public runtime payment protocol:
+
+- `StartSession`
+- `SubmitRAV`
+- `PaymentSession`
+- internal plugin auth/session/usage services
+
+## Oracle Governance In MVP
+
+For MVP, oracle whitelist and provider metadata management may remain a deployment-managed internal admin/council workflow rather than a public management API.
+
+The requirement is that whitelist/provider metadata changes are not publicly writable. If a public oracle admin API is added later, it should reuse this same bearer-token role contract rather than inventing a new mechanism.
+
+## Authentication Mechanism
+
+Protected operator/admin endpoints use standard HTTP or Connect metadata:
+
+- `Authorization: Bearer <token>`
+
+Static configured bearer tokens are sufficient for MVP.
+
+The reusable helper for this contract lives in [internal/operatorauth/operatorauth.go](../internal/operatorauth/operatorauth.go).
+
+## Roles
+
+- `operator.read`
+  - inspection and retrieval endpoints
+- `admin.write`
+  - mutating operator/admin actions
+  - also satisfies `operator.read`
+
+## Authorization Rules
+
+- Missing `Authorization` header: reject as unauthenticated
+- Malformed bearer header: reject as unauthenticated
+- Unknown bearer token: reject as unauthenticated
+- Valid token without sufficient privilege: reject as permission denied
+- `admin.write` token may access read-only operator endpoints
+- `operator.read` token may not access mutating admin endpoints
+
+## Transport Assumptions
+
+This contract is intentionally separate from transport posture:
+
+- `MVP-021` owns TLS-by-default rollout
+- protected operator/admin endpoints should be expected to run over TLS outside local/dev usage
+- local/dev may still use explicit plaintext transport with local test tokens where needed
diff --git a/internal/operatorauth/operatorauth.go b/internal/operatorauth/operatorauth.go
new file mode 100644
index 0000000..b4c4020
--- /dev/null
+++ b/internal/operatorauth/operatorauth.go @@ -0,0 +1,87 @@ +package operatorauth + +import ( + "fmt" + "net/http" + "strings" + + "connectrpc.com/connect" +) + +const authorizationHeader = "Authorization" + +type Role string + +const ( + RoleOperatorRead Role = "operator.read" + RoleAdminWrite Role = "admin.write" +) + +type Config struct { + ReadBearerToken string + AdminBearerToken string +} + +func (r Role) Allows(required Role) bool { + switch required { + case RoleOperatorRead: + return r == RoleOperatorRead || r == RoleAdminWrite + case RoleAdminWrite: + return r == RoleAdminWrite + default: + return false + } +} + +func AuthorizeHeader(header http.Header, config Config, required Role) (Role, error) { + rawHeader := strings.TrimSpace(header.Get(authorizationHeader)) + if rawHeader == "" { + return "", connect.NewError(connect.CodeUnauthenticated, fmt.Errorf("missing %s header", authorizationHeader)) + } + + token, err := parseBearerToken(rawHeader) + if err != nil { + return "", connect.NewError(connect.CodeUnauthenticated, err) + } + + role, ok := config.roleForToken(token) + if !ok { + return "", connect.NewError(connect.CodeUnauthenticated, fmt.Errorf("invalid bearer token")) + } + + if !role.Allows(required) { + return role, connect.NewError(connect.CodePermissionDenied, fmt.Errorf("role %q does not satisfy %q", role, required)) + } + + return role, nil +} + +func parseBearerToken(headerValue string) (string, error) { + scheme, token, found := strings.Cut(headerValue, " ") + if !found || !strings.EqualFold(scheme, "Bearer") { + return "", fmt.Errorf("malformed %s header: expected Bearer token", authorizationHeader) + } + + token = strings.TrimSpace(token) + if token == "" || strings.Contains(token, " ") { + return "", fmt.Errorf("malformed %s header: expected Bearer token", authorizationHeader) + } + + return token, nil +} + +func (c Config) roleForToken(token string) (Role, bool) { + if token == "" { + return "", false + } + + // Check admin first so a 
deployer can intentionally reuse the same token for both roles. + if token == c.AdminBearerToken && c.AdminBearerToken != "" { + return RoleAdminWrite, true + } + if token == c.ReadBearerToken && c.ReadBearerToken != "" { + return RoleOperatorRead, true + } + + return "", false +} diff --git a/internal/operatorauth/operatorauth_test.go b/internal/operatorauth/operatorauth_test.go new file mode 100644 index 0000000..e7a05e2 --- /dev/null +++ b/internal/operatorauth/operatorauth_test.go @@ -0,0 +1,72 @@ +package operatorauth + +import ( + "net/http" + "testing" + + "connectrpc.com/connect" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestAuthorizeHeader(t *testing.T) { + config := Config{ + ReadBearerToken: "read-token", + AdminBearerToken: "admin-token", + } + + t.Run("valid read token on read endpoint", func(t *testing.T) { + header := http.Header{} + header.Set("Authorization", "Bearer read-token") + + role, err := AuthorizeHeader(header, config, RoleOperatorRead) + require.NoError(t, err) + assert.Equal(t, RoleOperatorRead, role) + }) + + t.Run("valid admin token on read endpoint", func(t *testing.T) { + header := http.Header{} + header.Set("Authorization", "Bearer admin-token") + + role, err := AuthorizeHeader(header, config, RoleOperatorRead) + require.NoError(t, err) + assert.Equal(t, RoleAdminWrite, role) + }) + + t.Run("read token rejected for write endpoint", func(t *testing.T) { + header := http.Header{} + header.Set("Authorization", "Bearer read-token") + + role, err := AuthorizeHeader(header, config, RoleAdminWrite) + require.Error(t, err) + assert.Equal(t, RoleOperatorRead, role) + assert.Equal(t, connect.CodePermissionDenied, connect.CodeOf(err)) + }) + + t.Run("missing bearer token rejected", func(t *testing.T) { + role, err := AuthorizeHeader(http.Header{}, config, RoleOperatorRead) + require.Error(t, err) + assert.Equal(t, Role(""), role) + assert.Equal(t, connect.CodeUnauthenticated, connect.CodeOf(err)) 
+ }) + + t.Run("malformed authorization header rejected", func(t *testing.T) { + header := http.Header{} + header.Set("Authorization", "Token read-token") + + role, err := AuthorizeHeader(header, config, RoleOperatorRead) + require.Error(t, err) + assert.Equal(t, Role(""), role) + assert.Equal(t, connect.CodeUnauthenticated, connect.CodeOf(err)) + }) + + t.Run("unknown token rejected", func(t *testing.T) { + header := http.Header{} + header.Set("Authorization", "Bearer unknown-token") + + role, err := AuthorizeHeader(header, config, RoleOperatorRead) + require.Error(t, err) + assert.Equal(t, Role(""), role) + assert.Equal(t, connect.CodeUnauthenticated, connect.CodeOf(err)) + }) +} diff --git a/plans/mvp-gap-analysis.md b/plans/mvp-gap-analysis.md index d3f3776..0729a8a 100644 --- a/plans/mvp-gap-analysis.md +++ b/plans/mvp-gap-analysis.md @@ -172,7 +172,7 @@ What MVP requires: - canonical pricing for the curated provider set - eligible provider set plus recommended provider response - selected provider control-plane endpoint return -- authenticated admin/governance actions +- admin/council-only whitelist/provider metadata governance, which may remain deployment-managed internal config for MVP ### Provider Persistence @@ -379,14 +379,13 @@ The gap analysis and the backlog now agree that: - pricing authority is resolved for MVP - reconnect/payment-session reuse is not an MVP target -- the remaining open questions are observability and auth only +- the remaining open question is observability ## Open Questions Carrying Risk These are no longer architecture-blocking for the main SDS flow, but they do still block clean closure of the security/admin and observability parts of MVP. 
- metrics endpoints vs logs-plus-status-only for MVP observability -- exact admin/operator authentication mechanism ## Recommended Usage diff --git a/plans/mvp-implementation-backlog.md b/plans/mvp-implementation-backlog.md index 05fdbb7..5517202 100644 --- a/plans/mvp-implementation-backlog.md +++ b/plans/mvp-implementation-backlog.md @@ -93,7 +93,7 @@ These assumptions are referenced by task ID so it is clear which scope decisions | MVP-003 | `done` | protocol | `A3`, `A6` | `MVP-027` | `D`, `F` | Define and document the provider-side runtime persistence model and its boundary with settlement lifecycle tracking | | MVP-004 | `done` | protocol | `A2`, `A3` | none | `A`, `C` | Define and document the real runtime payment contract used by the public payment gateway, private plugin gateway, and consumer/provider payment loop | | MVP-005 | `not_started` | oracle | `A1`, `A2`, `A5` | `MVP-033` | `A` | Implement a standalone oracle service with manual whitelist, canonical pricing, recommended-provider response, and control-plane endpoint return | -| MVP-006 | `not_started` | oracle | `A5` | `MVP-028` | `A`, `G` | Add authenticated oracle administration for whitelist and provider metadata management | +| MVP-006 | `not_started` | oracle | `A5` | `MVP-028` | `A`, `G` | Add admin-only oracle whitelist/provider metadata management workflow for the curated MVP provider set | | MVP-007 | `not_started` | consumer | `A1`, `A2`, `A3` | `MVP-005`, `MVP-033` | `A` | Integrate consumer sidecar with oracle discovery while preserving direct-provider fallback and provider-returned data-plane resolution | | MVP-008 | `in_progress` | provider-state | `A3`, `A6` | `MVP-003` | `D`, `F` | Complete durable provider runtime storage for sessions, usage, and accepted RAV state, distinct from collection lifecycle tracking | | MVP-009 | `not_started` | provider-state | `A3`, `A5` | `MVP-003`, `MVP-022`, `MVP-029` | `D`, `F` | Expose provider inspection and settlement-data retrieval APIs for 
accepted and collectible RAV state | @@ -109,13 +109,13 @@ These assumptions are referenced by task ID so it is clear which scope decisions | MVP-019 | `not_started` | tooling | `A5` | `MVP-009`, `MVP-022` | `D`, `F` | Implement provider inspection CLI flows for accepted and collectible RAV data | | MVP-020 | `not_started` | tooling | `A5` | `MVP-009`, `MVP-022`, `MVP-029` | `F` | Implement manual collection CLI flow that fetches provider settlement state and crafts/signs/submits collect transactions locally | | MVP-021 | `not_started` | security | `A5` | none | `G` | Make TLS the default non-dev runtime posture for oracle, sidecar, and provider integration paths | -| MVP-022 | `not_started` | security | `A5` | `MVP-009`, `MVP-028` | `D`, `F`, `G` | Add authentication and authorization to provider admin/operator APIs | +| MVP-022 | `not_started` | security | `A5` | `MVP-009`, `MVP-028` | `D`, `F`, `G` | Add authentication and authorization to provider admin/operator APIs using the shared bearer-token role contract from MVP-028 | | MVP-023 | `open_question` | observability | `A4` | none | `A`, `C`, `D`, `F`, `G` | Define the final MVP observability floor beyond structured logs and status tooling | | MVP-024 | `not_started` | observability | `A4` | `MVP-023` | `C`, `D`, `F`, `G` | Implement basic operator-facing inspection/status surfaces and log correlation | | MVP-025 | `in_progress` | validation | none | none | `A`, `B`, `C`, `D`, `E`, `F`, `G` | Add MVP acceptance coverage for the primary end-to-end scenarios in docs/tests/manual verification | | MVP-026 | `in_progress` | docs | `A1`, `A4`, `A5` | `MVP-023`, `MVP-028`, `MVP-033` | `A`, `B`, `C`, `D`, `E`, `F`, `G` | Refresh protocol/runtime docs so they match the revised MVP architecture and remaining open questions | | MVP-027 | `done` | protocol | `A3` | none | `B`, `D`, `F` | Freeze MVP payment/session identity semantics for fresh sessions and non-reused collection/payment lineage | -| MVP-028 | 
`open_question` | security | `A5` | none | `G` | Define the MVP authentication and authorization contract for oracle and provider operator surfaces | +| MVP-028 | `done` | security | `A5` | none | `G` | Define the MVP authentication and authorization contract for provider operator APIs and future oracle admin surfaces | | MVP-029 | `not_started` | provider-state | `A3`, `A5` | `MVP-003`, `MVP-022` | `D`, `F` | Implement provider collection lifecycle transitions and update surfaces for `collectible`, `collect_pending`, `collected`, and retryable collection state | | MVP-030 | `in_progress` | provider-integration | `A5` | `MVP-014`, `MVP-017` | `A`, `G` | Add runtime compatibility and preflight checks for real provider/plugin deployments | | MVP-031 | `not_started` | runtime-payment | `A2`, `A3` | `MVP-004`, `MVP-012`, `MVP-014`, `MVP-017` | `A`, `C` | Wire the long-lived payment-control loop behind the consumer-sidecar ingress path used by real runtime traffic | @@ -229,16 +229,18 @@ These assumptions are referenced by task ID so it is clear which scope decisions - Verify: - Add tests for whitelist lookup, response validation, and deterministic recommendation behavior. -- [ ] MVP-006 Add authenticated oracle administration for whitelist and provider metadata management. +- [ ] MVP-006 Add admin-only oracle whitelist/provider metadata management workflow for the curated MVP provider set. - Context: - - Oracle governance actions must require authentication in MVP. + - Oracle governance must not rely on a public writable surface in MVP. + - The curated whitelist is temporary MVP machinery and may remain deployment-managed internal config. - Assumptions: - `A5` - Done when: - - Oracle whitelist/provider metadata changes require authenticated operator access. - - The implementation does not rely on an open admin surface. + - Oracle whitelist/provider metadata changes are restricted to admins/council. + - MVP does not require a public oracle management API. 
+ - If a public oracle admin API is added, it reuses the bearer-token role contract defined by MVP-028. - Verify: - - Add tests for unauthenticated rejection and authenticated success on admin actions. + - Document the supported admin workflow and confirm the oracle does not rely on an open writable management surface. ## Consumer Tasks @@ -465,17 +467,18 @@ These assumptions are referenced by task ID so it is clear which scope decisions ## Security, Runtime Compatibility, and Observability Tasks -- [ ] MVP-028 Define the MVP authentication and authorization contract for oracle and provider operator surfaces. +- [x] MVP-028 Define the MVP authentication and authorization contract for provider operator APIs and future oracle admin surfaces. - Context: - - The only real architecture-level open questions still left in scope are authn/authz and observability depth. + - The remaining architecture-level open question after this task is observability depth. - Assumptions: - `A5` - Done when: - - The repo documents the MVP authn/authz approach for oracle and provider operator/admin surfaces. - - It is clear which endpoints/actions require operator privileges and which credentials satisfy that requirement. - - MVP-006 and MVP-022 can implement the same contract rather than inventing separate security behavior. + - The repo documents the MVP authn/authz approach for provider operator/admin surfaces. + - It is clear which provider endpoints/actions require operator privileges and which credentials satisfy that requirement. + - The oracle whitelist/provider metadata workflow is explicitly treated as admin/council-only internal governance for MVP rather than requiring a public management API. + - MVP-022 and any future public oracle admin API can reuse the same contract rather than inventing separate security behavior. - Verify: - - Confirm oracle and provider admin task definitions point to the same auth contract. 
+ - Confirm provider admin tasks and any future public oracle admin API point to the same bearer-token role contract. - [ ] MVP-021 Make TLS the default non-dev runtime posture for oracle, sidecar, and provider integration paths. - Context: @@ -495,7 +498,7 @@ These assumptions are referenced by task ID so it is clear which scope decisions - Assumptions: - `A5` - Done when: - - Provider inspection and settlement-retrieval APIs require authentication and authorization according to the shared MVP contract. + - Provider inspection and settlement-retrieval APIs require authentication and authorization according to the shared bearer-token role contract from MVP-028. - The implementation rejects unauthenticated or unauthorized access to operator-only provider actions. - Verify: - Add tests for authenticated success and unauthenticated rejection. @@ -604,7 +607,7 @@ These assumptions are referenced by task ID so it is clear which scope decisions - `A5` - Done when: - The repo documentation reflects the revised MVP architecture rather than the older reconnect/pricing assumptions. - - Remaining open questions are limited to auth and observability rather than already-resolved scope decisions. + - Remaining open questions are limited to observability rather than already-resolved scope decisions. - Docs that describe provider runtime shape match the current public Payment Gateway plus private Plugin Gateway model. - Verify: - Review the updated docs against [docs/mvp-scope.md](../docs/mvp-scope.md) and confirm there are no major contradictions. 
From f9bcdbfdccaa9bc1de9fd655c613a59699596c47 Mon Sep 17 00:00:00 2001 From: Juan Manuel Rodriguez Defago Date: Fri, 27 Mar 2026 23:39:06 -0300 Subject: [PATCH 12/17] Tighten firecore integration and isolate SDS runtime drift - strengthen the real provider-path firecore test harness with shared-repo evidence checks and blocker-aware runtime validation - harden provider auth/plugin handling for legacy malformed auth payloads while keeping current SDS contracts authoritative - document MVP-014 as blocked by protocol drift between current SDS and the prebuilt firecore/dummy-blockchain images --- cmd/sds/impl/sink_run.go | 22 +- docs/mvp-implementation-sequencing.md | 3 + plans/mvp-gap-analysis.md | 2 + plans/mvp-implementation-backlog.md | 17 +- provider/auth/service.go | 4 +- provider/plugin/auth.go | 46 +++- provider/plugin/auth_sanitizer.go | 291 ++++++++++++++++++++++++++ provider/plugin/auth_test.go | 148 +++++++++++++ provider/plugin/gateway.go | 3 +- test/integration/firecore_test.go | 199 +++++++++++++----- 10 files changed, 670 insertions(+), 65 deletions(-) create mode 100644 provider/plugin/auth_sanitizer.go create mode 100644 provider/plugin/auth_test.go diff --git a/cmd/sds/impl/sink_run.go b/cmd/sds/impl/sink_run.go index 77cdec2..d3482a7 100644 --- a/cmd/sds/impl/sink_run.go +++ b/cmd/sds/impl/sink_run.go @@ -139,10 +139,7 @@ func runSinkRun(cmd *cobra.Command, args []string) error { sinkerConfig.ClientConfig = newClientConfigForDataPlaneEndpoint(sinkerConfig.ClientConfig, initResult.DataPlaneEndpoint) - // Create the sinker from config after the provider handshake so the real data-plane - // endpoint uses the provider-returned session-specific value. 
- sinker, err := sink.NewFromConfig(sinkerConfig) - cli.NoError(err, "unable to create sinker") + var extraHeaders []string // Add the RAV header for authentication with the Substreams endpoint if initResult.PaymentRAV != nil { @@ -150,16 +147,29 @@ func runSinkRun(cmd *cobra.Command, args []string) error { if err != nil { return fmt.Errorf("failed to encode RAV header: %w", err) } - sinker.ExtraHeaders = append(sinker.ExtraHeaders, sds.HeaderRAV+":"+ravHeader) + extraHeaders = append(extraHeaders, sds.HeaderRAV+":"+ravHeader) sinkLog.Debug("added x-sds-rav header to sinker") } // Add the session ID header for session tracking if wrapper.sessionID != "" { - sinker.ExtraHeaders = append(sinker.ExtraHeaders, sds.HeaderSessionID+":"+wrapper.sessionID) + extraHeaders = append(extraHeaders, sds.HeaderSessionID+":"+wrapper.sessionID) sinkLog.Debug("added x-sds-session-id header to sinker", zap.String("session_id", wrapper.sessionID)) } + if len(extraHeaders) > 0 { + sinkerConfig.ExtraHeaders = append(append([]string(nil), sinkerConfig.ExtraHeaders...), extraHeaders...) + sinkLog.Info("configured SDS data-plane headers", + zap.Int("header_count", len(sinkerConfig.ExtraHeaders)), + zap.Strings("headers", sinkerConfig.ExtraHeaders), + ) + } + + // Create the sinker from config after the provider handshake so the real data-plane + // endpoint uses the provider-returned session-specific value. 
+ sinker, err := sink.NewFromConfig(sinkerConfig) + cli.NoError(err, "unable to create sinker") + // Supervise the sinker - app will shutdown sinker on termination signal app.Supervise(sinker) diff --git a/docs/mvp-implementation-sequencing.md b/docs/mvp-implementation-sequencing.md index 1cbd470..df8a55d 100644 --- a/docs/mvp-implementation-sequencing.md +++ b/docs/mvp-implementation-sequencing.md @@ -139,6 +139,9 @@ Notes: - compares unbaselined `delta_cost` against a provider-side `rav_request_threshold` - defaults to `10 GRT` when the provider does not configure a threshold explicitly - `MVP-014` remains the main integration foundation in this lane. + - Current status: repo-local gateway wiring and the real-path `TestFirecore` harness are in place, but acceptance is blocked by drift between the current SDS plugin/gateway protocol and the prebuilt `dummy-blockchain`/`firecore` image currently used in tests. + - The embedded runtime links an older SDS snapshot that still speaks the pre-header auth contract (`payment_rav`, `organization_id`, `metadata`) and older session/usage correlation fields (`trace_id`, `meta`), while the current repo expects the newer raw-header/trusted-header contract plus SDS `session_id` / `sds_session_id`. + - Treat rebuilding `firecore` and `dummy-blockchain` against the current SDS contract, or otherwise formalizing runtime compatibility checks in `MVP-030`, as the prerequisite to declaring `MVP-014` complete. - `MVP-011` is partially advanced because the current sidecar wrapper path already stops on `NeedMoreFunds`, but the real client-facing ingress path is still unfinished. - `MVP-031` is effectively the capstone runtime-payment task because it depends on real provider and consumer integration plus thresholding. 
diff --git a/plans/mvp-gap-analysis.md b/plans/mvp-gap-analysis.md index 0729a8a..a6a8670 100644 --- a/plans/mvp-gap-analysis.md +++ b/plans/mvp-gap-analysis.md @@ -324,6 +324,8 @@ The most important recent status changes versus the original draft are: - The provider now requests new RAVs based on unbaselined `delta_cost` reaching a provider-side `rav_request_threshold`, with a built-in `10 GRT` fallback when not configured. - Real-path integration scaffolding is stronger. - The repo now includes stronger firecore/plugin integration setup and a `TestFirecore` scaffold, even though that path is not yet MVP-complete. + - The current blocker is now identified more precisely: the prebuilt `dummy-blockchain`/`firecore` runtime used by that scaffold embeds an older SDS snapshot and therefore drifts from the current auth/session/usage plugin contracts implemented in this repo. + - This is protocol drift caused by SDS contract evolution, not just a generic “firecore test is flaky” issue. - Consumer-side MVP UX is still notably behind the revised scope. - The code still reflects a control-plane RPC plus wrapper model rather than the endpoint/proxy boundary the scope now requires. 
diff --git a/plans/mvp-implementation-backlog.md b/plans/mvp-implementation-backlog.md index 5517202..38b673b 100644 --- a/plans/mvp-implementation-backlog.md +++ b/plans/mvp-implementation-backlog.md @@ -101,7 +101,7 @@ These assumptions are referenced by task ID so it is clear which scope decisions | MVP-011 | `in_progress` | funding-control | `A6` | `MVP-010` | `C` | Propagate provider low-funds stop decisions through consumer sidecar into the real client path | | MVP-012 | `done` | funding-control | none | `MVP-004` | `A`, `C` | Add deterministic cost-based RAV issuance thresholds suitable for real runtime behavior | | MVP-013 | `deferred` | consumer | `A3` | none | none | Post-MVP only: implement true provider-authoritative payment-session reconnect/resume semantics | -| MVP-014 | `in_progress` | provider-integration | `A3` | `MVP-004` | `A` | Integrate the public Payment Gateway and private Plugin Gateway into the real provider streaming path | +| MVP-014 | `blocked` | provider-integration | `A3` | `MVP-004` | `A` | Integrate the public Payment Gateway and private Plugin Gateway into the real provider streaming path | | MVP-015 | `in_progress` | provider-integration | `A3` | `MVP-004`, `MVP-014` | `A`, `C` | Wire real byte metering and session correlation from the plugin path into the payment-state repository used by the gateway | | MVP-016 | `not_started` | provider-integration | `A6` | `MVP-010`, `MVP-014` | `C` | Enforce gateway Continue/Stop decisions in the live provider stream lifecycle | | MVP-017 | `not_started` | consumer-integration | `A1`, `A2`, `A3` | `MVP-007`, `MVP-011`, `MVP-033` | `A`, `C` | Implement the consumer sidecar as a Substreams-compatible endpoint/proxy rather than only a wrapper-controlled lifecycle service | @@ -363,15 +363,26 @@ These assumptions are referenced by task ID so it is clear which scope decisions - [ ] MVP-014 Integrate the public Payment Gateway and private Plugin Gateway into the real provider streaming path. 
- Context: - The recent commit range established the provider-side dual-gateway shape and the shared repository wiring. + - The repo now also has a stronger `TestFirecore` real-path harness that boots payment gateway, plugin gateway, consumer sidecar, Postgres, and dummy-blockchain/firecore together. - The backlog should now treat that as the concrete provider integration target. + - Current status: + - The repo-local integration work is substantially in place: provider handshake returns the correct mapped data-plane endpoint, both gateways start in the expected topology, and the acceptance test now reaches the live auth edge of the firecore path. + - Acceptance is currently blocked by runtime drift in the prebuilt `ghcr.io/streamingfast/dummy-blockchain:v1.7.7` image, whose embedded `firecore` binary links `github.com/graphprotocol/substreams-data-service` at commit `c6ca40569c63` instead of the current repo contract. + - That embedded SDS snapshot still uses older plugin RPC contracts: + - auth request/response: `payment_rav` plus `organization_id`/`metadata`, instead of `untrusted_headers` plus `trusted_headers` + - session plugin: `trace_id`, instead of SDS `session_id` + - usage plugin: `meta`, instead of `sds_session_id` + - Because of that drift, the prebuilt runtime can reach the auth plugin path but cannot satisfy the current provider/plugin gateway contract, so `TestFirecore` now records the exact blocker and skips instead of failing the whole suite. - Assumptions: - `A3` - Done when: - The real provider path validates payment/session state through the public Payment Gateway. - Firehose-core plugin traffic goes through the private Plugin Gateway. - Both paths share the same authoritative provider-side repository state. + - The real-path acceptance run uses a firecore/dummy-blockchain runtime built against the current SDS protocol contract rather than the stale prebuilt image. 
- Verify: - - Add a real-path integration test or manual verification against the current provider shape. + - `go test ./test/integration -run TestFirecore -v` passes without skip against a firecore/dummy-blockchain runtime rebuilt from current SDS-compatible sources. + - The backlog and runtime-compatibility docs explicitly identify the prebuilt `dummy-blockchain:v1.7.7` image as incompatible with the current SDS provider/plugin contract. - [ ] MVP-015 Wire real byte metering and session correlation from the plugin path into the payment-state repository used by the gateway. - Context: @@ -508,12 +519,14 @@ These assumptions are referenced by task ID so it is clear which scope decisions - [ ] MVP-030 Add runtime compatibility and preflight checks for real provider/plugin deployments. - Context: - Recent README, config, and firecore test scaffolding identify the target runtime more clearly. + - MVP-014 uncovered a concrete compatibility failure in the prebuilt `dummy-blockchain:v1.7.7` image: its embedded `firecore` binary links an older SDS snapshot and therefore speaks older auth/session/usage plugin contracts than the current provider/plugin gateway. - The repo still lacks proper enforced preflight validation for that deployment shape. - Assumptions: - `A5` - Done when: - The repo identifies at least one named real-provider target environment for MVP acceptance and documents the required runtime compatibility constraints clearly enough for operators to validate before rollout. - The required runtime versions, plugin compatibility assumptions, and non-demo configuration prerequisites for that environment are documented. + - The documented compatibility contract explicitly covers SDS protocol drift between provider/plugin gateway code and embedded firecore plugin binaries. - Startup or preflight checks fail fast when the provider/plugin environment is incompatible with the required SDS runtime contract. 
- Verify: - Add a startup/preflight validation test or a documented manual verification flow that demonstrates clear failure modes for unsupported runtime combinations. diff --git a/provider/auth/service.go b/provider/auth/service.go index b3c8df9..9fd1850 100644 --- a/provider/auth/service.go +++ b/provider/auth/service.go @@ -105,7 +105,9 @@ func (s *AuthService) ValidateAuth( // Extract x-sds-rav header ravHeaders, ok := lowerHeaders[strings.ToLower(sds.HeaderRAV)] if !ok || len(ravHeaders) == 0 { - logger.Warn("missing x-sds-rav header") + logger.Warn("missing x-sds-rav header", + zap.Strings("received_header_names", slices.Collect(maps.Keys(lowerHeaders))), + ) return nil, connect.NewError(connect.CodeUnauthenticated, fmt.Errorf("missing %s header", sds.HeaderRAV)) } diff --git a/provider/plugin/auth.go b/provider/plugin/auth.go index f018424..de01626 100644 --- a/provider/plugin/auth.go +++ b/provider/plugin/auth.go @@ -5,6 +5,8 @@ import ( "fmt" "maps" "os" + "strings" + "unicode/utf8" "connectrpc.com/connect" sds "github.com/graphprotocol/substreams-data-service" @@ -91,13 +93,7 @@ func (a *authenticator) Authenticate(ctx context.Context, path string, headers m zap.String("ip", ipAddress), ) - // Convert headers to protobuf format - protoHeaders := make(map[string]*authv1.HeaderValues, len(headers)) - for name, values := range headers { - protoHeaders[name] = &authv1.HeaderValues{ - Values: values, - } - } + protoHeaders := forwardedAuthHeaders(headers, a.logger) // Call the provider gateway's AuthService - it will handle all validation logic req := connect.NewRequest(&authv1.ValidateAuthRequest{ @@ -136,3 +132,39 @@ func (a *authenticator) Authenticate(ctx context.Context, path string, headers m func (a *authenticator) Ready(ctx context.Context) bool { return true } + +var forwardedAuthHeaderNames = map[string]struct{}{ + strings.ToLower(sds.HeaderRAV): {}, + strings.ToLower(sds.HeaderSessionID): {}, + "x-trace-id": {}, +} + +func 
forwardedAuthHeaders(headers map[string][]string, logger *zap.Logger) map[string]*authv1.HeaderValues { + protoHeaders := make(map[string]*authv1.HeaderValues, len(forwardedAuthHeaderNames)) + + for name, values := range headers { + lowerName := strings.ToLower(name) + if _, ok := forwardedAuthHeaderNames[lowerName]; !ok { + continue + } + + safeValues := make([]string, 0, len(values)) + for _, value := range values { + if !utf8.ValidString(value) { + logger.Debug("dropping non-UTF8 auth header value", + zap.String("header_name", lowerName), + ) + continue + } + safeValues = append(safeValues, value) + } + + if len(safeValues) == 0 { + continue + } + + protoHeaders[lowerName] = &authv1.HeaderValues{Values: safeValues} + } + + return protoHeaders +} diff --git a/provider/plugin/auth_sanitizer.go b/provider/plugin/auth_sanitizer.go new file mode 100644 index 0000000..69f5f84 --- /dev/null +++ b/provider/plugin/auth_sanitizer.go @@ -0,0 +1,291 @@ +package plugin + +import ( + "bytes" + "fmt" + "io" + "net/http" + "strconv" + "strings" + "unicode/utf8" + + authv1 "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/sds/auth/v1" + "go.uber.org/zap" + "google.golang.org/protobuf/encoding/protowire" + "google.golang.org/protobuf/proto" +) + +func wrapAuthTransport(next http.Handler, logger *zap.Logger) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + body, changed, err := sanitizeAuthRequestBody(r.Header.Get("Content-Type"), r.Body, logger) + if err != nil { + logger.Warn("failed to sanitize auth request body", zap.Error(err)) + http.Error(w, fmt.Sprintf("invalid auth request: %v", err), http.StatusBadRequest) + return + } + + if changed { + r.Body = io.NopCloser(bytes.NewReader(body)) + r.ContentLength = int64(len(body)) + r.Header.Set("Content-Length", strconv.Itoa(len(body))) + } + + next.ServeHTTP(w, r) + }) +} + +func sanitizeAuthRequestBody(contentType string, body io.ReadCloser, logger 
*zap.Logger) ([]byte, bool, error) { + defer body.Close() + + rawBody, err := io.ReadAll(body) + if err != nil { + return nil, false, fmt.Errorf("read body: %w", err) + } + + sanitized, changed, err := sanitizeAuthPayload(contentType, rawBody, logger) + if err != nil { + return nil, false, err + } + if !changed { + return rawBody, false, nil + } + + return sanitized, true, nil +} + +func sanitizeAuthPayload(contentType string, payload []byte, logger *zap.Logger) ([]byte, bool, error) { + if len(payload) == 0 { + return payload, false, nil + } + + if strings.HasPrefix(contentType, "application/grpc") { + if len(payload) < 5 { + return nil, false, fmt.Errorf("grpc frame too short") + } + if payload[0] != 0 { + return nil, false, fmt.Errorf("grpc compression is not supported on auth sanitizer") + } + + frameLen := int(payload[1])<<24 | int(payload[2])<<16 | int(payload[3])<<8 | int(payload[4]) + if frameLen != len(payload)-5 { + return nil, false, fmt.Errorf("grpc frame length mismatch: frame=%d payload=%d", frameLen, len(payload)-5) + } + + msg, changed, err := sanitizeValidateAuthMessage(payload[5:], logger) + if err != nil { + return nil, false, err + } + if !changed { + return payload, false, nil + } + + out := make([]byte, 5, 5+len(msg)) + out[0] = 0 + out[1] = byte(len(msg) >> 24) + out[2] = byte(len(msg) >> 16) + out[3] = byte(len(msg) >> 8) + out[4] = byte(len(msg)) + out = append(out, msg...) 
+ return out, true, nil + } + + msg, changed, err := sanitizeValidateAuthMessage(payload, logger) + if err != nil { + return nil, false, err + } + if !changed { + return payload, false, nil + } + + return msg, true, nil +} + +func sanitizeValidateAuthMessage(payload []byte, logger *zap.Logger) ([]byte, bool, error) { + original := payload + req := &authv1.ValidateAuthRequest{ + UntrustedHeaders: map[string]*authv1.HeaderValues{}, + } + + var changed bool + for len(payload) > 0 { + num, typ, tagLen := protowire.ConsumeTag(payload) + if tagLen < 0 { + return nil, false, protowire.ParseError(tagLen) + } + payload = payload[tagLen:] + + switch num { + case 1: + if typ != protowire.BytesType { + return nil, false, fmt.Errorf("unexpected wire type for untrusted_headers: %v", typ) + } + entryBytes, n := protowire.ConsumeBytes(payload) + if n < 0 { + return nil, false, protowire.ParseError(n) + } + key, values, entryChanged, err := sanitizeAuthHeaderEntry(entryBytes, logger) + if err != nil { + return nil, false, err + } + changed = changed || entryChanged + if key != "" && len(values) > 0 { + req.UntrustedHeaders[key] = &authv1.HeaderValues{Values: values} + } + payload = payload[n:] + case 2: + if typ != protowire.BytesType { + return nil, false, fmt.Errorf("unexpected wire type for path: %v", typ) + } + pathBytes, n := protowire.ConsumeBytes(payload) + if n < 0 { + return nil, false, protowire.ParseError(n) + } + sanitizedPath := strings.ToValidUTF8(string(pathBytes), "") + changed = changed || sanitizedPath != string(pathBytes) + req.Path = sanitizedPath + payload = payload[n:] + case 3: + if typ != protowire.BytesType { + return nil, false, fmt.Errorf("unexpected wire type for ip_address: %v", typ) + } + ipBytes, n := protowire.ConsumeBytes(payload) + if n < 0 { + return nil, false, protowire.ParseError(n) + } + sanitizedIP := strings.ToValidUTF8(string(ipBytes), "") + changed = changed || sanitizedIP != string(ipBytes) + req.IpAddress = sanitizedIP + payload = 
payload[n:] + default: + fieldLen := protowire.ConsumeFieldValue(num, typ, payload) + if fieldLen < 0 { + return nil, false, protowire.ParseError(fieldLen) + } + changed = true + payload = payload[fieldLen:] + } + } + + if !changed { + return original, false, nil + } + + msg, err := proto.Marshal(req) + if err != nil { + return nil, false, fmt.Errorf("marshal sanitized auth request: %w", err) + } + return msg, true, nil +} + +func sanitizeAuthHeaderEntry(payload []byte, logger *zap.Logger) (string, []string, bool, error) { + var ( + key string + values []string + changed bool + ) + + for len(payload) > 0 { + num, typ, tagLen := protowire.ConsumeTag(payload) + if tagLen < 0 { + return "", nil, false, protowire.ParseError(tagLen) + } + payload = payload[tagLen:] + + switch num { + case 1: + if typ != protowire.BytesType { + return "", nil, false, fmt.Errorf("unexpected wire type for header key: %v", typ) + } + keyBytes, n := protowire.ConsumeBytes(payload) + if n < 0 { + return "", nil, false, protowire.ParseError(n) + } + sanitizedKey := strings.ToLower(strings.ToValidUTF8(string(keyBytes), "")) + changed = changed || sanitizedKey != string(keyBytes) + key = sanitizedKey + payload = payload[n:] + case 2: + if typ != protowire.BytesType { + return "", nil, false, fmt.Errorf("unexpected wire type for header values: %v", typ) + } + valueBytes, n := protowire.ConsumeBytes(payload) + if n < 0 { + return "", nil, false, protowire.ParseError(n) + } + sanitizedValues, valuesChanged, err := sanitizeAuthHeaderValues(valueBytes, logger) + if err != nil { + return "", nil, false, err + } + changed = changed || valuesChanged + values = sanitizedValues + payload = payload[n:] + default: + fieldLen := protowire.ConsumeFieldValue(num, typ, payload) + if fieldLen < 0 { + return "", nil, false, protowire.ParseError(fieldLen) + } + changed = true + payload = payload[fieldLen:] + } + } + + if key == "" { + return "", nil, changed, nil + } + if _, ok := forwardedAuthHeaderNames[key]; 
!ok { + return "", nil, true, nil + } + return key, values, changed, nil +} + +func sanitizeAuthHeaderValues(payload []byte, logger *zap.Logger) ([]string, bool, error) { + values, changed, err := sanitizeAuthHeaderValuesMessage(payload, logger) + if err == nil { + return values, changed, nil + } + + if !utf8.Valid(payload) { + logger.Debug("dropping non-UTF8 auth header value from transport payload") + return nil, true, nil + } + + // Older firecore images serialized repeated Header{key,value} entries instead of + // the newer map contract. In that legacy shape, the field 2 + // payload is the raw string value, not a nested HeaderValues message. + return []string{string(payload)}, true, nil +} + +func sanitizeAuthHeaderValuesMessage(payload []byte, logger *zap.Logger) ([]string, bool, error) { + var ( + values []string + changed bool + ) + + for len(payload) > 0 { + num, typ, tagLen := protowire.ConsumeTag(payload) + if tagLen < 0 { + return nil, false, protowire.ParseError(tagLen) + } + payload = payload[tagLen:] + + if num != 1 || typ != protowire.BytesType { + return nil, false, fmt.Errorf("not header values encoding") + } + + valueBytes, n := protowire.ConsumeBytes(payload) + if n < 0 { + return nil, false, protowire.ParseError(n) + } + payload = payload[n:] + + if !utf8.Valid(valueBytes) { + changed = true + logger.Debug("dropping non-UTF8 auth header value from transport payload") + continue + } + + values = append(values, string(valueBytes)) + } + + return values, changed, nil +} diff --git a/provider/plugin/auth_test.go b/provider/plugin/auth_test.go new file mode 100644 index 0000000..cd468f9 --- /dev/null +++ b/provider/plugin/auth_test.go @@ -0,0 +1,148 @@ +package plugin + +import ( + "testing" + + sds "github.com/graphprotocol/substreams-data-service" + authv1 "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/sds/auth/v1" + "github.com/stretchr/testify/require" + "go.uber.org/zap" + 
"google.golang.org/protobuf/encoding/protowire" + "google.golang.org/protobuf/proto" +) + +func TestForwardedAuthHeaders_FiltersToSDSHeadersAndValidUTF8(t *testing.T) { + logger := zap.NewNop() + + headers := map[string][]string{ + sds.HeaderRAV: {"rav-value"}, + sds.HeaderSessionID: {"session-id"}, + "Grpc-Trace-Bin": {string([]byte{0xff, 0xfe, 0xfd})}, + "X-Trace-Id": {"trace-id"}, + "X-Unrelated-Header": {"ignore-me"}, + "X-SDS-Session-ID-Binary-Like": {string([]byte{0xff})}, + } + + got := forwardedAuthHeaders(headers, logger) + + require.Len(t, got, 3) + require.Equal(t, []string{"rav-value"}, got["x-sds-rav"].GetValues()) + require.Equal(t, []string{"session-id"}, got["x-sds-session-id"].GetValues()) + require.Equal(t, []string{"trace-id"}, got["x-trace-id"].GetValues()) + require.NotContains(t, got, "grpc-trace-bin") + require.NotContains(t, got, "x-unrelated-header") +} + +func TestForwardedAuthHeaders_DropsInvalidUTF8FromAllowedHeaders(t *testing.T) { + logger := zap.NewNop() + + headers := map[string][]string{ + sds.HeaderRAV: {"rav-value", string([]byte{0xff, 0xfe})}, + sds.HeaderSessionID: {string([]byte{0xff})}, + } + + got := forwardedAuthHeaders(headers, logger) + + require.Equal(t, []string{"rav-value"}, got["x-sds-rav"].GetValues()) + require.NotContains(t, got, "x-sds-session-id") +} + +func TestSanitizeValidateAuthMessage_FiltersHeadersAndDropsInvalidUTF8(t *testing.T) { + logger := zap.NewNop() + + raw := append([]byte{}, + encodeAuthMapEntry("x-sds-rav", encodeHeaderValues([][]byte{[]byte("rav-value")}))..., + ) + raw = append(raw, encodeAuthMapEntry("grpc-trace-bin", encodeHeaderValues([][]byte{{0xff, 0xfe}}))...) + raw = append(raw, encodeAuthMapEntry("x-sds-session-id", encodeHeaderValues([][]byte{[]byte("session-id"), {0xff}}))...) + raw = append(raw, encodeStringField(2, []byte("/sf.substreams.rpc.v2/Blocks"))...) + raw = append(raw, encodeStringField(3, []byte("127.0.0.1"))...) 
+ + sanitized, changed, err := sanitizeValidateAuthMessage(raw, logger) + require.NoError(t, err) + require.True(t, changed) + + var req authv1.ValidateAuthRequest + require.NoError(t, proto.Unmarshal(sanitized, &req)) + require.Equal(t, []string{"rav-value"}, req.GetUntrustedHeaders()["x-sds-rav"].GetValues()) + require.Equal(t, []string{"session-id"}, req.GetUntrustedHeaders()["x-sds-session-id"].GetValues()) + require.NotContains(t, req.GetUntrustedHeaders(), "grpc-trace-bin") + require.Equal(t, "/sf.substreams.rpc.v2/Blocks", req.GetPath()) + require.Equal(t, "127.0.0.1", req.GetIpAddress()) +} + +func TestSanitizeAuthPayload_GrpcFrame(t *testing.T) { + logger := zap.NewNop() + + msg := encodeAuthMapEntry("grpc-trace-bin", encodeHeaderValues([][]byte{{0xff}})) + msg = append(msg, encodeAuthMapEntry("x-sds-rav", encodeHeaderValues([][]byte{[]byte("rav-value")}))...) + + frame := make([]byte, 5, 5+len(msg)) + frame[0] = 0 + frame[1] = byte(len(msg) >> 24) + frame[2] = byte(len(msg) >> 16) + frame[3] = byte(len(msg) >> 8) + frame[4] = byte(len(msg)) + frame = append(frame, msg...) 
+ + sanitized, changed, err := sanitizeAuthPayload("application/grpc+proto", frame, logger) + require.NoError(t, err) + require.True(t, changed) + require.Len(t, sanitized, 5+len(sanitized[5:])) + require.Equal(t, byte(0), sanitized[0]) + + msgLen := int(sanitized[1])<<24 | int(sanitized[2])<<16 | int(sanitized[3])<<8 | int(sanitized[4]) + require.Equal(t, len(sanitized)-5, msgLen) + + var req authv1.ValidateAuthRequest + require.NoError(t, proto.Unmarshal(sanitized[5:], &req)) + require.Equal(t, []string{"rav-value"}, req.GetUntrustedHeaders()["x-sds-rav"].GetValues()) + require.NotContains(t, req.GetUntrustedHeaders(), "grpc-trace-bin") +} + +func TestSanitizeValidateAuthMessage_LegacyHeaderEncoding(t *testing.T) { + logger := zap.NewNop() + + raw := encodeLegacyAuthHeader("x-sds-rav", []byte("rav-value")) + raw = append(raw, encodeLegacyAuthHeader("x-sds-session-id", []byte("session-id"))...) + + sanitized, changed, err := sanitizeValidateAuthMessage(raw, logger) + require.NoError(t, err) + require.True(t, changed) + + var req authv1.ValidateAuthRequest + require.NoError(t, proto.Unmarshal(sanitized, &req)) + require.Equal(t, []string{"rav-value"}, req.GetUntrustedHeaders()["x-sds-rav"].GetValues()) + require.Equal(t, []string{"session-id"}, req.GetUntrustedHeaders()["x-sds-session-id"].GetValues()) +} + +func encodeAuthMapEntry(key string, headerValues []byte) []byte { + entry := encodeStringField(1, []byte(key)) + entry = append(entry, encodeBytesField(2, headerValues)...) + return encodeBytesField(1, entry) +} + +func encodeLegacyAuthHeader(key string, value []byte) []byte { + entry := encodeStringField(1, []byte(key)) + entry = append(entry, encodeStringField(2, value)...) + return encodeBytesField(1, entry) +} + +func encodeHeaderValues(values [][]byte) []byte { + var out []byte + for _, value := range values { + out = append(out, encodeStringField(1, value)...) 
+ } + return out +} + +func encodeStringField(fieldNumber protowire.Number, value []byte) []byte { + return encodeBytesField(fieldNumber, value) +} + +func encodeBytesField(fieldNumber protowire.Number, value []byte) []byte { + var out []byte + out = protowire.AppendTag(out, fieldNumber, protowire.BytesType) + out = protowire.AppendBytes(out, value) + return out +} diff --git a/provider/plugin/gateway.go b/provider/plugin/gateway.go index 34457e4..3092e8e 100644 --- a/provider/plugin/gateway.go +++ b/provider/plugin/gateway.go @@ -60,7 +60,8 @@ func (g *PluginGateway) Run() { // Connect/HTTP server for SDS plugin services handlerGetters := []connectrpc.HandlerGetter{ func(opts ...connect.HandlerOption) (string, http.Handler) { - return authv1connect.NewAuthServiceHandler(g.authService, opts...) + path, handler := authv1connect.NewAuthServiceHandler(g.authService, opts...) + return path, wrapAuthTransport(handler, g.logger) }, func(opts ...connect.HandlerOption) (string, http.Handler) { return usagev1connect.NewUsageServiceHandler(g.usageService, opts...) 
diff --git a/test/integration/firecore_test.go b/test/integration/firecore_test.go index 59033eb..b76f1c8 100644 --- a/test/integration/firecore_test.go +++ b/test/integration/firecore_test.go @@ -7,12 +7,17 @@ import ( "os" "os/exec" "path/filepath" + "strings" "testing" "time" + "connectrpc.com/connect" "github.com/graphprotocol/substreams-data-service/cmd/sds/impl" "github.com/graphprotocol/substreams-data-service/consumer/sidecar" "github.com/graphprotocol/substreams-data-service/horizon" + providerv1 "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/provider/v1" + "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/provider/v1/providerv1connect" + psqlrepo "github.com/graphprotocol/substreams-data-service/provider/repository/psql" sidecarlib "github.com/graphprotocol/substreams-data-service/sidecar" "github.com/streamingfast/logging" "github.com/stretchr/testify/require" @@ -28,12 +33,24 @@ func TestFirecore(t *testing.T) { t.Skip("Skipping firecore integration test in short mode") } - t.Skip("Not ready for prime time, still working on it") - ctx := context.Background() env := SetupEnv(t) + testStartedAt := time.Now().UTC() + + // Step 1: Start dummy-blockchain/firecore and capture the host-reachable data plane endpoint. + // The provider handshake must advertise the mapped host port, not the in-container :10016. 
+ dummyBlockchainContainer, substreamsEndpoint, err := startDummyBlockchainContainer(ctx, 100) + require.NoError(t, err, "failed to start dummy-blockchain container") + defer dummyBlockchainContainer.Terminate(ctx) + defer func() { + if !t.Failed() { + return + } + dumpContainerLogs(t, ctx, dummyBlockchainContainer) + }() + providerDataPlaneEndpoint := "http://" + substreamsEndpoint - // Step 1: Start provider gateways (payment + plugin) with Postgres repository + // Step 2: Start provider gateways (payment + plugin) with Postgres repository // Listen on 0.0.0.0 so Docker containers can reach them via host.docker.internal // // Port allocation: @@ -55,7 +72,7 @@ func TestFirecore(t *testing.T) { env.Collector.Address, env.Escrow.Address, env.RPCURL, - "localhost:10016", + providerDataPlaneEndpoint, PostgresTestDSN, sidecarlib.ServerTransportConfig{ Plaintext: true, @@ -78,24 +95,19 @@ func TestFirecore(t *testing.T) { require.NoError(t, err, "plugin gateway failed to become healthy") firecoreLog.Info("plugin gateway is healthy") - // Step 3: Setup dummy-blockchain container - // Use genesis-block-burst=100 to rapidly produce blocks and reach real-time sync faster - dummyBlockchainContainer, substreamsEndpoint, err := startDummyBlockchainContainer(ctx, 100) - require.NoError(t, err, "failed to start dummy-blockchain container") - defer dummyBlockchainContainer.Terminate(ctx) - firecoreLog.Info("all infrastructure started successfully", zap.String("substreams_endpoint", substreamsEndpoint), zap.String("provider_control_plane_endpoint", "http://localhost:19001"), ) - // Step 4: Start consumer sidecar + // Step 3: Start consumer sidecar firecoreLog.Info("starting consumer sidecar", zap.String("listen_addr", ":9002")) sidecarConfig := &sidecar.Config{ - ListenAddr: ":9002", - SignerKey: env.Payer.PrivateKey, - Domain: horizon.NewDomain(env.ChainID, env.Collector.Address), + ListenAddr: ":9002", + SignerKey: env.Payer.PrivateKey, + Domain: horizon.NewDomain(env.ChainID, 
env.Collector.Address), + TransportConfig: sidecarlib.ServerTransportConfig{Plaintext: true}, } consumerSidecar := sidecar.New(sidecarConfig, firecoreLog) @@ -107,32 +119,10 @@ func TestFirecore(t *testing.T) { require.NoError(t, err, "consumer sidecar failed to become healthy") firecoreLog.Info("consumer sidecar is healthy") - // Step 5: Run E2E Substreams request (blocks 0-20) - // This tests if SDS auth plugins are working correctly + // Step 4: Run E2E Substreams request (blocks 0-20) + // This exercises the real provider path through firecore. firecoreLog.Info("running E2E Substreams request for blocks 0-20") - // Dump firecore container logs for debugging - // Sleep briefly to let logs flush - time.Sleep(1 * time.Second) - logs, err := dummyBlockchainContainer.Logs(ctx) - if err == nil { - var logBuf []byte - buf := make([]byte, 4096) - for { - n, err := logs.Read(buf) - if n > 0 { - logBuf = append(logBuf, buf[:n]...) - } - if err != nil { - break - } - } - logs.Close() - if len(logBuf) > 0 { - firecoreLog.Debug("firecore container logs", zap.String("logs", string(logBuf))) - } - } - err = runSDSSink( ctx, "common@v0.1.0", @@ -144,17 +134,31 @@ func TestFirecore(t *testing.T) { 0, 20, ) - - if err != nil { - firecoreLog.Warn("E2E Substreams request failed (expected due to auth header bug)", - zap.Error(err), - ) - t.Logf("⚠️ E2E test failed as expected: %v", err) - t.Log("⚠️ Known issue: auth context not setting header properly") - } else { - firecoreLog.Info("E2E Substreams request completed successfully!") - t.Log("✅ E2E Substreams request completed successfully!") + if isKnownFirecoreHeaderPropagationBlocker(err) { + dumpContainerLogs(t, ctx, dummyBlockchainContainer) + t.Skipf("MVP-014 blocked by external firecore/substreams header propagation: %v", err) } + require.NoError(t, err, "firecore-backed sds sink run must succeed") + + evidence := loadFirecoreEvidence(t, ctx, testStartedAt, env) + require.NotEmpty(t, evidence.SessionID, "expected a provider 
session to be created") + require.Equal(t, 1, evidence.SessionCount, "expected exactly one matching provider session") + require.GreaterOrEqual(t, evidence.WorkerCount+evidence.UsageEventCount, int64(1), "expected plugin activity to leave worker or usage evidence") + + providerClient := providerv1connect.NewPaymentGatewayServiceClient(http.DefaultClient, "http://localhost:19001") + statusResp, err := providerClient.GetSessionStatus(ctx, connect.NewRequest(&providerv1.GetSessionStatusRequest{ + SessionId: evidence.SessionID, + })) + require.NoError(t, err, "payment gateway must expose repo-backed session status") + require.True(t, statusResp.Msg.GetActive(), "session should still be active after the short stream") + require.NotNil(t, statusResp.Msg.GetPaymentStatus(), "payment status must be present") + require.GreaterOrEqual(t, evidence.UsageBlocks+evidence.UsageBytes+evidence.UsageRequests, int64(1), "expected metering to record non-zero usage") + + firecoreLog.Info("E2E Substreams request completed successfully", + zap.String("session_id", evidence.SessionID), + zap.Int64("worker_count", evidence.WorkerCount), + zap.Int64("usage_event_count", evidence.UsageEventCount), + ) } // waitForSidecarHealth polls the sidecar health endpoint until it returns 200 or timeout @@ -394,3 +398,102 @@ func runSDSSink( return nil } + +type firecoreSessionEvidence struct { + SessionID string + SessionCount int + WorkerCount int64 + UsageEventCount int64 + UsageBlocks int64 + UsageBytes int64 + UsageRequests int64 +} + +type firecoreSessionRow struct { + ID string `db:"id"` +} + +func loadFirecoreEvidence(t *testing.T, ctx context.Context, createdAfter time.Time, env *TestEnv) firecoreSessionEvidence { + t.Helper() + + dbConn, err := psqlrepo.GetConnectionFromDSN(ctx, PostgresTestDSN) + require.NoError(t, err, "connect to provider postgres repo") + defer dbConn.Close() + + sessionRows := make([]firecoreSessionRow, 0, 1) + err = dbConn.SelectContext(ctx, &sessionRows, ` + SELECT id + 
FROM sessions + WHERE payer = $1 + AND receiver = $2 + AND data_service = $3 + AND created_at >= $4 + ORDER BY created_at ASC + `, env.Payer.Address.Bytes(), env.ServiceProvider.Address.Bytes(), env.DataService.Address.Bytes(), createdAfter) + require.NoError(t, err, "query firecore-created provider sessions") + require.Len(t, sessionRows, 1, "expected one provider session for the test payer/provider/data service tuple") + + var evidence firecoreSessionEvidence + evidence.SessionID = sessionRows[0].ID + evidence.SessionCount = len(sessionRows) + + err = dbConn.GetContext(ctx, &evidence.WorkerCount, `SELECT COUNT(*) FROM workers WHERE session_id = $1`, evidence.SessionID) + require.NoError(t, err, "count worker rows for firecore session") + + err = dbConn.GetContext(ctx, &evidence.UsageEventCount, `SELECT COUNT(*) FROM usage_events WHERE session_id = $1`, evidence.SessionID) + require.NoError(t, err, "count usage event rows for firecore session") + + err = dbConn.QueryRowxContext(ctx, ` + SELECT + COALESCE(SUM(blocks), 0) AS blocks, + COALESCE(SUM(bytes), 0) AS bytes, + COALESCE(SUM(requests), 0) AS requests + FROM usage_events + WHERE session_id = $1 + `, evidence.SessionID).Scan(&evidence.UsageBlocks, &evidence.UsageBytes, &evidence.UsageRequests) + require.NoError(t, err, "sum usage event rows for firecore session") + + return evidence +} + +func dumpContainerLogs(t *testing.T, ctx context.Context, container testcontainers.Container) { + t.Helper() + + logs, err := container.Logs(ctx) + if err != nil { + t.Logf("failed to retrieve firecore container logs: %v", err) + return + } + defer logs.Close() + + buf := make([]byte, 4096) + var logBuf []byte + for { + n, readErr := logs.Read(buf) + if n > 0 { + logBuf = append(logBuf, buf[:n]...) 
+ } + if readErr != nil { + break + } + } + + if len(logBuf) == 0 { + t.Log("firecore container produced no readable logs") + return + } + + firecoreLog.Info("firecore container logs", + zap.String("logs", string(logBuf)), + ) +} + +func isKnownFirecoreHeaderPropagationBlocker(err error) bool { + if err == nil { + return false + } + + msg := err.Error() + return strings.Contains(msg, "stream auth failure") && + strings.Contains(msg, "missing x-sds-rav header") +} From 8ff7feb5e1102f2b9a32e1df94a6040c4a4d1a01 Mon Sep 17 00:00:00 2001 From: Juan Manuel Rodriguez Defago Date: Sat, 28 Mar 2026 00:35:48 -0300 Subject: [PATCH 13/17] Validate MVP-014 with local firecore runtime workflow - add a local-first TestFirecore workflow that runs against rebuilt firehose-core and dummy-blockchain images via SDS_TEST_DUMMY_BLOCKCHAIN_IMAGE - align provider usage aggregation with current firehose-core metering names and stabilize firecore evidence checks in integration - update README, sequencing, and backlog docs to mark MVP-014 complete under the validated local runtime path and track upstream image publication in MVP-036 --- README.md | 31 ++++++---- docs/mvp-implementation-sequencing.md | 10 +++- plans/mvp-implementation-backlog.md | 41 +++++++++---- provider/usage/service.go | 4 +- provider/usage/service_test.go | 43 ++++++++++++++ test/integration/firecore_test.go | 86 +++++++++++++++++++++++---- 6 files changed, 177 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index 7a3c1eb..5fe0a7c 100644 --- a/README.md +++ b/README.md @@ -125,20 +125,30 @@ go test ./test/integration/... -v # Integration tests (requires Docker) ### Running Full System with Firecore -To run the full Substreams Data Service stack with a Firehose provider, you need `firecore` and `dummy-blockchain` binaries (see [Prerequisites](#prerequisites)). Clone the repositories and build from source: +`MVP-014` is currently validated through a local-first runtime workflow. 
The published `ghcr.io/streamingfast/dummy-blockchain:v1.7.7` image is still stale for the current SDS provider/plugin contract, so the supported path is to rebuild `firehose-core` and `dummy-blockchain` locally and point `TestFirecore` at that local image. + +Build the local runtime images from sibling checkouts: ```bash -# Build firecore -# -# IMPORTANT: `firecore` must include SDS plugin registration support, otherwise the `sds://...` plugins -# configured in `devel/firecore.config.yaml` won't load. Use at least this commit: -# 536bcd99495f42a27b67b340ccf8416f0fc967bf -go install github.com/streamingfast/firehose-core/cmd/firecore@536bcd99495f42a27b67b340ccf8416f0fc967bf - -# Build dummy-blockchain -go install github.com/streamingfast/dummy-blockchain@latest +# Build a local firecore image. If SDS plugin/runtime contracts changed after the +# currently pinned SDS dependency in firehose-core, update that dependency first. +cd ../firehose-core +docker build -t ghcr.io/streamingfast/firehose-core:sds-local . + +# Build a local dummy-blockchain image on top of the local firecore tag. +cd ../dummy-blockchain +docker build \ + --build-arg FIRECORE_VERSION=sds-local \ + -t ghcr.io/streamingfast/dummy-blockchain:sds-local . + +# Run the SDS firecore integration test against the local runtime image. +cd ../data-service +SDS_TEST_DUMMY_BLOCKCHAIN_IMAGE=ghcr.io/streamingfast/dummy-blockchain:sds-local \ + go test ./test/integration -run TestFirecore -v ``` +`TestFirecore` defaults to `ghcr.io/streamingfast/dummy-blockchain:v1.7.7`. Override it with `SDS_TEST_DUMMY_BLOCKCHAIN_IMAGE` when validating locally rebuilt runtimes. Refreshing the upstream published images so the default path works again is tracked in `MVP-036`. + A sample firecore configuration is provided in `devel/firecore.config.yaml` that uses dummy-blockchain as the reader node and configures the SDS plugins (auth, session, metering) to connect to the provider gateway on `:9001`. 
Sanity check (what to look for in logs): @@ -149,6 +159,7 @@ Sanity check (what to look for in logs): - Bad: - `executable file not found in $PATH` for `dummy-blockchain` → ensure `$(go env GOPATH)/bin` is on `PATH` - errors about unknown `sds` plugin kind/scheme → your `firecore` binary is too old + - auth/session/usage contract mismatch against the current SDS provider/plugin gateway → rebuild the local `firehose-core` and `dummy-blockchain` images and rerun `TestFirecore` ## Architecture diff --git a/docs/mvp-implementation-sequencing.md b/docs/mvp-implementation-sequencing.md index df8a55d..9082e0b 100644 --- a/docs/mvp-implementation-sequencing.md +++ b/docs/mvp-implementation-sequencing.md @@ -139,9 +139,13 @@ Notes: - compares unbaselined `delta_cost` against a provider-side `rav_request_threshold` - defaults to `10 GRT` when the provider does not configure a threshold explicitly - `MVP-014` remains the main integration foundation in this lane. - - Current status: repo-local gateway wiring and the real-path `TestFirecore` harness are in place, but acceptance is blocked by drift between the current SDS plugin/gateway protocol and the prebuilt `dummy-blockchain`/`firecore` image currently used in tests. - - The embedded runtime links an older SDS snapshot that still speaks the pre-header auth contract (`payment_rav`, `organization_id`, `metadata`) and older session/usage correlation fields (`trace_id`, `meta`), while the current repo expects the newer raw-header/trusted-header contract plus SDS `session_id` / `sds_session_id`. - - Treat rebuilding `firecore` and `dummy-blockchain` against the current SDS contract, or otherwise formalizing runtime compatibility checks in `MVP-030`, as the prerequisite to declaring `MVP-014` complete. 
+ - Current status: repo-local gateway wiring and the real-path `TestFirecore` harness are in place, and local-first acceptance now passes when the test is pointed at a locally rebuilt `firecore`/`dummy-blockchain` image via `SDS_TEST_DUMMY_BLOCKCHAIN_IMAGE`. + - The validated local runtime tuple on 2026-03-28 was: + - SDS `f9bcdbfdccaa9bc1de9fd655c613a59699596c47` + - `firehose-core` `b574a98babcb0338198e0ff4db7ebd0e404f6529` + - `dummy-blockchain` `1cea671e78cbb069d64333fdbf4a6c9dd5502d58` + - `substreams` `8897dccff3e2f989867b7711be91d613d256a36a` + - The prebuilt published `dummy-blockchain` image remains stale and still embeds an older SDS-compatible runtime snapshot, so publishing refreshed upstream images is tracked separately under `MVP-036`, while `MVP-030` remains the compatibility/preflight hardening follow-up. - `MVP-011` is partially advanced because the current sidecar wrapper path already stops on `NeedMoreFunds`, but the real client-facing ingress path is still unfinished. - `MVP-031` is effectively the capstone runtime-payment task because it depends on real provider and consumer integration plus thresholding. 
diff --git a/plans/mvp-implementation-backlog.md b/plans/mvp-implementation-backlog.md index 38b673b..2bc9571 100644 --- a/plans/mvp-implementation-backlog.md +++ b/plans/mvp-implementation-backlog.md @@ -101,7 +101,7 @@ These assumptions are referenced by task ID so it is clear which scope decisions | MVP-011 | `in_progress` | funding-control | `A6` | `MVP-010` | `C` | Propagate provider low-funds stop decisions through consumer sidecar into the real client path | | MVP-012 | `done` | funding-control | none | `MVP-004` | `A`, `C` | Add deterministic cost-based RAV issuance thresholds suitable for real runtime behavior | | MVP-013 | `deferred` | consumer | `A3` | none | none | Post-MVP only: implement true provider-authoritative payment-session reconnect/resume semantics | -| MVP-014 | `blocked` | provider-integration | `A3` | `MVP-004` | `A` | Integrate the public Payment Gateway and private Plugin Gateway into the real provider streaming path | +| MVP-014 | `done` | provider-integration | `A3` | `MVP-004` | `A` | Integrate the public Payment Gateway and private Plugin Gateway into the real provider streaming path | | MVP-015 | `in_progress` | provider-integration | `A3` | `MVP-004`, `MVP-014` | `A`, `C` | Wire real byte metering and session correlation from the plugin path into the payment-state repository used by the gateway | | MVP-016 | `not_started` | provider-integration | `A6` | `MVP-010`, `MVP-014` | `C` | Enforce gateway Continue/Stop decisions in the live provider stream lifecycle | | MVP-017 | `not_started` | consumer-integration | `A1`, `A2`, `A3` | `MVP-007`, `MVP-011`, `MVP-033` | `A`, `C` | Implement the consumer sidecar as a Substreams-compatible endpoint/proxy rather than only a wrapper-controlled lifecycle service | @@ -123,6 +123,7 @@ These assumptions are referenced by task ID so it is clear which scope decisions | MVP-033 | `done` | protocol | `A1` | none | `A` | Freeze the chain/network discovery input contract across client, sidecar, and 
oracle | | MVP-034 | `done` | validation | none | none | none | Fix repository PostgreSQL tests so migrations resolve from repo-relative state rather than a machine-specific absolute path | | MVP-035 | `done` | validation | none | none | none | Make integration devenv startup resilient to local fixed-port collisions so the shared test environment is reproducible | +| MVP-036 | `not_started` | operations | `A5` | `MVP-014` | `A`, `G` | Publish refreshed upstream `firehose-core` and `dummy-blockchain` images built against the current SDS plugin/runtime contract so default integration paths no longer rely on local override tags | ## Protocol and Contract Tasks @@ -360,29 +361,30 @@ These assumptions are referenced by task ID so it is clear which scope decisions ## Real Provider and Consumer Integration Tasks -- [ ] MVP-014 Integrate the public Payment Gateway and private Plugin Gateway into the real provider streaming path. +- [x] MVP-014 Integrate the public Payment Gateway and private Plugin Gateway into the real provider streaming path. - Context: - The recent commit range established the provider-side dual-gateway shape and the shared repository wiring. - The repo now also has a stronger `TestFirecore` real-path harness that boots payment gateway, plugin gateway, consumer sidecar, Postgres, and dummy-blockchain/firecore together. - The backlog should now treat that as the concrete provider integration target. - Current status: - - The repo-local integration work is substantially in place: provider handshake returns the correct mapped data-plane endpoint, both gateways start in the expected topology, and the acceptance test now reaches the live auth edge of the firecore path. - - Acceptance is currently blocked by runtime drift in the prebuilt `ghcr.io/streamingfast/dummy-blockchain:v1.7.7` image, whose embedded `firecore` binary links `github.com/graphprotocol/substreams-data-service` at commit `c6ca40569c63` instead of the current repo contract. 
- - That embedded SDS snapshot still uses older plugin RPC contracts: - - auth request/response: `payment_rav` plus `organization_id`/`metadata`, instead of `untrusted_headers` plus `trusted_headers` - - session plugin: `trace_id`, instead of SDS `session_id` - - usage plugin: `meta`, instead of `sds_session_id` - - Because of that drift, the prebuilt runtime can reach the auth plugin path but cannot satisfy the current provider/plugin gateway contract, so `TestFirecore` now records the exact blocker and skips instead of failing the whole suite. + - The repo-local integration work is complete enough for acceptance: provider handshake returns the correct mapped data-plane endpoint, both gateways start in the expected topology, and the real-path `TestFirecore` run succeeds through auth, session, and usage correlation when pointed at a locally rebuilt runtime image. + - The local-first acceptance run was validated on 2026-03-28 against: + - SDS `f9bcdbfdccaa9bc1de9fd655c613a59699596c47` + - `firehose-core` `b574a98babcb0338198e0ff4db7ebd0e404f6529` + - `dummy-blockchain` `1cea671e78cbb069d64333fdbf4a6c9dd5502d58` + - `substreams` `8897dccff3e2f989867b7711be91d613d256a36a` + - image tags `ghcr.io/streamingfast/firehose-core:sds-local` and `ghcr.io/streamingfast/dummy-blockchain:sds-local` + - The prebuilt `ghcr.io/streamingfast/dummy-blockchain:v1.7.7` image remains stale and still embeds an older SDS-compatible runtime snapshot. Publishing refreshed upstream images is tracked separately under `MVP-036`. - Assumptions: - `A3` - Done when: - The real provider path validates payment/session state through the public Payment Gateway. - Firehose-core plugin traffic goes through the private Plugin Gateway. - Both paths share the same authoritative provider-side repository state. - - The real-path acceptance run uses a firecore/dummy-blockchain runtime built against the current SDS protocol contract rather than the stale prebuilt image. 
+ - The real-path acceptance run uses a firecore/dummy-blockchain runtime built against the current SDS protocol contract rather than the stale prebuilt image, with the SDS test harness pointed at that image via `SDS_TEST_DUMMY_BLOCKCHAIN_IMAGE`. - Verify: - - `go test ./test/integration -run TestFirecore -v` passes without skip against a firecore/dummy-blockchain runtime rebuilt from current SDS-compatible sources. - - The backlog and runtime-compatibility docs explicitly identify the prebuilt `dummy-blockchain:v1.7.7` image as incompatible with the current SDS provider/plugin contract. + - `SDS_TEST_DUMMY_BLOCKCHAIN_IMAGE=ghcr.io/streamingfast/dummy-blockchain:sds-local go test ./test/integration -run TestFirecore -v -count=1` passes without skip against a firecore/dummy-blockchain runtime rebuilt from current SDS-compatible sources. + - The backlog and runtime-compatibility docs explicitly identify the prebuilt `dummy-blockchain:v1.7.7` image as incompatible with the current SDS provider/plugin contract until `MVP-036` lands. - [ ] MVP-015 Wire real byte metering and session correlation from the plugin path into the payment-state repository used by the gateway. - Context: @@ -531,6 +533,21 @@ These assumptions are referenced by task ID so it is clear which scope decisions - Verify: - Add a startup/preflight validation test or a documented manual verification flow that demonstrates clear failure modes for unsupported runtime combinations. +- [ ] MVP-036 Publish refreshed upstream `firehose-core` and `dummy-blockchain` images built against the current SDS plugin/runtime contract so default integration paths no longer rely on local override tags. + - Context: + - MVP-014 is now validated through the local-first runtime workflow using `SDS_TEST_DUMMY_BLOCKCHAIN_IMAGE=ghcr.io/streamingfast/dummy-blockchain:sds-local`. 
+ - The published `ghcr.io/streamingfast/dummy-blockchain:v1.7.7` image is still stale and embeds a `firecore` binary linked against an older SDS snapshot. + - Until refreshed upstream images exist, the repo-local default integration path still depends on local retagging and override-based validation. + - Assumptions: + - `A5` + - Done when: + - A published `firehose-core` image exists that is built against the current SDS-compatible plugin/runtime contract. + - A published `dummy-blockchain` image exists that embeds that refreshed `firehose-core` image. + - SDS integration validation no longer requires local-only image tags to exercise the current runtime/plugin contract. + - Verify: + - Build and publish refreshed upstream images from the validated source tuple or a newer compatible tuple. + - Run `go test ./test/integration -run TestFirecore -v -count=1` against the published image path without `SDS_TEST_DUMMY_BLOCKCHAIN_IMAGE` and confirm it passes without skip. + - [ ] MVP-023 Define the final MVP observability floor beyond structured logs and status tooling. - Context: - MVP requires operational visibility, but metrics/tracing depth is still open. 
diff --git a/provider/usage/service.go b/provider/usage/service.go index 830ebc9..798b502 100644 --- a/provider/usage/service.go +++ b/provider/usage/service.go @@ -91,9 +91,9 @@ func protoEventToUsageEvent(event *usagev1.Event) *repository.UsageEvent { continue } switch m.Name { - case "blocks_count", "blocks": + case "blocks_count", "blocks", "block_count", "message_count": ue.Blocks += m.Value - case "bytes_count", "bytes": + case "bytes_count", "bytes", "egress_bytes": ue.Bytes += m.Value case "requests_count", "requests": ue.Requests += m.Value diff --git a/provider/usage/service_test.go b/provider/usage/service_test.go index b39b26b..53f63f1 100644 --- a/provider/usage/service_test.go +++ b/provider/usage/service_test.go @@ -18,6 +18,23 @@ func newTestRepo() *repository.InMemoryRepository { return repository.NewInMemoryRepository() } +type capturingRepo struct { + *repository.InMemoryRepository + usageBySession map[string][]*repository.UsageEvent +} + +func newCapturingRepo() *capturingRepo { + return &capturingRepo{ + InMemoryRepository: repository.NewInMemoryRepository(), + usageBySession: make(map[string][]*repository.UsageEvent), + } +} + +func (r *capturingRepo) UsageAdd(_ context.Context, sessionID string, usage *repository.UsageEvent) error { + r.usageBySession[sessionID] = append(r.usageBySession[sessionID], usage) + return nil +} + func TestUsageService_Report_Empty(t *testing.T) { repo := newTestRepo() svc := usage.NewUsageService(repo) @@ -158,3 +175,29 @@ func TestUsageService_Report_AllMetrics(t *testing.T) { require.NoError(t, err) // All metrics are stored (UsageGetTotal removed as unused method) } + +func TestUsageService_Report_FirehoseCoreMetricNames(t *testing.T) { + repo := newCapturingRepo() + svc := usage.NewUsageService(repo) + + resp, err := svc.Report(context.Background(), connect.NewRequest(&usagev1.ReportRequest{ + Events: []*usagev1.Event{ + { + OrganizationId: "0xpayer1", + SdsSessionId: "session-1", + Metrics: []*usagev1.Metric{ 
+ {Name: "block_count", Value: 20}, + {Name: "message_count", Value: 3}, + {Name: "egress_bytes", Value: 2048}, + }, + }, + }, + })) + require.NoError(t, err) + assert.False(t, resp.Msg.Revoked) + + require.Len(t, repo.usageBySession["session-1"], 1) + assert.Equal(t, int64(23), repo.usageBySession["session-1"][0].Blocks) + assert.Equal(t, int64(2048), repo.usageBySession["session-1"][0].Bytes) + assert.Equal(t, int64(0), repo.usageBySession["session-1"][0].Requests) +} diff --git a/test/integration/firecore_test.go b/test/integration/firecore_test.go index b76f1c8..6a701db 100644 --- a/test/integration/firecore_test.go +++ b/test/integration/firecore_test.go @@ -28,6 +28,11 @@ import ( var firecoreLog, _ = logging.PackageLogger("firecore_test", "github.com/graphprotocol/substreams-data-service/test/integration/firecore") +const ( + defaultDummyBlockchainImage = "ghcr.io/streamingfast/dummy-blockchain:v1.7.7" + dummyBlockchainImageEnvVar = "SDS_TEST_DUMMY_BLOCKCHAIN_IMAGE" +) + func TestFirecore(t *testing.T) { if testing.Short() { t.Skip("Skipping firecore integration test in short mode") @@ -36,11 +41,18 @@ func TestFirecore(t *testing.T) { ctx := context.Background() env := SetupEnv(t) testStartedAt := time.Now().UTC() + dummyBlockchainImage := getDummyBlockchainImage() + + firecoreLog.Info("selected dummy-blockchain runtime image", + zap.String("image", dummyBlockchainImage), + zap.String("env_var", dummyBlockchainImageEnvVar), + ) + t.Logf("using dummy-blockchain image: %s", dummyBlockchainImage) // Step 1: Start dummy-blockchain/firecore and capture the host-reachable data plane endpoint. // The provider handshake must advertise the mapped host port, not the in-container :10016. 
- dummyBlockchainContainer, substreamsEndpoint, err := startDummyBlockchainContainer(ctx, 100) - require.NoError(t, err, "failed to start dummy-blockchain container") + dummyBlockchainContainer, substreamsEndpoint, err := startDummyBlockchainContainer(ctx, dummyBlockchainImage, 100) + require.NoError(t, err, "failed to start dummy-blockchain container from image %q", dummyBlockchainImage) defer dummyBlockchainContainer.Terminate(ctx) defer func() { if !t.Failed() { @@ -152,7 +164,23 @@ func TestFirecore(t *testing.T) { require.NoError(t, err, "payment gateway must expose repo-backed session status") require.True(t, statusResp.Msg.GetActive(), "session should still be active after the short stream") require.NotNil(t, statusResp.Msg.GetPaymentStatus(), "payment status must be present") - require.GreaterOrEqual(t, evidence.UsageBlocks+evidence.UsageBytes+evidence.UsageRequests, int64(1), "expected metering to record non-zero usage") + require.Eventually(t, func() bool { + usageEvidence, err := loadFirecoreUsageEvidence(ctx, evidence.SessionID) + if err != nil { + firecoreLog.Warn("failed to refresh firecore usage evidence", + zap.String("session_id", evidence.SessionID), + zap.Error(err), + ) + return false + } + + evidence.UsageEventCount = usageEvidence.UsageEventCount + evidence.UsageBlocks = usageEvidence.UsageBlocks + evidence.UsageBytes = usageEvidence.UsageBytes + evidence.UsageRequests = usageEvidence.UsageRequests + + return evidence.UsageBlocks+evidence.UsageBytes+evidence.UsageRequests >= 1 + }, 3*time.Second, 100*time.Millisecond, "expected metering to record non-zero usage") firecoreLog.Info("E2E Substreams request completed successfully", zap.String("session_id", evidence.SessionID), @@ -189,10 +217,7 @@ func waitForSidecarHealth(ctx context.Context, healthURL string, timeout time.Du // newDummyBlockchainContainer creates a dummy blockchain container for testing // It starts reader-node, merger, relayer, and substreams-tier1 with SDS plugins -func 
newDummyBlockchainContainer(ctx context.Context, genesisBlockBurst int) (testcontainers.Container, error) { - // Use the new dummy-blockchain image with SDS plugin support - image := "ghcr.io/streamingfast/dummy-blockchain:v1.7.7" - +func newDummyBlockchainContainer(ctx context.Context, image string, genesisBlockBurst int) (testcontainers.Container, error) { // Build reader arguments for the dummy-blockchain binary readerArgs := fmt.Sprintf("start --log-level=error --tracer=firehose --store-dir=/tmp/data --genesis-block-burst=%d --block-rate=120 --block-size=1500 --genesis-height=0 --server-addr=:9777", genesisBlockBurst) @@ -255,10 +280,10 @@ func newDummyBlockchainContainer(ctx context.Context, genesisBlockBurst int) (te } // startDummyBlockchainContainer starts a dummy blockchain container, retrieves its endpoint, and verifies it's healthy -func startDummyBlockchainContainer(ctx context.Context, genesisBlockBurst int) (testcontainers.Container, string, error) { - firecoreLog.Info("setting up dummy-blockchain container") +func startDummyBlockchainContainer(ctx context.Context, image string, genesisBlockBurst int) (testcontainers.Container, string, error) { + firecoreLog.Info("setting up dummy-blockchain container", zap.String("image", image)) - container, err := newDummyBlockchainContainer(ctx, genesisBlockBurst) + container, err := newDummyBlockchainContainer(ctx, image, genesisBlockBurst) if err != nil { return nil, "", fmt.Errorf("failed to start dummy-blockchain container: %w", err) } @@ -300,6 +325,15 @@ func startDummyBlockchainContainer(ctx context.Context, genesisBlockBurst int) ( return container, substreamsEndpoint, nil } +func getDummyBlockchainImage() string { + image := strings.TrimSpace(os.Getenv(dummyBlockchainImageEnvVar)) + if image == "" { + return defaultDummyBlockchainImage + } + + return image +} + // waitForGatewayHealth polls the gateway health endpoint until it returns 200 or timeout func waitForGatewayHealth(ctx context.Context, healthURL 
string, timeout time.Duration) error { ctx, cancel := context.WithTimeout(ctx, timeout) @@ -416,7 +450,7 @@ type firecoreSessionRow struct { func loadFirecoreEvidence(t *testing.T, ctx context.Context, createdAfter time.Time, env *TestEnv) firecoreSessionEvidence { t.Helper() - dbConn, err := psqlrepo.GetConnectionFromDSN(ctx, PostgresTestDSN) + dbConn, err := psqlrepo.GetConnectionFromDSN(ctx, toPostgresDriverDSN(PostgresTestDSN)) require.NoError(t, err, "connect to provider postgres repo") defer dbConn.Close() @@ -456,6 +490,36 @@ func loadFirecoreEvidence(t *testing.T, ctx context.Context, createdAfter time.T return evidence } +func loadFirecoreUsageEvidence(ctx context.Context, sessionID string) (firecoreSessionEvidence, error) { + dbConn, err := psqlrepo.GetConnectionFromDSN(ctx, toPostgresDriverDSN(PostgresTestDSN)) + if err != nil { + return firecoreSessionEvidence{}, fmt.Errorf("connect to provider postgres repo: %w", err) + } + defer dbConn.Close() + + evidence := firecoreSessionEvidence{SessionID: sessionID} + if err := dbConn.GetContext(ctx, &evidence.UsageEventCount, `SELECT COUNT(*) FROM usage_events WHERE session_id = $1`, sessionID); err != nil { + return firecoreSessionEvidence{}, fmt.Errorf("count usage event rows for firecore session: %w", err) + } + + if err := dbConn.QueryRowxContext(ctx, ` + SELECT + COALESCE(SUM(blocks), 0) AS blocks, + COALESCE(SUM(bytes), 0) AS bytes, + COALESCE(SUM(requests), 0) AS requests + FROM usage_events + WHERE session_id = $1 + `, sessionID).Scan(&evidence.UsageBlocks, &evidence.UsageBytes, &evidence.UsageRequests); err != nil { + return firecoreSessionEvidence{}, fmt.Errorf("sum usage event rows for firecore session: %w", err) + } + + return evidence, nil +} + +func toPostgresDriverDSN(dsn string) string { + return strings.Replace(dsn, "psql://", "postgres://", 1) +} + func dumpContainerLogs(t *testing.T, ctx context.Context, container testcontainers.Container) { t.Helper() From 
ad3420a881d463d02d0b9ca73d3712ce42e1e066 Mon Sep 17 00:00:00 2001 From: Juan Manuel Rodriguez Defago Date: Sat, 28 Mar 2026 01:42:09 -0300 Subject: [PATCH 14/17] Wire plugin metering into gateway payment state - make provider-side plugin metering update authoritative session usage and accumulated cost through a shared repository apply-usage path - thread provider pricing into the plugin usage service and cover the new session-aggregation behavior in repository and usage tests - tighten Firecore acceptance to verify live plugin activity advances gateway-visible payment status while preserving the existing PaymentSession flow --- cmd/sds/impl/provider_gateway.go | 16 +++- provider/repository/inmemory.go | 22 +++++ provider/repository/psql/repository_test.go | 40 +++++++++ provider/repository/psql/session.go | 1 + .../psql/sql/session/apply_usage.sql | 29 +++++++ provider/repository/psql/usage.go | 85 +++++++++++++++++++ provider/repository/repository.go | 17 ++++ provider/usage/service.go | 24 ++++-- provider/usage/service_test.go | 65 ++++++++++++-- test/integration/firecore_test.go | 22 ++++- 10 files changed, 303 insertions(+), 18 deletions(-) create mode 100644 provider/repository/psql/sql/session/apply_usage.sql diff --git a/cmd/sds/impl/provider_gateway.go b/cmd/sds/impl/provider_gateway.go index 9c2d0da..d358294 100644 --- a/cmd/sds/impl/provider_gateway.go +++ b/cmd/sds/impl/provider_gateway.go @@ -9,6 +9,7 @@ import ( "github.com/graphprotocol/substreams-data-service/provider/auth" "github.com/graphprotocol/substreams-data-service/provider/gateway" "github.com/graphprotocol/substreams-data-service/provider/plugin" + "github.com/graphprotocol/substreams-data-service/provider/repository" "github.com/graphprotocol/substreams-data-service/provider/session" "github.com/graphprotocol/substreams-data-service/provider/usage" sidecarlib "github.com/graphprotocol/substreams-data-service/sidecar" @@ -141,7 +142,7 @@ func StartProviderGateway( } authService := 
auth.NewAuthService(serviceProviderAddr, domain, collectorQuerier, repo) - usageService := usage.NewUsageService(repo) + usageService := usage.NewUsageService(repo, toRepositoryPricingConfig(pricingConfig)) sessionService := session.NewSessionService(repo, nil) // Use default quota config // Create Plugin Gateway @@ -235,7 +236,7 @@ func runProviderGateway(cmd *cobra.Command, args []string) error { } authService := auth.NewAuthService(serviceProviderAddr, domain, collectorQuerier, repo) - usageService := usage.NewUsageService(repo) + usageService := usage.NewUsageService(repo, toRepositoryPricingConfig(providerPricingConfig.ToPricingConfig())) sessionService := session.NewSessionService(repo, nil) // Use default quota config // Create Plugin Gateway @@ -256,3 +257,14 @@ func runProviderGateway(cmd *cobra.Command, args []string) error { return app.WaitForTermination(providerLog, 0*time.Second, 30*time.Second) } + +func toRepositoryPricingConfig(pc *sidecarlib.PricingConfig) repository.PricingConfig { + if pc == nil { + pc = sidecarlib.DefaultPricingConfig() + } + + return repository.PricingConfig{ + PricePerBlock: pc.PricePerBlock, + PricePerByte: pc.PricePerByte, + } +} diff --git a/provider/repository/inmemory.go b/provider/repository/inmemory.go index 76f4cc4..11c093f 100644 --- a/provider/repository/inmemory.go +++ b/provider/repository/inmemory.go @@ -3,6 +3,7 @@ package repository import ( "context" "fmt" + "math/big" "sync" "time" @@ -77,6 +78,27 @@ func (r *InMemoryRepository) SessionUpdate(_ context.Context, session *Session) return nil } +// SessionApplyUsage appends a usage event and advances the owning session aggregates. 
+func (r *InMemoryRepository) SessionApplyUsage(ctx context.Context, sessionID string, usage *UsageEvent, cost *big.Int) error { + if usage == nil { + return fmt.Errorf("usage event must not be nil") + } + + session, ok := r.sessions.Get(sessionID) + if !ok { + return fmt.Errorf("session %q: %w", sessionID, ErrNotFound) + } + + if err := r.UsageAdd(ctx, sessionID, usage); err != nil { + return err + } + + blocks, bytes, requests := usage.SanitizedTotals() + session.AddUsage(blocks, bytes, requests, cost) + r.sessions.Set(sessionID, session) + return nil +} + // SessionList returns all sessions that match the given filter. func (r *InMemoryRepository) SessionList(_ context.Context, filter SessionFilter) ([]*Session, error) { var result []*Session diff --git a/provider/repository/psql/repository_test.go b/provider/repository/psql/repository_test.go index 95afe58..6ebbce5 100644 --- a/provider/repository/psql/repository_test.go +++ b/provider/repository/psql/repository_test.go @@ -389,4 +389,44 @@ func TestUsageAddAndGetTotal(t *testing.T) { }) } +func TestSessionApplyUsage(t *testing.T) { + withTestDB(t, func(db *Database) { + ctx := context.Background() + + pricingConfig := sds.PricingConfig{ + PricePerBlock: sds.MustNewGRT(100), + PricePerByte: sds.MustNewGRT(10), + } + + payer := eth.MustNewAddress("0x1234567890123456789012345678901234567890") + receiver := eth.MustNewAddress("0x2234567890123456789012345678901234567890") + dataService := eth.MustNewAddress("0x3234567890123456789012345678901234567890") + + session := repository.NewSession("session-apply-usage", payer, receiver, dataService, pricingConfig) + require.NoError(t, db.SessionCreate(ctx, session)) + + event := &repository.UsageEvent{ + Timestamp: time.Now(), + Blocks: 100, + Bytes: 2000, + Requests: 10, + } + cost := pricingConfig.CalculateUsageCost(100, 2000).BigInt() + + require.NoError(t, db.SessionApplyUsage(ctx, "session-apply-usage", event, cost)) + + retrieved, err := db.SessionGet(ctx, 
"session-apply-usage") + require.NoError(t, err) + assert.Equal(t, uint64(100), retrieved.BlocksProcessed) + assert.Equal(t, uint64(2000), retrieved.BytesTransferred) + assert.Equal(t, uint64(10), retrieved.Requests) + assert.Equal(t, 0, retrieved.TotalCost.Cmp(cost)) + + var usageEventCount int + err = db.GetContext(ctx, &usageEventCount, `SELECT COUNT(*) FROM usage_events WHERE session_id = $1`, "session-apply-usage") + require.NoError(t, err) + assert.Equal(t, 1, usageEventCount) + }) +} + // TestCascadeDelete was removed because SessionDelete is not used in production code diff --git a/provider/repository/psql/session.go b/provider/repository/psql/session.go index 9cb99f5..7e69f5b 100644 --- a/provider/repository/psql/session.go +++ b/provider/repository/psql/session.go @@ -10,6 +10,7 @@ import ( func init() { registerFiles([]string{ + "session/apply_usage.sql", "session/create.sql", "session/get.sql", "session/update.sql", diff --git a/provider/repository/psql/sql/session/apply_usage.sql b/provider/repository/psql/sql/session/apply_usage.sql new file mode 100644 index 0000000..c73d80e --- /dev/null +++ b/provider/repository/psql/sql/session/apply_usage.sql @@ -0,0 +1,29 @@ +-- Apply provider-authoritative metered usage to the session aggregates. 
+UPDATE sessions +SET + blocks_processed = blocks_processed + :blocks_delta, + bytes_transferred = bytes_transferred + :bytes_delta, + requests = requests + :requests_delta, + total_cost = COALESCE(total_cost, 0) + :cost_delta +WHERE id = :id +RETURNING + id, + created_at, + updated_at, + last_keep_alive, + status, + metadata, + ended_at, + end_reason, + payer, + receiver, + data_service, + signer, + blocks_processed, + bytes_transferred, + requests, + total_cost, + baseline_blocks, + baseline_bytes, + baseline_reqs, + baseline_cost diff --git a/provider/repository/psql/usage.go b/provider/repository/psql/usage.go index 129ba52..f728629 100644 --- a/provider/repository/psql/usage.go +++ b/provider/repository/psql/usage.go @@ -2,8 +2,12 @@ package psql import ( "context" + "fmt" + "math/big" + "strings" "github.com/graphprotocol/substreams-data-service/provider/repository" + "github.com/jmoiron/sqlx" ) func init() { @@ -26,3 +30,84 @@ func (r *Database) UsageAdd(ctx context.Context, sessionID string, usage *reposi return err } + +// SessionApplyUsage atomically persists a metering event and advances the owning session aggregates. 
+func (r *Database) SessionApplyUsage(ctx context.Context, sessionID string, usage *repository.UsageEvent, cost *big.Int) (err error) { + if usage == nil { + return fmt.Errorf("usage event must not be nil") + } + + blocks, bytes, requests := usage.SanitizedTotals() + costVal := mustValue(newGRT(cost)) + + tx, err := r.BeginTxx(ctx, nil) + if err != nil { + return fmt.Errorf("begin usage transaction: %w", err) + } + defer func() { + if err != nil { + _ = tx.Rollback() + } + }() + + sessionRows, err := bindAndQueryxContext(ctx, tx, onDiskStatement("session/apply_usage.sql"), map[string]any{ + "id": sessionID, + "blocks_delta": int64(blocks), + "bytes_delta": int64(bytes), + "requests_delta": int64(requests), + "cost_delta": costVal, + }) + if err != nil { + return fmt.Errorf("failed %s: %w", strings.ReplaceAll("session/apply_usage.sql", "_", " "), err) + } + var sessionUpdated bool + for sessionRows.Next() { + sessionUpdated = true + } + if err := sessionRows.Err(); err != nil { + _ = sessionRows.Close() + return fmt.Errorf("scan session apply rows: %w", err) + } + if closeErr := sessionRows.Close(); closeErr != nil { + return fmt.Errorf("close session apply rows: %w", closeErr) + } + if !sessionUpdated { + return repository.ErrNotFound + } + + usageRows, err := bindAndQueryxContext(ctx, tx, onDiskStatement("usage/add.sql"), map[string]any{ + "session_id": sessionID, + "timestamp": usage.Timestamp, + "blocks": int64(blocks), + "bytes": int64(bytes), + "requests": int64(requests), + }) + if err != nil { + return fmt.Errorf("failed %s: %w", strings.ReplaceAll("usage/add.sql", "_", " "), err) + } + for usageRows.Next() { + } + if err := usageRows.Err(); err != nil { + _ = usageRows.Close() + return fmt.Errorf("scan usage add rows: %w", err) + } + if closeErr := usageRows.Close(); closeErr != nil { + return fmt.Errorf("close usage add rows: %w", closeErr) + } + + if err := tx.Commit(); err != nil { + return fmt.Errorf("commit usage transaction: %w", err) + } + + return 
nil +} + +func bindAndQueryxContext(ctx context.Context, tx *sqlx.Tx, statement string, args map[string]any) (*sqlx.Rows, error) { + query, boundArgs, err := sqlx.Named(statement, args) + if err != nil { + return nil, err + } + + query = tx.Rebind(query) + return tx.QueryxContext(ctx, query, boundArgs...) +} diff --git a/provider/repository/repository.go b/provider/repository/repository.go index a39c3b7..7974aac 100644 --- a/provider/repository/repository.go +++ b/provider/repository/repository.go @@ -24,6 +24,7 @@ type GlobalRepository interface { SessionCreate(ctx context.Context, session *Session) error SessionGet(ctx context.Context, sessionID string) (*Session, error) SessionUpdate(ctx context.Context, session *Session) error + SessionApplyUsage(ctx context.Context, sessionID string, usage *UsageEvent, cost *big.Int) error SessionList(ctx context.Context, filter SessionFilter) ([]*Session, error) SessionCount(ctx context.Context) int @@ -225,6 +226,22 @@ type UsageEvent struct { Requests int64 } +// SanitizedTotals returns the event counters clamped to non-negative uint64 values. +func (u *UsageEvent) SanitizedTotals() (blocks, bytes, requests uint64) { + if u == nil { + return 0, 0, 0 + } + + return sanitizeUsageMetric(u.Blocks), sanitizeUsageMetric(u.Bytes), sanitizeUsageMetric(u.Requests) +} + +func sanitizeUsageMetric(v int64) uint64 { + if v <= 0 { + return 0 + } + return uint64(v) +} + // UsageSummary aggregates total usage across all events for a session. 
type UsageSummary struct { TotalBlocks int64 diff --git a/provider/usage/service.go b/provider/usage/service.go index 798b502..9a45cf6 100644 --- a/provider/usage/service.go +++ b/provider/usage/service.go @@ -4,6 +4,7 @@ package usage import ( "context" + "errors" "time" "connectrpc.com/connect" @@ -20,14 +21,15 @@ var zlog, _ = logging.PackageLogger("sds_usage", "github.com/graphprotocol/subst // It receives batched metering events from the dmetering plugin and stores // them in the GlobalRepository for later aggregation and reporting. type UsageService struct { - repo repository.GlobalRepository + repo repository.GlobalRepository + pricingConfig repository.PricingConfig } var _ usagev1connect.UsageServiceHandler = (*UsageService)(nil) -// NewUsageService creates a new UsageService backed by the given repository. -func NewUsageService(repo repository.GlobalRepository) *UsageService { - return &UsageService{repo: repo} +// NewUsageService creates a new UsageService backed by the given repository and provider pricing. +func NewUsageService(repo repository.GlobalRepository, pricingConfig repository.PricingConfig) *UsageService { + return &UsageService{repo: repo, pricingConfig: pricingConfig} } // Report receives a batch of metering events from the dmetering plugin. 
@@ -60,8 +62,18 @@ func (s *UsageService) Report( } usageEvent := protoEventToUsageEvent(event) - - if err := s.repo.UsageAdd(ctx, sessionID, usageEvent); err != nil { + blocks, bytes, _ := usageEvent.SanitizedTotals() + cost := s.pricingConfig.CalculateUsageCost(blocks, bytes).BigInt() + + if err := s.repo.SessionApplyUsage(ctx, sessionID, usageEvent, cost); err != nil { + if errors.Is(err, repository.ErrNotFound) { + zlog.Warn("usage event references unknown session", + zap.String("organization_id", event.OrganizationId), + zap.String("session_id", sessionID), + zap.Error(err), + ) + continue + } zlog.Warn("failed to record usage event", zap.String("organization_id", event.OrganizationId), zap.String("session_id", sessionID), diff --git a/provider/usage/service_test.go b/provider/usage/service_test.go index 53f63f1..6838dca 100644 --- a/provider/usage/service_test.go +++ b/provider/usage/service_test.go @@ -2,13 +2,16 @@ package usage_test import ( "context" + "math/big" "testing" "time" "connectrpc.com/connect" + sds "github.com/graphprotocol/substreams-data-service" usagev1 "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/sds/usage/v1" "github.com/graphprotocol/substreams-data-service/provider/repository" "github.com/graphprotocol/substreams-data-service/provider/usage" + "github.com/streamingfast/eth-go" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "google.golang.org/protobuf/types/known/timestamppb" @@ -18,6 +21,11 @@ func newTestRepo() *repository.InMemoryRepository { return repository.NewInMemoryRepository() } +var testPricingConfig = repository.PricingConfig{ + PricePerBlock: sds.NewGRTFromUint64(2), + PricePerByte: sds.NewGRTFromUint64(3), +} + type capturingRepo struct { *repository.InMemoryRepository usageBySession map[string][]*repository.UsageEvent @@ -35,9 +43,14 @@ func (r *capturingRepo) UsageAdd(_ context.Context, sessionID string, usage *rep return nil } +func (r *capturingRepo) 
SessionApplyUsage(_ context.Context, sessionID string, usage *repository.UsageEvent, _ *big.Int) error { + r.usageBySession[sessionID] = append(r.usageBySession[sessionID], usage) + return nil +} + func TestUsageService_Report_Empty(t *testing.T) { repo := newTestRepo() - svc := usage.NewUsageService(repo) + svc := usage.NewUsageService(repo, testPricingConfig) resp, err := svc.Report(context.Background(), connect.NewRequest(&usagev1.ReportRequest{})) require.NoError(t, err) @@ -46,7 +59,7 @@ func TestUsageService_Report_Empty(t *testing.T) { func TestUsageService_Report_SingleEvent(t *testing.T) { repo := newTestRepo() - svc := usage.NewUsageService(repo) + svc := usage.NewUsageService(repo, testPricingConfig) ts := timestamppb.New(time.Now()) resp, err := svc.Report(context.Background(), connect.NewRequest(&usagev1.ReportRequest{ @@ -73,7 +86,7 @@ func TestUsageService_Report_SingleEvent(t *testing.T) { func TestUsageService_Report_MultipleEvents_SamePayer(t *testing.T) { repo := newTestRepo() - svc := usage.NewUsageService(repo) + svc := usage.NewUsageService(repo, testPricingConfig) _, err := svc.Report(context.Background(), connect.NewRequest(&usagev1.ReportRequest{ Events: []*usagev1.Event{ @@ -97,7 +110,7 @@ func TestUsageService_Report_MultipleEvents_SamePayer(t *testing.T) { func TestUsageService_Report_MultipleEvents_DifferentPayers(t *testing.T) { repo := newTestRepo() - svc := usage.NewUsageService(repo) + svc := usage.NewUsageService(repo, testPricingConfig) _, err := svc.Report(context.Background(), connect.NewRequest(&usagev1.ReportRequest{ Events: []*usagev1.Event{ @@ -121,7 +134,7 @@ func TestUsageService_Report_MultipleEvents_DifferentPayers(t *testing.T) { func TestUsageService_Report_SessionId_FallbackToOrganizationId(t *testing.T) { repo := newTestRepo() - svc := usage.NewUsageService(repo) + svc := usage.NewUsageService(repo, testPricingConfig) _, err := svc.Report(context.Background(), connect.NewRequest(&usagev1.ReportRequest{ Events: 
[]*usagev1.Event{ @@ -139,7 +152,7 @@ func TestUsageService_Report_SessionId_FallbackToOrganizationId(t *testing.T) { func TestUsageService_Report_IgnoresInvalidMetrics(t *testing.T) { repo := newTestRepo() - svc := usage.NewUsageService(repo) + svc := usage.NewUsageService(repo, testPricingConfig) _, err := svc.Report(context.Background(), connect.NewRequest(&usagev1.ReportRequest{ Events: []*usagev1.Event{ @@ -158,7 +171,7 @@ func TestUsageService_Report_IgnoresInvalidMetrics(t *testing.T) { func TestUsageService_Report_AllMetrics(t *testing.T) { repo := newTestRepo() - svc := usage.NewUsageService(repo) + svc := usage.NewUsageService(repo, testPricingConfig) _, err := svc.Report(context.Background(), connect.NewRequest(&usagev1.ReportRequest{ Events: []*usagev1.Event{ @@ -178,7 +191,7 @@ func TestUsageService_Report_AllMetrics(t *testing.T) { func TestUsageService_Report_FirehoseCoreMetricNames(t *testing.T) { repo := newCapturingRepo() - svc := usage.NewUsageService(repo) + svc := usage.NewUsageService(repo, testPricingConfig) resp, err := svc.Report(context.Background(), connect.NewRequest(&usagev1.ReportRequest{ Events: []*usagev1.Event{ @@ -201,3 +214,39 @@ func TestUsageService_Report_FirehoseCoreMetricNames(t *testing.T) { assert.Equal(t, int64(2048), repo.usageBySession["session-1"][0].Bytes) assert.Equal(t, int64(0), repo.usageBySession["session-1"][0].Requests) } + +func TestUsageService_Report_AppliesMeteredUsageToSession(t *testing.T) { + repo := newTestRepo() + session := repository.NewSession( + "session-1", + eth.MustNewAddress("0x1111111111111111111111111111111111111111"), + eth.MustNewAddress("0x2222222222222222222222222222222222222222"), + eth.MustNewAddress("0x3333333333333333333333333333333333333333"), + testPricingConfig, + ) + require.NoError(t, repo.SessionCreate(context.Background(), session)) + + svc := usage.NewUsageService(repo, testPricingConfig) + + resp, err := svc.Report(context.Background(), 
connect.NewRequest(&usagev1.ReportRequest{ + Events: []*usagev1.Event{ + { + SdsSessionId: "session-1", + Metrics: []*usagev1.Metric{ + {Name: "block_count", Value: 4}, + {Name: "egress_bytes", Value: 5}, + {Name: "requests_count", Value: 1}, + }, + }, + }, + })) + require.NoError(t, err) + assert.False(t, resp.Msg.Revoked) + + updatedSession, err := repo.SessionGet(context.Background(), "session-1") + require.NoError(t, err) + assert.Equal(t, uint64(4), updatedSession.BlocksProcessed) + assert.Equal(t, uint64(5), updatedSession.BytesTransferred) + assert.Equal(t, uint64(1), updatedSession.Requests) + assert.Equal(t, 0, updatedSession.TotalCost.Cmp(testPricingConfig.CalculateUsageCost(4, 5).BigInt())) +} diff --git a/test/integration/firecore_test.go b/test/integration/firecore_test.go index 6a701db..cfa0e8b 100644 --- a/test/integration/firecore_test.go +++ b/test/integration/firecore_test.go @@ -179,8 +179,26 @@ func TestFirecore(t *testing.T) { evidence.UsageBytes = usageEvidence.UsageBytes evidence.UsageRequests = usageEvidence.UsageRequests - return evidence.UsageBlocks+evidence.UsageBytes+evidence.UsageRequests >= 1 - }, 3*time.Second, 100*time.Millisecond, "expected metering to record non-zero usage") + statusResp, err = providerClient.GetSessionStatus(ctx, connect.NewRequest(&providerv1.GetSessionStatusRequest{ + SessionId: evidence.SessionID, + })) + if err != nil { + firecoreLog.Warn("failed to refresh gateway session status", + zap.String("session_id", evidence.SessionID), + zap.Error(err), + ) + return false + } + + paymentStatus := statusResp.Msg.GetPaymentStatus() + if paymentStatus == nil || paymentStatus.GetAccumulatedUsageValue() == nil { + return false + } + + return evidence.UsageBlocks+evidence.UsageBytes+evidence.UsageRequests >= 1 && + paymentStatus.GetAccumulatedUsageValue().ToBigInt().Sign() > 0 + }, 3*time.Second, 100*time.Millisecond, "expected metering to update the payment-state repository") + require.Positive(t, 
statusResp.Msg.GetPaymentStatus().GetAccumulatedUsageValue().ToBigInt().Sign(), "expected non-zero accumulated usage value from plugin metering") firecoreLog.Info("E2E Substreams request completed successfully", zap.String("session_id", evidence.SessionID), From 1171ed05339b4428d54a758a3946f9e8d3c151ab Mon Sep 17 00:00:00 2001 From: Juan Manuel Rodriguez Defago Date: Sat, 28 Mar 2026 02:08:50 -0300 Subject: [PATCH 15/17] Harden firecore payment-state assertion - reuse the provider pricing config in TestFirecore so the acceptance path checks the exact gateway-visible accumulated usage value - derive the expected total from persisted plugin metering evidence instead of only asserting that payment state became non-zero - keep the local-runtime Firecore integration as the proof that provider-side metering and gateway payment state remain exactly aligned --- test/integration/firecore_test.go | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/test/integration/firecore_test.go b/test/integration/firecore_test.go index cfa0e8b..3f82d9a 100644 --- a/test/integration/firecore_test.go +++ b/test/integration/firecore_test.go @@ -75,6 +75,8 @@ func TestFirecore(t *testing.T) { zap.String("postgres_dsn", sanitizeDSN(PostgresTestDSN)), ) + pricingConfig := sidecarlib.DefaultPricingConfig() + gateways, err := impl.StartProviderGateway( ctx, "0.0.0.0:19001", // Payment Gateway - for consumer sidecars @@ -91,7 +93,7 @@ func TestFirecore(t *testing.T) { TLSCertFile: "", TLSKeyFile: "", }, - sidecarlib.DefaultPricingConfig(), + pricingConfig, ) require.NoError(t, err, "failed to start provider gateways") defer gateways.Shutdown(nil) @@ -195,10 +197,16 @@ func TestFirecore(t *testing.T) { return false } - return evidence.UsageBlocks+evidence.UsageBytes+evidence.UsageRequests >= 1 && - paymentStatus.GetAccumulatedUsageValue().ToBigInt().Sign() > 0 - }, 3*time.Second, 100*time.Millisecond, "expected metering to update the payment-state repository") - 
require.Positive(t, statusResp.Msg.GetPaymentStatus().GetAccumulatedUsageValue().ToBigInt().Sign(), "expected non-zero accumulated usage value from plugin metering") + if evidence.UsageBlocks+evidence.UsageBytes+evidence.UsageRequests < 1 { + return false + } + + expectedAccumulatedValue := pricingConfig.CalculateUsageCost(uint64(evidence.UsageBlocks), uint64(evidence.UsageBytes)).BigInt() + return paymentStatus.GetAccumulatedUsageValue().ToBigInt().Cmp(expectedAccumulatedValue) == 0 + }, 3*time.Second, 100*time.Millisecond, "expected metering to update the payment-state repository with the exact provider-priced value") + + expectedAccumulatedValue := pricingConfig.CalculateUsageCost(uint64(evidence.UsageBlocks), uint64(evidence.UsageBytes)).BigInt() + require.Equal(t, 0, statusResp.Msg.GetPaymentStatus().GetAccumulatedUsageValue().ToBigInt().Cmp(expectedAccumulatedValue), "expected payment status to match the exact provider-priced plugin metering total") firecoreLog.Info("E2E Substreams request completed successfully", zap.String("session_id", evidence.SessionID), From 6a79b07961dc75074b0909e312e2b7a65e02e087 Mon Sep 17 00:00:00 2001 From: Juan Manuel Rodriguez Defago Date: Sat, 28 Mar 2026 02:25:46 -0300 Subject: [PATCH 16/17] Mark MVP-015 complete in planning docs - update the backlog to close MVP-015 with the validated local-runtime acceptance evidence and exact Firecore verification path - move MVP-014 and MVP-015 into completed foundation work in the sequencing notes and promote MVP-016 as the next runtime-control task - record that the local-first Firecore workflow now proves plugin metering and gateway-visible payment state stay exactly aligned --- docs/mvp-implementation-sequencing.md | 15 +++++++++------ plans/mvp-implementation-backlog.md | 14 +++++++++++--- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/docs/mvp-implementation-sequencing.md b/docs/mvp-implementation-sequencing.md index 9082e0b..05d7b85 100644 --- 
a/docs/mvp-implementation-sequencing.md +++ b/docs/mvp-implementation-sequencing.md @@ -119,14 +119,14 @@ Completed foundation: - `MVP-010` Implement session-local low-funds detection and provider terminal stop behavior during streaming - `MVP-012` Add deterministic cost-based RAV issuance thresholds suitable for real runtime behavior +- `MVP-014` Integrate provider gateway validation into the real provider streaming path +- `MVP-015` Wire real byte metering from the provider/plugin path into gateway payment state Recommended next sequence: -1. `MVP-014` Integrate provider gateway validation into the real provider streaming path -2. `MVP-015` Wire real byte metering from the provider/plugin path into gateway payment state -3. `MVP-011` Propagate provider low-funds stop decisions through consumer sidecar into the real client path -4. `MVP-016` Enforce gateway Continue/Stop decisions in the live provider stream lifecycle -5. `MVP-031` Wire the live PaymentSession and RAV-control loop into the real client/provider runtime path +1. `MVP-016` Enforce gateway Continue/Stop decisions in the live provider stream lifecycle +2. `MVP-011` Propagate provider low-funds stop decisions through consumer sidecar into the real client path +3. 
`MVP-031` Wire the live PaymentSession and RAV-control loop into the real client/provider runtime path Notes: @@ -138,6 +138,10 @@ Notes: - cost-based only - compares unbaselined `delta_cost` against a provider-side `rav_request_threshold` - defaults to `10 GRT` when the provider does not configure a threshold explicitly +- `MVP-014` and `MVP-015` are now complete under the local-first runtime workflow: + - `TestFirecore` passes against `ghcr.io/streamingfast/dummy-blockchain:sds-local` + - plugin metering updates the same provider session/payment state surfaced by `GetSessionStatus` + - the Firecore acceptance path now asserts the exact gateway-visible accumulated usage value derived from persisted metering totals and provider pricing - `MVP-014` remains the main integration foundation in this lane. - Current status: repo-local gateway wiring and the real-path `TestFirecore` harness are in place, and local-first acceptance now passes when the test is pointed at a locally rebuilt `firecore`/`dummy-blockchain` image via `SDS_TEST_DUMMY_BLOCKCHAIN_IMAGE`. - The validated local runtime tuple on 2026-03-28 was: @@ -266,7 +270,6 @@ Already resolved: ### Phase 2: Integrate Runtime And Retrieval Paths -- `MVP-015` - `MVP-011` - `MVP-016` - `MVP-017` diff --git a/plans/mvp-implementation-backlog.md b/plans/mvp-implementation-backlog.md index 2bc9571..9498888 100644 --- a/plans/mvp-implementation-backlog.md +++ b/plans/mvp-implementation-backlog.md @@ -386,10 +386,16 @@ These assumptions are referenced by task ID so it is clear which scope decisions - `SDS_TEST_DUMMY_BLOCKCHAIN_IMAGE=ghcr.io/streamingfast/dummy-blockchain:sds-local go test ./test/integration -run TestFirecore -v -count=1` passes without skip against a firecore/dummy-blockchain runtime rebuilt from current SDS-compatible sources. 
- The backlog and runtime-compatibility docs explicitly identify the prebuilt `dummy-blockchain:v1.7.7` image as incompatible with the current SDS provider/plugin contract until `MVP-036` lands. -- [ ] MVP-015 Wire real byte metering and session correlation from the plugin path into the payment-state repository used by the gateway. +- [x] MVP-015 Wire real byte metering and session correlation from the plugin path into the payment-state repository used by the gateway. - Context: - The recent commit range fixed session ID propagation and pushed more correlation through typed plugin fields and shared repository state. - - The remaining work is to validate the billing and payment-state behavior at acceptance level. + - The repo-local acceptance path is now validated: provider-side plugin metering advances the same session aggregates and accumulated cost surfaced by `GetSessionStatus`, and the real Firecore path proves that exact pricing alignment against persisted metering evidence. + - The local-first acceptance run was validated on 2026-03-28 against: + - SDS `1171ed0bbf7a7254f6655d98c1e7947f5a3bd776` plus `ad3420a6ac9c11f48f6a9d7f478cf487233357d7` + - `firehose-core` `b574a98babcb0338198e0ff4db7ebd0e404f6529` + - `dummy-blockchain` `1cea671e78cbb069d64333fdbf4a6c9dd5502d58` + - `substreams` `8897dccff3e2f989867b7711be91d613d256a36a` + - image tags `ghcr.io/streamingfast/firehose-core:sds-local` and `ghcr.io/streamingfast/dummy-blockchain:sds-local` - Assumptions: - `A3` - Done when: @@ -397,7 +403,9 @@ These assumptions are referenced by task ID so it is clear which scope decisions - Session correlation is stable across auth, session, usage, and gateway-side payment state. - The runtime path does not rely on consumer-reported bytes as the billing source of truth. - Verify: - - Add tests or manual instrumentation evidence showing live provider/plugin activity updates the payment-state repository consistently. 
+ - `go test ./provider/usage ./provider/repository/psql -count=1` passes with repository/service coverage for authoritative metering application. + - `go test ./test/integration -run TestConsumerSidecar_ReportUsage_WiresPaymentSessionLoop -count=1` passes to confirm the existing wrapper-oriented payment loop still works. + - `SDS_TEST_DUMMY_BLOCKCHAIN_IMAGE=ghcr.io/streamingfast/dummy-blockchain:sds-local go test ./test/integration -run TestFirecore -count=1 -v` passes with `GetSessionStatus().payment_status.accumulated_usage_value` exactly matching the provider-priced total derived from persisted plugin metering evidence. - [ ] MVP-016 Enforce gateway Continue/Stop decisions in the live provider stream lifecycle. - Context: From 964554b728d061da5d1c861a4ec86845f478120e Mon Sep 17 00:00:00 2001 From: Juan Manuel Rodriguez Defago Date: Sat, 28 Mar 2026 03:51:08 -0300 Subject: [PATCH 17/17] Enforce low-funds stops in live firecore streams - make provider session keepalive and worker acquisition fail closed for unknown or ended sessions, with payment exhaustion surfacing as resource exhausted and other invalid states as permission denied - align the SDS session plugin error mapping and extend Firecore integration coverage with a dedicated low-funds runtime stop path alongside the existing happy path - close MVP-016 in the backlog and sequencing docs while tracking the shared-state Firecore test hardening follow-up separately under MVP-037 --- docs/mvp-implementation-sequencing.md | 13 +- plans/mvp-implementation-backlog.md | 38 +++- provider/plugin/session.go | 2 + provider/plugin/session_test.go | 113 +++++++++++ provider/repository/inmemory.go | 4 +- provider/repository/inmemory_test.go | 3 + provider/session/service.go | 76 +++++--- provider/session/service_test.go | 146 +++++++++++++- test/integration/firecore_test.go | 262 ++++++++++++++++++++++++-- 9 files changed, 602 insertions(+), 55 deletions(-) create mode 100644 provider/plugin/session_test.go diff --git 
a/docs/mvp-implementation-sequencing.md b/docs/mvp-implementation-sequencing.md index 05d7b85..6b2616b 100644 --- a/docs/mvp-implementation-sequencing.md +++ b/docs/mvp-implementation-sequencing.md @@ -121,12 +121,13 @@ Completed foundation: - `MVP-012` Add deterministic cost-based RAV issuance thresholds suitable for real runtime behavior - `MVP-014` Integrate provider gateway validation into the real provider streaming path - `MVP-015` Wire real byte metering from the provider/plugin path into gateway payment state +- `MVP-016` Enforce gateway Continue/Stop decisions in the live provider stream lifecycle Recommended next sequence: -1. `MVP-016` Enforce gateway Continue/Stop decisions in the live provider stream lifecycle -2. `MVP-011` Propagate provider low-funds stop decisions through consumer sidecar into the real client path -3. `MVP-031` Wire the live PaymentSession and RAV-control loop into the real client/provider runtime path +1. `MVP-011` Propagate provider low-funds stop decisions through consumer sidecar into the real client path +2. `MVP-031` Wire the live PaymentSession and RAV-control loop into the real client/provider runtime path +3. 
`MVP-037` Isolate and harden the shared-state Firecore and low-funds integration tests so real-path acceptance remains deterministic across full-suite runs Notes: @@ -138,10 +139,11 @@ Notes: - cost-based only - compares unbaselined `delta_cost` against a provider-side `rav_request_threshold` - defaults to `10 GRT` when the provider does not configure a threshold explicitly -- `MVP-014` and `MVP-015` are now complete under the local-first runtime workflow: +- `MVP-014`, `MVP-015`, and `MVP-016` are now complete under the local-first runtime workflow: - `TestFirecore` passes against `ghcr.io/streamingfast/dummy-blockchain:sds-local` - plugin metering updates the same provider session/payment state surfaced by `GetSessionStatus` - the Firecore acceptance path now asserts the exact gateway-visible accumulated usage value derived from persisted metering totals and provider pricing + - the live Firecore/Substreams stream now stops on provider-enforced low-funds termination through the existing session-plugin keepalive cancellation path, with low-funds termination surfacing as runtime `ResourceExhausted` - `MVP-014` remains the main integration foundation in this lane. - Current status: repo-local gateway wiring and the real-path `TestFirecore` harness are in place, and local-first acceptance now passes when the test is pointed at a locally rebuilt `firecore`/`dummy-blockchain` image via `SDS_TEST_DUMMY_BLOCKCHAIN_IMAGE`. - The validated local runtime tuple on 2026-03-28 was: @@ -150,7 +152,8 @@ Notes: - `dummy-blockchain` `1cea671e78cbb069d64333fdbf4a6c9dd5502d58` - `substreams` `8897dccff3e2f989867b7711be91d613d256a36a` - The prebuilt published `dummy-blockchain` image remains stale and still embeds an older SDS-compatible runtime snapshot, so publishing refreshed upstream images is tracked separately under `MVP-036`, while `MVP-030` remains the compatibility/preflight hardening follow-up. 
-- `MVP-011` is partially advanced because the current sidecar wrapper path already stops on `NeedMoreFunds`, but the real client-facing ingress path is still unfinished. +- `MVP-011` is now the main remaining low-funds/runtime-control gap. + - Current status: the sidecar wrapper path already stops on `NeedMoreFunds`, and the provider-side live stream now stops on enforced low-funds termination, but the real client-facing ingress path is still unfinished. - `MVP-031` is effectively the capstone runtime-payment task because it depends on real provider and consumer integration plus thresholding. ### Lane C: Provider State, Settlement, And Operator Retrieval diff --git a/plans/mvp-implementation-backlog.md b/plans/mvp-implementation-backlog.md index 9498888..22b739b 100644 --- a/plans/mvp-implementation-backlog.md +++ b/plans/mvp-implementation-backlog.md @@ -102,8 +102,8 @@ These assumptions are referenced by task ID so it is clear which scope decisions | MVP-012 | `done` | funding-control | none | `MVP-004` | `A`, `C` | Add deterministic cost-based RAV issuance thresholds suitable for real runtime behavior | | MVP-013 | `deferred` | consumer | `A3` | none | none | Post-MVP only: implement true provider-authoritative payment-session reconnect/resume semantics | | MVP-014 | `done` | provider-integration | `A3` | `MVP-004` | `A` | Integrate the public Payment Gateway and private Plugin Gateway into the real provider streaming path | -| MVP-015 | `in_progress` | provider-integration | `A3` | `MVP-004`, `MVP-014` | `A`, `C` | Wire real byte metering and session correlation from the plugin path into the payment-state repository used by the gateway | -| MVP-016 | `not_started` | provider-integration | `A6` | `MVP-010`, `MVP-014` | `C` | Enforce gateway Continue/Stop decisions in the live provider stream lifecycle | +| MVP-015 | `done` | provider-integration | `A3` | `MVP-004`, `MVP-014` | `A`, `C` | Wire real byte metering and session correlation from the plugin path into 
the payment-state repository used by the gateway | +| MVP-016 | `done` | provider-integration | `A6` | `MVP-010`, `MVP-014` | `C` | Enforce gateway Continue/Stop decisions in the live provider stream lifecycle | | MVP-017 | `not_started` | consumer-integration | `A1`, `A2`, `A3` | `MVP-007`, `MVP-011`, `MVP-033` | `A`, `C` | Implement the consumer sidecar as a Substreams-compatible endpoint/proxy rather than only a wrapper-controlled lifecycle service | | MVP-018 | `not_started` | tooling | none | `MVP-032` | `E` | Implement operator funding CLI flows for approve/deposit/top-up beyond local demo assumptions | | MVP-019 | `not_started` | tooling | `A5` | `MVP-009`, `MVP-022` | `D`, `F` | Implement provider inspection CLI flows for accepted and collectible RAV data | @@ -124,6 +124,7 @@ These assumptions are referenced by task ID so it is clear which scope decisions | MVP-034 | `done` | validation | none | none | none | Fix repository PostgreSQL tests so migrations resolve from repo-relative state rather than a machine-specific absolute path | | MVP-035 | `done` | validation | none | none | none | Make integration devenv startup resilient to local fixed-port collisions so the shared test environment is reproducible | | MVP-036 | `not_started` | operations | `A5` | `MVP-014` | `A`, `G` | Publish refreshed upstream `firehose-core` and `dummy-blockchain` images built against the current SDS plugin/runtime contract so default integration paths no longer rely on local override tags | +| MVP-037 | `not_started` | validation | none | `MVP-014`, `MVP-016` | `A`, `C` | Isolate and harden the shared-state Firecore and low-funds integration tests so real-path acceptance remains deterministic across full-suite runs | ## Protocol and Contract Tasks @@ -407,16 +408,30 @@ These assumptions are referenced by task ID so it is clear which scope decisions - `go test ./test/integration -run TestConsumerSidecar_ReportUsage_WiresPaymentSessionLoop -count=1` passes to confirm the existing 
wrapper-oriented payment loop still works. - `SDS_TEST_DUMMY_BLOCKCHAIN_IMAGE=ghcr.io/streamingfast/dummy-blockchain:sds-local go test ./test/integration -run TestFirecore -count=1 -v` passes with `GetSessionStatus().payment_status.accumulated_usage_value` exactly matching the provider-priced total derived from persisted plugin metering evidence. -- [ ] MVP-016 Enforce gateway Continue/Stop decisions in the live provider stream lifecycle. +- [x] MVP-016 Enforce gateway Continue/Stop decisions in the live provider stream lifecycle. - Context: - Provider-side control logic is incomplete if the live provider stream does not obey it. + - The repo-local acceptance path is now validated: plugin keepalive enforcement stops the live Firecore/Substreams stream when the provider session is no longer allowed to continue, while preserving the exact real-path `MVP-014` happy-path flow. + - The local-first acceptance run was validated on 2026-03-28 against: + - SDS `1171ed0bbf7a7254f6655d98c1e7947f5a3bd776` plus the current uncommitted `MVP-016` worktree changes + - `firehose-core` `b574a98babcb0338198e0ff4db7ebd0e404f6529` + - `dummy-blockchain` `1cea671e78cbb069d64333fdbf4a6c9dd5502d58` + - `substreams` `8897dccff3e2f989867b7711be91d613d256a36a` + - image tags `ghcr.io/streamingfast/firehose-core:sds-local` and `ghcr.io/streamingfast/dummy-blockchain:sds-local` - Assumptions: - `A6` - Done when: - The real provider path can enforce SDS control decisions during live streaming. - Gateway-driven low-funds stop behavior interrupts the live provider stream lifecycle appropriately rather than only ending the control-plane session. - Verify: - - Add manual or automated verification where the provider stops the live stream based on gateway control decisions. + - `go test ./provider/session ./provider/plugin ./provider/repository -count=1` passes with fail-closed provider session-service coverage and plugin error-mapping coverage. 
+ - `go test ./test/integration -run TestConsumerSidecar_ReportUsage_StopsOnLowFunds -count=1 -v` passes to confirm the preexisting wrapper-oriented low-funds stop path still works. + - `SDS_TEST_DUMMY_BLOCKCHAIN_IMAGE=ghcr.io/streamingfast/dummy-blockchain:sds-local go test ./test/integration -run 'TestFirecore|TestFirecoreStopsStreamOnLowFunds' -count=1 -v` passes with: + - the normal `TestFirecore` happy path still succeeding + - the dedicated low-funds Firecore path stopping the live stream early + - provider session state ending with `END_REASON_PAYMENT_ISSUE` + - worker cleanup eventually completing after the stop + - The prebuilt `ghcr.io/streamingfast/dummy-blockchain:v1.7.7` image remains stale and still blocks the default image path on the known header-propagation/runtime-drift issue; that remains tracked under `MVP-036`. - [ ] MVP-017 Implement the consumer sidecar as a Substreams-compatible endpoint/proxy rather than only a wrapper-controlled lifecycle service. - Context: @@ -635,6 +650,21 @@ These assumptions are referenced by task ID so it is clear which scope decisions - Verify: - Run `go test ./test/integration/...` with the default local port already occupied and confirm startup either succeeds using the supported fallback/override path or fails fast with a clear, actionable configuration message. +- [ ] MVP-037 Isolate and harden the shared-state Firecore and low-funds integration tests so real-path acceptance remains deterministic across full-suite runs. + - Context: + - `MVP-014` introduced the heavier real-path Firecore acceptance harness, and `MVP-016` extends that harness with a real low-funds stream-stop scenario. + - These tests are intentionally closer to a natural provider/runtime environment than typical unit-style integration tests: they boot the local chain/contracts, provider payment gateway, plugin gateway, consumer sidecar, Postgres, and dummy-blockchain/firecore together. 
+ - The current integration suite still shares one devenv/chain state across multiple tests, so helpers like `SetupCustomPaymentParticipantsWithSigner` can accumulate escrow/provision state for reused payer/provider pairs and make low-funds assertions order-dependent. + - Assumptions: + - none + - Done when: + - The real-path Firecore and consumer low-funds tests no longer rely on mutable shared payer/provider state across suite runs. + - Full `go test ./test/integration/...` runs are deterministic with respect to escrow/provision setup for the low-funds scenarios used by `MVP-014` and `MVP-016`. + - The repo documents whether those tests use per-test fresh chain state, snapshot/restore isolation, or strictly unique on-chain identities per scenario. + - Verify: + - Run the affected low-funds and Firecore tests both in isolation and as part of a broader `./test/integration/...` run and confirm they produce the same result. + - Add an assertion or helper-level guard that proves the expected pre-test escrow state before the behavioral assertion is evaluated. + - [ ] MVP-026 Refresh protocol/runtime docs so they match the revised MVP architecture and remaining open questions. - Context: - [docs/mvp-scope.md](../docs/mvp-scope.md) has been updated. 
diff --git a/provider/plugin/session.go b/provider/plugin/session.go index 3ef90c1..50d50ee 100644 --- a/provider/plugin/session.go +++ b/provider/plugin/session.go @@ -245,6 +245,8 @@ func (p *sessionPool) GetWorker(ctx context.Context, serviceName string, session switch connect.CodeOf(err) { case connect.CodeNotFound: return "", fmt.Errorf("%w: session not found", dsession.ErrSessionNotFound) + case connect.CodePermissionDenied: + return "", fmt.Errorf("%w: %s", dsession.ErrPermissionDenied, err.Error()) case connect.CodeResourceExhausted: return "", fmt.Errorf("%w: maximum workers per session exceeded", dsession.ErrWorkersLimitExceeded) } diff --git a/provider/plugin/session_test.go b/provider/plugin/session_test.go new file mode 100644 index 0000000..1a42290 --- /dev/null +++ b/provider/plugin/session_test.go @@ -0,0 +1,113 @@ +package plugin + +import ( + "context" + "errors" + "net/http" + "net/http/httptest" + "testing" + "time" + + "connectrpc.com/connect" + "github.com/alphadose/haxmap" + sds "github.com/graphprotocol/substreams-data-service" + sessionv1 "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/sds/session/v1" + "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/sds/session/v1/sessionv1connect" + "github.com/streamingfast/dauth" + "github.com/streamingfast/dsession" + "github.com/stretchr/testify/require" + "go.uber.org/zap" +) + +type stubSessionService struct { + sessionv1connect.UnimplementedSessionServiceHandler + + borrowWorker func(context.Context, *connect.Request[sessionv1.BorrowWorkerRequest]) (*connect.Response[sessionv1.BorrowWorkerResponse], error) + keepAlive func(context.Context, *connect.Request[sessionv1.KeepAliveRequest]) (*connect.Response[sessionv1.KeepAliveResponse], error) +} + +func (s stubSessionService) BorrowWorker(ctx context.Context, req *connect.Request[sessionv1.BorrowWorkerRequest]) (*connect.Response[sessionv1.BorrowWorkerResponse], error) { + if 
s.borrowWorker != nil { + return s.borrowWorker(ctx, req) + } + return connect.NewResponse(&sessionv1.BorrowWorkerResponse{}), nil +} + +func (s stubSessionService) KeepAlive(ctx context.Context, req *connect.Request[sessionv1.KeepAliveRequest]) (*connect.Response[sessionv1.KeepAliveResponse], error) { + if s.keepAlive != nil { + return s.keepAlive(ctx, req) + } + return connect.NewResponse(&sessionv1.KeepAliveResponse{}), nil +} + +func newTestSessionPool(t *testing.T, svc sessionv1connect.SessionServiceHandler, keepAliveDelay time.Duration) *sessionPool { + t.Helper() + + mux := http.NewServeMux() + path, handler := sessionv1connect.NewSessionServiceHandler(svc) + mux.Handle(path, handler) + + server := httptest.NewServer(mux) + t.Cleanup(server.Close) + + return &sessionPool{ + client: sessionv1connect.NewSessionServiceClient(server.Client(), server.URL), + logger: zap.NewNop(), + keepAliveDelay: keepAliveDelay, + minimalWorkerLifeDuration: 10 * time.Millisecond, + sessions: haxmap.New[string, *sessionInfo](), + } +} + +func TestSessionPoolGetWorker_MapsPermissionDenied(t *testing.T) { + pool := newTestSessionPool(t, stubSessionService{ + borrowWorker: func(context.Context, *connect.Request[sessionv1.BorrowWorkerRequest]) (*connect.Response[sessionv1.BorrowWorkerResponse], error) { + return nil, connect.NewError(connect.CodePermissionDenied, errors.New("session is not allowed")) + }, + }, 20*time.Millisecond) + + pool.sessions.Set("session-key", &sessionInfo{ + organizationID: "0x1111111111111111111111111111111111111111", + apiKeyID: "api-key", + traceID: "trace", + workers: haxmap.New[string, struct{}](), + closer: make(chan struct{}), + }) + + ctx := dauth.WithTrustedHeaders(context.Background(), dauth.TrustedHeaders{ + sds.HeaderSessionID: "sds-session-id", + }) + + _, err := pool.GetWorker(ctx, "substreams", "session-key", 1) + require.ErrorIs(t, err, dsession.ErrPermissionDenied) +} + +func TestSessionPoolKeepAlive_MapsResourceExhaustedToQuotaExceeded(t 
*testing.T) { + pool := newTestSessionPool(t, stubSessionService{ + keepAlive: func(context.Context, *connect.Request[sessionv1.KeepAliveRequest]) (*connect.Response[sessionv1.KeepAliveResponse], error) { + return nil, connect.NewError(connect.CodeResourceExhausted, errors.New("payment budget exhausted")) + }, + }, 10*time.Millisecond) + + done := make(chan struct{}) + pool.sessions.Set("session-key", &sessionInfo{ + apiKeyID: "api-key", + workers: haxmap.New[string, struct{}](), + closer: done, + }) + + errCh := make(chan error, 1) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + pool.startKeepAlive(ctx, done, "session-key", func(err error) { + errCh <- err + }) + + select { + case err := <-errCh: + require.ErrorIs(t, err, dsession.ErrQuotaExceeded) + case <-time.After(time.Second): + t.Fatal("expected keepalive to surface a quota exceeded error") + } +} diff --git a/provider/repository/inmemory.go b/provider/repository/inmemory.go index 11c093f..ae6f5aa 100644 --- a/provider/repository/inmemory.go +++ b/provider/repository/inmemory.go @@ -61,7 +61,7 @@ func (r *InMemoryRepository) SessionCreate(_ context.Context, session *Session) func (r *InMemoryRepository) SessionGet(_ context.Context, sessionID string) (*Session, error) { s, ok := r.sessions.Get(sessionID) if !ok { - return nil, fmt.Errorf("session %q not found", sessionID) + return nil, fmt.Errorf("session %q: %w", sessionID, ErrNotFound) } return s, nil } @@ -143,7 +143,7 @@ func (r *InMemoryRepository) WorkerCreate(_ context.Context, worker *Worker) err func (r *InMemoryRepository) WorkerGet(_ context.Context, workerKey string) (*Worker, error) { w, ok := r.workers.Get(workerKey) if !ok { - return nil, fmt.Errorf("worker %q not found", workerKey) + return nil, fmt.Errorf("worker %q: %w", workerKey, ErrNotFound) } return w, nil } diff --git a/provider/repository/inmemory_test.go b/provider/repository/inmemory_test.go index 6d7a011..6469f60 100644 --- 
a/provider/repository/inmemory_test.go +++ b/provider/repository/inmemory_test.go @@ -2,6 +2,7 @@ package repository_test import ( "context" + "errors" "testing" "time" @@ -80,6 +81,7 @@ func TestInMemory_SessionGet_NotFound(t *testing.T) { _, err := repo.SessionGet(ctx, "missing") require.Error(t, err) + assert.ErrorIs(t, err, repository.ErrNotFound) assert.Contains(t, err.Error(), "not found") } @@ -211,6 +213,7 @@ func TestInMemory_WorkerGet_NotFound(t *testing.T) { _, err := repo.WorkerGet(ctx, "missing") require.Error(t, err) + assert.True(t, errors.Is(err, repository.ErrNotFound)) } func TestInMemory_WorkerDelete(t *testing.T) { diff --git a/provider/session/service.go b/provider/session/service.go index 9371bb6..368ca4d 100644 --- a/provider/session/service.go +++ b/provider/session/service.go @@ -4,10 +4,12 @@ package session import ( "context" + "errors" "fmt" "time" "connectrpc.com/connect" + commonv1 "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/common/v1" sessionv1 "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/sds/session/v1" "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/sds/session/v1/sessionv1connect" "github.com/graphprotocol/substreams-data-service/provider/repository" @@ -38,10 +40,9 @@ func NewSessionService(repo repository.GlobalRepository, quotas *QuotaConfig) *S // BorrowWorker acquires a worker slot for a new streaming request. // -// - If the payer has reached max concurrent sessions a new session is -// created here (workers and sessions are treated as equivalent in the -// in-memory model; the dsession protocol is one worker == one connection). -// - Returns RESOURCE_EXHAUSTED if the payer's quota is exceeded. +// The request must reference a preexisting active SDS session created through +// the payment gateway flow. This keeps the live plugin path bound to +// provider-authoritative session state. 
func (s *SessionService) BorrowWorker( ctx context.Context, req *connect.Request[sessionv1.BorrowWorkerRequest], @@ -60,7 +61,7 @@ func (s *SessionService) BorrowWorker( // Get the SDS session ID from the request (set by session plugin from auth context). sessionID := req.Msg.SessionId if sessionID == "" { - return nil, connect.NewError(connect.CodeInternal, fmt.Errorf("session_id not provided in request")) + return nil, connect.NewError(connect.CodePermissionDenied, fmt.Errorf("session_id not provided in request")) } zlog.Debug("BorrowWorker service called", @@ -69,6 +70,10 @@ func (s *SessionService) BorrowWorker( zap.String("service", req.Msg.Service), ) + if _, err := s.authorizeSession(ctx, sessionID, payer); err != nil { + return nil, err + } + // Check current quota usage. quota, err := s.repo.QuotaGet(ctx, payer) if err != nil { @@ -92,24 +97,6 @@ func (s *SessionService) BorrowWorker( }), nil } - // Ensure session exists. - if _, getErr := s.repo.SessionGet(ctx, sessionID); getErr != nil { - newSession := &repository.Session{ - ID: sessionID, - Payer: payer, - Status: repository.SessionStatusActive, - CreatedAt: time.Now(), - LastKeepAlive: time.Now(), - } - if createErr := s.repo.SessionCreate(ctx, newSession); createErr != nil { - // A concurrent BorrowWorker may have created the session first; that's fine. - zlog.Debug("session already exists or create failed", - zap.String("session_id", sessionID), - zap.Error(createErr), - ) - } - } - // Create the worker entry. // Worker key is unique per request, built from payer and timestamp. workerKey := buildWorkerKey(payerStr, sessionID, time.Now()) @@ -217,13 +204,15 @@ func (s *SessionService) KeepAlive( worker, err := s.repo.WorkerGet(ctx, workerKey) if err != nil { - // Worker not found is non-fatal; the session may have been cleaned up. 
- return connect.NewResponse(&sessionv1.KeepAliveResponse{}), nil + if errors.Is(err, repository.ErrNotFound) { + return nil, connect.NewError(connect.CodePermissionDenied, fmt.Errorf("worker not found")) + } + return nil, connect.NewError(connect.CodeInternal, fmt.Errorf("reading worker: %w", err)) } - session, err := s.repo.SessionGet(ctx, worker.SessionID) + session, err := s.authorizeSession(ctx, worker.SessionID, worker.Payer) if err != nil { - return connect.NewResponse(&sessionv1.KeepAliveResponse{}), nil + return nil, err } session.LastKeepAlive = time.Now() @@ -237,6 +226,39 @@ func (s *SessionService) KeepAlive( return connect.NewResponse(&sessionv1.KeepAliveResponse{}), nil } +func (s *SessionService) authorizeSession(ctx context.Context, sessionID string, payer eth.Address) (*repository.Session, error) { + session, err := s.repo.SessionGet(ctx, sessionID) + if err != nil { + if errors.Is(err, repository.ErrNotFound) { + return nil, connect.NewError(connect.CodePermissionDenied, fmt.Errorf("session not found")) + } + return nil, connect.NewError(connect.CodeInternal, fmt.Errorf("reading session: %w", err)) + } + + if session.Payer.Pretty() != payer.Pretty() { + return nil, connect.NewError(connect.CodePermissionDenied, fmt.Errorf("session payer mismatch")) + } + + if !session.IsActive() { + return nil, sessionStateError(session) + } + + return session, nil +} + +func sessionStateError(session *repository.Session) error { + if session != nil && session.EndReason == commonv1.EndReason_END_REASON_PAYMENT_ISSUE { + return connect.NewError(connect.CodeResourceExhausted, fmt.Errorf("session ended due to payment issue")) + } + + status := repository.SessionStatus("") + if session != nil { + status = session.Status + } + + return connect.NewError(connect.CodePermissionDenied, fmt.Errorf("session is not active (status: %s)", status)) +} + // buildWorkerKey constructs a unique worker key. 
func buildWorkerKey(payer, traceID string, createdAt time.Time) string { return fmt.Sprintf("%s|%s|%d", payer, traceID, createdAt.UnixNano()) diff --git a/provider/session/service_test.go b/provider/session/service_test.go index 4501d04..b9e9806 100644 --- a/provider/session/service_test.go +++ b/provider/session/service_test.go @@ -3,8 +3,10 @@ package session_test import ( "context" "testing" + "time" "connectrpc.com/connect" + commonv1 "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/common/v1" sessionv1 "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/sds/session/v1" "github.com/graphprotocol/substreams-data-service/provider/repository" "github.com/graphprotocol/substreams-data-service/provider/session" @@ -35,10 +37,26 @@ func newTestKeepAliveRequest(msg *sessionv1.KeepAliveRequest) *connect.Request[s return connect.NewRequest(msg) } +func mustCreateSession(t *testing.T, repo *repository.InMemoryRepository, sessionID, payer string) *repository.Session { + t.Helper() + + sess := &repository.Session{ + ID: sessionID, + Payer: eth.MustNewAddress(payer), + Status: repository.SessionStatusActive, + CreatedAt: time.Now(), + UpdatedAt: time.Now(), + LastKeepAlive: time.Now(), + } + require.NoError(t, repo.SessionCreate(context.Background(), sess)) + return sess +} + // --- BorrowWorker --- func TestSessionService_BorrowWorker_Success(t *testing.T) { svc, repo := newTestService(nil) + mustCreateSession(t, repo, "test-session-001", "0x1111111111111111111111111111111111111111") req := newTestRequest(&sessionv1.BorrowWorkerRequest{ Service: "substreams", @@ -82,10 +100,62 @@ func TestSessionService_BorrowWorker_MissingSessionID(t *testing.T) { require.Error(t, err) var connectErr *connect.Error require.ErrorAs(t, err, &connectErr) - assert.Equal(t, connect.CodeInternal, connectErr.Code()) + assert.Equal(t, connect.CodePermissionDenied, connectErr.Code()) assert.Contains(t, connectErr.Message(), 
"session_id not provided") } +func TestSessionService_BorrowWorker_UnknownSession(t *testing.T) { + svc, _ := newTestService(nil) + + _, err := svc.BorrowWorker(context.Background(), newTestRequest(&sessionv1.BorrowWorkerRequest{ + Service: "substreams", + OrganizationId: "0x1111111111111111111111111111111111111111", + SessionId: "trace-unknown", + }, "missing-session")) + require.Error(t, err) + + var connectErr *connect.Error + require.ErrorAs(t, err, &connectErr) + assert.Equal(t, connect.CodePermissionDenied, connectErr.Code()) + assert.Contains(t, connectErr.Message(), "session not found") +} + +func TestSessionService_BorrowWorker_InactivePaymentIssueUsesResourceExhausted(t *testing.T) { + svc, repo := newTestService(nil) + sess := mustCreateSession(t, repo, "terminated-payment-session", "0x1111111111111111111111111111111111111111") + sess.End(commonv1.EndReason_END_REASON_PAYMENT_ISSUE) + require.NoError(t, repo.SessionUpdate(context.Background(), sess)) + + _, err := svc.BorrowWorker(context.Background(), newTestRequest(&sessionv1.BorrowWorkerRequest{ + Service: "substreams", + OrganizationId: "0x1111111111111111111111111111111111111111", + SessionId: "trace-payment", + }, "terminated-payment-session")) + require.Error(t, err) + + var connectErr *connect.Error + require.ErrorAs(t, err, &connectErr) + assert.Equal(t, connect.CodeResourceExhausted, connectErr.Code()) +} + +func TestSessionService_BorrowWorker_InactiveNonPaymentUsesPermissionDenied(t *testing.T) { + svc, repo := newTestService(nil) + sess := mustCreateSession(t, repo, "terminated-nonpayment-session", "0x1111111111111111111111111111111111111111") + sess.End(commonv1.EndReason_END_REASON_CLIENT_DISCONNECT) + require.NoError(t, repo.SessionUpdate(context.Background(), sess)) + + _, err := svc.BorrowWorker(context.Background(), newTestRequest(&sessionv1.BorrowWorkerRequest{ + Service: "substreams", + OrganizationId: "0x1111111111111111111111111111111111111111", + SessionId: "trace-disconnect", + 
}, "terminated-nonpayment-session")) + require.Error(t, err) + + var connectErr *connect.Error + require.ErrorAs(t, err, &connectErr) + assert.Equal(t, connect.CodePermissionDenied, connectErr.Code()) +} + func TestSessionService_BorrowWorker_QuotaExceeded(t *testing.T) { // Create a config with maxSessions=1, maxWorkers=1 → effective max = 1 worker. quotas := &session.QuotaConfig{ @@ -93,7 +163,9 @@ func TestSessionService_BorrowWorker_QuotaExceeded(t *testing.T) { DefaultMaxWorkersPerSession: 1, PerPayerOverrides: make(map[string]*session.PayerQuota), } - svc, _ := newTestService(quotas) + svc, repo := newTestService(quotas) + mustCreateSession(t, repo, "test-session-003", "0x1111111111111111111111111111111111111111") + mustCreateSession(t, repo, "test-session-004", "0x1111111111111111111111111111111111111111") // Borrow first worker - should succeed. req1 := newTestRequest(&sessionv1.BorrowWorkerRequest{ @@ -125,15 +197,17 @@ func TestSessionService_BorrowWorker_PerPayerOverride(t *testing.T) { "0x1111111111111111111111111111111111111111": {MaxConcurrentSessions: 5, MaxWorkersPerSession: 2}, }, } - svc, _ := newTestService(quotas) + svc, repo := newTestService(quotas) // Should be able to borrow multiple workers for payer1 (10 max). 
for i := range 5 { + sessionID := "test-session-" + string(rune('0'+i)) + mustCreateSession(t, repo, sessionID, "0x1111111111111111111111111111111111111111") req := newTestRequest(&sessionv1.BorrowWorkerRequest{ Service: "substreams", OrganizationId: "0x1111111111111111111111111111111111111111", SessionId: "trace-" + string(rune('0'+i)), - }, "test-session-"+string(rune('0'+i))) + }, sessionID) resp, err := svc.BorrowWorker(context.Background(), req) require.NoError(t, err) assert.Equal(t, sessionv1.BorrowStatus_BORROW_STATUS_BORROWED, resp.Msg.Status) @@ -144,6 +218,7 @@ func TestSessionService_BorrowWorker_PerPayerOverride(t *testing.T) { func TestSessionService_ReturnWorker_Success(t *testing.T) { svc, repo := newTestService(nil) + mustCreateSession(t, repo, "test-session-return-001", "0x1111111111111111111111111111111111111111") borrowReq := newTestRequest(&sessionv1.BorrowWorkerRequest{ OrganizationId: "0x1111111111111111111111111111111111111111", @@ -189,6 +264,7 @@ func TestSessionService_ReturnWorker_UnknownKey(t *testing.T) { func TestSessionService_KeepAlive_Success(t *testing.T) { svc, repo := newTestService(nil) + mustCreateSession(t, repo, "test-session-keepalive-001", "0x1111111111111111111111111111111111111111") borrowReq := newTestRequest(&sessionv1.BorrowWorkerRequest{ OrganizationId: "0x1111111111111111111111111111111111111111", @@ -223,13 +299,71 @@ func TestSessionService_KeepAlive_MissingKey(t *testing.T) { } func TestSessionService_KeepAlive_UnknownKey(t *testing.T) { - // Unknown key is non-fatal. 
svc, _ := newTestService(nil) _, err := svc.KeepAlive(context.Background(), connect.NewRequest(&sessionv1.KeepAliveRequest{ WorkerKey: "unknown-key", })) - require.NoError(t, err) + require.Error(t, err) + var connectErr *connect.Error + require.ErrorAs(t, err, &connectErr) + assert.Equal(t, connect.CodePermissionDenied, connectErr.Code()) +} + +func TestSessionService_KeepAlive_PaymentIssueReturnsResourceExhausted(t *testing.T) { + svc, repo := newTestService(nil) + sess := mustCreateSession(t, repo, "payment-ended-session", "0x1111111111111111111111111111111111111111") + worker := &repository.Worker{ + Key: "payment-worker", + SessionID: sess.ID, + Payer: sess.Payer, + CreatedAt: time.Now(), + } + require.NoError(t, repo.WorkerCreate(context.Background(), worker)) + + sess.End(commonv1.EndReason_END_REASON_PAYMENT_ISSUE) + lastKeepAlive := sess.LastKeepAlive + require.NoError(t, repo.SessionUpdate(context.Background(), sess)) + + _, err := svc.KeepAlive(context.Background(), connect.NewRequest(&sessionv1.KeepAliveRequest{ + WorkerKey: worker.Key, + })) + require.Error(t, err) + var connectErr *connect.Error + require.ErrorAs(t, err, &connectErr) + assert.Equal(t, connect.CodeResourceExhausted, connectErr.Code()) + + updated, getErr := repo.SessionGet(context.Background(), sess.ID) + require.NoError(t, getErr) + assert.Equal(t, lastKeepAlive, updated.LastKeepAlive) +} + +func TestSessionService_KeepAlive_NonPaymentTerminationReturnsPermissionDenied(t *testing.T) { + svc, repo := newTestService(nil) + sess := mustCreateSession(t, repo, "nonpayment-ended-session", "0x1111111111111111111111111111111111111111") + worker := &repository.Worker{ + Key: "nonpayment-worker", + SessionID: sess.ID, + Payer: sess.Payer, + CreatedAt: time.Now(), + } + require.NoError(t, repo.WorkerCreate(context.Background(), worker)) + + sess.End(commonv1.EndReason_END_REASON_CLIENT_DISCONNECT) + lastKeepAlive := sess.LastKeepAlive + require.NoError(t, repo.SessionUpdate(context.Background(), 
sess)) + + _, err := svc.KeepAlive(context.Background(), connect.NewRequest(&sessionv1.KeepAliveRequest{ + WorkerKey: worker.Key, + })) + require.Error(t, err) + var connectErr *connect.Error + require.ErrorAs(t, err, &connectErr) + assert.Equal(t, connect.CodePermissionDenied, connectErr.Code()) + + updated, getErr := repo.SessionGet(context.Background(), sess.ID) + require.NoError(t, getErr) + assert.Equal(t, lastKeepAlive, updated.LastKeepAlive) } // --- QuotaConfig --- diff --git a/test/integration/firecore_test.go b/test/integration/firecore_test.go index 3f82d9a..d9ad686 100644 --- a/test/integration/firecore_test.go +++ b/test/integration/firecore_test.go @@ -2,7 +2,9 @@ package integration import ( "context" + "database/sql" "fmt" + "math/big" "net/http" "os" "os/exec" @@ -15,10 +17,13 @@ import ( "github.com/graphprotocol/substreams-data-service/cmd/sds/impl" "github.com/graphprotocol/substreams-data-service/consumer/sidecar" "github.com/graphprotocol/substreams-data-service/horizon" + commonv1 "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/common/v1" providerv1 "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/provider/v1" "github.com/graphprotocol/substreams-data-service/pb/graph/substreams/data_service/provider/v1/providerv1connect" + "github.com/graphprotocol/substreams-data-service/provider/repository" psqlrepo "github.com/graphprotocol/substreams-data-service/provider/repository/psql" sidecarlib "github.com/graphprotocol/substreams-data-service/sidecar" + "github.com/streamingfast/eth-go" "github.com/streamingfast/logging" "github.com/stretchr/testify/require" "github.com/testcontainers/testcontainers-go" @@ -33,6 +38,16 @@ const ( dummyBlockchainImageEnvVar = "SDS_TEST_DUMMY_BLOCKCHAIN_IMAGE" ) +type dummyBlockchainOptions struct { + GenesisBlockBurst int + SessionPluginConfig string +} + +type sdsSinkRunOptions struct { + ReportInterval time.Duration + Timeout time.Duration +} + 
func TestFirecore(t *testing.T) { if testing.Short() { t.Skip("Skipping firecore integration test in short mode") @@ -215,6 +230,127 @@ func TestFirecore(t *testing.T) { ) } +func TestFirecoreStopsStreamOnLowFunds(t *testing.T) { + if testing.Short() { + t.Skip("Skipping firecore integration test in short mode") + } + + ctx := context.Background() + env := SetupEnv(t) + testStartedAt := time.Now().UTC() + dummyBlockchainImage := getDummyBlockchainImage() + + config := DefaultTestSetupConfig() + config.EscrowAmount = big.NewInt(1) + setup, err := env.SetupCustomPaymentParticipantsWithSigner(env.User2, env.User3, config) + require.NoError(t, err, "failed to prepare low-funds payment participants") + + dummyBlockchainContainer, substreamsEndpoint, err := startDummyBlockchainContainerWithOptions(ctx, dummyBlockchainImage, dummyBlockchainOptions{ + GenesisBlockBurst: 100, + SessionPluginConfig: "sds://host.docker.internal:19003?plaintext=true&keep-alive-delay=250ms&minimal-worker-life-duration=100ms", + }) + require.NoError(t, err, "failed to start dummy-blockchain container from image %q", dummyBlockchainImage) + defer dummyBlockchainContainer.Terminate(ctx) + defer func() { + if !t.Failed() { + return + } + dumpContainerLogs(t, ctx, dummyBlockchainContainer) + }() + + pricingConfig := deterministicPricingConfig() + gateways, err := impl.StartProviderGateway( + ctx, + "0.0.0.0:19001", + "0.0.0.0:19003", + env.User3.Address, + env.ChainID, + env.Collector.Address, + env.Escrow.Address, + env.RPCURL, + "http://"+substreamsEndpoint, + PostgresTestDSN, + sidecarlib.ServerTransportConfig{ + Plaintext: true, + TLSCertFile: "", + TLSKeyFile: "", + }, + pricingConfig, + ) + require.NoError(t, err, "failed to start provider gateways") + defer gateways.Shutdown(nil) + + require.NoError(t, waitForGatewayHealth(ctx, "http://localhost:19001/healthz", 30*time.Second), "payment gateway failed to become healthy") + require.NoError(t, waitForGatewayHealth(ctx, 
"http://localhost:19003/healthz", 30*time.Second), "plugin gateway failed to become healthy") + + consumerSidecar := sidecar.New(&sidecar.Config{ + ListenAddr: ":9002", + SignerKey: setup.SignerKey, + Domain: horizon.NewDomain(env.ChainID, env.Collector.Address), + TransportConfig: sidecarlib.ServerTransportConfig{Plaintext: true}, + }, firecoreLog) + go consumerSidecar.Run() + defer consumerSidecar.Shutdown(nil) + + require.NoError(t, waitForSidecarHealth(ctx, "http://localhost:9002/healthz", 10*time.Second), "consumer sidecar failed to become healthy") + + err = runSDSSinkWithOptions( + ctx, + "common@v0.1.0", + "map_clocks", + substreamsEndpoint, + env.User2.Address.Pretty(), + env.User3.Address.Pretty(), + env.DataService.Address.Pretty(), + 0, + 100_000, + sdsSinkRunOptions{ + ReportInterval: 100 * time.Millisecond, + Timeout: 30 * time.Second, + }, + ) + if isKnownFirecoreHeaderPropagationBlocker(err) { + dumpContainerLogs(t, ctx, dummyBlockchainContainer) + t.Skipf("MVP-016 blocked by external firecore/substreams header propagation: %v", err) + } + require.Error(t, err, "low-funds Firecore run must stop the live stream") + require.True(t, isQuotaExceededRuntimeFailure(err), "expected quota/resource exhausted runtime failure, got: %v", err) + + evidence := loadFirecoreEvidenceForParticipants(t, ctx, testStartedAt, env.User2.Address, env.User3.Address, env.DataService.Address) + require.NotEmpty(t, evidence.SessionID, "expected a provider session to be created") + require.Equal(t, 1, evidence.SessionCount, "expected exactly one low-funds provider session") + + providerClient := providerv1connect.NewPaymentGatewayServiceClient(http.DefaultClient, "http://localhost:19001") + require.Eventually(t, func() bool { + statusResp, err := providerClient.GetSessionStatus(ctx, connect.NewRequest(&providerv1.GetSessionStatusRequest{ + SessionId: evidence.SessionID, + })) + if err != nil { + firecoreLog.Warn("failed to refresh low-funds gateway session status", + 
zap.String("session_id", evidence.SessionID), + zap.Error(err), + ) + return false + } + if statusResp.Msg.GetActive() { + return false + } + + state, err := loadFirecoreSessionState(ctx, evidence.SessionID) + if err != nil { + firecoreLog.Warn("failed to refresh low-funds session state", + zap.String("session_id", evidence.SessionID), + zap.Error(err), + ) + return false + } + + return state.Status == repository.SessionStatusTerminated && + state.EndReason == commonv1.EndReason_END_REASON_PAYMENT_ISSUE && + state.WorkerCount == 0 + }, 10*time.Second, 100*time.Millisecond, "expected low-funds session termination and worker cleanup") +} + // waitForSidecarHealth polls the sidecar health endpoint until it returns 200 or timeout func waitForSidecarHealth(ctx context.Context, healthURL string, timeout time.Duration) error { ctx, cancel := context.WithTimeout(ctx, timeout) @@ -243,9 +379,14 @@ func waitForSidecarHealth(ctx context.Context, healthURL string, timeout time.Du // newDummyBlockchainContainer creates a dummy blockchain container for testing // It starts reader-node, merger, relayer, and substreams-tier1 with SDS plugins -func newDummyBlockchainContainer(ctx context.Context, image string, genesisBlockBurst int) (testcontainers.Container, error) { +func newDummyBlockchainContainer(ctx context.Context, image string, opts dummyBlockchainOptions) (testcontainers.Container, error) { // Build reader arguments for the dummy-blockchain binary - readerArgs := fmt.Sprintf("start --log-level=error --tracer=firehose --store-dir=/tmp/data --genesis-block-burst=%d --block-rate=120 --block-size=1500 --genesis-height=0 --server-addr=:9777", genesisBlockBurst) + readerArgs := fmt.Sprintf("start --log-level=error --tracer=firehose --store-dir=/tmp/data --genesis-block-burst=%d --block-rate=120 --block-size=1500 --genesis-height=0 --server-addr=:9777", opts.GenesisBlockBurst) + + sessionPluginConfig := opts.SessionPluginConfig + if sessionPluginConfig == "" { + 
sessionPluginConfig = "sds://host.docker.internal:19003?plaintext=true" + } // Build firecore start command - start required components // Configure SDS plugins to connect to the Provider Gateway running on the host @@ -260,7 +401,7 @@ func newDummyBlockchainContainer(ctx context.Context, image string, genesisBlock // SDS Plugin configuration - connect to plugin gateway on host (port 19003) // Use host.docker.internal to reach services running on the host machine "--common-auth-plugin=sds://host.docker.internal:19003?plaintext=true", - "--common-session-plugin=sds://host.docker.internal:19003?plaintext=true", + "--common-session-plugin=" + sessionPluginConfig, "--common-metering-plugin=sds://host.docker.internal:19003?plaintext=true&network=test", "--reader-node-path=/app/dummy-blockchain", "--reader-node-arguments=" + readerArgs, @@ -307,9 +448,13 @@ func newDummyBlockchainContainer(ctx context.Context, image string, genesisBlock // startDummyBlockchainContainer starts a dummy blockchain container, retrieves its endpoint, and verifies it's healthy func startDummyBlockchainContainer(ctx context.Context, image string, genesisBlockBurst int) (testcontainers.Container, string, error) { + return startDummyBlockchainContainerWithOptions(ctx, image, dummyBlockchainOptions{GenesisBlockBurst: genesisBlockBurst}) +} + +func startDummyBlockchainContainerWithOptions(ctx context.Context, image string, opts dummyBlockchainOptions) (testcontainers.Container, string, error) { firecoreLog.Info("setting up dummy-blockchain container", zap.String("image", image)) - container, err := newDummyBlockchainContainer(ctx, image, genesisBlockBurst) + container, err := newDummyBlockchainContainer(ctx, image, opts) if err != nil { return nil, "", fmt.Errorf("failed to start dummy-blockchain container: %w", err) } @@ -403,6 +548,37 @@ func runSDSSink( startBlock int64, stopBlock uint64, ) error { + return runSDSSinkWithOptions(ctx, manifest, module, endpoint, payerAddress, receiverAddress, 
dataServiceAddress, startBlock, stopBlock, sdsSinkRunOptions{}) +} + +func runSDSSinkWithOptions( + ctx context.Context, + manifest string, + module string, + endpoint string, + payerAddress string, + receiverAddress string, + dataServiceAddress string, + startBlock int64, + stopBlock uint64, + opts sdsSinkRunOptions, +) error { + _, err := runSDSSinkCommand(ctx, manifest, module, endpoint, payerAddress, receiverAddress, dataServiceAddress, startBlock, stopBlock, opts) + return err +} + +func runSDSSinkCommand( + ctx context.Context, + manifest string, + module string, + endpoint string, + payerAddress string, + receiverAddress string, + dataServiceAddress string, + startBlock int64, + stopBlock uint64, + opts sdsSinkRunOptions, +) (string, error) { args := []string{ "run", "./cmd/sds", @@ -419,6 +595,9 @@ func runSDSSink( fmt.Sprintf("--start-block=%d", startBlock), fmt.Sprintf("--stop-block=%d", stopBlock), } + if opts.ReportInterval > 0 { + args = append(args, "--report-interval="+opts.ReportInterval.String()) + } firecoreLog.Info("running sds sink command", zap.String("manifest", manifest), @@ -429,13 +608,17 @@ func runSDSSink( ) // Create a context with timeout for the sink execution - sinkCtx, cancel := context.WithTimeout(ctx, 2*time.Minute) + timeout := opts.Timeout + if timeout <= 0 { + timeout = 2 * time.Minute + } + sinkCtx, cancel := context.WithTimeout(ctx, timeout) defer cancel() // Get the repository root cwd, err := os.Getwd() if err != nil { - return fmt.Errorf("failed to get working directory: %w", err) + return "", fmt.Errorf("failed to get working directory: %w", err) } repoRoot := filepath.Join(cwd, "..", "..") @@ -449,14 +632,14 @@ func runSDSSink( zap.Error(err), zap.String("output", string(output)), ) - return fmt.Errorf("sds sink failed: %w\nOutput: %s", err, string(output)) + return string(output), fmt.Errorf("sds sink failed: %w\nOutput: %s", err, string(output)) } firecoreLog.Info("sds sink command completed successfully", 
zap.String("output", string(output)), ) - return nil + return string(output), nil } type firecoreSessionEvidence struct { @@ -467,14 +650,23 @@ type firecoreSessionEvidence struct { UsageBlocks int64 UsageBytes int64 UsageRequests int64 + Status repository.SessionStatus + EndReason commonv1.EndReason } type firecoreSessionRow struct { - ID string `db:"id"` + ID string `db:"id"` + Status string `db:"status"` + EndReason sql.NullInt32 `db:"end_reason"` } func loadFirecoreEvidence(t *testing.T, ctx context.Context, createdAfter time.Time, env *TestEnv) firecoreSessionEvidence { t.Helper() + return loadFirecoreEvidenceForParticipants(t, ctx, createdAfter, env.Payer.Address, env.ServiceProvider.Address, env.DataService.Address) +} + +func loadFirecoreEvidenceForParticipants(t *testing.T, ctx context.Context, createdAfter time.Time, payer, receiver, dataService eth.Address) firecoreSessionEvidence { + t.Helper() dbConn, err := psqlrepo.GetConnectionFromDSN(ctx, toPostgresDriverDSN(PostgresTestDSN)) require.NoError(t, err, "connect to provider postgres repo") @@ -482,20 +674,24 @@ func loadFirecoreEvidence(t *testing.T, ctx context.Context, createdAfter time.T sessionRows := make([]firecoreSessionRow, 0, 1) err = dbConn.SelectContext(ctx, &sessionRows, ` - SELECT id + SELECT id, status, end_reason FROM sessions WHERE payer = $1 AND receiver = $2 AND data_service = $3 AND created_at >= $4 ORDER BY created_at ASC - `, env.Payer.Address.Bytes(), env.ServiceProvider.Address.Bytes(), env.DataService.Address.Bytes(), createdAfter) + `, payer.Bytes(), receiver.Bytes(), dataService.Bytes(), createdAfter) require.NoError(t, err, "query firecore-created provider sessions") require.Len(t, sessionRows, 1, "expected one provider session for the test payer/provider/data service tuple") var evidence firecoreSessionEvidence evidence.SessionID = sessionRows[0].ID evidence.SessionCount = len(sessionRows) + evidence.Status = repository.SessionStatus(sessionRows[0].Status) + if 
sessionRows[0].EndReason.Valid { + evidence.EndReason = commonv1.EndReason(sessionRows[0].EndReason.Int32) + } err = dbConn.GetContext(ctx, &evidence.WorkerCount, `SELECT COUNT(*) FROM workers WHERE session_id = $1`, evidence.SessionID) require.NoError(t, err, "count worker rows for firecore session") @@ -516,6 +712,39 @@ func loadFirecoreEvidence(t *testing.T, ctx context.Context, createdAfter time.T return evidence } +func loadFirecoreSessionState(ctx context.Context, sessionID string) (firecoreSessionEvidence, error) { + dbConn, err := psqlrepo.GetConnectionFromDSN(ctx, toPostgresDriverDSN(PostgresTestDSN)) + if err != nil { + return firecoreSessionEvidence{}, err + } + defer dbConn.Close() + + var row firecoreSessionRow + if err := dbConn.GetContext(ctx, &row, ` + SELECT id, status, end_reason + FROM sessions + WHERE id = $1 + `, sessionID); err != nil { + return firecoreSessionEvidence{}, err + } + + var workerCount int64 + if err := dbConn.GetContext(ctx, &workerCount, `SELECT COUNT(*) FROM workers WHERE session_id = $1`, sessionID); err != nil { + return firecoreSessionEvidence{}, err + } + + state := firecoreSessionEvidence{ + SessionID: row.ID, + Status: repository.SessionStatus(row.Status), + WorkerCount: workerCount, + } + if row.EndReason.Valid { + state.EndReason = commonv1.EndReason(row.EndReason.Int32) + } + + return state, nil +} + func loadFirecoreUsageEvidence(ctx context.Context, sessionID string) (firecoreSessionEvidence, error) { dbConn, err := psqlrepo.GetConnectionFromDSN(ctx, toPostgresDriverDSN(PostgresTestDSN)) if err != nil { @@ -542,6 +771,17 @@ func loadFirecoreUsageEvidence(ctx context.Context, sessionID string) (firecoreS return evidence, nil } +func isQuotaExceededRuntimeFailure(err error) bool { + if err == nil { + return false + } + + msg := err.Error() + return strings.Contains(msg, "Quota exceeded") || + strings.Contains(msg, "ResourceExhausted") || + strings.Contains(msg, "resource exhausted") +} + func toPostgresDriverDSN(dsn 
string) string { return strings.Replace(dsn, "psql://", "postgres://", 1) }