Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
c95c8dd
feat: add new flags
gacevicljubisa Nov 26, 2025
e502bc8
fix(k8s): ensure p2p-wss service port is created when configured
gacevicljubisa Nov 26, 2025
ae31531
chore: add comments to local.yaml
gacevicljubisa Nov 27, 2025
1e6340f
fix(config): update autotls-registration-endpoint
gacevicljubisa Jan 8, 2026
3def709
feat(autotls): add autotls support with pebble and p2p-forge (#559)
akrem-chabchoub Jan 16, 2026
4b52f9f
feat(autotls): implement autotls check functionality (#560)
akrem-chabchoub Feb 16, 2026
7c73865
fix(autotls): simplify config
gacevicljubisa Feb 16, 2026
91aa2da
fix: use test cert from constant
gacevicljubisa Feb 17, 2026
9634be7
fix(autotls): handle context cancellation during sleep intervals in c…
akrem-chabchoub Feb 17, 2026
027d277
feat(config): add light node configuration for autotls support in loc…
akrem-chabchoub Feb 18, 2026
c34162e
fix: git conflicts
akrem-chabchoub Mar 31, 2026
4233bf7
chore: fix lint issue
gacevicljubisa Feb 20, 2026
970ad33
feat(autotls): add support for custom DNS resolver and include Pebble…
akrem-chabchoub Mar 2, 2026
851b02d
lint forge test file
akrem-chabchoub Mar 2, 2026
d9ba4e8
feat(autotls): enhance Pebble CA certificate fetching and direct DNS …
akrem-chabchoub Mar 3, 2026
59f03f7
chore: update forge DNS address in local configuration
akrem-chabchoub Mar 4, 2026
d4a2bbe
chore: update forgeDNSAddr in check.go
akrem-chabchoub Mar 4, 2026
bb1f8db
chore: enhance DNS resolution handling with custom DNS server support
akrem-chabchoub Mar 4, 2026
47bb2e4
chore: remove unnecessary changes related to pebble
akrem-chabchoub Mar 4, 2026
070e0aa
chore: update package imports
akrem-chabchoub Mar 4, 2026
eb55129
chore: rename ForgeDNSAddr
akrem-chabchoub Mar 4, 2026
3f20440
feat(autotls): add Forge TLS host address and Pebble management URL s…
akrem-chabchoub Mar 5, 2026
ad76690
fix(autotls): increase certificate renewal wait time to accommodate e…
akrem-chabchoub Mar 5, 2026
eeb83f3
fix(autotls): extend timeout for certificate renewal process
akrem-chabchoub Mar 5, 2026
7c324ad
fix(autotls): adjust certificate renewal logic and improve logging fo…
akrem-chabchoub Mar 5, 2026
c44a78f
fix(autotls): refine certificate renewal process with enhanced retry …
akrem-chabchoub Mar 9, 2026
60731a6
refator: rm test forge
akrem-chabchoub Mar 9, 2026
ca09e75
feat(autotls): introduce support for multiple AutoTLS groups and add …
akrem-chabchoub Mar 24, 2026
e71f5d2
refactor(autotls): consolidate HTTP client configuration for Pebble m…
akrem-chabchoub Mar 24, 2026
f8b6ad1
fix: lint check.go
akrem-chabchoub Mar 25, 2026
b810492
refactor(autotls): encapsulate Pebble management API interactions in …
akrem-chabchoub Mar 25, 2026
3376f8b
fix(autotls): enhance logging for certificate snapshot dialing errors
akrem-chabchoub Mar 25, 2026
b3ef024
refactor(autotls): move certificate renewal logic to a dedicated func…
akrem-chabchoub Mar 25, 2026
4eb38b2
docs(autotls): add README for AutoTLS check detailing functionality, …
akrem-chabchoub Mar 25, 2026
84f7a7f
docs(autotls): add sequence diagram to README for better visualizatio…
akrem-chabchoub Mar 25, 2026
6820f41
chore(config): update local.yaml with new addresses for forge DNS and…
akrem-chabchoub Mar 26, 2026
c975319
chore(config): add local usage notes for forge DNS and Pebble managem…
akrem-chabchoub Mar 26, 2026
98c0ffb
fix: set timeout for insecure Pebble HTTP client
akrem-chabchoub Mar 30, 2026
869c122
chore(config): update local.yaml with valid values for CI
akrem-chabchoub Mar 31, 2026
7530865
Merge branch 'master' of github.com:ethersphere/beekeeper into dns-re…
akrem-chabchoub Mar 31, 2026
3f46e96
chore(deps): update go.mod to include new dependencies and indirect p…
akrem-chabchoub Mar 31, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions config/local.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,14 @@ clusters:
count: 3
mode: node
bee-autotls:
bee-config: bee-local-autotls
config: local-autotls
count: 1
mode: node
bee-autotls-extra:
bee-config: bee-local-autotls
config: local
count: 2
count: 1
mode: node
light:
bee-config: bee-local-light-autotls
Expand Down Expand Up @@ -132,6 +137,9 @@ node-groups:
update-strategy: "RollingUpdate"
local-dns:
_inherit: "local"
local-autotls:
_inherit: "local"
p2p-wss-node-port: 31635
local-gc:
_inherit: "local"
local-light:
Expand Down Expand Up @@ -447,8 +455,14 @@ checks:
postage-label: test-label
type: feed
ci-autotls:
timeout: 15m
timeout: 25m
type: autotls
options:
ultra-light-group: ultra-light
autotls-group: bee-autotls
autotls-groups:
- bee-autotls
- bee-autotls-extra
forge-dns-address: "127.0.0.1:30053" # When running inside cluster, use p2p-forge.local.svc.cluster.local:53
forge-tls-host-address: "" # When running locally, use 127.0.0.1:31635
pebble-mgmt-url: "https://127.0.0.1:31500/roots/0" # When running inside cluster, use https://pebble.local.svc.cluster.local:15000/roots/0

2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ require (
github.com/gorilla/websocket v1.5.3
github.com/ipshipyard/p2p-forge v0.7.0
github.com/libp2p/go-libp2p v0.46.0
github.com/miekg/dns v1.1.66
github.com/multiformats/go-multiaddr v0.16.1
github.com/multiformats/go-multibase v0.2.0
github.com/opentracing/opentracing-go v1.2.0
Expand Down Expand Up @@ -107,7 +108,6 @@ require (
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mholt/acmez/v3 v3.0.0 // indirect
github.com/miekg/dns v1.1.66 // indirect
github.com/minio/sha256-simd v1.0.1 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
Expand Down
24 changes: 24 additions & 0 deletions pkg/cert/pebble.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package cert

// PebbleCertificate is the root CA certificate for Pebble (Let's Encrypt test server).
// Source: https://github.com/letsencrypt/pebble/blob/main/test/certs/pebble.minica.pem
const PebbleCertificate = `-----BEGIN CERTIFICATE-----
MIIDPzCCAiegAwIBAgIIU0Xm9UFdQxUwDQYJKoZIhvcNAQELBQAwIDEeMBwGA1UE
AxMVbWluaWNhIHJvb3QgY2EgNTM0NWU2MCAXDTI1MDkwMzIzNDAwNVoYDzIxMjUw
OTAzMjM0MDA1WjAgMR4wHAYDVQQDExVtaW5pY2Egcm9vdCBjYSA1MzQ1ZTYwggEi
MA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQC5WgZNoVJandj43kkLyU50vzCZ
alozvdRo3OFiKoDtmqKPNWRNO2hC9AUNxTDJco51Yc42u/WV3fPbbhSznTiOOVtn
Ajm6iq4I5nZYltGGZetGDOQWr78y2gWY+SG078MuOO2hyDIiKtVc3xiXYA+8Hluu
9F8KbqSS1h55yxZ9b87eKR+B0zu2ahzBCIHKmKWgc6N13l7aDxxY3D6uq8gtJRU0
toumyLbdzGcupVvjbjDP11nl07RESDWBLG1/g3ktJvqIa4BWgU2HMh4rND6y8OD3
Hy3H8MY6CElL+MOCbFJjWqhtOxeFyZZV9q3kYnk9CAuQJKMEGuN4GU6tzhW1AgMB
AAGjezB5MA4GA1UdDwEB/wQEAwIChDATBgNVHSUEDDAKBggrBgEFBQcDATASBgNV
HRMBAf8ECDAGAQH/AgEAMB0GA1UdDgQWBBSu8RGpErgYUoYnQuwCq+/ggTiEjDAf
BgNVHSMEGDAWgBSu8RGpErgYUoYnQuwCq+/ggTiEjDANBgkqhkiG9w0BAQsFAAOC
AQEAXDVYov1+f6EL7S41LhYQkEX/GyNNzsEvqxE9U0+3Iri5JfkcNOiA9O9L6Z+Y
bqcsXV93s3vi4r4WSWuc//wHyJYrVe5+tK4nlFpbJOvfBUtnoBDyKNxXzZCxFJVh
f9uc8UejRfQMFbDbhWY/x83y9BDufJHHq32OjCIN7gp2UR8rnfYvlz7Zg4qkJBsn
DG4dwd+pRTCFWJOVIG0JoNhK3ZmE7oJ1N4H38XkZ31NPcMksKxpsLLIS9+mosZtg
4olL7tMPJklx5ZaeMFaKRDq4Gdxkbw4+O4vRgNm3Z8AXWKknOdfgdpqLUPPhRcP4
v1lhy71EhBuXXwRQJry0lTdF+w==
-----END CERTIFICATE-----`
52 changes: 52 additions & 0 deletions pkg/check/autotls/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# autotls check

Beekeeper check that validates **p2p-forge** style AutoTLS setup: WSS underlays, forge hostnames, DNS, TLS on forge endpoints, connectivity, and (when possible) **certificate renewal** by comparing cert serials over time.

## What it runs (order)

1. Load API clients for nodes in the configured AutoTLS node groups.
2. **WSS underlays** — confirm nodes expose WebSocket secure underlays (optionally skip a group such as ultra-light).
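3. **Forge domain + CA** — read `AutoTLSDomain` from the first matching node config and, when that node's `AutoTLSCAEndpoint` points at Pebble, resolve the CA certificate used to verify forge TLS (see *Pebble* below).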
4. **Forge address format** — parse WSS multiaddrs and check forge hostname / peer id consistency.
5. **DNS** — resolve forge hostnames when `forge-dns-address` is set.
6. **TLS** — dial forge endpoints and verify certificates (SANs, retries for expired certs).
7. **WSS connectivity** — disconnect/reconnect between nodes over WSS underlays.
8. **Ultra-light** (optional) — same connectivity from ultra-light nodes if configured.
9. **Renewal** — snapshot leaf cert serials, wait until near expiry (or trigger dials if already expired), snapshot again, compare serials; then run WSS connectivity again.

If any step fails, the check fails.

## Config (beekeeper)

Check type: `autotls`. Options map to `Options` in `autotls.go`:

| YAML key | Purpose |
|----------|---------|
| `autotls-groups` | Node groups that run AutoTLS (default in code: `bee-autotls`). |
| `ultra-light-group` | Group name for nodes without listen addrs; excluded from WSS underlay collection, used for ultra-light connectivity test. Default: `ultra-light`. Set empty to skip ultra-light tests. |
| `forge-dns-address` | Resolver host:port used to verify DNS resolution of forge hostnames. |
| `forge-tls-host-address` | Optional `host:port` to dial the **first** sorted node’s forge TLS check from this host (e.g. in-cluster DNS). Other nodes still use IP:port from the multiaddr. |
| `pebble-mgmt-url` | Override Pebble **management** URL for fetching the live root CA PEM (see below). |

Defaults: `autotls.NewDefaultOptions()` in `autotls.go`.

## Pebble (local ACME)

When a node’s `AutoTLSCAEndpoint` contains `pebble`, the check fetches the **current** root CA from Pebble’s management API (Pebble rotates its CA on restart). The ACME directory URL in config is turned into a management URL with `pebbleMgmtURL()` (ACME port `14000` → management `15000`, path `/roots/0`). `pebble-mgmt-url` overrides that derived URL. If the fetch fails, the check logs a warning and falls back to the static certificate embedded in `pkg/cert` (`cert.PebbleCertificate`).

The HTTP client uses **TLS insecure skip verify** only for that management HTTPS call (self-signed Pebble). Implementation: `internal/service.go` (`Pebble.FetchRootCA`), client wired in `autotls.go`.

## Package layout

| File | Role |
|------|------|
| `autotls.go` | `Check`, `Run`, options, WSS underlay polling, connectivity tests, `forgeConfig`, Pebble URL helper. |
| `forge.go` | Forge multiaddr parsing, DNS/TLS verification helpers, `getCertSnapshots`, `triggerRenewalConnections`, `certSnapshot`. |
| `renewal.go` | Renewal orchestration: wait/compare/retry, `compareCertRenewals` helpers. |
| `internal/service.go` | Small HTTP client wrapper for Pebble management `GET` (root CA PEM). |

Renewal **orchestration** lives in `renewal.go`; TLS dialing and address selection for snapshots and renewal triggers stay in `forge.go` next to other forge TLS code.

## Sequence diagram

![AutoTLS check sequence diagram](autotls-sequence-diagram.png)
Binary file added pkg/check/autotls/autotls-sequence-diagram.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
126 changes: 54 additions & 72 deletions pkg/check/autotls/autotls.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,32 @@ package autotls

import (
"context"
"crypto/tls"
"fmt"
"net/http"
"strings"
"time"

"github.com/ethersphere/beekeeper/pkg/bee"
"github.com/ethersphere/beekeeper/pkg/beekeeper"
"github.com/ethersphere/beekeeper/pkg/cert"
"github.com/ethersphere/beekeeper/pkg/check/autotls/internal"
"github.com/ethersphere/beekeeper/pkg/logging"
"github.com/ethersphere/beekeeper/pkg/orchestration"
"github.com/ethersphere/beekeeper/pkg/orchestration/k8s"
ma "github.com/multiformats/go-multiaddr"
)

type Options struct {
AutoTLSGroup string
UltraLightGroup string
AutoTLSGroups []string
UltraLightGroup string
ForgeDNSAddress string
ForgeTLSHostAddress string
PebbleMgmtURL string
}

func NewDefaultOptions() Options {
return Options{
AutoTLSGroup: "bee-autotls",
AutoTLSGroups: []string{"bee-autotls"},
UltraLightGroup: "ultra-light",
}
}
Expand All @@ -33,6 +39,13 @@ const (

var _ beekeeper.Action = (*Check)(nil)

// insecurePebbleHTTPClient is the HTTP client handed to internal.NewPebble for
// fetching the live root CA from Pebble's management API. It skips TLS
// certificate verification, so it must not be reused for any other endpoint.
// Every request made through it is bounded by connectTimeout.
var insecurePebbleHTTPClient = &http.Client{
Timeout: connectTimeout,
Transport: &http.Transport{
// Verification is disabled on purpose: the client is only used to obtain
// the CA certificate itself from the local Pebble test server.
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
},
}

type Check struct {
logger logging.Logger
}
Expand All @@ -56,20 +69,19 @@ func (c *Check) Run(ctx context.Context, cluster orchestration.Cluster, opts any
return fmt.Errorf("get node clients: %w", err)
}

autoTLSClients := orchestration.ClientMap(clients).FilterByNodeGroups([]string{o.AutoTLSGroup})
autoTLSClients := orchestration.ClientMap(clients).FilterByNodeGroups(o.AutoTLSGroups)
if len(autoTLSClients) == 0 {
return fmt.Errorf("no nodes found in AutoTLS group %q", o.AutoTLSGroup)
return fmt.Errorf("no nodes found in AutoTLS groups %v", o.AutoTLSGroups)
}

c.logger.Infof("found %d nodes in AutoTLS group %q", len(autoTLSClients), o.AutoTLSGroup)
c.logger.Infof("found %d nodes in AutoTLS groups %v", len(autoTLSClients), o.AutoTLSGroups)

wssNodes, err := c.verifyWSSUnderlays(ctx, autoTLSClients, o.UltraLightGroup)
if err != nil {
return fmt.Errorf("verify WSS underlays: %w", err)
}

// Extract forge config from the first autotls node's bee config.
forgeDomain, caCertPEM := c.forgeConfig(cluster, autoTLSClients)
forgeDomain, caCertPEM := c.forgeConfig(ctx, cluster, autoTLSClients, o.PebbleMgmtURL)
if forgeDomain == "" {
return fmt.Errorf("could not determine forge domain from node config")
}
Expand All @@ -79,11 +91,11 @@ func (c *Check) Run(ctx context.Context, cluster orchestration.Cluster, opts any
return fmt.Errorf("forge address validation: %w", err)
}

if err := c.verifyDNSResolution(ctx, forgeNodes); err != nil {
if err := c.verifyDNSResolution(ctx, forgeNodes, o.ForgeDNSAddress); err != nil {
return fmt.Errorf("DNS resolution verification: %w", err)
}

if err := c.verifyTLSCertificate(ctx, forgeNodes, caCertPEM); err != nil {
if err := c.verifyTLSCertificate(ctx, forgeNodes, caCertPEM, o.ForgeTLSHostAddress); err != nil {
return fmt.Errorf("TLS certificate verification: %w", err)
}

Expand All @@ -97,7 +109,7 @@ func (c *Check) Run(ctx context.Context, cluster orchestration.Cluster, opts any
}
}

if err := c.testCertificateRenewal(ctx, clients, wssNodes, forgeNodes, caCertPEM, connectTimeout); err != nil {
if err := c.testCertificateRenewal(ctx, clients, wssNodes, forgeNodes, caCertPEM, o.ForgeTLSHostAddress, connectTimeout); err != nil {
return fmt.Errorf("certificate renewal test: %w", err)
}

Expand Down Expand Up @@ -276,66 +288,14 @@ func (c *Check) testConnectivity(ctx context.Context, sourceClient *bee.Client,
return nil
}

// testCertificateRenewal checks that forge certificates are re-issued once they
// expire. It records the certificate serials reported by getCertSerials, waits
// renewalWaitTime (or until ctx is cancelled, in which case ctx.Err() is
// returned), records the serials again and compares them per endpoint.
//
// The comparison fails only when at least one serial is unchanged and none was
// renewed. If no serial could be captured before the wait, the comparison is
// skipped and the result depends solely on the WSS connectivity test, which is
// always run last with the given connectTimeout.
func (c *Check) testCertificateRenewal(ctx context.Context, clients map[string]*bee.Client, wssNodes map[string][]string, forgeNodes map[string][]*forgeUnderlayInfo, caCertPEM string, connectTimeout time.Duration) error {
	// The beelocal setup issues certificates that expire after 300 seconds, so
	// waiting 350 seconds leaves room for expiry plus re-issuance.
	const renewalWaitTime = 350 * time.Second

	// Serials seen before the wait; an empty result means no TLS endpoint was reachable.
	before := c.getCertSerials(ctx, forgeNodes, caCertPEM)
	compareSerials := len(before) > 0
	if !compareSerials {
		c.logger.Warning("no TLS endpoints reachable, will fall back to connectivity-only renewal check")
	} else {
		c.logger.Infof("captured %d certificate serial(s) before renewal wait", len(before))
	}

	c.logger.Infof("testing certificate renewal: waiting %v for certificates to expire and renew", renewalWaitTime)

	select {
	case <-time.After(renewalWaitTime):
	case <-ctx.Done():
		return ctx.Err()
	}

	c.logger.Info("wait complete, verifying certificates were renewed")

	// A changed serial proves that a new certificate was issued for the endpoint.
	if compareSerials {
		after := c.getCertSerials(ctx, forgeNodes, caCertPEM)
		renewed, unchanged := 0, 0
		for endpoint, oldSerial := range before {
			newSerial, reachable := after[endpoint]
			switch {
			case !reachable:
				// Counted as neither renewed nor unchanged.
				c.logger.Warningf("%s: endpoint became unreachable after wait", endpoint)
			case oldSerial == newSerial:
				unchanged++
				c.logger.Warningf("%s: certificate serial unchanged (%s), renewal may not have occurred", endpoint, oldSerial)
			default:
				renewed++
				c.logger.Infof("%s: certificate renewed (serial %s -> %s)", endpoint, oldSerial, newSerial)
			}
		}
		if renewed == 0 && unchanged > 0 {
			return fmt.Errorf("no certificates were renewed: %d/%d serials unchanged", unchanged, len(before))
		}
		c.logger.Infof("certificate renewal verified: %d renewed, %d unchanged", renewed, unchanged)
	}

	// Connectivity over WSS must still work with whatever certificates are now served.
	if err := c.testWSSConnectivity(ctx, clients, wssNodes, connectTimeout); err != nil {
		return fmt.Errorf("post-renewal connectivity test failed (certificates may not have been renewed): %w", err)
	}

	c.logger.Info("certificate renewal test passed")
	return nil
}

// forgeConfig extracts the forge domain and appropriate CA certificate from the
// first autotls node's bee configuration. If the CA endpoint indicates pebble
// (test environment), the embedded pebble CA cert is returned. Otherwise, an
// empty string is returned so the system root pool is used.
func (c *Check) forgeConfig(cluster orchestration.Cluster, autoTLSClients orchestration.ClientList) (forgeDomain, caCertPEM string) {
// forgeConfig extracts the forge domain and CA certificate from the first autotls
// node's bee configuration. When Pebble is detected, the live root CA is fetched
// from Pebble's management API (since Pebble generates a fresh CA on each start).
// Falls back to the static embedded cert if the fetch fails.
func (c *Check) forgeConfig(ctx context.Context, cluster orchestration.Cluster, autoTLSClients orchestration.ClientList, pebbleMgmtURLOverride string) (forgeDomain, caCertPEM string) {
nodes := cluster.Nodes()
pebbleSvc := internal.NewPebble(insecurePebbleHTTPClient)

for _, client := range autoTLSClients {
node, ok := nodes[client.Name()]
if !ok || node.Config() == nil {
Expand All @@ -344,9 +304,31 @@ func (c *Check) forgeConfig(cluster orchestration.Cluster, autoTLSClients orches
cfg := node.Config()
forgeDomain = cfg.AutoTLSDomain
if strings.Contains(cfg.AutoTLSCAEndpoint, "pebble") {
caCertPEM = k8s.PebbleCertificate
mgmtURL := pebbleMgmtURL(cfg.AutoTLSCAEndpoint)
if pebbleMgmtURLOverride != "" {
mgmtURL = pebbleMgmtURLOverride
}
liveCert, err := pebbleSvc.FetchRootCA(ctx, mgmtURL)
if err != nil {
c.logger.Warningf("failed to fetch live Pebble CA from %s, falling back to static cert: %v", mgmtURL, err)
caCertPEM = cert.PebbleCertificate
} else {
c.logger.Infof("fetched live Pebble CA from %s", mgmtURL)
caCertPEM = liveCert
}
}
return forgeDomain, caCertPEM
}
return "", ""
}

// pebbleMgmtURL derives the Pebble management API URL from the ACME directory endpoint.
// E.g. "https://pebble:14000/dir" -> "https://pebble:15000/roots/0"
//
// The last path segment of acmeEndpoint is dropped, the first ":14000"
// occurrence is rewritten to ":15000", and "/roots/0" is appended. An endpoint
// without a path (e.g. "https://pebble:14000") is kept whole, so the slashes of
// the "://" scheme separator are never mistaken for a path separator. Endpoints
// that do not contain ":14000" keep their port unchanged.
func pebbleMgmtURL(acmeEndpoint string) string {
	base := acmeEndpoint

	// Offset of the first byte after "scheme://"; 0 when there is no scheme.
	authorityStart := 0
	if i := strings.Index(base, "://"); i >= 0 {
		authorityStart = i + len("://")
	}

	// Only cut at a "/" located after the scheme separator. The i > 0 guard
	// leaves a scheme-less input whose only slash is its first byte untouched.
	if i := strings.LastIndex(base, "/"); i > 0 && i >= authorityStart {
		base = base[:i]
	}

	base = strings.Replace(base, ":14000", ":15000", 1)
	return base + "/roots/0"
}
Loading
Loading