From 7b26e703d35de8e1e0d7cde7c5d385113fe9d3d7 Mon Sep 17 00:00:00 2001 From: Markus Opolka Date: Wed, 28 Jan 2026 15:15:56 +0100 Subject: [PATCH 1/2] Change to stdlib slices.Contains --- cmd/alert.go | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/cmd/alert.go b/cmd/alert.go index 4e2bbd3..7792f62 100644 --- a/cmd/alert.go +++ b/cmd/alert.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "regexp" + "slices" "strings" "github.com/NETWAYS/check_prometheus/internal/alert" @@ -23,17 +24,6 @@ type AlertConfig struct { var cliAlertConfig AlertConfig -func contains(s string, list []string) bool { - // Tiny helper to see if a string is in a list of strings - for _, elem := range list { - if s == elem { - return true - } - } - - return false -} - var alertCmd = &cobra.Command{ Use: "alert", Short: "Checks the status of a Prometheus alert", @@ -115,7 +105,7 @@ inactive = 0`, // If it's not the Alert we're looking for, Skip! if cliAlertConfig.AlertName != nil { - if !contains(rl.AlertingRule.Name, cliAlertConfig.AlertName) { + if !slices.Contains(cliAlertConfig.AlertName, rl.AlertingRule.Name) { continue } } From 36d7490fa9a7fad26f73c8630e226e7e83fd92b3 Mon Sep 17 00:00:00 2001 From: Markus Opolka Date: Wed, 28 Jan 2026 16:00:42 +0100 Subject: [PATCH 2/2] Add option to include or exclude alerts via their labels --- README.md | 25 ++++++---- cmd/alert.go | 57 +++++++++++++++++++--- cmd/alert_test.go | 70 ++++++++++++++++++++++++++++ testdata/alertmanager/alert.rules | 4 +- testdata/unittest/alertDataset1.json | 9 ++-- 5 files changed, 144 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 8291695..c9a1b67 100644 --- a/README.md +++ b/README.md @@ -153,16 +153,21 @@ Examples: | total=2 firing=1 pending=0 inactive=1 Flags: - --exclude-alert stringArray Alerts to ignore. Can be used multiple times and supports regex. - -h, --help help for alert - -n, --name strings The name of one or more specific alerts to check. 
- This parameter can be repeated e.G.: '--name alert1 --name alert2' - If no name is given, all alerts will be evaluated - -g, --group strings The name of one or more specific groups to check. - This parameter can be repeated e.G.: '--group group1 --group group2' - If no group is given, all groups will be scanned for alerts - -T, --no-alerts-state string State to assign when no alerts are found (0, 1, 2, 3, OK, WARNING, CRITICAL, UNKNOWN). If not set this defaults to OK (default "OK") - -P, --problems Display only alerts which status is not inactive/OK. Note that in combination with the --name flag this might result in no alerts being displayed + --exclude-alert stringArray Alerts to ignore. Can be used multiple times and supports regex. + --exclude-label stringArray The label of one or more specific alerts to exclude. + This parameter can be repeated e.g.: '--exclude-label prio=high --exclude-label another=example' + -g, --group strings The name of one or more specific groups to check for alerts. + This parameter can be repeated e.g.: '--group group1 --group group2' + If no group is given, all groups will be scanned for alerts + -h, --help help for alert + --include-label stringArray The label of one or more specific alerts to include. + This parameter can be repeated e.g.: '--include-label prio=high --include-label another=example' + Note that repeated --include-label are combined using a union. + -n, --name strings The name of one or more specific alerts to check. + This parameter can be repeated e.g.: '--name alert1 --name alert2' + If no name is given, all alerts will be evaluated + -T, --no-alerts-state string State to assign when no alerts are found (0, 1, 2, 3, OK, WARNING, CRITICAL, UNKNOWN). If not set this defaults to OK (default "OK") + -P, --problems Display only alerts which status is not inactive/OK. 
Note that in combination with the --name flag this might result in no alerts being displayed ``` #### Checking all defined alerts diff --git a/cmd/alert.go b/cmd/alert.go index 7792f62..be8466a 100644 --- a/cmd/alert.go +++ b/cmd/alert.go @@ -11,6 +11,7 @@ import ( "github.com/NETWAYS/go-check" "github.com/NETWAYS/go-check/perfdata" "github.com/NETWAYS/go-check/result" + "github.com/prometheus/common/model" "github.com/spf13/cobra" ) @@ -18,6 +19,8 @@ type AlertConfig struct { AlertName []string Group []string ExcludeAlerts []string + ExcludeLabels []string + IncludeLabels []string ProblemsOnly bool NoAlertsState string } @@ -102,7 +105,6 @@ inactive = 0`, var overall result.Overall for _, rl := range rules { - // If it's not the Alert we're looking for, Skip! if cliAlertConfig.AlertName != nil { if !slices.Contains(cliAlertConfig.AlertName, rl.AlertingRule.Name) { @@ -110,22 +112,36 @@ inactive = 0`, } } + labelsMatchedInclude := matchesLabel(rl.AlertingRule.Labels, cliAlertConfig.IncludeLabels) + + if len(cliAlertConfig.IncludeLabels) > 0 && !labelsMatchedInclude { + // If the alert labels don't match here we can skip it. + continue + } + // Skip inactive alerts if flag is set if len(rl.AlertingRule.Alerts) == 0 && cliAlertConfig.ProblemsOnly { continue } - alertMatched, regexErr := matches(rl.AlertingRule.Name, cliAlertConfig.ExcludeAlerts) + alertMatchedExclude, regexErr := matches(rl.AlertingRule.Name, cliAlertConfig.ExcludeAlerts) if regexErr != nil { check.ExitRaw(check.Unknown, "Invalid regular expression provided:", regexErr.Error()) } - if alertMatched { + if alertMatchedExclude { // If the alert matches a regex from the list we can skip it. continue } + labelsMatchedExclude := matchesLabel(rl.AlertingRule.Labels, cliAlertConfig.ExcludeLabels) + + if len(cliAlertConfig.ExcludeLabels) > 0 && labelsMatchedExclude { + // If the alert labels match here we can skip it. 
+ continue + } + // Handle Inactive Alerts if len(rl.AlertingRule.Alerts) == 0 { // Counting states for perfdata @@ -208,18 +224,28 @@ func init() { fs.StringVarP(&cliAlertConfig.NoAlertsState, "no-alerts-state", "T", "OK", "State to assign when no alerts are found (0, 1, 2, 3, OK, WARNING, CRITICAL, UNKNOWN). If not set this defaults to OK") - fs.StringArrayVar(&cliAlertConfig.ExcludeAlerts, "exclude-alert", []string{}, "Alerts to ignore. Can be used multiple times and supports regex.") + fs.StringArrayVar(&cliAlertConfig.ExcludeAlerts, "exclude-alert", []string{}, + "Alerts to ignore. Can be used multiple times and supports regex.") fs.StringSliceVarP(&cliAlertConfig.AlertName, "name", "n", nil, "The name of one or more specific alerts to check."+ - "\nThis parameter can be repeated e.G.: '--name alert1 --name alert2'"+ + "\nThis parameter can be repeated e.g.: '--name alert1 --name alert2'"+ "\nIf no name is given, all alerts will be evaluated") fs.StringSliceVarP(&cliAlertConfig.Group, "group", "g", nil, "The name of one or more specific groups to check for alerts."+ - "\nThis parameter can be repeated e.G.: '--group group1 --group group2'"+ + "\nThis parameter can be repeated e.g.: '--group group1 --group group2'"+ "\nIf no group is given, all groups will be scanned for alerts") + fs.StringArrayVar(&cliAlertConfig.IncludeLabels, "include-label", []string{}, + "The label of one or more specific alerts to include. "+ + "\nThis parameter can be repeated e.g.: '--include-label prio=high --include-label another=example'"+ + "\nNote that repeated --include-label are combined using a union.") + + fs.StringArrayVar(&cliAlertConfig.ExcludeLabels, "exclude-label", []string{}, + "The label of one or more specific alerts to exclude."+ + "\nThis parameter can be repeated e.g.: '--exclude-label prio=high --exclude-label another=example'") + fs.BoolVarP(&cliAlertConfig.ProblemsOnly, "problems", "P", false, "Display only alerts which status is not inactive/OK. 
Note that in combination with the --name flag this might result in no alerts being displayed") } @@ -257,3 +283,22 @@ func matches(input string, regexToExclude []string) (bool, error) { return false, nil } + +// matchesLabel reports whether labels contains at least one of the key=value pairs in labelsToMatch; entries without an equals sign are skipped. +func matchesLabel(labels model.LabelSet, labelsToMatch []string) bool { + for _, lb := range labelsToMatch { + kv := strings.SplitN(lb, "=", 2) + + if len(kv) != 2 { + continue + } + + key, value := model.LabelName(kv[0]), model.LabelValue(kv[1]) + + if val, ok := labels[key]; ok && val == value { + return true + } + } + + return false +} diff --git a/cmd/alert_test.go b/cmd/alert_test.go index cb40d5e..2a5124c 100644 --- a/cmd/alert_test.go +++ b/cmd/alert_test.go @@ -234,6 +234,76 @@ exit status 2 args: []string{"run", "../main.go", "alert", "--name", "InactiveAlert"}, expected: "[OK] - 1 Alerts: 0 Firing - 0 Pending - 1 Inactive\n\\_ [OK] [InactiveAlert] is inactive\n|total=1 firing=0 pending=0 inactive=1\n\n", }, + { + name: "alert-include-label", + server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write(loadTestdata(alertTestDataSet1)) + })), + args: []string{"run", "../main.go", "alert", "--include-label", "severity=critical"}, + expected: `[CRITICAL] - 2 Alerts: 1 Firing - 0 Pending - 1 Inactive +\_ [OK] [HostOutOfMemory] is inactive +\_ [CRITICAL] [BlackboxTLS] - Job: [blackbox] on Instance: [https://localhost:443] is firing - value: -6065338.00 - {"alertname":"TLS","instance":"https://localhost:443","job":"blackbox","severity":"critical"} +|total=2 firing=1 pending=0 inactive=1 + +exit status 2 +`, + }, + { + name: "alert-exclude-label", + server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write(loadTestdata(alertTestDataSet1)) + })), + args: []string{"run", "../main.go", "alert", "--exclude-label", "severity=critical"}, + expected: `[WARNING] - 1 
Alerts: 0 Firing - 1 Pending - 0 Inactive +\_ [WARNING] [SqlAccessDeniedRate] - Job: [mysql] on Instance: [localhost] is pending - value: 0.40 - {"alertname":"SqlAccessDeniedRate","instance":"localhost","job":"mysql","severity":"warning"} +|total=1 firing=0 pending=1 inactive=0 + +exit status 1 +`, + }, + { + name: "alert-include-label-multiple", + server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write(loadTestdata(alertTestDataSet1)) + })), + args: []string{"run", "../main.go", "alert", "--include-label", "team=database", "--include-label", "severity=critical"}, + expected: `[CRITICAL] - 3 Alerts: 1 Firing - 1 Pending - 1 Inactive +\_ [OK] [HostOutOfMemory] is inactive +\_ [WARNING] [SqlAccessDeniedRate] - Job: [mysql] on Instance: [localhost] is pending - value: 0.40 - {"alertname":"SqlAccessDeniedRate","instance":"localhost","job":"mysql","severity":"warning"} +\_ [CRITICAL] [BlackboxTLS] - Job: [blackbox] on Instance: [https://localhost:443] is firing - value: -6065338.00 - {"alertname":"TLS","instance":"https://localhost:443","job":"blackbox","severity":"critical"} +|total=3 firing=1 pending=1 inactive=1 + +exit status 2 +`, + }, + { + name: "alert-include-label-multiple-similar", + server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write(loadTestdata(alertTestDataSet1)) + })), + args: []string{"run", "../main.go", "alert", "--include-label", "severity=warning", "--include-label", "severity=critical"}, + expected: `[CRITICAL] - 3 Alerts: 1 Firing - 1 Pending - 1 Inactive +\_ [OK] [HostOutOfMemory] is inactive +\_ [WARNING] [SqlAccessDeniedRate] - Job: [mysql] on Instance: [localhost] is pending - value: 0.40 - {"alertname":"SqlAccessDeniedRate","instance":"localhost","job":"mysql","severity":"warning"} +\_ [CRITICAL] [BlackboxTLS] - Job: [blackbox] on Instance: [https://localhost:443] is firing - value: 
-6065338.00 - {"alertname":"TLS","instance":"https://localhost:443","job":"blackbox","severity":"critical"} +|total=3 firing=1 pending=1 inactive=1 + +exit status 2 +`, + }, + { + name: "alert-exclude-label-multiple", + server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write(loadTestdata(alertTestDataSet1)) + })), + args: []string{"run", "../main.go", "alert", "--exclude-label", "team=database", "--exclude-label", "severity=critical"}, + expected: "[OK] - 0 Alerts: 0 Firing - 0 Pending - 0 Inactive\n\\_ [OK] No alerts retrieved\n|total=0 firing=0 pending=0 inactive=0\n\n", + }, } for _, test := range tests { diff --git a/testdata/alertmanager/alert.rules b/testdata/alertmanager/alert.rules index 847f12e..933df38 100644 --- a/testdata/alertmanager/alert.rules +++ b/testdata/alertmanager/alert.rules @@ -15,7 +15,7 @@ groups: expr: absent(up{job="alertmanager"}) for: 0m labels: - severity: warning + severity: low annotations: summary: Prometheus AlertManager job missing (instance {{ $labels.instance }}) description: "A Prometheus AlertManager job has disappeared\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" @@ -33,7 +33,7 @@ groups: expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) > 80 for: 0m labels: - severity: warning + severity: extreme annotations: summary: Host high CPU load (instance {{ $labels.instance }}) description: "CPU load is > 80%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" diff --git a/testdata/unittest/alertDataset1.json b/testdata/unittest/alertDataset1.json index 7e46bff..ef5ebc5 100644 --- a/testdata/unittest/alertDataset1.json +++ b/testdata/unittest/alertDataset1.json @@ -12,7 +12,8 @@ "query": "up", "duration": 120, "labels": { - "severity": "critical" + "severity": "critical", + "team": "network" }, "annotations": { "description": "Foo", @@ -40,7 +41,8 @@ "query": "mysql", "duration": 17280000, "labels": { - "severity": 
"warning" + "severity": "warning", + "team": "database" }, "annotations": { "description": "MySQL", @@ -84,7 +86,8 @@ "query": "SSL", "duration": 0, "labels": { - "severity": "critical" + "severity": "critical", + "team": "network" }, "annotations": { "description": "TLS",