Added custom messages for uptime alerts

This commit is contained in:
2026-05-14 00:55:09 +00:00
parent 6d7c0ce58b
commit d6f65c58f6
6 changed files with 286 additions and 34 deletions
+44 -2
View File
@@ -188,6 +188,48 @@ specific default by adding the alert's ID or name to its
`suppress_alert_ids` list in `cluster.yaml` (see "Edit cluster.yaml `suppress_alert_ids` list in `cluster.yaml` (see "Edit cluster.yaml
directly" below). directly" below).
## Custom alert messages
Each alert can carry its own `subject_template` and `body_template`
(Go `text/template` syntax). When set, they override the built-in
formatting for that one alert; the default renderer is used otherwise.
Discord ignores the subject template (it has no subject line).
```sh
qu alert add discord oncall --webhook https://... \
--body ':rotating_light: **{{.Check.Name}}** is now {{.Verb}}
target: `{{.Check.Target}}`
detail: {{.Snapshot.Detail}}'
# multi-line templates are easier from a file
qu alert add smtp ops --host ... --from ... --to ... \
--subject-file /etc/quptime/templates/ops.subject \
--body-file /etc/quptime/templates/ops.body
```
Available template variables:
| Variable | Meaning |
|---|---|
| `{{.Check.Name}}` | check name |
| `{{.Check.Type}}` | `http` / `tcp` / `icmp` |
| `{{.Check.Target}}` | URL or host:port being probed |
| `{{.Check.ID}}` | UUID |
| `{{.From}}` | previous state (`up` / `down` / `unknown`) |
| `{{.To}}` | new state |
| `{{.Verb}}` | `UP` / `DOWN` / `RECOVERED` |
| `{{.Snapshot.Reports}}` | total per-node reports counted |
| `{{.Snapshot.OKCount}}` | how many reported OK |
| `{{.Snapshot.NotOK}}` | how many reported failure |
| `{{.Snapshot.Detail}}` | first failure detail string |
| `{{.NodeID}}` | master that dispatched |
| `{{.When}}` | RFC3339 timestamp |
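`qu alert test <name>` exercises the alert's custom templates against a
synthetic UP → DOWN transition of a sample check (`test-check`, target
`https://example.com`), so you can verify rendering before production
traffic depends on it. If a template fails to parse or execute, the test
command reports the error and sends nothing. During normal alerting, a
template parse or execution error is logged and the built-in format is
used for the affected field.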
## Edit cluster.yaml directly ## Edit cluster.yaml directly
Anything you can do through the CLI you can also do by editing Anything you can do through the CLI you can also do by editing
@@ -258,8 +300,8 @@ qu check add tcp <name> <host:port>
qu check add icmp <name> <host> qu check add icmp <name> <host>
qu check list qu check list
qu check remove <id-or-name> qu check remove <id-or-name>
qu alert add smtp <name> --host … --port … --from … --to … [--user --password --starttls] [--default] qu alert add smtp <name> --host … --port … --from … --to … [--user --password --starttls] [--default] [--subject … --body …]
qu alert add discord <name> --webhook … [--default] qu alert add discord <name> --webhook … [--default] [--body …]
qu alert list / remove / test <id-or-name> qu alert list / remove / test <id-or-name>
qu alert default <id-or-name> on|off toggle default attachment to every check qu alert default <id-or-name> on|off toggle default attachment to every check
qu trust list / remove <node-id> qu trust list / remove <node-id>
+25 -5
View File
@@ -30,13 +30,16 @@ func (d *Dispatcher) OnTransition(check *config.Check, from, to checks.State, sn
if to == checks.StateUnknown { if to == checks.StateUnknown {
return return
} }
msg := Render(d.selfID, check, from, to, snap)
alerts := d.cluster.EffectiveAlertsFor(check) alerts := d.cluster.EffectiveAlertsFor(check)
if len(alerts) == 0 && len(check.AlertIDs) > 0 { if len(alerts) == 0 && len(check.AlertIDs) > 0 {
d.logger.Printf("alerts: check %q references alerts but none resolved", check.Name) d.logger.Printf("alerts: check %q references alerts but none resolved", check.Name)
} }
for i := range alerts { for i := range alerts {
alert := alerts[i] alert := alerts[i]
msg, err := RenderFor(&alert, d.selfID, check, from, to, snap)
if err != nil {
d.logger.Printf("alerts: %q template: %v — falling back to default", alert.Name, err)
}
if err := d.dispatchOne(&alert, msg); err != nil { if err := d.dispatchOne(&alert, msg); err != nil {
d.logger.Printf("alerts: %q via %s: %v", alert.Name, alert.Type, err) d.logger.Printf("alerts: %q via %s: %v", alert.Name, alert.Type, err)
} }
@@ -44,15 +47,32 @@ func (d *Dispatcher) OnTransition(check *config.Check, from, to checks.State, sn
} }
// Test sends a one-shot test message to the named alert. Returns an // Test sends a one-shot test message to the named alert. Returns an
// error so the CLI can surface failures interactively. // error so the CLI can surface failures interactively. If the alert
// carries custom templates they are exercised against a synthetic
// "homepage going DOWN" transition so the operator can confirm the
// template renders before a real outage.
func (d *Dispatcher) Test(alertID string) error { func (d *Dispatcher) Test(alertID string) error {
alert := d.cluster.FindAlert(alertID) alert := d.cluster.FindAlert(alertID)
if alert == nil { if alert == nil {
return fmt.Errorf("alert %q not found", alertID) return fmt.Errorf("alert %q not found", alertID)
} }
msg := Message{ if alert.SubjectTemplate == "" && alert.BodyTemplate == "" {
Subject: "[quptime] test alert", msg := Message{
Body: fmt.Sprintf("This is a test of alert %q from node %s.\nIf you see this, the alert channel is wired correctly.\n", alert.Name, d.selfID), Subject: "[quptime] test alert",
Body: fmt.Sprintf("This is a test of alert %q from node %s.\nIf you see this, the alert channel is wired correctly.\n", alert.Name, d.selfID),
}
return d.dispatchOne(alert, msg)
}
sample := &config.Check{
ID: "test-check",
Name: "test-check",
Type: config.CheckHTTP,
Target: "https://example.com",
}
snap := checks.Snapshot{Reports: 3, OKCount: 0, NotOK: 3, Detail: "synthetic test failure"}
msg, err := RenderFor(alert, d.selfID, sample, checks.StateUp, checks.StateDown, snap)
if err != nil {
return fmt.Errorf("render template: %w", err)
} }
return d.dispatchOne(alert, msg) return d.dispatchOne(alert, msg)
} }
+79 -7
View File
@@ -4,14 +4,29 @@
package alerts package alerts
import ( import (
"bytes"
"fmt" "fmt"
"strings" "strings"
"text/template"
"time" "time"
"git.cer.sh/axodouble/quptime/internal/checks" "git.cer.sh/axodouble/quptime/internal/checks"
"git.cer.sh/axodouble/quptime/internal/config" "git.cer.sh/axodouble/quptime/internal/config"
) )
// TemplateContext is the data handed to user-provided subject and body
// templates. The built-in renderer (Render) obtains the same value
// through newContext, so the default and custom rendering paths read
// from one shared set of fields.
type TemplateContext struct {
// Check is the check whose state changed; templates reach its fields
// as {{.Check.Name}}, {{.Check.Target}}, and so on.
Check *config.Check
From string // previous state name
To string // new state name
Verb string // "UP" | "DOWN" | "RECOVERED"
Snapshot checks.Snapshot // aggregate counts and detail
NodeID string // master that rendered the message
When string // RFC3339 timestamp
}
// Message is the rendered notification ready to ship across any // Message is the rendered notification ready to ship across any
// channel. Channels may format Subject + Body differently (SMTP uses // channel. Channels may format Subject + Body differently (SMTP uses
// both; Discord renders a single string). // both; Discord renders a single string).
@@ -20,25 +35,82 @@ type Message struct {
Body string Body string
} }
// Render produces a human-readable message from one state transition. // Render produces a human-readable message from one state transition
// using the built-in format. Used as the fallback when no custom
// template is configured (or when a custom template fails to render).
func Render(nodeID string, check *config.Check, from, to checks.State, snap checks.Snapshot) Message { func Render(nodeID string, check *config.Check, from, to checks.State, snap checks.Snapshot) Message {
now := time.Now().UTC().Format(time.RFC3339) ctx := newContext(nodeID, check, from, to, snap)
verb := transitionVerb(from, to) subject := fmt.Sprintf("[quptime] %s %s — %s", check.Name, ctx.Verb, check.Target)
subject := fmt.Sprintf("[quptime] %s %s — %s", check.Name, verb, check.Target)
var b strings.Builder var b strings.Builder
fmt.Fprintf(&b, "Check %q is now %s.\n", check.Name, strings.ToUpper(string(to))) fmt.Fprintf(&b, "Check %q is now %s.\n", check.Name, strings.ToUpper(ctx.To))
fmt.Fprintf(&b, "Previous state: %s\n", from) fmt.Fprintf(&b, "Previous state: %s\n", ctx.From)
fmt.Fprintf(&b, "Target: %s (%s)\n", check.Target, check.Type) fmt.Fprintf(&b, "Target: %s (%s)\n", check.Target, check.Type)
fmt.Fprintf(&b, "Reports: %d (ok=%d, fail=%d)\n", snap.Reports, snap.OKCount, snap.NotOK) fmt.Fprintf(&b, "Reports: %d (ok=%d, fail=%d)\n", snap.Reports, snap.OKCount, snap.NotOK)
if snap.Detail != "" { if snap.Detail != "" {
fmt.Fprintf(&b, "Detail: %s\n", snap.Detail) fmt.Fprintf(&b, "Detail: %s\n", snap.Detail)
} }
fmt.Fprintf(&b, "Master: %s\n", nodeID) fmt.Fprintf(&b, "Master: %s\n", nodeID)
fmt.Fprintf(&b, "When: %s\n", now) fmt.Fprintf(&b, "When: %s\n", ctx.When)
return Message{Subject: subject, Body: b.String()} return Message{Subject: subject, Body: b.String()}
} }
// RenderFor builds the message for a single alert. It starts from the
// built-in rendering and, for each of SubjectTemplate and BodyTemplate
// that the alert sets, replaces the matching field with the template's
// output. A field whose template fails keeps its built-in text. The
// first template error encountered (subject before body) is returned
// alongside the message so the caller can log it and still send.
// A nil alert, or one with no templates, yields the built-in message
// and a nil error.
func RenderFor(alert *config.Alert, nodeID string, check *config.Check, from, to checks.State, snap checks.Snapshot) (Message, error) {
	msg := Render(nodeID, check, from, to, snap)
	if alert == nil {
		return msg, nil
	}
	subjectSrc, bodySrc := alert.SubjectTemplate, alert.BodyTemplate
	if subjectSrc == "" && bodySrc == "" {
		return msg, nil
	}

	data := newContext(nodeID, check, from, to, snap)
	var renderErr error

	if subjectSrc != "" {
		if out, err := execTemplate("subject", subjectSrc, data); err == nil {
			msg.Subject = out
		} else {
			renderErr = err
		}
	}

	if bodySrc != "" {
		out, err := execTemplate("body", bodySrc, data)
		switch {
		case err == nil:
			msg.Body = out
		case renderErr == nil:
			// Only record the body error when the subject did not already fail.
			renderErr = err
		}
	}

	return msg, renderErr
}
// newContext assembles the TemplateContext for one state transition.
// State values are converted to their string form, the verb comes from
// transitionVerb, and When is the current UTC time in RFC3339 format.
func newContext(nodeID string, check *config.Check, from, to checks.State, snap checks.Snapshot) TemplateContext {
	var tc TemplateContext
	tc.Check = check
	tc.From, tc.To = string(from), string(to)
	tc.Verb = transitionVerb(from, to)
	tc.Snapshot = snap
	tc.NodeID = nodeID
	tc.When = time.Now().UTC().Format(time.RFC3339)
	return tc
}
// execTemplate parses src as a text/template named name and executes it
// against ctx, returning the rendered text. Parse and execution
// failures are wrapped with the template name; the returned string is
// empty whenever the error is non-nil.
func execTemplate(name, src string, ctx TemplateContext) (string, error) {
	parsed, parseErr := template.New(name).Option("missingkey=zero").Parse(src)
	if parseErr != nil {
		return "", fmt.Errorf("parse %s template: %w", name, parseErr)
	}

	var out bytes.Buffer
	execErr := parsed.Execute(&out, ctx)
	if execErr != nil {
		return "", fmt.Errorf("execute %s template: %w", name, execErr)
	}
	return out.String(), nil
}
func transitionVerb(from, to checks.State) string { func transitionVerb(from, to checks.State) string {
switch to { switch to {
case checks.StateDown: case checks.StateDown:
+58
View File
@@ -50,3 +50,61 @@ func TestRenderUpInitialTransition(t *testing.T) {
t.Error("first-time UP should not be tagged RECOVERED") t.Error("first-time UP should not be tagged RECOVERED")
} }
} }
// TestRenderForUsesAlertTemplates verifies that when an alert sets both
// templates, their rendered output replaces the built-in subject and
// body.
func TestRenderForUsesAlertTemplates(t *testing.T) {
	alert := &config.Alert{
		SubjectTemplate: "{{.Check.Name}} is {{.Verb}}",
		BodyTemplate:    "{{.Check.Target}} :: {{.Snapshot.Detail}}",
	}
	check := &config.Check{
		Name:   "homepage",
		Target: "https://example.com",
		Type:   config.CheckHTTP,
	}
	snap := checks.Snapshot{
		Reports: 3,
		OKCount: 0,
		NotOK:   3,
		Detail:  "connection refused",
	}

	got, err := RenderFor(alert, "master", check, checks.StateUp, checks.StateDown, snap)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	const (
		wantSubject = "homepage is DOWN"
		wantBody    = "https://example.com :: connection refused"
	)
	if got.Subject != wantSubject {
		t.Errorf("subject = %q", got.Subject)
	}
	if got.Body != wantBody {
		t.Errorf("body = %q", got.Body)
	}
}
// TestRenderForFallsBackToDefaultPerField verifies that overriding only
// the body leaves the subject on the built-in rendering.
func TestRenderForFallsBackToDefaultPerField(t *testing.T) {
	// Only the body template is set; the subject must come from the default renderer.
	bodyOnly := &config.Alert{BodyTemplate: "custom body"}
	check := &config.Check{
		Name:   "homepage",
		Target: "https://example.com",
		Type:   config.CheckHTTP,
	}
	snap := checks.Snapshot{Reports: 3, OKCount: 0, NotOK: 3}

	got, err := RenderFor(bodyOnly, "master", check, checks.StateUp, checks.StateDown, snap)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if defaultSubject := strings.Contains(got.Subject, "DOWN"); !defaultSubject {
		t.Errorf("subject should be default rendering, got %q", got.Subject)
	}
	if want := "custom body"; got.Body != want {
		t.Errorf("body = %q", got.Body)
	}
}
// TestRenderForReportsTemplateError verifies both halves of RenderFor's
// error contract for a malformed template: the error is returned, and
// the message still carries the built-in rendering so the caller can
// log the problem and ship the alert anyway.
func TestRenderForReportsTemplateError(t *testing.T) {
	check := &config.Check{Name: "homepage", Target: "https://example.com"}
	snap := checks.Snapshot{}
	alert := &config.Alert{BodyTemplate: "{{.Check.MissingField"} // unbalanced
	msg, err := RenderFor(alert, "master", check, checks.StateUp, checks.StateDown, snap)
	if err == nil {
		t.Fatal("expected parse error for malformed template")
	}
	// The built-in subject and body both mention the check name, so its
	// presence shows the failed field fell back instead of being blanked.
	if !strings.Contains(msg.Subject, "homepage") {
		t.Errorf("subject should fall back to default rendering, got %q", msg.Subject)
	}
	if !strings.Contains(msg.Body, "homepage") {
		t.Errorf("body should fall back to default rendering, got %q", msg.Body)
	}
}
// TestRenderForNilAlertReturnsDefault verifies that a nil alert yields
// the built-in message without an error.
func TestRenderForNilAlertReturnsDefault(t *testing.T) {
	check := &config.Check{
		Name:   "homepage",
		Target: "https://example.com",
	}
	snap := checks.Snapshot{Reports: 1, OKCount: 1}

	got, err := RenderFor(nil, "master", check, checks.StateUp, checks.StateUp, snap)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if mentionsCheck := strings.Contains(got.Subject, "homepage"); !mentionsCheck {
		t.Errorf("default subject should mention check, got %q", got.Subject)
	}
}
+70 -20
View File
@@ -4,6 +4,7 @@ import (
"context" "context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"os"
"text/tabwriter" "text/tabwriter"
"time" "time"
@@ -15,6 +16,41 @@ import (
"git.cer.sh/axodouble/quptime/internal/transport" "git.cer.sh/axodouble/quptime/internal/transport"
) )
// bindTemplateFlags registers the four template flags on cmd:
// --subject, --subject-file, --body and --body-file, each a string
// flag defaulting to "". resolveTemplateFlags is the companion that
// turns them into the effective subject and body template strings,
// with an inline flag winning over its -file counterpart.
func bindTemplateFlags(cmd *cobra.Command) {
	flags := cmd.Flags()
	for _, f := range []struct{ name, usage string }{
		{"subject", "subject template (text/template syntax — SMTP only)"},
		{"subject-file", "path to a file containing the subject template"},
		{"body", "body template (text/template syntax)"},
		{"body-file", "path to a file containing the body template"},
	} {
		flags.String(f.name, "", f.usage)
	}
}
// resolveTemplateFlags returns the effective subject and body template
// strings for cmd. For each of the two, a non-empty inline flag
// (--subject / --body) wins; otherwise the matching -file flag, if set,
// is read from disk. A file read failure is returned wrapped with the
// flag name and path, and both strings are then empty.
//
// A subject loaded from a file has its trailing line endings removed:
// text files normally end in a newline, and a subject is a single
// header line, so the terminator must not become part of the template.
// Body content is used exactly as read.
func resolveTemplateFlags(cmd *cobra.Command) (subject, body string, err error) {
	if subject, err = templateFromFlags(cmd, "subject", true); err != nil {
		return "", "", err
	}
	if body, err = templateFromFlags(cmd, "body", false); err != nil {
		return "", "", err
	}
	return subject, body, nil
}

// templateFromFlags resolves one template from the flag pair --<name> /
// --<name>-file, preferring the inline value. When trimEOL is set,
// trailing '\n' and '\r' bytes are stripped from file content.
func templateFromFlags(cmd *cobra.Command, name string, trimEOL bool) (string, error) {
	// GetString only fails for an unregistered or non-string flag, which
	// simply leaves the value empty; treat that the same as "not set".
	if inline, _ := cmd.Flags().GetString(name); inline != "" {
		return inline, nil
	}
	fileFlag := name + "-file"
	path, _ := cmd.Flags().GetString(fileFlag)
	if path == "" {
		return "", nil
	}
	raw, err := os.ReadFile(path)
	if err != nil {
		return "", fmt.Errorf("read --%s %s: %w", fileFlag, path, err)
	}
	text := string(raw)
	if trimEOL {
		for n := len(text); n > 0 && (text[n-1] == '\n' || text[n-1] == '\r'); n = len(text) {
			text = text[:n-1]
		}
	}
	return text, nil
}
func addAlertCmd(root *cobra.Command) { func addAlertCmd(root *cobra.Command) {
alert := &cobra.Command{ alert := &cobra.Command{
Use: "alert", Use: "alert",
@@ -136,22 +172,28 @@ func buildSMTPAddCmd() *cobra.Command {
RunE: func(cmd *cobra.Command, args []string) error { RunE: func(cmd *cobra.Command, args []string) error {
ctx, cancel := context.WithTimeout(cmd.Context(), 10*time.Second) ctx, cancel := context.WithTimeout(cmd.Context(), 10*time.Second)
defer cancel() defer cancel()
subj, body, err := resolveTemplateFlags(cmd)
if err != nil {
return err
}
a := config.Alert{ a := config.Alert{
ID: uuid.NewString(), ID: uuid.NewString(),
Name: args[0], Name: args[0],
Type: config.AlertSMTP, Type: config.AlertSMTP,
Default: makeDefault, Default: makeDefault,
SMTPHost: host, SubjectTemplate: subj,
SMTPPort: port, BodyTemplate: body,
SMTPUser: user, SMTPHost: host,
SMTPPassword: password, SMTPPort: port,
SMTPFrom: from, SMTPUser: user,
SMTPTo: to, SMTPPassword: password,
SMTPStartTLS: startTLS, SMTPFrom: from,
SMTPTo: to,
SMTPStartTLS: startTLS,
} }
payload, _ := json.Marshal(a) payload, _ := json.Marshal(a)
body := daemon.MutateBody{Kind: transport.MutationAddAlert, Payload: payload} mb := daemon.MutateBody{Kind: transport.MutationAddAlert, Payload: payload}
raw, err := callDaemon(ctx, daemon.CtrlMutate, body) raw, err := callDaemon(ctx, daemon.CtrlMutate, mb)
if err != nil { if err != nil {
return err return err
} }
@@ -170,6 +212,7 @@ func buildSMTPAddCmd() *cobra.Command {
cmd.Flags().StringSliceVar(&to, "to", nil, "recipient address (repeat or comma-separate)") cmd.Flags().StringSliceVar(&to, "to", nil, "recipient address (repeat or comma-separate)")
cmd.Flags().BoolVar(&startTLS, "starttls", true, "negotiate STARTTLS") cmd.Flags().BoolVar(&startTLS, "starttls", true, "negotiate STARTTLS")
cmd.Flags().BoolVar(&makeDefault, "default", false, "attach this alert to every check automatically") cmd.Flags().BoolVar(&makeDefault, "default", false, "attach this alert to every check automatically")
bindTemplateFlags(cmd)
_ = cmd.MarkFlagRequired("host") _ = cmd.MarkFlagRequired("host")
_ = cmd.MarkFlagRequired("from") _ = cmd.MarkFlagRequired("from")
_ = cmd.MarkFlagRequired("to") _ = cmd.MarkFlagRequired("to")
@@ -186,16 +229,22 @@ func buildDiscordAddCmd() *cobra.Command {
RunE: func(cmd *cobra.Command, args []string) error { RunE: func(cmd *cobra.Command, args []string) error {
ctx, cancel := context.WithTimeout(cmd.Context(), 10*time.Second) ctx, cancel := context.WithTimeout(cmd.Context(), 10*time.Second)
defer cancel() defer cancel()
subj, body, err := resolveTemplateFlags(cmd)
if err != nil {
return err
}
a := config.Alert{ a := config.Alert{
ID: uuid.NewString(), ID: uuid.NewString(),
Name: args[0], Name: args[0],
Type: config.AlertDiscord, Type: config.AlertDiscord,
Default: makeDefault, Default: makeDefault,
DiscordWebhook: webhook, SubjectTemplate: subj,
BodyTemplate: body,
DiscordWebhook: webhook,
} }
payload, _ := json.Marshal(a) payload, _ := json.Marshal(a)
body := daemon.MutateBody{Kind: transport.MutationAddAlert, Payload: payload} mb := daemon.MutateBody{Kind: transport.MutationAddAlert, Payload: payload}
raw, err := callDaemon(ctx, daemon.CtrlMutate, body) raw, err := callDaemon(ctx, daemon.CtrlMutate, mb)
if err != nil { if err != nil {
return err return err
} }
@@ -208,6 +257,7 @@ func buildDiscordAddCmd() *cobra.Command {
} }
cmd.Flags().StringVar(&webhook, "webhook", "", "discord webhook URL") cmd.Flags().StringVar(&webhook, "webhook", "", "discord webhook URL")
cmd.Flags().BoolVar(&makeDefault, "default", false, "attach this alert to every check automatically") cmd.Flags().BoolVar(&makeDefault, "default", false, "attach this alert to every check automatically")
bindTemplateFlags(cmd)
_ = cmd.MarkFlagRequired("webhook") _ = cmd.MarkFlagRequired("webhook")
return cmd return cmd
} }
+10
View File
@@ -85,6 +85,16 @@ type Alert struct {
// Discord options. // Discord options.
DiscordWebhook string `yaml:"discord_webhook,omitempty"` DiscordWebhook string `yaml:"discord_webhook,omitempty"`
// SubjectTemplate / BodyTemplate are optional text/template strings
// that override the default rendering. Empty means use the built-in
// format. Discord ignores SubjectTemplate (it has no subject line);
// SMTP uses both. Available variables: {{.Check.Name}},
// {{.Check.Type}}, {{.Check.Target}}, {{.Check.ID}}, {{.From}},
// {{.To}}, {{.Verb}}, {{.Snapshot.Reports}}, {{.Snapshot.OKCount}},
// {{.Snapshot.NotOK}}, {{.Snapshot.Detail}}, {{.NodeID}}, {{.When}}.
SubjectTemplate string `yaml:"subject_template,omitempty"`
BodyTemplate string `yaml:"body_template,omitempty"`
} }
// ClusterConfig is the replicated cluster state. The Version field // ClusterConfig is the replicated cluster state. The Version field