From d6f65c58f6ad3573d1bbc0e705f8f1a534247333 Mon Sep 17 00:00:00 2001 From: Axodouble Date: Thu, 14 May 2026 00:55:09 +0000 Subject: [PATCH] Added custom messages for uptime alerts --- README.md | 46 ++++++++++++++++- internal/alerts/dispatcher.go | 30 +++++++++-- internal/alerts/message.go | 86 ++++++++++++++++++++++++++++--- internal/alerts/message_test.go | 58 +++++++++++++++++++++ internal/cli/alert.go | 90 +++++++++++++++++++++++++-------- internal/config/cluster.go | 10 ++++ 6 files changed, 286 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 32eab82..e4e88a6 100644 --- a/README.md +++ b/README.md @@ -188,6 +188,48 @@ specific default by adding the alert's ID or name to its `suppress_alert_ids` list in `cluster.yaml` (see "Edit cluster.yaml directly" below). +## Custom alert messages + +Each alert can carry its own `subject_template` and `body_template` +(Go `text/template` syntax). When set, they override the built-in +formatting for that one alert; the default renderer is used otherwise. +Discord ignores the subject template (it has no subject line). + +```sh +qu alert add discord oncall --webhook https://... \ + --body ':rotating_light: **{{.Check.Name}}** is now {{.Verb}} +target: `{{.Check.Target}}` +detail: {{.Snapshot.Detail}}' + +# multi-line templates are easier from a file +qu alert add smtp ops --host ... --from ... --to ... 
\ + --subject-file /etc/quptime/templates/ops.subject \ + --body-file /etc/quptime/templates/ops.body +``` + +Available template variables: + +| Variable | Meaning | +|---|---| +| `{{.Check.Name}}` | check name | +| `{{.Check.Type}}` | `http` / `tcp` / `icmp` | +| `{{.Check.Target}}` | URL or host:port being probed | +| `{{.Check.ID}}` | UUID | +| `{{.From}}` | previous state (`up` / `down` / `unknown`) | +| `{{.To}}` | new state | +| `{{.Verb}}` | `UP` / `DOWN` / `RECOVERED` | +| `{{.Snapshot.Reports}}` | total per-node reports counted | +| `{{.Snapshot.OKCount}}` | how many reported OK | +| `{{.Snapshot.NotOK}}` | how many reported failure | +| `{{.Snapshot.Detail}}` | first failure detail string | +| `{{.NodeID}}` | master that dispatched | +| `{{.When}}` | RFC3339 timestamp (UTC) | + +`qu alert test <alert>` renders the templates against a synthetic HTTP +check (`test-check`) going DOWN, so you can verify them ahead of a real +outage; a template error there is returned to the CLI. In normal dispatch, +a failing template falls back to the built-in format and is logged. 
+ ## Edit cluster.yaml directly Anything you can do through the CLI you can also do by editing @@ -258,8 +300,8 @@ qu check add tcp qu check add icmp qu check list qu check remove -qu alert add smtp --host … --port … --from … --to … [--user --password --starttls] [--default] -qu alert add discord --webhook … [--default] +qu alert add smtp --host … --port … --from … --to … [--user --password --starttls] [--default] [--subject … --body …] +qu alert add discord --webhook … [--default] [--body …] qu alert list / remove / test qu alert default on|off toggle default attachment to every check qu trust list / remove diff --git a/internal/alerts/dispatcher.go b/internal/alerts/dispatcher.go index 949aff7..3163fa4 100644 --- a/internal/alerts/dispatcher.go +++ b/internal/alerts/dispatcher.go @@ -30,13 +30,16 @@ func (d *Dispatcher) OnTransition(check *config.Check, from, to checks.State, sn if to == checks.StateUnknown { return } - msg := Render(d.selfID, check, from, to, snap) alerts := d.cluster.EffectiveAlertsFor(check) if len(alerts) == 0 && len(check.AlertIDs) > 0 { d.logger.Printf("alerts: check %q references alerts but none resolved", check.Name) } for i := range alerts { alert := alerts[i] + msg, err := RenderFor(&alert, d.selfID, check, from, to, snap) + if err != nil { + d.logger.Printf("alerts: %q template: %v — falling back to default", alert.Name, err) + } if err := d.dispatchOne(&alert, msg); err != nil { d.logger.Printf("alerts: %q via %s: %v", alert.Name, alert.Type, err) } @@ -44,15 +47,32 @@ func (d *Dispatcher) OnTransition(check *config.Check, from, to checks.State, sn } // Test sends a one-shot test message to the named alert. Returns an -// error so the CLI can surface failures interactively. +// error so the CLI can surface failures interactively. If the alert +// carries custom templates they are exercised against a synthetic +// "test-check going DOWN" transition so the operator can confirm the +// template renders before a real outage. 
func (d *Dispatcher) Test(alertID string) error { alert := d.cluster.FindAlert(alertID) if alert == nil { return fmt.Errorf("alert %q not found", alertID) } - msg := Message{ - Subject: "[quptime] test alert", - Body: fmt.Sprintf("This is a test of alert %q from node %s.\nIf you see this, the alert channel is wired correctly.\n", alert.Name, d.selfID), + if alert.SubjectTemplate == "" && alert.BodyTemplate == "" { + msg := Message{ + Subject: "[quptime] test alert", + Body: fmt.Sprintf("This is a test of alert %q from node %s.\nIf you see this, the alert channel is wired correctly.\n", alert.Name, d.selfID), + } + return d.dispatchOne(alert, msg) + } + sample := &config.Check{ + ID: "test-check", + Name: "test-check", + Type: config.CheckHTTP, + Target: "https://example.com", + } + snap := checks.Snapshot{Reports: 3, OKCount: 0, NotOK: 3, Detail: "synthetic test failure"} + msg, err := RenderFor(alert, d.selfID, sample, checks.StateUp, checks.StateDown, snap) + if err != nil { + return fmt.Errorf("render template: %w", err) } return d.dispatchOne(alert, msg) } diff --git a/internal/alerts/message.go b/internal/alerts/message.go index 9ff8614..d6fdc8d 100644 --- a/internal/alerts/message.go +++ b/internal/alerts/message.go @@ -4,14 +4,29 @@ package alerts import ( + "bytes" "fmt" "strings" + "text/template" "time" "git.cer.sh/axodouble/quptime/internal/checks" "git.cer.sh/axodouble/quptime/internal/config" ) +// TemplateContext is what user-provided subject/body templates see. It +// is also the shape the default renderer fills in, so changing one +// place keeps the two paths consistent. 
+type TemplateContext struct { + Check *config.Check + From string // previous state name + To string // new state name + Verb string // "UP" | "DOWN" | "RECOVERED" + Snapshot checks.Snapshot // aggregate counts and detail + NodeID string // master that rendered the message + When string // RFC3339 timestamp +} + // Message is the rendered notification ready to ship across any // channel. Channels may format Subject + Body differently (SMTP uses // both; Discord renders a single string). @@ -20,25 +35,82 @@ type Message struct { Body string } -// Render produces a human-readable message from one state transition. +// Render produces a human-readable message from one state transition +// using the built-in format. Used as the fallback when no custom +// template is configured (or when a custom template fails to render). func Render(nodeID string, check *config.Check, from, to checks.State, snap checks.Snapshot) Message { - now := time.Now().UTC().Format(time.RFC3339) - verb := transitionVerb(from, to) - subject := fmt.Sprintf("[quptime] %s %s — %s", check.Name, verb, check.Target) + ctx := newContext(nodeID, check, from, to, snap) + subject := fmt.Sprintf("[quptime] %s %s — %s", check.Name, ctx.Verb, check.Target) var b strings.Builder - fmt.Fprintf(&b, "Check %q is now %s.\n", check.Name, strings.ToUpper(string(to))) - fmt.Fprintf(&b, "Previous state: %s\n", from) + fmt.Fprintf(&b, "Check %q is now %s.\n", check.Name, strings.ToUpper(ctx.To)) + fmt.Fprintf(&b, "Previous state: %s\n", ctx.From) fmt.Fprintf(&b, "Target: %s (%s)\n", check.Target, check.Type) fmt.Fprintf(&b, "Reports: %d (ok=%d, fail=%d)\n", snap.Reports, snap.OKCount, snap.NotOK) if snap.Detail != "" { fmt.Fprintf(&b, "Detail: %s\n", snap.Detail) } fmt.Fprintf(&b, "Master: %s\n", nodeID) - fmt.Fprintf(&b, "When: %s\n", now) + fmt.Fprintf(&b, "When: %s\n", ctx.When) return Message{Subject: subject, Body: b.String()} } +// RenderFor produces a message for one specific alert. 
If the alert +// defines SubjectTemplate or BodyTemplate, those override the +// corresponding field from the default render. A template error falls +// back to the default for that field and is reported via the returned +// error (the caller is expected to log but still ship the message). +func RenderFor(alert *config.Alert, nodeID string, check *config.Check, from, to checks.State, snap checks.Snapshot) (Message, error) { + def := Render(nodeID, check, from, to, snap) + if alert == nil || (alert.SubjectTemplate == "" && alert.BodyTemplate == "") { + return def, nil + } + ctx := newContext(nodeID, check, from, to, snap) + msg := def + var firstErr error + if alert.SubjectTemplate != "" { + s, err := execTemplate("subject", alert.SubjectTemplate, ctx) + if err != nil { + firstErr = err + } else { + msg.Subject = s + } + } + if alert.BodyTemplate != "" { + s, err := execTemplate("body", alert.BodyTemplate, ctx) + if err != nil && firstErr == nil { + firstErr = err + } else if err == nil { + msg.Body = s + } + } + return msg, firstErr +} + +func newContext(nodeID string, check *config.Check, from, to checks.State, snap checks.Snapshot) TemplateContext { + return TemplateContext{ + Check: check, + From: string(from), + To: string(to), + Verb: transitionVerb(from, to), + Snapshot: snap, + NodeID: nodeID, + When: time.Now().UTC().Format(time.RFC3339), + } +} + +func execTemplate(name, src string, ctx TemplateContext) (string, error) { + tmpl, err := template.New(name).Option("missingkey=zero").Parse(src) + if err != nil { + return "", fmt.Errorf("parse %s template: %w", name, err) + } + var b bytes.Buffer + if err := tmpl.Execute(&b, ctx); err != nil { + return "", fmt.Errorf("execute %s template: %w", name, err) + } + return b.String(), nil +} + func transitionVerb(from, to checks.State) string { switch to { case checks.StateDown: diff --git a/internal/alerts/message_test.go b/internal/alerts/message_test.go index 27a8683..c60d4a5 100644 --- 
a/internal/alerts/message_test.go +++ b/internal/alerts/message_test.go @@ -50,3 +50,61 @@ func TestRenderUpInitialTransition(t *testing.T) { t.Error("first-time UP should not be tagged RECOVERED") } } + +func TestRenderForUsesAlertTemplates(t *testing.T) { + check := &config.Check{Name: "homepage", Target: "https://example.com", Type: config.CheckHTTP} + snap := checks.Snapshot{Reports: 3, OKCount: 0, NotOK: 3, Detail: "connection refused"} + alert := &config.Alert{ + SubjectTemplate: "{{.Check.Name}} is {{.Verb}}", + BodyTemplate: "{{.Check.Target}} :: {{.Snapshot.Detail}}", + } + msg, err := RenderFor(alert, "master", check, checks.StateUp, checks.StateDown, snap) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if msg.Subject != "homepage is DOWN" { + t.Errorf("subject = %q", msg.Subject) + } + if msg.Body != "https://example.com :: connection refused" { + t.Errorf("body = %q", msg.Body) + } +} + +func TestRenderForFallsBackToDefaultPerField(t *testing.T) { + check := &config.Check{Name: "homepage", Target: "https://example.com", Type: config.CheckHTTP} + snap := checks.Snapshot{Reports: 3, OKCount: 0, NotOK: 3} + // only body overridden; subject should match default. 
+ alert := &config.Alert{BodyTemplate: "custom body"} + msg, err := RenderFor(alert, "master", check, checks.StateUp, checks.StateDown, snap) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !strings.Contains(msg.Subject, "DOWN") { + t.Errorf("subject should be default rendering, got %q", msg.Subject) + } + if msg.Body != "custom body" { + t.Errorf("body = %q", msg.Body) + } +} + +func TestRenderForReportsTemplateError(t *testing.T) { + check := &config.Check{Name: "homepage", Target: "https://example.com"} + snap := checks.Snapshot{} + alert := &config.Alert{BodyTemplate: "{{.Check.MissingField"} // unbalanced + _, err := RenderFor(alert, "master", check, checks.StateUp, checks.StateDown, snap) + if err == nil { + t.Fatal("expected parse error for malformed template") + } +} + +func TestRenderForNilAlertReturnsDefault(t *testing.T) { + check := &config.Check{Name: "homepage", Target: "https://example.com"} + snap := checks.Snapshot{Reports: 1, OKCount: 1} + msg, err := RenderFor(nil, "master", check, checks.StateUp, checks.StateUp, snap) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !strings.Contains(msg.Subject, "homepage") { + t.Errorf("default subject should mention check, got %q", msg.Subject) + } +} diff --git a/internal/cli/alert.go b/internal/cli/alert.go index 7376cb6..bfe92c6 100644 --- a/internal/cli/alert.go +++ b/internal/cli/alert.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "os" "text/tabwriter" "time" @@ -15,6 +16,41 @@ import ( "git.cer.sh/axodouble/quptime/internal/transport" ) +// bindTemplateFlags attaches --subject / --subject-file / --body / +// --body-file to a cobra command. resolveTemplateFlags reads the file +// variants (if non-empty) and returns the effective subject + body +// template strings. Inline flags take precedence over file flags. 
+func bindTemplateFlags(cmd *cobra.Command) { + cmd.Flags().String("subject", "", "subject template (text/template syntax — SMTP only)") + cmd.Flags().String("subject-file", "", "path to a file containing the subject template") + cmd.Flags().String("body", "", "body template (text/template syntax)") + cmd.Flags().String("body-file", "", "path to a file containing the body template") +} + +func resolveTemplateFlags(cmd *cobra.Command) (subject, body string, err error) { + subject, _ = cmd.Flags().GetString("subject") + body, _ = cmd.Flags().GetString("body") + if subject == "" { + if p, _ := cmd.Flags().GetString("subject-file"); p != "" { + raw, e := os.ReadFile(p) + if e != nil { + return "", "", fmt.Errorf("read --subject-file %s: %w", p, e) + } + subject = string(raw) + } + } + if body == "" { + if p, _ := cmd.Flags().GetString("body-file"); p != "" { + raw, e := os.ReadFile(p) + if e != nil { + return "", "", fmt.Errorf("read --body-file %s: %w", p, e) + } + body = string(raw) + } + } + return subject, body, nil +} + func addAlertCmd(root *cobra.Command) { alert := &cobra.Command{ Use: "alert", @@ -136,22 +172,28 @@ func buildSMTPAddCmd() *cobra.Command { RunE: func(cmd *cobra.Command, args []string) error { ctx, cancel := context.WithTimeout(cmd.Context(), 10*time.Second) defer cancel() + subj, body, err := resolveTemplateFlags(cmd) + if err != nil { + return err + } a := config.Alert{ - ID: uuid.NewString(), - Name: args[0], - Type: config.AlertSMTP, - Default: makeDefault, - SMTPHost: host, - SMTPPort: port, - SMTPUser: user, - SMTPPassword: password, - SMTPFrom: from, - SMTPTo: to, - SMTPStartTLS: startTLS, + ID: uuid.NewString(), + Name: args[0], + Type: config.AlertSMTP, + Default: makeDefault, + SubjectTemplate: subj, + BodyTemplate: body, + SMTPHost: host, + SMTPPort: port, + SMTPUser: user, + SMTPPassword: password, + SMTPFrom: from, + SMTPTo: to, + SMTPStartTLS: startTLS, } payload, _ := json.Marshal(a) - body := daemon.MutateBody{Kind: 
transport.MutationAddAlert, Payload: payload} - raw, err := callDaemon(ctx, daemon.CtrlMutate, body) + mb := daemon.MutateBody{Kind: transport.MutationAddAlert, Payload: payload} + raw, err := callDaemon(ctx, daemon.CtrlMutate, mb) if err != nil { return err } @@ -170,6 +212,7 @@ func buildSMTPAddCmd() *cobra.Command { cmd.Flags().StringSliceVar(&to, "to", nil, "recipient address (repeat or comma-separate)") cmd.Flags().BoolVar(&startTLS, "starttls", true, "negotiate STARTTLS") cmd.Flags().BoolVar(&makeDefault, "default", false, "attach this alert to every check automatically") + bindTemplateFlags(cmd) _ = cmd.MarkFlagRequired("host") _ = cmd.MarkFlagRequired("from") _ = cmd.MarkFlagRequired("to") @@ -186,16 +229,22 @@ func buildDiscordAddCmd() *cobra.Command { RunE: func(cmd *cobra.Command, args []string) error { ctx, cancel := context.WithTimeout(cmd.Context(), 10*time.Second) defer cancel() + subj, body, err := resolveTemplateFlags(cmd) + if err != nil { + return err + } a := config.Alert{ - ID: uuid.NewString(), - Name: args[0], - Type: config.AlertDiscord, - Default: makeDefault, - DiscordWebhook: webhook, + ID: uuid.NewString(), + Name: args[0], + Type: config.AlertDiscord, + Default: makeDefault, + SubjectTemplate: subj, + BodyTemplate: body, + DiscordWebhook: webhook, } payload, _ := json.Marshal(a) - body := daemon.MutateBody{Kind: transport.MutationAddAlert, Payload: payload} - raw, err := callDaemon(ctx, daemon.CtrlMutate, body) + mb := daemon.MutateBody{Kind: transport.MutationAddAlert, Payload: payload} + raw, err := callDaemon(ctx, daemon.CtrlMutate, mb) if err != nil { return err } @@ -208,6 +257,7 @@ func buildDiscordAddCmd() *cobra.Command { } cmd.Flags().StringVar(&webhook, "webhook", "", "discord webhook URL") cmd.Flags().BoolVar(&makeDefault, "default", false, "attach this alert to every check automatically") + bindTemplateFlags(cmd) _ = cmd.MarkFlagRequired("webhook") return cmd } diff --git a/internal/config/cluster.go 
b/internal/config/cluster.go index 4893113..940519e 100644 --- a/internal/config/cluster.go +++ b/internal/config/cluster.go @@ -85,6 +85,16 @@ type Alert struct { // Discord options. DiscordWebhook string `yaml:"discord_webhook,omitempty"` + + // SubjectTemplate / BodyTemplate are optional text/template strings + // that override the default rendering. Empty means use the built-in + // format. Discord ignores SubjectTemplate (it has no subject line); + // SMTP uses both. Available variables: {{.Check.Name}}, + // {{.Check.Type}}, {{.Check.Target}}, {{.Check.ID}}, {{.From}}, + // {{.To}}, {{.Verb}}, {{.Snapshot.Reports}}, {{.Snapshot.OKCount}}, + // {{.Snapshot.NotOK}}, {{.Snapshot.Detail}}, {{.NodeID}}, {{.When}}. + SubjectTemplate string `yaml:"subject_template,omitempty"` + BodyTemplate string `yaml:"body_template,omitempty"` } // ClusterConfig is the replicated cluster state. The Version field