Added default alerts, updated UI to show alerts with checks, added auto sync for manual file editing

This commit is contained in:
2026-05-14 00:25:43 +00:00
parent bd437f49e4
commit 1dc3ad1215
9 changed files with 282 additions and 21 deletions
+6
View File
@@ -195,6 +195,12 @@ func (d *Daemon) Run(ctx context.Context) error {
d.scheduler.Start(ctx)
}()
d.wg.Add(1)
go func() {
defer d.wg.Done()
d.watchManualEdits(ctx)
}()
select {
case <-ctx.Done():
case err := <-servErr:
+8
View File
@@ -169,6 +169,7 @@ func (d *Daemon) buildStatus() transport.StatusResponse {
})
}
for _, c := range snap.Checks {
check := c
cs := transport.CheckSnapshot{CheckID: c.ID, Name: c.Name, State: "unknown"}
if agg, ok := d.aggregator.SnapshotFor(c.ID); ok {
cs.State = string(agg.State)
@@ -176,6 +177,13 @@ func (d *Daemon) buildStatus() transport.StatusResponse {
cs.Total = agg.Reports
cs.Detail = agg.Detail
}
for _, a := range d.cluster.EffectiveAlertsFor(&check) {
label := a.Name
if a.Default {
label += "*"
}
cs.Alerts = append(cs.Alerts, label)
}
out.Checks = append(out.Checks, cs)
}
return out
+85
View File
@@ -0,0 +1,85 @@
package daemon
import (
"context"
"crypto/sha256"
"os"
"reflect"
"time"
"gopkg.in/yaml.v3"
"git.cer.sh/axodouble/quptime/internal/config"
"git.cer.sh/axodouble/quptime/internal/transport"
)
// manualEditPollInterval is the tick period of watchManualEdits: how
// often the daemon re-hashes cluster.yaml and compares the result with
// the hash of the last content it wrote. It is kept short so that an
// operator `vim`-ing the file sees their change picked up within a few
// seconds.
const manualEditPollInterval = 2 * time.Second
// watchManualEdits runs a polling loop over cluster.yaml until ctx is
// cancelled. Every manualEditPollInterval it calls checkManualEdit,
// which compares the on-disk content with what the daemon last wrote
// and, when they diverge, forwards the parsed file to the master as a
// MutationReplaceConfig so that a hand-edit made on any node is
// replicated to the rest of the cluster.
//
// Detection is based on a sha256 of the file contents instead of the
// mtime. That avoids racing against the `os.Rename` performed by our
// own AtomicWrite, and ignores editors that bump mtime without
// actually changing the content.
func (d *Daemon) watchManualEdits(ctx context.Context) {
	ticker := time.NewTicker(manualEditPollInterval)
	defer ticker.Stop()

	done := ctx.Done()
	for {
		select {
		case <-ticker.C:
			d.checkManualEdit(ctx)
		case <-done:
			// Daemon is shutting down; the deferred Stop releases the ticker.
			return
		}
	}
}
// checkManualEdit performs a single poll of cluster.yaml.
//
// It reads the file, hashes it with sha256 and returns immediately when
// the hash equals the one recorded for the daemon's last write. When
// the hash differs the file is parsed; if its Peers, Checks or Alerts
// differ from the current in-memory snapshot, those three sections are
// forwarded through the replicator as a MutationReplaceConfig.
//
// In every path that gets past the hash comparison the new hash is
// pinned via SetLastSavedSum, so the same on-disk content is handled at
// most once. Errors are logged, never returned. ctx bounds the forward
// to the master (with an additional 10 second timeout).
func (d *Daemon) checkManualEdit(ctx context.Context) {
	raw, err := os.ReadFile(config.ClusterFilePath())
	if err != nil {
		// A missing file during early boot or temp-file races is fine;
		// the next tick will re-read it.
		return
	}
	sum := sha256.Sum256(raw)
	if sum == d.cluster.LastSavedSum() {
		return
	}

	// A zero-length file is what we observe when an editor truncates
	// the file before rewriting it. An empty document decodes without
	// error into a zero ClusterConfig, which would then be replicated
	// as "no peers, no checks, no alerts". Treat it like an unparsable
	// file instead: log once, pin the hash, and wait for real content.
	if len(raw) == 0 {
		d.logger.Printf("manual-edit: cluster.yaml is empty — ignoring")
		d.cluster.SetLastSavedSum(sum)
		return
	}

	var edited config.ClusterConfig
	if err := yaml.Unmarshal(raw, &edited); err != nil {
		d.logger.Printf("manual-edit: parse cluster.yaml: %v — ignoring", err)
		// Pin the hash so we don't loop on a broken file. The operator
		// must save a valid YAML for the next attempt.
		d.cluster.SetLastSavedSum(sum)
		return
	}

	current := d.cluster.Snapshot()
	if reflect.DeepEqual(current.Peers, edited.Peers) &&
		reflect.DeepEqual(current.Checks, edited.Checks) &&
		reflect.DeepEqual(current.Alerts, edited.Alerts) {
		// Only cosmetic (whitespace/comments) — accept it.
		d.cluster.SetLastSavedSum(sum)
		return
	}

	d.logger.Printf("manual-edit: cluster.yaml changed externally — replicating via master")
	// The hash is pinned before forwarding, so a failed forward is
	// logged but not retried until the file content changes again.
	d.cluster.SetLastSavedSum(sum)

	callCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
	defer cancel()

	// Forward only the three operator-editable sections; any other
	// field decoded from the file is deliberately left at its zero value.
	payload := &config.ClusterConfig{
		Peers:  edited.Peers,
		Checks: edited.Checks,
		Alerts: edited.Alerts,
	}
	if _, err := d.replicator.LocalMutate(callCtx, transport.MutationReplaceConfig, payload); err != nil {
		d.logger.Printf("manual-edit: forward to master: %v", err)
	}
}