Added secrets to setting up the cluster, updated default port, and fixed some issues when joining nodes async
Release / release (push) Has been cancelled

This commit is contained in:
2026-05-12 07:51:20 +00:00
parent c90ce244b0
commit 46abc09b11
15 changed files with 308 additions and 67 deletions
+43 -5
View File
@@ -10,11 +10,18 @@ import (
)
// PeerInfo identifies a cluster member as known to all peers.
// (Trust material lives in trust.yaml; this struct stays portable.)
//
// CertPEM rides along so the daemon can populate trust.yaml when a
// new node joins: a follower receiving an updated cluster.yaml from
// the master trusts the master, and therefore trusts the peer
// certificates it forwards. Without this, mTLS between new and old
// peers would never succeed because neither would have the other in
// its trust store.
type PeerInfo struct {
// NodeID is the member's node identifier, serialized as "node_id".
NodeID string `yaml:"node_id"`
// Advertise is serialized as "advertise"; presumably the address other
// peers dial to reach this member — NOTE(review): confirm against the
// node-level Advertise setting.
Advertise string `yaml:"advertise"`
// Fingerprint is serialized as "fingerprint"; presumably a fingerprint
// of the member's certificate — TODO confirm how it is derived.
Fingerprint string `yaml:"fingerprint"`
// CertPEM carries the member's certificate (PEM text, per the field
// name) so receivers can add it to their trust store. The key is
// omitted from the YAML when the value is empty.
CertPEM string `yaml:"cert_pem,omitempty"`
}
// CheckType enumerates the supported probe kinds.
@@ -83,7 +90,27 @@ type ClusterConfig struct {
Checks []Check `yaml:"checks"`
Alerts []Alert `yaml:"alerts"`
mu sync.RWMutex `yaml:"-"`
mu sync.RWMutex `yaml:"-"`
onChange []func() // fired after any successful Mutate/Replace
}
// OnChange registers a callback fired after every successful Mutate
// or Replace. Callbacks run synchronously on the mutating goroutine
// AFTER the lock is released — they may safely call back into the
// config to read snapshots.
//
// A nil fn is ignored: storing it would make the next change
// notification panic when the registered callbacks are invoked.
func (c *ClusterConfig) OnChange(fn func()) {
	if fn == nil {
		return
	}
	c.mu.Lock()
	defer c.mu.Unlock()
	c.onChange = append(c.onChange, fn)
}
// fireOnChange invokes every registered change callback, in
// registration order. The callback list is copied while holding the
// read lock and the lock is dropped before any callback runs, so a
// callback may itself take the config lock without deadlocking.
func (c *ClusterConfig) fireOnChange() {
	c.mu.RLock()
	pending := make([]func(), len(c.onChange))
	copy(pending, c.onChange)
	c.mu.RUnlock()

	for i := range pending {
		pending[i]()
	}
}
// LoadClusterConfig reads cluster.yaml. A missing file returns an
@@ -136,8 +163,8 @@ func (c *ClusterConfig) Snapshot() *ClusterConfig {
// success, and writes the file. Only the master should call this.
func (c *ClusterConfig) Mutate(byNode string, fn func(*ClusterConfig) error) error {
c.mu.Lock()
defer c.mu.Unlock()
if err := fn(c); err != nil {
c.mu.Unlock()
return err
}
c.Version++
@@ -145,9 +172,16 @@ func (c *ClusterConfig) Mutate(byNode string, fn func(*ClusterConfig) error) err
c.UpdatedBy = byNode
out, err := yaml.Marshal(c)
if err != nil {
c.mu.Unlock()
return err
}
return AtomicWrite(ClusterFilePath(), out, 0o600)
if err := AtomicWrite(ClusterFilePath(), out, 0o600); err != nil {
c.mu.Unlock()
return err
}
c.mu.Unlock()
c.fireOnChange()
return nil
}
// Replace overwrites the local config with an incoming snapshot if
@@ -155,8 +189,8 @@ func (c *ClusterConfig) Mutate(byNode string, fn func(*ClusterConfig) error) err
// applied.
func (c *ClusterConfig) Replace(incoming *ClusterConfig) (bool, error) {
c.mu.Lock()
defer c.mu.Unlock()
if incoming.Version <= c.Version {
c.mu.Unlock()
return false, nil
}
c.Version = incoming.Version
@@ -167,11 +201,15 @@ func (c *ClusterConfig) Replace(incoming *ClusterConfig) (bool, error) {
c.Alerts = append([]Alert(nil), incoming.Alerts...)
out, err := yaml.Marshal(c)
if err != nil {
c.mu.Unlock()
return false, err
}
if err := AtomicWrite(ClusterFilePath(), out, 0o600); err != nil {
c.mu.Unlock()
return false, err
}
c.mu.Unlock()
c.fireOnChange()
return true, nil
}
+9 -2
View File
@@ -17,12 +17,19 @@ type NodeConfig struct {
// traffic. Defaults to 0.0.0.0.
BindAddr string `yaml:"bind_addr"`
// BindPort is the port the daemon listens on. Default 9001.
// BindPort is the port the daemon listens on. Default 9901.
BindPort int `yaml:"bind_port"`
// Advertise is the address other nodes use to reach us. May differ
// from BindAddr when behind NAT. Set explicitly via `qu init --advertise`.
Advertise string `yaml:"advertise"`
// ClusterSecret is the pre-shared secret every node in the cluster
// must present during the Join RPC. Without it any operator who
// can reach :9901 could enrol themselves into the cluster, so we
// require an out-of-band copy at `qu init` time. Stored locally
// only, never replicated.
ClusterSecret string `yaml:"cluster_secret"`
}
// AdvertiseAddr returns the address peers should dial. Falls back to
@@ -49,7 +56,7 @@ func LoadNodeConfig() (*NodeConfig, error) {
return nil, fmt.Errorf("parse node.yaml: %w", err)
}
if cfg.BindPort == 0 {
cfg.BindPort = 9001
cfg.BindPort = 9901
}
if cfg.BindAddr == "" {
cfg.BindAddr = "0.0.0.0"
+8 -8
View File
@@ -8,11 +8,11 @@ func TestAdvertiseAddrFallback(t *testing.T) {
cfg NodeConfig
want string
}{
{"explicit advertise wins", NodeConfig{Advertise: "host:1234", BindAddr: "0.0.0.0", BindPort: 9001}, "host:1234"},
{"empty bind falls back to loopback", NodeConfig{BindPort: 9001}, "127.0.0.1:9001"},
{"wildcard bind falls back to loopback", NodeConfig{BindAddr: "0.0.0.0", BindPort: 9001}, "127.0.0.1:9001"},
{"ipv6 wildcard falls back to loopback", NodeConfig{BindAddr: "::", BindPort: 9001}, "127.0.0.1:9001"},
{"specific bind preserved", NodeConfig{BindAddr: "10.0.0.1", BindPort: 9001}, "10.0.0.1:9001"},
{"explicit advertise wins", NodeConfig{Advertise: "host:1234", BindAddr: "0.0.0.0", BindPort: 9901}, "host:1234"},
{"empty bind falls back to loopback", NodeConfig{BindPort: 9901}, "127.0.0.1:9901"},
{"wildcard bind falls back to loopback", NodeConfig{BindAddr: "0.0.0.0", BindPort: 9901}, "127.0.0.1:9901"},
{"ipv6 wildcard falls back to loopback", NodeConfig{BindAddr: "::", BindPort: 9901}, "127.0.0.1:9901"},
{"specific bind preserved", NodeConfig{BindAddr: "10.0.0.1", BindPort: 9901}, "10.0.0.1:9901"},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
@@ -25,7 +25,7 @@ func TestAdvertiseAddrFallback(t *testing.T) {
func TestNodeConfigRoundtrip(t *testing.T) {
t.Setenv("QUPTIME_DIR", t.TempDir())
n := &NodeConfig{NodeID: "abc", BindAddr: "127.0.0.1", BindPort: 9001, Advertise: "10.0.0.1:9001"}
n := &NodeConfig{NodeID: "abc", BindAddr: "127.0.0.1", BindPort: 9901, Advertise: "10.0.0.1:9901"}
if err := n.Save(); err != nil {
t.Fatal(err)
}
@@ -49,8 +49,8 @@ func TestLoadNodeConfigAppliesDefaults(t *testing.T) {
if err != nil {
t.Fatal(err)
}
if loaded.BindPort != 9001 {
t.Errorf("BindPort=%d want 9001", loaded.BindPort)
if loaded.BindPort != 9901 {
t.Errorf("BindPort=%d want 9901", loaded.BindPort)
}
if loaded.BindAddr != "0.0.0.0" {
t.Errorf("BindAddr=%q want 0.0.0.0", loaded.BindAddr)