Added a shared cluster secret to cluster setup, updated the default port, and fixed some issues when joining nodes asynchronously
Release / release (push) Has been cancelled

This commit is contained in:
2026-05-12 07:51:20 +00:00
parent c90ce244b0
commit 46abc09b11
15 changed files with 308 additions and 67 deletions
+9 -5
View File
@@ -305,10 +305,11 @@ func (d *Daemon) nodeAdd(ctx context.Context, body NodeAddBody) (NodeAddResult,
return NodeAddResult{}, fmt.Errorf("own fingerprint: %w", err)
}
joinReq := transport.JoinRequest{
NodeID: d.node.NodeID,
Advertise: d.node.AdvertiseAddr(),
Fingerprint: myFP,
CertPEM: string(d.assets.Cert),
NodeID: d.node.NodeID,
Advertise: d.node.AdvertiseAddr(),
Fingerprint: myFP,
CertPEM: string(d.assets.Cert),
ClusterSecret: d.node.ClusterSecret,
}
var joinResp transport.JoinResponse
if err := d.client.Call(ctx, peerID, body.Address, transport.MethodJoin, joinReq, &joinResp); err != nil {
@@ -319,11 +320,14 @@ func (d *Daemon) nodeAdd(ctx context.Context, body NodeAddBody) (NodeAddResult,
}
// Propose the cluster-config addition. Routed to master via the
// replicator; if we are the master, applied directly.
// replicator; if we are the master, applied directly. Including
// CertPEM lets other peers auto-trust this node once the new
// cluster.yaml reaches them.
peerInfo := config.PeerInfo{
NodeID: peerID,
Advertise: body.Address,
Fingerprint: sample.Fingerprint,
CertPEM: string(sample.CertPEM),
}
ver, err := d.replicator.LocalMutate(ctx, transport.MutationAddPeer, peerInfo)
if err != nil {
+34
View File
@@ -103,6 +103,7 @@ func New(logger *log.Logger) (*Daemon, error) {
}
d.quorum = quorum.New(node.NodeID, cluster, client)
d.quorum.SetSelfAdvertise(node.AdvertiseAddr())
d.replicator = replicate.New(node.NodeID, cluster, client, d.quorum)
d.aggregator = checks.NewAggregator(cluster, nil)
d.dispatcher = alerts.New(cluster, node.NodeID, logger)
@@ -125,9 +126,42 @@ func New(logger *log.Logger) (*Daemon, error) {
d.scheduler = checks.NewScheduler(cluster, &sink{d: d})
d.control = newControlServer(d)
d.registerHandlers()
// Whenever cluster.yaml changes, mirror peer certs into the local
// trust store so this node can mTLS to every other peer — even
// peers it was never invited by directly.
cluster.OnChange(d.syncTrustFromCluster)
d.syncTrustFromCluster()
return d, nil
}
// syncTrustFromCluster walks the peers in the current cluster snapshot
// and hands a trust entry to the local trust store for each peer that
// carries cert material and is not already trusted under the same
// fingerprint.
//
// Skipped peers: entries with an empty NodeID, this node itself,
// entries missing a fingerprint or certificate PEM, and peers whose
// stored trust entry already has a matching fingerprint.
//
// Errors returned by the trust store are logged and do not stop the
// loop. This function only adds entries, never removes them —
// `qu node remove` is the explicit eviction path.
func (d *Daemon) syncTrustFromCluster() {
	for _, peer := range d.cluster.Snapshot().Peers {
		switch {
		case peer.NodeID == "", peer.NodeID == d.node.NodeID:
			// Unnamed entry or ourselves: nothing to trust.
			continue
		case peer.Fingerprint == "", peer.CertPEM == "":
			// pre-1.0 peer entry without cert material — skip
			continue
		}

		// Already trusted under this exact fingerprint: leave it alone.
		known, found := d.trust.Get(peer.NodeID)
		if found && known.Fingerprint == peer.Fingerprint {
			continue
		}

		entry := trust.Entry{
			NodeID:      peer.NodeID,
			Address:     peer.Advertise,
			Fingerprint: peer.Fingerprint,
			CertPEM:     peer.CertPEM,
		}
		err := d.trust.Add(entry)
		if err != nil {
			d.logger.Printf("trust sync: %s: %v", peer.NodeID, err)
		}
	}
}
// Run binds the inter-node listener and the local control socket,
// starts the quorum loop and the scheduler, and blocks until ctx is
// cancelled.
+8 -5
View File
@@ -2,6 +2,7 @@ package daemon
import (
"context"
"crypto/subtle"
"encoding/json"
"time"
@@ -38,6 +39,13 @@ func (d *Daemon) registerHandlers() {
if err := json.Unmarshal(raw, &req); err != nil {
return transport.JoinResponse{Error: err.Error()}, nil
}
// Constant-time secret check: every node in the cluster must
// present the same shared secret. This is the only barrier
// stopping a stranger who can reach :9901 from enrolling
// themselves with their own fresh key.
if subtle.ConstantTimeCompare([]byte(req.ClusterSecret), []byte(d.node.ClusterSecret)) != 1 {
return transport.JoinResponse{Error: "cluster secret mismatch"}, nil
}
fp, err := crypto.FingerprintFromCertPEM([]byte(req.CertPEM))
if err != nil {
return transport.JoinResponse{Error: "parse cert: " + err.Error()}, nil
@@ -45,11 +53,6 @@ func (d *Daemon) registerHandlers() {
if fp != req.Fingerprint {
return transport.JoinResponse{Error: "fingerprint mismatch"}, nil
}
// Outbound join (the proposing node already accepted our cert
// out of band). Symmetric trust is required for mTLS to work,
// so we accept the join automatically. Operators who need
// stricter onboarding can disable the listener and use the
// CLI flow exclusively.
if err := d.trust.Add(trust.Entry{
NodeID: req.NodeID,
Address: req.Advertise,