Auto init via environment variables support, qu init for systemd
Container image / image (push) Successful in 1m38s

This commit is contained in:
2026-05-15 04:41:45 +00:00
parent 6953709574
commit e11b3f4547
9 changed files with 475 additions and 113 deletions
+122 -61
View File
@@ -5,6 +5,7 @@ import (
"encoding/base64"
"errors"
"fmt"
"io"
"os"
"github.com/google/uuid"
@@ -30,78 +31,50 @@ Pass --secret on every subsequent node so they share the same
cluster join secret. If --secret is omitted on the very first node, a
random secret is generated and printed for the operator to copy.
Every flag may also be supplied via its QUPTIME_* environment variable
(see docs/configuration.md). Explicit flags win over env values, which
in turn win over the compiled defaults.
Idempotent in one direction only: existing key material is never
overwritten. Re-run only after wiping the data directory.`,
RunE: func(cmd *cobra.Command, args []string) error {
if err := config.EnsureDataDir(); err != nil {
return err
}
if _, err := os.Stat(config.NodeFilePath()); err == nil {
return errors.New("node.yaml already exists in data dir — refusing to overwrite")
}
secret := clusterSecret
generated := false
if secret == "" {
s, err := generateSecret()
if err != nil {
return fmt.Errorf("generate cluster secret: %w", err)
}
secret = s
generated = true
// Only let env fill fields the operator did NOT pass on the
// command line; explicit flags must win over env.
n := &config.NodeConfig{}
if cmd.Flags().Changed("bind") {
n.BindAddr = bindAddr
}
if cmd.Flags().Changed("port") {
n.BindPort = bindPort
}
if cmd.Flags().Changed("advertise") {
n.Advertise = advertise
}
if cmd.Flags().Changed("secret") {
n.ClusterSecret = clusterSecret
}
if err := n.ApplyEnvOverrides(); err != nil {
return err
}
// Cobra defaults (bind=0.0.0.0, port=9901) are still
// available as fallbacks for fields neither flag nor env
// touched.
if n.BindAddr == "" {
n.BindAddr = bindAddr
}
if n.BindPort == 0 {
n.BindPort = bindPort
}
nodeID := uuid.NewString()
n := &config.NodeConfig{
NodeID: nodeID,
BindAddr: bindAddr,
BindPort: bindPort,
Advertise: advertise,
ClusterSecret: secret,
}
if err := n.Save(); err != nil {
return fmt.Errorf("save node.yaml: %w", err)
}
if _, err := crypto.GenerateKeyPair(nodeID); err != nil {
return fmt.Errorf("generate keys: %w", err)
}
// Seed cluster.yaml with this node as its own first peer.
// Without this the math in `quorum` would treat a one-node
// cluster as "0 peers, fallback quorum=1, master=self" —
// which works in isolation but breaks the moment another
// node joins, because the replicated peers list would lack
// the inviter, leading to split-brain elections.
certPEM, err := crypto.LoadCertPEM()
_, generated, err := bootstrapNode(n)
if err != nil {
return fmt.Errorf("load cert: %w", err)
}
fp, err := crypto.FingerprintFromCertPEM(certPEM)
if err != nil {
return fmt.Errorf("fingerprint own cert: %w", err)
}
cluster := &config.ClusterConfig{}
if err := cluster.Mutate(nodeID, func(c *config.ClusterConfig) error {
c.Peers = []config.PeerInfo{{
NodeID: nodeID,
Advertise: n.AdvertiseAddr(),
Fingerprint: fp,
CertPEM: string(certPEM),
}}
return nil
}); err != nil {
return fmt.Errorf("seed cluster.yaml: %w", err)
}
out := cmd.OutOrStdout()
fmt.Fprintf(out, "initialised node %s\n", nodeID)
fmt.Fprintf(out, "data dir: %s\n", config.DataDir())
fmt.Fprintf(out, "advertise: %s\n", n.AdvertiseAddr())
if generated {
fmt.Fprintln(out)
fmt.Fprintln(out, "cluster secret (copy to every other node via --secret):")
fmt.Fprintln(out, " "+secret)
return err
}
printBootstrapResult(cmd.OutOrStdout(), n, generated)
return nil
},
}
@@ -112,6 +85,94 @@ overwritten. Re-run only after wiping the data directory.`,
root.AddCommand(cmd)
}
// bootstrapNode creates the data dir, writes node.yaml, generates the
// keypair, and seeds cluster.yaml with this node as its own first
// peer. cfg may arrive with any subset of fields populated; missing
// NodeID and ClusterSecret are auto-generated, missing BindAddr /
// BindPort get the compiled defaults.
//
// Returns the populated config (the same pointer that was passed in)
// and a flag indicating whether ClusterSecret was generated here. The
// flag exists so the caller can print the secret for the operator —
// it must be copied to every follower node out-of-band. On error the
// returned config is nil and the flag is false; cfg itself may already
// have had defaults filled in by then.
//
// Caller is responsible for checking that node.yaml does not yet
// exist; bootstrapNode itself will refuse to overwrite an existing
// keypair (crypto.GenerateKeyPair errors out) but does not guard
// against clobbering node.yaml.
//
// Partial-failure behaviour: `qu init` and the serve auto-init path
// both treat the presence of node.yaml as "already initialised". If a
// step after writing node.yaml fails, node.yaml is therefore removed
// again (best effort) so the next start does not skip bootstrap on a
// node that has no keypair or no seeded cluster.yaml. Key material
// written before the failure is NOT cleaned up here.
func bootstrapNode(cfg *config.NodeConfig) (*config.NodeConfig, bool, error) {
	if err := config.EnsureDataDir(); err != nil {
		return nil, false, err
	}

	// Fill in whatever the caller (flags / env) left empty.
	if cfg.NodeID == "" {
		cfg.NodeID = uuid.NewString()
	}
	if cfg.BindAddr == "" {
		cfg.BindAddr = "0.0.0.0"
	}
	if cfg.BindPort == 0 {
		cfg.BindPort = 9901
	}
	generated := false
	if cfg.ClusterSecret == "" {
		s, err := generateSecret()
		if err != nil {
			return nil, false, fmt.Errorf("generate cluster secret: %w", err)
		}
		cfg.ClusterSecret = s
		generated = true
	}

	if err := cfg.Save(); err != nil {
		return nil, false, fmt.Errorf("save node.yaml: %w", err)
	}

	// From here on node.yaml exists on disk. Roll it back if any later
	// step fails, otherwise a half-initialised data dir would look
	// fully initialised to the next `qu init` / `qu serve`.
	// NOTE(review): assumes cfg.Save writes to config.NodeFilePath(),
	// the path both callers stat — confirm.
	completed := false
	defer func() {
		if !completed {
			// Best effort: the error already being returned is the one
			// the operator needs to see, so a failed cleanup is ignored.
			_ = os.Remove(config.NodeFilePath())
		}
	}()

	if _, err := crypto.GenerateKeyPair(cfg.NodeID); err != nil {
		return nil, false, fmt.Errorf("generate keys: %w", err)
	}

	// Seed cluster.yaml with this node as its own first peer.
	// Without this the math in `quorum` would treat a one-node
	// cluster as "0 peers, fallback quorum=1, master=self" — which
	// works in isolation but breaks the moment another node joins,
	// because the replicated peers list would lack the inviter,
	// leading to split-brain elections.
	certPEM, err := crypto.LoadCertPEM()
	if err != nil {
		return nil, false, fmt.Errorf("load cert: %w", err)
	}
	fp, err := crypto.FingerprintFromCertPEM(certPEM)
	if err != nil {
		return nil, false, fmt.Errorf("fingerprint own cert: %w", err)
	}
	cluster := &config.ClusterConfig{}
	if err := cluster.Mutate(cfg.NodeID, func(c *config.ClusterConfig) error {
		c.Peers = []config.PeerInfo{{
			NodeID:      cfg.NodeID,
			Advertise:   cfg.AdvertiseAddr(),
			Fingerprint: fp,
			CertPEM:     string(certPEM),
		}}
		return nil
	}); err != nil {
		return nil, false, fmt.Errorf("seed cluster.yaml: %w", err)
	}

	completed = true
	return cfg, generated, nil
}
// printBootstrapResult writes the post-bootstrap summary to out: the
// node ID, the data directory and the advertise address, followed —
// only when secretGenerated is true — by a blank line and the cluster
// secret the operator has to copy to the other nodes.
//
// Shared by `qu init` and the serve auto-init path so both entry
// points disclose the secret in exactly the same format.
func printBootstrapResult(out io.Writer, n *config.NodeConfig, secretGenerated bool) {
	fmt.Fprintf(out, "initialised node %s\n", n.NodeID)
	fmt.Fprintf(out, "data dir: %s\n", config.DataDir())
	fmt.Fprintf(out, "advertise: %s\n", n.AdvertiseAddr())

	// A secret supplied by the operator is already known to them;
	// only a freshly generated one needs to be shown.
	if !secretGenerated {
		return
	}
	fmt.Fprintln(out)
	fmt.Fprintln(out, "cluster secret (copy to every other node via --secret or QUPTIME_CLUSTER_SECRET):")
	fmt.Fprintln(out, " "+n.ClusterSecret)
}
// generateSecret produces 32 bytes of crypto-random data and returns
// it base64-encoded. Long enough that brute force isn't a concern;
// short enough that operators can copy-paste it without pagination.
+50 -1
View File
@@ -2,6 +2,9 @@ package cli
import (
"context"
"errors"
"fmt"
"io/fs"
"log"
"os"
"os/signal"
@@ -9,6 +12,7 @@ import (
"github.com/spf13/cobra"
"git.cer.sh/axodouble/quptime/internal/config"
"git.cer.sh/axodouble/quptime/internal/daemon"
)
@@ -18,9 +22,18 @@ func addServeCmd(root *cobra.Command) {
Short: "Run the qu daemon in the foreground",
Long: `Run the qu daemon: starts the inter-node listener, the local
control socket for the CLI, the heartbeat loop and the check
scheduler. Stops cleanly on SIGINT or SIGTERM.`,
scheduler. Stops cleanly on SIGINT or SIGTERM.
If node.yaml does not exist yet, serve will bootstrap it using values
from the QUPTIME_* environment variables (see docs/configuration.md).
This makes a single ` + "`docker compose up`" + ` enough to launch a new node —
no separate ` + "`qu init`" + ` step is required when the data volume is
fresh.`,
RunE: func(cmd *cobra.Command, args []string) error {
logger := log.New(os.Stderr, "quptime: ", log.LstdFlags|log.Lmsgprefix)
if err := autoInitIfNeeded(cmd, logger); err != nil {
return err
}
d, err := daemon.New(logger)
if err != nil {
return err
@@ -32,3 +45,39 @@ scheduler. Stops cleanly on SIGINT or SIGTERM.`,
}
root.AddCommand(cmd)
}
// autoInitIfNeeded bootstraps the node on first launch.
//
// Friction this removes for container deploys: before, the operator
// had to `docker compose run --rm quptime init …` once before the
// service could come up, which makes `restart: unless-stopped`
// awkward and forces an out-of-band step into every fresh volume.
// Now serve auto-runs the same bootstrap path using QUPTIME_* env
// vars when node.yaml is absent, so the compose file can come up on
// the first try.
//
// Pre-existing node.yaml is left untouched — we only bootstrap when
// os.Stat reports the file as not existing. Any other stat error
// (e.g. permission denied) is surfaced so the operator sees the real
// problem instead of a confused auto-init attempt clobbering state.
// Note that os.Stat follows symlinks, so a dangling node.yaml symlink
// is reported as not-exist and therefore also triggers bootstrap.
//
// The bootstrap summary (including a freshly generated cluster
// secret, if any) is printed to cmd's error stream. Returns nil when
// no bootstrap was needed or it succeeded; failures during auto-init
// are wrapped with an "auto-init:" prefix.
func autoInitIfNeeded(cmd *cobra.Command, logger *log.Logger) error {
	_, err := os.Stat(config.NodeFilePath())
	if err == nil {
		// Already initialised — nothing to do.
		return nil
	}
	if !errors.Is(err, fs.ErrNotExist) {
		return fmt.Errorf("stat node.yaml: %w", err)
	}

	logger.Printf("node.yaml not found at %s — bootstrapping from environment", config.NodeFilePath())
	n := &config.NodeConfig{}
	if err := n.ApplyEnvOverrides(); err != nil {
		// The operator asked for `serve`, not `init`; say which phase
		// rejected the environment.
		return fmt.Errorf("auto-init: %w", err)
	}
	// bootstrapNode returns the same pointer it was given, so the
	// first result is redundant with n.
	_, generated, err := bootstrapNode(n)
	if err != nil {
		return fmt.Errorf("auto-init: %w", err)
	}
	printBootstrapResult(cmd.OutOrStderr(), n, generated)
	return nil
}