Auto init via environment variables support, qu init for systemd
Container image / image (push) Successful in 1m38s
Container image / image (push) Successful in 1m38s
This commit is contained in:
+122
-61
@@ -5,6 +5,7 @@ import (
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/google/uuid"
|
||||
@@ -30,78 +31,50 @@ Pass --secret on every subsequent node so they share the same
|
||||
cluster join secret. If --secret is omitted on the very first node, a
|
||||
random secret is generated and printed for the operator to copy.
|
||||
|
||||
Every flag may also be supplied via its QUPTIME_* environment variable
|
||||
(see docs/configuration.md). Explicit flags win over env values, which
|
||||
in turn win over the compiled defaults.
|
||||
|
||||
Idempotent in one direction only: existing key material is never
|
||||
overwritten. Re-run only after wiping the data directory.`,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
if err := config.EnsureDataDir(); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := os.Stat(config.NodeFilePath()); err == nil {
|
||||
return errors.New("node.yaml already exists in data dir — refusing to overwrite")
|
||||
}
|
||||
|
||||
secret := clusterSecret
|
||||
generated := false
|
||||
if secret == "" {
|
||||
s, err := generateSecret()
|
||||
if err != nil {
|
||||
return fmt.Errorf("generate cluster secret: %w", err)
|
||||
}
|
||||
secret = s
|
||||
generated = true
|
||||
// Only let env fill fields the operator did NOT pass on the
|
||||
// command line; explicit flags must win over env.
|
||||
n := &config.NodeConfig{}
|
||||
if cmd.Flags().Changed("bind") {
|
||||
n.BindAddr = bindAddr
|
||||
}
|
||||
if cmd.Flags().Changed("port") {
|
||||
n.BindPort = bindPort
|
||||
}
|
||||
if cmd.Flags().Changed("advertise") {
|
||||
n.Advertise = advertise
|
||||
}
|
||||
if cmd.Flags().Changed("secret") {
|
||||
n.ClusterSecret = clusterSecret
|
||||
}
|
||||
if err := n.ApplyEnvOverrides(); err != nil {
|
||||
return err
|
||||
}
|
||||
// Cobra defaults (bind=0.0.0.0, port=9901) are still
|
||||
// available as fallbacks for fields neither flag nor env
|
||||
// touched.
|
||||
if n.BindAddr == "" {
|
||||
n.BindAddr = bindAddr
|
||||
}
|
||||
if n.BindPort == 0 {
|
||||
n.BindPort = bindPort
|
||||
}
|
||||
|
||||
nodeID := uuid.NewString()
|
||||
n := &config.NodeConfig{
|
||||
NodeID: nodeID,
|
||||
BindAddr: bindAddr,
|
||||
BindPort: bindPort,
|
||||
Advertise: advertise,
|
||||
ClusterSecret: secret,
|
||||
}
|
||||
if err := n.Save(); err != nil {
|
||||
return fmt.Errorf("save node.yaml: %w", err)
|
||||
}
|
||||
if _, err := crypto.GenerateKeyPair(nodeID); err != nil {
|
||||
return fmt.Errorf("generate keys: %w", err)
|
||||
}
|
||||
|
||||
// Seed cluster.yaml with this node as its own first peer.
|
||||
// Without this the math in `quorum` would treat a one-node
|
||||
// cluster as "0 peers, fallback quorum=1, master=self" —
|
||||
// which works in isolation but breaks the moment another
|
||||
// node joins, because the replicated peers list would lack
|
||||
// the inviter, leading to split-brain elections.
|
||||
certPEM, err := crypto.LoadCertPEM()
|
||||
_, generated, err := bootstrapNode(n)
|
||||
if err != nil {
|
||||
return fmt.Errorf("load cert: %w", err)
|
||||
}
|
||||
fp, err := crypto.FingerprintFromCertPEM(certPEM)
|
||||
if err != nil {
|
||||
return fmt.Errorf("fingerprint own cert: %w", err)
|
||||
}
|
||||
cluster := &config.ClusterConfig{}
|
||||
if err := cluster.Mutate(nodeID, func(c *config.ClusterConfig) error {
|
||||
c.Peers = []config.PeerInfo{{
|
||||
NodeID: nodeID,
|
||||
Advertise: n.AdvertiseAddr(),
|
||||
Fingerprint: fp,
|
||||
CertPEM: string(certPEM),
|
||||
}}
|
||||
return nil
|
||||
}); err != nil {
|
||||
return fmt.Errorf("seed cluster.yaml: %w", err)
|
||||
}
|
||||
|
||||
out := cmd.OutOrStdout()
|
||||
fmt.Fprintf(out, "initialised node %s\n", nodeID)
|
||||
fmt.Fprintf(out, "data dir: %s\n", config.DataDir())
|
||||
fmt.Fprintf(out, "advertise: %s\n", n.AdvertiseAddr())
|
||||
if generated {
|
||||
fmt.Fprintln(out)
|
||||
fmt.Fprintln(out, "cluster secret (copy to every other node via --secret):")
|
||||
fmt.Fprintln(out, " "+secret)
|
||||
return err
|
||||
}
|
||||
printBootstrapResult(cmd.OutOrStdout(), n, generated)
|
||||
return nil
|
||||
},
|
||||
}
|
||||
@@ -112,6 +85,94 @@ overwritten. Re-run only after wiping the data directory.`,
|
||||
root.AddCommand(cmd)
|
||||
}
|
||||
|
||||
// bootstrapNode creates the data dir, writes node.yaml, generates the
|
||||
// keypair, and seeds cluster.yaml with this node as its own first
|
||||
// peer. cfg may arrive with any subset of fields populated; missing
|
||||
// NodeID and ClusterSecret are auto-generated, missing BindAddr /
|
||||
// BindPort get the compiled defaults.
|
||||
//
|
||||
// Returns the populated config (the same pointer that was passed in)
|
||||
// and a flag indicating whether ClusterSecret was generated here. The
|
||||
// flag exists so the caller can print the secret for the operator —
|
||||
// it must be copied to every follower node out-of-band.
|
||||
//
|
||||
// Caller is responsible for checking that node.yaml does not yet
|
||||
// exist; bootstrapNode itself will refuse to overwrite an existing
|
||||
// keypair (crypto.GenerateKeyPair errors out) but does not guard
|
||||
// against clobbering node.yaml.
|
||||
func bootstrapNode(cfg *config.NodeConfig) (*config.NodeConfig, bool, error) {
|
||||
if err := config.EnsureDataDir(); err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
if cfg.NodeID == "" {
|
||||
cfg.NodeID = uuid.NewString()
|
||||
}
|
||||
if cfg.BindAddr == "" {
|
||||
cfg.BindAddr = "0.0.0.0"
|
||||
}
|
||||
if cfg.BindPort == 0 {
|
||||
cfg.BindPort = 9901
|
||||
}
|
||||
generated := false
|
||||
if cfg.ClusterSecret == "" {
|
||||
s, err := generateSecret()
|
||||
if err != nil {
|
||||
return nil, false, fmt.Errorf("generate cluster secret: %w", err)
|
||||
}
|
||||
cfg.ClusterSecret = s
|
||||
generated = true
|
||||
}
|
||||
if err := cfg.Save(); err != nil {
|
||||
return nil, false, fmt.Errorf("save node.yaml: %w", err)
|
||||
}
|
||||
if _, err := crypto.GenerateKeyPair(cfg.NodeID); err != nil {
|
||||
return nil, false, fmt.Errorf("generate keys: %w", err)
|
||||
}
|
||||
|
||||
// Seed cluster.yaml with this node as its own first peer.
|
||||
// Without this the math in `quorum` would treat a one-node
|
||||
// cluster as "0 peers, fallback quorum=1, master=self" — which
|
||||
// works in isolation but breaks the moment another node joins,
|
||||
// because the replicated peers list would lack the inviter,
|
||||
// leading to split-brain elections.
|
||||
certPEM, err := crypto.LoadCertPEM()
|
||||
if err != nil {
|
||||
return nil, false, fmt.Errorf("load cert: %w", err)
|
||||
}
|
||||
fp, err := crypto.FingerprintFromCertPEM(certPEM)
|
||||
if err != nil {
|
||||
return nil, false, fmt.Errorf("fingerprint own cert: %w", err)
|
||||
}
|
||||
cluster := &config.ClusterConfig{}
|
||||
if err := cluster.Mutate(cfg.NodeID, func(c *config.ClusterConfig) error {
|
||||
c.Peers = []config.PeerInfo{{
|
||||
NodeID: cfg.NodeID,
|
||||
Advertise: cfg.AdvertiseAddr(),
|
||||
Fingerprint: fp,
|
||||
CertPEM: string(certPEM),
|
||||
}}
|
||||
return nil
|
||||
}); err != nil {
|
||||
return nil, false, fmt.Errorf("seed cluster.yaml: %w", err)
|
||||
}
|
||||
return cfg, generated, nil
|
||||
}
|
||||
|
||||
// printBootstrapResult emits the human-readable summary both `qu init`
|
||||
// and the serve auto-init path print after bootstrapping. Kept in one
|
||||
// place so the secret-disclosure format stays identical across the two
|
||||
// entry points.
|
||||
func printBootstrapResult(out io.Writer, n *config.NodeConfig, secretGenerated bool) {
|
||||
fmt.Fprintf(out, "initialised node %s\n", n.NodeID)
|
||||
fmt.Fprintf(out, "data dir: %s\n", config.DataDir())
|
||||
fmt.Fprintf(out, "advertise: %s\n", n.AdvertiseAddr())
|
||||
if secretGenerated {
|
||||
fmt.Fprintln(out)
|
||||
fmt.Fprintln(out, "cluster secret (copy to every other node via --secret or QUPTIME_CLUSTER_SECRET):")
|
||||
fmt.Fprintln(out, " "+n.ClusterSecret)
|
||||
}
|
||||
}
|
||||
|
||||
// generateSecret produces 32 bytes of crypto-random data and returns
|
||||
// it base64-encoded. Long enough that brute force isn't a concern;
|
||||
// short enough that operators can copy-paste it without pagination.
|
||||
|
||||
+50
-1
@@ -2,6 +2,9 @@ package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"log"
|
||||
"os"
|
||||
"os/signal"
|
||||
@@ -9,6 +12,7 @@ import (
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
"git.cer.sh/axodouble/quptime/internal/config"
|
||||
"git.cer.sh/axodouble/quptime/internal/daemon"
|
||||
)
|
||||
|
||||
@@ -18,9 +22,18 @@ func addServeCmd(root *cobra.Command) {
|
||||
Short: "Run the qu daemon in the foreground",
|
||||
Long: `Run the qu daemon: starts the inter-node listener, the local
|
||||
control socket for the CLI, the heartbeat loop and the check
|
||||
scheduler. Stops cleanly on SIGINT or SIGTERM.`,
|
||||
scheduler. Stops cleanly on SIGINT or SIGTERM.
|
||||
|
||||
If node.yaml does not exist yet, serve will bootstrap it using values
|
||||
from the QUPTIME_* environment variables (see docs/configuration.md).
|
||||
This makes a single ` + "`docker compose up`" + ` enough to launch a new node —
|
||||
no separate ` + "`qu init`" + ` step is required when the data volume is
|
||||
fresh.`,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
logger := log.New(os.Stderr, "quptime: ", log.LstdFlags|log.Lmsgprefix)
|
||||
if err := autoInitIfNeeded(cmd, logger); err != nil {
|
||||
return err
|
||||
}
|
||||
d, err := daemon.New(logger)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -32,3 +45,39 @@ scheduler. Stops cleanly on SIGINT or SIGTERM.`,
|
||||
}
|
||||
root.AddCommand(cmd)
|
||||
}
|
||||
|
||||
// autoInitIfNeeded bootstraps the node on first launch.
|
||||
//
|
||||
// Friction this removes for container deploys: before, the operator
|
||||
// had to `docker compose run --rm quptime init …` once before the
|
||||
// service could come up, which makes `restart: unless-stopped`
|
||||
// awkward and forces an out-of-band step into every fresh volume.
|
||||
// Now serve auto-runs the same bootstrap path using QUPTIME_* env
|
||||
// vars when node.yaml is absent, so the compose file can come up on
|
||||
// the first try.
|
||||
//
|
||||
// Pre-existing node.yaml is left untouched — we only bootstrap when
|
||||
// the file is genuinely missing. Any other stat error (permission
|
||||
// denied, broken symlink) is surfaced so the operator sees the real
|
||||
// problem instead of a confused auto-init attempt clobbering state.
|
||||
func autoInitIfNeeded(cmd *cobra.Command, logger *log.Logger) error {
|
||||
_, err := os.Stat(config.NodeFilePath())
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
if !errors.Is(err, fs.ErrNotExist) {
|
||||
return fmt.Errorf("stat node.yaml: %w", err)
|
||||
}
|
||||
|
||||
logger.Printf("node.yaml not found at %s — bootstrapping from environment", config.NodeFilePath())
|
||||
n := &config.NodeConfig{}
|
||||
if err := n.ApplyEnvOverrides(); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, generated, err := bootstrapNode(n); err != nil {
|
||||
return fmt.Errorf("auto-init: %w", err)
|
||||
} else {
|
||||
printBootstrapResult(cmd.OutOrStderr(), n, generated)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user