Auto init via environment variables support, qu init for systemd
Container image / image (push) Successful in 1m38s

This commit is contained in:
2026-05-15 04:41:45 +00:00
parent 6953709574
commit e11b3f4547
9 changed files with 475 additions and 113 deletions
+25 -8
View File
@@ -1,5 +1,14 @@
# An example of a docker compose with Tailscale & QUptime.
# This setup is specifically intended for hosts that may not be able to reach each other directly or have a public IP address.
# This setup is specifically intended for hosts that may not be able to
# reach each other directly or have a public IP address.
#
# Bring it up with `docker compose -f docker-compose-tailscale.yml up -d`.
# QUptime auto-initialises on first start using the QUPTIME_* env vars
# below — no separate `qu init` step is required.
#
# On the first node, omit QUPTIME_CLUSTER_SECRET to have one generated
# for you. Read it out of the logs (`docker logs quptime`) and copy it
# into the .env of every other node before bringing them up.
services:
tailscale:
@@ -18,20 +27,28 @@ services:
quptime:
image: git.cer.sh/axodouble/quptime:master
container_name: quptime
environment:
# host:port other QUptime nodes use to reach this one. Use the
# Tailscale IP / MagicDNS name of this host. Required behind NAT.
- QUPTIME_ADVERTISE=${QUPTIME_ADVERTISE}
# Shared cluster join secret. Set on every node. Leave unset on
# the very first node — one will be generated and logged for you
# to copy to the others. Followers MUST set this before starting.
- QUPTIME_CLUSTER_SECRET=${QUPTIME_CLUSTER_SECRET:-}
# Optional: pin a port other than the default 9901.
# - QUPTIME_BIND_PORT=9901
volumes:
- quptime:/etc/quptime
ports:
- "9901:9901"
depends_on:
- tailscale
# No restart directive, user needs to init quptime first
# Run `docker compose -f docker-compose-tailscale.yml run --rm quptime init` to initialize
# the data volume before starting the service
# If this is not the master node, use
# `docker compose -f docker-compose-tailscale.yml run --rm quptime --advertise <TAILSCALE_IP>:9901 --secret <SECRET>`
# And add the individual nodes to the cluster with `docker compose -f docker-compose-tailscale.yml run --rm quptime node add <OTHER_NODE_IP>:9901`
network_mode: "service:tailscale" # Use the Tailscale network stack
restart: unless-stopped
# After this node is up, add peers from the master with:
# docker compose -f docker-compose-tailscale.yml exec quptime \
# qu node add <OTHER_NODE_TAILSCALE_IP>:9901
volumes:
tailscale:
quptime:
quptime:
+45
View File
@@ -35,6 +35,8 @@ Override the socket path with `QUPTIME_SOCKET=/run/foo.sock`.
## Environment variables
### Paths
| Variable | Purpose |
| ----------------- | ------------------------------------------------------------------------------------------------------------------------- |
| `QUPTIME_DIR` | Data directory. Defaults to `/etc/quptime` (root) or `$XDG_CONFIG_HOME/quptime`. |
@@ -42,9 +44,52 @@ Override the socket path with `QUPTIME_SOCKET=/run/foo.sock`.
| `XDG_CONFIG_HOME` | Honored when running as non-root and `QUPTIME_DIR` is unset. |
| `XDG_RUNTIME_DIR` | Honored when running as non-root and `QUPTIME_SOCKET` is unset. |
### `node.yaml` field overrides
Every field in `node.yaml` can also be supplied via an environment
variable. This is the recommended way to drive Docker / Compose
deployments: drop the env vars into the compose file and the daemon
will bootstrap on first start without a separate `qu init` step.
| Variable | `node.yaml` field | Notes |
| ------------------------ | ----------------- | -------------------------------------------------------------------------------------------------------------- |
| `QUPTIME_NODE_ID` | `node_id` | Pin a specific UUID. Leave unset to let `qu init` / auto-init generate one. |
| `QUPTIME_BIND_ADDR` | `bind_addr` | Defaults to `0.0.0.0`. |
| `QUPTIME_BIND_PORT` | `bind_port` | Integer. Defaults to `9901`. |
| `QUPTIME_ADVERTISE` | `advertise` | `host:port` other peers use to reach this node. Required when bound to a wildcard or behind NAT. |
| `QUPTIME_CLUSTER_SECRET` | `cluster_secret` | Pre-shared join secret. Set the same value on every node. If unset on the very first node, one is generated. |
Precedence is **env > file > compiled default**. Non-empty env values
win over whatever is stored in `node.yaml` at load time, so changing a
variable in `docker-compose.yml` and restarting the container is
enough to roll out new bind/advertise values — no on-disk edit
required. Empty env values are ignored (they will not clear a
previously persisted field).
For `qu init` specifically, explicit command-line flags take
precedence over env values; env values fill in only the fields the
operator did not pass on the command line.
Beyond the variables listed above, the daemon reads no other
environment variables. SMTP, Discord, and HTTP probe targets are
configured exclusively in `cluster.yaml`.
## Auto-init on `qu serve`
If `node.yaml` does not exist when `qu serve` starts, the daemon
bootstraps it in-place using the `QUPTIME_*` env vars above: a fresh
UUID is generated (or `QUPTIME_NODE_ID` is honored if set), an RSA
keypair and self-signed cert are written under `keys/`, and
`cluster.yaml` is seeded with this node as its sole peer. If no
`QUPTIME_CLUSTER_SECRET` was provided, a random one is generated and
printed to stderr — copy it to every follower node's
`QUPTIME_CLUSTER_SECRET` (or `--secret` flag) before they start.
This is what makes the docker-compose flow `docker compose up`-only
on a fresh volume. To opt out (e.g. so a misconfigured deployment
crashes loudly instead of silently generating a new identity), run
`qu init` against the volume yourself before letting `qu serve` ever
see it.
## `node.yaml` — local identity
Never replicated. One file per host. Generated by `qu init`.
+43 -14
View File
@@ -27,6 +27,14 @@ services:
image: git.cer.sh/axodouble/quptime:v0.1.0
container_name: quptime
restart: unless-stopped
environment:
# host:port other nodes use to reach this one. Must be reachable
# from every peer — the loopback inside the container is useless.
- QUPTIME_ADVERTISE=<host-ip>:9901
# Pre-shared join secret. Omit on the very first node and read
# the generated value out of `docker logs quptime`, then set
# this env var on every follower before bringing them up.
- QUPTIME_CLUSTER_SECRET=${QUPTIME_CLUSTER_SECRET:-}
ports:
- "9901:9901"
volumes:
@@ -41,17 +49,25 @@ volumes:
quptime-data:
```
You must **`qu init` before the daemon will start**. With this compose
file:
`qu serve` auto-initialises the data volume on first start using the
`QUPTIME_*` env vars (see [configuration.md](../configuration.md) for
the full list). One command brings everything up:
```sh
docker compose run --rm quptime init --advertise <host-ip>:9901
docker compose up -d
docker compose exec quptime qu status
```
`<host-ip>` must be reachable from every other node — the loopback
address inside the container is useless to peers.
On the very first node, capture the auto-generated cluster secret:
```sh
docker compose logs quptime | grep -A1 'cluster secret'
```
Copy that value into the `QUPTIME_CLUSTER_SECRET` env var of every
follower before starting them, otherwise their join RPCs will be
rejected. The full list of accepted env vars lives in
[configuration.md](../configuration.md#nodeyaml-field-overrides).
## Three-node compose on a single host
@@ -69,18 +85,27 @@ services:
alpha:
<<: *quptime
container_name: alpha
environment:
- QUPTIME_ADVERTISE=alpha:9901
# First node: leave secret unset and read it from `docker logs`.
ports: ["9901:9901"]
volumes: ["alpha-data:/etc/quptime"]
bravo:
<<: *quptime
container_name: bravo
environment:
- QUPTIME_ADVERTISE=bravo:9901
- QUPTIME_CLUSTER_SECRET=${SECRET}
ports: ["9902:9901"]
volumes: ["bravo-data:/etc/quptime"]
charlie:
<<: *quptime
container_name: charlie
environment:
- QUPTIME_ADVERTISE=charlie:9901
- QUPTIME_CLUSTER_SECRET=${SECRET}
ports: ["9903:9901"]
volumes: ["charlie-data:/etc/quptime"]
@@ -93,15 +118,12 @@ volumes:
Bootstrap:
```sh
# First node: prints the secret to stdout.
docker compose run --rm alpha init --advertise alpha:9901
# Capture the secret (or read it back from alpha-data).
SECRET=$(docker compose exec alpha cat /etc/quptime/node.yaml | grep cluster_secret | awk '{print $2}')
docker compose run --rm bravo init --advertise bravo:9901 --secret "$SECRET"
docker compose run --rm charlie init --advertise charlie:9901 --secret "$SECRET"
docker compose up -d
# 1. Start alpha first to mint the cluster secret.
docker compose up -d alpha
# 2. Read the secret out of alpha's logs (it is printed to stderr, which
#    `docker compose logs` captures). Compose prefixes every log line with
#    the container name, so take the last field of the secret line.
export SECRET=$(docker compose logs alpha | awk '/cluster secret/{getline; print $NF}')
# 3. Bring up the followers — they pick up the secret from $SECRET.
docker compose up -d bravo charlie
# Invite from alpha. The hostnames resolve over the compose network.
docker compose exec alpha qu node add bravo:9901
@@ -127,6 +149,9 @@ services:
image: git.cer.sh/axodouble/quptime:v0.1.0
container_name: quptime
restart: unless-stopped
environment:
- QUPTIME_ADVERTISE=${QUPTIME_ADVERTISE} # host:9901 reachable from peers
- QUPTIME_CLUSTER_SECRET=${QUPTIME_CLUSTER_SECRET}
ports:
- "9901:9901"
volumes:
@@ -135,6 +160,10 @@ services:
- NET_RAW
```
Put the per-host values (`QUPTIME_ADVERTISE`, `QUPTIME_CLUSTER_SECRET`)
in a sibling `.env` file or a config-management secret so the compose
file itself is identical across hosts.
Persistence is a bind-mount under `/srv/quptime/data` so backups and
upgrades hit a known path. See [operations.md](../operations.md) for
the backup recipe.
+29 -22
View File
@@ -53,12 +53,21 @@ services:
quptime:
image: git.cer.sh/axodouble/quptime:v0.1.0
container_name: quptime
environment:
# host:port other QUptime nodes use to reach this one. Should be
# this node's tailnet IP / MagicDNS name. Auto-init reads this on
# first start.
- QUPTIME_ADVERTISE=${QUPTIME_ADVERTISE}
# Shared cluster join secret. Omit on the very first node to have
# it generated and logged for you, then copy it into every
# follower's .env.
- QUPTIME_CLUSTER_SECRET=${QUPTIME_CLUSTER_SECRET:-}
volumes:
- quptime:/etc/quptime
network_mode: "service:tailscale"
depends_on: [tailscale]
cap_add: [NET_RAW]
# No restart directive yet — needs `qu init` first.
restart: unless-stopped
volumes:
tailscale:
@@ -67,43 +76,41 @@ volumes:
### One-time bootstrap
Each host runs the same script with different `HOST` and `TAILSCALE_AUTHKEY`:
Each host runs the same compose file with a per-host `.env`:
```sh
# .env
# .env (alpha — the first node)
HOST=alpha
TAILSCALE_AUTHKEY=tskey-auth-xxxxxxxx
QUPTIME_ADVERTISE=100.64.1.1:9901 # this node's tailnet IP
# QUPTIME_CLUSTER_SECRET left unset — will be generated on first boot.
```
Start Tailscale alone first so it gets an IP:
Start the stack on the first host. `qu serve` auto-initialises the
volume using the env vars above, so a single `docker compose up`
brings everything up:
```sh
docker compose up -d tailscale
sleep 5
TSIP=$(docker compose exec tailscale tailscale ip --4)
echo "this node's tailnet IP: $TSIP"
docker compose up -d
docker compose logs quptime | grep -A1 'cluster secret'
# Pipe the secret through your password manager.
```
On the **first** host, init without `--secret`:
On every **other** host, write the same `.env` plus the captured
secret:
```sh
docker compose run --rm quptime init --advertise "$TSIP:9901"
# Grab the printed secret; pipe through your password manager.
# .env (bravo, charlie, …)
HOST=bravo
TAILSCALE_AUTHKEY=tskey-auth-xxxxxxxx
QUPTIME_ADVERTISE=100.64.1.2:9901
QUPTIME_CLUSTER_SECRET=<paste from alpha>
```
On every **other** host, paste the secret:
Bring them up and invite them from the first node:
```sh
docker compose run --rm quptime init \
--advertise "$TSIP:9901" \
--secret "$CLUSTER_SECRET"
```
Then bring up `qu` on every node and invite from the first:
```sh
# Each host
docker compose up -d quptime
docker compose up -d
# From alpha
docker compose exec quptime qu node add 100.64.1.2:9901
+19 -4
View File
@@ -146,15 +146,26 @@ both call this out.
load node.yaml: open ...: no such file or directory
```
Run `qu init` before `qu serve`. The daemon does not auto-init —
silently generating identities and secrets would be a worse failure
mode than crashing.
`qu serve` normally auto-bootstraps a missing `node.yaml` using the
`QUPTIME_*` env vars (see
[configuration.md](configuration.md#auto-init-on-qu-serve)). If you
still see this error, the most likely causes are:
- The data directory is read-only or owned by a different user — the
bootstrap can't write `node.yaml`. Fix permissions on
`$QUPTIME_DIR`.
- Something else removed `node.yaml` mid-run (a config-management
tool, a misconfigured volume). Re-run `qu serve` and it will
rebuild from env, or run `qu init` manually with the flags you
want.
```
node.yaml has empty node_id — run `qu init` first
```
Same fix.
`node.yaml` exists but lacks a `node_id`. Either delete the file and
let auto-init regenerate it, or run `qu init` against a wiped data
dir.
```
listen tcp :9901: bind: address already in use
@@ -197,3 +208,7 @@ sudo systemctl start quptime
The data directory is the only state. Wipe it and you're back to a
fresh node.
Under Docker (or any env-driven deploy), the explicit `qu init` step
isn't needed — wiping the data volume and restarting the container is
enough; `qu serve` will re-bootstrap from the `QUPTIME_*` env vars.
+122 -61
View File
@@ -5,6 +5,7 @@ import (
"encoding/base64"
"errors"
"fmt"
"io"
"os"
"github.com/google/uuid"
@@ -30,78 +31,50 @@ Pass --secret on every subsequent node so they share the same
cluster join secret. If --secret is omitted on the very first node, a
random secret is generated and printed for the operator to copy.
Every flag may also be supplied via its QUPTIME_* environment variable
(see docs/configuration.md). Explicit flags win over env values, which
in turn win over the compiled defaults.
Idempotent in one direction only: existing key material is never
overwritten. Re-run only after wiping the data directory.`,
RunE: func(cmd *cobra.Command, args []string) error {
if err := config.EnsureDataDir(); err != nil {
return err
}
if _, err := os.Stat(config.NodeFilePath()); err == nil {
return errors.New("node.yaml already exists in data dir — refusing to overwrite")
}
secret := clusterSecret
generated := false
if secret == "" {
s, err := generateSecret()
if err != nil {
return fmt.Errorf("generate cluster secret: %w", err)
}
secret = s
generated = true
// Only let env fill fields the operator did NOT pass on the
// command line; explicit flags must win over env.
n := &config.NodeConfig{}
if cmd.Flags().Changed("bind") {
n.BindAddr = bindAddr
}
if cmd.Flags().Changed("port") {
n.BindPort = bindPort
}
if cmd.Flags().Changed("advertise") {
n.Advertise = advertise
}
if cmd.Flags().Changed("secret") {
n.ClusterSecret = clusterSecret
}
if err := n.ApplyEnvOverrides(); err != nil {
return err
}
// Cobra defaults (bind=0.0.0.0, port=9901) are still
// available as fallbacks for fields neither flag nor env
// touched.
if n.BindAddr == "" {
n.BindAddr = bindAddr
}
if n.BindPort == 0 {
n.BindPort = bindPort
}
nodeID := uuid.NewString()
n := &config.NodeConfig{
NodeID: nodeID,
BindAddr: bindAddr,
BindPort: bindPort,
Advertise: advertise,
ClusterSecret: secret,
}
if err := n.Save(); err != nil {
return fmt.Errorf("save node.yaml: %w", err)
}
if _, err := crypto.GenerateKeyPair(nodeID); err != nil {
return fmt.Errorf("generate keys: %w", err)
}
// Seed cluster.yaml with this node as its own first peer.
// Without this the math in `quorum` would treat a one-node
// cluster as "0 peers, fallback quorum=1, master=self" —
// which works in isolation but breaks the moment another
// node joins, because the replicated peers list would lack
// the inviter, leading to split-brain elections.
certPEM, err := crypto.LoadCertPEM()
_, generated, err := bootstrapNode(n)
if err != nil {
return fmt.Errorf("load cert: %w", err)
}
fp, err := crypto.FingerprintFromCertPEM(certPEM)
if err != nil {
return fmt.Errorf("fingerprint own cert: %w", err)
}
cluster := &config.ClusterConfig{}
if err := cluster.Mutate(nodeID, func(c *config.ClusterConfig) error {
c.Peers = []config.PeerInfo{{
NodeID: nodeID,
Advertise: n.AdvertiseAddr(),
Fingerprint: fp,
CertPEM: string(certPEM),
}}
return nil
}); err != nil {
return fmt.Errorf("seed cluster.yaml: %w", err)
}
out := cmd.OutOrStdout()
fmt.Fprintf(out, "initialised node %s\n", nodeID)
fmt.Fprintf(out, "data dir: %s\n", config.DataDir())
fmt.Fprintf(out, "advertise: %s\n", n.AdvertiseAddr())
if generated {
fmt.Fprintln(out)
fmt.Fprintln(out, "cluster secret (copy to every other node via --secret):")
fmt.Fprintln(out, " "+secret)
return err
}
printBootstrapResult(cmd.OutOrStdout(), n, generated)
return nil
},
}
@@ -112,6 +85,94 @@ overwritten. Re-run only after wiping the data directory.`,
root.AddCommand(cmd)
}
// bootstrapNode creates the data dir, writes node.yaml, generates the
// keypair, and seeds cluster.yaml with this node as its own first
// peer. cfg may arrive with any subset of fields populated; missing
// NodeID and ClusterSecret are auto-generated, missing BindAddr /
// BindPort get the compiled defaults.
//
// Returns the populated config (the same pointer that was passed in)
// and a flag indicating whether ClusterSecret was generated here. The
// flag exists so the caller can print the secret for the operator —
// it must be copied to every follower node out-of-band.
//
// Caller is responsible for checking that node.yaml does not yet
// exist; bootstrapNode itself will refuse to overwrite an existing
// keypair (crypto.GenerateKeyPair errors out) but does not guard
// against clobbering node.yaml.
func bootstrapNode(cfg *config.NodeConfig) (*config.NodeConfig, bool, error) {
	if err := config.EnsureDataDir(); err != nil {
		return nil, false, err
	}

	// Fill in whatever the caller left blank: a fresh identity plus
	// the compiled listener defaults.
	if cfg.NodeID == "" {
		cfg.NodeID = uuid.NewString()
	}
	if cfg.BindAddr == "" {
		cfg.BindAddr = "0.0.0.0"
	}
	if cfg.BindPort == 0 {
		cfg.BindPort = 9901
	}
	secretGenerated := cfg.ClusterSecret == ""
	if secretGenerated {
		s, err := generateSecret()
		if err != nil {
			return nil, false, fmt.Errorf("generate cluster secret: %w", err)
		}
		cfg.ClusterSecret = s
	}

	if err := cfg.Save(); err != nil {
		return nil, false, fmt.Errorf("save node.yaml: %w", err)
	}
	if _, err := crypto.GenerateKeyPair(cfg.NodeID); err != nil {
		return nil, false, fmt.Errorf("generate keys: %w", err)
	}

	// Seed cluster.yaml with this node as its own first peer.
	// Without this the math in `quorum` would treat a one-node
	// cluster as "0 peers, fallback quorum=1, master=self" — which
	// works in isolation but breaks the moment another node joins,
	// because the replicated peers list would lack the inviter,
	// leading to split-brain elections.
	pem, err := crypto.LoadCertPEM()
	if err != nil {
		return nil, false, fmt.Errorf("load cert: %w", err)
	}
	fingerprint, err := crypto.FingerprintFromCertPEM(pem)
	if err != nil {
		return nil, false, fmt.Errorf("fingerprint own cert: %w", err)
	}
	seed := &config.ClusterConfig{}
	err = seed.Mutate(cfg.NodeID, func(c *config.ClusterConfig) error {
		c.Peers = []config.PeerInfo{{
			NodeID:      cfg.NodeID,
			Advertise:   cfg.AdvertiseAddr(),
			Fingerprint: fingerprint,
			CertPEM:     string(pem),
		}}
		return nil
	})
	if err != nil {
		return nil, false, fmt.Errorf("seed cluster.yaml: %w", err)
	}
	return cfg, secretGenerated, nil
}
// printBootstrapResult emits the human-readable summary both `qu init`
// and the serve auto-init path print after bootstrapping. Kept in one
// place so the secret-disclosure format stays identical across the two
// entry points.
func printBootstrapResult(out io.Writer, n *config.NodeConfig, secretGenerated bool) {
	fmt.Fprintf(out, "initialised node %s\n", n.NodeID)
	fmt.Fprintf(out, "data dir: %s\n", config.DataDir())
	fmt.Fprintf(out, "advertise: %s\n", n.AdvertiseAddr())
	if !secretGenerated {
		return
	}
	// Disclose the generated secret exactly once, on bootstrap, so the
	// operator can copy it to the follower nodes.
	fmt.Fprintln(out)
	fmt.Fprintln(out, "cluster secret (copy to every other node via --secret or QUPTIME_CLUSTER_SECRET):")
	fmt.Fprintln(out, " "+n.ClusterSecret)
}
// generateSecret produces 32 bytes of crypto-random data and returns
// it base64-encoded. Long enough that brute force isn't a concern;
// short enough that operators can copy-paste it without pagination.
+50 -1
View File
@@ -2,6 +2,9 @@ package cli
import (
"context"
"errors"
"fmt"
"io/fs"
"log"
"os"
"os/signal"
@@ -9,6 +12,7 @@ import (
"github.com/spf13/cobra"
"git.cer.sh/axodouble/quptime/internal/config"
"git.cer.sh/axodouble/quptime/internal/daemon"
)
@@ -18,9 +22,18 @@ func addServeCmd(root *cobra.Command) {
Short: "Run the qu daemon in the foreground",
Long: `Run the qu daemon: starts the inter-node listener, the local
control socket for the CLI, the heartbeat loop and the check
scheduler. Stops cleanly on SIGINT or SIGTERM.`,
scheduler. Stops cleanly on SIGINT or SIGTERM.
If node.yaml does not exist yet, serve will bootstrap it using values
from the QUPTIME_* environment variables (see docs/configuration.md).
This makes a single ` + "`docker compose up`" + ` enough to launch a new node —
no separate ` + "`qu init`" + ` step is required when the data volume is
fresh.`,
RunE: func(cmd *cobra.Command, args []string) error {
logger := log.New(os.Stderr, "quptime: ", log.LstdFlags|log.Lmsgprefix)
if err := autoInitIfNeeded(cmd, logger); err != nil {
return err
}
d, err := daemon.New(logger)
if err != nil {
return err
@@ -32,3 +45,39 @@ scheduler. Stops cleanly on SIGINT or SIGTERM.`,
}
root.AddCommand(cmd)
}
// autoInitIfNeeded bootstraps the node on first launch.
//
// Friction this removes for container deploys: before, the operator
// had to `docker compose run --rm quptime init …` once before the
// service could come up, which makes `restart: unless-stopped`
// awkward and forces an out-of-band step into every fresh volume.
// Now serve auto-runs the same bootstrap path using QUPTIME_* env
// vars when node.yaml is absent, so the compose file can come up on
// the first try.
//
// Pre-existing node.yaml is left untouched — we only bootstrap when
// the file is genuinely missing. Any other stat error (permission
// denied, broken symlink) is surfaced so the operator sees the real
// problem instead of a confused auto-init attempt clobbering state.
func autoInitIfNeeded(cmd *cobra.Command, logger *log.Logger) error {
	_, err := os.Stat(config.NodeFilePath())
	if err == nil {
		// node.yaml already exists — nothing to do.
		return nil
	}
	if !errors.Is(err, fs.ErrNotExist) {
		return fmt.Errorf("stat node.yaml: %w", err)
	}
	logger.Printf("node.yaml not found at %s — bootstrapping from environment", config.NodeFilePath())
	n := &config.NodeConfig{}
	if err := n.ApplyEnvOverrides(); err != nil {
		return err
	}
	// Early-return on error instead of the previous if/else; the
	// happy path stays left-aligned (Go indent-error-flow idiom).
	_, generated, err := bootstrapNode(n)
	if err != nil {
		return fmt.Errorf("auto-init: %w", err)
	}
	printBootstrapResult(cmd.OutOrStderr(), n, generated)
	return nil
}
+47
View File
@@ -3,10 +3,26 @@ package config
import (
"fmt"
"os"
"strconv"
"gopkg.in/yaml.v3"
)
// Environment variable names that override fields on NodeConfig at
// load time. Intended to let `docker compose` setups drive a node's
// identity and listener configuration without having to bake a
// node.yaml into the image or run `qu init` manually first.
//
// Empty values are ignored — they do not clear a field. The override
// order is therefore: env (non-empty) > file > compiled default.
const (
EnvNodeID = "QUPTIME_NODE_ID"
EnvBindAddr = "QUPTIME_BIND_ADDR"
EnvBindPort = "QUPTIME_BIND_PORT"
EnvAdvertise = "QUPTIME_ADVERTISE"
EnvClusterSecret = "QUPTIME_CLUSTER_SECRET"
)
// NodeConfig is the per-node, never-replicated identity file.
type NodeConfig struct {
// NodeID is a stable UUID generated at `qu init`. Used by all peers
@@ -45,6 +61,34 @@ func (n *NodeConfig) AdvertiseAddr() string {
return fmt.Sprintf("%s:%d", bind, n.BindPort)
}
// ApplyEnvOverrides folds QUPTIME_* environment variables onto n.
// Non-empty env values win over the existing field value. Called both
// by LoadNodeConfig and by the `qu init` / serve auto-init paths so
// the same precedence rules apply whether the daemon is reading a
// persisted node.yaml or constructing one from scratch.
func (n *NodeConfig) ApplyEnvOverrides() error {
	// All string-typed fields follow the same rule — non-empty env
	// wins — so drive them from a table.
	for _, ov := range []struct {
		env string
		dst *string
	}{
		{EnvNodeID, &n.NodeID},
		{EnvBindAddr, &n.BindAddr},
		{EnvAdvertise, &n.Advertise},
		{EnvClusterSecret, &n.ClusterSecret},
	} {
		if v := os.Getenv(ov.env); v != "" {
			*ov.dst = v
		}
	}
	// BindPort is the one integer field and needs parsing.
	if v := os.Getenv(EnvBindPort); v != "" {
		p, err := strconv.Atoi(v)
		if err != nil {
			return fmt.Errorf("%s=%q: not an integer: %w", EnvBindPort, v, err)
		}
		n.BindPort = p
	}
	return nil
}
// LoadNodeConfig reads node.yaml from the data dir.
func LoadNodeConfig() (*NodeConfig, error) {
raw, err := os.ReadFile(NodeFilePath())
@@ -55,6 +99,9 @@ func LoadNodeConfig() (*NodeConfig, error) {
if err := yaml.Unmarshal(raw, cfg); err != nil {
return nil, fmt.Errorf("parse node.yaml: %w", err)
}
if err := cfg.ApplyEnvOverrides(); err != nil {
return nil, err
}
if cfg.BindPort == 0 {
cfg.BindPort = 9901
}
+95 -3
View File
@@ -4,9 +4,9 @@ import "testing"
func TestAdvertiseAddrFallback(t *testing.T) {
cases := []struct {
name string
cfg NodeConfig
want string
name string
cfg NodeConfig
want string
}{
{"explicit advertise wins", NodeConfig{Advertise: "host:1234", BindAddr: "0.0.0.0", BindPort: 9901}, "host:1234"},
{"empty bind falls back to loopback", NodeConfig{BindPort: 9901}, "127.0.0.1:9901"},
@@ -56,3 +56,95 @@ func TestLoadNodeConfigAppliesDefaults(t *testing.T) {
t.Errorf("BindAddr=%q want 0.0.0.0", loaded.BindAddr)
}
}
// TestApplyEnvOverrides verifies that every QUPTIME_* variable, when
// set non-empty, replaces the corresponding pre-existing field.
func TestApplyEnvOverrides(t *testing.T) {
	// Setenv order is irrelevant — each override reads its own var.
	for env, val := range map[string]string{
		EnvNodeID:        "node-from-env",
		EnvBindAddr:      "1.2.3.4",
		EnvBindPort:      "9999",
		EnvAdvertise:     "public.example.com:9999",
		EnvClusterSecret: "shh-secret",
	} {
		t.Setenv(env, val)
	}
	cfg := &NodeConfig{
		NodeID:        "original-id",
		BindAddr:      "0.0.0.0",
		BindPort:      9901,
		Advertise:     "old.example.com:9901",
		ClusterSecret: "old-secret",
	}
	if err := cfg.ApplyEnvOverrides(); err != nil {
		t.Fatal(err)
	}
	want := NodeConfig{
		NodeID:        "node-from-env",
		BindAddr:      "1.2.3.4",
		BindPort:      9999,
		Advertise:     "public.example.com:9999",
		ClusterSecret: "shh-secret",
	}
	if *cfg != want {
		t.Errorf("got %+v want %+v", *cfg, want)
	}
}
func TestApplyEnvOverridesEmptyValuesIgnored(t *testing.T) {
	// Explicitly empty env vars must NOT clobber existing fields —
	// otherwise `docker run -e QUPTIME_ADVERTISE=` would silently
	// erase a previously-persisted advertise address.
	for _, env := range []string{
		EnvNodeID, EnvBindAddr, EnvBindPort, EnvAdvertise, EnvClusterSecret,
	} {
		t.Setenv(env, "")
	}
	before := NodeConfig{
		NodeID:        "keep-me",
		BindAddr:      "10.0.0.1",
		BindPort:      9901,
		Advertise:     "keep.example.com:9901",
		ClusterSecret: "keep-secret",
	}
	after := before
	if err := after.ApplyEnvOverrides(); err != nil {
		t.Fatal(err)
	}
	if after != before {
		t.Errorf("empty env vars mutated config: got %+v want %+v", after, before)
	}
}
// TestApplyEnvOverridesBadPort checks that a non-numeric
// QUPTIME_BIND_PORT is rejected rather than silently ignored.
func TestApplyEnvOverridesBadPort(t *testing.T) {
	t.Setenv(EnvBindPort, "not-an-int")
	cfg := &NodeConfig{}
	err := cfg.ApplyEnvOverrides()
	if err == nil {
		t.Fatal("expected error for non-integer port")
	}
}
// TestLoadNodeConfigEnvOverridesFile exercises the env > file
// precedence through the full LoadNodeConfig path.
func TestLoadNodeConfigEnvOverridesFile(t *testing.T) {
	t.Setenv("QUPTIME_DIR", t.TempDir())
	// Persist a file with one bind addr; env should win on load.
	saved := &NodeConfig{NodeID: "abc", BindAddr: "127.0.0.1", BindPort: 9901, Advertise: "file.example.com:9901"}
	if err := saved.Save(); err != nil {
		t.Fatal(err)
	}
	t.Setenv(EnvBindAddr, "0.0.0.0")
	t.Setenv(EnvAdvertise, "env.example.com:9001")
	t.Setenv(EnvBindPort, "9001")
	got, err := LoadNodeConfig()
	if err != nil {
		t.Fatal(err)
	}
	// Env-backed fields must reflect the overrides...
	if got.BindAddr != "0.0.0.0" {
		t.Errorf("BindAddr=%q want 0.0.0.0 (env override)", got.BindAddr)
	}
	if got.BindPort != 9001 {
		t.Errorf("BindPort=%d want 9001 (env override)", got.BindPort)
	}
	if got.Advertise != "env.example.com:9001" {
		t.Errorf("Advertise=%q want env.example.com:9001 (env override)", got.Advertise)
	}
	// ...while fields with no env var keep the file's value.
	if got.NodeID != "abc" {
		t.Errorf("NodeID=%q want abc (unchanged)", got.NodeID)
	}
}