Auto init via environment variables support, qu init for systemd
Container image / image (push) Successful in 1m38s

This commit is contained in:
2026-05-15 04:41:45 +00:00
parent 6953709574
commit e11b3f4547
9 changed files with 475 additions and 113 deletions
+24 -7
View File
@@ -1,5 +1,14 @@
# An example of a docker compose with Tailscale & QUptime. # An example of a docker compose with Tailscale & QUptime.
# This setup is specifically intended for hosts that may not be able to reach each other directly or have a public IP address. # This setup is specifically intended for hosts that may not be able to
# reach each other directly or have a public IP address.
#
# Bring it up with `docker compose -f docker-compose-tailscale.yml up -d`.
# QUptime auto-initialises on first start using the QUPTIME_* env vars
# below — no separate `qu init` step is required.
#
# On the first node, omit QUPTIME_CLUSTER_SECRET to have one generated
# for you. Read it out of the logs (`docker logs quptime`) and copy it
# into the .env of every other node before bringing them up.
services: services:
tailscale: tailscale:
@@ -18,19 +27,27 @@ services:
quptime: quptime:
image: git.cer.sh/axodouble/quptime:master image: git.cer.sh/axodouble/quptime:master
container_name: quptime container_name: quptime
environment:
# host:port other QUptime nodes use to reach this one. Use the
# Tailscale IP / MagicDNS name of this host. Required behind NAT.
- QUPTIME_ADVERTISE=${QUPTIME_ADVERTISE}
# Shared cluster join secret. Set on every node. Leave unset on
# the very first node — one will be generated and logged for you
# to copy to the others. Followers MUST set this before starting.
- QUPTIME_CLUSTER_SECRET=${QUPTIME_CLUSTER_SECRET:-}
# Optional: pin a port other than the default 9901.
# - QUPTIME_BIND_PORT=9901
volumes: volumes:
- quptime:/etc/quptime - quptime:/etc/quptime
ports: ports:
- "9901:9901" - "9901:9901"
depends_on: depends_on:
- tailscale - tailscale
# No restart directive, user needs to init quptime first
# Run `docker compose -f docker-compose-tailscale.yml run --rm quptime init` to initialize
# the data volume before starting the service
# If this is not the master node, use
# `docker compose -f docker-compose-tailscale.yml run --rm quptime --advertise <TAILSCALE_IP>:9901 --secret <SECRET>`
# And add the individual nodes to the cluster with `docker compose -f docker-compose-tailscale.yml run --rm quptime node add <OTHER_NODE_IP>:9901`
network_mode: "service:tailscale" # Use the Tailscale network stack network_mode: "service:tailscale" # Use the Tailscale network stack
restart: unless-stopped
# After this node is up, add peers from the master with:
# docker compose -f docker-compose-tailscale.yml exec quptime \
# qu node add <OTHER_NODE_TAILSCALE_IP>:9901
volumes: volumes:
tailscale: tailscale:
+45
View File
@@ -35,6 +35,8 @@ Override the socket path with `QUPTIME_SOCKET=/run/foo.sock`.
## Environment variables ## Environment variables
### Paths
| Variable | Purpose | | Variable | Purpose |
| ----------------- | ------------------------------------------------------------------------------------------------------------------------- | | ----------------- | ------------------------------------------------------------------------------------------------------------------------- |
| `QUPTIME_DIR` | Data directory. Defaults to `/etc/quptime` (root) or `$XDG_CONFIG_HOME/quptime`. | | `QUPTIME_DIR` | Data directory. Defaults to `/etc/quptime` (root) or `$XDG_CONFIG_HOME/quptime`. |
@@ -42,9 +44,52 @@ Override the socket path with `QUPTIME_SOCKET=/run/foo.sock`.
| `XDG_CONFIG_HOME` | Honored when running as non-root and `QUPTIME_DIR` is unset. | | `XDG_CONFIG_HOME` | Honored when running as non-root and `QUPTIME_DIR` is unset. |
| `XDG_RUNTIME_DIR` | Honored when running as non-root and `QUPTIME_SOCKET` is unset. | | `XDG_RUNTIME_DIR` | Honored when running as non-root and `QUPTIME_SOCKET` is unset. |
### `node.yaml` field overrides
Every field in `node.yaml` can also be supplied via an environment
variable. This is the recommended way to drive Docker / Compose
deployments: drop the env vars into the compose file and the daemon
will bootstrap on first start without a separate `qu init` step.
| Variable | `node.yaml` field | Notes |
| ------------------------ | ----------------- | -------------------------------------------------------------------------------------------------------------- |
| `QUPTIME_NODE_ID` | `node_id` | Pin a specific UUID. Leave unset to let `qu init` / auto-init generate one. |
| `QUPTIME_BIND_ADDR` | `bind_addr` | Defaults to `0.0.0.0`. |
| `QUPTIME_BIND_PORT` | `bind_port` | Integer. Defaults to `9901`. |
| `QUPTIME_ADVERTISE` | `advertise` | `host:port` other peers use to reach this node. Required when bound to a wildcard or behind NAT. |
| `QUPTIME_CLUSTER_SECRET` | `cluster_secret` | Pre-shared join secret. Set the same value on every node. If unset on the very first node, one is generated. |
Precedence is **env > file > compiled default**. Non-empty env values
win over whatever is stored in `node.yaml` at load time, so changing a
variable in `docker-compose.yml` and restarting the container is
enough to roll out new bind/advertise values — no on-disk edit
required. Empty env values are ignored (they will not clear a
previously persisted field).
For `qu init` specifically, explicit command-line flags take
precedence over env values; env values fill in only the fields the
operator did not pass on the command line.
The daemon does not read any other environment variables. SMTP, Discord, The daemon does not read any other environment variables. SMTP, Discord,
and HTTP probe targets are configured exclusively in `cluster.yaml`. and HTTP probe targets are configured exclusively in `cluster.yaml`.
## Auto-init on `qu serve`
If `node.yaml` does not exist when `qu serve` starts, the daemon
bootstraps it in-place using the `QUPTIME_*` env vars above: a fresh
UUID is generated (or `QUPTIME_NODE_ID` is honored if set), an RSA
keypair and self-signed cert are written under `keys/`, and
`cluster.yaml` is seeded with this node as its sole peer. If no
`QUPTIME_CLUSTER_SECRET` was provided, a random one is generated and
printed to stderr — copy it to every follower node's
`QUPTIME_CLUSTER_SECRET` (or `--secret` flag) before they start.
This is what makes the docker-compose flow `docker compose up`-only
on a fresh volume. To opt out (e.g. so a misconfigured deployment
crashes loudly instead of silently generating a new identity), run
`qu init` against the volume yourself before letting `qu serve` ever
see it.
## `node.yaml` — local identity ## `node.yaml` — local identity
Never replicated. One file per host. Generated by `qu init`. Never replicated. One file per host. Generated by `qu init`.
+43 -14
View File
@@ -27,6 +27,14 @@ services:
image: git.cer.sh/axodouble/quptime:v0.1.0 image: git.cer.sh/axodouble/quptime:v0.1.0
container_name: quptime container_name: quptime
restart: unless-stopped restart: unless-stopped
environment:
# host:port other nodes use to reach this one. Must be reachable
# from every peer — the loopback inside the container is useless.
- QUPTIME_ADVERTISE=<host-ip>:9901
# Pre-shared join secret. Omit on the very first node and read
# the generated value out of `docker logs quptime`, then set
# this env var on every follower before bringing them up.
- QUPTIME_CLUSTER_SECRET=${QUPTIME_CLUSTER_SECRET:-}
ports: ports:
- "9901:9901" - "9901:9901"
volumes: volumes:
@@ -41,17 +49,25 @@ volumes:
quptime-data: quptime-data:
``` ```
You must **`qu init` before the daemon will start**. With this compose `qu serve` auto-initialises the data volume on first start using the
file: `QUPTIME_*` env vars (see [configuration.md](../configuration.md) for
the full list). One command brings everything up:
```sh ```sh
docker compose run --rm quptime init --advertise <host-ip>:9901
docker compose up -d docker compose up -d
docker compose exec quptime qu status docker compose exec quptime qu status
``` ```
`<host-ip>` must be reachable from every other node — the loopback On the very first node, capture the auto-generated cluster secret:
address inside the container is useless to peers.
```sh
docker compose logs quptime | grep -A1 'cluster secret'
```
Copy that value into the `QUPTIME_CLUSTER_SECRET` env var of every
follower before starting them, otherwise their join RPCs will be
rejected. The full list of accepted env vars lives in
[configuration.md](../configuration.md#nodeyaml-field-overrides).
## Three-node compose on a single host ## Three-node compose on a single host
@@ -69,18 +85,27 @@ services:
alpha: alpha:
<<: *quptime <<: *quptime
container_name: alpha container_name: alpha
environment:
- QUPTIME_ADVERTISE=alpha:9901
# First node: leave secret unset and read it from `docker logs`.
ports: ["9901:9901"] ports: ["9901:9901"]
volumes: ["alpha-data:/etc/quptime"] volumes: ["alpha-data:/etc/quptime"]
bravo: bravo:
<<: *quptime <<: *quptime
container_name: bravo container_name: bravo
environment:
- QUPTIME_ADVERTISE=bravo:9901
- QUPTIME_CLUSTER_SECRET=${SECRET}
ports: ["9902:9901"] ports: ["9902:9901"]
volumes: ["bravo-data:/etc/quptime"] volumes: ["bravo-data:/etc/quptime"]
charlie: charlie:
<<: *quptime <<: *quptime
container_name: charlie container_name: charlie
environment:
- QUPTIME_ADVERTISE=charlie:9901
- QUPTIME_CLUSTER_SECRET=${SECRET}
ports: ["9903:9901"] ports: ["9903:9901"]
volumes: ["charlie-data:/etc/quptime"] volumes: ["charlie-data:/etc/quptime"]
@@ -93,15 +118,12 @@ volumes:
Bootstrap: Bootstrap:
```sh ```sh
# First node: prints the secret to stdout. # 1. Start alpha first to mint the cluster secret.
docker compose run --rm alpha init --advertise alpha:9901 docker compose up -d alpha
# Capture the secret (or read it back from alpha-data). # 2. Read the secret off alpha's stdout.
SECRET=$(docker compose exec alpha cat /etc/quptime/node.yaml | grep cluster_secret | awk '{print $2}') export SECRET=$(docker compose logs alpha | awk '/cluster secret/{getline; print $1}')
# 3. Bring up the followers — they pick up the secret from $SECRET.
docker compose run --rm bravo init --advertise bravo:9901 --secret "$SECRET" docker compose up -d bravo charlie
docker compose run --rm charlie init --advertise charlie:9901 --secret "$SECRET"
docker compose up -d
# Invite from alpha. The hostnames resolve over the compose network. # Invite from alpha. The hostnames resolve over the compose network.
docker compose exec alpha qu node add bravo:9901 docker compose exec alpha qu node add bravo:9901
@@ -127,6 +149,9 @@ services:
image: git.cer.sh/axodouble/quptime:v0.1.0 image: git.cer.sh/axodouble/quptime:v0.1.0
container_name: quptime container_name: quptime
restart: unless-stopped restart: unless-stopped
environment:
- QUPTIME_ADVERTISE=${QUPTIME_ADVERTISE} # host:9901 reachable from peers
- QUPTIME_CLUSTER_SECRET=${QUPTIME_CLUSTER_SECRET}
ports: ports:
- "9901:9901" - "9901:9901"
volumes: volumes:
@@ -135,6 +160,10 @@ services:
- NET_RAW - NET_RAW
``` ```
Put the per-host values (`QUPTIME_ADVERTISE`, `QUPTIME_CLUSTER_SECRET`)
in a sibling `.env` file or a config-management secret so the compose
file itself is identical across hosts.
Persistence is a bind-mount under `/srv/quptime/data` so backups and Persistence is a bind-mount under `/srv/quptime/data` so backups and
upgrades hit a known path. See [operations.md](../operations.md) for upgrades hit a known path. See [operations.md](../operations.md) for
the backup recipe. the backup recipe.
+29 -22
View File
@@ -53,12 +53,21 @@ services:
quptime: quptime:
image: git.cer.sh/axodouble/quptime:v0.1.0 image: git.cer.sh/axodouble/quptime:v0.1.0
container_name: quptime container_name: quptime
environment:
# host:port other QUptime nodes use to reach this one. Should be
# this node's tailnet IP / MagicDNS name. Auto-init reads this on
# first start.
- QUPTIME_ADVERTISE=${QUPTIME_ADVERTISE}
# Shared cluster join secret. Omit on the very first node to have
# it generated and logged for you, then copy it into every
# follower's .env.
- QUPTIME_CLUSTER_SECRET=${QUPTIME_CLUSTER_SECRET:-}
volumes: volumes:
- quptime:/etc/quptime - quptime:/etc/quptime
network_mode: "service:tailscale" network_mode: "service:tailscale"
depends_on: [tailscale] depends_on: [tailscale]
cap_add: [NET_RAW] cap_add: [NET_RAW]
# No restart directive yet — needs `qu init` first. restart: unless-stopped
volumes: volumes:
tailscale: tailscale:
@@ -67,43 +76,41 @@ volumes:
### One-time bootstrap ### One-time bootstrap
Each host runs the same script with different `HOST` and `TAILSCALE_AUTHKEY`: Each host runs the same compose file with a per-host `.env`:
```sh ```sh
# .env # .env (alpha — the first node)
HOST=alpha HOST=alpha
TAILSCALE_AUTHKEY=tskey-auth-xxxxxxxx TAILSCALE_AUTHKEY=tskey-auth-xxxxxxxx
QUPTIME_ADVERTISE=100.64.1.1:9901 # this node's tailnet IP
# QUPTIME_CLUSTER_SECRET left unset — will be generated on first boot.
``` ```
Start Tailscale alone first so it gets an IP: Start the stack on the first host. `qu serve` auto-initialises the
volume using the env vars above, so a single `docker compose up`
brings everything up:
```sh ```sh
docker compose up -d tailscale docker compose up -d
sleep 5 docker compose logs quptime | grep -A1 'cluster secret'
TSIP=$(docker compose exec tailscale tailscale ip --4) # Pipe the secret through your password manager.
echo "this node's tailnet IP: $TSIP"
``` ```
On the **first** host, init without `--secret`: On every **other** host, write the same `.env` plus the captured
secret:
```sh ```sh
docker compose run --rm quptime init --advertise "$TSIP:9901" # .env (bravo, charlie, …)
# Grab the printed secret; pipe through your password manager. HOST=bravo
TAILSCALE_AUTHKEY=tskey-auth-xxxxxxxx
QUPTIME_ADVERTISE=100.64.1.2:9901
QUPTIME_CLUSTER_SECRET=<paste from alpha>
``` ```
On every **other** host, paste the secret: Bring them up and invite them from the first node:
```sh ```sh
docker compose run --rm quptime init \ docker compose up -d
--advertise "$TSIP:9901" \
--secret "$CLUSTER_SECRET"
```
Then bring up `qu` on every node and invite from the first:
```sh
# Each host
docker compose up -d quptime
# From alpha # From alpha
docker compose exec quptime qu node add 100.64.1.2:9901 docker compose exec quptime qu node add 100.64.1.2:9901
+19 -4
View File
@@ -146,15 +146,26 @@ both call this out.
load node.yaml: open ...: no such file or directory load node.yaml: open ...: no such file or directory
``` ```
Run `qu init` before `qu serve`. The daemon does not auto-init — `qu serve` normally auto-bootstraps a missing `node.yaml` using the
silently generating identities and secrets would be a worse failure `QUPTIME_*` env vars (see
mode than crashing. [configuration.md](configuration.md#auto-init-on-qu-serve)). If you
still see this error, the most likely causes are:
- The data directory is read-only or owned by a different user — the
bootstrap can't write `node.yaml`. Fix permissions on
`$QUPTIME_DIR`.
- Something else removed `node.yaml` mid-run (a config-management
tool, a misconfigured volume). Re-run `qu serve` and it will
rebuild from env, or run `qu init` manually with the flags you
want.
``` ```
node.yaml has empty node_id — run `qu init` first node.yaml has empty node_id — run `qu init` first
``` ```
Same fix. `node.yaml` exists but lacks a `node_id`. Either delete the file and
let auto-init regenerate it, or run `qu init` against a wiped data
dir.
``` ```
listen tcp :9901: bind: address already in use listen tcp :9901: bind: address already in use
@@ -197,3 +208,7 @@ sudo systemctl start quptime
The data directory is the only state. Wipe it and you're back to a The data directory is the only state. Wipe it and you're back to a
fresh node. fresh node.
Under Docker (or any env-driven deploy), the explicit `qu init` step
isn't needed — wiping the data volume and restarting the container is
enough; `qu serve` will re-bootstrap from the `QUPTIME_*` env vars.
+122 -61
View File
@@ -5,6 +5,7 @@ import (
"encoding/base64" "encoding/base64"
"errors" "errors"
"fmt" "fmt"
"io"
"os" "os"
"github.com/google/uuid" "github.com/google/uuid"
@@ -30,78 +31,50 @@ Pass --secret on every subsequent node so they share the same
cluster join secret. If --secret is omitted on the very first node, a cluster join secret. If --secret is omitted on the very first node, a
random secret is generated and printed for the operator to copy. random secret is generated and printed for the operator to copy.
Every flag may also be supplied via its QUPTIME_* environment variable
(see docs/configuration.md). Explicit flags win over env values, which
in turn win over the compiled defaults.
Idempotent in one direction only: existing key material is never Idempotent in one direction only: existing key material is never
overwritten. Re-run only after wiping the data directory.`, overwritten. Re-run only after wiping the data directory.`,
RunE: func(cmd *cobra.Command, args []string) error { RunE: func(cmd *cobra.Command, args []string) error {
if err := config.EnsureDataDir(); err != nil {
return err
}
if _, err := os.Stat(config.NodeFilePath()); err == nil { if _, err := os.Stat(config.NodeFilePath()); err == nil {
return errors.New("node.yaml already exists in data dir — refusing to overwrite") return errors.New("node.yaml already exists in data dir — refusing to overwrite")
} }
secret := clusterSecret // Only let env fill fields the operator did NOT pass on the
generated := false // command line; explicit flags must win over env.
if secret == "" { n := &config.NodeConfig{}
s, err := generateSecret() if cmd.Flags().Changed("bind") {
if err != nil { n.BindAddr = bindAddr
return fmt.Errorf("generate cluster secret: %w", err) }
} if cmd.Flags().Changed("port") {
secret = s n.BindPort = bindPort
generated = true }
if cmd.Flags().Changed("advertise") {
n.Advertise = advertise
}
if cmd.Flags().Changed("secret") {
n.ClusterSecret = clusterSecret
}
if err := n.ApplyEnvOverrides(); err != nil {
return err
}
// Cobra defaults (bind=0.0.0.0, port=9901) are still
// available as fallbacks for fields neither flag nor env
// touched.
if n.BindAddr == "" {
n.BindAddr = bindAddr
}
if n.BindPort == 0 {
n.BindPort = bindPort
} }
nodeID := uuid.NewString() _, generated, err := bootstrapNode(n)
n := &config.NodeConfig{
NodeID: nodeID,
BindAddr: bindAddr,
BindPort: bindPort,
Advertise: advertise,
ClusterSecret: secret,
}
if err := n.Save(); err != nil {
return fmt.Errorf("save node.yaml: %w", err)
}
if _, err := crypto.GenerateKeyPair(nodeID); err != nil {
return fmt.Errorf("generate keys: %w", err)
}
// Seed cluster.yaml with this node as its own first peer.
// Without this the math in `quorum` would treat a one-node
// cluster as "0 peers, fallback quorum=1, master=self" —
// which works in isolation but breaks the moment another
// node joins, because the replicated peers list would lack
// the inviter, leading to split-brain elections.
certPEM, err := crypto.LoadCertPEM()
if err != nil { if err != nil {
return fmt.Errorf("load cert: %w", err) return err
}
fp, err := crypto.FingerprintFromCertPEM(certPEM)
if err != nil {
return fmt.Errorf("fingerprint own cert: %w", err)
}
cluster := &config.ClusterConfig{}
if err := cluster.Mutate(nodeID, func(c *config.ClusterConfig) error {
c.Peers = []config.PeerInfo{{
NodeID: nodeID,
Advertise: n.AdvertiseAddr(),
Fingerprint: fp,
CertPEM: string(certPEM),
}}
return nil
}); err != nil {
return fmt.Errorf("seed cluster.yaml: %w", err)
}
out := cmd.OutOrStdout()
fmt.Fprintf(out, "initialised node %s\n", nodeID)
fmt.Fprintf(out, "data dir: %s\n", config.DataDir())
fmt.Fprintf(out, "advertise: %s\n", n.AdvertiseAddr())
if generated {
fmt.Fprintln(out)
fmt.Fprintln(out, "cluster secret (copy to every other node via --secret):")
fmt.Fprintln(out, " "+secret)
} }
printBootstrapResult(cmd.OutOrStdout(), n, generated)
return nil return nil
}, },
} }
@@ -112,6 +85,94 @@ overwritten. Re-run only after wiping the data directory.`,
root.AddCommand(cmd) root.AddCommand(cmd)
} }
// bootstrapNode creates the data dir, writes node.yaml, generates the
// keypair, and seeds cluster.yaml with this node as its own first
// peer. cfg may arrive with any subset of fields populated; missing
// NodeID and ClusterSecret are auto-generated, missing BindAddr /
// BindPort get the compiled defaults.
//
// Returns the populated config (the same pointer that was passed in)
// and a flag indicating whether ClusterSecret was generated here. The
// flag exists so the caller can print the secret for the operator —
// it must be copied to every follower node out-of-band.
//
// Caller is responsible for checking that node.yaml does not yet
// exist; bootstrapNode itself will refuse to overwrite an existing
// keypair (crypto.GenerateKeyPair errors out) but does not guard
// against clobbering node.yaml.
func bootstrapNode(cfg *config.NodeConfig) (*config.NodeConfig, bool, error) {
	if err := config.EnsureDataDir(); err != nil {
		return nil, false, err
	}
	// Fill in anything the caller (flags / env) left unset.
	if cfg.NodeID == "" {
		cfg.NodeID = uuid.NewString()
	}
	if cfg.BindAddr == "" {
		cfg.BindAddr = "0.0.0.0"
	}
	if cfg.BindPort == 0 {
		cfg.BindPort = 9901
	}
	generated := false
	if cfg.ClusterSecret == "" {
		s, err := generateSecret()
		if err != nil {
			return nil, false, fmt.Errorf("generate cluster secret: %w", err)
		}
		cfg.ClusterSecret = s
		generated = true
	}
	// NOTE(review): node.yaml is persisted before key generation. If a
	// later step fails, the half-initialised node.yaml stays on disk and
	// `qu init` will refuse to run again until the data dir is wiped —
	// consistent with the documented "re-run only after wiping" contract,
	// but worth confirming this ordering is intentional.
	if err := cfg.Save(); err != nil {
		return nil, false, fmt.Errorf("save node.yaml: %w", err)
	}
	if _, err := crypto.GenerateKeyPair(cfg.NodeID); err != nil {
		return nil, false, fmt.Errorf("generate keys: %w", err)
	}
	// Seed cluster.yaml with this node as its own first peer.
	// Without this the math in `quorum` would treat a one-node
	// cluster as "0 peers, fallback quorum=1, master=self" — which
	// works in isolation but breaks the moment another node joins,
	// because the replicated peers list would lack the inviter,
	// leading to split-brain elections.
	certPEM, err := crypto.LoadCertPEM()
	if err != nil {
		return nil, false, fmt.Errorf("load cert: %w", err)
	}
	fp, err := crypto.FingerprintFromCertPEM(certPEM)
	if err != nil {
		return nil, false, fmt.Errorf("fingerprint own cert: %w", err)
	}
	cluster := &config.ClusterConfig{}
	if err := cluster.Mutate(cfg.NodeID, func(c *config.ClusterConfig) error {
		c.Peers = []config.PeerInfo{{
			NodeID:      cfg.NodeID,
			Advertise:   cfg.AdvertiseAddr(),
			Fingerprint: fp,
			CertPEM:     string(certPEM),
		}}
		return nil
	}); err != nil {
		return nil, false, fmt.Errorf("seed cluster.yaml: %w", err)
	}
	return cfg, generated, nil
}
// printBootstrapResult emits the human-readable summary shared by
// `qu init` and the serve auto-init path after bootstrapping. Kept in
// one place so the secret-disclosure format stays identical across
// the two entry points.
func printBootstrapResult(out io.Writer, cfg *config.NodeConfig, minted bool) {
	fmt.Fprintf(out, "initialised node %s\n", cfg.NodeID)
	fmt.Fprintf(out, "data dir: %s\n", config.DataDir())
	fmt.Fprintf(out, "advertise: %s\n", cfg.AdvertiseAddr())
	if !minted {
		// Operator supplied the secret themselves — never echo it back.
		return
	}
	fmt.Fprintln(out)
	fmt.Fprintln(out, "cluster secret (copy to every other node via --secret or QUPTIME_CLUSTER_SECRET):")
	fmt.Fprintln(out, "  "+cfg.ClusterSecret)
}
// generateSecret produces 32 bytes of crypto-random data and returns // generateSecret produces 32 bytes of crypto-random data and returns
// it base64-encoded. Long enough that brute force isn't a concern; // it base64-encoded. Long enough that brute force isn't a concern;
// short enough that operators can copy-paste it without pagination. // short enough that operators can copy-paste it without pagination.
+50 -1
View File
@@ -2,6 +2,9 @@ package cli
import ( import (
"context" "context"
"errors"
"fmt"
"io/fs"
"log" "log"
"os" "os"
"os/signal" "os/signal"
@@ -9,6 +12,7 @@ import (
"github.com/spf13/cobra" "github.com/spf13/cobra"
"git.cer.sh/axodouble/quptime/internal/config"
"git.cer.sh/axodouble/quptime/internal/daemon" "git.cer.sh/axodouble/quptime/internal/daemon"
) )
@@ -18,9 +22,18 @@ func addServeCmd(root *cobra.Command) {
Short: "Run the qu daemon in the foreground", Short: "Run the qu daemon in the foreground",
Long: `Run the qu daemon: starts the inter-node listener, the local Long: `Run the qu daemon: starts the inter-node listener, the local
control socket for the CLI, the heartbeat loop and the check control socket for the CLI, the heartbeat loop and the check
scheduler. Stops cleanly on SIGINT or SIGTERM.`, scheduler. Stops cleanly on SIGINT or SIGTERM.
If node.yaml does not exist yet, serve will bootstrap it using values
from the QUPTIME_* environment variables (see docs/configuration.md).
This makes a single ` + "`docker compose up`" + ` enough to launch a new node —
no separate ` + "`qu init`" + ` step is required when the data volume is
fresh.`,
RunE: func(cmd *cobra.Command, args []string) error { RunE: func(cmd *cobra.Command, args []string) error {
logger := log.New(os.Stderr, "quptime: ", log.LstdFlags|log.Lmsgprefix) logger := log.New(os.Stderr, "quptime: ", log.LstdFlags|log.Lmsgprefix)
if err := autoInitIfNeeded(cmd, logger); err != nil {
return err
}
d, err := daemon.New(logger) d, err := daemon.New(logger)
if err != nil { if err != nil {
return err return err
@@ -32,3 +45,39 @@ scheduler. Stops cleanly on SIGINT or SIGTERM.`,
} }
root.AddCommand(cmd) root.AddCommand(cmd)
} }
// autoInitIfNeeded bootstraps the node on first launch.
//
// Friction this removes for container deploys: before, the operator
// had to `docker compose run --rm quptime init …` once before the
// service could come up, which makes `restart: unless-stopped`
// awkward and forces an out-of-band step into every fresh volume.
// Now serve auto-runs the same bootstrap path using QUPTIME_* env
// vars when node.yaml is absent, so the compose file can come up on
// the first try.
//
// Pre-existing node.yaml is left untouched — we only bootstrap when
// the file is genuinely missing. Any other stat error (permission
// denied, broken symlink) is surfaced so the operator sees the real
// problem instead of a confused auto-init attempt clobbering state.
func autoInitIfNeeded(cmd *cobra.Command, logger *log.Logger) error {
	_, err := os.Stat(config.NodeFilePath())
	if err == nil {
		// node.yaml already present — normal restart path.
		return nil
	}
	if !errors.Is(err, fs.ErrNotExist) {
		return fmt.Errorf("stat node.yaml: %w", err)
	}
	logger.Printf("node.yaml not found at %s — bootstrapping from environment", config.NodeFilePath())
	// Start from an empty config and let QUPTIME_* env vars populate
	// it; bootstrapNode fills in whatever remains unset.
	n := &config.NodeConfig{}
	if err := n.ApplyEnvOverrides(); err != nil {
		return err
	}
	// Idiomatic early return instead of `if err := …; err != nil { } else { }`:
	// keep the happy path left-aligned.
	_, generated, err := bootstrapNode(n)
	if err != nil {
		return fmt.Errorf("auto-init: %w", err)
	}
	printBootstrapResult(cmd.OutOrStderr(), n, generated)
	return nil
}
+47
View File
@@ -3,10 +3,26 @@ package config
import ( import (
"fmt" "fmt"
"os" "os"
"strconv"
"gopkg.in/yaml.v3" "gopkg.in/yaml.v3"
) )
// Environment variable names that override fields on NodeConfig at
// load time. Intended to let `docker compose` setups drive a node's
// identity and listener configuration without having to bake a
// node.yaml into the image or run `qu init` manually first.
//
// Empty values are ignored — they do not clear a field. The override
// order is therefore: env (non-empty) > file > compiled default.
const (
	EnvNodeID        = "QUPTIME_NODE_ID"        // overrides node_id (pins a specific UUID)
	EnvBindAddr      = "QUPTIME_BIND_ADDR"      // overrides bind_addr
	EnvBindPort      = "QUPTIME_BIND_PORT"      // overrides bind_port; must parse as an integer
	EnvAdvertise     = "QUPTIME_ADVERTISE"      // overrides advertise (host:port peers dial)
	EnvClusterSecret = "QUPTIME_CLUSTER_SECRET" // overrides cluster_secret (pre-shared join secret)
)
// NodeConfig is the per-node, never-replicated identity file. // NodeConfig is the per-node, never-replicated identity file.
type NodeConfig struct { type NodeConfig struct {
// NodeID is a stable UUID generated at `qu init`. Used by all peers // NodeID is a stable UUID generated at `qu init`. Used by all peers
@@ -45,6 +61,34 @@ func (n *NodeConfig) AdvertiseAddr() string {
return fmt.Sprintf("%s:%d", bind, n.BindPort) return fmt.Sprintf("%s:%d", bind, n.BindPort)
} }
// ApplyEnvOverrides folds QUPTIME_* environment variables onto n.
// Non-empty env values win over the existing field value. Called both
// by LoadNodeConfig and by the `qu init` / serve auto-init paths so
// the same precedence rules apply whether the daemon is reading a
// persisted node.yaml or constructing one from scratch.
func (n *NodeConfig) ApplyEnvOverrides() error {
	// Shared setter for the string-typed fields: a set-but-empty env
	// var is treated the same as an unset one and never clears state.
	override := func(envName string, field *string) {
		if v := os.Getenv(envName); v != "" {
			*field = v
		}
	}
	override(EnvNodeID, &n.NodeID)
	override(EnvBindAddr, &n.BindAddr)
	// BindPort is the one integer field and needs explicit parsing.
	if v := os.Getenv(EnvBindPort); v != "" {
		p, err := strconv.Atoi(v)
		if err != nil {
			return fmt.Errorf("%s=%q: not an integer: %w", EnvBindPort, v, err)
		}
		n.BindPort = p
	}
	override(EnvAdvertise, &n.Advertise)
	override(EnvClusterSecret, &n.ClusterSecret)
	return nil
}
// LoadNodeConfig reads node.yaml from the data dir. // LoadNodeConfig reads node.yaml from the data dir.
func LoadNodeConfig() (*NodeConfig, error) { func LoadNodeConfig() (*NodeConfig, error) {
raw, err := os.ReadFile(NodeFilePath()) raw, err := os.ReadFile(NodeFilePath())
@@ -55,6 +99,9 @@ func LoadNodeConfig() (*NodeConfig, error) {
if err := yaml.Unmarshal(raw, cfg); err != nil { if err := yaml.Unmarshal(raw, cfg); err != nil {
return nil, fmt.Errorf("parse node.yaml: %w", err) return nil, fmt.Errorf("parse node.yaml: %w", err)
} }
if err := cfg.ApplyEnvOverrides(); err != nil {
return nil, err
}
if cfg.BindPort == 0 { if cfg.BindPort == 0 {
cfg.BindPort = 9901 cfg.BindPort = 9901
} }
+95 -3
View File
@@ -4,9 +4,9 @@ import "testing"
func TestAdvertiseAddrFallback(t *testing.T) { func TestAdvertiseAddrFallback(t *testing.T) {
cases := []struct { cases := []struct {
name string name string
cfg NodeConfig cfg NodeConfig
want string want string
}{ }{
{"explicit advertise wins", NodeConfig{Advertise: "host:1234", BindAddr: "0.0.0.0", BindPort: 9901}, "host:1234"}, {"explicit advertise wins", NodeConfig{Advertise: "host:1234", BindAddr: "0.0.0.0", BindPort: 9901}, "host:1234"},
{"empty bind falls back to loopback", NodeConfig{BindPort: 9901}, "127.0.0.1:9901"}, {"empty bind falls back to loopback", NodeConfig{BindPort: 9901}, "127.0.0.1:9901"},
@@ -56,3 +56,95 @@ func TestLoadNodeConfigAppliesDefaults(t *testing.T) {
t.Errorf("BindAddr=%q want 0.0.0.0", loaded.BindAddr) t.Errorf("BindAddr=%q want 0.0.0.0", loaded.BindAddr)
} }
} }
// TestApplyEnvOverrides checks that every QUPTIME_* variable replaces
// its corresponding pre-populated field.
func TestApplyEnvOverrides(t *testing.T) {
	for key, val := range map[string]string{
		EnvNodeID:        "node-from-env",
		EnvBindAddr:      "1.2.3.4",
		EnvBindPort:      "9999",
		EnvAdvertise:     "public.example.com:9999",
		EnvClusterSecret: "shh-secret",
	} {
		t.Setenv(key, val)
	}
	cfg := NodeConfig{
		NodeID:        "original-id",
		BindAddr:      "0.0.0.0",
		BindPort:      9901,
		Advertise:     "old.example.com:9901",
		ClusterSecret: "old-secret",
	}
	if err := cfg.ApplyEnvOverrides(); err != nil {
		t.Fatal(err)
	}
	want := NodeConfig{
		NodeID:        "node-from-env",
		BindAddr:      "1.2.3.4",
		BindPort:      9999,
		Advertise:     "public.example.com:9999",
		ClusterSecret: "shh-secret",
	}
	if cfg != want {
		t.Errorf("got %+v want %+v", cfg, want)
	}
}
// TestApplyEnvOverridesEmptyValuesIgnored pins the "empty means unset"
// rule: explicitly empty env vars must NOT clobber existing fields —
// otherwise `docker run -e QUPTIME_ADVERTISE=` would silently erase a
// previously-persisted advertise address.
func TestApplyEnvOverridesEmptyValuesIgnored(t *testing.T) {
	for _, key := range []string{EnvNodeID, EnvBindAddr, EnvBindPort, EnvAdvertise, EnvClusterSecret} {
		t.Setenv(key, "")
	}
	orig := NodeConfig{
		NodeID:        "keep-me",
		BindAddr:      "10.0.0.1",
		BindPort:      9901,
		Advertise:     "keep.example.com:9901",
		ClusterSecret: "keep-secret",
	}
	n := orig
	if err := n.ApplyEnvOverrides(); err != nil {
		t.Fatal(err)
	}
	if n != orig {
		t.Errorf("empty env vars mutated config: got %+v want %+v", n, orig)
	}
}
// TestApplyEnvOverridesBadPort verifies a non-numeric QUPTIME_BIND_PORT
// surfaces as an error instead of being silently dropped.
func TestApplyEnvOverridesBadPort(t *testing.T) {
	t.Setenv(EnvBindPort, "not-an-int")
	var cfg NodeConfig
	if err := cfg.ApplyEnvOverrides(); err == nil {
		t.Fatal("expected error for non-integer port")
	}
}
// TestLoadNodeConfigEnvOverridesFile checks the env > file precedence:
// values persisted in node.yaml lose to non-empty env vars at load
// time, while untouched fields survive unchanged.
func TestLoadNodeConfigEnvOverridesFile(t *testing.T) {
	t.Setenv("QUPTIME_DIR", t.TempDir())
	// Persist a file with one bind addr; env should win on load.
	seed := &NodeConfig{NodeID: "abc", BindAddr: "127.0.0.1", BindPort: 9901, Advertise: "file.example.com:9901"}
	if err := seed.Save(); err != nil {
		t.Fatal(err)
	}
	for key, val := range map[string]string{
		EnvBindAddr:  "0.0.0.0",
		EnvAdvertise: "env.example.com:9001",
		EnvBindPort:  "9001",
	} {
		t.Setenv(key, val)
	}
	loaded, err := LoadNodeConfig()
	if err != nil {
		t.Fatal(err)
	}
	if loaded.BindAddr != "0.0.0.0" {
		t.Errorf("BindAddr=%q want 0.0.0.0 (env override)", loaded.BindAddr)
	}
	if loaded.BindPort != 9001 {
		t.Errorf("BindPort=%d want 9001 (env override)", loaded.BindPort)
	}
	if loaded.Advertise != "env.example.com:9001" {
		t.Errorf("Advertise=%q want env.example.com:9001 (env override)", loaded.Advertise)
	}
	if loaded.NodeID != "abc" {
		t.Errorf("NodeID=%q want abc (unchanged)", loaded.NodeID)
	}
}