Initial structure
This commit is contained in:
@@ -0,0 +1,227 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// PeerInfo identifies a cluster member as known to all peers.
// (Trust material lives in trust.yaml; this struct stays portable.)
type PeerInfo struct {
	NodeID      string `yaml:"node_id"`     // stable UUID of the peer (see NodeConfig.NodeID)
	Advertise   string `yaml:"advertise"`   // address other nodes dial to reach this peer
	Fingerprint string `yaml:"fingerprint"` // key fingerprint; presumably matched against trust.yaml — verify at use site
}
|
||||
|
||||
// CheckType enumerates the supported probe kinds.
type CheckType string

// Supported probe kinds.
const (
	CheckHTTP CheckType = "http" // HTTP(S) request against a URL
	CheckTCP  CheckType = "tcp"  // TCP connect against host:port
	CheckICMP CheckType = "icmp" // ICMP ping against a host
)
|
||||
|
||||
// Check describes a single monitored target.
type Check struct {
	ID     string    `yaml:"id"`   // stable unique identifier; FindCheck matches on ID or Name
	Name   string    `yaml:"name"` // human-readable label
	Type   CheckType `yaml:"type"`
	Target string    `yaml:"target"` // URL, host:port, or host
	// NOTE(review): yaml.v3 has no native parsing of "30s"-style strings
	// into time.Duration — confirm how interval/timeout are populated
	// from cluster.yaml (raw nanosecond integers would work, strings
	// would not).
	Interval time.Duration `yaml:"interval"` // default 30s
	Timeout  time.Duration `yaml:"timeout"`  // default 10s

	// HTTP-only options.
	ExpectStatus int    `yaml:"expect_status,omitempty"` // expected HTTP status code
	BodyMatch    string `yaml:"body_match,omitempty"`    // presumably matched against the response body — verify in the checker

	// AlertIDs lists which configured alerts fire when this check
	// transitions state.
	AlertIDs []string `yaml:"alert_ids,omitempty"`
}
|
||||
|
||||
// AlertType enumerates supported notifier kinds.
type AlertType string

// Supported notifier kinds.
const (
	AlertSMTP    AlertType = "smtp"    // email via an SMTP relay
	AlertDiscord AlertType = "discord" // Discord webhook
)
|
||||
|
||||
// Alert describes a single notifier destination. Only the option
// group matching Type is meaningful; the others stay zero-valued.
type Alert struct {
	ID   string    `yaml:"id"`   // stable unique identifier; FindAlert matches on ID or Name
	Name string    `yaml:"name"` // human-readable label
	Type AlertType `yaml:"type"`

	// SMTP options.
	SMTPHost     string   `yaml:"smtp_host,omitempty"`
	SMTPPort     int      `yaml:"smtp_port,omitempty"`
	SMTPUser     string   `yaml:"smtp_user,omitempty"`
	SMTPPassword string   `yaml:"smtp_password,omitempty"` // stored in cluster.yaml — replicated to all peers
	SMTPFrom     string   `yaml:"smtp_from,omitempty"`
	SMTPTo       []string `yaml:"smtp_to,omitempty"`
	SMTPStartTLS bool     `yaml:"smtp_starttls,omitempty"`

	// Discord options.
	DiscordWebhook string `yaml:"discord_webhook,omitempty"`
}
|
||||
|
||||
// ClusterConfig is the replicated cluster state. The Version field
// strictly increases on every mutation; the master is the only node
// that bumps it.
//
// The embedded mutex guards all fields, so ClusterConfig values must
// not be copied — always pass *ClusterConfig.
type ClusterConfig struct {
	Version   uint64    `yaml:"version"`    // monotonically increasing mutation counter
	UpdatedAt time.Time `yaml:"updated_at"` // UTC time of the last mutation (set in Mutate)
	UpdatedBy string    `yaml:"updated_by"` // node ID that performed the last mutation

	Peers  []PeerInfo `yaml:"peers"`
	Checks []Check    `yaml:"checks"`
	Alerts []Alert    `yaml:"alerts"`

	// mu serializes all access. It is unexported, so yaml.v3 skips it
	// anyway; the "-" tag is belt-and-braces.
	mu sync.RWMutex `yaml:"-"`
}
|
||||
|
||||
// LoadClusterConfig reads cluster.yaml. A missing file returns an
|
||||
// empty (version 0) config — callers should treat that as the
|
||||
// pre-bootstrap state.
|
||||
func LoadClusterConfig() (*ClusterConfig, error) {
|
||||
raw, err := os.ReadFile(ClusterFilePath())
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return &ClusterConfig{}, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
cfg := &ClusterConfig{}
|
||||
if err := yaml.Unmarshal(raw, cfg); err != nil {
|
||||
return nil, fmt.Errorf("parse cluster.yaml: %w", err)
|
||||
}
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
// Save writes cluster.yaml atomically. Caller is responsible for
|
||||
// having already taken any external locks.
|
||||
func (c *ClusterConfig) Save() error {
|
||||
c.mu.RLock()
|
||||
defer c.mu.RUnlock()
|
||||
out, err := yaml.Marshal(c)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return AtomicWrite(ClusterFilePath(), out, 0o600)
|
||||
}
|
||||
|
||||
// Snapshot returns a deep-enough copy of the config that can be
|
||||
// safely serialized while the original continues to mutate.
|
||||
func (c *ClusterConfig) Snapshot() *ClusterConfig {
|
||||
c.mu.RLock()
|
||||
defer c.mu.RUnlock()
|
||||
cp := &ClusterConfig{
|
||||
Version: c.Version,
|
||||
UpdatedAt: c.UpdatedAt,
|
||||
UpdatedBy: c.UpdatedBy,
|
||||
Peers: append([]PeerInfo(nil), c.Peers...),
|
||||
Checks: append([]Check(nil), c.Checks...),
|
||||
Alerts: append([]Alert(nil), c.Alerts...),
|
||||
}
|
||||
return cp
|
||||
}
|
||||
|
||||
// Mutate runs fn under the config write lock, bumps Version on
|
||||
// success, and writes the file. Only the master should call this.
|
||||
func (c *ClusterConfig) Mutate(byNode string, fn func(*ClusterConfig) error) error {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
if err := fn(c); err != nil {
|
||||
return err
|
||||
}
|
||||
c.Version++
|
||||
c.UpdatedAt = time.Now().UTC()
|
||||
c.UpdatedBy = byNode
|
||||
out, err := yaml.Marshal(c)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return AtomicWrite(ClusterFilePath(), out, 0o600)
|
||||
}
|
||||
|
||||
// Replace overwrites the local config with an incoming snapshot if
|
||||
// that snapshot has a strictly greater version. Returns true if
|
||||
// applied.
|
||||
func (c *ClusterConfig) Replace(incoming *ClusterConfig) (bool, error) {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
if incoming.Version <= c.Version {
|
||||
return false, nil
|
||||
}
|
||||
c.Version = incoming.Version
|
||||
c.UpdatedAt = incoming.UpdatedAt
|
||||
c.UpdatedBy = incoming.UpdatedBy
|
||||
c.Peers = append([]PeerInfo(nil), incoming.Peers...)
|
||||
c.Checks = append([]Check(nil), incoming.Checks...)
|
||||
c.Alerts = append([]Alert(nil), incoming.Alerts...)
|
||||
out, err := yaml.Marshal(c)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if err := AtomicWrite(ClusterFilePath(), out, 0o600); err != nil {
|
||||
return false, err
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// FindCheck returns the check with the given ID or name.
|
||||
func (c *ClusterConfig) FindCheck(idOrName string) (*Check, int) {
|
||||
c.mu.RLock()
|
||||
defer c.mu.RUnlock()
|
||||
for i := range c.Checks {
|
||||
if c.Checks[i].ID == idOrName || c.Checks[i].Name == idOrName {
|
||||
cp := c.Checks[i]
|
||||
return &cp, i
|
||||
}
|
||||
}
|
||||
return nil, -1
|
||||
}
|
||||
|
||||
// FindAlert returns the alert with the given ID or name.
|
||||
func (c *ClusterConfig) FindAlert(idOrName string) (*Alert, int) {
|
||||
c.mu.RLock()
|
||||
defer c.mu.RUnlock()
|
||||
for i := range c.Alerts {
|
||||
if c.Alerts[i].ID == idOrName || c.Alerts[i].Name == idOrName {
|
||||
cp := c.Alerts[i]
|
||||
return &cp, i
|
||||
}
|
||||
}
|
||||
return nil, -1
|
||||
}
|
||||
|
||||
// FindPeer returns the peer with the given node ID.
|
||||
func (c *ClusterConfig) FindPeer(nodeID string) (*PeerInfo, int) {
|
||||
c.mu.RLock()
|
||||
defer c.mu.RUnlock()
|
||||
for i := range c.Peers {
|
||||
if c.Peers[i].NodeID == nodeID {
|
||||
cp := c.Peers[i]
|
||||
return &cp, i
|
||||
}
|
||||
}
|
||||
return nil, -1
|
||||
}
|
||||
|
||||
// QuorumSize returns the minimum number of live nodes required for
|
||||
// the cluster to make progress: floor(N/2) + 1.
|
||||
func (c *ClusterConfig) QuorumSize() int {
|
||||
c.mu.RLock()
|
||||
defer c.mu.RUnlock()
|
||||
n := len(c.Peers)
|
||||
if n == 0 {
|
||||
return 1
|
||||
}
|
||||
return n/2 + 1
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
package config
|
||||
|
||||
import (
	"fmt"
	"net"
	"os"
	"strconv"

	"gopkg.in/yaml.v3"
)
|
||||
|
||||
// NodeConfig is the per-node, never-replicated identity file.
type NodeConfig struct {
	// NodeID is a stable UUID generated at `qu init`. Used by all peers
	// to refer to this node across restarts and IP changes.
	NodeID string `yaml:"node_id"`

	// BindAddr is the address the daemon listens on for inter-node
	// traffic. Defaults to 0.0.0.0 (applied in LoadNodeConfig).
	BindAddr string `yaml:"bind_addr"`

	// BindPort is the port the daemon listens on. Default 9001
	// (applied in LoadNodeConfig).
	BindPort int `yaml:"bind_port"`

	// Advertise is the address other nodes use to reach us. May differ
	// from BindAddr when behind NAT. Set explicitly via `qu init --advertise`.
	// When empty, AdvertiseAddr() derives a value from BindAddr/BindPort.
	Advertise string `yaml:"advertise"`
}
|
||||
|
||||
// AdvertiseAddr returns the address peers should dial. Falls back to
|
||||
// BindAddr:BindPort if Advertise is empty.
|
||||
func (n *NodeConfig) AdvertiseAddr() string {
|
||||
if n.Advertise != "" {
|
||||
return n.Advertise
|
||||
}
|
||||
bind := n.BindAddr
|
||||
if bind == "" || bind == "0.0.0.0" || bind == "::" {
|
||||
bind = "127.0.0.1"
|
||||
}
|
||||
return fmt.Sprintf("%s:%d", bind, n.BindPort)
|
||||
}
|
||||
|
||||
// LoadNodeConfig reads node.yaml from the data dir.
|
||||
func LoadNodeConfig() (*NodeConfig, error) {
|
||||
raw, err := os.ReadFile(NodeFilePath())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cfg := &NodeConfig{}
|
||||
if err := yaml.Unmarshal(raw, cfg); err != nil {
|
||||
return nil, fmt.Errorf("parse node.yaml: %w", err)
|
||||
}
|
||||
if cfg.BindPort == 0 {
|
||||
cfg.BindPort = 9001
|
||||
}
|
||||
if cfg.BindAddr == "" {
|
||||
cfg.BindAddr = "0.0.0.0"
|
||||
}
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
// Save writes node.yaml atomically.
|
||||
func (n *NodeConfig) Save() error {
|
||||
out, err := yaml.Marshal(n)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return AtomicWrite(NodeFilePath(), out, 0o600)
|
||||
}
|
||||
@@ -0,0 +1,140 @@
|
||||
// Package config owns the on-disk layout of a node's state.
|
||||
//
|
||||
// The following files live under the data directory:
|
||||
//
|
||||
// node.yaml — local identity, never replicated (id, addresses, key paths)
|
||||
// cluster.yaml — replicated state (peers, checks, alerts, version)
|
||||
// trust.yaml — local fingerprint trust store
|
||||
// keys/ — RSA private + public keys + self-signed cert
|
||||
// state.json — runtime cache (last check results, current master)
|
||||
//
|
||||
// A unix socket for the local CLI lives alongside (defaults to
|
||||
// /var/run/quptime/quptime.sock when running as root, otherwise
|
||||
// $XDG_RUNTIME_DIR/quptime/quptime.sock).
|
||||
package config
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// Default file names. Callers should always go through DataDir() so an
// override via QUPTIME_DIR is respected.
const (
	NodeFile    = "node.yaml"    // local identity, never replicated
	ClusterFile = "cluster.yaml" // replicated cluster state
	TrustFile   = "trust.yaml"   // local fingerprint trust store
	StateFile   = "state.json"   // runtime cache
	KeysDir     = "keys"         // subdirectory holding key material
	PrivateKey  = "private.pem"
	PublicKey   = "public.pem"
	CertFile    = "cert.pem"
	SocketName  = "quptime.sock" // local CLI control socket

	// envDataDir overrides the data directory when set (see DataDir).
	envDataDir = "QUPTIME_DIR"
)
|
||||
|
||||
// DataDir returns the configured data directory. Order of resolution:
|
||||
// 1. $QUPTIME_DIR if set
|
||||
// 2. /etc/quptime when running as root
|
||||
// 3. $XDG_CONFIG_HOME/quptime (or ~/.config/quptime) otherwise
|
||||
func DataDir() string {
|
||||
if v := os.Getenv(envDataDir); v != "" {
|
||||
return v
|
||||
}
|
||||
if os.Geteuid() == 0 {
|
||||
return "/etc/quptime"
|
||||
}
|
||||
if v := os.Getenv("XDG_CONFIG_HOME"); v != "" {
|
||||
return filepath.Join(v, "quptime")
|
||||
}
|
||||
home, err := os.UserHomeDir()
|
||||
if err != nil || home == "" {
|
||||
return "./quptime"
|
||||
}
|
||||
return filepath.Join(home, ".config", "quptime")
|
||||
}
|
||||
|
||||
// SocketPath returns the unix socket used for local CLI ↔ daemon control.
|
||||
func SocketPath() string {
|
||||
if v := os.Getenv("QUPTIME_SOCKET"); v != "" {
|
||||
return v
|
||||
}
|
||||
if os.Geteuid() == 0 {
|
||||
return "/var/run/quptime/" + SocketName
|
||||
}
|
||||
if v := os.Getenv("XDG_RUNTIME_DIR"); v != "" {
|
||||
return filepath.Join(v, "quptime", SocketName)
|
||||
}
|
||||
return filepath.Join(os.TempDir(), "quptime-"+envUserSuffix(), SocketName)
|
||||
}
|
||||
|
||||
// envUserSuffix returns $USER, or "default" when it is unset, for
// namespacing the fallback temp socket directory per user.
func envUserSuffix() string {
	user := os.Getenv("USER")
	if user == "" {
		return "default"
	}
	return user
}
|
||||
|
||||
// NodeFilePath returns the absolute path to node.yaml.
func NodeFilePath() string { return filepath.Join(DataDir(), NodeFile) }

// ClusterFilePath returns the absolute path to cluster.yaml.
func ClusterFilePath() string { return filepath.Join(DataDir(), ClusterFile) }

// TrustFilePath returns the absolute path to trust.yaml.
func TrustFilePath() string { return filepath.Join(DataDir(), TrustFile) }

// StateFilePath returns the absolute path to state.json.
func StateFilePath() string { return filepath.Join(DataDir(), StateFile) }

// PrivateKeyPath returns the absolute path to the RSA private key.
func PrivateKeyPath() string { return filepath.Join(DataDir(), KeysDir, PrivateKey) }

// PublicKeyPath returns the absolute path to the RSA public key.
func PublicKeyPath() string { return filepath.Join(DataDir(), KeysDir, PublicKey) }

// CertFilePath returns the absolute path to the self-signed cert (PEM).
func CertFilePath() string { return filepath.Join(DataDir(), KeysDir, CertFile) }
|
||||
|
||||
// EnsureDataDir creates the data directory tree if absent.
|
||||
func EnsureDataDir() error {
|
||||
dir := DataDir()
|
||||
if err := os.MkdirAll(filepath.Join(dir, KeysDir), 0o700); err != nil {
|
||||
return err
|
||||
}
|
||||
return os.MkdirAll(filepath.Dir(SocketPath()), 0o700)
|
||||
}
|
||||
|
||||
// AtomicWrite writes data to path through a temp file + rename. The temp
|
||||
// file is created in the same directory so the rename is atomic on POSIX.
|
||||
func AtomicWrite(path string, data []byte, perm os.FileMode) error {
|
||||
if path == "" {
|
||||
return errors.New("empty path")
|
||||
}
|
||||
dir := filepath.Dir(path)
|
||||
if err := os.MkdirAll(dir, 0o700); err != nil {
|
||||
return err
|
||||
}
|
||||
tmp, err := os.CreateTemp(dir, filepath.Base(path)+".tmp-*")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
tmpName := tmp.Name()
|
||||
if _, err := tmp.Write(data); err != nil {
|
||||
tmp.Close()
|
||||
os.Remove(tmpName)
|
||||
return err
|
||||
}
|
||||
if err := tmp.Chmod(perm); err != nil {
|
||||
tmp.Close()
|
||||
os.Remove(tmpName)
|
||||
return err
|
||||
}
|
||||
if err := tmp.Close(); err != nil {
|
||||
os.Remove(tmpName)
|
||||
return err
|
||||
}
|
||||
return os.Rename(tmpName, path)
|
||||
}
|
||||
Reference in New Issue
Block a user