Initial structure
This commit is contained in:
@@ -0,0 +1,147 @@
|
||||
package checks
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/jasper/quptime/internal/config"
|
||||
)
|
||||
|
||||
// ReconcileInterval is how often the scheduler reconciles its set of
|
||||
// running probes against cluster.yaml.
|
||||
const ReconcileInterval = 5 * time.Second
|
||||
|
||||
// Sink is the abstraction the scheduler uses to report results.
|
||||
// Implemented by the daemon: results go straight to the local
|
||||
// aggregator when self is the master, otherwise they ship to the
|
||||
// master over the RPC channel.
|
||||
type Sink interface {
|
||||
Submit(Result)
|
||||
}
|
||||
|
||||
// Scheduler keeps a goroutine alive per configured check. On each
|
||||
// reconcile pass it starts probes for new checks, stops probes for
|
||||
// removed checks, and restarts probes whose interval or type changed.
|
||||
type Scheduler struct {
|
||||
cluster *config.ClusterConfig
|
||||
sink Sink
|
||||
|
||||
mu sync.Mutex
|
||||
running map[string]*probeWorker
|
||||
}
|
||||
|
||||
type probeWorker struct {
|
||||
check config.Check
|
||||
cancel context.CancelFunc
|
||||
}
|
||||
|
||||
// NewScheduler creates a scheduler bound to the given cluster config.
|
||||
func NewScheduler(cluster *config.ClusterConfig, sink Sink) *Scheduler {
|
||||
return &Scheduler{
|
||||
cluster: cluster,
|
||||
sink: sink,
|
||||
running: map[string]*probeWorker{},
|
||||
}
|
||||
}
|
||||
|
||||
// Start runs the reconcile loop until ctx is cancelled. Reconcile is
|
||||
// also called immediately on entry so checks start without waiting
|
||||
// for the first tick.
|
||||
func (s *Scheduler) Start(ctx context.Context) {
|
||||
s.reconcile(ctx)
|
||||
t := time.NewTicker(ReconcileInterval)
|
||||
defer t.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
s.stopAll()
|
||||
return
|
||||
case <-t.C:
|
||||
s.reconcile(ctx)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scheduler) reconcile(ctx context.Context) {
|
||||
snap := s.cluster.Snapshot()
|
||||
want := map[string]config.Check{}
|
||||
for _, c := range snap.Checks {
|
||||
if c.ID == "" {
|
||||
continue
|
||||
}
|
||||
want[c.ID] = c
|
||||
}
|
||||
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
for id, w := range s.running {
|
||||
desired, stillThere := want[id]
|
||||
if !stillThere || !sameCheck(desired, w.check) {
|
||||
w.cancel()
|
||||
delete(s.running, id)
|
||||
}
|
||||
}
|
||||
for id, c := range want {
|
||||
if _, exists := s.running[id]; exists {
|
||||
continue
|
||||
}
|
||||
wctx, cancel := context.WithCancel(ctx)
|
||||
s.running[id] = &probeWorker{check: c, cancel: cancel}
|
||||
go s.run(wctx, c)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scheduler) run(ctx context.Context, c config.Check) {
|
||||
interval := c.Interval
|
||||
if interval <= 0 {
|
||||
interval = 30 * time.Second
|
||||
}
|
||||
// stagger startup so a freshly-loaded scheduler doesn't burst
|
||||
// hundreds of probes simultaneously.
|
||||
jitter := time.Duration(int64(interval) / 10)
|
||||
if jitter > 0 {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-time.After(time.Duration(time.Now().UnixNano() % int64(jitter))):
|
||||
}
|
||||
}
|
||||
t := time.NewTicker(interval)
|
||||
defer t.Stop()
|
||||
|
||||
// fire one immediate probe so state populates without delay.
|
||||
s.sink.Submit(Run(ctx, &c))
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-t.C:
|
||||
s.sink.Submit(Run(ctx, &c))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scheduler) stopAll() {
|
||||
s.mu.Lock()
|
||||
for id, w := range s.running {
|
||||
w.cancel()
|
||||
delete(s.running, id)
|
||||
}
|
||||
s.mu.Unlock()
|
||||
}
|
||||
|
||||
// sameCheck returns true when two Check structs would produce
|
||||
// identical probing behaviour, so the scheduler can leave the worker
|
||||
// running across a no-op config push.
|
||||
func sameCheck(a, b config.Check) bool {
|
||||
return a.ID == b.ID &&
|
||||
a.Type == b.Type &&
|
||||
a.Target == b.Target &&
|
||||
a.Interval == b.Interval &&
|
||||
a.Timeout == b.Timeout &&
|
||||
a.ExpectStatus == b.ExpectStatus &&
|
||||
a.BodyMatch == b.BodyMatch
|
||||
}
|
||||
Reference in New Issue
Block a user