Fix logic error: a chicken-and-egg problem in multi-host fingerprint verification prevented hosts from ever joining
This commit is contained in:
@@ -107,7 +107,10 @@ func (s *Server) handleConn(ctx context.Context, raw net.Conn) {
|
||||
if err := tlsConn.HandshakeContext(ctx); err != nil {
|
||||
return
|
||||
}
|
||||
peerID := peerNodeIDFromConnState(tlsConn.ConnectionState())
|
||||
state := tlsConn.ConnectionState()
|
||||
peerID := peerNodeIDFromConnState(state)
|
||||
peerFP := peerFingerprintFromConnState(state)
|
||||
trusted := s.peerTrusted(peerFP)
|
||||
|
||||
for {
|
||||
body, err := readFrame(tlsConn)
|
||||
@@ -120,6 +123,14 @@ func (s *Server) handleConn(ctx context.Context, raw net.Conn) {
|
||||
return
|
||||
}
|
||||
|
||||
// Until the peer is trusted, only the bootstrap call (Join) is
|
||||
// allowed through. Everything else gets a clear error so the
|
||||
// caller knows to re-run `qu node add`.
|
||||
if !trusted && req.Method != MethodJoin {
|
||||
_ = writeError(tlsConn, req.ID, "peer not trusted; run `qu node add` first")
|
||||
continue
|
||||
}
|
||||
|
||||
fn, exists := s.handlers[req.Method]
|
||||
if !exists {
|
||||
_ = writeError(tlsConn, req.ID, "unknown method: "+req.Method)
|
||||
@@ -134,9 +145,26 @@ func (s *Server) handleConn(ctx context.Context, raw net.Conn) {
|
||||
if err := writeResult(tlsConn, req.ID, result); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// A successful Join writes the caller into our trust store;
|
||||
// re-check so subsequent calls on this same connection (or
|
||||
// after reconnect) flow through normally.
|
||||
if req.Method == MethodJoin && !trusted {
|
||||
trusted = s.peerTrusted(peerFP)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// peerTrusted reports whether peerFP is in our trust store. Returns
|
||||
// false on empty input so a missing/parse-failed cert is never trusted.
|
||||
func (s *Server) peerTrusted(peerFP string) bool {
|
||||
if peerFP == "" {
|
||||
return false
|
||||
}
|
||||
_, ok := s.assets.Trust.LookupByFingerprint(peerFP)
|
||||
return ok
|
||||
}
|
||||
|
||||
// Client opens and pools one mTLS connection per peer node ID. Each
|
||||
// connection serialises outstanding calls under a mutex; concurrent
|
||||
// calls to different peers proceed in parallel.
|
||||
@@ -321,3 +349,13 @@ func peerNodeIDFromConnState(cs tls.ConnectionState) string {
|
||||
}
|
||||
return cs.PeerCertificates[0].Subject.CommonName
|
||||
}
|
||||
|
||||
// peerFingerprintFromConnState computes the SPKI fingerprint of the
|
||||
// peer's leaf cert, matching the format the trust store stores. An
|
||||
// empty result means the peer presented no cert.
|
||||
func peerFingerprintFromConnState(cs tls.ConnectionState) string {
|
||||
if len(cs.PeerCertificates) == 0 {
|
||||
return ""
|
||||
}
|
||||
return fingerprintOf(cs.PeerCertificates[0])
|
||||
}
|
||||
|
||||
@@ -170,6 +170,73 @@ func TestRPCRejectsUntrustedPeer(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestJoinAllowedFromUntrustedPeer(t *testing.T) {
|
||||
// Verifies the bootstrap path: B has not yet been added to A's
|
||||
// trust store. A must still accept the Join RPC; subsequent
|
||||
// non-Join calls on the same connection should succeed once Join
|
||||
// has populated trust.
|
||||
a := makeNode(t, t.TempDir(), "node-a")
|
||||
b := makeNode(t, t.TempDir(), "node-b")
|
||||
|
||||
tmpLn, _ := net.Listen("tcp", "127.0.0.1:0")
|
||||
addr := tmpLn.Addr().String()
|
||||
tmpLn.Close()
|
||||
|
||||
// B must trust A so B's client-side handshake passes.
|
||||
t.Setenv("QUPTIME_DIR", b.dir)
|
||||
_ = b.assets.Trust.Add(trust.Entry{NodeID: a.id, Address: addr, Fingerprint: a.fp})
|
||||
|
||||
srv := NewServer(a.assets)
|
||||
srv.Handle(MethodJoin, func(_ context.Context, _ string, raw json.RawMessage) (any, error) {
|
||||
var req JoinRequest
|
||||
if err := json.Unmarshal(raw, &req); err != nil {
|
||||
return JoinResponse{Error: err.Error()}, nil
|
||||
}
|
||||
// A pretends to accept and records B in its trust store.
|
||||
t.Setenv("QUPTIME_DIR", a.dir)
|
||||
if err := a.assets.Trust.Add(trust.Entry{
|
||||
NodeID: req.NodeID, Address: req.Advertise, Fingerprint: req.Fingerprint,
|
||||
}); err != nil {
|
||||
return JoinResponse{Error: err.Error()}, nil
|
||||
}
|
||||
return JoinResponse{Accepted: true}, nil
|
||||
})
|
||||
srv.Handle(MethodPing, func(_ context.Context, _ string, _ json.RawMessage) (any, error) {
|
||||
return PingResponse{NodeID: a.id, Now: time.Now()}, nil
|
||||
})
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
go srv.Serve(ctx, addr)
|
||||
defer srv.Stop()
|
||||
if !waitForDial(addr, 2*time.Second) {
|
||||
t.Fatal("server not up")
|
||||
}
|
||||
|
||||
cli := NewClient(b.assets)
|
||||
defer cli.Close()
|
||||
|
||||
// Pre-Join: Ping must be rejected with the "not trusted" error.
|
||||
if err := cli.Call(ctx, a.id, addr, MethodPing, nil, nil); err == nil {
|
||||
t.Error("Ping was allowed without prior trust")
|
||||
}
|
||||
|
||||
// Join must succeed even though B is untrusted.
|
||||
joinReq := JoinRequest{NodeID: b.id, Advertise: addr, Fingerprint: b.fp, CertPEM: string(b.assets.Cert)}
|
||||
var joinResp JoinResponse
|
||||
if err := cli.Call(ctx, a.id, addr, MethodJoin, joinReq, &joinResp); err != nil {
|
||||
t.Fatalf("Join: %v", err)
|
||||
}
|
||||
if !joinResp.Accepted {
|
||||
t.Fatalf("Join not accepted: %s", joinResp.Error)
|
||||
}
|
||||
|
||||
// Post-Join on the SAME pooled connection: Ping should now flow.
|
||||
if err := cli.Call(ctx, a.id, addr, MethodPing, nil, nil); err != nil {
|
||||
t.Errorf("post-Join Ping failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// waitForDial polls a TCP listener until it accepts a plain TCP
|
||||
// connection, signalling that Serve has begun listening.
|
||||
func waitForDial(addr string, max time.Duration) bool {
|
||||
|
||||
@@ -41,19 +41,24 @@ func (a *TLSAssets) tlsCert() (tls.Certificate, error) {
|
||||
}
|
||||
|
||||
// ServerConfig produces a tls.Config suitable for an inter-node
|
||||
// listener. Peers must present a certificate, and that certificate's
|
||||
// fingerprint must already be present in the trust store.
|
||||
// listener.
|
||||
//
|
||||
// We accept any client certificate at the TLS layer (no CA verification
|
||||
// and no fingerprint pinning here). Trust is enforced one layer up by
|
||||
// the RPC dispatcher: untrusted peers may only invoke MethodJoin, which
|
||||
// is the protocol's bootstrap step. This avoids the chicken-and-egg
|
||||
// where Join itself would need pre-existing symmetric trust to complete
|
||||
// the handshake.
|
||||
func (a *TLSAssets) ServerConfig() (*tls.Config, error) {
|
||||
cert, err := a.tlsCert()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &tls.Config{
|
||||
Certificates: []tls.Certificate{cert},
|
||||
MinVersion: MinTLS,
|
||||
ClientAuth: tls.RequireAnyClientCert,
|
||||
InsecureSkipVerify: true, // we do our own pinning via VerifyPeerCertificate
|
||||
VerifyPeerCertificate: a.Trust.VerifyPeerCert,
|
||||
Certificates: []tls.Certificate{cert},
|
||||
MinVersion: MinTLS,
|
||||
ClientAuth: tls.RequireAnyClientCert,
|
||||
InsecureSkipVerify: true, // trust is gated per-method by the RPC dispatcher
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user