Extend netbird status command to include health information (#1471)
* Adds management, signal, and relay (STUN/TURN) health probes to the status command. * Adds a reason when the management or signal connections are disconnected. * Adds last wireguard handshake and received/sent bytes per peer
This commit is contained in:
@@ -35,6 +35,7 @@ type Client interface {
|
||||
GetStatus() Status
|
||||
Receive(msgHandler func(msg *proto.Message) error) error
|
||||
Ready() bool
|
||||
IsHealthy() bool
|
||||
WaitStreamConnected()
|
||||
SendToStream(msg *proto.EncryptedMessage) error
|
||||
Send(msg *proto.Message) error
|
||||
|
||||
@@ -28,7 +28,7 @@ const defaultSendTimeout = 5 * time.Second
|
||||
|
||||
// ConnStateNotifier is a wrapper interface of the status recorder
|
||||
type ConnStateNotifier interface {
|
||||
MarkSignalDisconnected()
|
||||
MarkSignalDisconnected(error)
|
||||
MarkSignalConnected()
|
||||
}
|
||||
|
||||
@@ -166,7 +166,7 @@ func (c *GrpcClient) Receive(msgHandler func(msg *proto.Message) error) error {
|
||||
// we need this reset because after a successful connection and a consequent error, backoff lib doesn't
|
||||
// reset times and next try will start with a long delay
|
||||
backOff.Reset()
|
||||
c.notifyDisconnected()
|
||||
c.notifyDisconnected(err)
|
||||
log.Warnf("disconnected from the Signal service but will retry silently. Reason: %v", err)
|
||||
return err
|
||||
}
|
||||
@@ -238,6 +238,35 @@ func (c *GrpcClient) Ready() bool {
|
||||
return c.signalConn.GetState() == connectivity.Ready || c.signalConn.GetState() == connectivity.Idle
|
||||
}
|
||||
|
||||
// IsHealthy probes the gRPC connection and returns false on errors
|
||||
func (c *GrpcClient) IsHealthy() bool {
|
||||
switch c.signalConn.GetState() {
|
||||
case connectivity.TransientFailure:
|
||||
return false
|
||||
case connectivity.Connecting:
|
||||
return true
|
||||
case connectivity.Shutdown:
|
||||
return true
|
||||
case connectivity.Idle:
|
||||
case connectivity.Ready:
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(c.ctx, 1*time.Second)
|
||||
defer cancel()
|
||||
_, err := c.realClient.Send(ctx, &proto.EncryptedMessage{
|
||||
Key: c.key.PublicKey().String(),
|
||||
RemoteKey: "dummy",
|
||||
Body: nil,
|
||||
})
|
||||
if err != nil {
|
||||
c.notifyDisconnected(err)
|
||||
log.Warnf("health check returned: %s", err)
|
||||
return false
|
||||
}
|
||||
c.notifyConnected()
|
||||
return true
|
||||
}
|
||||
|
||||
// WaitStreamConnected waits until the client is connected to the Signal stream
|
||||
func (c *GrpcClient) WaitStreamConnected() {
|
||||
|
||||
@@ -383,14 +412,14 @@ func (c *GrpcClient) receive(stream proto.SignalExchange_ConnectStreamClient,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *GrpcClient) notifyDisconnected() {
|
||||
func (c *GrpcClient) notifyDisconnected(err error) {
|
||||
c.connStateCallbackLock.RLock()
|
||||
defer c.connStateCallbackLock.RUnlock()
|
||||
|
||||
if c.connStateCallback == nil {
|
||||
return
|
||||
}
|
||||
c.connStateCallback.MarkSignalDisconnected()
|
||||
c.connStateCallback.MarkSignalDisconnected(err)
|
||||
}
|
||||
|
||||
func (c *GrpcClient) notifyConnected() {
|
||||
|
||||
@@ -15,6 +15,10 @@ type MockClient struct {
|
||||
SendFunc func(msg *proto.Message) error
|
||||
}
|
||||
|
||||
func (sm *MockClient) IsHealthy() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (sm *MockClient) Close() error {
|
||||
if sm.CloseFunc == nil {
|
||||
return nil
|
||||
|
||||
Reference in New Issue
Block a user