[client] Persist sync response via pluggable store (disk on iOS) (#6331)
* Persist sync response via pluggable store (disk on iOS)
The latest Management sync response (which carries the network map) was
kept in memory for debug bundle generation. On memory-constrained
platforms like iOS the network map can be large enough to matter.
Introduce a syncstore package with a Store interface and two backends:
a memory backend (the previous behavior) and a disk backend that
serializes the response to a file in the state directory. The backend
is selected per-platform at build time: disk on iOS, memory elsewhere.
The disk store clears any leftover file on construction so a fresh
store never reads stale data from an earlier run (e.g. another
profile's network map).
In the engine, drop the separate persistSyncResponse bool: the store is
only instantiated while persistence is enabled, and its presence is
what marks persistence as active. The store is also cleared on engine
close so the file does not linger on disk.
* syncstore: silence nilnil linter on "nothing stored" returns
Get returns (nil, nil) to signal that nothing is stored, which is part
of the Store contract and preserves the original behaviour. Annotate
both backends with //nolint:nilnil so golangci-lint does not flag it.
* syncstore: hold syncRespMux for the whole store Set/Get
Both handleSync and GetLatestSyncResponse snapshotted e.syncStore under
the read lock and then released it before calling Set/Get. That allowed
SetSyncResponsePersistence(false) or engine close to clear the store
mid-call. In particular a concurrent Clear()+nil followed by a late
Set could re-create the file that was just removed, defeating the
leak/lingering protection.
Hold syncRespMux for the duration of the store operation in both spots
so the store cannot be cleared while a Set/Get is in flight.
* syncstore: avoid StateDir "." when state path is empty
On mobile the state path may be empty (the engine tolerates a missing
state file). filepath.Dir("") returns ".", which would make a
disk-backed syncstore write into the working directory instead of
letting NewDiskStore fall back to os.TempDir().
Only set engineConfig.StateDir when path is non-empty.
This commit is contained in:
@@ -6,6 +6,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"net"
|
"net"
|
||||||
"net/netip"
|
"net/netip"
|
||||||
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
"runtime/debug"
|
"runtime/debug"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -346,6 +347,11 @@ func (c *ConnectClient) run(mobileDependency MobileDependency, runningChan chan
|
|||||||
return wrapErr(err)
|
return wrapErr(err)
|
||||||
}
|
}
|
||||||
engineConfig.TempDir = mobileDependency.TempDir
|
engineConfig.TempDir = mobileDependency.TempDir
|
||||||
|
// Leave StateDir empty when there is no state path so a disk-backed
|
||||||
|
// syncstore falls back to os.TempDir() instead of filepath.Dir("") == ".".
|
||||||
|
if path != "" {
|
||||||
|
engineConfig.StateDir = filepath.Dir(path)
|
||||||
|
}
|
||||||
|
|
||||||
relayManager := relayClient.NewManager(engineCtx, relayURLs, myPrivateKey.PublicKey().String(), engineConfig.MTU)
|
relayManager := relayClient.NewManager(engineCtx, relayURLs, myPrivateKey.PublicKey().String(), engineConfig.MTU)
|
||||||
c.statusRecorder.SetRelayMgr(relayManager)
|
c.statusRecorder.SetRelayMgr(relayManager)
|
||||||
|
|||||||
@@ -22,7 +22,6 @@ import (
|
|||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
"golang.zx2c4.com/wireguard/tun/netstack"
|
"golang.zx2c4.com/wireguard/tun/netstack"
|
||||||
"golang.zx2c4.com/wireguard/wgctrl/wgtypes"
|
"golang.zx2c4.com/wireguard/wgctrl/wgtypes"
|
||||||
"google.golang.org/protobuf/proto"
|
|
||||||
|
|
||||||
nberrors "github.com/netbirdio/netbird/client/errors"
|
nberrors "github.com/netbirdio/netbird/client/errors"
|
||||||
"github.com/netbirdio/netbird/client/firewall"
|
"github.com/netbirdio/netbird/client/firewall"
|
||||||
@@ -56,6 +55,7 @@ import (
|
|||||||
"github.com/netbirdio/netbird/client/internal/routemanager"
|
"github.com/netbirdio/netbird/client/internal/routemanager"
|
||||||
"github.com/netbirdio/netbird/client/internal/routemanager/systemops"
|
"github.com/netbirdio/netbird/client/internal/routemanager/systemops"
|
||||||
"github.com/netbirdio/netbird/client/internal/statemanager"
|
"github.com/netbirdio/netbird/client/internal/statemanager"
|
||||||
|
"github.com/netbirdio/netbird/client/internal/syncstore"
|
||||||
"github.com/netbirdio/netbird/client/internal/updater"
|
"github.com/netbirdio/netbird/client/internal/updater"
|
||||||
"github.com/netbirdio/netbird/client/jobexec"
|
"github.com/netbirdio/netbird/client/jobexec"
|
||||||
cProto "github.com/netbirdio/netbird/client/proto"
|
cProto "github.com/netbirdio/netbird/client/proto"
|
||||||
@@ -148,6 +148,10 @@ type EngineConfig struct {
|
|||||||
|
|
||||||
LogPath string
|
LogPath string
|
||||||
TempDir string
|
TempDir string
|
||||||
|
|
||||||
|
// StateDir is the directory holding the state file. The sync response
|
||||||
|
// (network map) is serialized here on platforms that persist it to disk.
|
||||||
|
StateDir string
|
||||||
}
|
}
|
||||||
|
|
||||||
// EngineServices holds the external service dependencies required by the Engine.
|
// EngineServices holds the external service dependencies required by the Engine.
|
||||||
@@ -226,10 +230,15 @@ type Engine struct {
|
|||||||
|
|
||||||
afpacketCapture *capture.AFPacketCapture
|
afpacketCapture *capture.AFPacketCapture
|
||||||
|
|
||||||
// Sync response persistence (protected by syncRespMux)
|
// Sync response persistence (protected by syncRespMux).
|
||||||
syncRespMux sync.RWMutex
|
// syncStore is nil unless persistence has been enabled; its presence is
|
||||||
persistSyncResponse bool
|
// what marks persistence as active. The backend (disk or memory) is
|
||||||
latestSyncResponse *mgmProto.SyncResponse
|
// selected per-platform; see the syncstore package. syncStoreDir is where
|
||||||
|
// a disk-backed store serializes to.
|
||||||
|
syncRespMux sync.RWMutex
|
||||||
|
syncStore syncstore.Store
|
||||||
|
syncStoreDir string
|
||||||
|
|
||||||
flowManager nftypes.FlowManager
|
flowManager nftypes.FlowManager
|
||||||
|
|
||||||
// auto-update
|
// auto-update
|
||||||
@@ -292,6 +301,7 @@ func NewEngine(
|
|||||||
jobExecutor: jobexec.NewExecutor(),
|
jobExecutor: jobexec.NewExecutor(),
|
||||||
clientMetrics: services.ClientMetrics,
|
clientMetrics: services.ClientMetrics,
|
||||||
updateManager: services.UpdateManager,
|
updateManager: services.UpdateManager,
|
||||||
|
syncStoreDir: config.StateDir,
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Infof("I am: %s", config.WgPrivateKey.PublicKey().String())
|
log.Infof("I am: %s", config.WgPrivateKey.PublicKey().String())
|
||||||
@@ -913,19 +923,18 @@ func (e *Engine) handleSync(update *mgmProto.SyncResponse) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Persist sync response under the dedicated lock (syncRespMux), not under syncMsgMux.
|
// Persist sync response under the dedicated lock (syncRespMux), not under syncMsgMux.
|
||||||
// Read the storage-enabled flag under the syncRespMux too.
|
// A non-nil syncStore is what marks persistence as enabled. Hold the lock for
|
||||||
|
// the whole Set so the store cannot be cleared (disabled / engine close)
|
||||||
|
// mid-call and have this write resurrect a file that was just removed.
|
||||||
e.syncRespMux.RLock()
|
e.syncRespMux.RLock()
|
||||||
enabled := e.persistSyncResponse
|
if e.syncStore != nil {
|
||||||
e.syncRespMux.RUnlock()
|
if err := e.syncStore.Set(update); err != nil {
|
||||||
|
log.Errorf("failed to persist sync response: %v", err)
|
||||||
// Store sync response if persistence is enabled
|
} else {
|
||||||
if enabled {
|
log.Debugf("sync response persisted with serial %d", nm.GetSerial())
|
||||||
e.syncRespMux.Lock()
|
}
|
||||||
e.latestSyncResponse = update
|
|
||||||
e.syncRespMux.Unlock()
|
|
||||||
|
|
||||||
log.Debugf("sync response persisted with serial %d", nm.GetSerial())
|
|
||||||
}
|
}
|
||||||
|
e.syncRespMux.RUnlock()
|
||||||
|
|
||||||
// only apply new changes and ignore old ones
|
// only apply new changes and ignore old ones
|
||||||
if err := e.updateNetworkMap(nm); err != nil {
|
if err := e.updateNetworkMap(nm); err != nil {
|
||||||
@@ -1813,6 +1822,18 @@ func (e *Engine) close() {
|
|||||||
if err := e.portForwardManager.GracefullyStop(ctx); err != nil {
|
if err := e.portForwardManager.GracefullyStop(ctx); err != nil {
|
||||||
log.Warnf("failed to gracefully stop port forwarding manager: %s", err)
|
log.Warnf("failed to gracefully stop port forwarding manager: %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Drop any persisted sync response so its network map does not linger on
|
||||||
|
// disk after the engine stops (and cannot leak into a later run).
|
||||||
|
e.syncRespMux.Lock()
|
||||||
|
store := e.syncStore
|
||||||
|
e.syncStore = nil
|
||||||
|
e.syncRespMux.Unlock()
|
||||||
|
if store != nil {
|
||||||
|
if err := store.Clear(); err != nil {
|
||||||
|
log.Warnf("failed to clear persisted sync response on close: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *Engine) readInitialSettings() ([]*route.Route, *nbdns.Config, bool, error) {
|
func (e *Engine) readInitialSettings() ([]*route.Route, *nbdns.Config, bool, error) {
|
||||||
@@ -2142,45 +2163,42 @@ func (e *Engine) stopDNSServer() {
|
|||||||
e.statusRecorder.UpdateDNSStates(nsGroupStates)
|
e.statusRecorder.UpdateDNSStates(nsGroupStates)
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetSyncResponsePersistence enables or disables sync response persistence
|
// SetSyncResponsePersistence enables or disables sync response persistence.
|
||||||
|
// The store is only instantiated while persistence is enabled; construction
|
||||||
|
// itself drops any stale data left over from an earlier run (see syncstore).
|
||||||
func (e *Engine) SetSyncResponsePersistence(enabled bool) {
|
func (e *Engine) SetSyncResponsePersistence(enabled bool) {
|
||||||
e.syncRespMux.Lock()
|
e.syncRespMux.Lock()
|
||||||
defer e.syncRespMux.Unlock()
|
defer e.syncRespMux.Unlock()
|
||||||
|
|
||||||
if enabled == e.persistSyncResponse {
|
if enabled == (e.syncStore != nil) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
e.persistSyncResponse = enabled
|
|
||||||
log.Debugf("Sync response persistence is set to %t", enabled)
|
log.Debugf("Sync response persistence is set to %t", enabled)
|
||||||
|
|
||||||
if !enabled {
|
if !enabled {
|
||||||
e.latestSyncResponse = nil
|
if err := e.syncStore.Clear(); err != nil {
|
||||||
|
log.Warnf("failed to clear persisted sync response: %v", err)
|
||||||
|
}
|
||||||
|
e.syncStore = nil
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
e.syncStore = syncstore.New(e.syncStoreDir)
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetLatestSyncResponse returns the stored sync response if persistence is enabled
|
// GetLatestSyncResponse returns the stored sync response if persistence is enabled
|
||||||
func (e *Engine) GetLatestSyncResponse() (*mgmProto.SyncResponse, error) {
|
func (e *Engine) GetLatestSyncResponse() (*mgmProto.SyncResponse, error) {
|
||||||
|
// Hold the lock for the whole Get so the store cannot be cleared
|
||||||
|
// (disabled / engine close) mid-call.
|
||||||
e.syncRespMux.RLock()
|
e.syncRespMux.RLock()
|
||||||
enabled := e.persistSyncResponse
|
defer e.syncRespMux.RUnlock()
|
||||||
latest := e.latestSyncResponse
|
|
||||||
e.syncRespMux.RUnlock()
|
|
||||||
|
|
||||||
if !enabled {
|
if e.syncStore == nil {
|
||||||
return nil, errors.New("sync response persistence is disabled")
|
return nil, errors.New("sync response persistence is disabled")
|
||||||
}
|
}
|
||||||
|
|
||||||
if latest == nil {
|
//nolint:nilnil
|
||||||
//nolint:nilnil
|
return e.syncStore.Get()
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Debugf("Retrieving latest sync response with size %d bytes", proto.Size(latest))
|
|
||||||
sr, ok := proto.Clone(latest).(*mgmProto.SyncResponse)
|
|
||||||
if !ok {
|
|
||||||
return nil, fmt.Errorf("failed to clone sync response")
|
|
||||||
}
|
|
||||||
|
|
||||||
return sr, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetWgAddr returns the wireguard address
|
// GetWgAddr returns the wireguard address
|
||||||
|
|||||||
99
client/internal/syncstore/disk.go
Normal file
99
client/internal/syncstore/disk.go
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
package syncstore
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
"google.golang.org/protobuf/proto"
|
||||||
|
|
||||||
|
mgmProto "github.com/netbirdio/netbird/shared/management/proto"
|
||||||
|
"github.com/netbirdio/netbird/util"
|
||||||
|
)
|
||||||
|
|
||||||
|
// syncResponseFileName is the name of the file the sync response is serialized
|
||||||
|
// to, placed inside the configured directory (the state directory).
|
||||||
|
const syncResponseFileName = "networkmap.pb"
|
||||||
|
|
||||||
|
// diskStore serializes the latest sync response to a file on disk instead of
|
||||||
|
// keeping it in memory. This trades disk I/O for a much smaller memory
|
||||||
|
// footprint, which matters on memory-constrained platforms (iOS).
|
||||||
|
type diskStore struct {
|
||||||
|
mu sync.Mutex
|
||||||
|
path string
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewDiskStore returns a Store that serializes the sync response to a file in
|
||||||
|
// the given directory. If dir is empty it falls back to the OS temp directory.
|
||||||
|
//
|
||||||
|
// Any file left over from a previous run is removed on construction so a fresh
|
||||||
|
// store never reads stale data (e.g. another profile's network map).
|
||||||
|
func NewDiskStore(dir string) Store {
|
||||||
|
if dir == "" {
|
||||||
|
dir = os.TempDir()
|
||||||
|
}
|
||||||
|
s := &diskStore{
|
||||||
|
path: filepath.Join(dir, syncResponseFileName),
|
||||||
|
}
|
||||||
|
if err := s.Clear(); err != nil {
|
||||||
|
log.Warnf("failed to clear stale sync response file: %v", err)
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *diskStore) Set(resp *mgmProto.SyncResponse) error {
|
||||||
|
if resp == nil {
|
||||||
|
return s.Clear()
|
||||||
|
}
|
||||||
|
|
||||||
|
bs, err := proto.Marshal(resp)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("marshal sync response: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
|
if err := util.WriteBytesWithRestrictedPermission(context.Background(), s.path, bs); err != nil {
|
||||||
|
return fmt.Errorf("write sync response to %s: %w", s.path, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debugf("sync response persisted to %s (%d bytes)", s.path, len(bs))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *diskStore) Get() (*mgmProto.SyncResponse, error) {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
|
bs, err := os.ReadFile(s.path)
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, os.ErrNotExist) {
|
||||||
|
//nolint:nilnil // nil,nil means "nothing stored", per the Store contract; preserve the original behaviour
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("read sync response from %s: %w", s.path, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp := &mgmProto.SyncResponse{}
|
||||||
|
if err := proto.Unmarshal(bs, resp); err != nil {
|
||||||
|
return nil, fmt.Errorf("unmarshal sync response: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debugf("retrieving latest sync response from %s (%d bytes)", s.path, len(bs))
|
||||||
|
return resp, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *diskStore) Clear() error {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
|
if err := os.Remove(s.path); err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return fmt.Errorf("remove sync response file %s: %w", s.path, err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
9
client/internal/syncstore/factory_ios.go
Normal file
9
client/internal/syncstore/factory_ios.go
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
//go:build ios
|
||||||
|
|
||||||
|
package syncstore
|
||||||
|
|
||||||
|
// New returns the platform default store. On iOS the sync response is
|
||||||
|
// serialized to disk (in dir) to keep it out of the constrained process memory.
|
||||||
|
func New(dir string) Store {
|
||||||
|
return NewDiskStore(dir)
|
||||||
|
}
|
||||||
9
client/internal/syncstore/factory_other.go
Normal file
9
client/internal/syncstore/factory_other.go
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
//go:build !ios
|
||||||
|
|
||||||
|
package syncstore
|
||||||
|
|
||||||
|
// New returns the platform default store. On all non-iOS platforms the sync
|
||||||
|
// response is kept in memory; dir is unused.
|
||||||
|
func New(_ string) Store {
|
||||||
|
return NewMemoryStore()
|
||||||
|
}
|
||||||
56
client/internal/syncstore/memory.go
Normal file
56
client/internal/syncstore/memory.go
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
package syncstore
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
"google.golang.org/protobuf/proto"
|
||||||
|
|
||||||
|
mgmProto "github.com/netbirdio/netbird/shared/management/proto"
|
||||||
|
)
|
||||||
|
|
||||||
|
// memoryStore keeps the latest sync response in memory.
|
||||||
|
type memoryStore struct {
|
||||||
|
mu sync.RWMutex
|
||||||
|
latest *mgmProto.SyncResponse
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewMemoryStore returns a Store that keeps the sync response in memory.
|
||||||
|
func NewMemoryStore() Store {
|
||||||
|
return &memoryStore{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *memoryStore) Set(resp *mgmProto.SyncResponse) error {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
|
s.latest = resp
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *memoryStore) Get() (*mgmProto.SyncResponse, error) {
|
||||||
|
s.mu.RLock()
|
||||||
|
latest := s.latest
|
||||||
|
s.mu.RUnlock()
|
||||||
|
|
||||||
|
if latest == nil {
|
||||||
|
//nolint:nilnil // nil,nil means "nothing stored", per the Store contract; preserve the original behaviour
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debugf("retrieving latest sync response with size %d bytes", proto.Size(latest))
|
||||||
|
sr, ok := proto.Clone(latest).(*mgmProto.SyncResponse)
|
||||||
|
if !ok {
|
||||||
|
return nil, fmt.Errorf("clone sync response")
|
||||||
|
}
|
||||||
|
return sr, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *memoryStore) Clear() error {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
|
s.latest = nil
|
||||||
|
return nil
|
||||||
|
}
|
||||||
29
client/internal/syncstore/syncstore.go
Normal file
29
client/internal/syncstore/syncstore.go
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
// Package syncstore stores the latest Management sync response (which carries
|
||||||
|
// the network map) for debug bundle generation.
|
||||||
|
//
|
||||||
|
// The storage backend is selected at build time per operating system: on iOS
|
||||||
|
// the response is serialized to disk to keep it out of the (tightly
|
||||||
|
// constrained) process memory, while on all other platforms it is kept in
|
||||||
|
// memory. The backend is chosen by the New constructor; see factory_ios.go and
|
||||||
|
// factory_other.go.
|
||||||
|
package syncstore
|
||||||
|
|
||||||
|
import (
|
||||||
|
mgmProto "github.com/netbirdio/netbird/shared/management/proto"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Store persists the latest sync response and returns it on demand.
|
||||||
|
//
|
||||||
|
// Implementations must be safe for concurrent use.
|
||||||
|
type Store interface {
|
||||||
|
// Set stores the given sync response, replacing any previously stored one.
|
||||||
|
Set(resp *mgmProto.SyncResponse) error
|
||||||
|
|
||||||
|
// Get returns the stored sync response, or nil if none is stored.
|
||||||
|
// The returned value is an independent copy that the caller may retain.
|
||||||
|
Get() (*mgmProto.SyncResponse, error)
|
||||||
|
|
||||||
|
// Clear removes any stored sync response. It is safe to call when nothing
|
||||||
|
// is stored.
|
||||||
|
Clear() error
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user