|
|
@@ -17,7 +17,6 @@ import (
|
|
|
"sync"
|
|
|
"time"
|
|
|
|
|
|
- "golang.org/x/sync/errgroup"
|
|
|
"tailscale.com/logpolicy"
|
|
|
"tailscale.com/logtail"
|
|
|
"tailscale.com/net/connstats"
|
|
|
@@ -25,6 +24,7 @@ import (
|
|
|
"tailscale.com/smallzstd"
|
|
|
"tailscale.com/tailcfg"
|
|
|
"tailscale.com/types/netlogtype"
|
|
|
+ "tailscale.com/util/multierr"
|
|
|
"tailscale.com/wgengine/router"
|
|
|
)
|
|
|
|
|
|
@@ -32,8 +32,7 @@ import (
|
|
|
const pollPeriod = 5 * time.Second
|
|
|
|
|
|
// Device is an abstraction over a tunnel device or a magic socket.
|
|
|
-// *tstun.Wrapper implements this interface.
|
|
|
-// *magicsock.Conn implements this interface.
|
|
|
+// Both *tstun.Wrapper and *magicsock.Conn implement this interface.
|
|
|
type Device interface {
|
|
|
SetStatistics(*connstats.Statistics)
|
|
|
}
|
|
|
@@ -47,15 +46,15 @@ func (noopDevice) SetStatistics(*connstats.Statistics) {}
|
|
|
// Exit node traffic is not logged for privacy reasons.
|
|
|
// The zero value is ready for use.
|
|
|
type Logger struct {
|
|
|
- mu sync.Mutex
|
|
|
+ mu sync.Mutex // protects all fields below
|
|
|
|
|
|
logger *logtail.Logger
|
|
|
+ stats *connstats.Statistics
|
|
|
+ tun Device
|
|
|
+ sock Device
|
|
|
|
|
|
addrs map[netip.Addr]bool
|
|
|
prefixes map[netip.Prefix]bool
|
|
|
-
|
|
|
- group errgroup.Group
|
|
|
- cancel context.CancelFunc
|
|
|
}
|
|
|
|
|
|
// Running reports whether the logger is running.
|
|
|
@@ -97,18 +96,13 @@ func (nl *Logger) Startup(nodeID tailcfg.StableNodeID, nodeLogID, domainLogID lo
|
|
|
if nl.logger != nil {
|
|
|
return fmt.Errorf("network logger already running for %v", nl.logger.PrivateID().Public())
|
|
|
}
|
|
|
- if tun == nil {
|
|
|
- tun = noopDevice{}
|
|
|
- }
|
|
|
- if sock == nil {
|
|
|
- sock = noopDevice{}
|
|
|
- }
|
|
|
|
|
|
+	// Start up a log stream to Tailscale's logging service.
|
|
|
httpc := &http.Client{Transport: logpolicy.NewLogtailTransport(logtail.DefaultHost)}
|
|
|
if testClient != nil {
|
|
|
httpc = testClient
|
|
|
}
|
|
|
- logger := logtail.NewLogger(logtail.Config{
|
|
|
+ nl.logger = logtail.NewLogger(logtail.Config{
|
|
|
Collection: "tailtraffic.log.tailscale.io",
|
|
|
PrivateID: nodeLogID,
|
|
|
CopyPrivateID: domainLogID,
|
|
|
@@ -127,47 +121,34 @@ func (nl *Logger) Startup(nodeID tailcfg.StableNodeID, nodeLogID, domainLogID lo
|
|
|
IncludeProcID: true,
|
|
|
IncludeProcSequence: true,
|
|
|
}, log.Printf)
|
|
|
- nl.logger = logger
|
|
|
-
|
|
|
- stats := new(connstats.Statistics)
|
|
|
- ctx, cancel := context.WithCancel(context.Background())
|
|
|
- nl.cancel = cancel
|
|
|
- nl.group.Go(func() error {
|
|
|
- tun.SetStatistics(stats)
|
|
|
- defer tun.SetStatistics(nil)
|
|
|
|
|
|
- sock.SetStatistics(stats)
|
|
|
- defer sock.SetStatistics(nil)
|
|
|
+	// Start up a data structure to track per-connection statistics.
|
|
|
+ // There is a maximum size for individual log messages that logtail
|
|
|
+ // can upload to the Tailscale log service, so stay below this limit.
|
|
|
+ const maxLogSize = 256 << 10
|
|
|
+ const maxConns = (maxLogSize - netlogtype.MaxMessageJSONSize) / netlogtype.MaxConnectionCountsJSONSize
|
|
|
+ nl.stats = connstats.NewStatistics(pollPeriod, maxConns, func(start, end time.Time, virtual, physical map[netlogtype.Connection]netlogtype.Counts) {
|
|
|
+ nl.mu.Lock()
|
|
|
+ addrs := nl.addrs
|
|
|
+ prefixes := nl.prefixes
|
|
|
+ nl.mu.Unlock()
|
|
|
+ recordStatistics(nl.logger, nodeID, start, end, virtual, physical, addrs, prefixes)
|
|
|
+ })
|
|
|
|
|
|
- start := time.Now()
|
|
|
- ticker := time.NewTicker(pollPeriod)
|
|
|
- for {
|
|
|
- var end time.Time
|
|
|
- select {
|
|
|
- case <-ctx.Done():
|
|
|
- end = time.Now()
|
|
|
- case end = <-ticker.C:
|
|
|
- }
|
|
|
+ // Register the connection tracker into the TUN device.
|
|
|
+ if tun == nil {
|
|
|
+ tun = noopDevice{}
|
|
|
+ }
|
|
|
+ nl.tun = tun
|
|
|
+ nl.tun.SetStatistics(nl.stats)
|
|
|
|
|
|
- // NOTE: connstats and sockStats will always be slightly out-of-sync.
|
|
|
- // It is impossible to have an atomic snapshot of statistics
|
|
|
- // at both layers without a global mutex that spans all layers.
|
|
|
- connstats, sockStats := stats.Extract()
|
|
|
- if len(connstats)+len(sockStats) > 0 {
|
|
|
- nl.mu.Lock()
|
|
|
- addrs := nl.addrs
|
|
|
- prefixes := nl.prefixes
|
|
|
- nl.mu.Unlock()
|
|
|
- recordStatistics(logger, nodeID, start, end, connstats, sockStats, addrs, prefixes)
|
|
|
- }
|
|
|
+ // Register the connection tracker into magicsock.
|
|
|
+ if sock == nil {
|
|
|
+ sock = noopDevice{}
|
|
|
+ }
|
|
|
+ nl.sock = sock
|
|
|
+ nl.sock.SetStatistics(nl.stats)
|
|
|
|
|
|
- if ctx.Err() != nil {
|
|
|
- break
|
|
|
- }
|
|
|
- start = end.Add(time.Nanosecond)
|
|
|
- }
|
|
|
- return nil
|
|
|
- })
|
|
|
return nil
|
|
|
}
|
|
|
|
|
|
@@ -222,21 +203,8 @@ func recordStatistics(logger *logtail.Logger, nodeID tailcfg.StableNodeID, start
|
|
|
}
|
|
|
|
|
|
if len(m.VirtualTraffic)+len(m.SubnetTraffic)+len(m.ExitTraffic)+len(m.PhysicalTraffic) > 0 {
|
|
|
- // TODO(joetsai): Place a hard limit on the size of a network log message.
|
|
|
- // The log server rejects any payloads above a certain size, so logging
|
|
|
- // a message that large would cause logtail to be stuck forever trying
|
|
|
- // and failing to upload the same excessively large payload.
|
|
|
- //
|
|
|
- // We should figure out the behavior for handling this. We could split
|
|
|
- // the message apart so that there are multiple chunks with the same window,
|
|
|
- // We could also consider reducing the granularity of the data
|
|
|
- // by dropping port numbers.
|
|
|
- const maxSize = 256 << 10
|
|
|
if b, err := json.Marshal(m); err != nil {
|
|
|
logger.Logf("json.Marshal error: %v", err)
|
|
|
- } else if len(b) > maxSize {
|
|
|
- logger.Logf("JSON body too large: %dB (virtual:%d subnet:%d exit:%d physical:%d)",
|
|
|
- len(b), len(m.VirtualTraffic), len(m.SubnetTraffic), len(m.ExitTraffic), len(m.PhysicalTraffic))
|
|
|
} else {
|
|
|
logger.Logf("%s", b)
|
|
|
}
|
|
|
@@ -285,15 +253,23 @@ func (nl *Logger) Shutdown(ctx context.Context) error {
|
|
|
if nl.logger == nil {
|
|
|
return nil
|
|
|
}
|
|
|
- nl.cancel()
|
|
|
+
|
|
|
+	// Shut down in reverse order of Startup.
|
|
|
+ // Do not hold lock while shutting down since this may flush one last time.
|
|
|
nl.mu.Unlock()
|
|
|
- nl.group.Wait() // do not hold lock while waiting
|
|
|
+ nl.sock.SetStatistics(nil)
|
|
|
+ nl.tun.SetStatistics(nil)
|
|
|
+ err1 := nl.stats.Shutdown(ctx)
|
|
|
+ err2 := nl.logger.Shutdown(ctx)
|
|
|
nl.mu.Lock()
|
|
|
- err := nl.logger.Shutdown(ctx)
|
|
|
|
|
|
+ // Purge state.
|
|
|
nl.logger = nil
|
|
|
+ nl.stats = nil
|
|
|
+ nl.tun = nil
|
|
|
+ nl.sock = nil
|
|
|
nl.addrs = nil
|
|
|
nl.prefixes = nil
|
|
|
- nl.cancel = nil
|
|
|
- return err
|
|
|
+
|
|
|
+ return multierr.New(err1, err2)
|
|
|
}
|