Browse Source

Move Linux client & common packages into a public repo.

Earl Lee 6 years ago
parent
commit
a8d8b8719a
100 changed files with 10562 additions and 0 deletions
  1. 17 0
      AUTHORS
  2. 27 0
      LICENSE
  3. 24 0
      PATENTS
  4. 28 0
      atomicfile/atomicfile.go
  5. 14 0
      cmd/relaynode/.gitignore
  6. 63 0
      cmd/relaynode/acl.json
  7. 1 0
      cmd/relaynode/clean.do
  8. 13 0
      cmd/relaynode/clean.od
  9. 10 0
      cmd/relaynode/deb.od
  10. 1 0
      cmd/relaynode/debian/README.Debian
  11. 5 0
      cmd/relaynode/debian/changelog.do
  12. 0 0
      cmd/relaynode/debian/clean
  13. 1 0
      cmd/relaynode/debian/compat
  14. 14 0
      cmd/relaynode/debian/control
  15. 11 0
      cmd/relaynode/debian/copyright
  16. 25 0
      cmd/relaynode/debian/gen-changelog
  17. 4 0
      cmd/relaynode/debian/install
  18. 8 0
      cmd/relaynode/debian/postinst
  19. 10 0
      cmd/relaynode/debian/rules
  20. 12 0
      cmd/relaynode/debian/tailscale-relay.service
  21. 20 0
      cmd/relaynode/default.deb.od
  22. 21 0
      cmd/relaynode/default.dir.od
  23. 14 0
      cmd/relaynode/default.rpm.od
  24. 7 0
      cmd/relaynode/default.spec.od
  25. 8 0
      cmd/relaynode/default.tar.gz.od
  26. 15 0
      cmd/relaynode/dist.od
  27. 1 0
      cmd/relaynode/docker/.gitignore
  28. 17 0
      cmd/relaynode/docker/Dockerfile
  29. 1 0
      cmd/relaynode/docker/all.do
  30. 3 0
      cmd/relaynode/docker/build.do
  31. 2 0
      cmd/relaynode/docker/relaynode.do
  32. 10 0
      cmd/relaynode/docker/run.sh
  33. 1 0
      cmd/relaynode/package
  34. 300 0
      cmd/relaynode/relaynode.go
  35. 9 0
      cmd/relaynode/rpm.od
  36. 4 0
      cmd/relaynode/tailscale-login
  37. 14 0
      cmd/relaynode/tailscale-relay.defaults
  38. 42 0
      cmd/relaynode/tailscale-relay.spec.in
  39. 7 0
      cmd/relaynode/tarball.od
  40. 96 0
      cmd/taillogin/taillogin.go
  41. 149 0
      cmd/tailscale/ipn.go
  42. 88 0
      cmd/tailscaled/ipnd.go
  43. 594 0
      control/controlclient/auto.go
  44. 1107 0
      control/controlclient/auto_test.go
  45. 68 0
      control/controlclient/controlclient_test.go
  46. 656 0
      control/controlclient/direct.go
  47. 305 0
      control/controlclient/direct_test.go
  48. 294 0
      control/controlclient/netmap.go
  49. 227 0
      control/policy/policy.go
  50. 156 0
      control/policy/policy_test.go
  51. 182 0
      derp/derp_client.go
  52. 380 0
      derp/derp_server.go
  53. 125 0
      derp/derp_test.go
  54. 203 0
      derp/derphttp/derphttp_client.go
  55. 35 0
      derp/derphttp/derphttp_server.go
  56. 142 0
      derp/derphttp/derphttp_test.go
  57. 13 0
      derp/doc.go
  58. 19 0
      go.mod
  59. 76 0
      go.sum
  60. 79 0
      ipn/backend.go
  61. 11 0
      ipn/doc.go
  62. 207 0
      ipn/e2e_test.go
  63. 72 0
      ipn/fake.go
  64. 166 0
      ipn/handle.go
  65. 253 0
      ipn/ipnserver/server.go
  66. 635 0
      ipn/local.go
  67. 249 0
      ipn/message.go
  68. 171 0
      ipn/message_test.go
  69. 149 0
      ipn/prefs.go
  70. 68 0
      ipn/prefs_test.go
  71. 10 0
      logger/logger.go
  72. 171 0
      logpolicy/logpolicy.go
  73. 6 0
      logtail/.gitignore
  74. 10 0
      logtail/README.md
  75. 195 0
      logtail/api.md
  76. 49 0
      logtail/backoff/backoff.go
  77. 82 0
      logtail/buffer.go
  78. 51 0
      logtail/example/logadopt/logadopt.go
  79. 87 0
      logtail/example/logreprocess/demo.sh
  80. 116 0
      logtail/example/logreprocess/logreprocess.go
  81. 46 0
      logtail/example/logtail/logtail.go
  82. 238 0
      logtail/filch/filch.go
  83. 178 0
      logtail/filch/filch_test.go
  84. 30 0
      logtail/filch/filch_unix.go
  85. 44 0
      logtail/filch/filch_windows.go
  86. 103 0
      logtail/id.go
  87. 54 0
      logtail/id_test.go
  88. 464 0
      logtail/logtail.go
  89. 20 0
      logtail/logtail_test.go
  90. 155 0
      portlist/netstat.go
  91. 89 0
      portlist/netstat_test.go
  92. 59 0
      portlist/poller.go
  93. 87 0
      portlist/portlist.go
  94. 99 0
      portlist/portlist_darwin.go
  95. 155 0
      portlist/portlist_linux.go
  96. 20 0
      portlist/portlist_other.go
  97. 16 0
      portlist/portlist_windows.go
  98. 78 0
      ratelimit/ratelimit.go
  99. 28 0
      ratelimit/ratelimit_test.go
  100. 63 0
      safesocket/basic_test.go

+ 17 - 0
AUTHORS

@@ -0,0 +1,17 @@
+# This is the official list of Tailscale
+# authors for copyright purposes.
+#
+# Names should be added to this file as one of
+#     Organization's name
+#     Individual's name <submission email address>
+#     Individual's name <submission email address> <email2> <emailN>
+#
+# Please keep the list sorted.
+#
+# You do not need to add entries to this list, and we don't actively
+# populate this list. If you do want to be acknowledged explicitly as
+# a copyright holder, though, then please send a PR referencing your
+# earlier contributions and clarifying whether it's you or your
+# company that owns the rights to your contribution.
+
+Tailscale Inc.

+ 27 - 0
LICENSE

@@ -0,0 +1,27 @@
+Copyright (c) 2020 Tailscale & AUTHORS. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of Tailscale Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ 24 - 0
PATENTS

@@ -0,0 +1,24 @@
+Additional IP Rights Grant (Patents)
+
+"This implementation" means the copyrightable works distributed by
+Tailscale Inc. as part of the Tailscale project.
+
+Tailscale Inc. hereby grants to You a perpetual, worldwide,
+non-exclusive, no-charge, royalty-free, irrevocable (except as stated
+in this section) patent license to make, have made, use, offer to
+sell, sell, import, transfer and otherwise run, modify and propagate
+the contents of this implementation of Tailscale, where such license
+applies only to those patent claims, both currently owned or
+controlled by Tailscale Inc. and acquired in the future, licensable
+by Tailscale Inc. that are necessarily infringed by this
+implementation of Tailscale.  This grant does not include claims that
+would be infringed only as a consequence of further modification of
+this implementation.  If you or your agent or exclusive licensee
+institute or order or agree to the institution of patent litigation
+against any entity (including a cross-claim or counterclaim in a
+lawsuit) alleging that this implementation of Tailscale or any code
+incorporated within this implementation of Tailscale constitutes
+direct or contributory patent infringement, or inducement of patent
+infringement, then any patent rights granted to you under this License
+for this implementation of Tailscale shall terminate as of the date
+such litigation is filed.

+ 28 - 0
atomicfile/atomicfile.go

@@ -0,0 +1,28 @@
+// Copyright 2019 Tailscale & AUTHORS. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package atomicfile contains code related to writing to filesystems
+// atomically.
+//
+// This package should be considered internal; its API is not stable.
+package atomicfile // import "tailscale.com/atomicfile"
+
+import (
+	"fmt"
+	"io/ioutil"
+	"os"
+)
+
+// WriteFile writes data to filename+some suffix, then renames it
+// into filename.
+func WriteFile(filename string, data []byte, perm os.FileMode) error {
+	tmpname := filename + ".new.tmp"
+	if err := ioutil.WriteFile(tmpname, data, perm); err != nil {
+		return fmt.Errorf("%#v: %v", tmpname, err)
+	}
+	if err := os.Rename(tmpname, filename); err != nil {
+		return fmt.Errorf("%#v->%#v: %v", tmpname, filename, err)
+	}
+	return nil
+}

+ 14 - 0
cmd/relaynode/.gitignore

@@ -0,0 +1,14 @@
+/*.tar.gz
+/*.deb
+/*.rpm
+/*.spec
+/pkgver
+debian/changelog
+debian/debhelper-build-stamp
+debian/files
+debian/*.log
+debian/*.substvars
+debian/*.debhelper
+debian/tailscale-relay
+/tailscale-relay/
+/tailscale-relay-*

+ 63 - 0
cmd/relaynode/acl.json

@@ -0,0 +1,63 @@
+{
+  // Declare static groups of users beyond those in the identity service
+  "Groups": {
+    "group:eng": ["[email protected]", "[email protected]"]
+  },
+
+  // Declare convenient hostname aliases to use in place of IP addresses
+  "Hosts": {
+    "h222": "100.2.2.2"
+  },
+
+  // Access control list
+  "ACLs": [
+    {
+        "Action": "accept",
+        // Match any of several users
+        "Users": ["[email protected]", "[email protected]"],
+        // Match any port on h222, and port 22 of 10.1.2.3
+        "Ports": ["h222:*", "10.1.2.3:22"]
+    },
+    {
+        "Action": "accept",
+        // Match any user at all
+        "Users": ["*"],
+        // Match port 80 on one machine, ports 53 and 5353 on a second one,
+        // and ports 8000 through 8080 (a port range) on a third one.
+        "Ports": ["h222:80", "10.8.8.8:53,5353", "10.2.3.4:8000-8080"]
+    },
+    {
+        "Action": "accept",
+        // Match all users in the "Admin" role (network administrators)
+        "Users": ["role:Admin", "group:eng"],
+        // Allow access to port 22 on all servers
+        "Ports": ["*:22"]
+    },
+    {
+        "Action": "accept",
+        "Users": ["role:User"],
+        // Match only windows and linux workstations (not implemented yet)
+        "OS": ["windows", "linux"],
+        // Only desktop machines are allowed to access this server
+        "Ports": ["10.1.1.1:443"]
+    },
+    {
+        "Action": "accept",
+        "Users": ["*"],
+        // Match machines which have never been authorized, or which expired.
+        // (not implemented yet)
+        "MachineAuth": ["unauthorized", "expired"],
+        // Logged-in users on unauthorized machines can access the email server.
+        // Open the TLS ports for SMTP, IMAP, and HTTP.
+        "Ports": ["10.1.2.3:465", "10.1.2.3:993", "10.1.2.3:443"]
+    },
+
+    // Match absolutely everything. Comment out this section if you want
+    // the above ACLs to apply.
+    { "Action": "accept", "Users": ["*"], "Ports": ["*:*"] },
+
+    // Leave this line here so that every rule can end in a comma.
+    // It has no effect since it has no matching rules.
+    {"Action": "accept"}
+  ]
+}

+ 1 - 0
cmd/relaynode/clean.do

@@ -0,0 +1 @@
+rm -f debian/changelog *~ debian/*~

+ 13 - 0
cmd/relaynode/clean.od

@@ -0,0 +1,13 @@
+exec >&2
+read -r package <package
+rm -f *~ .*~ \
+	debian/*~ debian/changelog debian/debhelper-build-stamp \
+	debian/*.log debian/files debian/*.substvars debian/*.debhelper \
+	*.tar.gz *.deb *.rpm *.spec pkgver relaynode *.exe
+[ -n "$package" ] && rm -rf "debian/$package"
+for d in */.stamp; do
+	if [ -e "$d" ]; then
+		dir=$(dirname "$d")
+		rm -rf "$dir"
+	fi
+done

+ 10 - 0
cmd/relaynode/deb.od

@@ -0,0 +1,10 @@
+exec >&2
+dir=${1%/*}
+redo-ifchange "$S/$dir/package" "$S/oss/version/short.txt"
+read -r package <"$S/$dir/package"
+read -r version <"$S/oss/version/short.txt"
+arch=$(dpkg --print-architecture)
+
+redo-ifchange "$dir/${package}_$arch.deb"
+rm -f "$dir/${package}"_*_"$arch.deb"
+ln -sf "${package}_$arch.deb" "$dir/${package}_${version}_$arch.deb"

+ 1 - 0
cmd/relaynode/debian/README.Debian

@@ -0,0 +1 @@
+Tailscale IPN relay daemon.

+ 5 - 0
cmd/relaynode/debian/changelog.do

@@ -0,0 +1,5 @@
+redo-ifchange ../../../version/short.txt gen-changelog
+(
+	cd ..
+	debian/gen-changelog
+) >$3

+ 0 - 0
cmd/relaynode/debian/clean


+ 1 - 0
cmd/relaynode/debian/compat

@@ -0,0 +1 @@
+9

+ 14 - 0
cmd/relaynode/debian/control

@@ -0,0 +1,14 @@
+Source: tailscale-relay
+Section: net
+Priority: extra
+Maintainer: Avery Pennarun <[email protected]>
+Build-Depends: debhelper (>= 10.2.5), dh-systemd (>= 1.5)
+Standards-Version: 3.9.2
+Homepage: https://tailscale.com/
+Vcs-Git: https://github.com/tailscale/tailscale
+Vcs-Browser: https://github.com/tailscale/tailscale
+
+Package: tailscale-relay
+Architecture: any
+Depends: ${shlibs:Depends}, ${misc:Depends}
+Description: Traffic relay node for Tailscale IPN

+ 11 - 0
cmd/relaynode/debian/copyright

@@ -0,0 +1,11 @@
+Format: http://svn.debian.org/wsvn/dep/web/deps/dep5.mdwn?op=file&rev=173
+Upstream-Name: tailscale-relay
+Upstream-Contact: Avery Pennarun <[email protected]>
+Source: https://github.com/tailscale/tailscale/
+
+Files: *
+Copyright: © 2019 Tailscale Inc. <[email protected]>
+License: Proprietary
+ *
+ * Copyright 2019 Tailscale Inc. All rights reserved.
+ *

+ 25 - 0
cmd/relaynode/debian/gen-changelog

@@ -0,0 +1,25 @@
+#!/bin/sh
+read junk pkgname <debian/control
+read shortver <../../version/short.txt
+git log --pretty='format:'"$pkgname"' (SHA:%H) unstable; urgency=low
+
+  * %s
+  
+ -- %aN <%aE>  %aD
+' . |
+python -Sc '
+import os, re, subprocess, sys
+
+first = True
+def Describe(g):
+  global first
+  if first:
+    s = sys.argv[1]
+    first = False
+  else:
+    sha = g.group(1)
+    s = subprocess.check_output(["git", "describe", "--", sha]).strip().decode("utf-8")
+  return re.sub(r"^\D*", "", s)
+
+print(re.sub(r"SHA:([0-9a-f]+)", Describe, sys.stdin.read()))
+' "$shortver"

+ 4 - 0
cmd/relaynode/debian/install

@@ -0,0 +1,4 @@
+relaynode			/usr/sbin
+tailscale-login			/usr/sbin
+taillogin			/usr/sbin
+acl.json			/etc/tailscale

+ 8 - 0
cmd/relaynode/debian/postinst

@@ -0,0 +1,8 @@
+#DEBHELPER#
+
+f=/var/lib/tailscale/relay.conf
+if ! [ -e "$f" ]; then
+	echo
+	echo "Note: Run tailscale-login to configure $f." >&2
+	echo
+fi

+ 10 - 0
cmd/relaynode/debian/rules

@@ -0,0 +1,10 @@
+#!/usr/bin/make -f
+DESTDIR=debian/tailscale-relay
+
+override_dh_auto_test:
+override_dh_auto_install:
+	mkdir -p "${DESTDIR}/etc/default"
+	cp tailscale-relay.defaults "${DESTDIR}/etc/default/tailscale-relay"
+
+%:
+	dh $@ --with=systemd

+ 12 - 0
cmd/relaynode/debian/tailscale-relay.service

@@ -0,0 +1,12 @@
+[Unit]
+Description=Traffic relay node for Tailscale IPN
+After=network.target
+ConditionPathExists=/var/lib/tailscale/relay.conf
+
+[Service]
+EnvironmentFile=/etc/default/tailscale-relay
+ExecStart=/usr/sbin/relaynode --config=/var/lib/tailscale/relay.conf --tun=wg0 $PORT $ACL_FILE $FLAGS
+Restart=on-failure
+
+[Install]
+WantedBy=multi-user.target

+ 20 - 0
cmd/relaynode/default.deb.od

@@ -0,0 +1,20 @@
+exec >&2
+dir=${1%/*}
+redo-ifchange "$S/oss/version/short.txt" "$S/$dir/package" "$dir/debtmp.dir"
+read -r package <"$S/$dir/package"
+read -r version <"$S/oss/version/short.txt"
+arch=$(dpkg --print-architecture)
+
+(
+	cd "$S/$dir"
+	git ls-files debian | xargs redo-ifchange debian/changelog
+)
+cp -a "$S/$dir/debian" "$dir/debtmp/"
+rm -f "$dir/debtmp/debian/$package.debhelper.log"
+(
+	cd "$dir/debtmp" &&
+	debian/rules build &&
+	fakeroot debian/rules binary
+)
+
+mv "$dir/${package}_${version}_${arch}.deb" "$3"

+ 21 - 0
cmd/relaynode/default.dir.od

@@ -0,0 +1,21 @@
+# Generate a directory tree suitable for forming a tarball of
+# this package.
+exec >&2
+dir=${1%/*}
+outdir=$PWD/${1%.dir}
+rm -rf "$outdir"
+mkdir "$outdir"
+touch $outdir/.stamp
+sfiles="
+	tailscale-login
+	acl.json
+	debian/*.service
+	*.defaults
+"
+ofiles="
+	relaynode
+	../taillogin/taillogin
+"
+redo-ifchange "$outdir/.stamp"
+(cd "$S/$dir" && redo-ifchange $sfiles && cp $sfiles "$outdir/")
+(cd "$dir" && redo-ifchange $ofiles && cp $ofiles "$outdir/")

+ 14 - 0
cmd/relaynode/default.rpm.od

@@ -0,0 +1,14 @@
+exec >&2
+dir=${1%/*}
+pkg=${1##*/}
+pkg=${pkg%.rpm}
+redo-ifchange "$S/oss/version/short.txt" "$dir/$pkg.tar.gz" "$dir/$pkg.spec"
+read -r pkgver junk <"$S/oss/version/short.txt"
+
+machine=$(uname -m)
+rpmbase=$HOME/rpmbuild
+
+mkdir -p "$rpmbase/SOURCES/"
+cp "$dir/$pkg.tar.gz" "$rpmbase/SOURCES/"
+rpmbuild -bb "$dir/$pkg.spec"
+mv "$rpmbase/RPMS/$machine/$pkg-$pkgver.$machine.rpm" $3

+ 7 - 0
cmd/relaynode/default.spec.od

@@ -0,0 +1,7 @@
+redo-ifchange "$S/$1.in" "$S/oss/version/short.txt"
+read -r pkgver junk <"$S/oss/version/short.txt"
+basever=${pkgver%-*}
+subver=${pkgver#*-}
+sed -e "s/Version: 0.00$/Version: $basever/" \
+    -e "s/Release: 0$/Release: $subver/" \
+	<"$S/$1.in" >"$3"

+ 8 - 0
cmd/relaynode/default.tar.gz.od

@@ -0,0 +1,8 @@
+exec >&2
+xdir=${1%.tar.gz}
+base=${xdir##*/}
+updir=${xdir%/*}
+redo-ifchange "$xdir.dir"
+OUT="$PWD/$3"
+
+cd "$updir" && tar -czvf "$OUT" --exclude "$base/.stamp" "$base"

+ 15 - 0
cmd/relaynode/dist.od

@@ -0,0 +1,15 @@
+# Build packages for customer distribution.
+dir=${1%/*}
+cd "$dir"
+targets="tarball"
+if which dh_clean fakeroot dpkg >/dev/null; then
+	targets="$targets deb"
+else
+	echo "Skipping debian packages: debhelper and/or dpkg build tools missing." >&2
+fi
+if which rpm >/dev/null; then
+	targets="$targets rpm"
+else
+	echo "Skipping rpm packages: rpm build tools missing." >&2
+fi
+redo-ifchange $targets

+ 1 - 0
cmd/relaynode/docker/.gitignore

@@ -0,0 +1 @@
+/relaynode

+ 17 - 0
cmd/relaynode/docker/Dockerfile

@@ -0,0 +1,17 @@
+# Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Build with: docker build -t tailcontrol-alpine .
+# Run with: docker run --cap-add=NET_ADMIN --device=/dev/net/tun:/dev/net/tun -it tailcontrol-alpine
+
+FROM debian:stretch-slim
+
+RUN apt-get update && apt-get -y install iproute2 iptables
+RUN apt-get -y install ca-certificates
+RUN apt-get -y install nginx-light
+
+COPY relaynode /
+
+# tailcontrol -tun=wg0 -dbdir=$HOME/taildb >> tailcontrol.log 2>&1 &
+CMD ["/relaynode", "-R", "--config", "relay.conf"]

+ 1 - 0
cmd/relaynode/docker/all.do

@@ -0,0 +1 @@
+redo-ifchange build

+ 3 - 0
cmd/relaynode/docker/build.do

@@ -0,0 +1,3 @@
+exec >&2
+redo-ifchange Dockerfile relaynode
+docker build -t tailscale .

+ 2 - 0
cmd/relaynode/docker/relaynode.do

@@ -0,0 +1,2 @@
+redo-ifchange ../relaynode
+cp ../relaynode $3

+ 10 - 0
cmd/relaynode/docker/run.sh

@@ -0,0 +1,10 @@
+#!/bin/sh
+# Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+set -e
+redo-ifchange build
+docker run --cap-add=NET_ADMIN \
+	--device=/dev/net/tun:/dev/net/tun \
+	-it tailscale

+ 1 - 0
cmd/relaynode/package

@@ -0,0 +1 @@
+tailscale-relay

+ 300 - 0
cmd/relaynode/relaynode.go

@@ -0,0 +1,300 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Relaynode is the old Linux Tailscale daemon.
+//
+// Deprecated: this program will be soon deleted. The replacement is
+// cmd/tailscaled.
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"net/http"
+	"net/http/pprof"
+	"os"
+	"os/signal"
+	"strings"
+	"syscall"
+	"time"
+
+	"github.com/apenwarr/fixconsole"
+	"github.com/google/go-cmp/cmp"
+	"github.com/klauspost/compress/zstd"
+	"github.com/pborman/getopt/v2"
+	"github.com/tailscale/wireguard-go/wgcfg"
+	"tailscale.com/atomicfile"
+	"tailscale.com/control/controlclient"
+	"tailscale.com/control/policy"
+	"tailscale.com/logpolicy"
+	"tailscale.com/version"
+	"tailscale.com/wgengine"
+	"tailscale.com/wgengine/filter"
+	"tailscale.com/wgengine/magicsock"
+)
+
+func main() {
+	err := fixconsole.FixConsoleIfNeeded()
+	if err != nil {
+		log.Printf("fixConsoleOutput: %v\n", err)
+	}
+	config := getopt.StringLong("config", 'f', "", "path to config file")
+	server := getopt.StringLong("server", 's', "https://login.tailscale.com", "URL to tailcontrol server")
+	listenport := getopt.Uint16Long("port", 'p', magicsock.DefaultPort, "WireGuard port (0=autoselect)")
+	tunname := getopt.StringLong("tun", 0, "wg0", "tunnel interface name")
+	alwaysrefresh := getopt.BoolLong("always-refresh", 0, "force key refresh at startup")
+	fake := getopt.BoolLong("fake", 0, "fake tunnel+routing instead of tuntap")
+	nuroutes := getopt.BoolLong("no-single-routes", 'N', "disallow (non-subnet) routes to single nodes")
+	rroutes := getopt.BoolLong("remote-routes", 'R', "allow routing subnets to remote nodes")
+	droutes := getopt.BoolLong("default-routes", 'D', "allow default route on remote node")
+	routes := getopt.StringLong("routes", 0, "", "list of IP ranges this node can relay")
+	aclfile := getopt.StringLong("acl-file", 0, "", "restrict traffic relaying according to json ACL file")
+	derp := getopt.BoolLong("derp", 0, "enable bypass via Detour Encrypted Routing Protocol (DERP)", "false")
+	debug := getopt.StringLong("debug", 0, "", "Address of debug server")
+	getopt.Parse()
+	if len(getopt.Args()) > 0 {
+		log.Fatalf("too many non-flag arguments: %#v", getopt.Args()[0])
+	}
+	uflags := controlclient.UFlagsHelper(!*nuroutes, *rroutes, *droutes)
+	if *config == "" {
+		log.Fatal("no --config file specified")
+	}
+	if *tunname == "" {
+		log.Printf("Warning: no --tun device specified; routing disabled.\n")
+	}
+
+	pol := logpolicy.New("tailnode.log.tailscale.io", *config)
+
+	logf := wgengine.RusagePrefixLog(log.Printf)
+
+	// The wgengine takes a wireguard configuration produced by the
+	// controlclient, and runs the actual tunnels and packets.
+	var e wgengine.Engine
+	if *fake {
+		e, err = wgengine.NewFakeUserspaceEngine(logf, *listenport, *derp)
+	} else {
+		e, err = wgengine.NewUserspaceEngine(logf, *tunname, *listenport, *derp)
+	}
+	if err != nil {
+		log.Fatalf("Error starting wireguard engine: %v\n", err)
+	}
+
+	e = wgengine.NewWatchdog(e)
+	var lastacljson string
+	var p *policy.Policy
+
+	if *aclfile == "" {
+		e.SetFilter(nil)
+	} else {
+		lastacljson = readOrFatal(*aclfile)
+		p = installFilterOrFatal(e, *aclfile, lastacljson, nil)
+	}
+
+	var lastNetMap *controlclient.NetworkMap
+	var lastUserMap map[string][]filter.IP
+	statusFunc := func(new controlclient.Status) {
+		if new.URL != "" {
+			fmt.Fprintf(os.Stderr, "To authenticate, visit:\n\n\t%s\n\n", new.URL)
+			return
+		}
+		if new.Err != "" {
+			log.Print(new.Err)
+			return
+		}
+		if new.Persist != nil {
+			if err := saveConfig(*config, *new.Persist); err != nil {
+				log.Println(err)
+			}
+		}
+
+		if m := new.NetMap; m != nil {
+			if lastNetMap != nil {
+				s1 := strings.Split(lastNetMap.Concise(), "\n")
+				s2 := strings.Split(new.NetMap.Concise(), "\n")
+				logf("netmap diff:\n%v\n", cmp.Diff(s1, s2))
+			}
+			lastNetMap = m
+
+			if m.Equal(&controlclient.NetworkMap{}) {
+				return
+			}
+
+			wgcfg, err := m.WGCfg(uflags, m.DNS)
+			if err != nil {
+				log.Fatalf("Error getting wg config: %v\n", err)
+			}
+			err = e.Reconfig(wgcfg, m.DNSDomains)
+			if err != nil {
+				log.Fatalf("Error reconfiguring engine: %v\n", err)
+			}
+			lastUserMap = m.UserMap()
+			if p != nil {
+				matches, err := p.Expand(lastUserMap)
+				if err != nil {
+					log.Fatalf("Error expanding ACLs: %v\n", err)
+				}
+				e.SetFilter(filter.New(matches))
+			}
+		}
+	}
+
+	cfg, err := loadConfig(*config)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	hi := controlclient.NewHostinfo()
+	hi.FrontendLogID = pol.PublicID.String()
+	hi.BackendLogID = pol.PublicID.String()
+	if *routes != "" {
+		for _, routeStr := range strings.Split(*routes, ",") {
+			cidr, err := wgcfg.ParseCIDR(routeStr)
+			if err != nil {
+				log.Fatalf("--routes: not an IP range: %s", routeStr)
+			}
+			hi.RoutableIPs = append(hi.RoutableIPs, *cidr)
+		}
+	}
+
+	c, err := controlclient.New(controlclient.Options{
+		Persist:   cfg,
+		ServerURL: *server,
+		Hostinfo:  &hi,
+		NewDecompressor: func() (controlclient.Decompressor, error) {
+			return zstd.NewReader(nil)
+		},
+		KeepAlive: true,
+	})
+	c.SetStatusFunc(statusFunc)
+	if err != nil {
+		log.Fatal(err)
+	}
+	lf := controlclient.LoginDefault
+	if *alwaysrefresh {
+		lf |= controlclient.LoginInteractive
+	}
+	c.Login(nil, lf)
+
+	// Print the wireguard status when we get an update.
+	e.SetStatusCallback(func(s *wgengine.Status, err error) {
+		if err != nil {
+			log.Fatalf("Wireguard engine status error: %v\n", err)
+		}
+		var ss []string
+		for _, p := range s.Peers {
+			if p.LastHandshake.IsZero() {
+				ss = append(ss, "x")
+			} else {
+				ss = append(ss, fmt.Sprintf("%d/%d", p.RxBytes, p.TxBytes))
+			}
+		}
+		logf("v%v peers: %v\n", version.LONG, strings.Join(ss, " "))
+		c.UpdateEndpoints(0, s.LocalAddrs)
+	})
+
+	if *debug != "" {
+		go runDebugServer(*debug)
+	}
+
+	sigCh := make(chan os.Signal, 1)
+	signal.Notify(sigCh, os.Interrupt)
+	signal.Notify(sigCh, syscall.SIGTERM)
+
+	t := time.NewTicker(5 * time.Second)
+loop:
+	for {
+		select {
+		case <-t.C:
+			// For the sake of curiosity, request a status
+			// update periodically.
+			e.RequestStatus()
+
+			// check if aclfile has changed.
+			// TODO(apenwarr): use fsnotify instead of polling?
+			if *aclfile != "" {
+				json := readOrFatal(*aclfile)
+				if json != lastacljson {
+					logf("ACL file (%v) changed. Reloading filter.\n", *aclfile)
+					lastacljson = json
+					p = installFilterOrFatal(e, *aclfile, json, lastUserMap)
+				}
+			}
+		case <-sigCh:
+			logf("signal received, exiting")
+			t.Stop()
+			break loop
+		}
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
+	defer cancel()
+
+	e.Close()
+	pol.Shutdown(ctx)
+}
+
+func loadConfig(path string) (cfg controlclient.Persist, err error) {
+	b, err := ioutil.ReadFile(path)
+	if os.IsNotExist(err) {
+		log.Printf("config %s does not exist", path)
+		return controlclient.Persist{}, nil
+	}
+	if err := json.Unmarshal(b, &cfg); err != nil {
+		return controlclient.Persist{}, fmt.Errorf("load config: %v", err)
+	}
+	return cfg, nil
+}
+
+func saveConfig(path string, cfg controlclient.Persist) error {
+	b, err := json.MarshalIndent(cfg, "", "\t")
+	if err != nil {
+		return fmt.Errorf("save config: %v", err)
+	}
+	if err := atomicfile.WriteFile(path, b, 0666); err != nil {
+		return fmt.Errorf("save config: %v", err)
+	}
+	return nil
+}
+
+func readOrFatal(filename string) string {
+	b, err := ioutil.ReadFile(filename)
+	if err != nil {
+		log.Fatalf("%v: ReadFile: %v\n", filename, err)
+	}
+	return string(b)
+}
+
+func installFilterOrFatal(e wgengine.Engine, filename, acljson string, usermap map[string][]filter.IP) *policy.Policy {
+	p, err := policy.Parse(acljson)
+	if err != nil {
+		log.Fatalf("%v: json filter: %v\n", filename, err)
+	}
+
+	matches, err := p.Expand(usermap)
+	if err != nil {
+		log.Fatalf("%v: json filter: %v\n", filename, err)
+	}
+
+	e.SetFilter(filter.New(matches))
+	return p
+}
+
+func runDebugServer(addr string) {
+	mux := http.NewServeMux()
+	mux.HandleFunc("/debug/pprof/", pprof.Index)
+	mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
+	mux.HandleFunc("/debug/pprof/profile", pprof.Profile)
+	mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
+	mux.HandleFunc("/debug/pprof/trace", pprof.Trace)
+	srv := http.Server{
+		Addr:    addr,
+		Handler: mux,
+	}
+	if err := srv.ListenAndServe(); err != nil {
+		log.Fatal(err)
+	}
+}

+ 9 - 0
cmd/relaynode/rpm.od

@@ -0,0 +1,9 @@
+exec >&2
+dir=${2%/*}
+redo-ifchange "$S/$dir/package" "$S/oss/version/short.txt"
+read -r package <"$S/$dir/package"
+read -r pkgver <"$S/oss/version/short.txt"
+machine=$(uname -m)
+redo-ifchange "$dir/$package.rpm"
+rm -f "$dir/${package}"-*."$machine.rpm"
+ln -sf "$package.rpm" "$dir/$package-$pkgver.$machine.rpm"

+ 4 - 0
cmd/relaynode/tailscale-login

@@ -0,0 +1,4 @@
+#!/bin/sh
+cfg=/var/lib/tailscale/relay.conf
+dir=$(dirname "$0")
+"$dir/taillogin" --config="$cfg"

+ 14 - 0
cmd/relaynode/tailscale-relay.defaults

@@ -0,0 +1,14 @@
+# Set the port to listen on for incoming VPN packets.
+# Remote nodes will automatically be informed about the new port number,
+# but you might want to configure this in order to set external firewall
+# settings.
+PORT="--port=41641"
+
+# Comment out this line to allow all traffic to be relayed.
+# Or edit the given file to allow specific traffic.
+# The example file is unlikely to match any users on your network, so it
+# will block all incoming traffic by default.
+ACL_FILE="--acl-file=/etc/tailscale/acl.json"
+
+# Extra flags you might want to pass to relaynode.
+FLAGS=""

+ 42 - 0
cmd/relaynode/tailscale-relay.spec.in

@@ -0,0 +1,42 @@
+Name: tailscale-relay
+Version: 0.00
+Release: 0
+Summary: Traffic relay node for Tailscale
+Group: Network
+License: Proprietary
+URL: https://tailscale.com/
+Vendor: Tailscale Inc.
+#Source: https://github.com/tailscale/tailscale
+Source0: tailscale-relay.tar.gz
+#Prefix: %{_prefix}
+Packager: Avery Pennarun <[email protected]>
+BuildRoot: %{_tmppath}/%{name}-root
+
+%description
+Traffic relay node for Tailscale.
+
+%prep
+%setup -n tailscale-relay
+
+%build
+
+%install
+D=$RPM_BUILD_ROOT
+[ "$D" = "/" -o -z "$D" ] && exit 99
+rm -rf "$D"
+mkdir -p $D/usr/sbin $D/lib/systemd/system $D/etc/default $D/etc/tailscale
+cp taillogin tailscale-login relaynode $D/usr/sbin
+cp tailscale-relay.service $D/lib/systemd/system/
+cp tailscale-relay.defaults $D/etc/default/tailscale-relay
+cp acl.json $D/etc/tailscale/acl.json
+
+%clean
+
+%files
+%defattr(-,root,root)
+%config(noreplace) /etc/default/tailscale-relay
+%config(noreplace) /etc/tailscale/acl.json
+/lib/systemd/system/tailscale-relay.service
+/usr/sbin/taillogin
+/usr/sbin/tailscale-login
+/usr/sbin/relaynode

+ 7 - 0
cmd/relaynode/tarball.od

@@ -0,0 +1,7 @@
+dir=${1%/*}
+redo-ifchange "$S/$dir/package" "$S/oss/version/short.txt"
+read -r package <"$S/$dir/package"
+read -r version <"$S/oss/version/short.txt"
+redo-ifchange "$dir/$package.tar.gz"
+rm -f "$dir/$package"-*.tar.gz
+ln -sf "$package.tar.gz" "$dir/$package-$version.tar.gz"

+ 96 - 0
cmd/taillogin/taillogin.go

@@ -0,0 +1,96 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The taillogin command, invoked via the tailscale-login shell script, is shipped
+// with the current (old) Linux client, to log in to Tailscale on a Linux box.
+//
+// Deprecated: this will be deleted, to be replaced by cmd/tailscale.
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"os"
+
+	"github.com/pborman/getopt/v2"
+	"tailscale.com/atomicfile"
+	"tailscale.com/control/controlclient"
+	"tailscale.com/logpolicy"
+)
+
+func main() {
+	config := getopt.StringLong("config", 'f', "", "path to config file")
+	server := getopt.StringLong("server", 's', "https://login.tailscale.com", "URL to tailgate server")
+	getopt.Parse()
+	if len(getopt.Args()) > 0 {
+		log.Fatal("too many non-flag arguments")
+	}
+	if *config == "" {
+		log.Fatal("no --config file specified")
+	}
+	pol := logpolicy.New("tailnode.log.tailscale.io", *config)
+	defer pol.Close()
+
+	cfg, err := loadConfig(*config)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	hi := controlclient.NewHostinfo()
+	hi.FrontendLogID = pol.PublicID.String()
+	hi.BackendLogID = pol.PublicID.String()
+
+	done := make(chan struct{}, 1)
+	c, err := controlclient.New(controlclient.Options{
+		Persist:   cfg,
+		ServerURL: *server,
+		Hostinfo:  &hi,
+	})
+	c.SetStatusFunc(func(new controlclient.Status) {
+		if new.URL != "" {
+			fmt.Fprintf(os.Stderr, "To authenticate, visit:\n\n\t%s\n\n", new.URL)
+			return
+		}
+		if new.Err != "" {
+			log.Print(new.Err)
+			return
+		}
+		if new.Persist != nil {
+			if err := saveConfig(*config, *new.Persist); err != nil {
+				log.Println(err)
+			}
+		}
+		if new.NetMap != nil {
+			done <- struct{}{}
+		}
+	})
+	c.Login(nil, 0)
+	<-done
+	log.Printf("Success.\n")
+}
+
+func loadConfig(path string) (cfg controlclient.Persist, err error) {
+	b, err := ioutil.ReadFile(path)
+	if os.IsNotExist(err) {
+		log.Printf("config %s does not exist", path)
+		return controlclient.Persist{}, nil
+	}
+	if err := json.Unmarshal(b, &cfg); err != nil {
+		return controlclient.Persist{}, fmt.Errorf("load config: %v", err)
+	}
+	return cfg, nil
+}
+
+func saveConfig(path string, cfg controlclient.Persist) error {
+	b, err := json.MarshalIndent(cfg, "", "\t")
+	if err != nil {
+		return fmt.Errorf("save config: %v", err)
+	}
+	if err := atomicfile.WriteFile(path, b, 0666); err != nil {
+		return fmt.Errorf("save config: %v", err)
+	}
+	return nil
+}

+ 149 - 0
cmd/tailscale/ipn.go

@@ -0,0 +1,149 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The tailscale command is the Tailscale command-line client. It interacts
+// with the tailscaled client daemon.
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"net"
+	"os"
+	"os/signal"
+	"syscall"
+
+	"github.com/apenwarr/fixconsole"
+	"github.com/pborman/getopt/v2"
+	"tailscale.com/atomicfile"
+	"tailscale.com/control/controlclient"
+	"tailscale.com/ipn"
+	"tailscale.com/logpolicy"
+	"tailscale.com/safesocket"
+)
+
+func pump(ctx context.Context, bc *ipn.BackendClient, c net.Conn) {
+	defer log.Printf("Control connection done.\n")
+	defer c.Close()
+	for ctx.Err() == nil {
+		msg, err := ipn.ReadMsg(c)
+		if err != nil {
+			log.Printf("ReadMsg: %v\n", err)
+			break
+		}
+		bc.GotNotifyMsg(msg)
+	}
+}
+
+func main() {
+	err := fixconsole.FixConsoleIfNeeded()
+	if err != nil {
+		log.Printf("fixConsoleOutput: %v\n", err)
+	}
+	config := getopt.StringLong("config", 'f', "", "path to config file")
+	server := getopt.StringLong("server", 's', "https://login.tailscale.com", "URL to tailcontrol server")
+	alwaysrefresh := getopt.BoolLong("always-refresh", 0, "force key refresh at startup")
+	nuroutes := getopt.BoolLong("no-single-routes", 'N', "disallow (non-subnet) routes to single nodes")
+	rroutes := getopt.BoolLong("remote-routes", 'R', "allow routing subnets to remote nodes")
+	droutes := getopt.BoolLong("default-routes", 'D', "allow default route on remote node")
+	getopt.Parse()
+	if *config == "" {
+		logpolicy.New("tailnode.log.tailscale.io", "tailscale")
+		log.Fatal("no --config file specified")
+	}
+	if len(getopt.Args()) > 0 {
+		log.Fatalf("too many non-flag arguments: %#v", getopt.Args()[0])
+	}
+
+	pol := logpolicy.New("tailnode.log.tailscale.io", *config)
+	defer pol.Close()
+
+	prefs, err := loadConfig(*config)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	// TODO(apenwarr): fix different semantics between prefs and uflags
+	// TODO(apenwarr): allow setting/using CorpDNS
+	prefs.WantRunning = true
+	prefs.RouteAll = *rroutes || *droutes
+	prefs.AllowSingleHosts = !*nuroutes
+
+	c, err := safesocket.Connect("", "Tailscale", "tailscaled", 41112)
+	if err != nil {
+		log.Fatalf("safesocket.Connect: %v\n", err)
+	}
+	clientToServer := func(b []byte) {
+		ipn.WriteMsg(c, b)
+	}
+
+	ctx, cancel := context.WithCancel(context.Background())
+	lf := controlclient.LoginDefault
+	if *alwaysrefresh {
+		lf |= controlclient.LoginInteractive
+	}
+
+	go func() {
+		interrupt := make(chan os.Signal, 1)
+		signal.Notify(interrupt, syscall.SIGINT, syscall.SIGTERM)
+		<-interrupt
+		c.Close()
+	}()
+
+	bc := ipn.NewBackendClient(log.Printf, clientToServer)
+	opts := ipn.Options{
+		Prefs:      prefs,
+		ServerURL:  *server,
+		LoginFlags: lf,
+		Notify: func(n ipn.Notify) {
+			log.Printf("Notify: %v\n", n)
+			if n.ErrMessage != nil {
+				log.Fatalf("backend error: %v\n", *n.ErrMessage)
+			}
+			if s := n.State; s != nil {
+				switch *s {
+				case ipn.NeedsLogin:
+					bc.StartLoginInteractive()
+				case ipn.NeedsMachineAuth:
+					fmt.Fprintf(os.Stderr, "\nTo authorize your machine, visit (as admin):\n\n\t%s/admin/machines\n\n", *server)
+				case ipn.Starting, ipn.Running:
+					// Done full authentication process
+					cancel()
+				}
+			}
+			if url := n.BrowseToURL; url != nil {
+				fmt.Fprintf(os.Stderr, "\nTo authenticate, visit:\n\n\t%s\n\n", *url)
+			}
+			if p := n.Prefs; p != nil {
+				prefs = *p
+				saveConfig(*config, *p)
+			}
+		},
+	}
+	bc.Start(opts)
+	pump(ctx, bc, c)
+}
+
+func loadConfig(path string) (ipn.Prefs, error) {
+	b, err := ioutil.ReadFile(path)
+	if os.IsNotExist(err) {
+		log.Printf("config %s does not exist", path)
+		return ipn.NewPrefs(), nil
+	}
+	return ipn.PrefsFromBytes(b, false)
+}
+
+func saveConfig(path string, prefs ipn.Prefs) error {
+	b, err := json.MarshalIndent(prefs, "", "\t")
+	if err != nil {
+		return fmt.Errorf("save config: %v", err)
+	}
+	if err := atomicfile.WriteFile(path, b, 0666); err != nil {
+		return fmt.Errorf("save config: %v", err)
+	}
+	return nil
+}

+ 88 - 0
cmd/tailscaled/ipnd.go

@@ -0,0 +1,88 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The tailscaled program is the Tailscale client daemon. It's configured
+// and controlled via the tailscale CLI program.
+//
+// It primarily supports Linux, though other systems will likely be
+// supported in the future.
+package main
+
+import (
+	"context"
+	"log"
+	"net/http"
+	"net/http/pprof"
+
+	"github.com/apenwarr/fixconsole"
+	"github.com/pborman/getopt/v2"
+	"tailscale.com/ipn/ipnserver"
+	"tailscale.com/logpolicy"
+	"tailscale.com/wgengine"
+)
+
+func main() {
+	fake := getopt.BoolLong("fake", 0, "fake tunnel+routing instead of tuntap")
+	debug := getopt.StringLong("debug", 0, "", "Address of debug server")
+
+	logf := wgengine.RusagePrefixLog(log.Printf)
+
+	err := fixconsole.FixConsoleIfNeeded()
+	if err != nil {
+		logf("fixConsoleOutput: %v\n", err)
+	}
+	pol := logpolicy.New("tailnode.log.tailscale.io", "tailscaled")
+
+	getopt.Parse()
+	if len(getopt.Args()) > 0 {
+		log.Fatalf("too many non-flag arguments: %#v", getopt.Args()[0])
+	}
+
+	if *debug != "" {
+		go runDebugServer(*debug)
+	}
+
+	var e wgengine.Engine
+	if *fake {
+		e, err = wgengine.NewFakeUserspaceEngine(logf, 0, false)
+	} else {
+		e, err = wgengine.NewUserspaceEngine(logf, "ts0", 0, false)
+	}
+	if err != nil {
+		log.Fatalf("wgengine.New: %v\n", err)
+	}
+	e = wgengine.NewWatchdog(e)
+
+	opts := ipnserver.Options{
+		SurviveDisconnects: true,
+		AllowQuit:          false,
+	}
+	err = ipnserver.Run(context.Background(), logf, pol.PublicID.String(), opts, e)
+	if err != nil {
+		log.Fatalf("tailscaled: %v\n", err)
+	}
+
+	// TODO(crawshaw): It would be nice to start a timeout context the moment a signal
+	// is received and use that timeout to give us a moment to finish uploading logs
+	// here. But the signal is handled inside ipnserver.Run, so some plumbing is needed.
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel()
+	pol.Shutdown(ctx)
+}
+
+func runDebugServer(addr string) {
+	mux := http.NewServeMux()
+	mux.HandleFunc("/debug/pprof/", pprof.Index)
+	mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
+	mux.HandleFunc("/debug/pprof/profile", pprof.Profile)
+	mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
+	mux.HandleFunc("/debug/pprof/trace", pprof.Trace)
+	srv := http.Server{
+		Addr:    addr,
+		Handler: mux,
+	}
+	if err := srv.ListenAndServe(); err != nil {
+		log.Fatal(err)
+	}
+}

+ 594 - 0
control/controlclient/auto.go

@@ -0,0 +1,594 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package controlclient implements the client for the IPN control plane.
+//
+// It handles authentication, port picking, and collects the local
+// network configuration.
+package controlclient
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"reflect"
+	"sync"
+	"time"
+
+	"golang.org/x/oauth2"
+	"tailscale.com/logger"
+	"tailscale.com/logtail/backoff"
+	"tailscale.com/tailcfg"
+)
+
+// TODO(apenwarr): eliminate the 'state' variable, as it's now obsolete.
+//  It's used only by the unit tests.
+type state int
+
+const (
+	stateNew = state(iota)
+	stateNotAuthenticated
+	stateAuthenticating
+	stateURLVisitRequired
+	stateAuthenticated
+	stateSynchronized // connected and received map update
+)
+
+func (s state) MarshalText() ([]byte, error) {
+	return []byte(s.String()), nil
+}
+
+func (s state) String() string {
+	switch s {
+	case stateNew:
+		return "state:new"
+	case stateNotAuthenticated:
+		return "state:not-authenticated"
+	case stateAuthenticating:
+		return "state:authenticating"
+	case stateURLVisitRequired:
+		return "state:url-visit-required"
+	case stateAuthenticated:
+		return "state:authenticated"
+	case stateSynchronized:
+		return "state:synchronized"
+	default:
+		return fmt.Sprintf("state:unknown:%d", int(s))
+	}
+}
+
+type Status struct {
+	LoginFinished *struct{}
+	Err           string
+	URL           string
+	Persist       *Persist         // locally persisted configuration
+	NetMap        *NetworkMap      // server-pushed configuration
+	Hostinfo      tailcfg.Hostinfo // current Hostinfo data
+	state         state
+}
+
+// Equal reports whether s and s2 are equal.
+func (s *Status) Equal(s2 *Status) bool {
+	if s == nil && s2 == nil {
+		return true
+	}
+	return s != nil && s2 != nil &&
+		(s.LoginFinished == nil) == (s2.LoginFinished == nil) &&
+		s.Err == s2.Err &&
+		s.URL == s2.URL &&
+		reflect.DeepEqual(s.Persist, s2.Persist) &&
+		reflect.DeepEqual(s.NetMap, s2.NetMap) &&
+		reflect.DeepEqual(s.Hostinfo, s2.Hostinfo) &&
+		s.state == s2.state
+}
+
+func (s Status) String() string {
+	b, err := json.MarshalIndent(s, "", "\t")
+	if err != nil {
+		panic(err)
+	}
+	return s.state.String() + " " + string(b)
+}
+
+type LoginGoal struct {
+	wantLoggedIn bool          // true if we *want* to be logged in
+	token        *oauth2.Token // oauth token to use when logging in
+	flags        LoginFlags    // flags to use when logging in
+	url          string        // auth url that needs to be visited
+}
+
+// Client connects to a tailcontrol server for a node.
+type Client struct {
+	direct   *Direct // our interface to the server APIs
+	timeNow  func() time.Time
+	logf     logger.Logf
+	expiry   *time.Time
+	closed   bool
+	newMapCh chan struct{} // readable when we must restart a map request
+
+	mu         sync.Mutex   // mutex guards the following fields
+	statusFunc func(Status) // called to update Client status
+
+	loggedIn     bool       // true if currently logged in
+	loginGoal    *LoginGoal // non-nil if some login activity is desired
+	synced       bool       // true if our netmap is up-to-date
+	hostinfo     tailcfg.Hostinfo
+	inPollNetMap bool // true if currently running a PollNetMap
+	inSendStatus int  // number of sendStatus calls currently in progress
+	state        state
+
+	authCtx    context.Context // context used for auth requests
+	mapCtx     context.Context // context used for netmap requests
+	authCancel func()          // cancel the auth context
+	mapCancel  func()          // cancel the netmap context
+	quit       chan struct{}   // when closed, goroutines should all exit
+	authDone   chan struct{}   // when closed, auth goroutine is done
+	mapDone    chan struct{}   // when closed, map goroutine is done
+}
+
+// New creates and starts a new Client.
+func New(opts Options) (*Client, error) {
+	c, err := NewNoStart(opts)
+	if c != nil {
+		c.Start()
+	}
+	return c, err
+}
+
+// NewNoStart creates a new Client, but without calling Start on it.
+func NewNoStart(opts Options) (*Client, error) {
+	direct, err := NewDirect(opts)
+	if err != nil {
+		return nil, err
+	}
+	c := &Client{
+		direct:   direct,
+		timeNow:  opts.TimeNow,
+		logf:     opts.Logf,
+		newMapCh: make(chan struct{}, 1),
+		quit:     make(chan struct{}),
+		authDone: make(chan struct{}),
+		mapDone:  make(chan struct{}),
+	}
+	c.authCtx, c.authCancel = context.WithCancel(context.Background())
+	c.mapCtx, c.mapCancel = context.WithCancel(context.Background())
+	return c, nil
+}
+
+// Start starts the client's goroutines.
+//
+// It should only be called for clients created by NewNoStart.
+func (c *Client) Start() {
+	go c.authRoutine()
+	go c.mapRoutine()
+}
+
+func (c *Client) cancelAuth() {
+	c.mu.Lock()
+	if c.authCancel != nil {
+		c.authCancel()
+	}
+	if !c.closed {
+		c.authCtx, c.authCancel = context.WithCancel(context.Background())
+	}
+	c.mu.Unlock()
+}
+
+func (c *Client) cancelMapLocked() {
+	if c.mapCancel != nil {
+		c.mapCancel()
+	}
+	if !c.closed {
+		c.mapCtx, c.mapCancel = context.WithCancel(context.Background())
+	}
+}
+
+func (c *Client) cancelMapUnsafely() {
+	c.mu.Lock()
+	c.cancelMapLocked()
+	c.mu.Unlock()
+}
+
+func (c *Client) cancelMapSafely() {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	c.logf("cancelMapSafely: synced=%v\n", c.synced)
+
+	if c.inPollNetMap == true {
+		// received at least one netmap since the last
+		// interruption. That means the server has already
+		// fully processed our last request, which might
+		// include UpdateEndpoints(). Interrupt it and try
+		// again.
+		c.cancelMapLocked()
+	} else {
+		// !synced means we either haven't done a netmap
+		// request yet, or it hasn't answered yet. So the
+		// server is in an undefined state. If we send
+		// another netmap request too soon, it might race
+		// with the last one, and if we're very unlucky,
+		// the new request will be applied before the old one,
+		// and the wrong endpoints will get registered. We
+		// have to tell the client to abort politely, only
+		// after it receives a response to its existing netmap
+		// request.
+		select {
+		case c.newMapCh <- struct{}{}:
+			c.logf("cancelMapSafely: wrote to channel\n")
+		default:
+			// if channel write failed, then there was already
+			// an outstanding newMapCh request. One is enough,
+			// since it'll always use the latest endpoints.
+			c.logf("cancelMapSafely: channel was full\n")
+		}
+	}
+}
+
+func (c *Client) authRoutine() {
+	defer close(c.authDone)
+	bo := backoff.Backoff{Name: "authRoutine"}
+
+	for {
+		c.mu.Lock()
+		c.logf("authRoutine: %s\n", c.state)
+		expiry := c.expiry
+		goal := c.loginGoal
+		ctx := c.authCtx
+		synced := c.synced
+		c.mu.Unlock()
+
+		select {
+		case <-c.quit:
+			c.logf("authRoutine: quit\n")
+			return
+		default:
+		}
+
+		report := func(err error, msg string) {
+			c.logf("%s: %v\n", msg, err)
+			err = fmt.Errorf("%s: %v", msg, err)
+			// don't send status updates for context errors,
+			// since context cancelation is always on purpose.
+			if ctx.Err() == nil {
+				c.sendStatus("authRoutine1", err, "", nil)
+			}
+		}
+
+		if goal == nil {
+			// Wait for something interesting to happen
+			var exp <-chan time.Time
+			if expiry != nil && !expiry.IsZero() {
+				// if expiry is in the future, don't delay
+				// past that time.
+				// If it's in the past, then it's already
+				// being handled by someone, so no need to
+				// wake ourselves up again.
+				now := c.timeNow()
+				if expiry.Before(now) {
+					delay := expiry.Sub(now)
+					if delay > 5*time.Second {
+						delay = time.Second
+					}
+					exp = time.After(delay)
+				}
+			}
+			select {
+			case <-ctx.Done():
+				c.logf("authRoutine: context done.\n")
+			case <-exp:
+				// Unfortunately the key expiry isn't provided
+				// by the control server until mapRequest.
+				// So we have to do some hackery with c.expiry
+				// in here.
+				// TODO(apenwarr): add a key expiry field in RegisterResponse.
+				c.logf("authRoutine: key expiration check.\n")
+				if synced && expiry != nil && !expiry.IsZero() && expiry.Before(c.timeNow()) {
+					c.logf("Key expired; setting loggedIn=false.")
+
+					c.mu.Lock()
+					c.loginGoal = &LoginGoal{
+						wantLoggedIn: c.loggedIn,
+					}
+					c.loggedIn = false
+					c.expiry = nil
+					c.mu.Unlock()
+				}
+			}
+		} else if !goal.wantLoggedIn {
+			err := c.direct.TryLogout(c.authCtx)
+			if err != nil {
+				report(err, "TryLogout")
+				bo.BackOff(ctx, err)
+				continue
+			}
+
+			// success
+			c.mu.Lock()
+			c.loggedIn = false
+			c.loginGoal = nil
+			c.state = stateNotAuthenticated
+			c.synced = false
+			c.mu.Unlock()
+
+			c.sendStatus("authRoutine2", nil, "", nil)
+			bo.BackOff(ctx, nil)
+		} else { // ie. goal.wantLoggedIn
+			c.mu.Lock()
+			if goal.url != "" {
+				c.state = stateURLVisitRequired
+			} else {
+				c.state = stateAuthenticating
+			}
+			c.mu.Unlock()
+
+			var url string
+			var err error
+			var f string
+			if goal.url != "" {
+				url, err = c.direct.WaitLoginURL(ctx, goal.url)
+				f = "WaitLoginURL"
+			} else {
+				url, err = c.direct.TryLogin(ctx, goal.token, goal.flags)
+				f = "TryLogin"
+			}
+			if err != nil {
+				report(err, f)
+				bo.BackOff(ctx, err)
+				continue
+			} else if url != "" {
+				if goal.url != "" {
+					err = fmt.Errorf("weird: server required a new url?")
+					report(err, "WaitLoginURL")
+				}
+				goal.url = url
+				goal.token = nil
+				goal.flags = LoginDefault
+
+				c.mu.Lock()
+				c.loginGoal = goal
+				c.state = stateURLVisitRequired
+				c.synced = false
+				c.mu.Unlock()
+
+				c.sendStatus("authRoutine3", err, url, nil)
+				bo.BackOff(ctx, err)
+				continue
+			}
+
+			// success
+			c.mu.Lock()
+			c.loggedIn = true
+			c.loginGoal = nil
+			c.state = stateAuthenticated
+			c.mu.Unlock()
+
+			c.sendStatus("authRoutine4", nil, "", nil)
+			c.cancelMapSafely()
+			bo.BackOff(ctx, nil)
+		}
+	}
+}
+
+func (c *Client) mapRoutine() {
+	defer close(c.mapDone)
+	bo := backoff.Backoff{Name: "mapRoutine"}
+
+	for {
+		c.mu.Lock()
+		c.logf("mapRoutine: %s\n", c.state)
+		loggedIn := c.loggedIn
+		ctx := c.mapCtx
+		c.mu.Unlock()
+
+		select {
+		case <-c.quit:
+			c.logf("mapRoutine: quit\n")
+			return
+		default:
+		}
+
+		report := func(err error, msg string) {
+			c.logf("%s: %v\n", msg, err)
+			err = fmt.Errorf("%s: %v", msg, err)
+			// don't send status updates for context errors,
+			// since context cancelation is always on purpose.
+			if ctx.Err() == nil {
+				c.sendStatus("mapRoutine1", err, "", nil)
+			}
+		}
+
+		if !loggedIn {
+			// Wait for something interesting to happen
+			c.mu.Lock()
+			c.synced = false
+			// c.state is set by authRoutine()
+			c.mu.Unlock()
+
+			select {
+			case <-ctx.Done():
+				c.logf("mapRoutine: context done.\n")
+			case <-c.newMapCh:
+				c.logf("mapRoutine: new map needed while idle.\n")
+			}
+		} else {
+			// Be sure this is false when we're not inside
+			// PollNetMap, so that cancelMapSafely() can notify
+			// us correctly.
+			c.mu.Lock()
+			c.inPollNetMap = false
+			c.mu.Unlock()
+
+			err := c.direct.PollNetMap(ctx, -1, func(nm *NetworkMap) {
+				c.mu.Lock()
+
+				select {
+				case <-c.newMapCh:
+					c.logf("mapRoutine: new map request during PollNetMap. canceling.\n")
+					c.cancelMapLocked()
+
+					// Don't emit this netmap; we're
+					// about to request a fresh one.
+					c.mu.Unlock()
+					return
+				default:
+				}
+
+				c.synced = true
+				c.inPollNetMap = true
+				if c.loggedIn {
+					c.state = stateSynchronized
+				}
+				exp := nm.Expiry
+				c.expiry = &exp
+				stillAuthed := c.loggedIn
+				state := c.state
+
+				c.mu.Unlock()
+
+				c.logf("mapRoutine: netmap received: %s\n", state)
+				if stillAuthed {
+					c.sendStatus("mapRoutine2", nil, "", nm)
+				}
+			})
+
+			c.mu.Lock()
+			c.synced = false
+			c.inPollNetMap = false
+			if c.state == stateSynchronized {
+				c.state = stateAuthenticated
+			}
+			c.mu.Unlock()
+
+			if err != nil {
+				report(err, "PollNetMap")
+				bo.BackOff(ctx, err)
+				continue
+			}
+			bo.BackOff(ctx, nil)
+		}
+	}
+}
+
+func (c *Client) AuthCantContinue() bool {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	return !c.loggedIn && (c.loginGoal == nil || c.loginGoal.url != "")
+}
+
+func (c *Client) SetStatusFunc(fn func(Status)) {
+	c.mu.Lock()
+	c.statusFunc = fn
+	c.mu.Unlock()
+}
+
+func (c *Client) SetHostinfo(hi tailcfg.Hostinfo) {
+	c.direct.SetHostinfo(hi)
+	// Send new Hostinfo to server
+	c.cancelMapSafely()
+}
+
+func (c *Client) sendStatus(who string, err error, url string, nm *NetworkMap) {
+	c.mu.Lock()
+	state := c.state
+	loggedIn := c.loggedIn
+	synced := c.synced
+	statusFunc := c.statusFunc
+	hi := c.hostinfo
+	c.inSendStatus++
+	c.mu.Unlock()
+
+	c.logf("sendStatus: %s: %v\n", who, state)
+
+	var p *Persist
+	var fin *struct{}
+	if state == stateAuthenticated {
+		fin = &struct{}{}
+	}
+	if nm != nil && loggedIn && synced {
+		pp := c.direct.GetPersist()
+		p = &pp
+	} else {
+		// don't send netmap status, as it's misleading when we're
+		// not logged in.
+		nm = nil
+	}
+	new := Status{
+		LoginFinished: fin,
+		URL:           url,
+		Persist:       p,
+		NetMap:        nm,
+		Hostinfo:      hi,
+		state:         state,
+	}
+	if err != nil {
+		new.Err = err.Error()
+	}
+	if statusFunc != nil {
+		statusFunc(new)
+	}
+
+	c.mu.Lock()
+	c.inSendStatus--
+	c.mu.Unlock()
+}
+
+func (c *Client) Login(t *oauth2.Token, flags LoginFlags) {
+	c.logf("client.Login(%v, %v)\n", t != nil, flags)
+
+	c.mu.Lock()
+	c.loginGoal = &LoginGoal{
+		wantLoggedIn: true,
+		token:        t,
+		flags:        flags,
+	}
+	c.mu.Unlock()
+
+	c.cancelAuth()
+}
+
+func (c *Client) Logout() {
+	c.logf("client.Logout()\n")
+
+	c.mu.Lock()
+	c.loginGoal = &LoginGoal{
+		wantLoggedIn: false,
+	}
+	c.mu.Unlock()
+
+	c.cancelAuth()
+}
+
+func (c *Client) UpdateEndpoints(localPort uint16, endpoints []string) {
+	changed, err := c.direct.SetEndpoints(localPort, endpoints)
+	if err != nil {
+		c.sendStatus("updateEndpoints", err, "", nil)
+	} else if changed {
+		c.cancelMapSafely()
+	}
+}
+
+func (c *Client) Shutdown() {
+	c.logf("client.Shutdown()\n")
+
+	c.mu.Lock()
+	inSendStatus := c.inSendStatus
+	closed := c.closed
+	if !closed {
+		c.closed = true
+		c.statusFunc = nil
+	}
+	c.mu.Unlock()
+
+	c.logf("client.Shutdown: inSendStatus=%v\n", inSendStatus)
+	if !closed {
+		close(c.quit)
+		c.cancelAuth()
+		<-c.authDone
+		c.cancelMapUnsafely()
+		<-c.mapDone
+		c.logf("Client.Shutdown done.\n")
+	}
+}

+ 1107 - 0
control/controlclient/auto_test.go

@@ -0,0 +1,1107 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build depends_on_currently_unreleased
+
+package controlclient
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"os"
+	"reflect"
+	"runtime/pprof"
+	"strconv"
+	"strings"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/klauspost/compress/zstd"
+	"github.com/tailscale/wireguard-go/wgcfg"
+	"tailscale.com/tailcfg"
+	"tailscale.com/testy"
+	"tailscale.io/control" // not yet released
+)
+
+func TestTest(t *testing.T) {
+	check := testy.NewResourceCheck()
+	defer check.Assert(t)
+}
+
+func TestServerStartStop(t *testing.T) {
+	s := newServer(t)
+	defer s.close()
+}
+
+func TestControlBasics(t *testing.T) {
+	s := newServer(t)
+	defer s.close()
+
+	c := s.newClient(t, "c")
+	c.Login(nil, 0)
+	status := c.waitStatus(t, stateURLVisitRequired)
+	c.postAuthURL(t, "[email protected]", status.New)
+}
+
+func TestControl(t *testing.T) {
+	log.SetFlags(log.Ltime | log.Lshortfile)
+	s := newServer(t)
+	defer s.close()
+
+	c1 := s.newClient(t, "c1")
+
+	t.Run("authorize first tailscale.com client", func(t *testing.T) {
+		const loginName = "[email protected]"
+		c1.checkNoStatus(t)
+		c1.loginAs(t, loginName)
+		c1.waitStatus(t, stateAuthenticated)
+		status := c1.waitStatus(t, stateSynchronized)
+		if got, want := status.New.NetMap.MachineStatus, tailcfg.MachineUnauthorized; got != want {
+			t.Errorf("MachineStatus=%v, want %v", got, want)
+		}
+		c1.checkNoStatus(t)
+		affectedPeers, err := s.control.AuthorizeMachine(c1.mkey, c1.nkey)
+		if err != nil {
+			t.Fatal(err)
+		}
+		status = c1.status(t)
+		if got := status.New.Persist.LoginName; got != loginName {
+			t.Errorf("LoginName=%q, want %q", got, loginName)
+		}
+		if got := status.New.Persist.Provider; got != "google" {
+			t.Errorf("Provider=%q, want google", got)
+		}
+		if len(affectedPeers) != 1 || affectedPeers[0] != c1.id {
+			t.Errorf("authorization should notify the node being authorized (%v), got: %v", c1.id, affectedPeers)
+		}
+		if peers := status.New.NetMap.Peers; len(peers) != 0 {
+			t.Errorf("peers=%v, want none", peers)
+		}
+		if userID := status.New.NetMap.User; userID == 0 {
+			t.Errorf("NetMap.User is missing")
+		} else {
+			profile := status.New.NetMap.UserProfiles[userID]
+			if profile.LoginName != loginName {
+				t.Errorf("NetMap user LoginName=%q, want %q", profile.LoginName, loginName)
+			}
+		}
+		c1.checkNoStatus(t)
+	})
+
+	c2 := s.newClient(t, "c2")
+
+	t.Run("authorize second tailscale.io client", func(t *testing.T) {
+		c2.loginAs(t, "[email protected]")
+		c2.waitStatus(t, stateAuthenticated)
+		c2.waitStatus(t, stateSynchronized)
+		c2.checkNoStatus(t)
+
+		// Make sure not to call operations like this on a client in a
+		// test until the initial map read is done. Otherwise the
+		// initial map read will trigger a map update to peers, and
+		// there will sometimes be a spurious map update.
+		affectedPeers, err := s.control.AuthorizeMachine(c2.mkey, c2.nkey)
+		if err != nil {
+			t.Fatal(err)
+		}
+		status := c2.waitStatus(t, stateSynchronized)
+		c1Status := c1.waitStatus(t, stateSynchronized)
+
+		if len(affectedPeers) != 2 {
+			t.Errorf("affectedPeers=%v, want two entries", affectedPeers)
+		}
+		if want := []tailcfg.NodeID{c1.id, c2.id}; !nodeIDsEqual(affectedPeers, want) {
+			t.Errorf("affectedPeers=%v, want %v", affectedPeers, want)
+		}
+
+		c1NetMap := c1Status.New.NetMap
+		c2NetMap := status.New.NetMap
+		if len(c1NetMap.Peers) != 1 || len(c2NetMap.Peers) != 1 {
+			t.Error("wrong number of peers")
+		} else {
+			if c2NetMap.Peers[0].Key != c1.nkey {
+				t.Errorf("c2 has wrong peer key %v, want %v", c2NetMap.Peers[0].Key, c1.nkey)
+			}
+			if c1NetMap.Peers[0].Key != c2.nkey {
+				t.Errorf("c1 has wrong peer key %v, want %v", c1NetMap.Peers[0].Key, c2.nkey)
+			}
+		}
+		if t.Failed() {
+			t.Errorf("client1 network map:\n%s", c1Status.New.NetMap)
+			t.Errorf("client2 network map:\n%s", status.New.NetMap)
+		}
+
+		c1.checkNoStatus(t)
+		c2.checkNoStatus(t)
+	})
+
+	// c3/c4 are on a different domain to c1/c2.
+	// The two domains should never affect one another.
+	c3 := s.newClient(t, "c3")
+
+	t.Run("authorize first onmicrosoft client", func(t *testing.T) {
+		c3.loginAs(t, "[email protected]")
+		c3.waitStatus(t, stateAuthenticated)
+		c3Status := c3.waitStatus(t, stateSynchronized)
+		// no machine authorization for tailscale.onmicrosoft.com
+		c1.checkNoStatus(t)
+		c2.checkNoStatus(t)
+
+		netMap := c3Status.New.NetMap
+		if netMap.NodeKey != c3.nkey {
+			t.Errorf("netMap.NodeKey=%v, want %v", netMap.NodeKey, c3.nkey)
+		}
+		if len(netMap.Peers) != 0 {
+			t.Errorf("netMap.Peers=%v, want none", netMap.Peers)
+		}
+
+		c1.checkNoStatus(t)
+		c2.checkNoStatus(t)
+		c3.checkNoStatus(t)
+	})
+
+	c4 := s.newClient(t, "c4")
+
+	t.Run("authorize second onmicrosoft client", func(t *testing.T) {
+		c4.loginAs(t, "[email protected]")
+		c4.waitStatus(t, stateAuthenticated)
+		c3Status := c3.waitStatus(t, stateSynchronized)
+		c4Status := c4.waitStatus(t, stateSynchronized)
+		c3NetMap := c3Status.New.NetMap
+		c4NetMap := c4Status.New.NetMap
+
+		c1.checkNoStatus(t)
+		c2.checkNoStatus(t)
+
+		if len(c3NetMap.Peers) != 1 {
+			t.Errorf("wrong number of c3 peers: %d", len(c3NetMap.Peers))
+		} else if len(c4NetMap.Peers) != 1 {
+			t.Errorf("wrong number of c4 peers: %d", len(c4NetMap.Peers))
+		} else {
+			if c3NetMap.Peers[0].Key != c4.nkey || c4NetMap.Peers[0].Key != c3.nkey {
+				t.Error("wrong peer key")
+			}
+		}
+		if t.Failed() {
+			t.Errorf("client3 network map:\n%s", c3NetMap)
+			t.Errorf("client4 network map:\n%s", c4NetMap)
+		}
+	})
+
+	var c1NetMap *NetworkMap
+	t.Run("update c1 and c2 endpoints", func(t *testing.T) {
+		c1Endpoints := []string{"172.16.1.5:12345", "4.4.4.4:4444"}
+		c1.checkNoStatus(t)
+		c1.UpdateEndpoints(1234, c1Endpoints)
+		c1NetMap = c1.status(t).New.NetMap
+		c2NetMap := c2.status(t).New.NetMap
+		c1.checkNoStatus(t)
+		c2.checkNoStatus(t)
+
+		if c1NetMap.LocalPort != 1234 {
+			t.Errorf("c1 netmap localport=%d, want 1234", c1NetMap.LocalPort)
+		}
+		if len(c2NetMap.Peers) != 1 {
+			t.Fatalf("wrong peer count: %d", len(c2NetMap.Peers))
+		}
+		if got := c2NetMap.Peers[0].Endpoints; !reflect.DeepEqual(c1Endpoints, got) {
+			t.Errorf("c2 peer endpoints=%v, want %v", got, c1Endpoints)
+		}
+		c3.checkNoStatus(t)
+		c4.checkNoStatus(t)
+
+		c2Endpoints := []string{"172.16.1.7:6543", "5.5.5.5.3333"}
+		c2.UpdateEndpoints(9876, c2Endpoints)
+		c1NetMap = c1.status(t).New.NetMap
+		c2NetMap = c2.status(t).New.NetMap
+
+		if c1NetMap.LocalPort != 1234 {
+			t.Errorf("c1 netmap localport=%d, want 1234", c1NetMap.LocalPort)
+		}
+		if c2NetMap.LocalPort != 9876 {
+			t.Errorf("c2 netmap localport=%d, want 9876", c2NetMap.LocalPort)
+		}
+		if got := c2NetMap.Peers[0].Endpoints; !reflect.DeepEqual(c1Endpoints, got) {
+			t.Errorf("c2 peer endpoints=%v, want %v", got, c1Endpoints)
+		}
+		if got := c1NetMap.Peers[0].Endpoints; !reflect.DeepEqual(c2Endpoints, got) {
+			t.Errorf("c1 peer endpoints=%v, want %v", got, c2Endpoints)
+		}
+
+		c1.checkNoStatus(t)
+		c2.checkNoStatus(t)
+		c3.checkNoStatus(t)
+		c4.checkNoStatus(t)
+	})
+
+	allZeros, err := wgcfg.ParseCIDR("0.0.0.0/0")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	t.Run("route all traffic via client 1", func(t *testing.T) {
+		aips := []wgcfg.CIDR{}
+		aips = append(aips, c1NetMap.Addresses...)
+		aips = append(aips, *allZeros)
+
+		affectedPeers, err := s.control.SetAllowedIPs(c1.nkey, aips)
+		if err != nil {
+			t.Fatal(err)
+		}
+		c2Status := c2.status(t)
+		c2NetMap := c2Status.New.NetMap
+
+		if want := []tailcfg.NodeID{c2.id}; !nodeIDsEqual(affectedPeers, want) {
+			t.Errorf("affectedPeers=%v, want %v", affectedPeers, want)
+		}
+
+		_ = c2NetMap
+		foundAllZeros := false
+		for _, cidr := range c2NetMap.Peers[0].AllowedIPs {
+			if cidr == *allZeros {
+				foundAllZeros = true
+			}
+		}
+		if !foundAllZeros {
+			t.Errorf("client2 peer does not contain %s: %v", allZeros, c2NetMap.Peers[0].AllowedIPs)
+		}
+
+		c1.checkNoStatus(t)
+		c3.checkNoStatus(t)
+		c4.checkNoStatus(t)
+	})
+
+	t.Run("remove route all traffic", func(t *testing.T) {
+		affectedPeers, err := s.control.SetAllowedIPs(c1.nkey, c1NetMap.Addresses)
+		if err != nil {
+			t.Fatal(err)
+		}
+		c2NetMap := c2.status(t).New.NetMap
+
+		if want := []tailcfg.NodeID{c2.id}; !nodeIDsEqual(affectedPeers, want) {
+			t.Errorf("affectedPeers=%v, want %v", affectedPeers, want)
+		}
+
+		foundAllZeros := false
+		for _, cidr := range c2NetMap.Peers[0].AllowedIPs {
+			if cidr == *allZeros {
+				foundAllZeros = true
+			}
+		}
+		if foundAllZeros {
+			t.Errorf("client2 peer still contains %s: %v", allZeros, c2NetMap.Peers[0].AllowedIPs)
+		}
+
+		c1.checkNoStatus(t)
+		c3.checkNoStatus(t)
+		c4.checkNoStatus(t)
+	})
+
+	t.Run("refresh client key", func(t *testing.T) {
+		oldKey := c1.nkey
+
+		c1.Login(nil, LoginInteractive)
+		status := c1.waitStatus(t, stateURLVisitRequired)
+		authURL := status.New.URL
+
+		resp, err := c1.httpc.Get(authURL)
+		if err != nil {
+			t.Fatal(err)
+		}
+		if resp.StatusCode != 200 {
+			t.Errorf("GET %s failed: %q", authURL, resp.Status)
+		}
+		body, err := ioutil.ReadAll(resp.Body)
+		resp.Body.Close()
+		if err != nil {
+			t.Fatal(err)
+		}
+		cookies := resp.Cookies()
+		if len(cookies) == 0 || cookies[0].Name != "tailcontrol" {
+			t.Logf("GET %s: %s", authURL, string(body))
+			t.Fatalf("GET %s: bad cookie: %v", authURL, cookies)
+		}
+		c1.waitStatus(t, stateAuthenticated)
+		status = c1.waitStatus(t, stateSynchronized)
+		if status.New.Err != "" {
+			t.Fatal(status.New.Err)
+		}
+
+		c1NetMap := status.New.NetMap
+		c1.nkey = c1NetMap.NodeKey
+		if c1.nkey == oldKey {
+			t.Errorf("new key is the same as the old key: %s", oldKey)
+		}
+		c2NetMap := c2.status(t).New.NetMap
+		if len(c2NetMap.Peers) != 1 || c2NetMap.Peers[0].Key != c1.nkey {
+			t.Errorf("c2 peer: %v, want new node key %v", c1.nkey, c2NetMap.Peers[0].Key)
+		}
+
+		c3.checkNoStatus(t)
+		c4.checkNoStatus(t)
+	})
+}
+
+func TestLoginInterrupt(t *testing.T) {
+	s := newServer(t)
+	defer s.close()
+
+	c := s.newClient(t, "c")
+
+	const loginName = "[email protected]"
+	c.checkNoStatus(t)
+	c.loginAs(t, loginName)
+	c.waitStatus(t, stateAuthenticated)
+	c.waitStatus(t, stateSynchronized)
+	t.Logf("authorizing: %v %v %v %v\n", s, s.control, c.mkey, c.nkey)
+	if _, err := s.control.AuthorizeMachine(c.mkey, c.nkey); err != nil {
+		t.Fatal(err)
+	}
+	status := c.waitStatus(t, stateSynchronized)
+	if got, want := status.New.NetMap.MachineStatus, tailcfg.MachineAuthorized; got != want {
+		t.Errorf("MachineStatus=%v, want %v", got, want)
+	}
+	origAddrs := status.New.NetMap.Addresses
+	if len(origAddrs) == 0 {
+		t.Errorf("Addresses empty, want something")
+	}
+
+	c.Logout()
+	c.waitStatus(t, stateNotAuthenticated)
+	c.Login(nil, 0)
+	status = c.waitStatus(t, stateURLVisitRequired)
+	authURL := status.New.URL
+
+	// Interrupt, and do login again.
+	c.Login(nil, 0)
+	status = c.waitStatus(t, stateURLVisitRequired)
+	authURL2 := status.New.URL
+
+	if authURL == authURL2 {
+		t.Errorf("auth URLs match for subsequent logins: %s", authURL)
+	}
+
+	form := url.Values{"user": []string{loginName}}
+	req, err := http.NewRequest("POST", authURL2, strings.NewReader(form.Encode()))
+	if err != nil {
+		t.Fatal(err)
+	}
+	req.Header.Add("Content-Type", "application/x-www-form-urlencoded")
+	resp, err := c.httpc.Do(req.WithContext(c.ctx))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if resp.StatusCode != 200 {
+		t.Fatalf("POST %s failed: %q", authURL2, resp.Status)
+	}
+	cookies := resp.Cookies()
+	if len(cookies) == 0 || cookies[0].Name != "tailcontrol" {
+		t.Fatalf("POST %s: bad cookie: %v", authURL2, cookies)
+	}
+
+	c.waitStatus(t, stateAuthenticated)
+	status = c.status(t)
+	if got := status.New.NetMap.NodeKey; got != c.nkey {
+		t.Errorf("netmap has wrong node key: %v, want %v", got, c.nkey)
+	}
+	if got := status.New.NetMap.Addresses; len(got) == 0 {
+		t.Errorf("Addresses empty after re-login, want something")
+	} else if len(origAddrs) > 0 && origAddrs[0] != got[0] {
+		t.Errorf("Addresses=%v after re-login, originally was %v, want IP to be unchanged", got, origAddrs)
+	}
+}
+
+func TestSpinUpdateEndpoints(t *testing.T) {
+	s := newServer(t)
+	defer s.close()
+
+	c1 := s.newClient(t, "c1")
+	c2 := s.newClient(t, "c2")
+
+	const loginName = "[email protected]"
+	c1.loginAs(t, loginName)
+	c1.waitStatus(t, stateAuthenticated)
+	c1.waitStatus(t, stateSynchronized)
+	if _, err := s.control.AuthorizeMachine(c1.mkey, c1.nkey); err != nil {
+		t.Fatal(err)
+	}
+	c1.waitStatus(t, stateSynchronized)
+
+	c2.loginAs(t, loginName)
+	c2.waitStatus(t, stateAuthenticated)
+	c2.waitStatus(t, stateSynchronized)
+	if _, err := s.control.AuthorizeMachine(c2.mkey, c2.nkey); err != nil {
+		t.Fatal(err)
+	}
+	c2.waitStatus(t, stateSynchronized)
+	c1.waitStatus(t, stateSynchronized)
+
+	const portBase = 1200
+	const portCount = 50
+	const portLast = portBase + portCount - 1
+
+	errCh := make(chan error, 1)
+	collectPorts := func() error {
+		t := time.After(10 * time.Second)
+		var port int
+		for i := 0; i < portCount; i++ {
+			var status statusChange
+			select {
+			case status = <-c2.statusCh:
+			case <-t:
+				return fmt.Errorf("c2 status timeout (i=%d)", i)
+			}
+			peers := status.New.NetMap.Peers
+			if len(peers) != 1 {
+				return fmt.Errorf("c2 len(peers)=%d, want 1", len(peers))
+			}
+			eps := peers[0].Endpoints
+			if len(eps) != 2 {
+				return fmt.Errorf("c2 peer len(eps)=%d, want 2", len(eps))
+			}
+			ep := eps[1]
+			const prefix = "192.168.1.45:"
+			if !strings.HasPrefix(ep, prefix) {
+				return fmt.Errorf("c2 peer endpoint=%s, want prefix %s", ep, prefix)
+			}
+			var err error
+			port, err = strconv.Atoi(strings.TrimPrefix(ep, prefix))
+			if err != nil {
+				return fmt.Errorf("c2 peer endpoint port: %v", err)
+			}
+			if port == portLast {
+				return nil // got it
+			}
+		}
+		return fmt.Errorf("c2 peer endpoint did not see portLast (saw %d)", port)
+	}
+	go func() {
+		errCh <- collectPorts()
+	}()
+
+	// Very quickly call UpdateEndpoints several times.
+	// Some (most) of these calls will never make it to the server, they
+	// will be canceled by subsequent calls.
+	// The last call goes through, so we can see portLast.
+	eps := []string{"127.0.0.1:1234", ""}
+	for i := 0; i < portCount; i++ {
+		eps[1] = fmt.Sprintf("192.168.1.45:%d", portBase+i)
+		c1.UpdateEndpoints(1234, eps)
+	}
+
+	if err := <-errCh; err != nil {
+		t.Fatalf("collect ports: %v", err)
+	}
+}
+
+func TestLogout(t *testing.T) {
+	s := newServer(t)
+	defer s.close()
+
+	c1 := s.newClient(t, "c1")
+
+	const loginName = "[email protected]"
+	c1.loginAs(t, loginName)
+
+	c1.waitStatus(t, stateAuthenticated)
+	c1.waitStatus(t, stateSynchronized)
+	if _, err := s.control.AuthorizeMachine(c1.mkey, c1.nkey); err != nil {
+		t.Fatal(err)
+	}
+	nkey1 := c1.status(t).New.NetMap.NodeKey
+
+	c1.Logout()
+	c1.waitStatus(t, stateNotAuthenticated)
+
+	c1.loginAs(t, loginName)
+	c1.waitStatus(t, stateAuthenticated)
+	status := c1.waitStatus(t, stateSynchronized)
+	if got, want := status.New.NetMap.MachineStatus, tailcfg.MachineAuthorized; got != want {
+		t.Errorf("re-login MachineStatus=%v, want %v", got, want)
+	}
+	nkey2 := status.New.NetMap.NodeKey
+	if nkey1 == nkey2 {
+		t.Errorf("key not changed after re-login: %v", nkey1)
+	}
+
+	c1.checkNoStatus(t)
+}
+
+func TestExpiry(t *testing.T) {
+	var nowMu sync.Mutex
+	now := time.Now() // Server and Client use this variable as the current time
+	timeNow := func() time.Time {
+		nowMu.Lock()
+		defer nowMu.Unlock()
+		return now
+	}
+	timeInc := func(d time.Duration) {
+		nowMu.Lock()
+		defer nowMu.Unlock()
+		now = now.Add(d)
+	}
+
+	s := newServer(t)
+	s.control.TimeNow = timeNow
+	defer s.close()
+
+	c1 := s.newClient(t, "c1")
+
+	const loginName = "[email protected]"
+	c1.loginAs(t, loginName)
+
+	c1.waitStatus(t, stateAuthenticated)
+	c1.waitStatus(t, stateSynchronized)
+	if _, err := s.control.AuthorizeMachine(c1.mkey, c1.nkey); err != nil {
+		t.Fatal(err)
+	}
+	status := c1.waitStatus(t, stateSynchronized).New
+	nkey1 := c1.direct.persist.PrivateNodeKey
+	nkey1Expiry := status.NetMap.Expiry
+	if wantExpiry := timeNow().Add(180 * 24 * time.Hour); !nkey1Expiry.Equal(wantExpiry) {
+		t.Errorf("node key expiry = %v, want %v", nkey1Expiry, wantExpiry)
+	}
+
+	timeInc(1 * time.Hour)          // move the clock forward
+	c1.Login(nil, LoginInteractive) // refresh the key
+	status = c1.waitStatus(t, stateURLVisitRequired).New
+	c1.postAuthURL(t, loginName, status)
+	c1.waitStatus(t, stateAuthenticated)
+	status = c1.waitStatus(t, stateSynchronized).New
+	if newKey := c1.direct.persist.PrivateNodeKey; newKey == nkey1 {
+		t.Errorf("node key unchanged after LoginInteractive: %v", nkey1)
+	}
+	if want, got := timeNow().Add(180*24*time.Hour), status.NetMap.Expiry; !got.Equal(want) {
+		t.Errorf("node key expiry = %v, want %v", got, want)
+	}
+
+	timeInc(2 * time.Hour) // move the clock forward
+	c1.Login(nil, 0)
+	c1.waitStatus(t, stateAuthenticated)
+	c1.waitStatus(t, stateSynchronized)
+	c1.checkNoStatus(t) // nothing happens, network map stays the same
+
+	timeInc(180 * 24 * time.Hour) // move the clock past expiry
+	c1.loginAs(t, loginName)
+	c1.waitStatus(t, stateAuthenticated)
+	status = c1.waitStatus(t, stateSynchronized).New
+	if got, want := c1.expiry, timeNow(); got.Equal(want) {
+		t.Errorf("node key expiry = %v, want %v", got, want)
+	}
+	if c1.direct.persist.PrivateNodeKey == nkey1 {
+		t.Errorf("node key after 37 hours is still %v", status.NetMap.NodeKey)
+	}
+}
+
+func TestRefresh(t *testing.T) {
+	var nowMu sync.Mutex
+	now := time.Now() // Server and Client use this variable as the current time
+	timeNow := func() time.Time {
+		nowMu.Lock()
+		defer nowMu.Unlock()
+		return now
+	}
+
+	s := newServer(t)
+	s.control.TimeNow = timeNow
+	defer s.close()
+
+	c1 := s.newClient(t, "c1")
+
+	const loginName = "[email protected]" // versabank cfgdb has 72 hour key expiry configured
+	c1.loginAs(t, loginName)
+
+	c1.waitStatus(t, stateAuthenticated)
+	c1.waitStatus(t, stateSynchronized)
+	if _, err := s.control.AuthorizeMachine(c1.mkey, c1.nkey); err != nil {
+		t.Fatal(err)
+	}
+	status := c1.status(t).New
+	nkey1 := status.NetMap.NodeKey
+	nkey1Expiry := status.NetMap.Expiry
+	if wantExpiry := timeNow().Add(72 * time.Hour); !nkey1Expiry.Equal(wantExpiry) {
+		t.Errorf("node key expiry = %v, want %v", nkey1Expiry, wantExpiry)
+	}
+
+	c1.Login(nil, LoginInteractive)
+	c1.waitStatus(t, stateURLVisitRequired)
+	// Until authorization happens, old netmap is still valid.
+	exp := c1.expiry
+	if exp == nil {
+		t.Errorf("expiry==nil during refresh\n")
+	}
+	if got := *exp; !nkey1Expiry.Equal(got) {
+		t.Errorf("node key expiry = %v, want %v", got, nkey1Expiry)
+	}
+	k := tailcfg.NodeKey(*c1.direct.persist.PrivateNodeKey.Public())
+	if k != nkey1 {
+		t.Errorf("node key after 2 hours is %v, want %v", k, nkey1)
+	}
+	c1.Shutdown()
+}
+
+func TestExpectedProvider(t *testing.T) {
+	s := newServer(t)
+	defer s.close()
+
+	c := s.newClient(t, "c1")
+
+	c.direct.persist.LoginName = "[email protected]"
+	c.direct.persist.Provider = "microsoft"
+	c.Login(nil, 0)
+	status := c.readStatus(t)
+	if e, substr := status.New.Err, `provider "microsoft" is not supported`; !strings.Contains(e, substr) {
+		t.Errorf("Err=%q, expect substring %q", e, substr)
+	}
+}
+
+func TestNewUserWebFlow(t *testing.T) {
+	s := newServer(t)
+	defer s.close()
+	s.control.DB().SetSegmentAPIKey(segmentKey)
+
+	c := s.newClient(t, "c1")
+	c.Login(nil, 0)
+	status := c.waitStatus(t, stateURLVisitRequired)
+	authURL := status.New.URL
+	resp, err := c.httpc.Get(authURL)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if resp.StatusCode != 200 {
+		t.Errorf("statuscode=%d, want 200", resp.StatusCode)
+	}
+	b, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		t.Fatal(err)
+	}
+	got := string(b)
+	if !strings.Contains(got, `<input type="email"`) {
+		t.Fatalf("page does not mention email field:\n\n%s", got)
+	}
+
+	loginWith := "[email protected]"
+	resp, err = c.httpc.PostForm(authURL, url.Values{"user": []string{loginWith}})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if resp.StatusCode != 200 {
+		t.Errorf("statuscode=%d, want 200", resp.StatusCode)
+	}
+	b, err = ioutil.ReadAll(resp.Body)
+	if err != nil {
+		t.Fatal(err)
+	}
+	got = string(b)
+	if !strings.Contains(got, "This is a new machine") {
+		t.Fatalf("no machine authorization message:\n\n%s", got)
+	}
+
+	c.waitStatus(t, stateAuthenticated)
+	c.waitStatus(t, stateSynchronized)
+	if _, err := s.control.AuthorizeMachine(c.mkey, c.nkey); err != nil {
+		t.Fatal(err)
+	}
+	netmap := c.status(t).New.NetMap
+	loginname := netmap.UserProfiles[netmap.User].LoginName
+	if loginname != loginWith {
+		t.Errorf("loginame=%s want %s", loginname, loginWith)
+	}
+
+	t.Run("segment POST", func(t *testing.T) {
+		select {
+		case msg := <-s.segmentMsg:
+			if got, want := msg["userId"], control.UserIDHash(netmap.User); got != want {
+				t.Errorf("segment hashed user ID = %q, want %q", got, want)
+			}
+			if got, want := msg["event"], "new node activated"; got != want {
+				t.Errorf("event=%q, want %q", got, want)
+			}
+			if t.Failed() {
+				t.Log(msg)
+			}
+		case <-time.After(3 * time.Second):
+			t.Errorf("timeout waiting for segment identify req")
+		}
+	})
+
+	t.Run("user expiry", func(t *testing.T) {
+		peers, err := s.control.ExpireUserNodes(netmap.User)
+		if err != nil {
+			t.Fatal(err)
+		}
+		if len(peers) != 1 {
+			t.Errorf("len(peers)=%d, want 1", len(peers))
+		}
+		if nodes, err := s.control.DB().AllNodes(netmap.User); err != nil {
+			t.Fatal(err)
+		} else if len(nodes) != 1 {
+			t.Errorf("len(nodes)=%d, want 1", len(nodes))
+		} else if exp := nodes[0].KeyExpiry; exp.After(c.timeNow()) {
+			t.Errorf("node[0] expiry=%v, want it to have passed", exp)
+		}
+		netmap := c.status(t).New.NetMap
+		if exp := netmap.Expiry; exp.After(c.timeNow()) {
+			t.Errorf("expiry=%v, want it to have passed", exp)
+		}
+	})
+}
+
+func TestGoogleSigninButton(t *testing.T) {
+	s := newServer(t)
+	defer s.close()
+
+	c := s.newClient(t, "c1")
+	c.Login(nil, 0)
+	status := c.waitStatus(t, stateURLVisitRequired)
+	authURL := status.New.URL
+	resp, err := c.httpc.Get(authURL)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if resp.StatusCode != 200 {
+		t.Errorf("statuscode=%d, want 200", resp.StatusCode)
+	}
+	b, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		t.Fatal(err)
+	}
+	got := string(b)
+	if !strings.Contains(got, `Sign in with Google`) {
+		t.Fatalf("page does not mention google signin button:\n\n%s", got)
+	}
+
+	resp, err = c.httpc.PostForm(authURL, url.Values{"provider": []string{"google"}})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if resp.StatusCode != 200 {
+		t.Errorf("statuscode=%d, want 200", resp.StatusCode)
+	}
+	b, err = ioutil.ReadAll(resp.Body)
+	if err != nil {
+		t.Fatal(err)
+	}
+	got = string(b)
+	if !strings.Contains(got, "Authorization successful") {
+		t.Fatalf("no machine authorization message:\n\n%s", got)
+	}
+
+	c.waitStatus(t, stateAuthenticated)
+	netmap := c.status(t).New.NetMap
+	loginname := netmap.UserProfiles[netmap.User].LoginName
+	if want := "[email protected]"; loginname != want {
+		t.Errorf("loginame=%s want %s", loginname, want)
+	}
+}
+
+func nodeIDsEqual(n1, n2 []tailcfg.NodeID) bool {
+	if len(n1) != len(n2) {
+		return false
+	}
+	n1s := make(map[tailcfg.NodeID]bool)
+	for _, id := range n1 {
+		n1s[id] = true
+	}
+	for _, id := range n2 {
+		if !n1s[id] {
+			return false
+		}
+	}
+	return true
+}
+
+type server struct {
+	t          *testing.T
+	tmpdir     string
+	control    *control.Server
+	http       *httptest.Server
+	clients    []*client
+	check      *testy.ResourceCheck
+	segmentMsg chan map[string]interface{}
+}
+
+const segmentKey = "segkey"
+
+func newServer(t *testing.T) *server {
+	t.Helper()
+	testy.FixLogs(t)
+	s := &server{
+		t:          t,
+		check:      testy.NewResourceCheck(),
+		segmentMsg: make(chan map[string]interface{}, 8),
+	}
+
+	tmpdir, err := ioutil.TempDir("", "control-test-")
+	if err != nil {
+		t.Fatal(err)
+	}
+	s.tmpdir = tmpdir
+
+	serveSegment := func(w http.ResponseWriter, r *http.Request) {
+		errorf := func(format string, args ...interface{}) {
+			msg := fmt.Sprintf(format, args...)
+			s.segmentMsg <- map[string]interface{}{
+				"error": msg,
+			}
+			http.Error(w, "segment error: "+msg, 400)
+		}
+
+		user, pass, ok := r.BasicAuth()
+		if pass != "" {
+			errorf("unexpected auth passworkd : %s", user)
+			return
+		}
+		if user != segmentKey {
+			errorf("got basic auth user %q, want %q", user, segmentKey)
+			return
+		}
+		if !ok {
+			errorf("no basic auth")
+		}
+		b, err := ioutil.ReadAll(r.Body)
+		if err != nil {
+			errorf("readall: %v", err)
+			return
+		}
+
+		m := make(map[string]interface{})
+		if err := json.Unmarshal(b, &m); err != nil {
+			errorf("unmarshal failed: %v, text:\n%s", err, string(b))
+			return
+		}
+		s.segmentMsg <- m
+	}
+
+	s.http = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		switch r.URL.Path {
+		case "/v1/identify", "/v1/track":
+			serveSegment(w, r)
+		default:
+			s.control.ServeHTTP(w, r)
+		}
+	}))
+	s.control, err = control.New(tmpdir, s.http.URL, true)
+	if err != nil {
+		t.Fatal(err)
+	}
+	s.control.QuietLogging = true
+	control.SegmentServer = s.http.URL
+
+	return s
+}
+
+func (s *server) close() {
+	t := s.t
+	t.Helper()
+	t.Logf("server.close: shutting down %d clients...\n", len(s.clients))
+	for i, c := range s.clients {
+		t.Logf("   %d\n", i)
+		c.Shutdown()
+		t.Logf("   %d CloseIdle\n", i)
+		c.cancel()
+	}
+	// TODO: remove CloseClientConnections when we have a real client shutdown mechanism.
+	// The client shutdown should clean up all HTTP connections and calling this will
+	// hide any cleanup failures.
+	t.Logf("server.close: CloseClientConnections...\n")
+	s.http.CloseClientConnections()
+	t.Logf("server.close: http.Close...\n")
+	s.http.Close()
+	s.control.Shutdown()
+	// TODO: s.control.Shutdown
+	t.Logf("server.close: RemoveAll...\n")
+	os.RemoveAll(s.tmpdir)
+	t.Logf("server.close: done.\n")
+	s.check.Assert(s.t)
+	log.SetOutput(os.Stderr)
+}
+
+type statusChange struct {
+	New Status
+}
+
+func (s *server) newClient(t *testing.T, name string) *client {
+	t.Helper()
+
+	ch := make(chan statusChange, 1024)
+	httpc := s.http.Client()
+	hi := NewHostinfo()
+	hi.FrontendLogID = "go-test-only"
+	hi.BackendLogID = "go-test-only"
+	ctlc, err := NewNoStart(Options{
+		ServerURL: s.http.URL,
+		HTTPC:     httpc,
+		TimeNow:   s.control.TimeNow,
+		Logf: func(fmt string, args ...interface{}) {
+			t.Helper()
+			t.Logf(name+": "+fmt, args...)
+		},
+		Hostinfo: &hi,
+		NewDecompressor: func() (Decompressor, error) {
+			return zstd.NewReader(nil)
+		},
+		KeepAlive: true,
+	})
+	ctlc.SetStatusFunc(func(new Status) {
+		select {
+		case ch <- statusChange{New: new}:
+		case <-time.After(5 * time.Second):
+			log.Fatalf("newClient.statusFunc: stuck.\n")
+		}
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	c := &client{
+		Client:   ctlc,
+		s:        s,
+		name:     name,
+		httpc:    httpc,
+		statusCh: ch,
+	}
+	c.ctx, c.cancel = context.WithCancel(context.Background())
+	s.clients = append(s.clients, c)
+	ctlc.Start()
+
+	return c
+}
+
+type client struct {
+	*Client
+	s        *server
+	name     string
+	ctx      context.Context
+	cancel   func()
+	httpc    *http.Client
+	mkey     tailcfg.MachineKey
+	nkey     tailcfg.NodeKey
+	id       tailcfg.NodeID
+	statusCh <-chan statusChange
+}
+
+func (c *client) loginAs(t *testing.T, user string) *http.Cookie {
+	t.Helper()
+
+	c.Login(nil, 0)
+	status := c.waitStatus(t, stateURLVisitRequired)
+
+	return c.postAuthURL(t, user, status.New)
+}
+
+func (c *client) postAuthURL(t *testing.T, user string, status Status) *http.Cookie {
+	t.Helper()
+	authURL := status.URL
+	if authURL == "" {
+		t.Fatalf("expecting auth URL, got: %v", status)
+	}
+	return postAuthURL(t, c.ctx, c.httpc, user, authURL)
+}
+
+func postAuthURL(t *testing.T, ctx context.Context, httpc *http.Client, user string, authURL string) *http.Cookie {
+	t.Helper()
+
+	form := url.Values{"user": []string{user}}
+	req, err := http.NewRequest("POST", authURL, strings.NewReader(form.Encode()))
+	if err != nil {
+		t.Fatal(err)
+	}
+	req.Header.Add("Content-Type", "application/x-www-form-urlencoded")
+	resp, err := httpc.Do(req.WithContext(ctx))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if resp.StatusCode != 200 {
+		t.Fatalf("POST %s failed: %q", authURL, resp.Status)
+	}
+	cookies := resp.Cookies()
+	if len(cookies) == 0 || cookies[0].Name != "tailcontrol" {
+		t.Fatalf("POST %s: bad cookie: %v", authURL, cookies)
+	}
+	return cookies[0]
+}
+
+func (c *client) checkNoStatus(t *testing.T) {
+	t.Helper()
+	select {
+	case status := <-c.statusCh:
+		t.Fatalf("%s: unexpected status change: %v", c.name, status)
+	default:
+	}
+}
+
+func (c *client) readStatus(t *testing.T) (status statusChange) {
+	t.Helper()
+	select {
+	case status = <-c.statusCh:
+	case <-time.After(3 * time.Second):
+		// TODO(crawshaw): every ~1000 test runs on macOS sees a login get
+		// suck in the httpc.Do GET request of loadServerKey.
+		// Why? Is this a timing problem, with something causing a pause
+		// long enough that the timeout expires? Or is something more
+		// sinister going on in the server (or even the HTTP stack)?
+		//
+		// Extending the timeout to 6 seconds does not solve the problem
+		// but does seem to reduce the frequency of flakes.
+		//
+		// (I have added a runtime.ReadMemStats call here, and have not
+		// observed any global pauses greater than 50 microseconds.)
+		//
+		// NOTE(apenwarr): I can reproduce this more quickly by
+		//  running multiple copies of 'go test -count 100' in
+		//  parallel, but only on macOS. Increasing the timeout to
+		//  10 seconds doesn't seem to help in that case. The
+		//  timeout is often, but not always, in fetching the
+		//  control key, but I think that's not the essential element.
+		pprof.Lookup("goroutine").WriteTo(os.Stdout, 1)
+		t.Logf("%s: timeout: no status received\n", c.name)
+		t.Fatalf("%s: timeout: no status received", c.name)
+	}
+	return status
+}
+
+func (c *client) status(t *testing.T) (status statusChange) {
+	t.Helper()
+	status = c.readStatus(t)
+	if status.New.Err != "" {
+		t.Errorf("%s state %s: status error: %s", c.name, status.New.state, status.New.Err)
+	} else {
+		t.Logf("%s state: %s", c.name, status.New.state)
+		if status.New.NetMap != nil {
+			c.mkey = tailcfg.MachineKey(*status.New.Persist.PrivateMachineKey.Public())
+			if nkey := status.New.NetMap.NodeKey; nkey != (tailcfg.NodeKey{}) && nkey != c.nkey {
+				c.nkey = nkey
+				c.id = c.s.control.DB().Node(c.nkey).ID
+			}
+		}
+	}
+	return status
+}
+
+func (c *client) waitStatus(t *testing.T, want state) statusChange {
+	t.Helper()
+	status := c.status(t)
+	if status.New.state != want {
+		t.Fatalf("%s bad state=%s, want %s (%v)", c.name, status.New.state, want, status.New)
+	}
+	return status
+}
+
+// TODO: test client shutdown + recreate
+// TODO: test server disconnect/reconnect during followup
+// TODO: test network outage downgrade from stateSynchronized -> stateAuthenticated
+// TODO: test os/hostname gets sent to server
+// TODO: test vpn IP not assigned until machine is authorized
+// TODO: test overlapping calls to RefreshLogin
+// TODO: test registering a new node for a user+machine key replaces the old
+//       node even if the OldNodeKey is not specified by the client.
+// TODO: test "does not expire" on server extends expiry in sent network map

+ 68 - 0
control/controlclient/controlclient_test.go

@@ -0,0 +1,68 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package controlclient
+
+import (
+	"reflect"
+	"testing"
+)
+
+func fieldsOf(t reflect.Type) (fields []string) {
+	for i := 0; i < t.NumField(); i++ {
+		fields = append(fields, t.Field(i).Name)
+	}
+	return
+}
+
+func TestStatusEqual(t *testing.T) {
+	// Verify that the Equal method stays in sync with reality
+	equalHandles := []string{"LoginFinished", "Err", "URL", "Persist", "NetMap", "Hostinfo", "state"}
+	if have := fieldsOf(reflect.TypeOf(Status{})); !reflect.DeepEqual(have, equalHandles) {
+		t.Errorf("Status.Equal check might be out of sync\nfields: %q\nhandled: %q\n",
+			have, equalHandles)
+	}
+
+	tests := []struct {
+		a, b *Status
+		want bool
+	}{
+		{
+			&Status{},
+			nil,
+			false,
+		},
+		{
+			nil,
+			&Status{},
+			false,
+		},
+		{
+			&Status{},
+			&Status{},
+			true,
+		},
+		{
+			&Status{state: stateNew},
+			&Status{state: stateNew},
+			true,
+		},
+		{
+			&Status{state: stateNew},
+			&Status{state: stateAuthenticated},
+			false,
+		},
+		{
+			&Status{LoginFinished: nil},
+			&Status{LoginFinished: new(struct{})},
+			false,
+		},
+	}
+	for i, tt := range tests {
+		got := tt.a.Equal(tt.b)
+		if got != tt.want {
+			t.Errorf("%d. Equal = %v; want %v", i, got, tt.want)
+		}
+	}
+}

+ 656 - 0
control/controlclient/direct.go

@@ -0,0 +1,656 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package controlclient
+
+import (
+	"bytes"
+	"context"
+	"crypto/rand"
+	"encoding/binary"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"log"
+	"net/http"
+	"os"
+	"runtime"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/tailscale/wireguard-go/wgcfg"
+	"golang.org/x/crypto/nacl/box"
+	"golang.org/x/oauth2"
+	"tailscale.com/logger"
+	"tailscale.com/tailcfg"
+	"tailscale.com/version"
+	"tailscale.com/wgengine/filter"
+)
+
+type Persist struct {
+	PrivateMachineKey wgcfg.PrivateKey
+	PrivateNodeKey    wgcfg.PrivateKey
+	OldPrivateNodeKey wgcfg.PrivateKey // needed to request key rotation
+	Provider          string
+	LoginName         string
+}
+
+func (p *Persist) Pretty() string {
+	var mk, ok, nk wgcfg.Key
+	if !p.PrivateMachineKey.IsZero() {
+		mk = *p.PrivateMachineKey.Public()
+	}
+	if !p.OldPrivateNodeKey.IsZero() {
+		ok = *p.OldPrivateNodeKey.Public()
+	}
+	if !p.PrivateNodeKey.IsZero() {
+		nk = *p.PrivateNodeKey.Public()
+	}
+	return fmt.Sprintf("Persist{m=%v, o=%v, n=%v u=%#v}",
+		mk.ShortString(), ok.ShortString(), nk.ShortString(),
+		p.LoginName)
+}
+
+// Direct is the client that connects to a tailcontrol server for a node.
+type Direct struct {
+	httpc           *http.Client // HTTP client used to talk to tailcontrol
+	serverURL       string       // URL of the tailcontrol server
+	timeNow         func() time.Time
+	newDecompressor func() (Decompressor, error)
+	keepAlive       bool
+	logf            logger.Logf
+
+	mu           sync.Mutex // mutex guards the following fields
+	serverKey    wgcfg.Key
+	persist      Persist
+	tryingNewKey wgcfg.PrivateKey
+	expiry       *time.Time
+	hostinfo     tailcfg.Hostinfo
+	endpoints    []string
+	localPort    uint16
+	cmdCh        chan interface{}
+	doneCh       chan struct{}
+}
+
+type Options struct {
+	Persist         Persist          // initial persistent data
+	HTTPC           *http.Client     // HTTP client used to talk to tailcontrol
+	ServerURL       string           // URL of the tailcontrol server
+	TimeNow         func() time.Time // time.Now implementation used by Client
+	Hostinfo        *tailcfg.Hostinfo
+	NewDecompressor func() (Decompressor, error)
+	KeepAlive       bool
+	Logf            logger.Logf
+}
+
+type Decompressor interface {
+	DecodeAll(input, dst []byte) ([]byte, error)
+	Close()
+}
+
+// NewDirect returns a new Direct client.
+func NewDirect(opts Options) (*Direct, error) {
+	if opts.ServerURL == "" {
+		return nil, errors.New("controlclient.New: no server URL specified")
+	}
+	opts.ServerURL = strings.TrimRight(opts.ServerURL, "/")
+	if opts.HTTPC == nil {
+		opts.HTTPC = http.DefaultClient
+	}
+	if opts.TimeNow == nil {
+		opts.TimeNow = time.Now
+	}
+	if opts.Logf == nil {
+		// TODO(apenwarr): remove this default and fail instead.
+		opts.Logf = log.Printf
+	}
+
+	c := &Direct{
+		httpc:           opts.HTTPC,
+		serverURL:       opts.ServerURL,
+		timeNow:         opts.TimeNow,
+		logf:            opts.Logf,
+		newDecompressor: opts.NewDecompressor,
+		keepAlive:       opts.KeepAlive,
+		persist:         opts.Persist,
+	}
+	if opts.Hostinfo == nil {
+		c.SetHostinfo(NewHostinfo())
+	} else {
+		c.SetHostinfo(*opts.Hostinfo)
+	}
+
+	return c, nil
+}
+
+func NewHostinfo() tailcfg.Hostinfo {
+	hostname, _ := os.Hostname()
+	os := runtime.GOOS
+	switch os {
+	case "darwin":
+		switch runtime.GOARCH {
+		case "arm", "arm64":
+			os = "iOS"
+		default:
+			os = "macOS"
+		}
+	}
+
+	return tailcfg.Hostinfo{
+		IPNVersion: version.LONG,
+		Hostname:   hostname,
+		OS:         os,
+	}
+}
+
+func (c *Direct) SetHostinfo(hi tailcfg.Hostinfo) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	c.logf("Hostinfo: %v\n", hi)
+	c.hostinfo = hi
+}
+
+func (c *Direct) GetPersist() Persist {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	return c.persist
+}
+
+type LoginFlags int
+
+const (
+	LoginDefault     = LoginFlags(0)
+	LoginInteractive = LoginFlags(1 << iota) // force user login and key refresh
+)
+
+func (c *Direct) TryLogout(ctx context.Context) error {
+	c.logf("direct.TryLogout()\n")
+
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	if c.persist.PrivateNodeKey != (wgcfg.PrivateKey{}) {
+		// TODO(crawshaw): Tell the server. This node key should be immediately invalidated.
+	}
+	c.persist = Persist{
+		PrivateMachineKey: c.persist.PrivateMachineKey,
+	}
+	return nil
+}
+
+func (c *Direct) TryLogin(ctx context.Context, t *oauth2.Token, flags LoginFlags) (url string, err error) {
+	c.logf("direct.TryLogin(%v, %v)\n", t != nil, flags)
+	return c.doLoginOrRegen(ctx, t, flags, false, "")
+}
+
+func (c *Direct) WaitLoginURL(ctx context.Context, url string) (newUrl string, err error) {
+	c.logf("direct.WaitLoginURL\n")
+	return c.doLoginOrRegen(ctx, nil, LoginDefault, false, url)
+}
+
+func (c *Direct) doLoginOrRegen(ctx context.Context, t *oauth2.Token, flags LoginFlags, regen bool, url string) (newUrl string, err error) {
+	mustregen, url, err := c.doLogin(ctx, t, flags, regen, url)
+	if err != nil {
+		return url, err
+	}
+	if mustregen {
+		_, url, err = c.doLogin(ctx, t, flags, true, url)
+	}
+	return url, err
+}
+
+func (c *Direct) doLogin(ctx context.Context, t *oauth2.Token, flags LoginFlags, regen bool, url string) (mustregen bool, newurl string, err error) {
+	c.mu.Lock()
+	persist := c.persist
+	tryingNewKey := c.tryingNewKey
+	serverKey := c.serverKey
+	expired := c.expiry != nil && !c.expiry.IsZero() && c.expiry.Before(c.timeNow())
+	c.mu.Unlock()
+
+	if persist.PrivateMachineKey == (wgcfg.PrivateKey{}) {
+		c.logf("Generating a new machinekey.\n")
+		mkey, err := wgcfg.NewPrivateKey()
+		if err != nil {
+			log.Fatal(err)
+		}
+		persist.PrivateMachineKey = *mkey
+	}
+
+	if expired {
+		c.logf("Old key expired -> regen=true\n")
+		regen = true
+	}
+	if (flags & LoginInteractive) != 0 {
+		c.logf("LoginInteractive -> regen=true\n")
+		regen = true
+	}
+
+	c.logf("doLogin(regen=%v, hasUrl=%v)\n", regen, url != "")
+	if serverKey == (wgcfg.Key{}) {
+		var err error
+		serverKey, err = loadServerKey(ctx, c.httpc, c.serverURL)
+		if err != nil {
+			return regen, url, err
+		}
+
+		c.mu.Lock()
+		c.serverKey = serverKey
+		c.mu.Unlock()
+	}
+
+	var oldNodeKey wgcfg.Key
+	if url != "" {
+	} else if regen || persist.PrivateNodeKey == (wgcfg.PrivateKey{}) {
+		c.logf("Generating a new nodekey.\n")
+		persist.OldPrivateNodeKey = persist.PrivateNodeKey
+		key, err := wgcfg.NewPrivateKey()
+		if err != nil {
+			c.logf("login keygen: %v", err)
+			return regen, url, err
+		}
+		tryingNewKey = *key
+	} else {
+		// Try refreshing the current key first
+		tryingNewKey = persist.PrivateNodeKey
+	}
+	if persist.OldPrivateNodeKey != (wgcfg.PrivateKey{}) {
+		oldNodeKey = *persist.OldPrivateNodeKey.Public()
+	}
+
+	if tryingNewKey == (wgcfg.PrivateKey{}) {
+		log.Fatalf("tryingNewKey is empty, give up\n")
+	}
+	if c.hostinfo.BackendLogID == "" {
+		err = errors.New("hostinfo: BackendLogID missing")
+		return regen, url, err
+	}
+	request := tailcfg.RegisterRequest{
+		Version:    1,
+		OldNodeKey: tailcfg.NodeKey(oldNodeKey),
+		NodeKey:    tailcfg.NodeKey(*tryingNewKey.Public()),
+		Hostinfo:   c.hostinfo,
+		Followup:   url,
+	}
+	c.logf("RegisterReq: onode=%v node=%v fup=%v\n",
+		request.OldNodeKey.AbbrevString(),
+		request.NodeKey.AbbrevString(), url != "")
+	request.Auth.Oauth2Token = t
+	request.Auth.Provider = persist.Provider
+	request.Auth.LoginName = persist.LoginName
+	bodyData, err := encode(request, &serverKey, &persist.PrivateMachineKey)
+	if err != nil {
+		return regen, url, err
+	}
+	body := bytes.NewReader(bodyData)
+
+	u := fmt.Sprintf("%s/machine/%s", c.serverURL, persist.PrivateMachineKey.Public().HexString())
+	req, err := http.NewRequest("POST", u, body)
+	if err != nil {
+		return regen, url, err
+	}
+	req = req.WithContext(ctx)
+
+	res, err := c.httpc.Do(req)
+	if err != nil {
+		return regen, url, fmt.Errorf("register request: %v", err)
+	}
+	c.logf("RegisterReq: returned.\n")
+	resp := tailcfg.RegisterResponse{}
+	if err := decode(res, &resp, &serverKey, &persist.PrivateMachineKey); err != nil {
+		return regen, url, fmt.Errorf("register request: %v", err)
+	}
+
+	if resp.NodeKeyExpired {
+		if regen {
+			return true, "", fmt.Errorf("weird: regen=true but server says NodeKeyExpired: %v", request.NodeKey)
+		}
+		c.logf("server reports new node key %v has expired",
+			request.NodeKey.AbbrevString())
+		return true, "", nil
+	}
+	if persist.Provider == "" {
+		persist.Provider = resp.Login.Provider
+	}
+	if persist.LoginName == "" {
+		persist.LoginName = resp.Login.LoginName
+	}
+
+	// TODO(crawshaw): RegisterResponse should be able to mechanically
+	// communicate some extra instructions from the server:
+	//	- new node key required
+	//	- machine key no longer supported
+	//	- user is disabled
+
+	if resp.AuthURL != "" {
+		c.logf("AuthURL is %.20v...\n", resp.AuthURL)
+	} else {
+		c.logf("No AuthURL\n")
+	}
+
+	c.mu.Lock()
+	if resp.AuthURL == "" {
+		// key rotation is complete
+		persist.PrivateNodeKey = tryingNewKey
+	} else {
+		// save it for the retry-with-URL
+		c.tryingNewKey = tryingNewKey
+	}
+	c.persist = persist
+	c.mu.Unlock()
+
+	if err != nil {
+		return regen, "", err
+	}
+	if ctx.Err() != nil {
+		return regen, "", ctx.Err()
+	}
+	return false, resp.AuthURL, nil
+}
+
+func sameStrings(a, b []string) bool {
+	if len(a) != len(b) {
+		return false
+	}
+	for i := range a {
+		if a[i] != b[i] {
+			return false
+		}
+	}
+	return true
+}
+
+func (c *Direct) newEndpoints(localPort uint16, endpoints []string) bool {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	// Nothing new?
+	if c.localPort == localPort && sameStrings(c.endpoints, endpoints) {
+		return false // unchanged
+	}
+	c.logf("client.newEndpoints(%v, %v)\n", localPort, endpoints)
+	if len(c.endpoints) > 0 {
+		// empty the old list without deallocating it
+		c.endpoints = c.endpoints[:0]
+	}
+	c.localPort = localPort
+	c.endpoints = append(c.endpoints, endpoints...)
+	return true // changed
+}
+
+// SetEndpoints updates the list of locally advertised endpoints.
+// It won't be replicated to the server until a *fresh* call to PollNetMap().
+// You don't need to restart PollNetMap if we return changed==false.
+func (c *Direct) SetEndpoints(localPort uint16, endpoints []string) (changed bool, err error) {
+	// (no log message on function entry, because it clutters the logs
+	//  if endpoints haven't changed. newEndpoints() will log it.)
+	changed = c.newEndpoints(localPort, endpoints)
+	return changed, nil
+}
+
+func (c *Direct) PollNetMap(ctx context.Context, maxPolls int, cb func(*NetworkMap)) error {
+	c.mu.Lock()
+	persist := c.persist
+	serverURL := c.serverURL
+	serverKey := c.serverKey
+	hostinfo := c.hostinfo
+	localPort := c.localPort
+	ep := append([]string(nil), c.endpoints...)
+	c.mu.Unlock()
+
+	if hostinfo.BackendLogID == "" {
+		return errors.New("hostinfo: BackendLogID missing")
+	}
+
+	allowStream := maxPolls != 1
+	c.logf("PollNetMap: stream=%v :%v %v\n", maxPolls, localPort, ep)
+
+	request := tailcfg.MapRequest{
+		Version:   4,
+		KeepAlive: c.keepAlive,
+		NodeKey:   tailcfg.NodeKey(*persist.PrivateNodeKey.Public()),
+		Endpoints: ep,
+		Stream:    allowStream,
+		Hostinfo:  hostinfo,
+	}
+	if c.newDecompressor != nil {
+		request.Compress = "zstd"
+	}
+
+	bodyData, err := encode(request, &serverKey, &persist.PrivateMachineKey)
+	if err != nil {
+		return err
+	}
+
+	u := fmt.Sprintf("%s/machine/%s/map", serverURL, persist.PrivateMachineKey.Public().HexString())
+	req, err := http.NewRequest("POST", u, bytes.NewReader(bodyData))
+	if err != nil {
+		return err
+	}
+	ctx, cancel := context.WithCancel(ctx)
+	defer cancel()
+	req = req.WithContext(ctx)
+
+	res, err := c.httpc.Do(req)
+	if err != nil {
+		return err
+	}
+	if res.StatusCode != 200 {
+		msg, _ := ioutil.ReadAll(res.Body)
+		res.Body.Close()
+		return fmt.Errorf("initial fetch failed %d: %s",
+			res.StatusCode, strings.TrimSpace(string(msg)))
+	}
+	defer res.Body.Close()
+
+	// If we go more than pollTimeout without hearing from the server,
+	// end the long poll. We should be receiving a keep alive ping
+	// every minute.
+	const pollTimeout = 120 * time.Second
+	timeout := time.NewTimer(pollTimeout)
+	timeoutReset := make(chan struct{})
+	defer close(timeoutReset)
+	go func() {
+		for {
+			select {
+			case <-timeout.C:
+				c.logf("map response long-poll timed out!")
+				cancel()
+				return
+			case _, ok := <-timeoutReset:
+				if !ok {
+					return // channel closed, shut down goroutine
+				}
+				if !timeout.Stop() {
+					<-timeout.C
+				}
+				timeout.Reset(pollTimeout)
+			}
+		}
+	}()
+
+	// If allowStream, then the server will use an HTTP long poll to
+	// return incremental results. There is always one response right
+	// away, followed by a delay, and eventually others.
+	// If !allowStream, it'll still send the first result in exactly
+	// the same format before just closing the connection.
+	// We can use this same read loop either way.
+	var msg []byte
+	for i := 0; i < maxPolls || maxPolls < 0; i++ {
+		var siz [4]byte
+		if _, err := io.ReadFull(res.Body, siz[:]); err != nil {
+			return err
+		}
+		size := binary.LittleEndian.Uint32(siz[:])
+		msg = append(msg[:0], make([]byte, size)...)
+		if _, err := io.ReadFull(res.Body, msg); err != nil {
+			return err
+		}
+
+		var resp tailcfg.MapResponse
+
+		// Default filter if the key is missing from the incoming
+		// json (ie. old tailcontrol server without PacketFilter
+		// support). If even an empty PacketFilter is provided, this
+		// will be overwritten.
+		// TODO(apenwarr 2020-02-01): remove after tailcontrol is fully deployed.
+		resp.PacketFilter = filter.MatchAllowAll
+
+		if err := c.decodeMsg(msg, &resp); err != nil {
+			return err
+		}
+		if resp.KeepAlive {
+			c.logf("map response keep alive received")
+			timeoutReset <- struct{}{}
+			continue
+		}
+
+		nm := &NetworkMap{
+			NodeKey:      tailcfg.NodeKey(*persist.PrivateNodeKey.Public()),
+			PrivateKey:   persist.PrivateNodeKey,
+			Expiry:       resp.Node.KeyExpiry,
+			Addresses:    resp.Node.Addresses,
+			Peers:        resp.Peers,
+			LocalPort:    localPort,
+			User:         resp.Node.User,
+			UserProfiles: make(map[tailcfg.UserID]tailcfg.UserProfile),
+			Domain:       resp.Domain,
+			Roles:        resp.Roles,
+			DNS:          resp.DNS,
+			DNSDomains:   resp.SearchPaths,
+			Hostinfo:     resp.Node.Hostinfo,
+			PacketFilter: resp.PacketFilter,
+		}
+		for _, profile := range resp.UserProfiles {
+			nm.UserProfiles[profile.ID] = profile
+		}
+		if resp.Node.MachineAuthorized {
+			nm.MachineStatus = tailcfg.MachineAuthorized
+		} else {
+			nm.MachineStatus = tailcfg.MachineUnauthorized
+		}
+		//c.logf("new network map[%d]:\n%s", i, nm.Concise())
+
+		c.mu.Lock()
+		c.expiry = &nm.Expiry
+		c.mu.Unlock()
+
+		cb(nm)
+	}
+	if ctx.Err() != nil {
+		return ctx.Err()
+	}
+	return nil
+}
+
+func decode(res *http.Response, v interface{}, serverKey *wgcfg.Key, mkey *wgcfg.PrivateKey) error {
+	defer res.Body.Close()
+	msg, err := ioutil.ReadAll(io.LimitReader(res.Body, 1<<20))
+	if err != nil {
+		return err
+	}
+	if res.StatusCode != 200 {
+		return fmt.Errorf("%d: %v", res.StatusCode, string(msg))
+	}
+	return decodeMsg(msg, v, serverKey, mkey)
+}
+
+func (c *Direct) decodeMsg(msg []byte, v interface{}) error {
+	mkey := c.persist.PrivateMachineKey
+	serverKey := c.serverKey
+
+	decrypted, err := decryptMsg(msg, &serverKey, &mkey)
+	if err != nil {
+		return err
+	}
+	var b []byte
+	if c.newDecompressor == nil {
+		b = decrypted
+	} else {
+		//decoder, err := zstd.NewReader(nil)
+		decoder, err := c.newDecompressor()
+		if err != nil {
+			return err
+		}
+		defer decoder.Close()
+		b, err = decoder.DecodeAll(decrypted, nil)
+		if err != nil {
+			return err
+		}
+	}
+	if err := json.Unmarshal(b, v); err != nil {
+		return fmt.Errorf("response: %v", err)
+	}
+	return nil
+
+}
+
+func decodeMsg(msg []byte, v interface{}, serverKey *wgcfg.Key, mkey *wgcfg.PrivateKey) error {
+	decrypted, err := decryptMsg(msg, serverKey, mkey)
+	if err != nil {
+		return err
+	}
+	if err := json.Unmarshal(decrypted, v); err != nil {
+		return fmt.Errorf("response: %v", err)
+	}
+	return nil
+}
+
+func decryptMsg(msg []byte, serverKey *wgcfg.Key, mkey *wgcfg.PrivateKey) ([]byte, error) {
+	var nonce [24]byte
+	if len(msg) < len(nonce)+1 {
+		return nil, fmt.Errorf("response missing nonce, len=%d", len(msg))
+	}
+	copy(nonce[:], msg)
+	msg = msg[len(nonce):]
+
+	pub, pri := (*[32]byte)(serverKey), (*[32]byte)(mkey)
+	decrypted, ok := box.Open(nil, msg, &nonce, pub, pri)
+	if !ok {
+		return nil, fmt.Errorf("cannot decrypt response")
+	}
+	return decrypted, nil
+}
+
+func encode(v interface{}, serverKey *wgcfg.Key, mkey *wgcfg.PrivateKey) ([]byte, error) {
+	b, err := json.Marshal(v)
+	if err != nil {
+		return nil, err
+	}
+	var nonce [24]byte
+	if _, err := io.ReadFull(rand.Reader, nonce[:]); err != nil {
+		panic(err)
+	}
+	pub, pri := (*[32]byte)(serverKey), (*[32]byte)(mkey)
+	msg := box.Seal(nonce[:], b, &nonce, pub, pri)
+	return msg, nil
+}
+
+func loadServerKey(ctx context.Context, httpc *http.Client, serverURL string) (wgcfg.Key, error) {
+	req, err := http.NewRequest("GET", serverURL+"/key", nil)
+	if err != nil {
+		return wgcfg.Key{}, fmt.Errorf("create control key request: %v", err)
+	}
+	req = req.WithContext(ctx)
+	res, err := httpc.Do(req)
+	if err != nil {
+		return wgcfg.Key{}, fmt.Errorf("fetch control key: %v", err)
+	}
+	defer res.Body.Close()
+	b, err := ioutil.ReadAll(io.LimitReader(res.Body, 1<<16))
+	if err != nil {
+		return wgcfg.Key{}, fmt.Errorf("fetch control key response: %v", err)
+	}
+	if res.StatusCode != 200 {
+		return wgcfg.Key{}, fmt.Errorf("fetch control key: %d: %s", res.StatusCode, string(b))
+	}
+	key, err := wgcfg.ParseHexKey(string(b))
+	if err != nil {
+		return wgcfg.Key{}, fmt.Errorf("fetch control key: %v", err)
+	}
+	return *key, nil
+}

+ 305 - 0
control/controlclient/direct_test.go

@@ -0,0 +1,305 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build depends_on_currently_unreleased
+
+package controlclient
+
+import (
+	"context"
+	"io/ioutil"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"testing"
+	"time"
+
+	"github.com/klauspost/compress/zstd"
+	"github.com/tailscale/wireguard-go/wgcfg"
+	"tailscale.com/tailcfg"
+	"tailscale.io/control" // not yet released
+)
+
+func TestClientsReusingKeys(t *testing.T) {
+	tmpdir, err := ioutil.TempDir("", "control-test-")
+	if err != nil {
+		t.Fatal(err)
+	}
+	var server *control.Server
+	httpsrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		server.ServeHTTP(w, r)
+	}))
+	server, err = control.New(tmpdir, httpsrv.URL, true)
+	if err != nil {
+		t.Fatal(err)
+	}
+	server.QuietLogging = true
+	defer func() {
+		httpsrv.CloseClientConnections()
+		httpsrv.Close()
+		os.RemoveAll(tmpdir)
+	}()
+
+	hi := NewHostinfo()
+	hi.FrontendLogID = "go-test-only"
+	hi.BackendLogID = "go-test-only"
+	c1, err := NewDirect(Options{
+		ServerURL: httpsrv.URL,
+		HTTPC:     httpsrv.Client(),
+		//TimeNow:   s.control.TimeNow,
+		Logf: func(fmt string, args ...interface{}) {
+			t.Helper()
+			t.Logf("c1: "+fmt, args...)
+		},
+		Hostinfo: &hi,
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	authURL, err := c1.TryLogin(ctx, nil, 0)
+	if err != nil {
+		t.Fatal(err)
+	}
+	const user = "[email protected]"
+	postAuthURL(t, ctx, httpsrv.Client(), user, authURL)
+	newURL, err := c1.WaitLoginURL(ctx, authURL)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if newURL != "" {
+		t.Fatalf("unexpected newURL: %s", newURL)
+	}
+
+	pollErrCh := make(chan error)
+	go func() {
+		err := c1.PollNetMap(ctx, -1, func(netMap *NetworkMap) {})
+		pollErrCh <- err
+	}()
+
+	select {
+	case err := <-pollErrCh:
+		t.Fatal(err)
+	default:
+	}
+
+	c2, err := NewDirect(Options{
+		ServerURL: httpsrv.URL,
+		HTTPC:     httpsrv.Client(),
+		Logf: func(fmt string, args ...interface{}) {
+			t.Helper()
+			t.Logf("c2: "+fmt, args...)
+		},
+		Persist:  c1.GetPersist(),
+		Hostinfo: &hi,
+		NewDecompressor: func() (Decompressor, error) {
+			return zstd.NewReader(nil)
+		},
+		KeepAlive: true,
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	authURL, err = c2.TryLogin(ctx, nil, 0)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if authURL != "" {
+		t.Errorf("unexpected authURL %s", authURL)
+	}
+
+	err = c2.PollNetMap(ctx, 1, func(netMap *NetworkMap) {})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	select {
+	case err := <-pollErrCh:
+		t.Logf("expected poll error: %v", err)
+	case <-time.After(5 * time.Second):
+		t.Fatal("first client poll failed to close")
+	}
+}
+
+func TestClientsReusingOldKey(t *testing.T) {
+	tmpdir, err := ioutil.TempDir("", "control-test-")
+	if err != nil {
+		t.Fatal(err)
+	}
+	var server *control.Server
+	httpsrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		server.ServeHTTP(w, r)
+	}))
+	server, err = control.New(tmpdir, httpsrv.URL, true)
+	if err != nil {
+		t.Fatal(err)
+	}
+	server.QuietLogging = true
+	defer func() {
+		httpsrv.CloseClientConnections()
+		httpsrv.Close()
+		os.RemoveAll(tmpdir)
+	}()
+
+	hi := NewHostinfo()
+	hi.FrontendLogID = "go-test-only"
+	hi.BackendLogID = "go-test-only"
+	genOpts := func() Options {
+		return Options{
+			ServerURL: httpsrv.URL,
+			HTTPC:     httpsrv.Client(),
+			//TimeNow:   s.control.TimeNow,
+			Logf: func(fmt string, args ...interface{}) {
+				t.Helper()
+				t.Logf("c1: "+fmt, args...)
+			},
+			Hostinfo: &hi,
+		}
+	}
+
+	// Login with a new node key. This requires authorization.
+	c1, err := NewDirect(genOpts())
+	if err != nil {
+		t.Fatal(err)
+	}
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	authURL, err := c1.TryLogin(ctx, nil, 0)
+	if err != nil {
+		t.Fatal(err)
+	}
+	const user = "[email protected]"
+	postAuthURL(t, ctx, httpsrv.Client(), user, authURL)
+	newURL, err := c1.WaitLoginURL(ctx, authURL)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if newURL != "" {
+		t.Fatalf("unexpected newURL: %s", newURL)
+	}
+
+	if err := c1.PollNetMap(ctx, 1, func(netMap *NetworkMap) {}); err != nil {
+		t.Fatal(err)
+	}
+
+	newPrivKey := func(t *testing.T) wgcfg.PrivateKey {
+		t.Helper()
+		k, err := wgcfg.NewPrivateKey()
+		if err != nil {
+			t.Fatal(err)
+		}
+		return *k
+	}
+
+	// Replace the previous key with a new key.
+	persist1 := c1.GetPersist()
+	persist2 := Persist{
+		PrivateMachineKey: persist1.PrivateMachineKey,
+		OldPrivateNodeKey: persist1.PrivateNodeKey,
+		PrivateNodeKey:    newPrivKey(t),
+	}
+	opts := genOpts()
+	opts.Persist = persist2
+
+	c1, err = NewDirect(opts)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if authURL, err := c1.TryLogin(ctx, nil, 0); err != nil {
+		t.Fatal(err)
+	} else if authURL == "" {
+		t.Fatal("expected authURL for reused oldNodeKey, got none")
+	} else {
+		postAuthURL(t, ctx, httpsrv.Client(), user, authURL)
+		if newURL, err := c1.WaitLoginURL(ctx, authURL); err != nil {
+			t.Fatal(err)
+		} else if newURL != "" {
+			t.Fatalf("unexpected newURL: %s", newURL)
+		}
+	}
+	if p := c1.GetPersist(); p.PrivateNodeKey != opts.Persist.PrivateNodeKey {
+		t.Error("unexpected node key change")
+	} else {
+		persist2 = p
+	}
+
+	// Here we simulate a client using using old persistant data.
+	// We use the key we have already replaced as the old node key.
+	// This requires the user to authenticate.
+	persist3 := Persist{
+		PrivateMachineKey: persist1.PrivateMachineKey,
+		OldPrivateNodeKey: persist1.PrivateNodeKey,
+		PrivateNodeKey:    newPrivKey(t),
+	}
+	opts = genOpts()
+	opts.Persist = persist3
+
+	c1, err = NewDirect(opts)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if authURL, err := c1.TryLogin(ctx, nil, 0); err != nil {
+		t.Fatal(err)
+	} else if authURL == "" {
+		t.Fatal("expected authURL for reused oldNodeKey, got none")
+	} else {
+		postAuthURL(t, ctx, httpsrv.Client(), user, authURL)
+		if newURL, err := c1.WaitLoginURL(ctx, authURL); err != nil {
+			t.Fatal(err)
+		} else if newURL != "" {
+			t.Fatalf("unexpected newURL: %s", newURL)
+		}
+	}
+	if err := c1.PollNetMap(ctx, 1, func(netMap *NetworkMap) {}); err != nil {
+		t.Fatal(err)
+	}
+
+	// At this point, there should only be one node for the machine key
+	// registered as active in the server.
+	mkey := tailcfg.MachineKey(*persist1.PrivateMachineKey.Public())
+	nodeIDs, err := server.DB().MachineNodes(mkey)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(nodeIDs) != 1 {
+		t.Logf("active nodes for machine key %v:", mkey)
+		for i, nodeID := range nodeIDs {
+			nodeKey := server.DB().NodeKey(nodeID)
+			t.Logf("\tnode %d: id=%v, key=%v", i, nodeID, nodeKey)
+		}
+		t.Fatalf("want 1 active node for the client machine, got %d", len(nodeIDs))
+	}
+
+	// Now try the previous node key. It should fail.
+	opts = genOpts()
+	opts.Persist = persist2
+	c1, err = NewDirect(opts)
+	if err != nil {
+		t.Fatal(err)
+	}
+	// TODO(crawshaw): make this return an actual error.
+	// Have cfgdb track expired keys, and when an expired key is reused
+	// produce an error.
+	if authURL, err := c1.TryLogin(ctx, nil, 0); err != nil {
+		t.Fatal(err)
+	} else if authURL == "" {
+		t.Fatal("expected authURL for reused nodeKey, got none")
+	} else {
+		postAuthURL(t, ctx, httpsrv.Client(), user, authURL)
+		if newURL, err := c1.WaitLoginURL(ctx, authURL); err != nil {
+			t.Fatal(err)
+		} else if newURL != "" {
+			t.Fatalf("unexpected newURL: %s", newURL)
+		}
+	}
+	if err := c1.PollNetMap(ctx, 1, func(netMap *NetworkMap) {}); err != nil {
+		t.Fatal(err)
+	}
+	if nodeIDs, err := server.DB().MachineNodes(mkey); err != nil {
+		t.Fatal(err)
+	} else if len(nodeIDs) != 1 {
+		t.Fatalf("want 1 active node for the client machine, got %d", len(nodeIDs))
+	}
+}

+ 294 - 0
control/controlclient/netmap.go

@@ -0,0 +1,294 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package controlclient
+
+import (
+	"bytes"
+	"encoding/base64"
+	"encoding/json"
+	"fmt"
+	"log"
+	"net"
+	"runtime"
+	"strings"
+	"time"
+
+	"github.com/tailscale/wireguard-go/wgcfg"
+	"tailscale.com/tailcfg"
+	"tailscale.com/wgengine/filter"
+)
+
+type NetworkMap struct {
+	// Core networking
+
+	NodeKey       tailcfg.NodeKey
+	PrivateKey    wgcfg.PrivateKey
+	Expiry        time.Time
+	Addresses     []wgcfg.CIDR
+	LocalPort     uint16 // used for debugging
+	MachineStatus tailcfg.MachineStatus
+	Peers         []tailcfg.Node
+	DNS           []wgcfg.IP
+	DNSDomains    []string
+	Hostinfo      tailcfg.Hostinfo
+	PacketFilter  filter.Matches
+
+	// ACLs
+
+	User   tailcfg.UserID
+	Domain string
+	// TODO(crawshaw): reduce UserProfiles to []tailcfg.UserProfile?
+	// There are lots of ways to slice this data, leave it up to users.
+	UserProfiles map[tailcfg.UserID]tailcfg.UserProfile
+	Roles        []tailcfg.Role
+	// TODO(crawshaw): Groups       []tailcfg.Group
+	// TODO(crawshaw): Capabilities []tailcfg.Capability
+}
+
+func (n *NetworkMap) Equal(n2 *NetworkMap) bool {
+	// TODO(crawshaw): this is crude, but is an easy way to avoid bugs.
+	b, err := json.Marshal(n)
+	if err != nil {
+		panic(err)
+	}
+	b2, err := json.Marshal(n2)
+	if err != nil {
+		panic(err)
+	}
+	return bytes.Equal(b, b2)
+}
+
+func (n *NetworkMap) isEmpty() bool {
+	if n == nil {
+		return true
+	}
+	return n.Equal(&NetworkMap{})
+}
+
+func (nm NetworkMap) String() string {
+	return nm.Concise()
+}
+
+func keyString(key [32]byte) string {
+	b64 := base64.StdEncoding.EncodeToString(key[:])
+	abbrev := "invalid"
+	if len(b64) == 44 {
+		abbrev = b64[0:4] + "…" + b64[39:43]
+	}
+	return fmt.Sprintf("[%s]", abbrev)
+}
+
+func (nm *NetworkMap) Concise() string {
+	buf := new(strings.Builder)
+	fmt.Fprintf(buf, "NetworkMap: self: %v auth=%v :%v %v\n",
+		keyString(nm.NodeKey), nm.MachineStatus,
+		nm.LocalPort, nm.Addresses)
+	for _, p := range nm.Peers {
+		aip := make([]string, len(p.AllowedIPs))
+		for i, a := range p.AllowedIPs {
+			aip[i] = fmt.Sprint(a)
+		}
+		u := fmt.Sprint(p.User)
+		if strings.HasPrefix(u, "userid:") {
+			u = "u:" + u[7:]
+		}
+		f1 := fmt.Sprintf(" %v %-6v %v",
+			keyString(p.Key), u, p.Endpoints)
+		f2 := fmt.Sprintf(" %*v\n", 70-len(f1),
+			strings.Join(aip, " "))
+		fmt.Fprintf(buf, "%s%s", f1, f2)
+	}
+	return buf.String()
+}
+
+func (nm *NetworkMap) JSON() string {
+	b, err := json.MarshalIndent(*nm, "", "  ")
+	if err != nil {
+		return fmt.Sprintf("[json error: %v]", err)
+	}
+	return string(b)
+}
+
+// TODO(apenwarr): delete me once relaynode doesn't need this anymore.
+// control.go:userMap() supercedes it. This does not belong in the client.
+func (nm *NetworkMap) UserMap() map[string][]filter.IP {
+	// Make a lookup table of roles
+	log.Printf("roles list is: %v\n", nm.Roles)
+	roles := make(map[tailcfg.RoleID]tailcfg.Role)
+	for _, role := range nm.Roles {
+		roles[role.ID] = role
+	}
+
+	// First, go through each node's addresses and make a lookup table
+	// of IP->User.
+	fwd := make(map[wgcfg.IP]string)
+	for _, node := range nm.Peers {
+		for _, addr := range node.Addresses {
+			if addr.Mask == 32 && addr.IP.Is4() {
+				user, ok := nm.UserProfiles[node.User]
+				if ok {
+					fwd[addr.IP] = user.LoginName
+				}
+			}
+		}
+	}
+
+	// Next, reverse the mapping into User->IP.
+	rev := make(map[string][]filter.IP)
+	for ip, username := range fwd {
+		ip4 := ip.To4()
+		if ip4 != nil {
+			fip := filter.NewIP(net.IP(ip4))
+			rev[username] = append(rev[username], fip)
+		}
+	}
+
+	// Now add roles, which are lists of users, and therefore lists
+	// of those users' IP addresses.
+	for _, user := range nm.UserProfiles {
+		for _, roleid := range user.Roles {
+			role, ok := roles[roleid]
+			if ok {
+				rolename := "role:" + role.Name
+				rev[rolename] = append(rev[rolename], rev[user.LoginName]...)
+			}
+		}
+	}
+
+	//log.Printf("Usermap is: %v\n", rev)
+	return rev
+}
+
+var iOS = runtime.GOOS == "darwin" && (runtime.GOARCH == "arm" || runtime.GOARCH == "arm64")
+var keepalive = !iOS
+
+const (
+	UAllowSingleHosts = 1 << iota
+	UAllowSubnetRoutes
+	UAllowDefaultRoute
+	UHackDefaultRoute
+
+	UDefault = 0
+)
+
+// Several programs need to parse these arguments into uflags, so let's
+// centralize it here.
+func UFlagsHelper(uroutes, rroutes, droutes bool) int {
+	uflags := 0
+	if uroutes {
+		uflags |= UAllowSingleHosts
+	}
+	if rroutes {
+		uflags |= UAllowSubnetRoutes
+	}
+	if droutes {
+		uflags |= UAllowDefaultRoute
+	}
+	return uflags
+}
+
+func (nm *NetworkMap) UAPI(uflags int, dnsOverride []wgcfg.IP) string {
+	wgcfg, err := nm.WGCfg(uflags, dnsOverride)
+	if err != nil {
+		log.Fatalf("WGCfg() failed unexpectedly: %v\n", err)
+	}
+	s, err := wgcfg.ToUAPI()
+	if err != nil {
+		log.Fatalf("ToUAPI() failed unexpectedly: %v\n", err)
+	}
+	return s
+}
+
+func (nm *NetworkMap) WGCfg(uflags int, dnsOverride []wgcfg.IP) (*wgcfg.Config, error) {
+	s := nm._WireGuardConfig(uflags, dnsOverride, true)
+	return wgcfg.FromWgQuick(s, "tailscale")
+}
+
+// TODO(apenwarr): This mode is dangerous.
+// Discarding the extra endpoints is almost universally the wrong choice.
+// Except that plain wireguard can't handle a peer with multiple endpoints.
+// (Yet?)
+func (nm *NetworkMap) WireGuardConfigOneEndpoint(uflags int, dnsOverride []wgcfg.IP) string {
+	return nm._WireGuardConfig(uflags, dnsOverride, false)
+}
+
+func (nm *NetworkMap) _WireGuardConfig(uflags int, dnsOverride []wgcfg.IP, allEndpoints bool) string {
+	buf := new(strings.Builder)
+	fmt.Fprintf(buf, "[Interface]\n")
+	fmt.Fprintf(buf, "PrivateKey = %s\n", base64.StdEncoding.EncodeToString(nm.PrivateKey[:]))
+	if len(nm.Addresses) > 0 {
+		fmt.Fprintf(buf, "Address = ")
+		for i, cidr := range nm.Addresses {
+			if i > 0 {
+				fmt.Fprintf(buf, ", ")
+			}
+			fmt.Fprintf(buf, "%s", cidr)
+		}
+		fmt.Fprintf(buf, "\n")
+	}
+	fmt.Fprintf(buf, "ListenPort = %d\n", nm.LocalPort)
+	if len(dnsOverride) > 0 {
+		dnss := []string{}
+		for _, ip := range dnsOverride {
+			dnss = append(dnss, ip.String())
+		}
+		fmt.Fprintf(buf, "DNS = %s\n", strings.Join(dnss, ","))
+	}
+	fmt.Fprintf(buf, "\n")
+
+	for i, peer := range nm.Peers {
+		if (uflags&UAllowSingleHosts) == 0 && len(peer.AllowedIPs) < 2 {
+			log.Printf("wgcfg: %v skipping a single-host peer.\n", peer.Key.AbbrevString())
+			continue
+		}
+		if i > 0 {
+			fmt.Fprintf(buf, "\n")
+		}
+		fmt.Fprintf(buf, "[Peer]\n")
+		fmt.Fprintf(buf, "PublicKey = %s\n", base64.StdEncoding.EncodeToString(peer.Key[:]))
+		if len(peer.Endpoints) > 0 {
+			if len(peer.Endpoints) == 1 {
+				fmt.Fprintf(buf, "Endpoint = %s", peer.Endpoints[0])
+			} else if allEndpoints {
+				// TODO(apenwarr): This mode is incompatible.
+				// Normal wireguard clients don't know how to
+				// parse it (yet?)
+				fmt.Fprintf(buf, "Endpoint = %s",
+					strings.Join(peer.Endpoints, ","))
+			} else {
+				fmt.Fprintf(buf, "Endpoint = %s # other endpoints: %s",
+					peer.Endpoints[0],
+					strings.Join(peer.Endpoints[1:], ", "))
+			}
+			buf.WriteByte('\n')
+		}
+		var aips []string
+		for _, allowedIP := range peer.AllowedIPs {
+			aip := allowedIP.String()
+			if allowedIP.Mask == 0 {
+				if (uflags & UAllowDefaultRoute) == 0 {
+					log.Printf("wgcfg: %v skipping default route\n", peer.Key.AbbrevString())
+					continue
+				}
+				if (uflags & UHackDefaultRoute) != 0 {
+					aip = "10.0.0.0/8"
+					log.Printf("wgcfg: %v converting default route => %v\n", peer.Key.AbbrevString(), aip)
+				}
+			} else if allowedIP.Mask < 32 {
+				if (uflags & UAllowSubnetRoutes) == 0 {
+					log.Printf("wgcfg: %v skipping subnet route\n", peer.Key.AbbrevString())
+					continue
+				}
+			}
+			aips = append(aips, aip)
+		}
+		fmt.Fprintf(buf, "AllowedIPs = %s\n", strings.Join(aips, ", "))
+		if keepalive {
+			fmt.Fprintf(buf, "PersistentKeepalive = 25\n")
+		}
+	}
+
+	return buf.String()
+}

+ 227 - 0
control/policy/policy.go

@@ -0,0 +1,227 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package policy
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"github.com/tailscale/hujson"
+	"net"
+	"strconv"
+	"strings"
+	"tailscale.com/wgengine/filter"
+)
+
+type IP = filter.IP
+
+const IPAny = filter.IPAny
+
+type row struct {
+	Action string
+	Users  []string
+	Ports  []string
+}
+
+type Policy struct {
+	ACLs   []row
+	Groups map[string][]string
+	Hosts  map[string]IP
+}
+
+func lineAndColumn(b []byte, ofs int64) (line, col int) {
+	line = 1
+	for _, c := range b[:ofs] {
+		if c == '\n' {
+			col = 1
+			line++
+		} else {
+			col++
+		}
+	}
+	return line, col
+}
+
+func betterUnmarshal(b []byte, obj interface{}) error {
+	bio := bytes.NewReader(b)
+	d := hujson.NewDecoder(bio)
+	d.DisallowUnknownFields()
+	err := d.Decode(obj)
+	if err != nil {
+		switch ee := err.(type) {
+		case *hujson.SyntaxError:
+			row, col := lineAndColumn(b, ee.Offset)
+			return fmt.Errorf("line %d col %d: %v", row, col, ee)
+		default:
+			return fmt.Errorf("parser: %v", err)
+		}
+	}
+	return nil
+}
+
+func Parse(acljson string) (*Policy, error) {
+	p := &Policy{}
+	err := betterUnmarshal([]byte(acljson), p)
+	if err != nil {
+		return nil, err
+	}
+
+	// Check syntax with an empty usermap to start with.
+	// The caller might not have a valid usermap at startup, but we still
+	// want to check that the acljson doesn't have any syntax errors
+	// as early as possible. When the usermap updates later, it won't
+	// add any new syntax errors.
+	//
+	// TODO(apenwarr): change unmarshal code to detect syntax errors above.
+	//  Right now some of the sub-objects aren't parsed until .Expand().
+	emptyUserMap := make(map[string][]IP)
+	_, err = p.Expand(emptyUserMap)
+	if err != nil {
+		return nil, err
+	}
+
+	return p, nil
+}
+
+func parseHostPortRange(hostport string) (host string, ports []filter.PortRange, err error) {
+	hl := strings.Split(hostport, ":")
+	if len(hl) != 2 {
+		return "", nil, errors.New("hostport must have exactly one colon(:)")
+	}
+	host = hl[0]
+	portlist := hl[1]
+
+	if portlist == "*" {
+		// Special case: permit hostname:* as a port wildcard.
+		ports = append(ports, filter.PortRangeAny)
+		return host, ports, nil
+	}
+
+	pl := strings.Split(portlist, ",")
+	for _, pp := range pl {
+		if len(pp) == 0 {
+			return "", nil, fmt.Errorf("invalid port list: %#v", portlist)
+		}
+
+		pr := strings.Split(pp, "-")
+		if len(pr) > 2 {
+			return "", nil, fmt.Errorf("port range %#v: too many dashes(-)", pp)
+		}
+
+		var first, last uint64
+		first, err := strconv.ParseUint(pr[0], 10, 16)
+		if err != nil {
+			return "", nil, fmt.Errorf("port range %#v: invalid first integer", pp)
+		}
+
+		if len(pr) >= 2 {
+			last, err = strconv.ParseUint(pr[1], 10, 16)
+			if err != nil {
+				return "", nil, fmt.Errorf("port range %#v: invalid last integer", pp)
+			}
+		} else {
+			last = first
+		}
+
+		if first == 0 {
+			return "", nil, fmt.Errorf("port range %#v: first port must be >0, or use '*' for wildcard", pp)
+		}
+
+		if first > last {
+			return "", nil, fmt.Errorf("port range %#v: first port must be >= last port", pp)
+		}
+
+		ports = append(ports, filter.PortRange{uint16(first), uint16(last)})
+	}
+
+	return host, ports, nil
+}
+
+func (p *Policy) Expand(usermap map[string][]IP) (filter.Matches, error) {
+	lcusermap := make(map[string][]IP)
+	for k, v := range usermap {
+		k = strings.ToLower(k)
+		lcusermap[k] = v
+	}
+
+	for k, userlist := range p.Groups {
+		k = strings.ToLower(k)
+		if !strings.HasPrefix(k, "group:") {
+			return nil, fmt.Errorf("Group[%#v]: group names must start with 'group:'", k)
+		}
+		for _, u := range userlist {
+			uips := lcusermap[u]
+			lcusermap[k] = append(lcusermap[k], uips...)
+		}
+	}
+
+	hosts := p.Hosts
+
+	var out filter.Matches
+	for _, acl := range p.ACLs {
+		if acl.Action != "accept" {
+			return nil, fmt.Errorf("Action=%#v is not supported", acl.Action)
+		}
+
+		var srcs []IP
+		for _, user := range acl.Users {
+			user = strings.ToLower(user)
+			if user == "*" {
+				srcs = append(srcs, IPAny)
+				continue
+			} else if strings.Contains(user, "@") ||
+				strings.HasPrefix(user, "role:") ||
+				strings.HasPrefix(user, "group:") {
+				// fine if the requested user doesn't exist.
+				// we don't want to crash ACL parsing just
+				// because a previously authed user gets
+				// deleted. We'll silently ignore it and
+				// no firewall rules are needed.
+				// TODO(apenwarr): maybe print a warning?
+				for _, ip := range lcusermap[user] {
+					if ip != IPAny {
+						srcs = append(srcs, ip)
+					}
+				}
+			} else {
+				return nil, fmt.Errorf("wgengine/filter: invalid username: %q: needs @domain or group: or role:", user)
+			}
+		}
+
+		var dsts []filter.IPPortRange
+		for _, hostport := range acl.Ports {
+			host, ports, err := parseHostPortRange(hostport)
+			if err != nil {
+				return nil, fmt.Errorf("Ports=%#v: %v", hostport, err)
+			}
+			ip := net.ParseIP(host)
+			ipv, ok := hosts[host]
+			if ok {
+				// matches an alias; ipv is now valid
+			} else if ip != nil && ip.IsUnspecified() {
+				// For clarity, reject 0.0.0.0 as an input
+				return nil, fmt.Errorf("Ports=%#v: to allow all IP addresses, use *:port, not 0.0.0.0:port", hostport)
+			} else if ip == nil && host == "*" {
+				// User explicitly requested wildcard dst ip
+				ipv = IPAny
+			} else {
+				if ip != nil {
+					ip = ip.To4()
+				}
+				if ip == nil || len(ip) != 4 {
+					return nil, fmt.Errorf("Ports=%#v: %#v: invalid IPv4 address", hostport, host)
+				}
+				ipv = filter.NewIP(ip)
+			}
+
+			for _, pr := range ports {
+				dsts = append(dsts, filter.IPPortRange{ipv, pr})
+			}
+		}
+
+		out = append(out, filter.Match{DstPorts: dsts, SrcIPs: srcs})
+	}
+	return out, nil
+}

+ 156 - 0
control/policy/policy_test.go

@@ -0,0 +1,156 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package policy
+
+import (
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	"tailscale.com/wgengine/filter"
+)
+
+type PortRange = filter.PortRange
+type IPPortRange = filter.IPPortRange
+
+var syntax_errors = []string{
+	`{ "ACLs": []! }`,
+
+	`{ "ACLs": [
+	  {"Action": "accept", "Users": [], "xPorts": ["100.122.98.50:22"]}
+	]}`,
+
+	`{ "ACLs": [
+	  {"Action": "drop", "Users": [], "Ports": ["100.122.98.50:22"]}
+	]}`,
+
+	`{ "ACLs": [
+	  {"Users": [], "Ports": ["100.122.98.50:22"]}
+	]}`,
+
+	`{ "ACLs": [
+	  {"Action": "accept", "Users": [], "Ports": ["1.2.3.4"]}
+	]}`,
+
+	`{ "ACLs": [
+	  {"Action": "accept", "Users": [], "Ports": ["1.2.3.4:0"]}
+	]}`,
+
+	`{ "ACLs": [
+	  {"Action": "accept", "Users": [], "Ports": ["0.0.0.0:12"]}
+	]}`,
+
+	`{ "ACLs": [
+	  {"Action": "accept", "Users": [], "Ports": ["*:0"]}
+	]}`,
+
+	`{ "ACLs": [
+	  {"Action": "accept", "Users": [], "Ports": ["1.2.3.4:5:6"]}
+	]}`,
+
+	`{ "ACLs": [
+	  {"Action": "accept", "Users": [], "Ports": ["1.2.3.4.5:12"]}
+	]}`,
+
+	`{ "ACLs": [
+	  {"Action": "accept", "Users": [], "Ports": ["1.2.3.4::12"]}
+	]}`,
+
+	`{ "ACLs": [
+	  {"Action": "accept", "Users": [], "Ports": ["1.2.3.4"]}
+	]}`,
+
+	`{ "ACLs": [
+	  {"Action": "accept", "Users": [], "Ports": ["1.2.3.4:0-0"]}
+	]}`,
+
+	`{ "ACLs": [
+	  {"Action": "accept", "Users": [], "Ports": ["1.2.3.4:1-10,2-"]}
+	]}`,
+
+	`{ "ACLs": [
+	  {"Action": "accept", "Users": [], "Ports": ["1.2.3.4:1-10,*"]}
+	]}`,
+
+	`{ "ACLs": [
+	  {"Action": "accept", "Users": [], "Ports": ["1.2.3.4,5.6.7.8:1-10"]}
+	]}`,
+
+	`{ "Hosts": {"mailserver": "not-an-ip"} }`,
+
+	`{ "Hosts": {"mailserver": "1.2.3.4:55"} }`,
+
+	`{ "xGroups": {
+	  "bob": ["user1", "user2"]
+	 }}`,
+}
+
+func TestSyntaxErrors(t *testing.T) {
+	for _, s := range syntax_errors {
+		_, err := Parse(s)
+		if err == nil {
+			t.Fatalf("Parse passed when it shouldn't. json:\n---\n%v\n---", s)
+		}
+	}
+}
+
+func ippr(ip IP, start, end uint16) []IPPortRange {
+	return []IPPortRange{
+		IPPortRange{ip, PortRange{start, end}},
+	}
+}
+
+func TestPolicy(t *testing.T) {
+	// Check ACL table parsing
+
+	usermap := map[string][]IP{
+		"[email protected]":    []IP{0x08010101, 0x08020202},
+		"role:admin": []IP{0x02020202},
+		"user1@org":  []IP{0x99010101, 0x99010102},
+		// user2 is intentionally missing
+		"user3@org": []IP{0x99030303},
+		"user4@org": []IP{},
+	}
+	want := filter.Matches{
+		{SrcIPs: []IP{0x08010101, 0x08020202}, DstPorts: []IPPortRange{
+			IPPortRange{0x01020304, PortRange{22, 22}},
+			IPPortRange{0x05060708, PortRange{23, 24}},
+			IPPortRange{0x05060708, PortRange{27, 28}},
+		}},
+		{SrcIPs: []IP{0x02020202}, DstPorts: ippr(0x08010101, 22, 22)},
+		{SrcIPs: []IP{0}, DstPorts: []IPPortRange{
+			IPPortRange{0x647a6232, PortRange{0, 65535}},
+			IPPortRange{0, PortRange{443, 443}},
+		}},
+		{SrcIPs: []IP{0x99010101, 0x99010102, 0x99030303}, DstPorts: ippr(0x01020304, 999, 999)},
+	}
+
+	p, err := Parse(`
+{
+    // Test comment
+    "Hosts": {
+    	"h1": "1.2.3.4", /* test comment */
+    	"h2": "5.6.7.8"
+    },
+    "Groups": {
+    	"group:eng": ["user1@org", "user2@org", "user3@org", "user4@org"]
+    },
+    "ACLs": [
+	{"Action": "accept", "Users": ["[email protected]"], "Ports": ["h1:22", "h2:23-24,27-28"]},
+	{"Action": "accept", "Users": ["role:Admin"], "Ports": ["8.1.1.1:22"]},
+	{"Action": "accept", "Users": ["*"], "Ports": ["100.122.98.50:*", "*:443"]},
+	{"Action": "accept", "Users": ["group:eng"], "Ports": ["h1:999"]},
+    ]}
+`)
+	if err != nil {
+		t.Fatalf("Parse failed: %v", err)
+	}
+	matches, err := p.Expand(usermap)
+	if err != nil {
+		t.Fatalf("Expand failed: %v", err)
+	}
+	if diff := cmp.Diff(want, matches); diff != "" {
+		t.Fatalf("Expand mismatch (-want +got):\n%s", diff)
+	}
+}

+ 182 - 0
derp/derp_client.go

@@ -0,0 +1,182 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package derp
+
+import (
+	"bufio"
+	"crypto/rand"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net"
+	"time"
+
+	"golang.org/x/crypto/curve25519"
+	"golang.org/x/crypto/nacl/box"
+)
+
+type Client struct {
+	serverKey  [32]byte
+	privateKey [32]byte // TODO(crawshaw): make this wgcfg.PrivateKey?
+	publicKey  [32]byte
+	logf       func(format string, args ...interface{})
+	netConn    net.Conn
+	conn       *bufio.ReadWriter
+}
+
+func NewClient(privateKey [32]byte, netConn net.Conn, conn *bufio.ReadWriter, logf func(format string, args ...interface{})) (*Client, error) {
+	c := &Client{
+		privateKey: privateKey,
+		logf:       logf,
+		netConn:    netConn,
+		conn:       conn,
+	}
+	curve25519.ScalarBaseMult(&c.publicKey, &c.privateKey)
+
+	if err := c.recvServerKey(); err != nil {
+		return nil, fmt.Errorf("derp.Client: failed to receive server key: %v", err)
+	}
+	if err := c.sendClientKey(); err != nil {
+		return nil, fmt.Errorf("derp.Client: failed to send client key: %v", err)
+	}
+	_, err := c.recvServerInfo()
+	if err != nil {
+		return nil, fmt.Errorf("derp.Client: failed to receive server info: %v", err)
+	}
+
+	return c, nil
+}
+
+func (c *Client) recvServerKey() error {
+	gotMagic, err := readUint32(c.conn, 0xffffffff)
+	if err != nil {
+		return err
+	}
+	if gotMagic != magic {
+		return fmt.Errorf("bad magic %x, want %x", gotMagic, magic)
+	}
+	if err := readType(c.conn.Reader, typeServerKey); err != nil {
+		return err
+	}
+	if _, err := io.ReadFull(c.conn, c.serverKey[:]); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (c *Client) recvServerInfo() (*serverInfo, error) {
+	if err := readType(c.conn.Reader, typeServerInfo); err != nil {
+		return nil, err
+	}
+	var nonce [24]byte
+	if _, err := io.ReadFull(c.conn, nonce[:]); err != nil {
+		return nil, fmt.Errorf("nonce: %v", err)
+	}
+	msgLen, err := readUint32(c.conn, oneMB)
+	if err != nil {
+		return nil, fmt.Errorf("msglen: %v", err)
+	}
+	msgbox := make([]byte, msgLen)
+	if _, err := io.ReadFull(c.conn, msgbox); err != nil {
+		return nil, fmt.Errorf("msgbox: %v", err)
+	}
+	msg, ok := box.Open(nil, msgbox, &nonce, &c.serverKey, &c.privateKey)
+	if !ok {
+		return nil, fmt.Errorf("msgbox: cannot open len=%d with server key %x", msgLen, c.serverKey[:])
+	}
+	info := new(serverInfo)
+	if err := json.Unmarshal(msg, info); err != nil {
+		return nil, fmt.Errorf("msg: %v", err)
+	}
+	return info, nil
+}
+
+func (c *Client) sendClientKey() error {
+	var nonce [24]byte
+	if _, err := rand.Read(nonce[:]); err != nil {
+		return err
+	}
+	msg := []byte("{}") // no clientInfo for now
+	msgbox := box.Seal(nil, msg, &nonce, &c.serverKey, &c.privateKey)
+
+	if _, err := c.conn.Write(c.publicKey[:]); err != nil {
+		return err
+	}
+	if _, err := c.conn.Write(nonce[:]); err != nil {
+		return err
+	}
+	if err := putUint32(c.conn.Writer, uint32(len(msgbox))); err != nil {
+		return err
+	}
+	if _, err := c.conn.Write(msgbox); err != nil {
+		return err
+	}
+	return c.conn.Flush()
+}
+
+func (c *Client) Send(dstKey [32]byte, msg []byte) (err error) {
+	defer func() {
+		if err != nil {
+			err = fmt.Errorf("derp.Send: %v", err)
+		}
+	}()
+
+	if err := c.conn.WriteByte(typeSendPacket); err != nil {
+		return err
+	}
+	if _, err := c.conn.Write(dstKey[:]); err != nil {
+		return err
+	}
+	msgLen := uint32(len(msg))
+	if int(msgLen) != len(msg) {
+		return fmt.Errorf("packet too big: %d", len(msg))
+	}
+	if err := putUint32(c.conn.Writer, msgLen); err != nil {
+		return err
+	}
+	if _, err := c.conn.Write(msg); err != nil {
+		return err
+	}
+	return c.conn.Flush()
+}
+
+func (c *Client) Recv(b []byte) (n int, err error) {
+	defer func() {
+		if err != nil {
+			err = fmt.Errorf("derp.Recv: %v", err)
+		}
+	}()
+
+loop:
+	for {
+		c.netConn.SetReadDeadline(time.Now().Add(120 * time.Second))
+		packetType, err := c.conn.ReadByte()
+		if err != nil {
+			return 0, err
+		}
+		switch packetType {
+		case typeKeepAlive:
+			continue
+		case typeRecvPacket:
+			break loop
+		default:
+			return 0, fmt.Errorf("derp.Recv: unknown packet type %d", packetType)
+		}
+	}
+
+	packetLen, err := readUint32(c.conn.Reader, oneMB)
+	if err != nil {
+		return 0, err
+	}
+	if int(packetLen) > len(b) {
+		// TODO(crawshaw): discard the packet
+		return 0, io.ErrShortBuffer
+	}
+	b = b[:packetLen]
+	if _, err := io.ReadFull(c.conn, b); err != nil {
+		return 0, err
+	}
+	return int(packetLen), nil
+}

+ 380 - 0
derp/derp_server.go

@@ -0,0 +1,380 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package derp
+
+// TODO(crawshaw): revise protocol so unknown type packets have a predictable length for skipping.
+// TODO(crawshaw): send srcKey with packets to clients?
+// TODO(crawshaw): with predefined serverKey in clients and HMAC on packets we could skip TLS
+
+import (
+	"bufio"
+	"context"
+	"crypto/rand"
+	"encoding/binary"
+	"encoding/json"
+	"fmt"
+	"io"
+	"math/big"
+	"net"
+	"sync"
+	"time"
+
+	"golang.org/x/crypto/curve25519"
+	"golang.org/x/crypto/nacl/box"
+)
+
+const magic = 0x44c55250 // "DERP" with a non-ASCII high-bit
+
+const (
+	typeServerKey  = 0x01
+	typeServerInfo = 0x02
+	typeSendPacket = 0x03
+	typeRecvPacket = 0x04
+	typeKeepAlive  = 0x05
+)
+
+const keepAlive = 60 * time.Second
+
+var bin = binary.BigEndian
+
+const oneMB = 1 << 20
+
+type Server struct {
+	privateKey [32]byte // TODO(crawshaw): make this wgcfg.PrivateKey?
+	publicKey  [32]byte
+	logf       func(format string, args ...interface{})
+
+	mu       sync.Mutex
+	netConns map[net.Conn]chan struct{}
+	clients  map[[32]byte]*client
+}
+
+func NewServer(privateKey [32]byte, logf func(format string, args ...interface{})) *Server {
+	s := &Server{
+		privateKey: privateKey,
+		logf:       logf,
+		clients:    make(map[[32]byte]*client),
+		netConns:   make(map[net.Conn]chan struct{}),
+	}
+	curve25519.ScalarBaseMult(&s.publicKey, &s.privateKey)
+	return s
+}
+
+func (s *Server) Close() error {
+	var closedChs []chan struct{}
+
+	s.mu.Lock()
+	for netConn, closed := range s.netConns {
+		netConn.Close()
+		closedChs = append(closedChs, closed)
+	}
+	s.mu.Unlock()
+
+	for _, closed := range closedChs {
+		<-closed
+	}
+
+	return nil
+}
+
+func (s *Server) Accept(netConn net.Conn, conn *bufio.ReadWriter) {
+	closed := make(chan struct{})
+
+	s.mu.Lock()
+	s.netConns[netConn] = closed
+	s.mu.Unlock()
+
+	defer func() {
+		netConn.Close()
+		close(closed)
+
+		s.mu.Lock()
+		delete(s.netConns, netConn)
+		s.mu.Unlock()
+	}()
+
+	if err := s.accept(netConn, conn); err != nil {
+		s.logf("derp: %s: %v", netConn.RemoteAddr(), err)
+	}
+}
+
+func (s *Server) accept(netConn net.Conn, conn *bufio.ReadWriter) error {
+	netConn.SetDeadline(time.Now().Add(10 * time.Second))
+	if err := s.sendServerKey(conn); err != nil {
+		return fmt.Errorf("send server key: %v", err)
+	}
+	netConn.SetDeadline(time.Now().Add(10 * time.Second))
+	clientKey, clientInfo, err := s.recvClientKey(conn)
+	if err != nil {
+		return fmt.Errorf("receive client key: %v", err)
+	}
+	if err := s.verifyClient(clientKey, clientInfo); err != nil {
+		return fmt.Errorf("client %x rejected: %v", clientKey, err)
+	}
+
+	// At this point we trust the client so we don't time out.
+	netConn.SetDeadline(time.Time{})
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	c := &client{
+		key:     clientKey,
+		netConn: netConn,
+		conn:    conn,
+	}
+	if clientInfo != nil {
+		c.info = *clientInfo
+	}
+	go func() {
+		if err := c.keepAlive(ctx); err != nil {
+			s.logf("derp: %s: client %x: keep alive failed: %v", netConn.RemoteAddr(), c.key, err)
+		}
+	}()
+
+	defer func() {
+		s.mu.Lock()
+		curClient := s.clients[c.key]
+		if curClient != nil && curClient.conn == conn {
+			s.logf("derp: %s: client %x: removing connection", netConn.RemoteAddr(), c.key)
+			delete(s.clients, c.key)
+		}
+		s.mu.Unlock()
+	}()
+
+	// Hold mu while we add the new client to the clients list and under
+	// the same acquisition send server info. This ensure that both:
+	// 1. by the time the client receives the server info, it can be addressed.
+	// 2. the server info is the very first
+	c.mu.Lock()
+	s.mu.Lock()
+	oldClient := s.clients[c.key]
+	s.clients[c.key] = c
+	s.mu.Unlock()
+	if err := s.sendServerInfo(conn, clientKey); err != nil {
+		return fmt.Errorf("send server info: %v", err)
+	}
+	c.mu.Unlock()
+
+	if oldClient == nil {
+		s.logf("derp: %s: client %x: adding connection", netConn.RemoteAddr(), c.key)
+	} else {
+		oldClient.netConn.Close()
+		s.logf("derp: %s: client %x: adding connection, replacing %s", netConn.RemoteAddr(), c.key, oldClient.netConn.RemoteAddr())
+	}
+
+	for {
+		dstKey, contents, err := s.recvPacket(c.conn)
+		if err != nil {
+			return fmt.Errorf("client %x: recv: %v", c.key, err)
+		}
+
+		s.mu.Lock()
+		dst := s.clients[dstKey]
+		s.mu.Unlock()
+
+		if dst == nil {
+			s.logf("derp: %s: client %x: dropping packet for unknown %x", netConn.RemoteAddr(), c.key, dstKey)
+			continue
+		}
+
+		dst.mu.Lock()
+		err = s.sendPacket(dst.conn, c.key, contents)
+		dst.mu.Unlock()
+
+		if err != nil {
+			s.logf("derp: %s: client %x: dropping packet for %x: %v", netConn.RemoteAddr(), c.key, dstKey, err)
+
+			// If we cannot send to a destination, shut it down.
+			// Let its receive loop do the cleanup.
+			s.mu.Lock()
+			if s.clients[dstKey].conn == dst.conn {
+				s.clients[dstKey].netConn.Close()
+			}
+			s.mu.Unlock()
+		}
+	}
+}
+
+func (s *Server) verifyClient(clientKey [32]byte, info *clientInfo) error {
+	// TODO(crawshaw): implement policy constraints on who can use the DERP server
+	return nil
+}
+
+func (s *Server) sendServerKey(conn *bufio.ReadWriter) error {
+	if err := putUint32(conn, magic); err != nil {
+		return err
+	}
+	if err := conn.WriteByte(typeServerKey); err != nil {
+		return err
+	}
+	if _, err := conn.Write(s.publicKey[:]); err != nil {
+		return err
+	}
+	return conn.Flush()
+}
+
+func (s *Server) sendServerInfo(conn *bufio.ReadWriter, clientKey [32]byte) error {
+	var nonce [24]byte
+	if _, err := rand.Read(nonce[:]); err != nil {
+		return err
+	}
+	msg := []byte("{}") // no serverInfo for now
+	msgbox := box.Seal(nil, msg, &nonce, &clientKey, &s.privateKey)
+
+	if err := conn.WriteByte(typeServerInfo); err != nil {
+		return err
+	}
+	if _, err := conn.Write(nonce[:]); err != nil {
+		return err
+	}
+	if err := putUint32(conn, uint32(len(msgbox))); err != nil {
+		return err
+	}
+	if _, err := conn.Write(msgbox); err != nil {
+		return err
+	}
+	return conn.Flush()
+}
+
+func (s *Server) recvClientKey(conn *bufio.ReadWriter) (clientKey [32]byte, info *clientInfo, err error) {
+	if _, err := io.ReadFull(conn, clientKey[:]); err != nil {
+		return [32]byte{}, nil, err
+	}
+	var nonce [24]byte
+	if _, err := io.ReadFull(conn, nonce[:]); err != nil {
+		return [32]byte{}, nil, fmt.Errorf("nonce: %v", err)
+	}
+	msgLen, err := readUint32(conn, oneMB)
+	if err != nil {
+		return [32]byte{}, nil, fmt.Errorf("msglen: %v", err)
+	}
+	msgbox := make([]byte, msgLen)
+	if _, err := io.ReadFull(conn, msgbox); err != nil {
+		return [32]byte{}, nil, fmt.Errorf("msgbox: %v", err)
+	}
+	msg, ok := box.Open(nil, msgbox, &nonce, &clientKey, &s.privateKey)
+	if !ok {
+		return [32]byte{}, nil, fmt.Errorf("msgbox: cannot open len=%d with client key %x", msgLen, clientKey[:])
+	}
+	info = new(clientInfo)
+	if err := json.Unmarshal(msg, info); err != nil {
+		return [32]byte{}, nil, fmt.Errorf("msg: %v", err)
+	}
+	return clientKey, info, nil
+}
+
+func (s *Server) sendPacket(conn *bufio.ReadWriter, srcKey [32]byte, contents []byte) error {
+	if err := conn.WriteByte(typeRecvPacket); err != nil {
+		return err
+	}
+	if err := putUint32(conn.Writer, uint32(len(contents))); err != nil {
+		return err
+	}
+	if _, err := conn.Write(contents); err != nil {
+		return err
+	}
+	return conn.Flush()
+}
+
+func (s *Server) recvPacket(conn *bufio.ReadWriter) (dstKey [32]byte, contents []byte, err error) {
+	if err := readType(conn.Reader, typeSendPacket); err != nil {
+		return [32]byte{}, nil, err
+	}
+	if _, err := io.ReadFull(conn, dstKey[:]); err != nil {
+		return [32]byte{}, nil, err
+	}
+	packetLen, err := readUint32(conn.Reader, oneMB)
+	if err != nil {
+		return [32]byte{}, nil, err
+	}
+	contents = make([]byte, packetLen)
+	if _, err := io.ReadFull(conn, contents); err != nil {
+		return [32]byte{}, nil, err
+	}
+	return dstKey, contents, nil
+}
+
+type client struct {
+	netConn net.Conn
+	key     [32]byte
+	info    clientInfo
+
+	keepAliveTimer *time.Timer
+	keepAliveReset chan struct{}
+
+	mu   sync.Mutex
+	conn *bufio.ReadWriter
+}
+
+func (c *client) keepAlive(ctx context.Context) error {
+	jitterMs, err := rand.Int(rand.Reader, big.NewInt(5000))
+	if err != nil {
+		panic(err)
+	}
+	jitter := time.Duration(jitterMs.Int64()) * time.Millisecond
+	c.keepAliveTimer = time.NewTimer(keepAlive + jitter)
+
+	for {
+		select {
+		case <-ctx.Done():
+			return nil
+		case <-c.keepAliveReset:
+			if c.keepAliveTimer.Stop() {
+				<-c.keepAliveTimer.C
+			}
+			c.keepAliveTimer.Reset(keepAlive + jitter)
+		case <-c.keepAliveTimer.C:
+			c.mu.Lock()
+			err := c.conn.WriteByte(typeKeepAlive)
+			if err == nil {
+				err = c.conn.Flush()
+			}
+			c.mu.Unlock()
+
+			if err != nil {
+				// TODO log
+				c.netConn.Close()
+				return err
+			}
+		}
+	}
+}
+
+type clientInfo struct {
+}
+
+type serverInfo struct {
+}
+
+func readType(r *bufio.Reader, t uint8) error {
+	packetType, err := r.ReadByte()
+	if err != nil {
+		return err
+	}
+	if packetType != t {
+		return fmt.Errorf("bad packet type 0x%X, want 0x%X", packetType, t)
+	}
+	return nil
+}
+
+func putUint32(w io.Writer, v uint32) error {
+	var b [4]byte
+	bin.PutUint32(b[:], v)
+	_, err := w.Write(b[:])
+	return err
+}
+
+func readUint32(r io.Reader, maxVal uint32) (uint32, error) {
+	b := make([]byte, 4)
+	if _, err := io.ReadFull(r, b); err != nil {
+		return 0, err
+	}
+	val := bin.Uint32(b)
+	if val > maxVal {
+		return 0, fmt.Errorf("uint32 %d exceeds limit %d", val, maxVal)
+	}
+	return val, nil
+}

+ 125 - 0
derp/derp_test.go

@@ -0,0 +1,125 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package derp
+
+import (
+	"bufio"
+	"crypto/rand"
+	"net"
+	"testing"
+	"time"
+
+	"golang.org/x/crypto/curve25519"
+)
+
+func TestSendRecv(t *testing.T) {
+	const numClients = 3
+	var serverPrivateKey [32]byte
+	if _, err := rand.Read(serverPrivateKey[:]); err != nil {
+		t.Fatal(err)
+	}
+	var clientPrivateKeys [][32]byte
+	for i := 0; i < numClients; i++ {
+		var key [32]byte
+		if _, err := rand.Read(key[:]); err != nil {
+			t.Fatal(err)
+		}
+		clientPrivateKeys = append(clientPrivateKeys, key)
+	}
+	var clientKeys [][32]byte
+	for _, privKey := range clientPrivateKeys {
+		var key [32]byte
+		curve25519.ScalarBaseMult(&key, &privKey)
+		clientKeys = append(clientKeys, key)
+	}
+
+	ln, err := net.Listen("tcp", ":0")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	var clientConns []net.Conn
+	for i := 0; i < numClients; i++ {
+		conn, err := net.Dial("tcp", ln.Addr().String())
+		if err != nil {
+			t.Fatal(err)
+		}
+		clientConns = append(clientConns, conn)
+	}
+	s := NewServer(serverPrivateKey, t.Logf)
+	defer s.Close()
+	for i := 0; i < numClients; i++ {
+		netConn, err := ln.Accept()
+		if err != nil {
+			t.Fatal(err)
+		}
+		conn := bufio.NewReadWriter(bufio.NewReader(netConn), bufio.NewWriter(netConn))
+		go s.Accept(netConn, conn)
+	}
+
+	var clients []*Client
+	var recvChs []chan []byte
+	errCh := make(chan error, 3)
+	for i := 0; i < numClients; i++ {
+		key := clientPrivateKeys[i]
+		netConn := clientConns[i]
+		conn := bufio.NewReadWriter(bufio.NewReader(netConn), bufio.NewWriter(netConn))
+		c, err := NewClient(key, netConn, conn, t.Logf)
+		if err != nil {
+			t.Fatalf("client %d: %v", i, err)
+		}
+		clients = append(clients, c)
+		recvChs = append(recvChs, make(chan []byte))
+
+		go func(i int) {
+			for {
+				b := make([]byte, 1<<16)
+				n, err := c.Recv(b)
+				if err != nil {
+					errCh <- err
+					return
+				}
+				b = b[:n]
+				recvChs[i] <- b
+			}
+		}(i)
+	}
+
+	recv := func(i int, want string) {
+		t.Helper()
+		select {
+		case b := <-recvChs[i]:
+			if got := string(b); got != want {
+				t.Errorf("client1.Recv=%q, want %q", got, want)
+			}
+		case <-time.After(1 * time.Second):
+			t.Errorf("client%d.Recv, got nothing, want %q", i, want)
+		}
+	}
+	recvNothing := func(i int) {
+		t.Helper()
+		select {
+		case b := <-recvChs[0]:
+			t.Errorf("client%d.Recv=%q, want nothing", i, string(b))
+		default:
+		}
+	}
+
+	msg1 := []byte("hello 0->1\n")
+	if err := clients[0].Send(clientKeys[1], msg1); err != nil {
+		t.Fatal(err)
+	}
+	recv(1, string(msg1))
+	recvNothing(0)
+	recvNothing(2)
+
+	msg2 := []byte("hello 1->2\n")
+	if err := clients[1].Send(clientKeys[2], msg2); err != nil {
+		t.Fatal(err)
+	}
+	recv(2, string(msg2))
+	recvNothing(0)
+	recvNothing(1)
+}

+ 203 - 0
derp/derphttp/derphttp_client.go

@@ -0,0 +1,203 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package derphttp implements DERP-over-HTTP.
+//
+// This makes DERP look exactly like WebSockets.
+// A server can implement DERP over HTTPS and even if the TLS connection
+// intercepted using a fake root CA, unless the interceptor knows how to
+// detect DERP packets, it will look like a web socket.
+package derphttp
+
+import (
+	"bufio"
+	"bytes"
+	"crypto/tls"
+	"errors"
+	"fmt"
+	"io/ioutil"
+	"net"
+	"net/http"
+	"net/url"
+	"sync"
+
+	"tailscale.com/derp"
+)
+
+// Client is a DERP-over-HTTP client.
+//
+// It automatically reconnects on error retry. That is, a failed Send or
+// Recv will report the error and not retry, but subsequent calls to
+// Send/Recv will completely re-establish the connection.
+type Client struct {
+	privateKey [32]byte
+	logf       func(format string, args ...interface{})
+	closed     chan struct{}
+	url        *url.URL
+	resp       *http.Response
+
+	netConnMu sync.Mutex
+	netConn   net.Conn
+
+	clientMu sync.Mutex
+	client   *derp.Client
+}
+
+func NewClient(privateKey [32]byte, serverURL string, logf func(format string, args ...interface{})) (c *Client, err error) {
+	u, err := url.Parse(serverURL)
+	if err != nil {
+		return nil, fmt.Errorf("derphttp.NewClient: %v", err)
+	}
+
+	c = &Client{
+		privateKey: privateKey,
+		logf:       logf,
+		url:        u,
+		closed:     make(chan struct{}),
+	}
+	if _, err := c.connect("derphttp.NewClient"); err != nil {
+		c.logf("%v", err)
+	}
+	return c, nil
+}
+
+func (c *Client) connect(caller string) (client *derp.Client, err error) {
+	select {
+	case <-c.closed:
+		return nil, ErrClientClosed
+	default:
+	}
+
+	c.clientMu.Lock()
+	defer c.clientMu.Unlock()
+
+	if c.client != nil {
+		return c.client, nil
+	}
+
+	c.logf("%s: connecting", caller)
+
+	var netConn net.Conn
+	defer func() {
+		if err != nil {
+			err = fmt.Errorf("%s connect: %v", caller, err)
+			if netConn := netConn; netConn != nil {
+				netConn.Close()
+			}
+		}
+	}()
+
+	if c.url.Scheme == "https" {
+		port := c.url.Port()
+		if port == "" {
+			port = "443"
+		}
+		config := &tls.Config{}
+		var tlsConn *tls.Conn
+		tlsConn, err = tls.Dial("tcp", net.JoinHostPort(c.url.Host, port), config)
+		if tlsConn != nil {
+			netConn = tlsConn
+		}
+	} else {
+		netConn, err = net.Dial("tcp", c.url.Host)
+	}
+	if err != nil {
+		return nil, err
+	}
+
+	c.netConnMu.Lock()
+	c.netConn = netConn
+	c.netConnMu.Unlock()
+
+	conn := bufio.NewReadWriter(bufio.NewReader(netConn), bufio.NewWriter(netConn))
+
+	req, err := http.NewRequest("GET", c.url.String(), nil)
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("Upgrade", "WebSocket")
+	req.Header.Set("Connection", "Upgrade")
+	if err := req.Write(conn); err != nil {
+		return nil, err
+	}
+	if err := conn.Flush(); err != nil {
+		return nil, err
+	}
+
+	resp, err := http.ReadResponse(conn.Reader, req)
+	if err != nil {
+		return nil, err
+	}
+	if resp.StatusCode != http.StatusSwitchingProtocols {
+		b, _ := ioutil.ReadAll(resp.Body)
+		resp.Body.Close()
+		return nil, fmt.Errorf("GET failed: %v: %s", err, b)
+	}
+	resp.Body = ioutil.NopCloser(bytes.NewReader([]byte{}))
+
+	derpClient, err := derp.NewClient(c.privateKey, netConn, conn, c.logf)
+	if err != nil {
+		return nil, err
+	}
+	c.resp = resp
+	c.client = derpClient
+	return c.client, nil
+}
+
+func (c *Client) Send(dstKey [32]byte, b []byte) error {
+	client, err := c.connect("derphttp.Client.Send")
+	if err != nil {
+		return err
+	}
+	if err := client.Send(dstKey, b); err != nil {
+		c.close()
+	}
+	return err
+}
+
+func (c *Client) Recv(b []byte) (int, error) {
+	client, err := c.connect("derphttp.Client.Recv")
+	if err != nil {
+		return 0, err
+	}
+	n, err := client.Recv(b)
+	if err != nil {
+		c.close()
+	}
+	return n, err
+}
+
+func (c *Client) Close() error {
+	select {
+	case <-c.closed:
+		return ErrClientClosed
+	default:
+	}
+	close(c.closed)
+	c.close()
+	return nil
+}
+
+func (c *Client) close() {
+	c.netConnMu.Lock()
+	netConn := c.netConn
+	c.netConnMu.Unlock()
+
+	if netConn != nil {
+		netConn.Close()
+	}
+
+	c.clientMu.Lock()
+	defer c.clientMu.Unlock()
+	if c.client == nil {
+		return
+	}
+	c.resp = nil
+	c.client = nil
+	c.netConnMu.Lock()
+	c.netConn = nil
+	c.netConnMu.Unlock()
+}
+
+var ErrClientClosed = errors.New("derphttp.Client closed")

+ 35 - 0
derp/derphttp/derphttp_server.go

@@ -0,0 +1,35 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package derphttp
+
+import (
+	"net/http"
+
+	"tailscale.com/derp"
+)
+
+func Handler(s *derp.Server) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.Header.Get("Upgrade") != "WebSocket" {
+			http.Error(w, "DERP requires connection upgrade", http.StatusUpgradeRequired)
+			return
+		}
+		w.Header().Set("Upgrade", "WebSocket")
+		w.Header().Set("Connection", "Upgrade")
+		w.WriteHeader(http.StatusSwitchingProtocols)
+
+		h, ok := w.(http.Hijacker)
+		if !ok {
+			http.Error(w, "HTTP does not support general TCP support", 500)
+			return
+		}
+		netConn, conn, err := h.Hijack()
+		if err != nil {
+			http.Error(w, "HTTP does not support general TCP support", 500)
+			return
+		}
+		s.Accept(netConn, conn)
+	})
+}

+ 142 - 0
derp/derphttp/derphttp_test.go

@@ -0,0 +1,142 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package derphttp
+
+import (
+	"crypto/rand"
+	"crypto/tls"
+	"net"
+	"net/http"
+	"sync"
+	"testing"
+	"time"
+
+	"golang.org/x/crypto/curve25519"
+	"tailscale.com/derp"
+)
+
+func TestSendRecv(t *testing.T) {
+	const numClients = 3
+	var serverPrivateKey [32]byte
+	if _, err := rand.Read(serverPrivateKey[:]); err != nil {
+		t.Fatal(err)
+	}
+	var clientPrivateKeys [][32]byte
+	for i := 0; i < numClients; i++ {
+		var key [32]byte
+		if _, err := rand.Read(key[:]); err != nil {
+			t.Fatal(err)
+		}
+		clientPrivateKeys = append(clientPrivateKeys, key)
+	}
+	var clientKeys [][32]byte
+	for _, privKey := range clientPrivateKeys {
+		var key [32]byte
+		curve25519.ScalarBaseMult(&key, &privKey)
+		clientKeys = append(clientKeys, key)
+	}
+
+	s := derp.NewServer(serverPrivateKey, t.Logf)
+	defer s.Close()
+
+	httpsrv := &http.Server{
+		TLSNextProto: make(map[string]func(*http.Server, *tls.Conn, http.Handler)),
+		Handler:      Handler(s),
+	}
+
+	ln, err := net.Listen("tcp4", "localhost:0")
+	if err != nil {
+		t.Fatal(err)
+	}
+	serverURL := "http://" + ln.Addr().String()
+	t.Logf("server URL: %s", serverURL)
+
+	go func() {
+		if err := httpsrv.Serve(ln); err != nil {
+			if err == http.ErrServerClosed {
+				return
+			}
+			panic(err)
+		}
+	}()
+
+	var clients []*Client
+	var recvChs []chan []byte
+	done := make(chan struct{})
+	var wg sync.WaitGroup
+	defer func() {
+		close(done)
+		for _, c := range clients {
+			c.Close()
+		}
+		wg.Wait()
+	}()
+	for i := 0; i < numClients; i++ {
+		key := clientPrivateKeys[i]
+		c, err := NewClient(key, serverURL, t.Logf)
+		if err != nil {
+			t.Fatalf("client %d: %v", i, err)
+		}
+		clients = append(clients, c)
+		recvChs = append(recvChs, make(chan []byte))
+
+		wg.Add(1)
+		go func(i int) {
+			defer wg.Done()
+			for {
+				select {
+				case <-done:
+					return
+				default:
+				}
+				b := make([]byte, 1<<16)
+				n, err := c.Recv(b)
+				if err != nil {
+					t.Logf("client%d: %v", i, err)
+					break
+				}
+				b = b[:n]
+				recvChs[i] <- b
+			}
+		}(i)
+	}
+
+	recv := func(i int, want string) {
+		t.Helper()
+		select {
+		case b := <-recvChs[i]:
+			if got := string(b); got != want {
+				t.Errorf("client1.Recv=%q, want %q", got, want)
+			}
+		case <-time.After(1 * time.Second):
+			t.Errorf("client%d.Recv, got nothing, want %q", i, want)
+		}
+	}
+	recvNothing := func(i int) {
+		t.Helper()
+		select {
+		case b := <-recvChs[0]:
+			t.Errorf("client%d.Recv=%q, want nothing", i, string(b))
+		default:
+		}
+	}
+
+	msg1 := []byte("hello 0->1\n")
+	if err := clients[0].Send(clientKeys[1], msg1); err != nil {
+		t.Fatal(err)
+	}
+	recv(1, string(msg1))
+	recvNothing(0)
+	recvNothing(2)
+
+	msg2 := []byte("hello 1->2\n")
+	if err := clients[1].Send(clientKeys[2], msg2); err != nil {
+		t.Fatal(err)
+	}
+	recv(2, string(msg2))
+	recvNothing(0)
+	recvNothing(1)
+
+}

+ 13 - 0
derp/doc.go

@@ -0,0 +1,13 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package derp implements DERP, the Detour Encrypted Routing Protocol.
+//
+// DERP routes packets to clients using curve25519 keys as addresses.
+//
+// DERP is used by Tailscale nodes to proxy encrypted WireGuard
+// packets through the Tailscale cloud servers when a direct path
+// cannot be found or opened. DERP is a last resort. Both sides
+// between very aggressive NATs, firewalls, no IPv6, etc? Well, DERP.
+package derp

+ 19 - 0
go.mod

@@ -0,0 +1,19 @@
+module tailscale.com
+
+go 1.13
+
+require (
+	github.com/apenwarr/fixconsole v0.0.0-20191012055117-5a9f6489cc29
+	github.com/go-ole/go-ole v1.2.4
+	github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e
+	github.com/google/go-cmp v0.4.0
+	github.com/klauspost/compress v1.9.8
+	github.com/mdlayher/netlink v1.1.0
+	github.com/pborman/getopt v0.0.0-20190409184431-ee0cd42419d3
+	github.com/tailscale/hujson v0.0.0-20190930033718-5098e564d9b3
+	github.com/tailscale/wireguard-go v0.0.0-20200208214841-2981baf46731
+	golang.org/x/crypto v0.0.0-20200208060501-ecb85df21340
+	golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d
+	golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5
+	gortc.io/stun v1.22.1
+)

+ 76 - 0
go.sum

@@ -0,0 +1,76 @@
+cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
+github.com/apenwarr/fixconsole v0.0.0-20191012055117-5a9f6489cc29 h1:muXWUcay7DDy1/hEQWrYlBy+g0EuwT70sBHg65SeUc4=
+github.com/apenwarr/fixconsole v0.0.0-20191012055117-5a9f6489cc29/go.mod h1:JYWahgHer+Z2xbsgHPtaDYVWzeHDminu+YIBWkxpCAY=
+github.com/apenwarr/w32 v0.0.0-20190407065021-aa00fece76ab h1:CMGzRRCjnD50RjUFSArBLuCxiDvdp7b8YPAcikBEQ+k=
+github.com/apenwarr/w32 v0.0.0-20190407065021-aa00fece76ab/go.mod h1:nfFtvHn2Hgs9G1u0/J6LHQv//EksNC+7G8vXmd1VTJ8=
+github.com/go-ole/go-ole v1.2.4 h1:nNBDSCOigTSiarFpYE9J/KtEA1IOW4CNeqT9TQDqCxI=
+github.com/go-ole/go-ole v1.2.4/go.mod h1:XCwSNxSkXRo4vlyPy93sltvi/qJq0jqQhjqQNIwKuxM=
+github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY=
+github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
+github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM=
+github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
+github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
+github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4=
+github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/jsimonetti/rtnetlink v0.0.0-20190606172950-9527aa82566a/go.mod h1:Oz+70psSo5OFh8DBl0Zv2ACw7Esh6pPUphlvZG9x7uw=
+github.com/jsimonetti/rtnetlink v0.0.0-20200117123717-f846d4f6c1f4 h1:nwOc1YaOrYJ37sEBrtWZrdqzK22hiJs3GpDmP3sR2Yw=
+github.com/jsimonetti/rtnetlink v0.0.0-20200117123717-f846d4f6c1f4/go.mod h1:WGuG/smIU4J/54PblvSbh+xvCZmpJnFgr3ds6Z55XMQ=
+github.com/klauspost/compress v1.9.8 h1:VMAMUUOh+gaxKTMk+zqbjsSjsIcUcL/LF4o63i82QyA=
+github.com/klauspost/compress v1.9.8/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
+github.com/mdlayher/netlink v0.0.0-20190409211403-11939a169225/go.mod h1:eQB3mZE4aiYnlUsyGGCOpPETfdQq4Jhsgf1fk3cwQaA=
+github.com/mdlayher/netlink v1.0.0/go.mod h1:KxeJAFOFLG6AjpyDkQ/iIhxygIUKD+vcwqcnu43w/+M=
+github.com/mdlayher/netlink v1.1.0 h1:mpdLgm+brq10nI9zM1BpX1kpDbh3NLl3RSnVq6ZSkfg=
+github.com/mdlayher/netlink v1.1.0/go.mod h1:H4WCitaheIsdF9yOYu8CFmCgQthAPIWZmcKp9uZHgmY=
+github.com/pborman/getopt v0.0.0-20190409184431-ee0cd42419d3 h1:YtFkrqsMEj7YqpIhRteVxJxCeC3jJBieuLr0d4C4rSA=
+github.com/pborman/getopt v0.0.0-20190409184431-ee0cd42419d3/go.mod h1:85jBQOZwpVEaDAr341tbn15RS4fCAsIst0qp7i8ex1o=
+github.com/tailscale/hujson v0.0.0-20190930033718-5098e564d9b3 h1:rdtXEo9yffOjh4vZQJw3heaY+ggXKp+zvMX5fihh6lI=
+github.com/tailscale/hujson v0.0.0-20190930033718-5098e564d9b3/go.mod h1:STqf+YV0ADdzk4ejtXFsGqDpATP9JoL0OB+hiFQbkdE=
+github.com/tailscale/wireguard-go v0.0.0-20191108062213-b93cdd0582db h1:oP0crfwOb3WZSVrMVm/o51NXN2JirDlcdlNEIPTmgI0=
+github.com/tailscale/wireguard-go v0.0.0-20200207221558-a158079b156a h1:5TWA3nl2QUfL9OiE3tlBpqJd4GYd4hbGtDNkWQQ2fyc=
+github.com/tailscale/wireguard-go v0.0.0-20200207221558-a158079b156a/go.mod h1:QPS8HjBzzAXoQNndUNx2efJaQbCCz8nI2Cv1ksTUHyY=
+github.com/tailscale/wireguard-go v0.0.0-20200208161837-3cd0a483944a h1:vIyObUBvnXB1XTKTBM4AgoUFR9RHiz/kslGHClkXQVg=
+github.com/tailscale/wireguard-go v0.0.0-20200208161837-3cd0a483944a/go.mod h1:JPm5cTfu1K+qDFRbiHy0sOlHUylYQbpl356sdYFD8V4=
+github.com/tailscale/wireguard-go v0.0.0-20200208214841-2981baf46731 h1:sNmny/5pHqHdm081Fx8rcNFnwt0zTGuee/0+Jz+tXCA=
+github.com/tailscale/wireguard-go v0.0.0-20200208214841-2981baf46731/go.mod h1:JPm5cTfu1K+qDFRbiHy0sOlHUylYQbpl356sdYFD8V4=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20191002192127-34f69633bfdc/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20200206161412-a0c6ece9d31a h1:aczoJ0HPNE92XKa7DrIzkNN6esOKO2TBwiiYoKcINhA=
+golang.org/x/crypto v0.0.0-20200206161412-a0c6ece9d31a/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/crypto v0.0.0-20200208060501-ecb85df21340 h1:KOcEaR10tFr7gdJV2GCKw8Os5yED1u1aOqHjOAb6d2Y=
+golang.org/x/crypto v0.0.0-20200208060501-ecb85df21340/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20191003171128-d98b1b443823/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20191007182048-72f939374954/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
+golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d h1:TzXSXBo42m9gQenoE3b9BGiEpg5IG2JkU5FkPIawgtw=
+golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
+golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4 h1:YUO/7uOKsKeq9UokNS62b8FYywz3ker1l1vDZRCRefw=
+golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190405154228-4b34438f7a67/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190411185658-b44545bcd369/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191003212358-c178f38b412c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5 h1:LfCXLvNmTYH9kEmVgqbnsWfruoXZIrh4YBgqVHtDvw0=
+golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
+golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.zx2c4.com/wireguard v0.0.20200121 h1:vcswa5Q6f+sylDfjqyrVNNrjsFUUbPsgAQTBCAg/Qf8=
+golang.zx2c4.com/wireguard v0.0.20200121/go.mod h1:P2HsVp8SKwZEufsnezXZA4GRX/T49/HlU7DGuelXsU4=
+google.golang.org/appengine v1.4.0 h1:/wp5JvzpHIxhs/dumFmF7BXTf3Z+dd4uXta4kVyO508=
+google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
+gortc.io/stun v1.22.1 h1:96mOdDATYRqhYB+TZdenWBg4CzL2Ye5kPyBXQ8KAB+8=
+gortc.io/stun v1.22.1/go.mod h1:XD5lpONVyjvV3BgOyJFNo0iv6R2oZB4L+weMqxts+zg=

+ 79 - 0
ipn/backend.go

@@ -0,0 +1,79 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipn
+
+import (
+	"tailscale.com/control/controlclient"
+	"tailscale.com/tailcfg"
+	"tailscale.com/wgengine"
+	"time"
+)
+
+type State int
+
+const (
+	NoState = State(iota)
+	NeedsLogin
+	NeedsMachineAuth
+	Stopped
+	Starting
+	Running
+)
+
+func (s State) String() string {
+	return [...]string{"NoState", "NeedsLogin", "NeedsMachineAuth",
+		"Stopped", "Starting", "Running"}[s]
+}
+
+type EngineStatus struct {
+	RBytes, WBytes wgengine.ByteCount
+	NumLive        int
+	LivePeers      map[tailcfg.NodeKey]wgengine.PeerStatus
+}
+
+type NetworkMap = controlclient.NetworkMap
+
+// In any given notification, any or all of these may be nil, meaning
+// that they have not changed.
+type Notify struct {
+	Version       string        // version number of IPN backend
+	ErrMessage    *string       // critical error message, if any
+	LoginFinished *struct{}     // event: login process succeeded
+	State         *State        // current IPN state has changed
+	Prefs         *Prefs        // preferences were changed
+	NetMap        *NetworkMap   // new netmap received
+	Engine        *EngineStatus // wireguard engine stats
+	BrowseToURL   *string       // UI should open a browser right now
+	BackendLogID  *string       // public logtail id used by backend
+}
+
+type Options struct {
+	FrontendLogID string // public logtail id used by frontend
+	ServerURL     string
+	Prefs         Prefs
+	LoginFlags    controlclient.LoginFlags
+	Notify        func(n Notify) `json:"-"`
+}
+
+type Backend interface {
+	// Start or restart the backend, because a new Handle has connected.
+	Start(opts Options) error
+	// Start a new interactive login. This should trigger a new
+	// BrowseToURL notification eventually.
+	StartLoginInteractive()
+	// Terminate the current login session and stop the wireguard engine.
+	Logout()
+	// Install a new set of user preferences, including WantRunning.
+	// This may cause the wireguard engine to reconfigure or stop.
+	SetPrefs(new Prefs)
+	// Poll for an update from the wireguard engine. Only needed if
+	// you want to display byte counts. Connection events are emitted
+	// automatically without polling.
+	RequestEngineStatus()
+	// Pretend the current key is going to expire after duration x.
+	// This is useful for testing GUIs to make sure they react properly
+	// with keys that are going to expire.
+	FakeExpireAfter(x time.Duration)
+}

+ 11 - 0
ipn/doc.go

@@ -0,0 +1,11 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package ipn implements the interactions between the Tailscale cloud
+// control plane and the local network stack.
+//
+// IPN is the abbreviated name for a Tailscale network. What's less
+// clear is what it's an abbreviation for: Identified Private Network?
+// IP Network? Internet Private Network? I Privately Network?
+package ipn

+ 207 - 0
ipn/e2e_test.go

@@ -0,0 +1,207 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build depends_on_currently_unreleased
+
+package ipn
+
+import (
+	"bytes"
+	"io/ioutil"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/tailscale/wireguard-go/tun/tuntest"
+	"tailscale.com/control/controlclient"
+	"tailscale.com/tailcfg"
+	"tailscale.com/testy"
+	"tailscale.com/wgengine"
+	"tailscale.com/wgengine/magicsock"
+	"tailscale.io/control" // not yet released
+)
+
+func TestIPN(t *testing.T) {
+	testy.FixLogs(t)
+	defer testy.UnfixLogs(t)
+
+	// Turn off STUN for the test to make it hermitic.
+	// TODO(crawshaw): add a test that runs against a local STUN server.
+	origDefaultSTUN := magicsock.DefaultSTUN
+	magicsock.DefaultSTUN = nil
+	defer func() {
+		magicsock.DefaultSTUN = origDefaultSTUN
+	}()
+
+	// TODO(apenwarr): Make resource checks actually pass.
+	// They don't right now, because (at least) wgengine doesn't fully
+	// shut down.
+	//	rc := testy.NewResourceCheck()
+	//	defer rc.Assert(t)
+
+	var ctl *control.Server
+
+	ctlHandler := func(w http.ResponseWriter, r *http.Request) {
+		ctl.ServeHTTP(w, r)
+	}
+	https := httptest.NewServer(http.HandlerFunc(ctlHandler))
+	serverURL := https.URL
+	defer https.Close()
+	defer https.CloseClientConnections()
+
+	tmpdir, err := ioutil.TempDir("", "ipntest")
+	if err != nil {
+		t.Fatalf("create tempdir: %v\n", err)
+	}
+	ctl, err = control.New(tmpdir, serverURL, true)
+	if err != nil {
+		t.Fatalf("create control server: %v\n", ctl)
+	}
+
+	n1 := newNode(t, "n1", https)
+	defer n1.Backend.Shutdown()
+	n1.Backend.StartLoginInteractive()
+
+	n2 := newNode(t, "n2", https)
+	defer n2.Backend.Shutdown()
+	n2.Backend.StartLoginInteractive()
+
+	var s1, s2 State
+	for {
+		t.Logf("\n\nn1.state=%v n2.state=%v\n\n", s1, s2)
+
+		// TODO(crawshaw): switch from || to &&. To do this we need to
+		// transmit some data so that the handshake completes on both
+		// sides. (Beacuse handshakes are 1RTT, it is the data
+		// transmission that completes the handshake.)
+		if s1 == Running || s2 == Running {
+			// TODO(apenwarr): ensure state sequence.
+			// Right now we'll just exit as soon as
+			// state==Running, even if the backend is lying or
+			// something. Not a great test.
+			break
+		}
+
+		select {
+		case n := <-n1.NotifyCh:
+			t.Logf("n1n: %v\n", n)
+			if n.State != nil {
+				s1 = *n.State
+				if s1 == NeedsMachineAuth {
+					authNode(t, ctl, n1.Backend)
+				}
+			}
+		case n := <-n2.NotifyCh:
+			t.Logf("n2n: %v\n", n)
+			if n.State != nil {
+				s2 = *n.State
+				if s2 == NeedsMachineAuth {
+					authNode(t, ctl, n2.Backend)
+				}
+			}
+		case <-time.After(3 * time.Second):
+			t.Fatalf("\n\n\nFATAL: timed out waiting for notifications.\n\n\n")
+		}
+	}
+
+	t.Skip("skipping ping tests, they are flaky") // TODO(crawshaw): this exposes a real bug!
+
+	n1addr := n1.Backend.NetMap().Addresses[0].IP
+	n2addr := n2.Backend.NetMap().Addresses[0].IP
+	t.Run("ping n2", func(t *testing.T) {
+		msg := tuntest.Ping(n2addr.IP(), n1addr.IP())
+		n1.ChannelTUN.Outbound <- msg
+		select {
+		case msgRecv := <-n2.ChannelTUN.Inbound:
+			if !bytes.Equal(msg, msgRecv) {
+				t.Error("bad ping")
+			}
+		case <-time.After(1 * time.Second):
+			t.Error("no ping seen")
+		}
+	})
+	t.Run("ping n1", func(t *testing.T) {
+		msg := tuntest.Ping(n1addr.IP(), n2addr.IP())
+		n2.ChannelTUN.Outbound <- msg
+		select {
+		case msgRecv := <-n1.ChannelTUN.Inbound:
+			if !bytes.Equal(msg, msgRecv) {
+				t.Error("bad ping")
+			}
+		case <-time.After(1 * time.Second):
+			t.Error("no ping seen")
+		}
+	})
+}
+
+type testNode struct {
+	Backend    *LocalBackend
+	ChannelTUN *tuntest.ChannelTUN
+	NotifyCh   <-chan Notify
+}
+
+// Create a new IPN node.
+func newNode(t *testing.T, prefix string, https *httptest.Server) testNode {
+	t.Helper()
+	logfe := func(fmt string, args ...interface{}) {
+		t.Logf(prefix+".e: "+fmt, args...)
+	}
+	logf := func(fmt string, args ...interface{}) {
+		t.Logf(prefix+": "+fmt, args...)
+	}
+
+	derp := false
+	tun := tuntest.NewChannelTUN()
+	e1, err := wgengine.NewUserspaceEngineAdvanced(logfe, tun.TUN(), wgengine.NewFakeRouter, 0, derp)
+	if err != nil {
+		t.Fatalf("NewFakeEngine: %v\n", err)
+	}
+	n, err := NewLocalBackend(logf, prefix, e1)
+	if err != nil {
+		t.Fatalf("NewLocalBackend: %v\n", err)
+	}
+	nch := make(chan Notify, 1000)
+	c := controlclient.Persist{
+		Provider:  "google",
+		LoginName: "[email protected]",
+	}
+	n.Start(Options{
+		FrontendLogID: prefix + "-f",
+		ServerURL:     https.URL,
+		Prefs: Prefs{
+			RouteAll:         true,
+			AllowSingleHosts: true,
+			CorpDNS:          true,
+			WantRunning:      true,
+			Persist:          &c,
+		},
+		LoginFlags: controlclient.LoginDefault,
+		Notify: func(n Notify) {
+			// Automatically visit auth URLs
+			if n.BrowseToURL != nil {
+				t.Logf("\n\n\nURL! %vv\n", *n.BrowseToURL)
+				hc := https.Client()
+				_, err := hc.Get(*n.BrowseToURL)
+				if err != nil {
+					t.Logf("BrowseToURL: %v\n", err)
+				}
+			}
+			nch <- n
+		},
+	})
+
+	return testNode{
+		Backend:    n,
+		ChannelTUN: tun,
+		NotifyCh:   nch,
+	}
+}
+
+// Tell the control server to authorize the given node.
+func authNode(t *testing.T, ctl *control.Server, n *LocalBackend) {
+	mk := *n.prefs.Persist.PrivateMachineKey.Public()
+	nk := *n.prefs.Persist.PrivateNodeKey.Public()
+	ctl.AuthorizeMachine(tailcfg.MachineKey(mk), tailcfg.NodeKey(nk))
+}

+ 72 - 0
ipn/fake.go

@@ -0,0 +1,72 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipn
+
+import (
+	"log"
+	"time"
+)
+
+type FakeBackend struct {
+	serverURL string
+	notify    func(n Notify)
+	live      bool
+}
+
+func (b *FakeBackend) Start(opts Options) error {
+	b.serverURL = opts.ServerURL
+	if opts.Notify == nil {
+		log.Fatalf("FakeBackend.Start: opts.Notify is nil\n")
+	}
+	b.notify = opts.Notify
+	b.notify(Notify{Prefs: &opts.Prefs})
+	nl := NeedsLogin
+	b.notify(Notify{State: &nl})
+	return nil
+}
+
+func (b *FakeBackend) newState(s State) {
+	b.notify(Notify{State: &s})
+	if s == Running {
+		b.live = true
+	} else {
+		b.live = false
+	}
+}
+
+func (b *FakeBackend) StartLoginInteractive() {
+	u := b.serverURL + "/this/is/fake"
+	b.notify(Notify{BrowseToURL: &u})
+	b.newState(NeedsMachineAuth)
+	b.newState(Stopped)
+	// TODO(apenwarr): Fill in a more interesting netmap here.
+	b.notify(Notify{NetMap: &NetworkMap{}})
+	b.newState(Starting)
+	// TODO(apenwarr): Fill in a more interesting status.
+	b.notify(Notify{Engine: &EngineStatus{}})
+	b.newState(Running)
+}
+
+func (b *FakeBackend) Logout() {
+	b.newState(NeedsLogin)
+}
+
+func (b *FakeBackend) SetPrefs(new Prefs) {
+	b.notify(Notify{Prefs: &new})
+	if new.WantRunning && !b.live {
+		b.newState(Starting)
+		b.newState(Running)
+	} else if !new.WantRunning && b.live {
+		b.newState(Stopped)
+	}
+}
+
+func (b *FakeBackend) RequestEngineStatus() {
+	b.notify(Notify{Engine: &EngineStatus{}})
+}
+
+func (b *FakeBackend) FakeExpireAfter(x time.Duration) {
+	b.notify(Notify{NetMap: &NetworkMap{}})
+}

+ 166 - 0
ipn/handle.go

@@ -0,0 +1,166 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipn
+
+import (
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/tailscale/wireguard-go/wgcfg"
+	"tailscale.com/logger"
+)
+
+type Handle struct {
+	serverURL     string
+	frontendLogID string
+	b             Backend
+	xnotify       func(n Notify)
+	logf          logger.Logf
+
+	// Mutex protects everything below
+	mu                sync.Mutex
+	netmapCache       *NetworkMap
+	engineStatusCache EngineStatus
+	stateCache        State
+	prefsCache        Prefs
+}
+
+func NewHandle(b Backend, logf logger.Logf, opts Options) (*Handle, error) {
+	h := &Handle{
+		b:    b,
+		logf: logf,
+	}
+
+	err := h.Start(opts)
+	if err != nil {
+		return nil, err
+	}
+
+	return h, nil
+}
+
+func (h *Handle) Start(opts Options) error {
+	h.serverURL = strings.TrimRight(opts.ServerURL, "/")
+	h.frontendLogID = opts.FrontendLogID
+	h.xnotify = opts.Notify
+	h.netmapCache = nil
+	h.engineStatusCache = EngineStatus{}
+	h.stateCache = NoState
+	h.prefsCache = opts.Prefs
+	xopts := opts
+	xopts.Notify = h.notify
+	return h.b.Start(xopts)
+}
+
+func (h *Handle) Reset() {
+	st := NoState
+	h.notify(Notify{State: &st})
+}
+
+func (h *Handle) notify(n Notify) {
+	h.mu.Lock()
+	if n.BackendLogID != nil {
+		h.logf("Handle: logs: be:%v fe:%v\n",
+			*n.BackendLogID, h.frontendLogID)
+	}
+	if n.State != nil {
+		h.stateCache = *n.State
+	}
+	if n.Prefs != nil {
+		h.prefsCache = *n.Prefs
+	}
+	if n.NetMap != nil {
+		h.netmapCache = n.NetMap
+	}
+	if n.Engine != nil {
+		h.engineStatusCache = *n.Engine
+	}
+	h.mu.Unlock()
+
+	if h.xnotify != nil {
+		// Forward onward to our parent's notifier
+		h.xnotify(n)
+	}
+}
+
+func (h *Handle) Prefs() Prefs {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+
+	return h.prefsCache
+}
+
+func (h *Handle) UpdatePrefs(updateFn func(old Prefs) (new Prefs)) {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+
+	new := updateFn(h.prefsCache)
+	h.prefsCache = new
+	h.b.SetPrefs(new)
+}
+
+func (h *Handle) State() State {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+
+	return h.stateCache
+}
+
+func (h *Handle) EngineStatus() EngineStatus {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+
+	return h.engineStatusCache
+}
+
+func (h *Handle) LocalAddrs() []wgcfg.CIDR {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+
+	nm := h.netmapCache
+	if nm != nil {
+		return nm.Addresses
+	}
+	return []wgcfg.CIDR{}
+}
+
+func (h *Handle) NetMap() *NetworkMap {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+
+	return h.netmapCache
+}
+
+func (h *Handle) Expiry() time.Time {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+
+	nm := h.netmapCache
+	if nm != nil {
+		return nm.Expiry
+	}
+	return time.Time{}
+}
+
+func (h *Handle) AdminPageURL() string {
+	return h.serverURL + "/admin/machines"
+}
+
+func (h *Handle) StartLoginInteractive() {
+	h.b.StartLoginInteractive()
+}
+
+func (h *Handle) Logout() {
+	h.b.Logout()
+}
+
+func (h *Handle) RequestEngineStatus() {
+	h.b.RequestEngineStatus()
+}
+
+func (h *Handle) FakeExpireAfter(x time.Duration) {
+	h.b.FakeExpireAfter(x)
+}

+ 253 - 0
ipn/ipnserver/server.go

@@ -0,0 +1,253 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipnserver
+
+import (
+	"bufio"
+	"context"
+	"fmt"
+	"log"
+	"net"
+	"os"
+	"os/exec"
+	"os/signal"
+	"strings"
+	"sync"
+	"syscall"
+	"time"
+
+	"github.com/google/go-cmp/cmp"
+	"github.com/klauspost/compress/zstd"
+	"tailscale.com/control/controlclient"
+	"tailscale.com/ipn"
+	"tailscale.com/logger"
+	"tailscale.com/logtail/backoff"
+	"tailscale.com/safesocket"
+	"tailscale.com/wgengine"
+)
+
+type Options struct {
+	SurviveDisconnects bool
+	AllowQuit          bool
+}
+
+func pump(logf logger.Logf, ctx context.Context, bs *ipn.BackendServer, s net.Conn) {
+	defer logf("Control connection done.\n")
+
+	for ctx.Err() == nil && !bs.GotQuit {
+		msg, err := ipn.ReadMsg(s)
+		if err != nil {
+			logf("ReadMsg: %v\n", err)
+			break
+		}
+		err = bs.GotCommandMsg(msg)
+		if err != nil {
+			logf("GotCommandMsg: %v\n", err)
+			break
+		}
+	}
+}
+
+func Run(rctx context.Context, logf logger.Logf, logid string, opts Options, e wgengine.Engine) error {
+	bo := backoff.Backoff{Name: "ipnserver"}
+
+	listen, _, err := safesocket.Listen("", "Tailscale", "tailscaled", 41112)
+	if err != nil {
+		return fmt.Errorf("safesocket.Listen: %v", err)
+	}
+
+	b, err := ipn.NewLocalBackend(logf, logid, e)
+	if err != nil {
+		return fmt.Errorf("NewLocalBackend: %v", err)
+	}
+	b.SetDecompressor(func() (controlclient.Decompressor, error) {
+		return zstd.NewReader(nil)
+	})
+	b.SetCmpDiff(func(x, y interface{}) string { return cmp.Diff(x, y) })
+
+	var s net.Conn
+	serverToClient := func(b []byte) {
+		if s != nil {
+			ipn.WriteMsg(s, b)
+		}
+	}
+
+	bs := ipn.NewBackendServer(logf, b, serverToClient)
+
+	logf("Listening on %v\n", listen.Addr())
+
+	// Go listeners can't take a context, close it instead.
+	go func() {
+		<-rctx.Done()
+		listen.Close()
+	}()
+
+	var oldS net.Conn
+	ctx, cancel := context.WithCancel(rctx)
+
+	stopAll := func() {
+		// Currently we only support one client connection at a time.
+		// Theoretically we could allow multiple clients, by passing
+		// notifications to all of them and accepting commands from
+		// any of them, but there doesn't seem to be much need for
+		// that right now.
+		if oldS != nil {
+			cancel()
+			safesocket.ConnCloseRead(oldS)
+			safesocket.ConnCloseWrite(oldS)
+		}
+	}
+
+	for i := 1; rctx.Err() == nil; i++ {
+		s, err = listen.Accept()
+		if err != nil {
+			logf("%d: Accept: %v\n", i, err)
+			bo.BackOff(rctx, err)
+			continue
+		}
+		logf("%d: Incoming control connection.\n", i)
+		stopAll()
+
+		ctx, cancel = context.WithCancel(context.Background())
+		oldS = s
+
+		go func(ctx context.Context, bs *ipn.BackendServer, s net.Conn, i int) {
+			si := fmt.Sprintf("%d: ", i)
+			pump(func(fmt string, args ...interface{}) {
+				logf(si+fmt, args...)
+			}, ctx, bs, s)
+			if !opts.SurviveDisconnects || bs.GotQuit {
+				bs.Reset()
+				s.Close()
+			}
+			if opts.AllowQuit {
+				os.Exit(0)
+			} else {
+				bs.GotQuit = false
+			}
+		}(ctx, bs, s, i)
+
+		bo.BackOff(ctx, nil)
+	}
+	stopAll()
+
+	return rctx.Err()
+}
+
+func BabysitProc(ctx context.Context, args []string, logf logger.Logf) {
+
+	executable, err := os.Executable()
+	if err != nil {
+		panic("cannot determine executable: " + err.Error())
+	}
+
+	var proc struct {
+		mu sync.Mutex
+		p  *os.Process
+	}
+
+	done := make(chan struct{})
+	go func() {
+		interrupt := make(chan os.Signal, 1)
+		signal.Notify(interrupt, syscall.SIGINT, syscall.SIGTERM)
+		var sig os.Signal
+		select {
+		case sig = <-interrupt:
+			logf("BabysitProc: got signal: %v\n", sig)
+			close(done)
+		case <-ctx.Done():
+			logf("BabysitProc: context done\n")
+			sig = os.Kill
+			close(done)
+		}
+
+		proc.mu.Lock()
+		proc.p.Signal(sig)
+		proc.mu.Unlock()
+	}()
+
+	bo := backoff.Backoff{Name: "BabysitProc"}
+
+	for {
+		startTime := time.Now()
+		log.Printf("exec: %#v %v\n", executable, args)
+		cmd := exec.Command(executable, args...)
+
+		// Create a pipe object to use as the subproc's stdin.
+		// When the writer goes away, the reader gets EOF.
+		// A subproc can watch its stdin and exit when it gets EOF;
+		// this is a very reliable way to have a subproc die when
+		// its parent (us) disappears.
+		// We never need to actually write to wStdin.
+		rStdin, wStdin, err := os.Pipe()
+		if err != nil {
+			log.Printf("os.Pipe 1: %v\n", err)
+			return
+		}
+
+		// Create a pipe object to use as the subproc's stdout/stderr.
+		// We'll read from this pipe and send it to logf, line by line.
+		// We can't use os.exec's io.Writer for this because it
+		// doesn't care about lines, and thus ends up merging multiple
+		// log lines into one or splitting one line into multiple
+		// logf() calls. bufio is more appropriate.
+		rStdout, wStdout, err := os.Pipe()
+		if err != nil {
+			log.Printf("os.Pipe 2: %v\n", err)
+		}
+		go func(r *os.File) {
+			defer r.Close()
+			rb := bufio.NewReader(r)
+			for {
+				s, err := rb.ReadString('\n')
+				if s != "" {
+					logf("%s\n", strings.TrimSuffix(s, "\n"))
+				}
+				if err != nil {
+					break
+				}
+			}
+		}(rStdout)
+
+		cmd.Stdin = rStdin
+		cmd.Stdout = wStdout
+		cmd.Stderr = wStdout
+		err = cmd.Start()
+
+		// Now that the subproc is started, get rid of our copy of the
+		// pipe reader. Bad things happen on Windows if more than one
+		// process owns the read side of a pipe.
+		rStdin.Close()
+		wStdout.Close()
+
+		if err != nil {
+			log.Printf("starting subprocess failed: %v", err)
+		} else {
+			proc.mu.Lock()
+			proc.p = cmd.Process
+			proc.mu.Unlock()
+
+			err = cmd.Wait()
+			log.Printf("subprocess exited: %v", err)
+		}
+
+		// If the process finishes, clean up the write side of the
+		// pipe. We'll make a new one when we restart the subproc.
+		wStdin.Close()
+
+		if time.Since(startTime) < 60*time.Second {
+			bo.BackOff(ctx, fmt.Errorf("subproc early exit: %v", err))
+		} else {
+			// Reset the timeout, since the process ran for a while.
+			bo.BackOff(ctx, nil)
+		}
+
+		select {
+		case <-done:
+			return
+		default:
+		}
+	}
+}

+ 635 - 0
ipn/local.go

@@ -0,0 +1,635 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipn
+
+import (
+	"fmt"
+	"log"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/tailscale/wireguard-go/wgcfg"
+	"tailscale.com/control/controlclient"
+	"tailscale.com/logger"
+	"tailscale.com/portlist"
+	"tailscale.com/tailcfg"
+	"tailscale.com/version"
+	"tailscale.com/wgengine"
+	"tailscale.com/wgengine/filter"
+)
+
+// LocalBackend is the scaffolding between the Tailscale cloud control
+// plane and the local network stack.
+type LocalBackend struct {
+	logf            logger.Logf
+	notify          func(n Notify)
+	c               *controlclient.Client
+	e               wgengine.Engine
+	serverURL       string
+	backendLogID    string
+	portpoll        *portlist.Poller // may be nil
+	newDecompressor func() (controlclient.Decompressor, error)
+	cmpDiff         func(x, y interface{}) string
+
+	// The mutex protects the following elements.
+	mu           sync.Mutex
+	prefs        Prefs
+	state        State
+	hiCache      tailcfg.Hostinfo
+	netMapCache  *controlclient.NetworkMap
+	engineStatus EngineStatus
+	endPoints    []string
+	blocked      bool
+	authURL      string
+	interact     int
+
+	// statusLock must be held before calling statusChanged.Lock() or
+	// statusChanged.Broadcast().
+	statusLock    sync.Mutex
+	statusChanged *sync.Cond
+}
+
+func NewLocalBackend(logf logger.Logf, logid string, e wgengine.Engine) (*LocalBackend, error) {
+
+	if e == nil {
+		panic("ipn.NewLocalBackend: wgengine must not be nil")
+	}
+
+	// Default filter blocks everything, until Start() is called.
+	e.SetFilter(filter.NewAllowNone())
+
+	portpoll, err := portlist.NewPoller()
+	if err != nil {
+		logf("skipping portlist: %s\n", err)
+	}
+
+	b := LocalBackend{
+		logf:         logf,
+		e:            e,
+		backendLogID: logid,
+		state:        NoState,
+		portpoll:     portpoll,
+	}
+	b.statusChanged = sync.NewCond(&b.statusLock)
+
+	if b.portpoll != nil {
+		go b.portpoll.Run()
+		go b.runPoller()
+	}
+
+	return &b, nil
+}
+
+func (b *LocalBackend) Shutdown() {
+	if b.portpoll != nil {
+		b.portpoll.Close()
+	}
+	b.c.Shutdown()
+	b.e.Close()
+	b.e.Wait()
+}
+
+// SetDecompressor sets a decompression function, which must be a zstd
+// reader.
+//
+// This exists because the iOS/Mac NetworkExtension is very resource
+// constrained, and the zstd package is too heavy to fit in the
+// constrained RSS limit.
+func (b *LocalBackend) SetDecompressor(fn func() (controlclient.Decompressor, error)) {
+	b.newDecompressor = fn
+}
+
+// SetCmpDiff sets a comparison function used to generate logs of what
+// has changed in the network map.
+//
+// Typically the comparison function comes from go-cmp.
+// We don't wire it in directly here because the go-cmp package adds
+// 1.77mb to the binary size of the iOS NetworkExtension, which takes
+// away from its precious RSS limit.
+func (b *LocalBackend) SetCmpDiff(cmpDiff func(x, y interface{}) string) {
+	b.cmpDiff = cmpDiff
+}
+
+func (b *LocalBackend) Start(opts Options) error {
+	if b.c != nil {
+		// TODO(apenwarr): avoid the need to reinit controlclient.
+		// This will trigger a full relogin/reconfigure cycle every
+		// time a Handle reconnects to the backend. Ideally, we
+		// would send the new Prefs and everything would get back
+		// into sync with the minimal changes. But that's not how it
+		// is right now, which is a sign that the code is still too
+		// complicated.
+		b.c.Shutdown()
+	}
+
+	b.logf("Start: %v\n", opts.Prefs.Pretty())
+
+	hi := controlclient.NewHostinfo()
+	hi.BackendLogID = b.backendLogID
+	hi.FrontendLogID = opts.FrontendLogID
+
+	b.mu.Lock()
+	hi.Services = b.hiCache.Services // keep any previous session
+	b.hiCache = hi
+	b.state = NoState
+	b.serverURL = opts.ServerURL
+	b.prefs = opts.Prefs
+	b.notify = opts.Notify
+	b.netMapCache = nil
+	b.mu.Unlock()
+
+	b.updateFilter()
+
+	var err error
+	persist := b.prefs.Persist
+	if persist == nil {
+		// let controlclient initialize it
+		persist = &controlclient.Persist{}
+	}
+	cli, err := controlclient.New(controlclient.Options{
+		Logf: func(fmt string, args ...interface{}) {
+			b.logf("control: "+fmt, args...)
+		},
+		Persist:         *persist,
+		ServerURL:       b.serverURL,
+		Hostinfo:        &hi,
+		KeepAlive:       true,
+		NewDecompressor: b.newDecompressor,
+	})
+	if err != nil {
+		return err
+	}
+
+	b.mu.Lock()
+	b.c = cli
+	b.mu.Unlock()
+
+	if b.endPoints != nil {
+		cli.UpdateEndpoints(0, b.endPoints)
+	}
+
+	cli.SetStatusFunc(func(new controlclient.Status) {
+		if new.LoginFinished != nil {
+			// Auth completed, unblock the engine
+			b.blockEngineUpdates(false)
+			b.authReconfig()
+			noargs := struct{}{}
+			b.send(Notify{LoginFinished: &noargs})
+		}
+		if new.Persist != nil {
+			persist := *new.Persist // copy
+			b.prefs.Persist = &persist
+			np := b.prefs
+			b.send(Notify{Prefs: &np})
+		}
+		if new.NetMap != nil {
+			if b.netMapCache != nil && b.cmpDiff != nil {
+				s1 := strings.Split(b.netMapCache.Concise(), "\n")
+				s2 := strings.Split(new.NetMap.Concise(), "\n")
+				b.logf("netmap diff:\n%v\n", b.cmpDiff(s1, s2))
+			}
+			b.netMapCache = new.NetMap
+			b.send(Notify{NetMap: new.NetMap})
+			b.updateFilter()
+		}
+		if new.URL != "" {
+			b.logf("Received auth URL: %.20v...\n", new.URL)
+
+			b.mu.Lock()
+			interact := b.interact
+			b.authURL = new.URL
+			b.mu.Unlock()
+
+			if interact > 0 {
+				b.popBrowserAuthNow()
+			}
+		}
+		if new.Err != "" {
+			// TODO(crawshaw): display in the UI.
+			log.Print(new.Err)
+			return
+		}
+		if new.NetMap != nil {
+			if b.prefs.WantRunning || b.State() == NeedsLogin {
+				b.prefs.WantRunning = true
+			}
+			b.SetPrefs(b.prefs)
+		}
+		b.stateMachine()
+	})
+
+	b.e.SetStatusCallback(func(s *wgengine.Status, err error) {
+		if err != nil {
+			b.logf("wgengine status error: %#v", err)
+			return
+		}
+		if s == nil {
+			log.Fatalf("weird: non-error wgengine update with status=nil\n")
+		}
+
+		b.mu.Lock()
+		es := b.parseWgStatus(s)
+		b.mu.Unlock()
+
+		b.engineStatus = es
+
+		if b.c != nil {
+			b.c.UpdateEndpoints(0, s.LocalAddrs)
+		}
+		b.endPoints = append([]string{}, s.LocalAddrs...)
+		b.stateMachine()
+
+		b.statusLock.Lock()
+		b.statusChanged.Broadcast()
+		b.statusLock.Unlock()
+
+		b.send(Notify{Engine: &es})
+	})
+
+	blid := b.backendLogID
+	b.logf("Backend: logs: be:%v fe:%v\n", blid, opts.FrontendLogID)
+	b.send(Notify{BackendLogID: &blid})
+
+	cli.Login(nil, opts.LoginFlags)
+	return nil
+}
+
+func (b *LocalBackend) updateFilter() {
+	if !b.Prefs().UsePacketFilter {
+		b.e.SetFilter(filter.NewAllowAll())
+	} else if b.netMapCache == nil {
+		// Not configured yet, block everything
+		b.e.SetFilter(filter.NewAllowNone())
+	} else {
+		b.logf("netmap packet filter: %v\n", b.netMapCache.PacketFilter)
+		b.e.SetFilter(filter.New(b.netMapCache.PacketFilter))
+	}
+}
+
+func (b *LocalBackend) runPoller() {
+	for {
+		ports := <-b.portpoll.C
+		if ports == nil {
+			break
+		}
+		sl := []tailcfg.Service{}
+		for _, p := range ports {
+			var proto tailcfg.ServiceProto
+			if p.Proto == "tcp" {
+				proto = tailcfg.TCP
+			} else if p.Proto == "udp" {
+				proto = tailcfg.UDP
+			}
+			if p.Port == 53 || p.Port == 68 ||
+				p.Port == 5353 || p.Port == 5355 {
+				// uninteresting system services
+				continue
+			}
+			s := tailcfg.Service{
+				Proto:       proto,
+				Port:        p.Port,
+				Description: p.Process,
+			}
+			sl = append(sl, s)
+		}
+
+		b.mu.Lock()
+		hi := b.hiCache
+		hi.Services = sl
+		b.hiCache = hi
+		cli := b.c
+		b.mu.Unlock()
+
+		// b.c might not be started yet
+		if cli != nil {
+			cli.SetHostinfo(hi)
+		}
+	}
+}
+
+func (b *LocalBackend) send(n Notify) {
+	if b.notify != nil {
+		n.Version = version.LONG
+		b.notify(n)
+	}
+}
+
+func (b *LocalBackend) popBrowserAuthNow() {
+	b.mu.Lock()
+	url := b.authURL
+	b.interact = 0
+	b.authURL = ""
+	b.mu.Unlock()
+	b.logf("popBrowserAuthNow: url=%v\n", url != "")
+
+	b.blockEngineUpdates(true)
+	b.stopEngineAndWait()
+	b.send(Notify{BrowseToURL: &url})
+	if b.State() == Running {
+		b.enterState(Starting)
+	}
+}
+
+func (b *LocalBackend) State() State {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+
+	return b.state
+}
+
+func (b *LocalBackend) EngineStatus() EngineStatus {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+
+	return b.engineStatus
+}
+
+func (b *LocalBackend) StartLoginInteractive() {
+	b.assertClient()
+	b.mu.Lock()
+	b.interact++
+	url := b.authURL
+	b.mu.Unlock()
+	b.logf("StartLoginInteractive: url=%v\n", url != "")
+
+	if url != "" {
+		b.popBrowserAuthNow()
+	} else {
+		b.c.Login(nil, controlclient.LoginInteractive)
+	}
+}
+
+func (b *LocalBackend) FakeExpireAfter(x time.Duration) {
+	b.logf("FakeExpireAfter: %v\n", x)
+	if b.netMapCache != nil {
+		e := b.netMapCache.Expiry
+		if e.IsZero() || time.Until(e) > x {
+			b.netMapCache.Expiry = time.Now().Add(x)
+		}
+		b.send(Notify{NetMap: b.netMapCache})
+	}
+}
+
+func (b *LocalBackend) LocalAddrs() []wgcfg.CIDR {
+	if b.netMapCache != nil {
+		return b.netMapCache.Addresses
+	} else {
+		return nil
+	}
+}
+
+func (b *LocalBackend) Expiry() time.Time {
+	if b.netMapCache != nil {
+		return b.netMapCache.Expiry
+	} else {
+		return time.Time{}
+	}
+}
+
+func (b *LocalBackend) parseWgStatus(s *wgengine.Status) EngineStatus {
+	var ss []string
+	var rx, tx wgengine.ByteCount
+	peers := make(map[tailcfg.NodeKey]wgengine.PeerStatus)
+
+	live := 0
+	for _, p := range s.Peers {
+		if p.LastHandshake.IsZero() {
+			ss = append(ss, "x")
+		} else {
+			ss = append(ss, fmt.Sprintf("%d/%d", p.RxBytes, p.TxBytes))
+			live++
+			peers[p.NodeKey] = p
+		}
+		rx += p.RxBytes
+		tx += p.TxBytes
+	}
+	b.logf("v%v peers: %v\n", version.LONG, strings.Join(ss, " "))
+	return EngineStatus{
+		RBytes:    rx,
+		WBytes:    tx,
+		NumLive:   live,
+		LivePeers: peers,
+	}
+}
+
+func (b *LocalBackend) AdminPageURL() string {
+	return b.serverURL + "/admin/machines"
+}
+
+func (b *LocalBackend) Prefs() Prefs {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+
+	return b.prefs
+}
+
+func (b *LocalBackend) SetPrefs(new Prefs) {
+	b.mu.Lock()
+	old := b.prefs
+	new.Persist = old.Persist // caller isn't allowed to override this
+	b.prefs = new
+	b.mu.Unlock()
+
+	if old.WantRunning != new.WantRunning {
+		b.stateMachine()
+	} else {
+		b.authReconfig()
+	}
+
+	b.logf("SetPrefs: %v\n", new.Pretty())
+	b.send(Notify{Prefs: &new})
+}
+
+// Note: return value may be nil, if we haven't received a netmap yet.
+func (b *LocalBackend) NetMap() *controlclient.NetworkMap {
+	return b.netMapCache
+}
+
+func (b *LocalBackend) blockEngineUpdates(block bool) {
+	// TODO(apenwarr): probably need mutex here (and several other places)
+	b.logf("blockEngineUpdates(%v)\n", block)
+
+	b.mu.Lock()
+	b.blocked = block
+	b.mu.Unlock()
+}
+
+func (b *LocalBackend) authReconfig() {
+	b.mu.Lock()
+	blocked := b.blocked
+	uc := b.prefs
+	nm := b.netMapCache
+	b.mu.Unlock()
+
+	if blocked {
+		b.logf("authReconfig: blocked, skipping.\n")
+		return
+	}
+	if nm == nil {
+		b.logf("authReconfig: netmap not yet valid. Skipping.\n")
+		return
+	}
+	if !uc.WantRunning {
+		b.logf("authReconfig: skipping because !WantRunning.\n")
+		return
+	}
+	b.logf("Configuring wireguard connection.\n")
+
+	uflags := controlclient.UDefault
+	if uc.RouteAll {
+		uflags |= controlclient.UAllowDefaultRoute
+		// TODO(apenwarr): Make subnet routes a different pref?
+		uflags |= controlclient.UAllowSubnetRoutes
+		// TODO(apenwarr): Remove this once we sort out subnet routes.
+		//  Right now default routes are broken in Windows, but
+		//  controlclient doesn't properly send subnet routes. So
+		//  let's convert a default route into a subnet route in order
+		//  to allow experimentation.
+		uflags |= controlclient.UHackDefaultRoute
+	}
+	if uc.AllowSingleHosts {
+		uflags |= controlclient.UAllowSingleHosts
+	}
+	b.logf("reconfig: ra=%v dns=%v 0x%02x\n", uc.RouteAll, uc.CorpDNS, uflags)
+
+	if nm != nil {
+		dns := nm.DNS
+		dom := nm.DNSDomains
+		if !uc.CorpDNS {
+			dns = []wgcfg.IP{}
+			dom = []string{}
+		}
+		cfg, err := nm.WGCfg(uflags, dns)
+		if err != nil {
+			log.Fatalf("WGCfg: %v\n", err)
+		}
+
+		err = b.e.Reconfig(cfg, dom)
+		if err != nil {
+			b.logf("reconfig: %v", err)
+		}
+	}
+}
+
+func (b *LocalBackend) enterState(newState State) {
+	b.mu.Lock()
+	state := b.state
+	prefs := b.prefs
+	b.mu.Unlock()
+
+	if state == newState {
+		return
+	}
+	b.logf("Switching ipn state %v -> %v (WantRunning=%v)\n",
+		state, newState, prefs.WantRunning)
+	if b.notify != nil {
+		b.send(Notify{State: &newState})
+	}
+
+	b.state = newState
+	switch newState {
+	case NeedsLogin:
+		b.blockEngineUpdates(true)
+		fallthrough
+	case Stopped:
+		err := b.e.Reconfig(&wgcfg.Config{}, nil)
+		if err != nil {
+			b.logf("Reconfig(down): %v\n", err)
+		}
+	case Starting, NeedsMachineAuth:
+		b.authReconfig()
+		// Needed so that UpdateEndpoints can run
+		b.e.RequestStatus()
+	case Running:
+		break
+	default:
+		b.logf("Weird: unknown newState %#v\n", newState)
+	}
+
+}
+
+func (b *LocalBackend) nextState() State {
+	b.assertClient()
+	state := b.State()
+
+	if b.netMapCache == nil {
+		if b.c.AuthCantContinue() {
+			// Auth was interrupted or waiting for URL visit,
+			// so it won't proceed without human help.
+			return NeedsLogin
+		} else {
+			// Auth or map request needs to finish
+			return state
+		}
+	} else if !b.prefs.WantRunning {
+		return Stopped
+	} else if e := b.netMapCache.Expiry; !e.IsZero() && time.Until(e) <= 0 {
+		return NeedsLogin
+	} else if b.netMapCache.MachineStatus != tailcfg.MachineAuthorized {
+		// TODO(crawshaw): handle tailcfg.MachineInvalid
+		return NeedsMachineAuth
+	} else if state == NeedsMachineAuth {
+		// (if we get here, we know MachineAuthorized == true)
+		return Starting
+	} else if state == Starting {
+		if b.EngineStatus().NumLive > 0 {
+			return Running
+		} else {
+			return state
+		}
+	} else if state == Running {
+		return Running
+	} else {
+		return Starting
+	}
+}
+
+func (b *LocalBackend) RequestEngineStatus() {
+	b.e.RequestStatus()
+}
+
+// TODO(apenwarr): use a channel or something to prevent re-entrancy?
+//  Or maybe just call the state machine from fewer places.
+func (b *LocalBackend) stateMachine() {
+	b.enterState(b.nextState())
+}
+
+func (b *LocalBackend) stopEngineAndWait() {
+	b.logf("stopEngineAndWait...\n")
+	b.e.Reconfig(&wgcfg.Config{}, nil)
+	b.requestEngineStatusAndWait()
+	b.logf("stopEngineAndWait: done.\n")
+}
+
+// Requests the wgengine status, and does not return until the status
+// was delivered (to the usual callback).
+func (b *LocalBackend) requestEngineStatusAndWait() {
+	b.logf("requestEngineStatusAndWait\n")
+
+	b.statusLock.Lock()
+	go b.e.RequestStatus()
+	b.logf("requestEngineStatusAndWait: waiting...\n")
+	b.statusChanged.Wait() // temporarily releases lock while waiting
+	b.logf("requestEngineStatusAndWait: got status update.\n")
+	b.statusLock.Unlock()
+}
+
+// NOTE(apenwarr): No easy way to persist logged-out status.
+//  Maybe that's for the better; if someone logs out accidentally,
+//  rebooting will fix it.
+func (b *LocalBackend) Logout() {
+	b.assertClient()
+	b.netMapCache = nil
+	b.c.Logout()
+	b.netMapCache = nil
+	b.stateMachine()
+}
+
+func (b *LocalBackend) assertClient() {
+	if b.c == nil {
+		panic("LocalBackend.assertClient: b.c == nil")
+	}
+}

+ 249 - 0
ipn/message.go

@@ -0,0 +1,249 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipn
+
+import (
+	"encoding/binary"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"log"
+	"time"
+
+	"tailscale.com/logger"
+	"tailscale.com/version"
+)
+
+type NoArgs struct{}
+
+type StartArgs struct {
+	Opts Options
+}
+
+type SetPrefsArgs struct {
+	New Prefs
+}
+
+type FakeExpireAfterArgs struct {
+	Duration time.Duration
+}
+
+// A command message sent to the server. Exactly one of these must be non-nil.
+type Command struct {
+	Version               string
+	Quit                  *NoArgs
+	Start                 *StartArgs
+	StartLoginInteractive *NoArgs
+	Logout                *NoArgs
+	SetPrefs              *SetPrefsArgs
+	RequestEngineStatus   *NoArgs
+	FakeExpireAfter       *FakeExpireAfterArgs
+}
+
+type BackendServer struct {
+	logf          logger.Logf
+	b             Backend        // the Backend we are serving up
+	sendNotifyMsg func(b []byte) // send a notification message
+	GotQuit       bool           // a Quit command was received
+}
+
+func NewBackendServer(logf logger.Logf, b Backend, sendNotifyMsg func(b []byte)) *BackendServer {
+	return &BackendServer{
+		logf:          logf,
+		b:             b,
+		sendNotifyMsg: sendNotifyMsg,
+	}
+}
+
+func (bs *BackendServer) send(n Notify) {
+	n.Version = version.LONG
+	b, err := json.Marshal(n)
+	if err != nil {
+		log.Fatalf("Failed json.Marshal(notify): %v\n%#v\n", err, n)
+	}
+	bs.sendNotifyMsg(b)
+}
+
+// Inform the BackendServer of an incoming message.
+func (bs *BackendServer) GotCommandMsg(b []byte) error {
+	cmd := Command{}
+	if err := json.Unmarshal(b, &cmd); err != nil {
+		return err
+	}
+	return bs.GotCommand(&cmd)
+}
+
+func (bs *BackendServer) GotCommand(cmd *Command) error {
+	if cmd.Version != version.LONG {
+		vs := fmt.Sprintf("Version mismatch! frontend=%#v backend=%#v\n",
+			cmd.Version, version.LONG)
+		bs.logf("%s\n", vs)
+		// ignore the command, but send a message back to the
+		// caller so it can realize the version mismatch too.
+		// We don't want to exit because it might cause a crash
+		// loop, and restarting won't fix the problem.
+		bs.send(Notify{
+			ErrMessage: &vs,
+		})
+		return nil
+	}
+	if cmd.Quit != nil {
+		bs.GotQuit = true
+		return errors.New("Quit command received")
+	}
+
+	if c := cmd.Start; c != nil {
+		opts := c.Opts
+		opts.Notify = bs.send
+		return bs.b.Start(opts)
+	} else if c := cmd.StartLoginInteractive; c != nil {
+		bs.b.StartLoginInteractive()
+		return nil
+	} else if c := cmd.Logout; c != nil {
+		bs.b.Logout()
+		return nil
+	} else if c := cmd.SetPrefs; c != nil {
+		bs.b.SetPrefs(c.New)
+		return nil
+	} else if c := cmd.RequestEngineStatus; c != nil {
+		bs.b.RequestEngineStatus()
+		return nil
+	} else if c := cmd.FakeExpireAfter; c != nil {
+		bs.b.FakeExpireAfter(c.Duration)
+		return nil
+	} else {
+		return fmt.Errorf("BackendServer.Do: no command specified")
+	}
+}
+
+func (bs *BackendServer) Reset() error {
+	// Tell the backend we got a Logout command, which will cause it
+	// to forget all its authentication information.
+	return bs.GotCommand(&Command{Logout: &NoArgs{}})
+}
+
+type BackendClient struct {
+	logf           logger.Logf
+	sendCommandMsg func(b []byte)
+	notify         func(n Notify)
+}
+
+func NewBackendClient(logf logger.Logf, sendCommandMsg func(b []byte)) *BackendClient {
+	return &BackendClient{
+		logf:           logf,
+		sendCommandMsg: sendCommandMsg,
+	}
+}
+
+func (bc *BackendClient) GotNotifyMsg(b []byte) {
+	n := Notify{}
+	if err := json.Unmarshal(b, &n); err != nil {
+		log.Fatalf("BackendClient.Notify: cannot decode message")
+	}
+	if n.Version != version.LONG {
+		vs := fmt.Sprintf("Version mismatch! frontend=%#v backend=%#v",
+			version.LONG, n.Version)
+		bc.logf("%s\n", vs)
+		// delete anything in the notification except the version,
+		// to prevent incorrect operation.
+		n = Notify{
+			Version:    n.Version,
+			ErrMessage: &vs,
+		}
+	}
+	if bc.notify != nil {
+		bc.notify(n)
+	}
+}
+
+func (bc *BackendClient) send(cmd Command) {
+	cmd.Version = version.LONG
+	b, err := json.Marshal(cmd)
+	if err != nil {
+		log.Fatalf("Failed json.Marshal(cmd): %v\n%#v\n", err, cmd)
+	}
+	bc.sendCommandMsg(b)
+}
+
+func (bc *BackendClient) Quit() error {
+	bc.send(Command{Quit: &NoArgs{}})
+	return nil
+}
+
+func (bc *BackendClient) Start(opts Options) error {
+	bc.notify = opts.Notify
+	opts.Notify = nil // server can't call our function pointer
+	bc.send(Command{Start: &StartArgs{Opts: opts}})
+	return nil // remote Start() errors must be handled remotely
+}
+
+func (bc *BackendClient) StartLoginInteractive() {
+	bc.send(Command{StartLoginInteractive: &NoArgs{}})
+}
+
+func (bc *BackendClient) Logout() {
+	bc.send(Command{Logout: &NoArgs{}})
+}
+
+func (bc *BackendClient) SetPrefs(new Prefs) {
+	bc.send(Command{SetPrefs: &SetPrefsArgs{New: new}})
+}
+
+func (bc *BackendClient) RequestEngineStatus() {
+	bc.send(Command{RequestEngineStatus: &NoArgs{}})
+}
+
+func (bc *BackendClient) FakeExpireAfter(x time.Duration) {
+	bc.send(Command{FakeExpireAfter: &FakeExpireAfterArgs{Duration: x}})
+}
+
+const MSG_MAX = 1024 * 1024
+
+// TODO(apenwarr): incremental json decode?
+//  That would let us avoid storing the whole byte array uselessly in RAM.
+func ReadMsg(r io.Reader) ([]byte, error) {
+	cb := make([]byte, 4)
+	_, err := io.ReadFull(r, cb)
+	if err != nil {
+		return nil, err
+	}
+	n := binary.LittleEndian.Uint32(cb)
+	if n > 1024*1024 {
+		return nil, fmt.Errorf("ipn.Read: message too large: %v bytes", n)
+	}
+	b := make([]byte, n)
+	_, err = io.ReadFull(r, b)
+	if err != nil {
+		return nil, err
+	}
+	return b, nil
+}
+
+// TODO(apenwarr): incremental json encode?
+//  That would save RAM, at the expense of having to encode once so that
+//  we can produce the initial byte count.
+func WriteMsg(w io.Writer, b []byte) error {
+	cb := make([]byte, 4)
+	if len(b) > MSG_MAX {
+		return fmt.Errorf("ipn.Write: message too large: %v bytes", len(b))
+	}
+	binary.LittleEndian.PutUint32(cb, uint32(len(b)))
+	n, err := w.Write(cb)
+	if err != nil {
+		return err
+	}
+	if n != 4 {
+		return fmt.Errorf("ipn.Write: short write: %v bytes (wanted 4)", n)
+	}
+	n, err = w.Write(b)
+	if err != nil {
+		return err
+	}
+	if n != len(b) {
+		return fmt.Errorf("ipn.Write: short write: %v bytes (wanted %v)", n, len(b))
+	}
+	return nil
+}

+ 171 - 0
ipn/message_test.go

@@ -0,0 +1,171 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipn
+
+import (
+	"bytes"
+	"tailscale.com/testy"
+	"testing"
+	"time"
+)
+
+func TestReadWrite(t *testing.T) {
+	testy.FixLogs(t)
+	defer testy.UnfixLogs(t)
+
+	rc := testy.NewResourceCheck()
+	defer rc.Assert(t)
+
+	buf := bytes.Buffer{}
+	err := WriteMsg(&buf, []byte("Test string1"))
+	if err != nil {
+		t.Fatalf("write1: %v\n", err)
+	}
+	err = WriteMsg(&buf, []byte(""))
+	if err != nil {
+		t.Fatalf("write2: %v\n", err)
+	}
+	err = WriteMsg(&buf, []byte("Test3"))
+	if err != nil {
+		t.Fatalf("write3: %v\n", err)
+	}
+
+	b, err := ReadMsg(&buf)
+	if want, got := "Test string1", string(b); want != got {
+		t.Fatalf("read1: %#v != %#v\n", want, got)
+	}
+	b, err = ReadMsg(&buf)
+	if want, got := "", string(b); want != got {
+		t.Fatalf("read2: %#v != %#v\n", want, got)
+	}
+	b, err = ReadMsg(&buf)
+	if want, got := "Test3", string(b); want != got {
+		t.Fatalf("read3: %#v != %#v\n", want, got)
+	}
+
+	b, err = ReadMsg(&buf)
+	if err == nil {
+		t.Fatalf("read4: expected error, got %#v\n", b)
+	}
+}
+
+func TestClientServer(t *testing.T) {
+	testy.FixLogs(t)
+	defer testy.UnfixLogs(t)
+
+	rc := testy.NewResourceCheck()
+	defer rc.Assert(t)
+
+	b := &FakeBackend{}
+	var bs *BackendServer
+	var bc *BackendClient
+	serverToClientCh := make(chan []byte, 16)
+	defer close(serverToClientCh)
+	go func() {
+		for b := range serverToClientCh {
+			bc.GotNotifyMsg(b)
+		}
+	}()
+	serverToClient := func(b []byte) {
+		serverToClientCh <- append([]byte{}, b...)
+	}
+	clientToServer := func(b []byte) {
+		bs.GotCommandMsg(b)
+	}
+	slogf := func(fmt string, args ...interface{}) {
+		t.Logf("s: "+fmt, args...)
+	}
+	clogf := func(fmt string, args ...interface{}) {
+		t.Logf("c: "+fmt, args...)
+	}
+	bs = NewBackendServer(slogf, b, serverToClient)
+	bc = NewBackendClient(clogf, clientToServer)
+
+	ch := make(chan Notify, 256)
+	h, err := NewHandle(bc, clogf, Options{
+		ServerURL: "http://example.com/fake",
+		Notify: func(n Notify) {
+			ch <- n
+		},
+	})
+	if err != nil {
+		t.Fatalf("NewHandle error: %v\n", err)
+	}
+
+	notes := Notify{}
+	nn := []Notify{}
+	processNote := func(n Notify) {
+		nn = append(nn, n)
+		if n.State != nil {
+			t.Logf("state change: %v", *n.State)
+			notes.State = n.State
+		}
+		if n.Prefs != nil {
+			notes.Prefs = n.Prefs
+		}
+		if n.NetMap != nil {
+			notes.NetMap = n.NetMap
+		}
+		if n.Engine != nil {
+			notes.Engine = n.Engine
+		}
+		if n.BrowseToURL != nil {
+			notes.BrowseToURL = n.BrowseToURL
+		}
+	}
+	notesState := func() State {
+		if notes.State != nil {
+			return *notes.State
+		}
+		return NoState
+	}
+
+	flushUntil := func(wantFlush State) {
+		t.Helper()
+		timer := time.NewTimer(1 * time.Second)
+	loop:
+		for {
+			select {
+			case n := <-ch:
+				processNote(n)
+				if notesState() == wantFlush {
+					break loop
+				}
+			case <-timer.C:
+				t.Fatalf("timeout waiting for state %v, got %v", wantFlush, notes.State)
+			}
+		}
+		timer.Stop()
+	loop2:
+		for {
+			select {
+			case n := <-ch:
+				processNote(n)
+			default:
+				break loop2
+			}
+		}
+		if got, want := h.State(), notesState(); got != want {
+			t.Errorf("h.State()=%v, notes.State=%v (on flush until %v)\n", got, want, wantFlush)
+		}
+	}
+
+	flushUntil(NeedsLogin)
+
+	h.StartLoginInteractive()
+	flushUntil(Running)
+	if notes.NetMap == nil && h.NetMap() != nil {
+		t.Errorf("notes.NetMap == nil while h.NetMap != nil\nnotes:\n%v", nn)
+	}
+
+	h.UpdatePrefs(func(p Prefs) Prefs {
+		p.WantRunning = false
+		return p
+	})
+	flushUntil(Stopped)
+
+	h.Logout()
+	flushUntil(NeedsLogin)
+}

+ 149 - 0
ipn/prefs.go

@@ -0,0 +1,149 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipn
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"os"
+	"path/filepath"
+
+	"tailscale.com/atomicfile"
+	"tailscale.com/control/controlclient"
+)
+
+type Prefs struct {
+	RouteAll         bool
+	AllowSingleHosts bool
+	CorpDNS          bool
+	WantRunning      bool
+	NotepadURLs      bool
+	UsePacketFilter  bool
+
+	// The Persist field is named 'Config' in the file for backward
+	// compatibility with earlier versions.
+	// TODO(apenwarr): We should move this out of here, it's not a pref.
+	//  We can maybe do that once we're sure which module should persist
+	//  it (backend or frontend?)
+	Persist *controlclient.Persist `json:"Config"`
+}
+
+func (uc *Prefs) Pretty() string {
+	var ucp string
+	if uc.Persist != nil {
+		ucp = uc.Persist.Pretty()
+	} else {
+		ucp = "Persist=nil"
+	}
+	return fmt.Sprintf("Prefs{ra=%v mesh=%v dns=%v want=%v notepad=%v %v}",
+		uc.RouteAll, uc.AllowSingleHosts, uc.CorpDNS, uc.WantRunning,
+		uc.NotepadURLs, ucp)
+}
+
+func (uc *Prefs) ToBytes() []byte {
+	data, err := json.MarshalIndent(uc, "", "\t")
+	if err != nil {
+		log.Fatalf("Prefs marshal: %v\n", err)
+	}
+	return data
+}
+
+func (uc *Prefs) Equals(uc2 *Prefs) bool {
+	b1 := uc.ToBytes()
+	b2 := uc2.ToBytes()
+	return bytes.Equal(b1, b2)
+}
+
+func NewPrefs() Prefs {
+	return Prefs{
+		// Provide default values for options which are normally
+		// true, but might be missing from the json data for any
+		// reason. The json can still override them to false.
+		RouteAll:         true,
+		AllowSingleHosts: true,
+		CorpDNS:          true,
+		WantRunning:      true,
+		UsePacketFilter:  true,
+	}
+}
+
+func PrefsFromBytes(b []byte, enforceDefaults bool) (Prefs, error) {
+	uc := NewPrefs()
+	if len(b) == 0 {
+		return uc, nil
+	}
+	persist := &controlclient.Persist{}
+	err := json.Unmarshal(b, persist)
+	if err == nil && (persist.Provider != "" || persist.LoginName != "") {
+		// old-style relaynode config; import it
+		uc.Persist = persist
+	} else {
+		err = json.Unmarshal(b, &uc)
+		if err != nil {
+			log.Printf("Prefs parse: %v: %v\n", err, b)
+		}
+	}
+	if enforceDefaults {
+		uc.RouteAll = true
+		uc.AllowSingleHosts = true
+	}
+	return uc, err
+}
+
+func (uc *Prefs) Copy() *Prefs {
+	uc2, err := PrefsFromBytes(uc.ToBytes(), false)
+	if err != nil {
+		log.Fatalf("Prefs was uncopyable: %v\n", err)
+	}
+	return &uc2
+}
+
+func LoadPrefs(filename string, enforceDefaults bool) Prefs {
+	log.Printf("Loading prefs %v\n", filename)
+	data, err := ioutil.ReadFile(filename)
+	uc := NewPrefs()
+	if err != nil {
+		log.Printf("Read: %v: %v\n", filename, err)
+		goto fail
+	}
+	uc, err = PrefsFromBytes(data, enforceDefaults)
+	if err != nil {
+		log.Printf("Parse: %v: %v\n", filename, err)
+		goto fail
+	}
+	goto post
+fail:
+	log.Printf("failed to load config. Generating a new one.\n")
+	uc = NewPrefs()
+	uc.WantRunning = true
+post:
+	// Update: we changed our minds :)
+	// Versabank would like to persist the setting across reboots, for now,
+	// because they don't fully trust the system and want to be able to
+	// leave it turned off when not in use. Eventually we need to make
+	// all motivation for this go away.
+	if false {
+		// Usability note: we always want WantRunning = true on startup.
+		// That way, if someone accidentally disables their VPN and doesn't
+		// know how, rebooting will fix it.
+		// We still persist WantRunning just in case we change our minds on
+		// this topic.
+		uc.WantRunning = true
+	}
+	log.Printf("Loaded prefs %v %v\n", filename, uc.Pretty())
+	return uc
+}
+
+func SavePrefs(filename string, uc *Prefs) {
+	log.Printf("Saving prefs %v %v\n", filename, uc.Pretty())
+	data := uc.ToBytes()
+	os.MkdirAll(filepath.Dir(filename), 0700)
+	if err := atomicfile.WriteFile(filename, data, 0666); err != nil {
+		log.Printf("SavePrefs: %v\n", err)
+	}
+}

+ 68 - 0
ipn/prefs_test.go

@@ -0,0 +1,68 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipn
+
+import (
+	"testing"
+
+	"tailscale.com/control/controlclient"
+)
+
+func checkPrefs(t *testing.T, p Prefs) {
+	var err error
+	var p2, p2c Prefs
+	var p2b Prefs
+
+	pp := p.Pretty()
+	if pp == "" {
+		t.Fatalf("default p.Pretty() failed\n")
+	}
+	t.Logf("\npp:   %#v\n", pp)
+	b := p.ToBytes()
+	if len(b) == 0 {
+		t.Fatalf("default p.ToBytes() failed\n")
+	}
+	if p != p {
+		t.Fatalf("p != p\n")
+	}
+	p2 = p
+	p2.RouteAll = true
+	if p == p2 {
+		t.Fatalf("p == p2\n")
+	}
+	p2b, err = PrefsFromBytes(p2.ToBytes(), false)
+	if err != nil {
+		t.Fatalf("PrefsFromBytes(p2) failed\n")
+	}
+	p2p := p2.Pretty()
+	p2bp := p2b.Pretty()
+	t.Logf("\np2p:  %#v\np2bp: %#v\n", p2p, p2bp)
+	if p2p != p2bp {
+		t.Fatalf("p2p != p2bp\n%#v\n%#v\n", p2p, p2bp)
+	}
+	if !p2.Equals(&p2b) {
+		t.Fatalf("p2 != p2b\n%#v\n%#v\n", p2, p2b)
+	}
+	p2c = *p2.Copy()
+	if !p2b.Equals(&p2c) {
+		t.Fatalf("p2b != p2c\n")
+	}
+}
+
+func TestBasicPrefs(t *testing.T) {
+	p := Prefs{}
+	checkPrefs(t, p)
+}
+
+func TestPrefsPersist(t *testing.T) {
+	c := controlclient.Persist{
+		LoginName: "[email protected]",
+	}
+	p := Prefs{
+		CorpDNS: true,
+		Persist: &c,
+	}
+	checkPrefs(t, p)
+}

+ 10 - 0
logger/logger.go

@@ -0,0 +1,10 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package logger defines a type for writing to logs. It's just a
+// convenience type so that we don't have to pass verbose func(...)
+// types around.
+package logger
+
+type Logf func(fmt string, args ...interface{})

+ 171 - 0
logpolicy/logpolicy.go

@@ -0,0 +1,171 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package logpolicy
+
+import (
+	"context"
+	"encoding/json"
+	"io/ioutil"
+	"log"
+	"os"
+	"path/filepath"
+	"runtime"
+
+	"github.com/klauspost/compress/zstd"
+	"golang.org/x/crypto/ssh/terminal"
+	"tailscale.com/atomicfile"
+	"tailscale.com/logtail"
+	"tailscale.com/logtail/filch"
+	"tailscale.com/version"
+)
+
+type Config struct {
+	Collection string
+	PrivateID  logtail.PrivateID
+	PublicID   logtail.PublicID
+}
+
+type Policy struct {
+	Logtail  logtail.Logger
+	PublicID logtail.PublicID
+}
+
+func (c *Config) ToBytes() []byte {
+	data, err := json.MarshalIndent(c, "", "\t")
+	if err != nil {
+		log.Fatalf("logpolicy.Config marshal: %v\n", err)
+	}
+	return data
+}
+
+func (c *Config) Save(statefile string) {
+	c.PublicID = c.PrivateID.Public()
+	os.MkdirAll(filepath.Dir(statefile), 0777)
+	data := c.ToBytes()
+	if err := atomicfile.WriteFile(statefile, data, 0600); err != nil {
+		log.Printf("logpolicy.Config write: %v\n", err)
+	}
+}
+
+func ConfigFromBytes(b []byte) (*Config, error) {
+	c := &Config{}
+	if err := json.Unmarshal(b, c); err != nil {
+		return nil, err
+	}
+	return c, nil
+}
+
+type stderrWriter struct{}
+
+// Always writes to the latest os.Stderr, even if os.Stderr changes
+// during the lifetime of this object.
+func (l *stderrWriter) Write(buf []byte) (int, error) {
+	return os.Stderr.Write(buf)
+}
+
+type logWriter struct {
+	logger *log.Logger
+}
+
+func (l *logWriter) Write(buf []byte) (int, error) {
+	l.logger.Print(string(buf))
+	return len(buf), nil
+}
+
+func New(collection string, filePrefix string) *Policy {
+	statefile := filePrefix + ".log.conf"
+	var lflags int
+	if terminal.IsTerminal(2) || runtime.GOOS == "windows" {
+		lflags = 0
+	} else {
+		lflags = log.LstdFlags
+	}
+	console := log.New(&stderrWriter{}, "", lflags)
+
+	var oldc *Config
+	data, err := ioutil.ReadFile(statefile)
+	if err != nil {
+		log.Printf("logpolicy.Read %v: %v\n", statefile, err)
+		oldc = &Config{}
+		oldc.Collection = collection
+	} else {
+		oldc, err = ConfigFromBytes(data)
+		if err != nil {
+			log.Printf("logpolicy.Config unmarshal: %v\n", err)
+			oldc = &Config{}
+		}
+	}
+
+	newc := *oldc
+	if newc.Collection != collection {
+		log.Printf("logpolicy.Config: config collection %q does not match %q", newc.Collection, collection)
+		// We picked up an incompatible config file.
+		// Regenerate the private ID.
+		newc.PrivateID = logtail.PrivateID{}
+		newc.Collection = collection
+	}
+	if newc.PrivateID == (logtail.PrivateID{}) {
+		newc.PrivateID, err = logtail.NewPrivateID()
+		if err != nil {
+			log.Fatalf("logpolicy: NewPrivateID() should never fail")
+		}
+	}
+	newc.PublicID = newc.PrivateID.Public()
+	if newc != *oldc {
+		newc.Save(statefile)
+	}
+
+	c := logtail.Config{
+		Collection: newc.Collection,
+		PrivateID:  newc.PrivateID,
+		Stderr:     &logWriter{console},
+		NewZstdEncoder: func() logtail.Encoder {
+			w, err := zstd.NewWriter(nil)
+			if err != nil {
+				panic(err)
+			}
+			return w
+		},
+	}
+
+	// TODO(crawshaw): filePrefix is a place meant to store configuration.
+	//                 OS policies usually have other preferred places to
+	//                 store logs. Use one of them?
+	filchBuf, filchErr := filch.New(filePrefix, filch.Options{})
+	if filchBuf != nil {
+		c.Buffer = filchBuf
+	}
+	lw := logtail.Log(c)
+	log.SetFlags(0) // other logflags are set on console, not here
+	log.SetOutput(lw)
+
+	log.Printf("Program starting: v%v: %#v\n", version.LONG, os.Args)
+	log.Printf("LogID: %v\n", newc.PublicID)
+	if filchErr != nil {
+		log.Printf("filch failed: %v", err)
+	}
+
+	return &Policy{
+		Logtail:  lw,
+		PublicID: newc.PublicID,
+	}
+}
+
+// Close immediately shuts down the logger.
+func (p *Policy) Close() {
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel()
+	p.Shutdown(ctx)
+}
+
+// Shutdown gracefully shuts down the logger, finishing any current
+// log upload if it can be done before ctx is canceled.
+func (p *Policy) Shutdown(ctx context.Context) error {
+	log.Printf("flushing log.\n")
+	if p.Logtail != nil {
+		return p.Logtail.Shutdown(ctx)
+	}
+	return nil
+}

+ 6 - 0
logtail/.gitignore

@@ -0,0 +1,6 @@
+*~
+*.out
+/example/logadopt/logadopt
+/example/logreprocess/logreprocess
+/example/logtail/logtail
+/logtail

+ 10 - 0
logtail/README.md

@@ -0,0 +1,10 @@
+# Tailscale Logs Service
+
+This github repository contains libraries, documentation, and examples
+for working with the public API of the tailscale logs service.
+
+For a very quick introduction to the core features, read the
+[API docs](api.md) and peruse the
+[logs reprocessing](./example/logreprocess/demo.sh) example.
+
+For more information, write to [email protected].

+ 195 - 0
logtail/api.md

@@ -0,0 +1,195 @@
+# Tailscale Logs Service
+
+The Tailscale Logs Service defines a REST interface for configuring, storing,
+retrieving, and processing log entries.
+
+# Overview
+
+HTTP requests are received at the service **base URL**
+[https://log.tailscale.io](https://log.tailscale.io), and return JSON-encoded
+responses using standard HTTP response codes.
+
+Authorization for the configuration and retrieval APIs is done with a secret
+API key passed as the HTTP basic auth username. Secret keys are generated via
+the web UI at base URL. An example of using basic auth with curl:
+
+    curl -u <log_api_key>: https://log.tailscale.io/collections
+
+In the future, an HTTP header will allow using MessagePack instead of JSON.
+
+## Collections
+
+Logs are organized into collections. Inside each collection is any number of
+instances.
+
+A collection is a domain name. It is a grouping of related logs. As a
+guideline, create one collection per product using subdomains of your
+company's domain name. Collections must be registered with the logs service
+before any attempt is made to store logs.
+
+## Instances
+
+Each collection is a set of instances. There is one instance per machine
+writing logs.
+
+An instance has a name and a number. An instance has a **private** and
+**public** ID. The private ID is a 32-byte random number encoded as hex.
+The public ID is the SHA-256 hash of the private ID, encoded as hex.
+
+The private ID is used to write logs. The only copy of the private ID
+should be on the machine sending logs. Ideally it is generated on the
+machine. Logs can be written as soon as a private ID is generated. 
+
+The public ID is used to read and adopt logs. It is designed to be sent
+to a service that also holds a logs service API key.
+
+The tailscale logs service will store any logs for a short period of time.
+To enable logs retention, the log can be **adopted** using the public ID
+and a logs service API key.
+Once this is done, logs will be retained long-term (for the configured
+retention period).
+
+Unadopted instance logs are stored temporarily to help with debugging:
+a misconfigured machine writing logs with a bad ID can be spotted by
+reading the logs.
+If a public ID is not adopted, storage is tightly capped and logs are
+deleted after 12 hours.
+
+# APIs
+
+## Storage
+
+### `POST /c/<collection-name>/<private-ID>` — send a log
+
+The body of the request is JSON.
+
+A **single message** is an object with properties:
+
+`{ }`
+
+The client may send any properties it wants in the JSON message, except
+for the `logtail` property which has special meaning. Inside the logtail
+object the client may only set the following properties:
+
+- `client_time` in the format of RFC3339: "2006-01-02T15:04:05.999999999Z07:00"
+
+A future version of the logs service API will also support:
+
+- `client_time_offset` a integer of nanoseconds since the client was reset
+- `client_time_reset` a boolean if set to true resets the time offset counter
+
+On receipt by the server the `client_time_offset` is transformed into a
+`client_time` based on the `server_time` when the first (or
+client_time_reset) event was received. 
+
+If any other properties are set in the logtail object they are moved into
+the "error" field, the message is saved and a 4xx status code is returned.
+
+A **batch of messages** is a JSON array filled with single message objects:
+
+`[ { }, { }, ... ]`
+
+If any of the array entries are not objects, the content is converted
+into a message with a `"logtail": { "error": ...}` property, saved, and
+a 4xx status code is returned.
+
+Similarly any other request content not matching one of these formats is
+saved in a logtail error field, and a 4xx status code is returned.
+
+An invalid collection name returns `{"error": "invalid collection name"}`
+along with a 403 status code.
+
+Clients are encouraged to:
+
+- POST as rapidly as possible (if not battery constrained). This minimizes
+  both the time necessary to see logs in a log viewer and the chance of
+  losing logs.
+- Use HTTP/2 when streaming logs, as it does a much better job of
+  maintaining a TLS connection to minimize overhead for subsequent posts.
+
+A future version of logs service API will support sending requests with
+`Content-Encoding: zstd`.
+
+## Retrieval
+
+### `GET /collections` — query the set of collections and instances
+
+Returns a JSON object listing all of the named collections.
+
+The caller can query-encode the following fields:
+
+- `collection-name` — limit the results to one collection
+
+    ```
+    {
+      "collections": {
+        "collection1.yourcompany.com": {
+          "instances": {
+            "<logtail.PublicID>" :{
+              "first-seen": "timestamp",
+              "size": 4096
+            },
+            "<logtail.PublicID>" :{
+              "first-seen": "timestamp",
+              "size": 512000,
+              "orphan": true,
+            }
+          }
+        }
+      }
+    }
+    ```
+
+### `GET /c/<collection_name>` — query stored logs
+
+The caller can query-encode the following fields:
+
+- `instances` — zero or more log collection instances to limit results to
+- `time-start` — the earliest log to include
+- One of:
+    - `time-end` — the latest log to include
+    - `max-count` — maximum number of logs to return, allows paging
+    - `stream` — boolean that keeps the response dangling, streaming in
+      logs like `tail -f`. Incompatible with logtail-time-end.
+
+In **stream=false** mode, the response is a single JSON object:
+
+    {
+    	// TODO: header fields
+    	"logs": [ {}, {}, ... ]
+    }
+
+In **stream=true** mode, the response begins with a JSON header object
+similar to the storage format, and then is a sequence of JSON log
+objects, `{...}`, one per line. The server continues to send these until
+the client closes the connection.
+
+## Configuration
+
+For organizations with a small number of instances writing logs, the
+Configuration API are best used by a trusted human operator, usually
+through a GUI. Organizations with many instances will need to automate
+the creation of tokens.
+
+### `POST /collections` — create or delete a collection
+
+The caller must set the `collection` property and `action=create` or
+`action=delete`, either form encoded or JSON encoded. Its character set
+is restricted to the mundane: [a-zA-Z0-9-_.]+
+
+Collection names are a global space. Typically they are a domain name.
+
+### `POST /instances` — adopt an instance into a collection
+
+The caller must send the following properties, form encoded or JSON encoded:
+
+- `collection` — a valid FQDN ([a-zA-Z0-9-_.]+)
+- `instances` an instance public ID encoded as hex
+
+The collection name must be claimed by a group the caller belongs to.
+The pair (collection-name, instance-public-ID) may or may not already have
+logs associated with it.
+
+On failure, an error message is returned with a 4xx or 5xx status code:
+
+`{"error": "what went wrong"}`

+ 49 - 0
logtail/backoff/backoff.go

@@ -0,0 +1,49 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package backoff
+
+import (
+	"context"
+	"log"
+	"math/rand"
+	"time"
+)
+
+const MAX_BACKOFF_MSEC = 30000
+
+type Backoff struct {
+	n        int
+	Name     string
+	NewTimer func(d time.Duration) *time.Timer
+}
+
+func (b *Backoff) BackOff(ctx context.Context, err error) {
+	if ctx.Err() == nil && err != nil {
+		b.n++
+		// n^2 backoff timer is a little smoother than the
+		// common choice of 2^n.
+		msec := b.n * b.n * 10
+		if msec > MAX_BACKOFF_MSEC {
+			msec = MAX_BACKOFF_MSEC
+		}
+		// Randomize the delay between 0.5-1.5 x msec, in order
+		// to prevent accidental "thundering herd" problems.
+		msec = rand.Intn(msec) + msec/2
+		log.Printf("%s: backoff: %d msec\n", b.Name, msec)
+		newTimer := b.NewTimer
+		if newTimer == nil {
+			newTimer = time.NewTimer
+		}
+		t := newTimer(time.Duration(msec) * time.Millisecond)
+		select {
+		case <-ctx.Done():
+			t.Stop()
+		case <-t.C:
+		}
+	} else {
+		// not a regular error
+		b.n = 0
+	}
+}

+ 82 - 0
logtail/buffer.go

@@ -0,0 +1,82 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package logtail
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"sync"
+)
+
+type Buffer interface {
+	// TryReadLine tries to read a log line from the ring buffer.
+	// If no line is available it returns a nil slice.
+	// If the ring buffer is closed it returns io.EOF.
+	TryReadLine() ([]byte, error)
+
+	// Write writes a log line into the ring buffer.
+	Write([]byte) (int, error)
+}
+
+func NewMemoryBuffer(numEntries int) Buffer {
+	return &memBuffer{
+		pending: make(chan qentry, numEntries),
+	}
+}
+
+type memBuffer struct {
+	next    []byte
+	pending chan qentry
+
+	dropMu    sync.Mutex
+	dropCount int
+}
+
+func (m *memBuffer) TryReadLine() ([]byte, error) {
+	if m.next != nil {
+		msg := m.next
+		m.next = nil
+		return msg, nil
+	}
+
+	select {
+	case ent := <-m.pending:
+		if ent.dropCount > 0 {
+			m.next = ent.msg
+			buf := new(bytes.Buffer)
+			fmt.Fprintf(buf, "----------- %d logs dropped ----------", ent.dropCount)
+			return buf.Bytes(), nil
+		}
+		return ent.msg, nil
+	default:
+		return nil, nil
+	}
+}
+
+func (m *memBuffer) Write(b []byte) (int, error) {
+	m.dropMu.Lock()
+	defer m.dropMu.Unlock()
+
+	ent := qentry{
+		msg:       b,
+		dropCount: m.dropCount,
+	}
+	select {
+	case m.pending <- ent:
+		m.dropCount = 0
+		return len(b), nil
+	default:
+		m.dropCount++
+		return 0, errBufferFull
+	}
+}
+
+type qentry struct {
+	msg       []byte
+	dropCount int
+}
+
+var errBufferFull = errors.New("logtail: buffer full")

+ 51 - 0
logtail/example/logadopt/logadopt.go

@@ -0,0 +1,51 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+	"flag"
+	"io/ioutil"
+	"log"
+	"net/http"
+	"net/url"
+	"os"
+	"strings"
+)
+
+func main() {
+	collection := flag.String("c", "", "logtail collection name")
+	publicID := flag.String("m", "", "machine public identifier")
+	apiKey := flag.String("p", "", "logtail API key")
+	flag.Parse()
+	if len(flag.Args()) != 0 {
+		flag.Usage()
+		os.Exit(1)
+	}
+	log.SetFlags(0)
+
+	req, err := http.NewRequest("POST", "https://log.tailscale.io/instances", strings.NewReader(url.Values{
+		"collection": []string{*collection},
+		"instances":  []string{*publicID},
+		"adopt":      []string{"true"},
+	}.Encode()))
+	if err != nil {
+		log.Fatal(err)
+	}
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+	req.SetBasicAuth(*apiKey, "")
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		log.Fatal(err)
+	}
+	b, err := ioutil.ReadAll(resp.Body)
+	resp.Body.Close()
+	if err != nil {
+		log.Fatalf("logadopt: response read failed %d: %v", resp.StatusCode, err)
+	}
+	if resp.StatusCode != 200 {
+		log.Fatalf("adoption failed: %d: %s", resp.StatusCode, string(b))
+	}
+	log.Printf("%s", string(b))
+}

+ 87 - 0
logtail/example/logreprocess/demo.sh

@@ -0,0 +1,87 @@
+#!/bin/bash
+# Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+#
+# This shell script demonstrates writing logs from machines
+# and then reprocessing those logs to amalgamate python tracebacks
+# into a single log entry in a new collection.
+#
+# To run this demo, first install the example applications:
+#
+#        go install tailscale.com/logtail/example/...
+#
+# Then generate a LOGTAIL_API_KEY and two test collections by visiting:
+#
+#        https://log.tailscale.io
+#
+# Then set the three variables below.
+trap 'rv=$?; [ "$rv" = 0 ] || echo "-- exiting with code $rv"; exit $rv' EXIT
+set -e
+
+LOG_TEXT='server starting
+config file loaded
+answering queries
+Traceback (most recent call last):
+  File "/Users/crawshaw/junk.py", line 6, in <module>
+    main()
+  File "/Users/crawshaw/junk.py", line 4, in main
+    raise Exception("oops")
+Exception: oops'
+
+die() {
+	echo "$0: $*" >&2
+	exit 1
+}
+
+msg() {
+	echo "-- $*" >&2
+}
+
+if [ -z "$LOGTAIL_API_KEY" ]; then
+	die "LOGTAIL_API_KEY is not set"
+fi
+
+if [ -z "$COLLECTION_IN" ]; then
+	die "COLLECTION_IN is not set"
+fi
+
+if [ -z "$COLLECTION_OUT" ]; then
+	die "COLLECTION_OUT is not set"
+fi
+
+# Private IDs are 32-bytes of random hex.
+# Normally you'd keep the same private IDs from one run to the next, but
+# this is just an example.
+msg "Generating keys..."
+privateid1=$(hexdump -n 32 -e '8/4 "%08X"' /dev/urandom)
+privateid2=$(hexdump -n 32 -e '8/4 "%08X"' /dev/urandom)
+privateid3=$(hexdump -n 32 -e '8/4 "%08X"' /dev/urandom)
+
+# Public IDs are the SHA-256 of the private ID.
+publicid1=$(echo -n $privateid1 | xxd -r -p - | shasum -a 256 | sed 's/ -//')
+publicid2=$(echo -n $privateid2 | xxd -r -p - | shasum -a 256 | sed 's/ -//')
+publicid3=$(echo -n $privateid3 | xxd -r -p - | shasum -a 256 | sed 's/ -//')
+
+# Write the machine logs to the input collection.
+# Notice that this doesn't require an API key.
+msg "Producing new logs..."
+echo "$LOG_TEXT" | logtail -c $COLLECTION_IN -k $privateid1 >/dev/null
+echo "$LOG_TEXT" | logtail -c $COLLECTION_IN -k $privateid2 >/dev/null
+
+# Adopt the logs, so they will be kept and are readable.
+msg "Adopting logs..."
+logadopt -p "$LOGTAIL_API_KEY" -c "$COLLECTION_IN" -m $publicid1
+logadopt -p "$LOGTAIL_API_KEY" -c "$COLLECTION_IN" -m $publicid2
+
+# Reprocess the logs, amalgamating python tracebacks.
+#
+# We'll take that reprocessed output and write it to a separate collection,
+# again via logtail.
+#
+# Time out quickly because all our "interesting" logs (generated
+# above) have already been processed.
+msg "Reprocessing logs..."
+logreprocess -t 3s -c "$COLLECTION_IN" -p "$LOGTAIL_API_KEY" 2>&1 |
+  logtail -c "$COLLECTION_OUT" -k $privateid3

+ 116 - 0
logtail/example/logreprocess/logreprocess.go

@@ -0,0 +1,116 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The logreprocess program tails a log and reprocesses it.
+package main
+
+import (
+	"bufio"
+	"encoding/json"
+	"flag"
+	"io/ioutil"
+	"log"
+	"net/http"
+	"os"
+	"strings"
+	"time"
+
+	"tailscale.com/logtail"
+)
+
+func main() {
+	collection := flag.String("c", "", "logtail collection name to read")
+	apiKey := flag.String("p", "", "logtail API key")
+	timeout := flag.Duration("t", 0, "timeout after which logreprocess quits")
+	flag.Parse()
+	if len(flag.Args()) != 0 {
+		flag.Usage()
+		os.Exit(1)
+	}
+	log.SetFlags(0)
+
+	if *timeout != 0 {
+		go func() {
+			<-time.After(*timeout)
+			log.Printf("logreprocess: timeout reached, quitting")
+			os.Exit(1)
+		}()
+	}
+
+	req, err := http.NewRequest("GET", "https://log.tailscale.io/c/"+*collection+"?stream=true", nil)
+	if err != nil {
+		log.Fatal(err)
+	}
+	req.SetBasicAuth(*apiKey, "")
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != 200 {
+		b, err := ioutil.ReadAll(resp.Body)
+		if err != nil {
+			log.Fatalf("logreprocess: read error %d: %v", resp.StatusCode, err)
+		}
+		log.Fatalf("logreprocess: read error %d: %s", resp.StatusCode, string(b))
+	}
+
+	tracebackCache := make(map[logtail.PublicID]*ProcessedMsg)
+
+	scanner := bufio.NewScanner(resp.Body)
+	for scanner.Scan() {
+		var msg Msg
+		if err := json.Unmarshal(scanner.Bytes(), &msg); err != nil {
+			log.Fatalf("logreprocess of %q: %v", string(scanner.Bytes()), err)
+		}
+		var pMsg *ProcessedMsg
+		if pMsg = tracebackCache[msg.Logtail.Instance]; pMsg != nil {
+			pMsg.Text += "\n" + msg.Text
+			if strings.HasPrefix(msg.Text, "Exception: ") {
+				delete(tracebackCache, msg.Logtail.Instance)
+			} else {
+				continue // write later
+			}
+		} else {
+			pMsg = &ProcessedMsg{
+				OrigInstance: msg.Logtail.Instance,
+				Text:         msg.Text,
+			}
+			pMsg.Logtail.ClientTime = msg.Logtail.ClientTime
+		}
+
+		if strings.HasPrefix(msg.Text, "Traceback (most recent call last):") {
+			tracebackCache[msg.Logtail.Instance] = pMsg
+			continue // write later
+		}
+
+		b, err := json.Marshal(pMsg)
+		if err != nil {
+			log.Fatal(err)
+		}
+		log.Printf("%s", b)
+	}
+	if err := scanner.Err(); err != nil {
+		log.Fatal(err)
+	}
+}
+
+type Msg struct {
+	Logtail struct {
+		Instance   logtail.PublicID `json:"instance"`
+		ClientTime time.Time        `json:"client_time"`
+	} `json:"logtail"`
+
+	Text string `json:"text"`
+}
+
+type ProcessedMsg struct {
+	Logtail struct {
+		ClientTime time.Time `json:"client_time"`
+	} `json:"logtail"`
+
+	OrigInstance logtail.PublicID `json:"orig_instance"`
+	Text         string           `json:"text"`
+}

+ 46 - 0
logtail/example/logtail/logtail.go

@@ -0,0 +1,46 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The logtail program logs stdin.
+package main
+
+import (
+	"bufio"
+	"flag"
+	"io"
+	"log"
+	"os"
+
+	"tailscale.com/logtail"
+)
+
+func main() {
+	collection := flag.String("c", "", "logtail collection name")
+	privateID := flag.String("k", "", "machine private identifier, 32-bytes in hex")
+	flag.Parse()
+	if len(flag.Args()) != 0 {
+		flag.Usage()
+		os.Exit(1)
+	}
+
+	log.SetFlags(0)
+
+	var id logtail.PrivateID
+	if err := id.UnmarshalText([]byte(*privateID)); err != nil {
+		log.Fatalf("logtail: bad -privateid: %v", err)
+	}
+
+	logger := logtail.Log(logtail.Config{
+		Collection: *collection,
+		PrivateID:  id,
+	})
+	log.SetOutput(io.MultiWriter(logger, os.Stdout))
+	defer logger.Flush()
+	defer log.Printf("logtail exited")
+
+	scanner := bufio.NewScanner(os.Stdin)
+	for scanner.Scan() {
+		log.Println(scanner.Text())
+	}
+}

+ 238 - 0
logtail/filch/filch.go

@@ -0,0 +1,238 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package filch is a file system queue that pilfers your stderr.
+// (A FILe CHannel that filches.)
+package filch
+
+import (
+	"bufio"
+	"bytes"
+	"fmt"
+	"io"
+	"os"
+	"sync"
+)
+
+var stderrFD = 2 // a variable for testing
+
+type Options struct {
+	ReplaceStderr bool // dup over fd 2 so everything written to stderr comes here
+}
+
+type Filch struct {
+	OrigStderr *os.File
+
+	mu        sync.Mutex
+	cur       *os.File
+	alt       *os.File
+	altscan   *bufio.Scanner
+	recovered int64
+}
+
+func (f *Filch) TryReadLine() ([]byte, error) {
+	f.mu.Lock()
+	defer f.mu.Unlock()
+
+	if f.altscan != nil {
+		if b, err := f.scan(); b != nil || err != nil {
+			return b, err
+		}
+	}
+
+	f.cur, f.alt = f.alt, f.cur
+	if f.OrigStderr != nil {
+		if err := dup2Stderr(f.cur); err != nil {
+			return nil, err
+		}
+	}
+	if _, err := f.alt.Seek(0, os.SEEK_SET); err != nil {
+		return nil, err
+	}
+	f.altscan = bufio.NewScanner(f.alt)
+	f.altscan.Split(splitLines)
+	return f.scan()
+}
+
+func (f *Filch) scan() ([]byte, error) {
+	if f.altscan.Scan() {
+		return f.altscan.Bytes(), nil
+	}
+	err := f.altscan.Err()
+	err2 := f.alt.Truncate(0)
+	_, err3 := f.alt.Seek(0, os.SEEK_SET)
+	f.altscan = nil
+	if err != nil {
+		return nil, err
+	}
+	if err2 != nil {
+		return nil, err2
+	}
+	if err3 != nil {
+		return nil, err3
+	}
+	return nil, nil
+}
+
+func (f *Filch) Write(b []byte) (int, error) {
+	f.mu.Lock()
+	defer f.mu.Unlock()
+
+	if len(b) == 0 || b[len(b)-1] != '\n' {
+		bnl := make([]byte, len(b)+1)
+		copy(bnl, b)
+		bnl[len(bnl)-1] = '\n'
+		return f.cur.Write(bnl)
+	}
+	return f.cur.Write(b)
+}
+
+func (f *Filch) Close() (err error) {
+	f.mu.Lock()
+	defer f.mu.Unlock()
+
+	if f.OrigStderr != nil {
+		if err2 := unsaveStderr(f.OrigStderr); err == nil {
+			err = err2
+		}
+		f.OrigStderr = nil
+	}
+
+	if err2 := f.cur.Close(); err == nil {
+		err = err2
+	}
+	if err2 := f.alt.Close(); err == nil {
+		err = err2
+	}
+
+	return err
+}
+
+func New(filePrefix string, opts Options) (f *Filch, err error) {
+	var f1, f2 *os.File
+	defer func() {
+		if err != nil {
+			if f1 != nil {
+				f1.Close()
+			}
+			if f2 != nil {
+				f2.Close()
+			}
+			err = fmt.Errorf("filch: %s", err)
+		}
+	}()
+
+	path1 := filePrefix + ".log1.txt"
+	path2 := filePrefix + ".log2.txt"
+
+	f1, err = os.OpenFile(path1, os.O_CREATE|os.O_RDWR, 0666)
+	if err != nil {
+		return nil, err
+	}
+	f2, err = os.OpenFile(path2, os.O_CREATE|os.O_RDWR, 0666)
+	if err != nil {
+		return nil, err
+	}
+
+	fi1, err := f1.Stat()
+	if err != nil {
+		return nil, err
+	}
+	fi2, err := f2.Stat()
+	if err != nil {
+		return nil, err
+	}
+
+	f = &Filch{
+		OrigStderr: os.Stderr, // temporary, for past logs recovery
+	}
+
+	// Neither, either, or both files may exist and contain logs from
+	// the last time the process ran. The three cases are:
+	//
+	//	- neither: all logs were read out and files were truncated
+	//	- either: logs were being written into one of the files
+	//	- both: the files were swapped and were starting to be
+	//		read out, while new logs streamed into the other
+	//		file, but the read out did not complete
+	if n := fi1.Size() + fi2.Size(); n > 0 {
+		f.recovered = n
+	}
+	switch {
+	case fi1.Size() > 0 && fi2.Size() == 0:
+		f.cur, f.alt = f2, f1
+	case fi2.Size() > 0 && fi1.Size() == 0:
+		f.cur, f.alt = f1, f2
+	case fi1.Size() > 0 && fi2.Size() > 0: // both
+		// We need to pick one of the files to be the elder,
+		// which we do using the mtime.
+		var older, newer *os.File
+		if fi1.ModTime().Before(fi2.ModTime()) {
+			older, newer = f1, f2
+		} else {
+			older, newer = f2, f1
+		}
+		if err := moveContents(older, newer); err != nil {
+			fmt.Fprintf(f.OrigStderr, "filch: recover move failed: %v\n", err)
+			fmt.Fprintf(older, "filch: recover move failed: %v\n", err)
+		}
+		f.cur, f.alt = newer, older
+	default:
+		f.cur, f.alt = f1, f2 // does not matter
+	}
+	if f.recovered > 0 {
+		f.altscan = bufio.NewScanner(f.alt)
+		f.altscan.Split(splitLines)
+	}
+
+	f.OrigStderr = nil
+	if opts.ReplaceStderr {
+		f.OrigStderr, err = saveStderr()
+		if err != nil {
+			return nil, err
+		}
+		if err := dup2Stderr(f.cur); err != nil {
+			return nil, err
+		}
+	}
+
+	return f, nil
+}
+
+func moveContents(dst, src *os.File) (err error) {
+	defer func() {
+		_, err2 := src.Seek(0, os.SEEK_SET)
+		err3 := src.Truncate(0)
+		_, err4 := dst.Seek(0, os.SEEK_SET)
+		if err == nil {
+			err = err2
+		}
+		if err == nil {
+			err = err3
+		}
+		if err == nil {
+			err = err4
+		}
+	}()
+	if _, err := src.Seek(0, os.SEEK_SET); err != nil {
+		return err
+	}
+	if _, err := io.Copy(dst, src); err != nil {
+		return err
+	}
+	return nil
+}
+
+func splitLines(data []byte, atEOF bool) (advance int, token []byte, err error) {
+	if atEOF && len(data) == 0 {
+		return 0, nil, nil
+	}
+	if i := bytes.IndexByte(data, '\n'); i >= 0 {
+		return i + 1, data[0 : i+1], nil
+	}
+	if atEOF {
+		return len(data), data, nil
+	}
+	return 0, nil, nil
+}

+ 178 - 0
logtail/filch/filch_test.go

@@ -0,0 +1,178 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package filch
+
+import (
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+	"unicode"
+)
+
+type filchTest struct {
+	*Filch
+}
+
+func newFilchTest(t *testing.T, filePrefix string, opts Options) *filchTest {
+	f, err := New(filePrefix, opts)
+	if err != nil {
+		t.Fatal(err)
+	}
+	return &filchTest{Filch: f}
+}
+
+func (f *filchTest) write(t *testing.T, s string) {
+	t.Helper()
+	if _, err := f.Write([]byte(s)); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func (f *filchTest) read(t *testing.T, want string) {
+	t.Helper()
+	if b, err := f.TryReadLine(); err != nil {
+		t.Fatalf("r.ReadLine() err=%v", err)
+	} else if got := strings.TrimRightFunc(string(b), unicode.IsSpace); got != want {
+		t.Errorf("r.ReadLine()=%q, want %q", got, want)
+	}
+}
+
+func (f *filchTest) readEOF(t *testing.T) {
+	t.Helper()
+	if b, err := f.TryReadLine(); b != nil || err != nil {
+		t.Fatalf("r.ReadLine()=%q err=%v, want nil slice", string(b), err)
+	}
+}
+
+func (f *filchTest) close(t *testing.T) {
+	t.Helper()
+	if err := f.Close(); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func genFilePrefix(t *testing.T) string {
+	t.Helper()
+	filePrefix, err := ioutil.TempDir("", "filch")
+	if err != nil {
+		t.Fatal(err)
+	}
+	return filepath.Join(filePrefix, "ringbuffer-")
+}
+
+func TestQueue(t *testing.T) {
+	filePrefix := genFilePrefix(t)
+	defer os.RemoveAll(filepath.Dir(filePrefix))
+
+	f := newFilchTest(t, filePrefix, Options{ReplaceStderr: false})
+
+	f.readEOF(t)
+	const line1 = "Hello, World!"
+	const line2 = "This is a test."
+	const line3 = "Of filch."
+	f.write(t, line1)
+	f.write(t, line2)
+	f.read(t, line1)
+	f.write(t, line3)
+	f.read(t, line2)
+	f.read(t, line3)
+	f.readEOF(t)
+	f.write(t, line1)
+	f.read(t, line1)
+	f.readEOF(t)
+	f.close(t)
+}
+
+func TestRecover(t *testing.T) {
+	t.Run("empty", func(t *testing.T) {
+		filePrefix := genFilePrefix(t)
+		defer os.RemoveAll(filepath.Dir(filePrefix))
+		f := newFilchTest(t, filePrefix, Options{ReplaceStderr: false})
+		f.write(t, "hello")
+		f.read(t, "hello")
+		f.readEOF(t)
+		f.close(t)
+
+		f = newFilchTest(t, filePrefix, Options{ReplaceStderr: false})
+		f.readEOF(t)
+		f.close(t)
+	})
+
+	t.Run("cur", func(t *testing.T) {
+		filePrefix := genFilePrefix(t)
+		defer os.RemoveAll(filepath.Dir(filePrefix))
+		f := newFilchTest(t, filePrefix, Options{ReplaceStderr: false})
+		f.write(t, "hello")
+		f.close(t)
+
+		f = newFilchTest(t, filePrefix, Options{ReplaceStderr: false})
+		f.read(t, "hello")
+		f.readEOF(t)
+		f.close(t)
+	})
+
+	t.Run("alt", func(t *testing.T) {
+		t.Skip("currently broken on linux, passes on macOS")
+		/* --- FAIL: TestRecover/alt (0.00s)
+		filch_test.go:128: r.ReadLine()="world", want "hello"
+		filch_test.go:129: r.ReadLine()="hello", want "world"
+		*/
+
+		filePrefix := genFilePrefix(t)
+		defer os.RemoveAll(filepath.Dir(filePrefix))
+		f := newFilchTest(t, filePrefix, Options{ReplaceStderr: false})
+		f.write(t, "hello")
+		f.read(t, "hello")
+		f.write(t, "world")
+		f.close(t)
+
+		f = newFilchTest(t, filePrefix, Options{ReplaceStderr: false})
+		// TODO(crawshaw): The "hello" log is replayed in recovery.
+		//                 We could reduce replays by risking some logs loss.
+		//                 What should our policy here be?
+		f.read(t, "hello")
+		f.read(t, "world")
+		f.readEOF(t)
+		f.close(t)
+	})
+}
+
+func TestFilchStderr(t *testing.T) {
+	pipeR, pipeW, err := os.Pipe()
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer pipeR.Close()
+	defer pipeW.Close()
+
+	stderrFD = int(pipeW.Fd())
+	defer func() {
+		stderrFD = 2
+	}()
+
+	filePrefix := genFilePrefix(t)
+	defer os.RemoveAll(filepath.Dir(filePrefix))
+	f := newFilchTest(t, filePrefix, Options{ReplaceStderr: true})
+	f.write(t, "hello")
+	if _, err := fmt.Fprintf(pipeW, "filch\n"); err != nil {
+		t.Fatal(err)
+	}
+	f.read(t, "hello")
+	f.read(t, "filch")
+	f.readEOF(t)
+	f.close(t)
+
+	pipeW.Close()
+	b, err := ioutil.ReadAll(pipeR)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(b) > 0 {
+		t.Errorf("unexpected write to fake stderr: %s", b)
+	}
+}

+ 30 - 0
logtail/filch/filch_unix.go

@@ -0,0 +1,30 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build !windows
+
+package filch
+
+import (
+	"os"
+	"syscall"
+)
+
+func saveStderr() (*os.File, error) {
+	fd, err := syscall.Dup(stderrFD)
+	if err != nil {
+		return nil, err
+	}
+	return os.NewFile(uintptr(fd), "stderr"), nil
+}
+
+func unsaveStderr(f *os.File) error {
+	err := dup2Stderr(f)
+	f.Close()
+	return err
+}
+
+func dup2Stderr(f *os.File) error {
+	return syscall.Dup2(int(f.Fd()), stderrFD)
+}

+ 44 - 0
logtail/filch/filch_windows.go

@@ -0,0 +1,44 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package filch
+
+import (
+	"fmt"
+	"os"
+	"syscall"
+)
+
+var kernel32 = syscall.MustLoadDLL("kernel32.dll")
+var procSetStdHandle = kernel32.MustFindProc("SetStdHandle")
+
+func setStdHandle(stdHandle int32, handle syscall.Handle) error {
+	r, _, e := syscall.Syscall(procSetStdHandle.Addr(), 2, uintptr(stdHandle), uintptr(handle), 0)
+	if r == 0 {
+		if e != 0 {
+			return error(e)
+		}
+		return syscall.EINVAL
+	}
+	return nil
+}
+
+func saveStderr() (*os.File, error) {
+	return os.Stderr, nil
+}
+
+func unsaveStderr(f *os.File) error {
+	os.Stderr = f
+	return nil
+}
+
+func dup2Stderr(f *os.File) error {
+	fd := int(f.Fd())
+	err := setStdHandle(syscall.STD_ERROR_HANDLE, syscall.Handle(fd))
+	if err != nil {
+		return fmt.Errorf("dup2Stderr: %w", err)
+	}
+	os.Stderr = f
+	return nil
+}

+ 103 - 0
logtail/id.go

@@ -0,0 +1,103 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package logtail
+
+import (
+	"crypto/rand"
+	"crypto/sha256"
+	"encoding/hex"
+	"fmt"
+)
+
+// PrivateID represents an instance that write logs.
+// Private IDs are only shared with the server when writing logs.
+type PrivateID [32]byte
+
+// Safely generate a new PrivateId for use in Config objects.
+// You should persist this across runs of an instance of your app, so that
+// it can append to the same log file on each run.
+func NewPrivateID() (id PrivateID, err error) {
+	_, err = rand.Read(id[:])
+	if err != nil {
+		return PrivateID{}, err
+	}
+	// Clamping, for future use.
+	id[0] &= 248
+	id[31] = (id[31] & 127) | 64
+	return id, nil
+}
+
+func (id PrivateID) MarshalText() ([]byte, error) {
+	b := make([]byte, hex.EncodedLen(len(id)))
+	if i := hex.Encode(b, id[:]); i != len(b) {
+		return nil, fmt.Errorf("logtail.PrivateID.MarhsalText: i=%d", i)
+	}
+	return b, nil
+}
+
+func (id *PrivateID) UnmarshalText(s []byte) error {
+	b, err := hex.DecodeString(string(s))
+	if err != nil {
+		return fmt.Errorf("logtail.PrivateID.UnmarshalText: %v", err)
+	}
+	if len(b) != len(id) {
+		return fmt.Errorf("logtail.PrivateID.UnmarshalText: invalid hex length: %d", len(b))
+	}
+	copy(id[:], b)
+	return nil
+}
+
+func (id PrivateID) String() string {
+	b, err := id.MarshalText()
+	if err != nil {
+		panic(err)
+	}
+	return string(b)
+}
+
+func (id PrivateID) Public() (pub PublicID) {
+	var emptyID PrivateID
+	if id == emptyID {
+		panic("invalid logtail.Public() on an empty private ID")
+	}
+	h := sha256.New()
+	h.Write(id[:])
+	if n := copy(pub[:], h.Sum(pub[:0])); n != len(pub) {
+		panic(fmt.Sprintf("public id short copy: %d", n))
+	}
+	return pub
+}
+
+// PublicID represents an instance in the logs service for reading and adoption.
+// The public ID value is a SHA-256 hash of a private ID.
+type PublicID [sha256.Size]byte
+
+func (id PublicID) MarshalText() ([]byte, error) {
+	b := make([]byte, hex.EncodedLen(len(id)))
+	if i := hex.Encode(b, id[:]); i != len(b) {
+		return nil, fmt.Errorf("logtail.PublicID.MarhsalText: i=%d", i)
+	}
+	return b, nil
+}
+
+func (id *PublicID) UnmarshalText(s []byte) error {
+	b, err := hex.DecodeString(string(s))
+	if err != nil {
+		return fmt.Errorf("logtail.PublicID.UnmarshalText: %v", err)
+	}
+	if len(b) != len(id) {
+		return fmt.Errorf("logtail.PublicID.UnmarshalText: invalid hex length: %d", len(b))
+	}
+	copy(id[:], b)
+	return nil
+}
+
+func (id PublicID) String() string {
+	b, err := id.MarshalText()
+	if err != nil {
+		panic(err)
+	}
+	return string(b)
+}

+ 54 - 0
logtail/id_test.go

@@ -0,0 +1,54 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package logtail
+
+import (
+	"testing"
+)
+
+func TestIDs(t *testing.T) {
+	id1, err := NewPrivateID()
+	if err != nil {
+		t.Fatal(err)
+	}
+	pub1 := id1.Public()
+
+	id2, err := NewPrivateID()
+	if err != nil {
+		t.Fatal(err)
+	}
+	pub2 := id2.Public()
+
+	if id1 == id2 {
+		t.Fatalf("subsequent private IDs match: %v", id1)
+	}
+	if pub1 == pub2 {
+		t.Fatalf("subsequent public IDs match: %v", id1)
+	}
+	if id1.String() == id2.String() {
+		t.Fatalf("id1.String()=%v equals id2.String()", id1.String())
+	}
+	if pub1.String() == pub2.String() {
+		t.Fatalf("pub1.String()=%v equals pub2.String()", pub1.String())
+	}
+
+	id1txt, err := id1.MarshalText()
+	if err != nil {
+		t.Fatal(err)
+	}
+	var id3 PrivateID
+	if err := id3.UnmarshalText(id1txt); err != nil {
+		t.Fatal(err)
+	}
+	if id1 != id3 {
+		t.Fatalf("id1 %v: marshal and unmarshal gives different key: %v", id1, id3)
+	}
+	if want, got := id1.Public(), id3.Public(); want != got {
+		t.Fatalf("id1.Public()=%v does not match id3.Public()=%v", want, got)
+	}
+	if id1.String() != id3.String() {
+		t.Fatalf("id1.String()=%v does not match id3.String()=%v", id1.String(), id3.String())
+	}
+}

+ 464 - 0
logtail/logtail.go

@@ -0,0 +1,464 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package logtail sends logs to log.tailscale.io.
+package logtail
+
+import (
+	"bytes"
+	"context"
+	"crypto/rand"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"math/big"
+	"net/http"
+	"os"
+	"strconv"
+	"sync"
+	"time"
+
+	"tailscale.com/logtail/backoff"
+)
+
+type Logger interface {
+	// Write logs an encoded JSON blob.
+	//
+	// If the []byte passed to Write is not an encoded JSON blob,
+	// then contents is fit into a JSON blob and written.
+	//
+	// This is intended as an interface for the stdlib "log" package.
+	Write([]byte) (int, error)
+
+	// Flush uploads all logs to the server.
+	// It blocks until complete or there is an unrecoverable error.
+	Flush() error
+
+	// Shutdown gracefully shuts down the logger while completing any
+	// remaining uploads.
+	//
+	// It will block, continuing to try and upload unless the passed
+	// context object interrupts it by being done.
+	// If the shutdown is interrupted, an error is returned.
+	Shutdown(context.Context) error
+
+	// Close shuts down this logger object, the background log uploader
+	// process, and any associated goroutines.
+	//
+	// DEPRECATED: use Shutdown
+	Close()
+}
+
+type Encoder interface {
+	EncodeAll(src, dst []byte) []byte
+	Close() error
+}
+
+type Config struct {
+	Collection     string           // collection name, a domain name
+	PrivateID      PrivateID        // machine-specific private identifier
+	BaseURL        string           // if empty defaults to "https://log.tailscale.io"
+	HTTPC          *http.Client     // if empty defaults to http.DefaultClient
+	SkipClientTime bool             // if true, client_time is not written to logs
+	LowMemory      bool             // if true, logtail minimizes memory use
+	TimeNow        func() time.Time // if set, subsitutes uses of time.Now
+	Stderr         io.Writer        // if set, logs are sent here instead of os.Stderr
+	Buffer         Buffer           // temp storage, if nil a MemoryBuffer
+	CheckLogs      <-chan struct{}  // signals Logger to check for filched logs to upload
+	NewZstdEncoder func() Encoder   // if set, used to compress logs for transmission
+}
+
+func Log(cfg Config) Logger {
+	if cfg.BaseURL == "" {
+		cfg.BaseURL = "https://log.tailscale.io"
+	}
+	if cfg.HTTPC == nil {
+		cfg.HTTPC = http.DefaultClient
+	}
+	if cfg.TimeNow == nil {
+		cfg.TimeNow = time.Now
+	}
+	if cfg.Stderr == nil {
+		cfg.Stderr = os.Stderr
+	}
+	if cfg.Buffer == nil {
+		pendingSize := 256
+		if cfg.LowMemory {
+			pendingSize = 64
+		}
+		cfg.Buffer = NewMemoryBuffer(pendingSize)
+	}
+	if cfg.CheckLogs == nil {
+		cfg.CheckLogs = make(chan struct{})
+	}
+	l := &logger{
+		stderr:         cfg.Stderr,
+		httpc:          cfg.HTTPC,
+		url:            cfg.BaseURL + "/c/" + cfg.Collection + "/" + cfg.PrivateID.String(),
+		lowMem:         cfg.LowMemory,
+		buffer:         cfg.Buffer,
+		skipClientTime: cfg.SkipClientTime,
+		sent:           make(chan struct{}, 1),
+		sentinel:       make(chan int32, 16),
+		checkLogs:      cfg.CheckLogs,
+		timeNow:        cfg.TimeNow,
+		bo: backoff.Backoff{
+			Name: "logtail",
+		},
+
+		shutdownStart: make(chan struct{}),
+		shutdownDone:  make(chan struct{}),
+	}
+	if cfg.NewZstdEncoder != nil {
+		l.zstdEncoder = cfg.NewZstdEncoder()
+	}
+
+	ctx, cancel := context.WithCancel(context.Background())
+	l.uploadCancel = cancel
+
+	go l.uploading(ctx)
+	l.Write([]byte("logtail started"))
+	return l
+}
+
+type logger struct {
+	stderr         io.Writer
+	httpc          *http.Client
+	url            string
+	lowMem         bool
+	skipClientTime bool
+	buffer         Buffer
+	sent           chan struct{}   // signal to speed up drain
+	checkLogs      <-chan struct{} // external signal to attempt a drain
+	sentinel       chan int32
+	timeNow        func() time.Time
+	bo             backoff.Backoff
+	zstdEncoder    Encoder
+	uploadCancel   func()
+
+	shutdownStart chan struct{} // closed when shutdown begins
+	shutdownDone  chan struct{} // closd when shutdown complete
+
+	dropMu    sync.Mutex
+	dropCount int
+}
+
+func (l *logger) Shutdown(ctx context.Context) error {
+	if ctx == nil {
+		ctx = context.Background()
+	}
+	done := make(chan struct{})
+	go func() {
+		select {
+		case <-ctx.Done():
+			l.uploadCancel()
+			<-l.shutdownDone
+		case <-l.shutdownDone:
+		}
+		close(done)
+	}()
+
+	close(l.shutdownStart)
+	io.WriteString(l, "logger closing down\n")
+	<-done
+
+	if l.zstdEncoder != nil {
+		return l.zstdEncoder.Close()
+	}
+	return nil
+}
+
+func (l *logger) Close() {
+	l.Shutdown(nil)
+}
+
+func (l *logger) drainPending() (res []byte) {
+	buf := new(bytes.Buffer)
+	entries := 0
+
+	var batchDone bool
+	for buf.Len() < 1<<18 && !batchDone {
+		b, err := l.buffer.TryReadLine()
+		if err == io.EOF {
+			break
+		} else if err != nil {
+			b = []byte(fmt.Sprintf("reading ringbuffer: %v", err))
+			batchDone = true
+		} else if b == nil {
+			if entries > 0 {
+				break
+			}
+
+			select {
+			case <-l.shutdownStart:
+				batchDone = true
+			case <-l.checkLogs:
+			case <-l.sent:
+			}
+			continue
+		}
+
+		if len(b) == 0 {
+			continue
+		}
+		if b[0] != '{' || !json.Valid(b) {
+			// This is probably a log added to stderr by filch
+			// outside of the logtail logger. Encode it.
+			// Do not add a client time, as it could have been
+			// been written a long time ago.
+			b = l.encodeText(b, true)
+		}
+
+		switch {
+		case entries == 0:
+			buf.Write(b)
+		case entries == 1:
+			buf2 := new(bytes.Buffer)
+			buf2.WriteByte('[')
+			buf2.Write(buf.Bytes())
+			buf2.WriteByte(',')
+			buf2.Write(b)
+			buf.Reset()
+			buf.Write(buf2.Bytes())
+		default:
+			buf.WriteByte(',')
+			buf.Write(b)
+		}
+		entries++
+	}
+
+	if entries > 1 {
+		buf.WriteByte(']')
+	}
+	if buf.Len() == 0 {
+		return nil
+	}
+	return buf.Bytes()
+}
+
+var clientSentinelPrefix = []byte(`{"logtail":{"client_sentinel":`)
+
+const (
+	noSentinel   = 0
+	stopSentinel = 1
+)
+
+// newSentinel creates a client sentinel between 2 and maxint32.
+// It does not generate the reserved values:
+//	0 is no sentinel
+//	1 is stop the logger
+func newSentinel() ([]byte, int32) {
+	val, err := rand.Int(rand.Reader, big.NewInt(1<<31-2))
+	if err != nil {
+		panic(err)
+	}
+	v := int32(val.Int64()) + 2
+
+	buf := new(bytes.Buffer)
+	fmt.Fprintf(buf, "%s%d}}\n", clientSentinelPrefix, v)
+	return buf.Bytes(), v
+}
+
+// readSentinel reads a sentinel.
+// If it is not a sentinel it reports 0.
+func readSentinel(b []byte) int32 {
+	if !bytes.HasPrefix(b, clientSentinelPrefix) {
+		return 0
+	}
+	b = bytes.TrimPrefix(b, clientSentinelPrefix)
+	b = bytes.TrimSuffix(bytes.TrimSpace(b), []byte("}}"))
+	v, err := strconv.Atoi(string(b))
+	if err != nil {
+		return 0
+	}
+	return int32(v)
+}
+
+// This is the goroutine that repeatedly uploads logs in the background.
+func (l *logger) uploading(ctx context.Context) {
+	defer close(l.shutdownDone)
+
+	for {
+		body := l.drainPending()
+		if l.zstdEncoder != nil {
+			body = l.zstdEncoder.EncodeAll(body, nil)
+		}
+
+		for len(body) > 0 {
+			select {
+			case <-ctx.Done():
+				return
+			default:
+			}
+			uploaded, err := l.upload(ctx, body)
+			if err != nil {
+				fmt.Fprintf(l.stderr, "logtail: upload: %v\n", err)
+			}
+			if uploaded {
+				break
+			}
+			l.bo.BackOff(ctx, err)
+		}
+
+		select {
+		case <-l.shutdownStart:
+			return
+		default:
+		}
+	}
+}
+
+func (l *logger) upload(ctx context.Context, body []byte) (uploaded bool, err error) {
+	req, err := http.NewRequest("POST", l.url, bytes.NewReader(body))
+	if err != nil {
+		// I know of no conditions under which this could fail.
+		// Report it very loudly.
+		// TODO record logs to disk
+		panic("logtail: cannot build http request: " + err.Error())
+	}
+	if l.zstdEncoder != nil {
+		req.Header.Add("Content-Encoding", "zstd")
+	}
+
+	maxUploadTime := 45 * time.Second
+	ctx, cancel := context.WithTimeout(ctx, maxUploadTime)
+	defer cancel()
+	req = req.WithContext(ctx)
+
+	compressedNote := "not-compressed"
+	if l.zstdEncoder != nil {
+		compressedNote = "compressed"
+	}
+
+	resp, err := l.httpc.Do(req)
+	if err != nil {
+		return false, fmt.Errorf("log upload of %d bytes %s failed: %v", len(body), compressedNote, err)
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != 200 {
+		uploaded = resp.StatusCode == 400 // the server saved the logs anyway
+		b, _ := ioutil.ReadAll(resp.Body)
+		return uploaded, fmt.Errorf("log upload of %d bytes %s failed %d: %q", len(body), compressedNote, resp.StatusCode, string(b))
+	}
+	return true, nil
+}
+
+func (l *logger) Flush() error {
+	return nil
+}
+
+var errHasLogtail = errors.New("logtail: JSON log message contains reserved 'logtail' property")
+
+func (l *logger) send(jsonBlob []byte) (int, error) {
+	n, err := l.buffer.Write(jsonBlob)
+	select {
+	case l.sent <- struct{}{}:
+	default:
+	}
+	return n, err
+}
+
+func (l *logger) encodeText(buf []byte, skipClientTime bool) []byte {
+	now := l.timeNow()
+
+	b := make([]byte, 0, len(buf)+16)
+	b = append(b, '{')
+
+	if !skipClientTime {
+		b = append(b, `"logtail": {"client_time": "`...)
+		b = now.AppendFormat(b, time.RFC3339Nano)
+		b = append(b, "\"}, "...)
+	}
+
+	b = append(b, "\"text\": \""...)
+	for i, c := range buf {
+		switch c {
+		case '\b':
+			b = append(b, '\\', 'b')
+		case '\f':
+			b = append(b, '\\', 'f')
+		case '\n':
+			b = append(b, '\\', 'n')
+		case '\r':
+			b = append(b, '\\', 'r')
+		case '\t':
+			b = append(b, '\\', 't')
+		case '"':
+			b = append(b, '\\', '"')
+		case '\\':
+			b = append(b, '\\', '\\')
+		default:
+			b = append(b, c)
+		}
+		if l.lowMem && i > 254 {
+			b = append(b, "…"...)
+			break
+		}
+	}
+	b = append(b, "\"}\n"...)
+	return b
+}
+
+func (l *logger) encode(buf []byte) []byte {
+	if buf[0] != '{' {
+		return l.encodeText(buf, l.skipClientTime) // text fast-path
+	}
+
+	now := l.timeNow()
+
+	obj := make(map[string]interface{})
+	if err := json.Unmarshal(buf, &obj); err != nil {
+		for k := range obj {
+			delete(obj, k)
+		}
+		obj["text"] = string(buf)
+	}
+	if txt, isStr := obj["text"].(string); l.lowMem && isStr && len(txt) > 254 {
+		// TODO(crawshaw): trim to unicode code point
+		obj["text"] = txt[:254] + "…"
+	}
+
+	hasLogtail := obj["logtail"] != nil
+	if hasLogtail {
+		obj["error_has_logtail"] = obj["logtail"]
+		obj["logtail"] = nil
+	}
+	if !l.skipClientTime {
+		obj["logtail"] = map[string]string{
+			"client_time": now.Format(time.RFC3339Nano),
+		}
+	}
+
+	b, err := json.Marshal(obj)
+	if err != nil {
+		fmt.Fprintf(l.stderr, "logtail: re-encoding JSON failed: %v\n", err)
+		// I know of no conditions under which this could fail.
+		// Report it very loudly.
+		panic("logtail: re-encoding JSON failed: " + err.Error())
+	}
+	b = append(b, '\n')
+	return b
+}
+
+func (l *logger) Write(buf []byte) (int, error) {
+	if len(buf) == 0 {
+		return 0, nil
+	}
+	if l.stderr != nil && l.stderr != ioutil.Discard {
+		if buf[len(buf)-1] == '\n' {
+			l.stderr.Write(buf)
+		} else {
+			// The log package always line-terminates logs,
+			// so this is an uncommon path.
+			bufnl := make([]byte, len(buf)+1)
+			copy(bufnl, buf)
+			bufnl[len(bufnl)-1] = '\n'
+			l.stderr.Write(bufnl)
+		}
+	}
+	b := l.encode(buf)
+	return l.send(b)
+}

+ 20 - 0
logtail/logtail_test.go

@@ -0,0 +1,20 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package logtail
+
+import (
+	"context"
+	"testing"
+)
+
+func TestFastShutdown(t *testing.T) {
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel()
+
+	l := Log(Config{
+		BaseURL: "http://localhost:1234",
+	})
+	l.Shutdown(ctx)
+}

+ 155 - 0
portlist/netstat.go

@@ -0,0 +1,155 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package portlist
+
+import (
+	"fmt"
+	"sort"
+	"strconv"
+	"strings"
+
+	exec "tailscale.com/tempfork/osexec"
+)
+
+func parsePort(s string) int {
+	// a.b.c.d:1234 or [a:b:c:d]:1234
+	i1 := strings.LastIndexByte(s, ':')
+	// a.b.c.d.1234 or [a:b:c:d].1234
+	i2 := strings.LastIndexByte(s, '.')
+
+	i := i1
+	if i2 > i {
+		i = i2
+	}
+	if i < 0 {
+		// no match; weird
+		return -1
+	}
+
+	portstr := s[i+1 : len(s)]
+	if portstr == "*" {
+		return 0
+	}
+
+	port, err := strconv.ParseUint(portstr, 10, 16)
+	if err != nil {
+		// invalid port; weird
+		return -1
+	}
+
+	return int(port)
+}
+
+type nothing struct{}
+
+// Lowest common denominator parser for "netstat -na" format.
+// All of Linux, Windows, and macOS support -na and give similar-ish output
+// formats that we can parse without special detection logic.
+// Unfortunately, options to filter by proto or state are non-portable,
+// so we'll filter for ourselves.
+func parsePortsNetstat(output string) List {
+	m := map[Port]nothing{}
+	lines := strings.Split(string(output), "\n")
+
+	var lastline string
+	var lastport Port
+	for _, line := range lines {
+		trimline := strings.TrimSpace(line)
+		cols := strings.Fields(trimline)
+		if len(cols) < 1 {
+			continue
+		}
+		protos := strings.ToLower(cols[0])
+		var proto, laddr, raddr string
+		if strings.HasPrefix(protos, "tcp") {
+			if len(cols) < 4 {
+				continue
+			}
+			proto = "tcp"
+			laddr = cols[len(cols)-3]
+			raddr = cols[len(cols)-2]
+			state := cols[len(cols)-1]
+			if !strings.HasPrefix(state, "LISTEN") {
+				// not interested in non-listener sockets
+				continue
+			}
+		} else if strings.HasPrefix(protos, "udp") {
+			if len(cols) < 3 {
+				continue
+			}
+			proto = "udp"
+			laddr = cols[len(cols)-2]
+			raddr = cols[len(cols)-1]
+		} else if protos[0] == '[' && len(trimline) > 2 {
+			// Windows: with netstat -nab, appends a line like:
+			//  [description]
+			// after the port line.
+			p := lastport
+			delete(m, lastport)
+			proc := trimline[1 : len(trimline)-1]
+			if proc == "svchost.exe" && lastline != "" {
+				p.Process = lastline
+			} else {
+				if strings.HasSuffix(proc, ".exe") {
+					p.Process = proc[:len(proc)-4]
+				} else {
+					p.Process = proc
+				}
+			}
+			m[p] = nothing{}
+		} else {
+			// not interested in other protocols
+			lastline = trimline
+			continue
+		}
+
+		lport := parsePort(laddr)
+		rport := parsePort(raddr)
+		if rport != 0 || lport <= 0 {
+			// not interested in "connected" sockets
+			continue
+		}
+
+		p := Port{
+			Proto: proto,
+			Port:  uint16(lport),
+		}
+		m[p] = nothing{}
+		lastport = p
+		lastline = ""
+	}
+
+	l := []Port{}
+	for p := range m {
+		l = append(l, p)
+	}
+	sort.Slice(l, func(i, j int) bool {
+		return (&l[i]).lessThan(&l[j])
+	})
+
+	return l
+}
+
+func listPortsNetstat(args string) (List, error) {
+	exe, err := exec.LookPath("netstat")
+	if err != nil {
+		return nil, fmt.Errorf("netstat: lookup: %v", err)
+	}
+	c := exec.Cmd{
+		Path: exe,
+		Args: []string{exe, args},
+	}
+	output, err := c.Output()
+	if err != nil {
+		xe, ok := err.(*exec.ExitError)
+		stderr := ""
+		if ok {
+			stderr = strings.TrimSpace(string(xe.Stderr))
+		}
+		return nil, fmt.Errorf("netstat: %v (%q)", err, stderr)
+	}
+
+	return parsePortsNetstat(string(output)), nil
+}

+ 89 - 0
portlist/netstat_test.go

@@ -0,0 +1,89 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package portlist
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestParsePort(t *testing.T) {
+	type InOut struct {
+		in     string
+		expect int
+	}
+	tests := []InOut{
+		InOut{"1.2.3.4:5678", 5678},
+		InOut{"0.0.0.0.999", 999},
+		InOut{"1.2.3.4:*", 0},
+		InOut{"5.5.5.5:0", 0},
+		InOut{"[1::2]:5", 5},
+		InOut{"[1::2].5", 5},
+		InOut{"gibberish", -1},
+	}
+
+	for _, io := range tests {
+		got := parsePort(io.in)
+		if got != io.expect {
+			t.Fatalf("input:%#v expect:%v got:%v\n", io.in, io.expect, got)
+		}
+	}
+}
+
+var netstat_output = `
+// linux
+tcp        0      0 0.0.0.0:22              0.0.0.0:*               LISTEN     
+udp        0      0 0.0.0.0:5353            0.0.0.0:*                          
+udp6       0      0 :::5353                 :::*                               
+udp6       0      0 :::5354                 :::*                               
+
+// macOS
+tcp4       0      0  *.23                   *.*                    LISTEN     
+tcp6       0      0  *.24                   *.*                    LISTEN     
+udp6       0      0  *.5453                 *.*                               
+udp4       0      0  *.5553                 *.*                               
+
+// Windows 10
+  Proto  Local Address          Foreign Address        State
+  TCP    0.0.0.0:32             0.0.0.0:0              LISTENING
+ [sshd.exe]
+  UDP    0.0.0.0:5050           *:*
+  CDPSvc
+ [svchost.exe]
+  UDP    0.0.0.0:53             *:*
+ [chrome.exe]
+  UDP    10.0.1.43:9353         *:*
+ [iTunes.exe]
+  UDP    [::]:53                *:*
+  UDP    [::]:53                *:*
+ [funball.exe]
+`
+
+func TestParsePortsNetstat(t *testing.T) {
+	expect := List{
+		Port{"tcp", 22, "", ""},
+		Port{"tcp", 23, "", ""},
+		Port{"tcp", 24, "", ""},
+		Port{"tcp", 32, "", "sshd"},
+		Port{"udp", 53, "", "chrome"},
+		Port{"udp", 53, "", "funball"},
+		Port{"udp", 5050, "", "CDPSvc"},
+		Port{"udp", 5353, "", ""},
+		Port{"udp", 5354, "", ""},
+		Port{"udp", 5453, "", ""},
+		Port{"udp", 5553, "", ""},
+		Port{"udp", 9353, "", "iTunes"},
+	}
+
+	pl := parsePortsNetstat(netstat_output)
+	fmt.Printf("--- expect:\n%v\n", expect)
+	fmt.Printf("--- got:\n%v\n", pl)
+	for i := range pl {
+		if expect[i] != pl[i] {
+			t.Fatalf("row#%d\n expect=%v\n    got=%v\n",
+				i, expect[i], pl[i])
+		}
+	}
+}

+ 59 - 0
portlist/poller.go

@@ -0,0 +1,59 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package portlist
+
+import (
+	"time"
+)
+
+type Poller struct {
+	C      chan List     // new data when it arrives; closed when done
+	quitCh chan struct{} // close this to force exit
+	Err    error         // last returned error code, if any
+	prev   List          // most recent data
+}
+
+func NewPoller() (*Poller, error) {
+	p := &Poller{
+		C:      make(chan List),
+		quitCh: make(chan struct{}),
+	}
+	// Do one initial poll synchronously, so the caller can react
+	// to any obvious errors.
+	p.prev, p.Err = GetList(nil)
+	return p, p.Err
+}
+
+func (p *Poller) Close() {
+	close(p.quitCh)
+	<-p.C
+}
+
+// Poll periodically. Run this in a goroutine if you want.
+func (p *Poller) Run() error {
+	defer close(p.C)
+	tick := time.NewTicker(POLL_SECONDS * time.Second)
+	defer tick.Stop()
+
+	// Send out the pre-generated initial value
+	p.C <- p.prev
+
+	for {
+		select {
+		case <-tick.C:
+			pl, err := GetList(p.prev)
+			if err != nil {
+				p.Err = err
+				return p.Err
+			}
+			if !pl.SameInodes(p.prev) {
+				p.prev = pl
+				p.C <- pl
+			}
+		case <-p.quitCh:
+			return nil
+		}
+	}
+}

+ 87 - 0
portlist/portlist.go

@@ -0,0 +1,87 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package portlist
+
+import (
+	"fmt"
+	"strings"
+)
+
+type Port struct {
+	Proto   string
+	Port    uint16
+	inode   string
+	Process string
+}
+
+type List []Port
+
+var protos = []string{"tcp", "udp"}
+
+func (a *Port) lessThan(b *Port) bool {
+	if a.Port < b.Port {
+		return true
+	} else if a.Port > b.Port {
+		return false
+	}
+
+	if a.Proto < b.Proto {
+		return true
+	} else if a.Proto > b.Proto {
+		return false
+	}
+
+	if a.inode < b.inode {
+		return true
+	} else if a.inode > b.inode {
+		return false
+	}
+
+	if a.Process < b.Process {
+		return true
+	} else if a.Process > b.Process {
+		return false
+	}
+	return false
+}
+
+func (a List) SameInodes(b List) bool {
+	if a == nil || b == nil || len(a) != len(b) {
+		return false
+	}
+	for i := range a {
+		if a[i].Proto != b[i].Proto ||
+			a[i].Port != b[i].Port ||
+			a[i].inode != b[i].inode {
+			return false
+		}
+	}
+	return true
+}
+
+func (pl List) String() string {
+	out := []string{}
+	for _, v := range pl {
+		out = append(out, fmt.Sprintf("%-3s %5d %-17s %#v",
+			v.Proto, v.Port, v.inode, v.Process))
+	}
+	return strings.Join(out, "\n")
+}
+
+func GetList(prev List) (List, error) {
+	pl, err := listPorts()
+	if err != nil {
+		return nil, fmt.Errorf("listPorts: %s", err)
+	}
+	if pl.SameInodes(prev) {
+		// Nothing changed, skip inode lookup
+		return prev, nil
+	}
+	pl, err = addProcesses(pl)
+	if err != nil {
+		return nil, fmt.Errorf("addProcesses: %s", err)
+	}
+	return pl, nil
+}

+ 99 - 0
portlist/portlist_darwin.go

@@ -0,0 +1,99 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !linux,!windows
+
+package portlist
+
+import (
+	"bufio"
+	"bytes"
+	"fmt"
+	"log"
+	"os"
+	"strings"
+
+	exec "tailscale.com/tempfork/osexec"
+)
+
+// We have to run netstat, which is a bit expensive, so don't do it too often.
+const POLL_SECONDS = 5
+
+func listPorts() (List, error) {
+	return listPortsNetstat("-na")
+}
+
+// In theory, lsof could replace the function of both listPorts() and
+// addProcesses(), since it provides a superset of the netstat output.
+// However, "netstat -na" runs ~100x faster than lsof on my machine, so
+// we should do it only if the list of open ports has actually changed.
+//
+// TODO(apenwarr): this fails in a macOS sandbox (ie. our usual case).
+// We might as well just delete this code if we can't find a solution.
+func addProcesses(pl []Port) ([]Port, error) {
+	exe, err := exec.LookPath("lsof")
+	if err != nil {
+		return nil, fmt.Errorf("lsof: lookup: %v", err)
+	}
+	c := exec.Cmd{
+		Path: exe,
+		Args: []string{exe, "-F", "-n", "-P", "-O", "-S2", "-T", "-i4", "-i6"},
+	}
+	output, err := c.Output()
+	if err != nil {
+		xe, ok := err.(*exec.ExitError)
+		stderr := ""
+		if ok {
+			stderr = strings.TrimSpace(string(xe.Stderr))
+		}
+		// fails when run in a macOS sandbox, so make this non-fatal.
+		log.Printf("portlist: lsof: %v (%q)\n", err, stderr)
+		return pl, nil
+	}
+
+	type ProtoPort struct {
+		proto string
+		port  uint16
+	}
+	m := map[ProtoPort]*Port{}
+	for i := range pl {
+		pp := ProtoPort{pl[i].Proto, pl[i].Port}
+		m[pp] = &pl[i]
+	}
+
+	r := bytes.NewReader(output)
+	scanner := bufio.NewScanner(r)
+
+	var cmd, proto string
+	for scanner.Scan() {
+		line := scanner.Text()
+		if line[0] == 'p' {
+			// starting a new process
+			cmd = ""
+			proto = ""
+		} else if line[0] == 'c' {
+			cmd = line[1:len(line)]
+		} else if line[0] == 'P' {
+			proto = strings.ToLower(line[1:len(line)])
+		} else if line[0] == 'n' {
+			rest := line[1:len(line)]
+			i := strings.Index(rest, "->")
+			if i < 0 {
+				// a listening port
+				port := parsePort(rest)
+				if port > 0 {
+					pp := ProtoPort{proto, uint16(port)}
+					p := m[pp]
+					if p != nil {
+						p.Process = cmd
+					} else {
+						fmt.Fprintf(os.Stderr, "weird: missing %v\n", pp)
+					}
+				}
+			}
+		}
+	}
+
+	return pl, nil
+}

+ 155 - 0
portlist/portlist_linux.go

@@ -0,0 +1,155 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package portlist
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"sort"
+	"strconv"
+	"strings"
+)
+
+// Reading the sockfiles on Linux is very fast, so we can do it often.
+const POLL_SECONDS = 1
+
+// TODO(apenwarr): Include IPv6 ports eventually.
+// Right now we don't route IPv6 anyway so it's better to exclude them.
+var sockfiles = []string{"/proc/net/tcp", "/proc/net/udp"}
+
+func listPorts() (List, error) {
+	l := []Port{}
+
+	for pi, fname := range sockfiles {
+		proto := protos[pi]
+
+		f, err := os.Open(fname)
+		if err != nil {
+			return nil, fmt.Errorf("%s: %s", fname, err)
+		}
+		defer f.Close()
+		r := bufio.NewReader(f)
+
+		// skip header row
+		_, err = r.ReadString('\n')
+		if err != nil {
+			return nil, err
+		}
+
+		for err == nil {
+			line, err := r.ReadString('\n')
+			if err == io.EOF {
+				break
+			}
+			if err != nil {
+				return nil, err
+			}
+
+			// sl local rem ... inode
+			words := strings.Fields(line)
+			local := words[1]
+			rem := words[2]
+			inode := words[9]
+
+			if rem != "00000000:0000" {
+				// not a "listener" port
+				continue
+			}
+
+			portv, err := strconv.ParseUint(local[9:], 16, 16)
+			if err != nil {
+				return nil, fmt.Errorf("%#v: %s", local[9:], err)
+			}
+			inodev := fmt.Sprintf("socket:[%s]", inode)
+			l = append(l, Port{
+				Proto: proto,
+				Port:  uint16(portv),
+				inode: inodev,
+			})
+		}
+	}
+
+	sort.Slice(l, func(i, j int) bool {
+		return (&l[i]).lessThan(&l[j])
+	})
+
+	return l, nil
+}
+
+func addProcesses(pl []Port) ([]Port, error) {
+	pm := map[string]*Port{}
+	for k := range pl {
+		pm[pl[k].inode] = &pl[k]
+	}
+
+	pdir, err := os.Open("/proc")
+	if err != nil {
+		return nil, fmt.Errorf("/proc: %s", err)
+	}
+	defer pdir.Close()
+
+	for {
+		pids, err := pdir.Readdirnames(100)
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return nil, fmt.Errorf("/proc: %s", err)
+		}
+
+		for _, pid := range pids {
+			_, err := strconv.ParseInt(pid, 10, 64)
+			if err != nil {
+				// not a pid, ignore it.
+				// /proc has lots of non-pid stuff in it.
+				continue
+			}
+			fddir, err := os.Open(fmt.Sprintf("/proc/%s/fd", pid))
+			if err != nil {
+				// Can't open fd list for this pid. Maybe
+				// don't have access. Ignore it.
+				continue
+			}
+			defer fddir.Close()
+
+			for {
+				fds, err := fddir.Readdirnames(100)
+				if err == io.EOF {
+					break
+				}
+				if err != nil {
+					return nil, fmt.Errorf("readdir: %s", err)
+				}
+				for _, fd := range fds {
+					target, err := os.Readlink(fmt.Sprintf("/proc/%s/fd/%s", pid, fd))
+					if err != nil {
+						// Not a symlink or no permission.
+						// Skip it.
+						continue
+					}
+
+					// TODO(apenwarr): use /proc/*/cmdline instead of /comm?
+					// Unsure right now whether users will want the extra detail
+					// or not.
+					pe := pm[target]
+					if pe != nil {
+						comm, err := ioutil.ReadFile(fmt.Sprintf("/proc/%s/comm", pid))
+						if err != nil {
+							// Usually shouldn't happen. One possibility is
+							// the process has gone away, so let's skip it.
+							continue
+						}
+						pe.Process = strings.TrimSpace(string(comm))
+					}
+				}
+			}
+		}
+	}
+
+	return pl, nil
+}

+ 20 - 0
portlist/portlist_other.go

@@ -0,0 +1,20 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !linux,!windows,!darwin
+
+package portlist
+
+// We have to run netstat, which is a bit expensive, so don't do it too often.
+const POLL_SECONDS = 5
+
+func listPorts() (List, error) {
+	return listPortsNetstat("-na")
+}
+
+func addProcesses(pl []Port) ([]Port, error) {
+	// Generic version has no way to get process mappings.
+	// This has to be OS-specific.
+	return pl, nil
+}

+ 16 - 0
portlist/portlist_windows.go

@@ -0,0 +1,16 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package portlist
+
+// Forking on Windows is insanely expensive, so don't do it too often.
+const POLL_SECONDS = 5
+
+func listPorts() (List, error) {
+	return listPortsNetstat("-na")
+}
+
+func addProcesses(pl []Port) ([]Port, error) {
+	return listPortsNetstat("-nab")
+}

+ 78 - 0
ratelimit/ratelimit.go

@@ -0,0 +1,78 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ratelimit
+
+import (
+	"sync"
+	"time"
+)
+
+type Bucket struct {
+	mu           sync.Mutex
+	FillInterval time.Duration
+	Burst        int
+	v            int
+	quitCh       chan struct{}
+	started      bool
+	closed       bool
+}
+
+func (b *Bucket) startLocked() {
+	b.v = b.Burst
+	b.quitCh = make(chan struct{})
+	b.started = true
+
+	t := time.NewTicker(b.FillInterval)
+	go func() {
+		for {
+			select {
+			case <-b.quitCh:
+				return
+			case <-t.C:
+				b.tick()
+			}
+		}
+	}()
+}
+
+func (b *Bucket) tick() {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+
+	if b.v < b.Burst {
+		b.v++
+	}
+}
+
+func (b *Bucket) Close() {
+	b.mu.Lock()
+	if !b.started {
+		b.closed = true
+		b.mu.Unlock()
+		return
+	}
+	if b.closed {
+		b.mu.Unlock()
+		return
+	}
+	b.closed = true
+	b.mu.Unlock()
+
+	b.quitCh <- struct{}{}
+}
+
+func (b *Bucket) TryGet() int {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+
+	if !b.started {
+		b.startLocked()
+	}
+	if b.v > 0 {
+		b.v--
+		return b.v + 1
+	}
+	return 0
+}

+ 28 - 0
ratelimit/ratelimit_test.go

@@ -0,0 +1,28 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ratelimit
+
+import (
+	"testing"
+	"time"
+)
+
+func TestBucket(t *testing.T) {
+	b := Bucket{
+		FillInterval: time.Second,
+		Burst:        3,
+	}
+	expect := []int{3, 2, 1, 0, 0}
+	for i, want := range expect {
+		got := b.TryGet()
+		if want != got {
+			t.Errorf("#%d want=%d got=%d\n", i, want, got)
+		}
+	}
+	b.tick()
+	if want, got := 1, b.TryGet(); want != got {
+		t.Errorf("after tick: want=%d got=%d\n", want, got)
+	}
+}

+ 63 - 0
safesocket/basic_test.go

@@ -0,0 +1,63 @@
+// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package safesocket
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestBasics(t *testing.T) {
+	fmt.Printf("listening2...\n")
+	l, port, err := Listen("COOKIE", "Tailscale", "test", 0)
+	if err != nil {
+		t.Fatal(err)
+	}
+	fmt.Printf("listened.\n")
+
+	go func() {
+		fmt.Printf("accepting...\n")
+		s, err := l.Accept()
+		if err != nil {
+			t.Fatal(err)
+		}
+		fmt.Printf("accepted.\n")
+		l.Close()
+		s.Write([]byte("hello"))
+		fmt.Printf("server wrote.\n")
+
+		b := make([]byte, 1024)
+		n, err := s.Read(b)
+		if err != nil {
+			t.Fatal(err)
+		}
+		fmt.Printf("server read %d bytes.\n", n)
+		if string(b[:n]) != "world" {
+			t.Fatalf("got %#v, expected %#v\n", string(b[:n]), "world")
+		}
+		s.Close()
+	}()
+
+	fmt.Printf("connecting...\n")
+	c, err := Connect("COOKIE", "Tailscale", "test", port)
+	if err != nil {
+		t.Fatal(err)
+	}
+	fmt.Printf("connected.\n")
+	c.Write([]byte("world"))
+	fmt.Printf("client wrote.\n")
+
+	b := make([]byte, 1024)
+	n, err := c.Read(b)
+	if err != nil {
+		t.Fatal(err)
+	}
+	fmt.Printf("client read %d bytes.\n", n)
+	if string(b[:n]) != "hello" {
+		t.Fatalf("got %#v, expected %#v\n", string(b[:n]), "hello")
+	}
+
+	c.Close()
+}

Some files were not shown because too many files changed in this diff