Browse Source

cmd/strelaypoolsrv: Move metric scraping to the server itself (#4866)

Audrius Butkevicius 7 years ago
parent
commit
afb27f7f02
4 changed files with 505 additions and 181 deletions
  1. 82 107
      cmd/strelaypoolsrv/gui/index.html
  2. 206 74
      cmd/strelaypoolsrv/main.go
  3. 213 0
      cmd/strelaypoolsrv/stats.go
  4. 4 0
      cmd/strelaysrv/status.go

+ 82 - 107
cmd/strelaypoolsrv/gui/index.html

@@ -56,83 +56,83 @@
             <tr>
               <th rowspan="2">Address</td>
               <th rowspan="2">
-                <a ng-click="sortType = 'status.numActiveSessions'; sortReverse = !sortReverse">
+                <a ng-click="sortType = 'stats.numActiveSessions'; sortReverse = !sortReverse">
                   Sessions
-                  <span ng-show="sortType == 'status.numActiveSessions' && !sortReverse" class="fa fa-caret-down"></span>
-                  <span ng-show="sortType == 'status.numActiveSessions' && sortReverse" class="fa fa-caret-up"></span>
+                  <span ng-show="sortType == 'stats.numActiveSessions' && !sortReverse" class="fa fa-caret-down"></span>
+                  <span ng-show="sortType == 'stats.numActiveSessions' && sortReverse" class="fa fa-caret-up"></span>
                 </a>
               </th>
               <th rowspan="2">
-                <a ng-click="sortType = 'status.numConnections'; sortReverse = !sortReverse">
+                <a ng-click="sortType = 'stats.numConnections'; sortReverse = !sortReverse">
                   Connections
-                  <span ng-show="sortType == 'status.numConnections' && !sortReverse" class="fa fa-caret-down"></span>
-                  <span ng-show="sortType == 'status.numConnections' && sortReverse" class="fa fa-caret-up"></span>
+                  <span ng-show="sortType == 'stats.numConnections' && !sortReverse" class="fa fa-caret-down"></span>
+                  <span ng-show="sortType == 'stats.numConnections' && sortReverse" class="fa fa-caret-up"></span>
                 </a>
               </th>
               <th rowspan="2">
-                <a ng-click="sortType = 'status.bytesProxied'; sortReverse = !sortReverse">
+                <a ng-click="sortType = 'stats.bytesProxied'; sortReverse = !sortReverse">
                   Data relayed
-                  <span ng-show="sortType == 'status.bytesProxied' && !sortReverse" class="fa fa-caret-down"></span>
-                  <span ng-show="sortType == 'status.bytesProxied' && sortReverse" class="fa fa-caret-up"></span>
+                  <span ng-show="sortType == 'stats.bytesProxied' && !sortReverse" class="fa fa-caret-down"></span>
+                  <span ng-show="sortType == 'stats.bytesProxied' && sortReverse" class="fa fa-caret-up"></span>
                 </a>
               </th>
               <th colspan="6" class="text-center">Transfer rate in the last period</th>
               <th rowspan="2">
-                <a ng-click="sortType = 'status.uptimeSeconds'; sortReverse = !sortReverse">
+                <a ng-click="sortType = 'stats.uptimeSeconds'; sortReverse = !sortReverse">
                   Uptime hours
-                  <span ng-show="sortType == 'status.uptimeSeconds' && !sortReverse" class="fa fa-caret-down"></span>
+                  <span ng-show="sortType == 'stats.uptimeSeconds' && !sortReverse" class="fa fa-caret-down"></span>
                   <span ng-show="sortType == 'status.uptimeSeconds' && sortReverse" class="fa fa-caret-up"></span>
                 </a>
               </th>
               <th rowspan="2">
-                <a ng-click="sortType = 'status.options[\'provided-by\'] || \'\''; sortReverse = !sortReverse">
+                <a ng-click="sortType = 'stats.options[\'provided-by\'] || \'\''; sortReverse = !sortReverse">
                   Provided by
-                  <span ng-show="sortType == 'status.options[\'provided-by\'] || \'\'' && !sortReverse" class="fa fa-caret-down"></span>
-                  <span ng-show="sortType == 'status.options[\'provided-by\'] || \'\'' && sortReverse" class="fa fa-caret-up"></span>
+                  <span ng-show="sortType == 'stats.options[\'provided-by\'] || \'\'' && !sortReverse" class="fa fa-caret-down"></span>
+                  <span ng-show="sortType == 'stats.options[\'provided-by\'] || \'\'' && sortReverse" class="fa fa-caret-up"></span>
                 </a>
               </th>
             </tr>
             <tr>
               <th>
-                <a ng-click="sortType = 'status.kbps10s1m5m15m30m60m[0]'; sortReverse = !sortReverse">
+                <a ng-click="sortType = 'stats.kbps10s1m5m15m30m60m[0]'; sortReverse = !sortReverse">
                   10s
-                  <span ng-show="sortType == 'status.kbps10s1m5m15m30m60m[0]' && !sortReverse" class="fa fa-caret-down"></span>
-                  <span ng-show="sortType == 'status.kbps10s1m5m15m30m60m[0]' && sortReverse" class="fa fa-caret-up"></span>
+                  <span ng-show="sortType == 'stats.kbps10s1m5m15m30m60m[0]' && !sortReverse" class="fa fa-caret-down"></span>
+                  <span ng-show="sortType == 'stats.kbps10s1m5m15m30m60m[0]' && sortReverse" class="fa fa-caret-up"></span>
                 </a>
               </th>
               <th>
-                <a ng-click="sortType = 'status.kbps10s1m5m15m30m60m[1]'; sortReverse = !sortReverse">
+                <a ng-click="sortType = 'stats.kbps10s1m5m15m30m60m[1]'; sortReverse = !sortReverse">
                   1m
-                  <span ng-show="sortType == 'status.kbps10s1m5m15m30m60m[1]' && !sortReverse" class="fa fa-caret-down"></span>
-                  <span ng-show="sortType == 'status.kbps10s1m5m15m30m60m[1]' && sortReverse" class="fa fa-caret-up"></span>
+                  <span ng-show="sortType == 'stats.kbps10s1m5m15m30m60m[1]' && !sortReverse" class="fa fa-caret-down"></span>
+                  <span ng-show="sortType == 'stats.kbps10s1m5m15m30m60m[1]' && sortReverse" class="fa fa-caret-up"></span>
                 </a>
               </th>
               <th>
-                <a ng-click="sortType = 'status.kbps10s1m5m15m30m60m[2]'; sortReverse = !sortReverse">
+                <a ng-click="sortType = 'stats.kbps10s1m5m15m30m60m[2]'; sortReverse = !sortReverse">
                   5m
-                  <span ng-show="sortType == 'status.kbps10s1m5m15m30m60m[2]' && !sortReverse" class="fa fa-caret-down"></span>
-                  <span ng-show="sortType == 'status.kbps10s1m5m15m30m60m[2]' && sortReverse" class="fa fa-caret-up"></span>
+                  <span ng-show="sortType == 'stats.kbps10s1m5m15m30m60m[2]' && !sortReverse" class="fa fa-caret-down"></span>
+                  <span ng-show="sortType == 'stats.kbps10s1m5m15m30m60m[2]' && sortReverse" class="fa fa-caret-up"></span>
                 </a>
               </th>
               <th>
-                <a ng-click="sortType = 'status.kbps10s1m5m15m30m60m[3]'; sortReverse = !sortReverse">
+                <a ng-click="sortType = 'stats.kbps10s1m5m15m30m60m[3]'; sortReverse = !sortReverse">
                   15m
-                  <span ng-show="sortType == 'status.kbps10s1m5m15m30m60m[3]' && !sortReverse" class="fa fa-caret-down"></span>
-                  <span ng-show="sortType == 'status.kbps10s1m5m15m30m60m[3]' && sortReverse" class="fa fa-caret-up"></span>
+                  <span ng-show="sortType == 'stats.kbps10s1m5m15m30m60m[3]' && !sortReverse" class="fa fa-caret-down"></span>
+                  <span ng-show="sortType == 'stats.kbps10s1m5m15m30m60m[3]' && sortReverse" class="fa fa-caret-up"></span>
                 </a>
               </th>
               <th>
-                <a ng-click="sortType = 'status.kbps10s1m5m15m30m60m[4]'; sortReverse = !sortReverse">
+                <a ng-click="sortType = 'stats.kbps10s1m5m15m30m60m[4]'; sortReverse = !sortReverse">
                   30m
-                  <span ng-show="sortType == 'status.kbps10s1m5m15m30m60m[4]' && !sortReverse" class="fa fa-caret-down"></span>
-                  <span ng-show="sortType == 'status.kbps10s1m5m15m30m60m[4]' && sortReverse" class="fa fa-caret-up"></span>
+                  <span ng-show="sortType == 'stats.kbps10s1m5m15m30m60m[4]' && !sortReverse" class="fa fa-caret-down"></span>
+                  <span ng-show="sortType == 'stats.kbps10s1m5m15m30m60m[4]' && sortReverse" class="fa fa-caret-up"></span>
                 </a>
               </th>
               <th>
-                <a ng-click="sortType = 'status.kbps10s1m5m15m30m60m[5]'; sortReverse = !sortReverse">
+                <a ng-click="sortType = 'stats.kbps10s1m5m15m30m60m[5]'; sortReverse = !sortReverse">
                   60m
-                  <span ng-show="sortType == 'status.kbps10s1m5m15m30m60m[5]' && !sortReverse" class="fa fa-caret-down"></span>
-                  <span ng-show="sortType == 'status.kbps10s1m5m15m30m60m[5]' && sortReverse" class="fa fa-caret-up"></span>
+                  <span ng-show="sortType == 'stats.kbps10s1m5m15m30m60m[5]' && !sortReverse" class="fa fa-caret-down"></span>
+                  <span ng-show="sortType == 'stats.kbps10s1m5m15m30m60m[5]' && sortReverse" class="fa fa-caret-up"></span>
                 </a>
               </th>
             </tr>
@@ -140,21 +140,21 @@
           <tbody>
             <tr ng-repeat="relay in relays | orderBy:sortType:sortReverse:sortCompare" ng-mouseover="relay.showMarker()" ng-mouseleave="relay.hideMarker()">
               <td>{{ relay.address }}</td>
-              <td ng-if="relay.status === undefined" colspan="11" class="text-center">Looking up...</td>
-              <td ng-if-start="relay.status !== undefined">{{ relay.status.numActiveSessions }}</td>
-              <td>{{ relay.status.numConnections }}</td>
-              <td>{{ relay.status.bytesProxied | bytes }}</td>
-              <td>{{ relay.status.kbps10s1m5m15m30m60m[0] * 128 | bytes }}/s</td>
-              <td>{{ relay.status.kbps10s1m5m15m30m60m[1] * 128 | bytes }}/s</td>
-              <td>{{ relay.status.kbps10s1m5m15m30m60m[2] * 128 | bytes }}/s</td>
-              <td>{{ relay.status.kbps10s1m5m15m30m60m[3] * 128 | bytes }}/s</td>
-              <td>{{ relay.status.kbps10s1m5m15m30m60m[4] * 128 | bytes }}/s</td>
-              <td>{{ relay.status.kbps10s1m5m15m30m60m[5] * 128 | bytes }}/s</td>
-              <td ng-if="relay.status.uptimeSeconds != undefined">{{ relay.status.uptimeSeconds/60/60 | number:0 }}</td>
-              <td ng-if="relay.status.uptimeSeconds == undefined"></td>
-              <td title="{{ relay.status.options['provided-by'] || '' }}" ng-if-end>
-                {{ relay.status.options['provided-by'] || '' | limitTo:50 }}
-                <span ng-if="(relay.status.options['provided-by'] || '').length > 50">&hellip;
+              <td ng-if="!relay.stats" colspan="11"></td>
+              <td ng-if-start="relay.stats">{{ relay.stats.numActiveSessions }}</td>
+              <td>{{ relay.stats.numConnections }}</td>
+              <td>{{ relay.stats.bytesProxied | bytes }}</td>
+              <td>{{ relay.stats.kbps10s1m5m15m30m60m[0] * 128 | bytes }}/s</td>
+              <td>{{ relay.stats.kbps10s1m5m15m30m60m[1] * 128 | bytes }}/s</td>
+              <td>{{ relay.stats.kbps10s1m5m15m30m60m[2] * 128 | bytes }}/s</td>
+              <td>{{ relay.stats.kbps10s1m5m15m30m60m[3] * 128 | bytes }}/s</td>
+              <td>{{ relay.stats.kbps10s1m5m15m30m60m[4] * 128 | bytes }}/s</td>
+              <td>{{ relay.stats.kbps10s1m5m15m30m60m[5] * 128 | bytes }}/s</td>
+              <td ng-if="relay.stats.uptimeSeconds != undefined">{{ relay.stats.uptimeSeconds/60/60 | number:0 }}</td>
+              <td ng-if="relay.stats.uptimeSeconds == undefined"></td>
+              <td title="{{ relay.stats.options['provided-by'] || '' }}" ng-if-end>
+                {{ relay.stats.options['provided-by'] || '' | limitTo:50 }}
+                <span ng-if="(relay.stats.options['provided-by'] || '').length > 50">&hellip;
               </td>
             </tr>
           </tbody>
@@ -235,16 +235,16 @@
     $scope.mapBounds = new google.maps.LatLngBounds();
     $scope.tooltipTemplate = $('#infoTemplate').html();
     $scope.usedLocations = {};
-    $scope.sortType = 'status.numActiveSessions';
+    $scope.sortType = 'stats.numActiveSessions';
     $scope.sortReverse = true;
     $scope.sortCompare = function(a, b) {
       if (a.value == b.value) {
         return 0;
       }
-      if (a.type == "undefined") {
+      if (a.type == "undefined" || a.type == "null") {
         return -1;
       }
-      if (b.type == "undefined") {
+      if (b.type == "undefined" || b.type == "null") {
         return 1;
       }
       return a.value > b.value ? 1 : -1;
@@ -252,25 +252,31 @@
 
     $http.get("/endpoint").then(function(response) {
       $scope.relays = response.data.relays;
-      var promises = [];
-      angular.forEach($scope.relays, function(relay) {
 
+      angular.forEach($scope.relays, function(relay) {
         relay.uri = constructURI(relay.url);
         relay.address = relay.url.split('/')[2];
 
         addMarkerToMap(relay);
 
-        promises.push(getRelayStatus(relay));
+        if (relay.stats) {
+          angular.forEach($scope.totals, function(value, key) {
+            if (typeof $scope.totals[key] == 'number') {
+              $scope.totals[key] += relay.stats[key];
+            } else if (typeof $scope.totals[key] == 'object' && $scope.totals[key] instanceof Array) {
+              angular.forEach($scope.totals[key], function(value, index) {
+                $scope.totals[key][index] += relay.stats[key][index];
+              });
+            }
+          });
+        }
       });
 
-      // Can only add circles once we know the totals for transfers, which means
-      // we need to resolve all statuses.
-      $q.all(promises).then(function() {
-        angular.forEach($scope.relays, function(relay) {
-          if (relay.status) {
-            addCircleToMap(relay);
-          }
-        });
+      // After the totals were calculated, add circles.
+      angular.forEach($scope.relays, function(relay) {
+        if (relay.stats) {
+          addCircleToMap(relay);
+        }
       });
 
       $scope.map.fitBounds($scope.mapBounds);
@@ -330,41 +336,10 @@
         fillOpacity: 0.35,
         map: $scope.map,
         center: relay.marker.position,
-        radius: ((relay.status.bytesProxied * 100) / $scope.totals.bytesProxied) * 10000
+        radius: ((relay.stats.bytesProxied * 100) / $scope.totals.bytesProxied) * 10000
       });
     }
 
-    function getRelayStatus(relay) {
-      // Normal timeout doesn't deal with relays which accept the TCP connection
-      // but don't respond (some firewalls do that), so deal with it this way.
-      var timeoutRequest = $q.defer();
-      var resolveStatus = $q.defer();
-
-
-      $http.get("http://" + relay.uri.hostname + ':' + ((relay.uri.args.statusAddr && relay.uri.args.statusAddr.split(':')[1]) || "22070") + "/status", { timeout: timeoutRequest.promise }).then(function (response) {
-        relay.status = response.data;
-        resolveStatus.resolve();
-        angular.forEach($scope.totals, function(value, key) {
-          if (typeof $scope.totals[key] == 'number') {
-            $scope.totals[key] += response.data[key];
-          } else if (typeof $scope.totals[key] == 'object' && $scope.totals[key] instanceof Array) {
-            angular.forEach($scope.totals[key], function(value, index) {
-              $scope.totals[key][index] += response.data[key][index];
-            });
-          }
-        });
-      }, function() {
-        relay.status = null;
-        resolveStatus.resolve();
-      });
-
-      $timeout(function() {
-        timeoutRequest.resolve();
-      }, 5000);
-
-      return resolveStatus.promise;
-    }
-
     function constructURI(url) {
       var uri = document.createElement('a');
 
@@ -385,25 +360,25 @@
 
   <script type="text/template" id="infoTemplate">
     <div>
-      <p><b>{{ relay.uri.hostname }}</b> <span ng-if="relay.status.options['provided-by']">provided by <u>{{ relay.status.options['provided-by'] }}</u></span></p>
-      <div ng-if="relay.status">
-        <span ng-if="relay.status.startTime">Start time: {{ relay.status.startTime | date:"medium" }}</br></span>
-        <span ng-if="relay.status.bytesProxied != undefined">Proxied: {{ relay.status.bytesProxied | bytes }}</br></span>
-        <span ng-if="relay.status.numActiveSessions != undefined">Sessions: {{ relay.status.numActiveSessions }}</br></span>
-        <span ng-if="relay.status.numConnections != undefined">Clients: {{ relay.status.numConnections }}</br></span>
-        <span ng-if="relay.status.options.pools">Pools: {{ relay.status.options.pools.join(', ') }}</br></span>
-        <span ng-if="relay.status.options['global-rate'] != undefined">
-          <span ng-if="relay.status.options['global-rate'] > 0">Global rate limit: {{ relay.status.options['global-rate'] | bytes }}/s</span>
-          <span ng-if="relay.status.options['global-rate'] == 0">Global rate limit: unlimited</span>
+      <p><b>{{ relay.uri.hostname }}</b> <span ng-if="relay.stats.options['provided-by']">provided by <u>{{ relay.stats.options['provided-by'] }}</u></span></p>
+      <div ng-if="relay.stats">
+        <span ng-if="relay.stats.startTime">Start time: {{ relay.stats.startTime | date:"medium" }}</br></span>
+        <span ng-if="relay.stats.bytesProxied != undefined">Proxied: {{ relay.stats.bytesProxied | bytes }}</br></span>
+        <span ng-if="relay.stats.numActiveSessions != undefined">Sessions: {{ relay.stats.numActiveSessions }}</br></span>
+        <span ng-if="relay.stats.numConnections != undefined">Clients: {{ relay.stats.numConnections }}</br></span>
+        <span ng-if="relay.stats.options.pools">Pools: {{ relay.stats.options.pools.join(', ') }}</br></span>
+        <span ng-if="relay.stats.options['global-rate'] != undefined">
+          <span ng-if="relay.stats.options['global-rate'] > 0">Global rate limit: {{ relay.stats.options['global-rate'] | bytes }}/s</span>
+          <span ng-if="relay.stats.options['global-rate'] == 0">Global rate limit: unlimited</span>
           <br/>
         </span>
-        <span ng-if="relay.status.options['per-session-rate'] != undefined">
-          <span ng-if="relay.status.options['per-session-rate'] > 0">Session rate limit: {{ relay.status.options['per-session-rate'] | bytes }}/s</span>
-          <span ng-if="relay.status.options['per-session-rate'] == 0">Session rate limit: unlimited</span>
+        <span ng-if="relay.stats.options['per-session-rate'] != undefined">
+          <span ng-if="relay.stats.options['per-session-rate'] > 0">Session rate limit: {{ relay.stats.options['per-session-rate'] | bytes }}/s</span>
+          <span ng-if="relay.stats.options['per-session-rate'] == 0">Session rate limit: unlimited</span>
           <br/>
         </span>
       </div>
-      <div ng-if="!relay.status">
+      <div ng-if="!relay.stats">
         Data unavailable.
       <div>
     </div>

+ 206 - 74
cmd/strelaypoolsrv/main.go

@@ -18,12 +18,16 @@ import (
 	"net"
 	"net/http"
 	"net/url"
+	"os"
 	"path/filepath"
+	"strconv"
 	"strings"
 	"time"
 
 	"github.com/golang/groupcache/lru"
 	"github.com/oschwald/geoip2-golang"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
 	"github.com/syncthing/syncthing/cmd/strelaypoolsrv/auto"
 	"github.com/syncthing/syncthing/lib/relay/client"
 	"github.com/syncthing/syncthing/lib/sync"
@@ -34,12 +38,42 @@ import (
 type location struct {
 	Latitude  float64 `json:"latitude"`
 	Longitude float64 `json:"longitude"`
+	City      string  `json:"city"`
+	Country   string  `json:"country"`
+	Continent string  `json:"continent"`
 }
 
 type relay struct {
-	URL      string   `json:"url"`
-	Location location `json:"location"`
-	uri      *url.URL
+	URL            string   `json:"url"`
+	Location       location `json:"location"`
+	uri            *url.URL
+	Stats          *stats    `json:"stats"`
+	StatsRetrieved time.Time `json:"statsRetrieved"`
+}
+
+type stats struct {
+	StartTime          time.Time `json:"startTime"`
+	UptimeSeconds      int       `json:"uptimeSeconds"`
+	PendingSessionKeys int       `json:"numPendingSessionKeys"`
+	ActiveSessions     int       `json:"numActiveSessions"`
+	Connections        int       `json:"numConnections"`
+	Proxies            int       `json:"numProxies"`
+	BytesProxied       int       `json:"bytesProxied"`
+	GoVersion          string    `json:"goVersion"`
+	GoOS               string    `json:"goOS"`
+	GoArch             string    `json:"goArch"`
+	GoMaxProcs         int       `json:"goMaxProcs"`
+	GoRoutines         int       `json:"goNumRoutine"`
+	Rates              []int64   `json:"kbps10s1m5m15m30m60m"`
+	Options            struct {
+		NetworkTimeout int      `json:"network-timeout"`
+		PintInterval   int      `json:"ping-interval"`
+		MessageTimeout int      `json:"message-timeout"`
+		SessionRate    int      `json:"per-session-rate"`
+		GlobalRate     int      `json:"global-rate"`
+		Pools          []string `json:"pools"`
+		ProvidedBy     string   `json:"provided-by"`
+	} `json:"options"`
 }
 
 func (r relay) String() string {
@@ -47,9 +81,9 @@ func (r relay) String() string {
 }
 
 type request struct {
-	relay  relay
-	uri    *url.URL
-	result chan result
+	relay      *relay
+	result     chan result
+	queueTimer *prometheus.Timer
 }
 
 type result struct {
@@ -58,23 +92,25 @@ type result struct {
 }
 
 var (
-	testCert       tls.Certificate
-	listen         = ":80"
-	dir            string
-	evictionTime   = time.Hour
-	debug          bool
-	getLRUSize     = 10 << 10
-	getLimitBurst  = 10
-	getLimitAvg    = 1
-	postLRUSize    = 1 << 10
-	postLimitBurst = 2
-	postLimitAvg   = 1
-	getLimit       time.Duration
-	postLimit      time.Duration
-	permRelaysFile string
-	ipHeader       string
-	geoipPath      string
-	proto          string
+	testCert        tls.Certificate
+	knownRelaysFile = filepath.Join(os.TempDir(), "strelaypoolsrv_known_relays")
+	listen          = ":80"
+	dir             string
+	evictionTime    = time.Hour
+	debug           bool
+	getLRUSize      = 10 << 10
+	getLimitBurst   = 10
+	getLimitAvg     = 2
+	postLRUSize     = 1 << 10
+	postLimitBurst  = 2
+	postLimitAvg    = 2
+	getLimit        time.Duration
+	postLimit       time.Duration
+	permRelaysFile  string
+	ipHeader        string
+	geoipPath       string
+	proto           string
+	statsRefresh    = time.Minute / 2
 
 	getMut      = sync.NewRWMutex()
 	getLRUCache *lru.Cache
@@ -85,8 +121,8 @@ var (
 	requests = make(chan request, 10)
 
 	mut             = sync.NewRWMutex()
-	knownRelays     = make([]relay, 0)
-	permanentRelays = make([]relay, 0)
+	knownRelays     = make([]*relay, 0)
+	permanentRelays = make([]*relay, 0)
 	evictionTimers  = make(map[string]*time.Timer)
 )
 
@@ -100,15 +136,16 @@ func main() {
 	flag.BoolVar(&debug, "debug", debug, "Enable debug output")
 	flag.DurationVar(&evictionTime, "eviction", evictionTime, "After how long the relay is evicted")
 	flag.IntVar(&getLRUSize, "get-limit-cache", getLRUSize, "Get request limiter cache size")
-	flag.IntVar(&getLimitAvg, "get-limit-avg", 2, "Allowed average get request rate, per 10 s")
+	flag.IntVar(&getLimitAvg, "get-limit-avg", getLimitAvg, "Allowed average get request rate, per 10 s")
 	flag.IntVar(&getLimitBurst, "get-limit-burst", getLimitBurst, "Allowed burst get requests")
 	flag.IntVar(&postLRUSize, "post-limit-cache", postLRUSize, "Post request limiter cache size")
-	flag.IntVar(&postLimitAvg, "post-limit-avg", 2, "Allowed average post request rate, per minute")
+	flag.IntVar(&postLimitAvg, "post-limit-avg", postLimitAvg, "Allowed average post request rate, per minute")
 	flag.IntVar(&postLimitBurst, "post-limit-burst", postLimitBurst, "Allowed burst post requests")
 	flag.StringVar(&permRelaysFile, "perm-relays", "", "Path to list of permanent relays")
 	flag.StringVar(&ipHeader, "ip-header", "", "Name of header which holds clients ip:port. Only meaningful when running behind a reverse proxy.")
 	flag.StringVar(&geoipPath, "geoip", "GeoLite2-City.mmdb", "Path to GeoLite2-City database")
 	flag.StringVar(&proto, "protocol", "tcp", "Protocol used for listening. 'tcp' for IPv4 and IPv6, 'tcp4' for IPv4, 'tcp6' for IPv6")
+	flag.DurationVar(&statsRefresh, "stats-refresh", statsRefresh, "Interval at which to refresh relay stats")
 
 	flag.Parse()
 
@@ -122,13 +159,31 @@ func main() {
 	var err error
 
 	if permRelaysFile != "" {
-		loadPermanentRelays(permRelaysFile)
+		permanentRelays = loadRelays(permRelaysFile)
 	}
 
 	testCert = createTestCertificate()
 
 	go requestProcessor()
 
+	// Load relays from cache in the background.
+	// Load them in a serial fashion to make sure any genuine requests
+	// are not dropped.
+	go func() {
+		for _, relay := range loadRelays(knownRelaysFile) {
+			resultChan := make(chan result)
+			requests <- request{relay, resultChan, nil}
+			result := <-resultChan
+			if result.err != nil {
+				relayTestsTotal.WithLabelValues("failed").Inc()
+			} else {
+				relayTestsTotal.WithLabelValues("success").Inc()
+			}
+		}
+		// Run the stats refresher once the relays are loaded.
+		statsRefresher(statsRefresh)
+	}()
+
 	if dir != "" {
 		if debug {
 			log.Println("Starting TLS listener on", listen)
@@ -173,6 +228,7 @@ func main() {
 	handler := http.NewServeMux()
 	handler.HandleFunc("/", handleAssets)
 	handler.HandleFunc("/endpoint", handleRequest)
+	handler.HandleFunc("/metrics", handleMetrics)
 
 	srv := http.Server{
 		Handler:     handler,
@@ -185,6 +241,15 @@ func main() {
 	}
 }
 
+func handleMetrics(w http.ResponseWriter, r *http.Request) {
+	timer := prometheus.NewTimer(metricsRequestsSeconds)
+	// Acquire the mutex just to make sure we're not caught midway through stats collection
+	mut.RLock()
+	promhttp.Handler().ServeHTTP(w, r)
+	mut.RUnlock()
+	timer.ObserveDuration()
+}
+
 func handleAssets(w http.ResponseWriter, r *http.Request) {
 	assets := auto.Assets()
 	path := r.URL.Path[1:]
@@ -245,6 +310,15 @@ func mimeTypeForFile(file string) string {
 }
 
 func handleRequest(w http.ResponseWriter, r *http.Request) {
+	timer := prometheus.NewTimer(apiRequestsSeconds.WithLabelValues(r.Method))
+
+	lw := NewLoggingResponseWriter(w)
+
+	defer func() {
+		timer.ObserveDuration()
+		apiRequestsTotal.WithLabelValues(r.Method, strconv.Itoa(lw.statusCode)).Inc()
+	}()
+
 	if ipHeader != "" {
 		r.RemoteAddr = r.Header.Get(ipHeader)
 	}
@@ -252,13 +326,13 @@ func handleRequest(w http.ResponseWriter, r *http.Request) {
 	switch r.Method {
 	case "GET":
 		if limit(r.RemoteAddr, getLRUCache, getMut, getLimit, getLimitBurst) {
-			w.WriteHeader(429)
+			w.WriteHeader(httpStatusEnhanceYourCalm)
 			return
 		}
 		handleGetRequest(w, r)
 	case "POST":
 		if limit(r.RemoteAddr, postLRUCache, postMut, postLimit, postLimitBurst) {
-			w.WriteHeader(429)
+			w.WriteHeader(httpStatusEnhanceYourCalm)
 			return
 		}
 		handlePostRequest(w, r)
@@ -282,7 +356,7 @@ func handleGetRequest(w http.ResponseWriter, r *http.Request) {
 		relays[i], relays[j] = relays[j], relays[i]
 	}
 
-	json.NewEncoder(w).Encode(map[string][]relay{
+	json.NewEncoder(w).Encode(map[string][]*relay{
 		"relays": relays,
 	})
 }
@@ -333,11 +407,11 @@ func handlePostRequest(w http.ResponseWriter, r *http.Request) {
 		if debug {
 			log.Println("IP address advertised does not match client IP address", r.RemoteAddr, uri)
 		}
-		http.Error(w, "IP address does not match client IP", http.StatusUnauthorized)
+		http.Error(w, fmt.Sprintf("IP advertised %s does not match client IP %s", host, rhost), http.StatusUnauthorized)
 		return
 	}
+
 	newRelay.uri = uri
-	newRelay.Location = getLocation(uri.Host)
 
 	for _, current := range permanentRelays {
 		if current.uri.Host == newRelay.uri.Host {
@@ -352,18 +426,21 @@ func handlePostRequest(w http.ResponseWriter, r *http.Request) {
 	reschan := make(chan result)
 
 	select {
-	case requests <- request{newRelay, uri, reschan}:
+	case requests <- request{&newRelay, reschan, prometheus.NewTimer(relayTestActionsSeconds.WithLabelValues("queue"))}:
 		result := <-reschan
 		if result.err != nil {
+			relayTestsTotal.WithLabelValues("failed").Inc()
 			http.Error(w, result.err.Error(), http.StatusBadRequest)
 			return
 		}
+		relayTestsTotal.WithLabelValues("success").Inc()
 		w.Header().Set("Content-Type", "application/json; charset=utf-8")
 		json.NewEncoder(w).Encode(map[string]time.Duration{
 			"evictionIn": result.eviction,
 		})
 
 	default:
+		relayTestsTotal.WithLabelValues("dropped").Inc()
 		if debug {
 			log.Println("Dropping request")
 		}
@@ -373,57 +450,81 @@ func handlePostRequest(w http.ResponseWriter, r *http.Request) {
 
 func requestProcessor() {
 	for request := range requests {
+		if request.queueTimer != nil {
+			request.queueTimer.ObserveDuration()
+		}
+
+		timer := prometheus.NewTimer(relayTestActionsSeconds.WithLabelValues("test"))
+		handleRelayTest(request)
+		timer.ObserveDuration()
+	}
+}
+
+func handleRelayTest(request request) {
+	if debug {
+		log.Println("Request for", request.relay)
+	}
+	if !client.TestRelay(request.relay.uri, []tls.Certificate{testCert}, time.Second, 2*time.Second, 3) {
 		if debug {
-			log.Println("Request for", request.relay)
+			log.Println("Test for relay", request.relay, "failed")
 		}
-		if !client.TestRelay(request.uri, []tls.Certificate{testCert}, time.Second, 2*time.Second, 3) {
-			if debug {
-				log.Println("Test for relay", request.relay, "failed")
-			}
-			request.result <- result{fmt.Errorf("connection test failed"), 0}
-			continue
+		request.result <- result{fmt.Errorf("connection test failed"), 0}
+		return
+	}
+
+	stats := fetchStats(request.relay)
+	location := getLocation(request.relay.uri.Host)
+
+	mut.Lock()
+	if stats != nil {
+		updateMetrics(request.relay.uri.Host, stats, location)
+	}
+	request.relay.Stats = stats
+	request.relay.StatsRetrieved = time.Now()
+	request.relay.Location = location
+
+	timer, ok := evictionTimers[request.relay.uri.Host]
+	if ok {
+		if debug {
+			log.Println("Stopping existing timer for", request.relay)
 		}
+		timer.Stop()
+	}
 
-		mut.Lock()
-		timer, ok := evictionTimers[request.relay.uri.Host]
-		if ok {
+	for i, current := range knownRelays {
+		if current.uri.Host == request.relay.uri.Host {
 			if debug {
-				log.Println("Stopping existing timer for", request.relay)
+				log.Println("Relay", request.relay, "already exists")
 			}
-			timer.Stop()
-		}
 
-		for i, current := range knownRelays {
-			if current.uri.Host == request.relay.uri.Host {
-				if debug {
-					log.Println("Relay", request.relay, "already exists")
-				}
+			// Evict the old entry anyway, as configuration might have changed.
+			last := len(knownRelays) - 1
+			knownRelays[i] = knownRelays[last]
+			knownRelays = knownRelays[:last]
 
-				// Evict the old entry anyway, as configuration might have changed.
-				last := len(knownRelays) - 1
-				knownRelays[i] = knownRelays[last]
-				knownRelays = knownRelays[:last]
-
-				goto found
-			}
+			goto found
 		}
+	}
 
-		if debug {
-			log.Println("Adding new relay", request.relay)
-		}
+	if debug {
+		log.Println("Adding new relay", request.relay)
+	}
+
+found:
 
-	found:
+	knownRelays = append(knownRelays, request.relay)
+	evictionTimers[request.relay.uri.Host] = time.AfterFunc(evictionTime, evict(request.relay))
 
-		knownRelays = append(knownRelays, request.relay)
+	mut.Unlock()
 
-		evictionTimers[request.relay.uri.Host] = time.AfterFunc(evictionTime, evict(request.relay))
-		mut.Unlock()
-		request.result <- result{nil, evictionTime}
+	if err := saveRelays(knownRelaysFile, knownRelays); err != nil {
+		log.Println("Failed to write known relays: " + err.Error())
 	}
 
+	request.result <- result{nil, evictionTime}
 }
 
-func evict(relay relay) func() {
+func evict(relay *relay) func() {
 	return func() {
 		mut.Lock()
 		defer mut.Unlock()
@@ -438,6 +539,7 @@ func evict(relay relay) func() {
 				last := len(knownRelays) - 1
 				knownRelays[i] = knownRelays[last]
 				knownRelays = knownRelays[:last]
+				deleteMetrics(current.uri.Host)
 			}
 		}
 		delete(evictionTimers, relay.uri.Host)
@@ -466,12 +568,14 @@ func limit(addr string, cache *lru.Cache, lock sync.RWMutex, intv time.Duration,
 	return false
 }
 
-func loadPermanentRelays(file string) {
+func loadRelays(file string) []*relay {
 	content, err := ioutil.ReadFile(file)
 	if err != nil {
-		log.Fatal(err)
+		log.Println("Failed to load relays: " + err.Error())
+		return nil
 	}
 
+	var relays []*relay
 	for _, line := range strings.Split(string(content), "\n") {
 		if len(line) == 0 {
 			continue
@@ -480,21 +584,30 @@ func loadPermanentRelays(file string) {
 		uri, err := url.Parse(line)
 		if err != nil {
 			if debug {
-				log.Println("Skipping permanent relay", line, "due to parse error", err)
+				log.Println("Skipping relay", line, "due to parse error", err)
 			}
 			continue
 
 		}
 
-		permanentRelays = append(permanentRelays, relay{
+		relays = append(relays, &relay{
 			URL:      line,
 			Location: getLocation(uri.Host),
 			uri:      uri,
 		})
 		if debug {
-			log.Println("Adding permanent relay", line)
+			log.Println("Adding relay", line)
 		}
 	}
+	return relays
+}
+
+// saveRelays writes the URI of every relay in relays to file, one per line,
+// replacing any previous content. loadRelays splits on newlines and parses
+// each line as a URL, so the two functions round-trip.
+//
+// It returns the error from writing the file, if any.
+func saveRelays(file string, relays []*relay) error {
+	lines := make([]string, 0, len(relays))
+	for _, relay := range relays {
+		lines = append(lines, relay.uri.String()+"\n")
+	}
+	// Join once instead of growing a string inside the loop.
+	content := strings.Join(lines, "")
+	// This is a plain data file: no execute bits, writable by the owner only.
+	return ioutil.WriteFile(file, []byte(content), 0644)
 }
 
 func createTestCertificate() tls.Certificate {
@@ -513,6 +626,8 @@ func createTestCertificate() tls.Certificate {
 }
 
 func getLocation(host string) location {
+	timer := prometheus.NewTimer(locationLookupSeconds)
+	defer timer.ObserveDuration()
 	db, err := geoip2.Open(geoipPath)
 	if err != nil {
 		return location{}
@@ -530,7 +645,24 @@ func getLocation(host string) location {
 	}
 
 	return location{
-		Latitude:  city.Location.Latitude,
 		Longitude: city.Location.Longitude,
+		Latitude:  city.Location.Latitude,
+		City:      city.City.Names["en"],
+		Country:   city.Country.IsoCode,
+		Continent: city.Continent.Code,
 	}
 }
+
+// loggingResponseWriter wraps an http.ResponseWriter and remembers the status
+// code passed to WriteHeader so it can be inspected after the handler ran.
+type loggingResponseWriter struct {
+	http.ResponseWriter
+	statusCode int
+}
+
+// NewLoggingResponseWriter wraps w. The recorded status code starts out as
+// http.StatusOK, so it reads as 200 if WriteHeader is never called on the
+// wrapper.
+func NewLoggingResponseWriter(w http.ResponseWriter) *loggingResponseWriter {
+	return &loggingResponseWriter{w, http.StatusOK}
+}
+
+// WriteHeader records code and then forwards it to the wrapped writer.
+func (lrw *loggingResponseWriter) WriteHeader(code int) {
+	lrw.statusCode = code
+	lrw.ResponseWriter.WriteHeader(code)
+}

+ 213 - 0
cmd/strelaypoolsrv/stats.go

@@ -0,0 +1,213 @@
+// Copyright (C) 2018 Audrius Butkevicius and Contributors (see the CONTRIBUTORS file).
+
+package main
+
+import (
+	"encoding/json"
+	"net"
+	"net/http"
+	"os"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/syncthing/syncthing/lib/sync"
+)
+
+// init registers a process collector for the current PID with the default
+// Prometheus registry, passing "syncthing_relaypoolsrv" as its name argument.
+// MustRegister panics if registration fails.
+func init() {
+	prometheus.MustRegister(prometheus.NewProcessCollector(os.Getpid(), "syncthing_relaypoolsrv"))
+}
+
+// Package-level HTTP client and Prometheus collectors. Every collector is
+// created through makeCounter, makeSummary or makeGauge, which also register
+// it, so all of them live under the syncthing/relaypoolsrv namespace.
+var (
+	// statusClient is used by fetchStats to query relay status endpoints;
+	// each request is abandoned after five seconds.
+	statusClient = http.Client{
+		Timeout: 5 * time.Second,
+	}
+
+	// Metrics about the pool server's own API.
+	apiRequestsTotal   = makeCounter("api_requests_total", "Number of API requests.", "type", "result")
+	apiRequestsSeconds = makeSummary("api_requests_seconds", "Latency of API requests.", "type")
+
+	// Metrics about relay tests.
+	relayTestsTotal         = makeCounter("tests_total", "Number of relay tests.", "result")
+	relayTestActionsSeconds = makeSummary("test_actions_seconds", "Latency of relay test actions.", "type")
+
+	// Label-less summary, resolved to its single observer up front.
+	locationLookupSeconds = makeSummary("location_lookup_seconds", "Latency of location lookups.").WithLabelValues()
+
+	metricsRequestsSeconds = makeSummary("metrics_requests_seconds", "Latency of metric requests.").WithLabelValues()
+	// Observed once per relay per refresh, labelled "success" or "failed".
+	scrapeSeconds          = makeSummary("relay_scrape_seconds", "Latency of metric scrapes from remote relays.", "result")
+
+	// Per-relay gauges keyed only by the relay host; set by updateMetrics
+	// and removed by deleteMetrics.
+	relayUptime             = makeGauge("relay_uptime", "Uptime of relay", "relay")
+	relayPendingSessionKeys = makeGauge("relay_pending_session_keys", "Number of pending session keys (two keys per session, one per each side of the connection)", "relay")
+	relayActiveSessions     = makeGauge("relay_active_sessions", "Number of sessions that are happening, a session contains two parties", "relay")
+	relayConnections        = makeGauge("relay_connections", "Number of devices connected to the relay", "relay")
+	relayProxies            = makeGauge("relay_proxies", "Number of active proxy routines sending data between peers (two proxies per session, one for each way)", "relay")
+	relayBytesProxied       = makeGauge("relay_bytes_proxied", "Number of bytes proxied by the relay", "relay")
+	relayGoRoutines         = makeGauge("relay_go_routines", "Number of Go routines in the process", "relay")
+	relaySessionRate        = makeGauge("relay_session_rate", "Rate applied per session", "relay")
+	relayGlobalRate         = makeGauge("relay_global_rate", "Global rate applied on the whole relay", "relay")
+	// Info-style gauges with extra descriptive labels; refreshStats resets
+	// both before repopulating them.
+	relayBuildInfo          = makeGauge("relay_build_info", "Build information about a relay", "relay", "go_version", "go_os", "go_arch")
+	relayLocationInfo       = makeGauge("relay_location_info", "Location information about a relay", "relay", "city", "country", "continent")
+)
+
+// makeGauge builds a gauge vector called name in the syncthing/relaypoolsrv
+// namespace, with the given help text and label names, registers it with the
+// default Prometheus registry and returns it. Registration failure panics.
+func makeGauge(name string, help string, labels ...string) *prometheus.GaugeVec {
+	opts := prometheus.GaugeOpts{
+		Namespace: "syncthing",
+		Subsystem: "relaypoolsrv",
+		Name:      name,
+		Help:      help,
+	}
+	vec := prometheus.NewGaugeVec(opts, labels)
+	prometheus.MustRegister(vec)
+	return vec
+}
+
+// makeSummary builds a summary vector called name in the
+// syncthing/relaypoolsrv namespace, tracking the 50th, 90th and 99th
+// percentiles, registers it with the default Prometheus registry and returns
+// it. Registration failure panics.
+func makeSummary(name string, help string, labels ...string) *prometheus.SummaryVec {
+	// Quantile -> allowed absolute error.
+	objectives := map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}
+	opts := prometheus.SummaryOpts{
+		Namespace:  "syncthing",
+		Subsystem:  "relaypoolsrv",
+		Name:       name,
+		Help:       help,
+		Objectives: objectives,
+	}
+	vec := prometheus.NewSummaryVec(opts, labels)
+	prometheus.MustRegister(vec)
+	return vec
+}
+
+// makeCounter builds a counter vector called name in the
+// syncthing/relaypoolsrv namespace, with the given help text and label names,
+// registers it with the default Prometheus registry and returns it.
+// Registration failure panics.
+func makeCounter(name string, help string, labels ...string) *prometheus.CounterVec {
+	opts := prometheus.CounterOpts{
+		Namespace: "syncthing",
+		Subsystem: "relaypoolsrv",
+		Name:      name,
+		Help:      help,
+	}
+	vec := prometheus.NewCounterVec(opts, labels)
+	prometheus.MustRegister(vec)
+	return vec
+}
+
+// statsRefresher calls refreshStats once per interval, forever. It blocks the
+// calling goroutine and never returns.
+func statsRefresher(interval time.Duration) {
+	ticks := time.NewTicker(interval).C
+	for {
+		<-ticks
+		refreshStats()
+	}
+}
+
+// statsFetchResult pairs a relay with the outcome of fetching its stats, so
+// that refreshStats can collect results from its fetcher goroutines over a
+// channel. stats is nil when the fetch failed.
+type statsFetchResult struct {
+	relay *relay
+	stats *stats
+}
+
+// refreshStats fetches the status of every permanent and known relay in
+// parallel, records how long each scrape took, and then, under the write
+// lock, stores the result on each relay and updates its metrics. Relays
+// whose status could not be fetched get nil Stats and have their per-relay
+// metrics removed.
+func refreshStats() {
+	// Snapshot both lists into a fresh slice. Appending directly onto
+	// permanentRelays could write into its backing array if it has spare
+	// capacity, and only the read lock is held here.
+	mut.RLock()
+	relays := make([]*relay, 0, len(permanentRelays)+len(knownRelays))
+	relays = append(relays, permanentRelays...)
+	relays = append(relays, knownRelays...)
+	mut.RUnlock()
+
+	now := time.Now()
+	wg := sync.NewWaitGroup()
+
+	// Buffered for one result per relay, so no fetcher ever blocks on send.
+	results := make(chan statsFetchResult, len(relays))
+	for _, rel := range relays {
+		wg.Add(1)
+		go func(rel *relay) {
+			defer wg.Done()
+
+			t0 := time.Now()
+			fetched := fetchStats(rel)
+			duration := time.Since(t0).Seconds()
+			result := "success"
+			if fetched == nil {
+				result = "failed"
+			}
+			scrapeSeconds.WithLabelValues(result).Observe(duration)
+
+			// Store the response that was just measured; querying the relay
+			// a second time would double the load and could disagree with
+			// the result label recorded above.
+			results <- statsFetchResult{
+				relay: rel,
+				stats: fetched,
+			}
+		}(rel)
+	}
+
+	wg.Wait()
+	close(results)
+
+	mut.Lock()
+	// The info gauges carry descriptive labels that may change between
+	// scrapes, so they are rebuilt from scratch on every refresh.
+	relayBuildInfo.Reset()
+	relayLocationInfo.Reset()
+	for result := range results {
+		result.relay.StatsRetrieved = now
+		result.relay.Stats = result.stats
+		if result.stats == nil {
+			deleteMetrics(result.relay.uri.Host)
+		} else {
+			updateMetrics(result.relay.uri.Host, result.stats, result.relay.Location)
+		}
+	}
+	mut.Unlock()
+}
+
+// fetchStats retrieves and decodes the JSON status document of relay over
+// plain HTTP.
+//
+// The status address is taken from the "statusAddr" query parameter of the
+// relay URI and defaults to ":22070". When the address has no host part, the
+// host of the relay URI itself is used.
+//
+// It returns nil if the address cannot be worked out, the request fails, the
+// response is not 200 OK, or the body is not valid JSON for stats.
+func fetchStats(relay *relay) *stats {
+	statusAddr := relay.uri.Query().Get("statusAddr")
+	if statusAddr == "" {
+		statusAddr = ":22070"
+	}
+
+	statusHost, statusPort, err := net.SplitHostPort(statusAddr)
+	if err != nil {
+		return nil
+	}
+
+	// No host in the status address means "same host as the relay".
+	if statusHost == "" {
+		host, _, err := net.SplitHostPort(relay.uri.Host)
+		if err != nil {
+			return nil
+		}
+		statusHost = host
+	}
+
+	statusURL := "http://" + net.JoinHostPort(statusHost, statusPort) + "/status"
+
+	response, err := statusClient.Get(statusURL)
+	if err != nil {
+		return nil
+	}
+	// The body must be closed on every path, or the connection leaks.
+	defer response.Body.Close()
+
+	if response.StatusCode != http.StatusOK {
+		return nil
+	}
+
+	var result stats
+	// Check the decoder's own error; a malformed body must not be reported
+	// as a successful, zero-valued scrape.
+	if err := json.NewDecoder(response.Body).Decode(&result); err != nil {
+		return nil
+	}
+	return &result
+}
+
+// updateMetrics publishes the freshly fetched stats of the relay identified
+// by host. The build and location info gauges are incremented only when at
+// least one of their descriptive labels is non-empty; every other per-relay
+// gauge is set unconditionally from stats.
+func updateMetrics(host string, stats *stats, location location) {
+	hasBuildInfo := stats.GoVersion != "" || stats.GoOS != "" || stats.GoArch != ""
+	if hasBuildInfo {
+		relayBuildInfo.WithLabelValues(host, stats.GoVersion, stats.GoOS, stats.GoArch).Add(1)
+	}
+
+	hasLocation := location.City != "" || location.Country != "" || location.Continent != ""
+	if hasLocation {
+		relayLocationInfo.WithLabelValues(host, location.City, location.Country, location.Continent).Add(1)
+	}
+
+	// Gauge / value pairs, applied in order below.
+	samples := []struct {
+		gauge *prometheus.GaugeVec
+		value float64
+	}{
+		{relayUptime, float64(stats.UptimeSeconds)},
+		{relayPendingSessionKeys, float64(stats.PendingSessionKeys)},
+		{relayActiveSessions, float64(stats.ActiveSessions)},
+		{relayConnections, float64(stats.Connections)},
+		{relayProxies, float64(stats.Proxies)},
+		{relayBytesProxied, float64(stats.BytesProxied)},
+		{relayGoRoutines, float64(stats.GoRoutines)},
+		{relaySessionRate, float64(stats.Options.SessionRate)},
+		{relayGlobalRate, float64(stats.Options.GlobalRate)},
+	}
+	for _, sample := range samples {
+		sample.gauge.WithLabelValues(host).Set(sample.value)
+	}
+}
+
+// deleteMetrics removes the series labelled with host from every gauge that
+// is keyed by the relay host alone. relayBuildInfo and relayLocationInfo are
+// not touched here.
+func deleteMetrics(host string) {
+	hostGauges := []*prometheus.GaugeVec{
+		relayUptime,
+		relayPendingSessionKeys,
+		relayActiveSessions,
+		relayConnections,
+		relayProxies,
+		relayBytesProxied,
+		relayGoRoutines,
+		relaySessionRate,
+		relayGlobalRate,
+	}
+	for _, gauge := range hostGauges {
+		gauge.DeleteLabelValues(host)
+	}
+}

+ 4 - 0
cmd/strelaysrv/status.go

@@ -40,6 +40,10 @@ func getStatus(w http.ResponseWriter, r *http.Request) {
 
 	sessionMut.Lock()
 	// This can potentially be double the number of pending sessions, as each session has two keys, one for each side.
+	status["version"] = Version
+	status["buildHost"] = BuildHost
+	status["buildUser"] = BuildUser
+	status["buildDate"] = BuildDate
 	status["startTime"] = rc.startTime
 	status["uptimeSeconds"] = time.Since(rc.startTime) / time.Second
 	status["numPendingSessionKeys"] = len(pendingSessions)