Browse Source

cmd/k8s-operator,k8s-operator/apis: set a readiness condition on egress Services for ProxyGroup (#13746)

cmd/k8s-operator,k8s-operator/apis: set a readiness condition on egress Services

Set a readiness condition on ExternalName Services that define a tailnet target
to route cluster traffic to via a ProxyGroup's proxies. The condition
is set to true if at least one proxy is currently set up to route.

Updates tailscale/tailscale#13406

Signed-off-by: Irbe Krumina <[email protected]>
Irbe Krumina 1 year ago
parent
commit
89ee6bbdae

+ 179 - 0
cmd/k8s-operator/egress-services-readiness.go

@@ -0,0 +1,179 @@
+// Copyright (c) Tailscale Inc & AUTHORS
+// SPDX-License-Identifier: BSD-3-Clause
+
+//go:build !plan9
+
+package main
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"strings"
+
+	"go.uber.org/zap"
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	discoveryv1 "k8s.io/api/discovery/v1"
+	apiequality "k8s.io/apimachinery/pkg/api/equality"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/reconcile"
+	tsoperator "tailscale.com/k8s-operator"
+	tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
+	"tailscale.com/tstime"
+)
+
+const (
+	reasonReadinessCheckFailed     = "ReadinessCheckFailed"
+	reasonClusterResourcesNotReady = "ClusterResourcesNotReady"
+	reasonNoProxies                = "NoProxiesConfigured"
+	reasonNotReady                 = "NotReadyToRouteTraffic"
+	reasonReady                    = "ReadyToRouteTraffic"
+	reasonPartiallyReady           = "PartiallyReadyToRouteTraffic"
+	msgReadyToRouteTemplate        = "%d out of %d replicas are ready to route traffic"
+)
+
+type egressSvcsReadinessReconciler struct {
+	client.Client
+	logger      *zap.SugaredLogger
+	clock       tstime.Clock
+	tsNamespace string
+}
+
+// Reconcile reconciles an ExternalName Service that defines a tailnet target to be exposed on a ProxyGroup and sets the
+// EgressSvcReady condition on it. The condition gets set to true if at least one of the proxies is currently ready to
+// route traffic to the target. It compares proxy Pod IPs with the endpoints set on the EndpointSlice for the egress
+// service to determine how many replicas are currently able to route traffic.
+func (esrr *egressSvcsReadinessReconciler) Reconcile(ctx context.Context, req reconcile.Request) (res reconcile.Result, err error) {
+	l := esrr.logger.With("Service", req.NamespacedName)
+	defer l.Info("reconcile finished")
+
+	svc := new(corev1.Service)
+	if err = esrr.Get(ctx, req.NamespacedName, svc); apierrors.IsNotFound(err) {
+		l.Info("Service not found")
+		return res, nil
+	} else if err != nil {
+		return res, fmt.Errorf("failed to get Service: %w", err)
+	}
+	var (
+		reason, msg string
+		st          metav1.ConditionStatus = metav1.ConditionUnknown
+	)
+	oldStatus := svc.Status.DeepCopy()
+	defer func() {
+		tsoperator.SetServiceCondition(svc, tsapi.EgressSvcReady, st, reason, msg, esrr.clock, l)
+		if !apiequality.Semantic.DeepEqual(oldStatus, svc.Status) {
+			err = errors.Join(err, esrr.Status().Update(ctx, svc))
+		}
+	}()
+
+	crl := egressSvcChildResourceLabels(svc)
+	eps, err := getSingleObject[discoveryv1.EndpointSlice](ctx, esrr.Client, esrr.tsNamespace, crl)
+	if err != nil {
+		err = fmt.Errorf("error getting EndpointSlice: %w", err)
+		reason = reasonReadinessCheckFailed
+		msg = err.Error()
+		return res, err
+	}
+	if eps == nil {
+		l.Infof("EndpointSlice for Service does not yet exist, waiting...")
+		reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady
+		st = metav1.ConditionFalse
+		return res, nil
+	}
+	pg := &tsapi.ProxyGroup{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: svc.Annotations[AnnotationProxyGroup],
+		},
+	}
+	err = esrr.Get(ctx, client.ObjectKeyFromObject(pg), pg)
+	if apierrors.IsNotFound(err) {
+		l.Infof("ProxyGroup for Service does not exist, waiting...")
+		reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady
+		st = metav1.ConditionFalse
+		return res, nil
+	}
+	if err != nil {
+		err = fmt.Errorf("error retrieving ProxyGroup: %w", err)
+		reason = reasonReadinessCheckFailed
+		msg = err.Error()
+		return res, err
+	}
+	if !tsoperator.ProxyGroupIsReady(pg) {
+		l.Infof("ProxyGroup for Service is not ready, waiting...")
+		reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady
+		st = metav1.ConditionFalse
+		return res, nil
+	}
+
+	replicas := pgReplicas(pg)
+	if replicas == 0 {
+		l.Infof("ProxyGroup replicas set to 0")
+		reason, msg = reasonNoProxies, reasonNoProxies
+		st = metav1.ConditionFalse
+		return res, nil
+	}
+	podLabels := pgLabels(pg.Name, nil)
+	var readyReplicas int32
+	for i := range replicas {
+		podLabels[appsv1.PodIndexLabel] = fmt.Sprintf("%d", i)
+		pod, err := getSingleObject[corev1.Pod](ctx, esrr.Client, esrr.tsNamespace, podLabels)
+		if err != nil {
+			err = fmt.Errorf("error retrieving ProxyGroup Pod: %w", err)
+			reason = reasonReadinessCheckFailed
+			msg = err.Error()
+			return res, err
+		}
+		if pod == nil {
+			l.Infof("[unexpected] ProxyGroup is ready, but replica %d was not found", i)
+			reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady
+			return res, nil
+		}
+		l.Infof("looking at Pod with IPs %v", pod.Status.PodIPs)
+		ready := false
+		for _, ep := range eps.Endpoints {
+			l.Infof("looking at endpoint with addresses %v", ep.Addresses)
+			if endpointReadyForPod(&ep, pod, l) {
+				l.Infof("endpoint is ready for Pod")
+				ready = true
+				break
+			}
+		}
+		if ready {
+			readyReplicas++
+		}
+	}
+	msg = fmt.Sprintf(msgReadyToRouteTemplate, readyReplicas, replicas)
+	if readyReplicas == 0 {
+		reason = reasonNotReady
+		st = metav1.ConditionFalse
+		return res, nil
+	}
+	st = metav1.ConditionTrue
+	if readyReplicas < replicas {
+		reason = reasonPartiallyReady
+	} else {
+		reason = reasonReady
+	}
+	return res, nil
+}
+
+// endpointReadyForPod returns true if the endpoint is for the Pod's IPv4 address and is ready to serve traffic.
+// Endpoint must not be nil.
+func endpointReadyForPod(ep *discoveryv1.Endpoint, pod *corev1.Pod, l *zap.SugaredLogger) bool {
+	podIP, err := podIPv4(pod)
+	if err != nil {
+		l.Infof("[unexpected] error retrieving Pod's IPv4 address: %v", err)
+		return false
+	}
+	// Currently we only ever set a single address on and Endpoint and nothing else is meant to modify this.
+	if len(ep.Addresses) != 1 {
+		return false
+	}
+	return strings.EqualFold(ep.Addresses[0], podIP) &&
+		*ep.Conditions.Ready &&
+		*ep.Conditions.Serving &&
+		!*ep.Conditions.Terminating
+}

+ 169 - 0
cmd/k8s-operator/egress-services-readiness_test.go

@@ -0,0 +1,169 @@
+// Copyright (c) Tailscale Inc & AUTHORS
+// SPDX-License-Identifier: BSD-3-Clause
+
+//go:build !plan9
+
+package main
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/AlekSi/pointer"
+	"go.uber.org/zap"
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	discoveryv1 "k8s.io/api/discovery/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
+	tsoperator "tailscale.com/k8s-operator"
+	tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
+	"tailscale.com/tstest"
+	"tailscale.com/tstime"
+)
+
+func TestEgressServiceReadiness(t *testing.T) {
+	// We need to pass a ProxyGroup object to WithStatusSubresource because of some quirks in how the fake client
+	// works. Without this code further down would not be able to update ProxyGroup status.
+	fc := fake.NewClientBuilder().
+		WithScheme(tsapi.GlobalScheme).
+		WithStatusSubresource(&tsapi.ProxyGroup{}).
+		Build()
+	zl, _ := zap.NewDevelopment()
+	cl := tstest.NewClock(tstest.ClockOpts{})
+	rec := &egressSvcsReadinessReconciler{
+		tsNamespace: "operator-ns",
+		Client:      fc,
+		logger:      zl.Sugar(),
+		clock:       cl,
+	}
+	tailnetFQDN := "my-app.tailnetxyz.ts.net"
+	egressSvc := &corev1.Service{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "my-app",
+			Namespace: "dev",
+			Annotations: map[string]string{
+				AnnotationProxyGroup:        "dev",
+				AnnotationTailnetTargetFQDN: tailnetFQDN,
+			},
+		},
+	}
+	fakeClusterIPSvc := &corev1.Service{ObjectMeta: metav1.ObjectMeta{Name: "my-app", Namespace: "operator-ns"}}
+	l := egressSvcEpsLabels(egressSvc, fakeClusterIPSvc)
+	eps := &discoveryv1.EndpointSlice{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "my-app",
+			Namespace: "operator-ns",
+			Labels:    l,
+		},
+		AddressType: discoveryv1.AddressTypeIPv4,
+	}
+	pg := &tsapi.ProxyGroup{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "dev",
+		},
+	}
+	mustCreate(t, fc, egressSvc)
+	setClusterNotReady(egressSvc, cl, zl.Sugar())
+	t.Run("endpointslice_does_not_exist", func(t *testing.T) {
+		expectReconciled(t, rec, "dev", "my-app")
+		expectEqual(t, fc, egressSvc, nil) // not ready
+	})
+	t.Run("proxy_group_does_not_exist", func(t *testing.T) {
+		mustCreate(t, fc, eps)
+		expectReconciled(t, rec, "dev", "my-app")
+		expectEqual(t, fc, egressSvc, nil) // still not ready
+	})
+	t.Run("proxy_group_not_ready", func(t *testing.T) {
+		mustCreate(t, fc, pg)
+		expectReconciled(t, rec, "dev", "my-app")
+		expectEqual(t, fc, egressSvc, nil) // still not ready
+	})
+	t.Run("no_ready_replicas", func(t *testing.T) {
+		setPGReady(pg, cl, zl.Sugar())
+		mustUpdateStatus(t, fc, pg.Namespace, pg.Name, func(p *tsapi.ProxyGroup) {
+			p.Status = pg.Status
+		})
+		expectEqual(t, fc, pg, nil)
+		for i := range pgReplicas(pg) {
+			p := pod(pg, i)
+			mustCreate(t, fc, p)
+			mustUpdateStatus(t, fc, p.Namespace, p.Name, func(existing *corev1.Pod) {
+				existing.Status.PodIPs = p.Status.PodIPs
+			})
+		}
+		expectReconciled(t, rec, "dev", "my-app")
+		setNotReady(egressSvc, cl, zl.Sugar(), pgReplicas(pg))
+		expectEqual(t, fc, egressSvc, nil) // still not ready
+	})
+	t.Run("one_ready_replica", func(t *testing.T) {
+		setEndpointForReplica(pg, 0, eps)
+		mustUpdate(t, fc, eps.Namespace, eps.Name, func(e *discoveryv1.EndpointSlice) {
+			e.Endpoints = eps.Endpoints
+		})
+		setReady(egressSvc, cl, zl.Sugar(), pgReplicas(pg), 1)
+		expectReconciled(t, rec, "dev", "my-app")
+		expectEqual(t, fc, egressSvc, nil) // partially ready
+	})
+	t.Run("all_replicas_ready", func(t *testing.T) {
+		for i := range pgReplicas(pg) {
+			setEndpointForReplica(pg, i, eps)
+		}
+		mustUpdate(t, fc, eps.Namespace, eps.Name, func(e *discoveryv1.EndpointSlice) {
+			e.Endpoints = eps.Endpoints
+		})
+		setReady(egressSvc, cl, zl.Sugar(), pgReplicas(pg), pgReplicas(pg))
+		expectReconciled(t, rec, "dev", "my-app")
+		expectEqual(t, fc, egressSvc, nil) // ready
+	})
+}
+
+func setClusterNotReady(svc *corev1.Service, cl tstime.Clock, l *zap.SugaredLogger) {
+	tsoperator.SetServiceCondition(svc, tsapi.EgressSvcReady, metav1.ConditionFalse, reasonClusterResourcesNotReady, reasonClusterResourcesNotReady, cl, l)
+}
+
+func setNotReady(svc *corev1.Service, cl tstime.Clock, l *zap.SugaredLogger, replicas int32) {
+	msg := fmt.Sprintf(msgReadyToRouteTemplate, 0, replicas)
+	tsoperator.SetServiceCondition(svc, tsapi.EgressSvcReady, metav1.ConditionFalse, reasonNotReady, msg, cl, l)
+}
+
+func setReady(svc *corev1.Service, cl tstime.Clock, l *zap.SugaredLogger, replicas, readyReplicas int32) {
+	reason := reasonPartiallyReady
+	if readyReplicas == replicas {
+		reason = reasonReady
+	}
+	msg := fmt.Sprintf(msgReadyToRouteTemplate, readyReplicas, replicas)
+	tsoperator.SetServiceCondition(svc, tsapi.EgressSvcReady, metav1.ConditionTrue, reason, msg, cl, l)
+}
+
+func setPGReady(pg *tsapi.ProxyGroup, cl tstime.Clock, l *zap.SugaredLogger) {
+	tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, metav1.ConditionTrue, "foo", "foo", pg.Generation, cl, l)
+}
+
+func setEndpointForReplica(pg *tsapi.ProxyGroup, ordinal int32, eps *discoveryv1.EndpointSlice) {
+	p := pod(pg, ordinal)
+	eps.Endpoints = append(eps.Endpoints, discoveryv1.Endpoint{
+		Addresses: []string{p.Status.PodIPs[0].IP},
+		Conditions: discoveryv1.EndpointConditions{
+			Ready:       pointer.ToBool(true),
+			Serving:     pointer.ToBool(true),
+			Terminating: pointer.ToBool(false),
+		},
+	})
+}
+
+func pod(pg *tsapi.ProxyGroup, ordinal int32) *corev1.Pod {
+	l := pgLabels(pg.Name, nil)
+	l[appsv1.PodIndexLabel] = fmt.Sprintf("%d", ordinal)
+	ip := fmt.Sprintf("10.0.0.%d", ordinal)
+	return &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      fmt.Sprintf("%s-%d", pg.Name, ordinal),
+			Namespace: "operator-ns",
+			Labels:    l,
+		},
+		Status: corev1.PodStatus{
+			PodIPs: []corev1.PodIP{{IP: ip}},
+		},
+	}
+}

+ 14 - 4
cmd/k8s-operator/egress-services.go

@@ -161,7 +161,6 @@ func (esr *egressSvcsReconciler) Reconcile(ctx context.Context, req reconcile.Re
 }
 
 func (esr *egressSvcsReconciler) maybeProvision(ctx context.Context, svc *corev1.Service, l *zap.SugaredLogger) (err error) {
-	l.Debug("maybe provision")
 	r := svcConfiguredReason(svc, false, l)
 	st := metav1.ConditionFalse
 	defer func() {
@@ -272,11 +271,9 @@ func (esr *egressSvcsReconciler) provision(ctx context.Context, proxyGroupName s
 		}
 	}
 
-	crl := egressSvcChildResourceLabels(svc)
+	crl := egressSvcEpsLabels(svc, clusterIPSvc)
 	// TODO(irbekrm): support IPv6, but need to investigate how kube proxy
 	// sets up Service -> Pod routing when IPv6 is involved.
-	crl[discoveryv1.LabelServiceName] = clusterIPSvc.Name
-	crl[discoveryv1.LabelManagedBy] = "tailscale.com"
 	eps := &discoveryv1.EndpointSlice{
 		ObjectMeta: metav1.ObjectMeta{
 			Name:      fmt.Sprintf("%s-ipv4", clusterIPSvc.Name),
@@ -634,6 +631,19 @@ func egressSvcChildResourceLabels(svc *corev1.Service) map[string]string {
 	}
 }
 
+// egressEpsLabels returns labels to be added to an EndpointSlice created for an egress service.
+func egressSvcEpsLabels(extNSvc, clusterIPSvc *corev1.Service) map[string]string {
+	l := egressSvcChildResourceLabels(extNSvc)
+	// Adding this label is what makes kube proxy set up rules to route traffic sent to the clusterIP Service to the
+	// endpoints defined on this EndpointSlice.
+	// https://kubernetes.io/docs/concepts/services-networking/endpoint-slices/#ownership
+	l[discoveryv1.LabelServiceName] = clusterIPSvc.Name
+	// Kubernetes recommends setting this label.
+	// https://kubernetes.io/docs/concepts/services-networking/endpoint-slices/#management
+	l[discoveryv1.LabelManagedBy] = "tailscale.com"
+	return l
+}
+
 func svcConfigurationUpToDate(svc *corev1.Service, l *zap.SugaredLogger) bool {
 	cond := tsoperator.GetServiceCondition(svc, tsapi.EgressSvcConfigured)
 	if cond == nil {

+ 45 - 2
cmd/k8s-operator/operator.go

@@ -376,6 +376,22 @@ func runReconcilers(opts reconcilerOpts) {
 		startlog.Fatalf("failed setting up indexer for egress Services: %v", err)
 	}
 
+	egressSvcFromEpsFilter := handler.EnqueueRequestsFromMapFunc(egressSvcFromEps)
+	err = builder.
+		ControllerManagedBy(mgr).
+		Named("egress-svcs-readiness-reconciler").
+		Watches(&corev1.Service{}, egressSvcFilter).
+		Watches(&discoveryv1.EndpointSlice{}, egressSvcFromEpsFilter).
+		Complete(&egressSvcsReadinessReconciler{
+			Client:      mgr.GetClient(),
+			tsNamespace: opts.tailscaleNamespace,
+			clock:       tstime.DefaultClock{},
+			logger:      opts.log.Named("egress-svcs-readiness-reconciler"),
+		})
+	if err != nil {
+		startlog.Fatalf("could not create egress Services readiness reconciler: %v", err)
+	}
+
 	epsFilter := handler.EnqueueRequestsFromMapFunc(egressEpsHandler)
 	podsFilter := handler.EnqueueRequestsFromMapFunc(egressEpsFromPGPods(mgr.GetClient(), opts.tailscaleNamespace))
 	secretsFilter := handler.EnqueueRequestsFromMapFunc(egressEpsFromPGStateSecrets(mgr.GetClient(), opts.tailscaleNamespace))
@@ -847,7 +863,7 @@ func egressEpsHandler(_ context.Context, o client.Object) []reconcile.Request {
 // returns reconciler requests for all egress EndpointSlices for that ProxyGroup.
 func egressEpsFromPGPods(cl client.Client, ns string) handler.MapFunc {
 	return func(_ context.Context, o client.Object) []reconcile.Request {
-		if _, ok := o.GetLabels()[LabelManaged]; !ok {
+		if v, ok := o.GetLabels()[LabelManaged]; !ok || v != "true" {
 			return nil
 		}
 		// TODO(irbekrm): for now this is good enough as all ProxyGroups are egress. Add a type check once we
@@ -867,7 +883,7 @@ func egressEpsFromPGPods(cl client.Client, ns string) handler.MapFunc {
 // returns reconciler requests for all egress EndpointSlices for that ProxyGroup.
 func egressEpsFromPGStateSecrets(cl client.Client, ns string) handler.MapFunc {
 	return func(_ context.Context, o client.Object) []reconcile.Request {
-		if _, ok := o.GetLabels()[LabelManaged]; !ok {
+		if v, ok := o.GetLabels()[LabelManaged]; !ok || v != "true" {
 			return nil
 		}
 		// TODO(irbekrm): for now this is good enough as all ProxyGroups are egress. Add a type check once we
@@ -886,6 +902,33 @@ func egressEpsFromPGStateSecrets(cl client.Client, ns string) handler.MapFunc {
 	}
 }
 
+// egressSvcFromEps is an event handler for EndpointSlices. If an EndpointSlice is for an egress ExternalName Service
+// meant to be exposed on a ProxyGroup, returns a reconcile request for the Service.
+func egressSvcFromEps(_ context.Context, o client.Object) []reconcile.Request {
+	if typ := o.GetLabels()[labelSvcType]; typ != typeEgress {
+		return nil
+	}
+	if v, ok := o.GetLabels()[LabelManaged]; !ok || v != "true" {
+		return nil
+	}
+	svcName, ok := o.GetLabels()[LabelParentName]
+	if !ok {
+		return nil
+	}
+	svcNs, ok := o.GetLabels()[LabelParentNamespace]
+	if !ok {
+		return nil
+	}
+	return []reconcile.Request{
+		{
+			NamespacedName: types.NamespacedName{
+				Namespace: svcNs,
+				Name:      svcName,
+			},
+		},
+	}
+}
+
 func reconcileRequestsForPG(pg string, cl client.Client, ns string) []reconcile.Request {
 	epsList := discoveryv1.EndpointSliceList{}
 	if err := cl.List(context.Background(), &epsList,

+ 13 - 8
k8s-operator/apis/v1alpha1/types_connector.go

@@ -172,14 +172,19 @@ type ConditionType string
 const (
 	ConnectorReady  ConditionType = `ConnectorReady`
 	ProxyClassReady ConditionType = `ProxyClassReady`
-	ProxyGroupReady ConditionType = `ProxyGroupReady`
+	ProxyGroupReady ConditionType = `TailscaleProxyGroupReady`
 	ProxyReady      ConditionType = `TailscaleProxyReady` // a Tailscale-specific condition type for corev1.Service
 	RecorderReady   ConditionType = `RecorderReady`
-	// EgressSvcValid is set to true if the user configured ExternalName Service for exposing a tailnet target on
-	// ProxyGroup nodes is valid.
-	EgressSvcValid ConditionType = `EgressSvcValid`
-	// EgressSvcConfigured is set to true if the configuration for the egress Service (proxy ConfigMap update,
-	// EndpointSlice for the Service) has been successfully applied. The Reason for this condition
-	// contains the name of the ProxyGroup and the hash of the Service ports and the tailnet target.
-	EgressSvcConfigured ConditionType = `EgressSvcConfigured`
+	// EgressSvcValid gets set on a user configured ExternalName Service that defines a tailnet target to be exposed
+	// on a ProxyGroup.
+	// Set to true if the user provided configuration is valid.
+	EgressSvcValid ConditionType = `TailscaleEgressSvcValid`
+	// EgressSvcConfigured gets set on a user configured ExternalName Service that defines a tailnet target to be exposed
+	// on a ProxyGroup.
+	// Set to true if the cluster resources for the service have been successfully configured.
+	EgressSvcConfigured ConditionType = `TailscaleEgressSvcConfigured`
+	// EgressSvcReady gets set on a user configured ExternalName Service that defines a tailnet target to be exposed
+	// on a ProxyGroup.
+	// Set to true if the service is ready to route cluster traffic.
+	EgressSvcReady ConditionType = `TailscaleEgressSvcReady`
 )