Browse Source

cmd/{containerboot,k8s-operator},kube/kubetypes: kube Ingress L7 proxies only advertise HTTPS endpoint when ready (#14171)

cmd/containerboot,kube/kubetypes,cmd/k8s-operator: detect if Ingress is created in a tailnet that has no HTTPS

This attempts to make Kubernetes Operator L7 Ingress setup failures more explicit:
- the Ingress resource now only advertises HTTPS endpoint via status.ingress.loadBalancer.hostname when/if the proxy has successfully loaded serve config
- the proxy attempts to catch cases where HTTPS is disabled for the tailnet and logs a warning

Updates tailscale/tailscale#12079
Updates tailscale/tailscale#10407

Signed-off-by: Irbe Krumina <[email protected]>
Irbe Krumina 1 year ago
parent
commit
2aac916888

+ 63 - 29
cmd/containerboot/kube.go

@@ -9,30 +9,55 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
-	"log"
 	"net/http"
 	"net/netip"
 	"os"
 
 	"tailscale.com/kube/kubeapi"
 	"tailscale.com/kube/kubeclient"
+	"tailscale.com/kube/kubetypes"
 	"tailscale.com/tailcfg"
 )
 
-// storeDeviceID writes deviceID to 'device_id' data field of the named
-// Kubernetes Secret.
-func storeDeviceID(ctx context.Context, secretName string, deviceID tailcfg.StableNodeID) error {
+// kubeClient is a wrapper around Tailscale's internal kube client that knows how to talk to the kube API server. We use
+// this rather than any of the upstream Kubernetes client libraries to avoid extra imports.
+type kubeClient struct {
+	kubeclient.Client
+	stateSecret string
+}
+
+func newKubeClient(root string, stateSecret string) (*kubeClient, error) {
+	if root != "/" {
+		// If we are running in a test, we need to set the root path to the fake
+		// service account directory.
+		kubeclient.SetRootPathForTesting(root)
+	}
+	var err error
+	kc, err := kubeclient.New("tailscale-container")
+	if err != nil {
+		return nil, fmt.Errorf("Error creating kube client: %w", err)
+	}
+	if (root != "/") || os.Getenv("TS_KUBERNETES_READ_API_SERVER_ADDRESS_FROM_ENV") == "true" {
+		// Derive the API server address from the environment variables
+		// Used to set http server in tests, or optionally enabled by flag
+		kc.SetURL(fmt.Sprintf("https://%s:%s", os.Getenv("KUBERNETES_SERVICE_HOST"), os.Getenv("KUBERNETES_SERVICE_PORT_HTTPS")))
+	}
+	return &kubeClient{Client: kc, stateSecret: stateSecret}, nil
+}
+
+// storeDeviceID writes deviceID to 'device_id' data field of the client's state Secret.
+func (kc *kubeClient) storeDeviceID(ctx context.Context, deviceID tailcfg.StableNodeID) error {
 	s := &kubeapi.Secret{
 		Data: map[string][]byte{
-			"device_id": []byte(deviceID),
+			kubetypes.KeyDeviceID: []byte(deviceID),
 		},
 	}
-	return kc.StrategicMergePatchSecret(ctx, secretName, s, "tailscale-container")
+	return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, "tailscale-container")
 }
 
-// storeDeviceEndpoints writes device's tailnet IPs and MagicDNS name to fields
-// 'device_ips', 'device_fqdn' of the named Kubernetes Secret.
-func storeDeviceEndpoints(ctx context.Context, secretName string, fqdn string, addresses []netip.Prefix) error {
+// storeDeviceEndpoints writes device's tailnet IPs and MagicDNS name to fields 'device_ips', 'device_fqdn' of client's
+// state Secret.
+func (kc *kubeClient) storeDeviceEndpoints(ctx context.Context, fqdn string, addresses []netip.Prefix) error {
 	var ips []string
 	for _, addr := range addresses {
 		ips = append(ips, addr.Addr().String())
@@ -44,16 +69,28 @@ func storeDeviceEndpoints(ctx context.Context, secretName string, fqdn string, a
 
 	s := &kubeapi.Secret{
 		Data: map[string][]byte{
-			"device_fqdn": []byte(fqdn),
-			"device_ips":  deviceIPs,
+			kubetypes.KeyDeviceFQDN: []byte(fqdn),
+			kubetypes.KeyDeviceIPs:  deviceIPs,
 		},
 	}
-	return kc.StrategicMergePatchSecret(ctx, secretName, s, "tailscale-container")
+	return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, "tailscale-container")
+}
+
+// storeHTTPSEndpoint writes an HTTPS endpoint exposed by this device via 'tailscale serve' to the client's state
+// Secret. In practice this will be the same value that gets written to 'device_fqdn', but this should only be called
+// when the serve config has been successfully set up.
+func (kc *kubeClient) storeHTTPSEndpoint(ctx context.Context, ep string) error {
+	s := &kubeapi.Secret{
+		Data: map[string][]byte{
+			kubetypes.KeyHTTPSEndpoint: []byte(ep),
+		},
+	}
+	return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, "tailscale-container")
 }
 
 // deleteAuthKey deletes the 'authkey' field of the given kube
 // secret. No-op if there is no authkey in the secret.
-func deleteAuthKey(ctx context.Context, secretName string) error {
+func (kc *kubeClient) deleteAuthKey(ctx context.Context) error {
 	// m is a JSON Patch data structure, see https://jsonpatch.com/ or RFC 6902.
 	m := []kubeclient.JSONPatch{
 		{
@@ -61,7 +98,7 @@ func deleteAuthKey(ctx context.Context, secretName string) error {
 			Path: "/data/authkey",
 		},
 	}
-	if err := kc.JSONPatchResource(ctx, secretName, kubeclient.TypeSecrets, m); err != nil {
+	if err := kc.JSONPatchResource(ctx, kc.stateSecret, kubeclient.TypeSecrets, m); err != nil {
 		if s, ok := err.(*kubeapi.Status); ok && s.Code == http.StatusUnprocessableEntity {
 			// This is kubernetes-ese for "the field you asked to
 			// delete already doesn't exist", aka no-op.
@@ -72,22 +109,19 @@ func deleteAuthKey(ctx context.Context, secretName string) error {
 	return nil
 }
 
-var kc kubeclient.Client
-
-func initKubeClient(root string) {
-	if root != "/" {
-		// If we are running in a test, we need to set the root path to the fake
-		// service account directory.
-		kubeclient.SetRootPathForTesting(root)
+// storeCapVerUID stores the current capability version of tailscale and, if provided, UID of the Pod in the tailscale
+// state Secret.
+// These two fields are used by the Kubernetes Operator to observe the current capability version of tailscaled running in this container.
+func (kc *kubeClient) storeCapVerUID(ctx context.Context, podUID string) error {
+	capVerS := fmt.Sprintf("%d", tailcfg.CurrentCapabilityVersion)
+	d := map[string][]byte{
+		kubetypes.KeyCapVer: []byte(capVerS),
 	}
-	var err error
-	kc, err = kubeclient.New("tailscale-container")
-	if err != nil {
-		log.Fatalf("Error creating kube client: %v", err)
+	if podUID != "" {
+		d[kubetypes.KeyPodUID] = []byte(podUID)
 	}
-	if (root != "/") || os.Getenv("TS_KUBERNETES_READ_API_SERVER_ADDRESS_FROM_ENV") == "true" {
-		// Derive the API server address from the environment variables
-		// Used to set http server in tests, or optionally enabled by flag
-		kc.SetURL(fmt.Sprintf("https://%s:%s", os.Getenv("KUBERNETES_SERVICE_HOST"), os.Getenv("KUBERNETES_SERVICE_PORT_HTTPS")))
+	s := &kubeapi.Secret{
+		Data: d,
 	}
+	return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, "tailscale-container")
 }

+ 21 - 21
cmd/containerboot/kube_test.go

@@ -21,7 +21,7 @@ func TestSetupKube(t *testing.T) {
 		cfg     *settings
 		wantErr bool
 		wantCfg *settings
-		kc      kubeclient.Client
+		kc      *kubeClient
 	}{
 		{
 			name: "TS_AUTHKEY set, state Secret exists",
@@ -29,14 +29,14 @@ func TestSetupKube(t *testing.T) {
 				AuthKey:    "foo",
 				KubeSecret: "foo",
 			},
-			kc: &kubeclient.FakeClient{
+			kc: &kubeClient{stateSecret: "foo", Client: &kubeclient.FakeClient{
 				CheckSecretPermissionsImpl: func(context.Context, string) (bool, bool, error) {
 					return false, false, nil
 				},
 				GetSecretImpl: func(context.Context, string) (*kubeapi.Secret, error) {
 					return nil, nil
 				},
-			},
+			}},
 			wantCfg: &settings{
 				AuthKey:    "foo",
 				KubeSecret: "foo",
@@ -48,14 +48,14 @@ func TestSetupKube(t *testing.T) {
 				AuthKey:    "foo",
 				KubeSecret: "foo",
 			},
-			kc: &kubeclient.FakeClient{
+			kc: &kubeClient{stateSecret: "foo", Client: &kubeclient.FakeClient{
 				CheckSecretPermissionsImpl: func(context.Context, string) (bool, bool, error) {
 					return false, true, nil
 				},
 				GetSecretImpl: func(context.Context, string) (*kubeapi.Secret, error) {
 					return nil, &kubeapi.Status{Code: 404}
 				},
-			},
+			}},
 			wantCfg: &settings{
 				AuthKey:    "foo",
 				KubeSecret: "foo",
@@ -67,14 +67,14 @@ func TestSetupKube(t *testing.T) {
 				AuthKey:    "foo",
 				KubeSecret: "foo",
 			},
-			kc: &kubeclient.FakeClient{
+			kc: &kubeClient{stateSecret: "foo", Client: &kubeclient.FakeClient{
 				CheckSecretPermissionsImpl: func(context.Context, string) (bool, bool, error) {
 					return false, false, nil
 				},
 				GetSecretImpl: func(context.Context, string) (*kubeapi.Secret, error) {
 					return nil, &kubeapi.Status{Code: 404}
 				},
-			},
+			}},
 			wantCfg: &settings{
 				AuthKey:    "foo",
 				KubeSecret: "foo",
@@ -87,14 +87,14 @@ func TestSetupKube(t *testing.T) {
 				AuthKey:    "foo",
 				KubeSecret: "foo",
 			},
-			kc: &kubeclient.FakeClient{
+			kc: &kubeClient{stateSecret: "foo", Client: &kubeclient.FakeClient{
 				CheckSecretPermissionsImpl: func(context.Context, string) (bool, bool, error) {
 					return false, false, nil
 				},
 				GetSecretImpl: func(context.Context, string) (*kubeapi.Secret, error) {
 					return nil, &kubeapi.Status{Code: 403}
 				},
-			},
+			}},
 			wantCfg: &settings{
 				AuthKey:    "foo",
 				KubeSecret: "foo",
@@ -111,11 +111,11 @@ func TestSetupKube(t *testing.T) {
 				AuthKey:    "foo",
 				KubeSecret: "foo",
 			},
-			kc: &kubeclient.FakeClient{
+			kc: &kubeClient{stateSecret: "foo", Client: &kubeclient.FakeClient{
 				CheckSecretPermissionsImpl: func(context.Context, string) (bool, bool, error) {
 					return false, false, errors.New("broken")
 				},
-			},
+			}},
 			wantErr: true,
 		},
 		{
@@ -127,14 +127,14 @@ func TestSetupKube(t *testing.T) {
 			wantCfg: &settings{
 				KubeSecret: "foo",
 			},
-			kc: &kubeclient.FakeClient{
+			kc: &kubeClient{stateSecret: "foo", Client: &kubeclient.FakeClient{
 				CheckSecretPermissionsImpl: func(context.Context, string) (bool, bool, error) {
 					return false, true, nil
 				},
 				GetSecretImpl: func(context.Context, string) (*kubeapi.Secret, error) {
 					return nil, &kubeapi.Status{Code: 404}
 				},
-			},
+			}},
 		},
 		{
 			// Interactive login using URL in Pod logs
@@ -145,28 +145,28 @@ func TestSetupKube(t *testing.T) {
 			wantCfg: &settings{
 				KubeSecret: "foo",
 			},
-			kc: &kubeclient.FakeClient{
+			kc: &kubeClient{stateSecret: "foo", Client: &kubeclient.FakeClient{
 				CheckSecretPermissionsImpl: func(context.Context, string) (bool, bool, error) {
 					return false, false, nil
 				},
 				GetSecretImpl: func(context.Context, string) (*kubeapi.Secret, error) {
 					return &kubeapi.Secret{}, nil
 				},
-			},
+			}},
 		},
 		{
 			name: "TS_AUTHKEY not set, state Secret contains auth key, we do not have RBAC to patch it",
 			cfg: &settings{
 				KubeSecret: "foo",
 			},
-			kc: &kubeclient.FakeClient{
+			kc: &kubeClient{stateSecret: "foo", Client: &kubeclient.FakeClient{
 				CheckSecretPermissionsImpl: func(context.Context, string) (bool, bool, error) {
 					return false, false, nil
 				},
 				GetSecretImpl: func(context.Context, string) (*kubeapi.Secret, error) {
 					return &kubeapi.Secret{Data: map[string][]byte{"authkey": []byte("foo")}}, nil
 				},
-			},
+			}},
 			wantCfg: &settings{
 				KubeSecret: "foo",
 			},
@@ -177,14 +177,14 @@ func TestSetupKube(t *testing.T) {
 			cfg: &settings{
 				KubeSecret: "foo",
 			},
-			kc: &kubeclient.FakeClient{
+			kc: &kubeClient{stateSecret: "foo", Client: &kubeclient.FakeClient{
 				CheckSecretPermissionsImpl: func(context.Context, string) (bool, bool, error) {
 					return true, false, nil
 				},
 				GetSecretImpl: func(context.Context, string) (*kubeapi.Secret, error) {
 					return &kubeapi.Secret{Data: map[string][]byte{"authkey": []byte("foo")}}, nil
 				},
-			},
+			}},
 			wantCfg: &settings{
 				KubeSecret:         "foo",
 				AuthKey:            "foo",
@@ -194,9 +194,9 @@ func TestSetupKube(t *testing.T) {
 	}
 
 	for _, tt := range tests {
-		kc = tt.kc
+		kc := tt.kc
 		t.Run(tt.name, func(t *testing.T) {
-			if err := tt.cfg.setupKube(context.Background()); (err != nil) != tt.wantErr {
+			if err := tt.cfg.setupKube(context.Background(), kc); (err != nil) != tt.wantErr {
 				t.Errorf("settings.setupKube() error = %v, wantErr %v", err, tt.wantErr)
 			}
 			if diff := cmp.Diff(*tt.cfg, *tt.wantCfg); diff != "" {

+ 37 - 12
cmd/containerboot/main.go

@@ -121,6 +121,7 @@ import (
 	"tailscale.com/client/tailscale"
 	"tailscale.com/ipn"
 	kubeutils "tailscale.com/k8s-operator"
+	"tailscale.com/kube/kubetypes"
 	"tailscale.com/tailcfg"
 	"tailscale.com/types/logger"
 	"tailscale.com/types/ptr"
@@ -167,9 +168,13 @@ func main() {
 	bootCtx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
 	defer cancel()
 
+	var kc *kubeClient
 	if cfg.InKubernetes {
-		initKubeClient(cfg.Root)
-		if err := cfg.setupKube(bootCtx); err != nil {
+		kc, err = newKubeClient(cfg.Root, cfg.KubeSecret)
+		if err != nil {
+			log.Fatalf("error initializing kube client: %v", err)
+		}
+		if err := cfg.setupKube(bootCtx, kc); err != nil {
 			log.Fatalf("error setting up for running on Kubernetes: %v", err)
 		}
 	}
@@ -319,12 +324,16 @@ authLoop:
 		}
 	}
 
+	// Remove any serve config and advertised HTTPS endpoint that may have been set by a previous run of
+	// containerboot, but only if we're providing a new one.
 	if cfg.ServeConfigPath != "" {
-		// Remove any serve config that may have been set by a previous run of
-		// containerboot, but only if we're providing a new one.
+		log.Printf("serve proxy: unsetting previous config")
 		if err := client.SetServeConfig(ctx, new(ipn.ServeConfig)); err != nil {
 			log.Fatalf("failed to unset serve config: %v", err)
 		}
+		if err := kc.storeHTTPSEndpoint(ctx, ""); err != nil {
+			log.Fatalf("failed to update HTTPS endpoint in tailscale state: %v", err)
+		}
 	}
 
 	if hasKubeStateStore(cfg) && isTwoStepConfigAuthOnce(cfg) {
@@ -332,11 +341,17 @@ authLoop:
 		// authkey is no longer needed. We don't strictly need to
 		// wipe it, but it's good hygiene.
 		log.Printf("Deleting authkey from kube secret")
-		if err := deleteAuthKey(ctx, cfg.KubeSecret); err != nil {
+		if err := kc.deleteAuthKey(ctx); err != nil {
 			log.Fatalf("deleting authkey from kube secret: %v", err)
 		}
 	}
 
+	if hasKubeStateStore(cfg) {
+		if err := kc.storeCapVerUID(ctx, cfg.PodUID); err != nil {
+			log.Fatalf("storing capability version and UID: %v", err)
+		}
+	}
+
 	w, err = client.WatchIPNBus(ctx, ipn.NotifyInitialNetMap|ipn.NotifyInitialState)
 	if err != nil {
 		log.Fatalf("rewatching tailscaled for updates after auth: %v", err)
@@ -355,10 +370,10 @@ authLoop:
 
 		certDomain        = new(atomic.Pointer[string])
 		certDomainChanged = make(chan bool, 1)
+
+		triggerWatchServeConfigChanges sync.Once
 	)
-	if cfg.ServeConfigPath != "" {
-		go watchServeConfigChanges(ctx, cfg.ServeConfigPath, certDomainChanged, certDomain, client)
-	}
+
 	var nfr linuxfw.NetfilterRunner
 	if isL3Proxy(cfg) {
 		nfr, err = newNetfilterRunner(log.Printf)
@@ -459,7 +474,7 @@ runLoop:
 				// fails.
 				deviceID := n.NetMap.SelfNode.StableID()
 				if hasKubeStateStore(cfg) && deephash.Update(&currentDeviceID, &deviceID) {
-					if err := storeDeviceID(ctx, cfg.KubeSecret, n.NetMap.SelfNode.StableID()); err != nil {
+					if err := kc.storeDeviceID(ctx, n.NetMap.SelfNode.StableID()); err != nil {
 						log.Fatalf("storing device ID in Kubernetes Secret: %v", err)
 					}
 				}
@@ -532,8 +547,11 @@ runLoop:
 					resetTimer(false)
 					backendAddrs = newBackendAddrs
 				}
-				if cfg.ServeConfigPath != "" && len(n.NetMap.DNS.CertDomains) != 0 {
-					cd := n.NetMap.DNS.CertDomains[0]
+				if cfg.ServeConfigPath != "" {
+					cd := certDomainFromNetmap(n.NetMap)
+					if cd == "" {
+						cd = kubetypes.ValueNoHTTPS
+					}
 					prev := certDomain.Swap(ptr.To(cd))
 					if prev == nil || *prev != cd {
 						select {
@@ -575,7 +593,7 @@ runLoop:
 				// TODO (irbekrm): instead of using the IP and FQDN, have some other mechanism for the proxy signal that it is 'Ready'.
 				deviceEndpoints := []any{n.NetMap.SelfNode.Name(), n.NetMap.SelfNode.Addresses()}
 				if hasKubeStateStore(cfg) && deephash.Update(&currentDeviceEndpoints, &deviceEndpoints) {
-					if err := storeDeviceEndpoints(ctx, cfg.KubeSecret, n.NetMap.SelfNode.Name(), n.NetMap.SelfNode.Addresses().AsSlice()); err != nil {
+					if err := kc.storeDeviceEndpoints(ctx, n.NetMap.SelfNode.Name(), n.NetMap.SelfNode.Addresses().AsSlice()); err != nil {
 						log.Fatalf("storing device IPs and FQDN in Kubernetes Secret: %v", err)
 					}
 				}
@@ -583,6 +601,13 @@ runLoop:
 				if healthCheck != nil {
 					healthCheck.update(len(addrs) != 0)
 				}
+
+				if cfg.ServeConfigPath != "" {
+					triggerWatchServeConfigChanges.Do(func() {
+						go watchServeConfigChanges(ctx, cfg.ServeConfigPath, certDomainChanged, certDomain, client, kc)
+					})
+				}
+
 				if egressSvcsNotify != nil {
 					egressSvcsNotify <- n
 				}

+ 21 - 15
cmd/containerboot/main_test.go

@@ -120,6 +120,8 @@ func TestContainerBoot(t *testing.T) {
 		return fmt.Sprintf("http://127.0.0.1:%d/healthz", port)
 	}
 
+	capver := fmt.Sprintf("%d", tailcfg.CurrentCapabilityVersion)
+
 	type phase struct {
 		// If non-nil, send this IPN bus notification (and remember it as the
 		// initial update for any future new watchers, then wait for all the
@@ -478,10 +480,11 @@ func TestContainerBoot(t *testing.T) {
 				{
 					Notify: runningNotify,
 					WantKubeSecret: map[string]string{
-						"authkey":     "tskey-key",
-						"device_fqdn": "test-node.test.ts.net",
-						"device_id":   "myID",
-						"device_ips":  `["100.64.0.1"]`,
+						"authkey":          "tskey-key",
+						"device_fqdn":      "test-node.test.ts.net",
+						"device_id":        "myID",
+						"device_ips":       `["100.64.0.1"]`,
+						"tailscale_capver": capver,
 					},
 				},
 			},
@@ -571,9 +574,10 @@ func TestContainerBoot(t *testing.T) {
 						"/usr/bin/tailscale --socket=/tmp/tailscaled.sock set --accept-dns=false",
 					},
 					WantKubeSecret: map[string]string{
-						"device_fqdn": "test-node.test.ts.net",
-						"device_id":   "myID",
-						"device_ips":  `["100.64.0.1"]`,
+						"device_fqdn":      "test-node.test.ts.net",
+						"device_id":        "myID",
+						"device_ips":       `["100.64.0.1"]`,
+						"tailscale_capver": capver,
 					},
 				},
 			},
@@ -600,10 +604,11 @@ func TestContainerBoot(t *testing.T) {
 				{
 					Notify: runningNotify,
 					WantKubeSecret: map[string]string{
-						"authkey":     "tskey-key",
-						"device_fqdn": "test-node.test.ts.net",
-						"device_id":   "myID",
-						"device_ips":  `["100.64.0.1"]`,
+						"authkey":          "tskey-key",
+						"device_fqdn":      "test-node.test.ts.net",
+						"device_id":        "myID",
+						"device_ips":       `["100.64.0.1"]`,
+						"tailscale_capver": capver,
 					},
 				},
 				{
@@ -618,10 +623,11 @@ func TestContainerBoot(t *testing.T) {
 						},
 					},
 					WantKubeSecret: map[string]string{
-						"authkey":     "tskey-key",
-						"device_fqdn": "new-name.test.ts.net",
-						"device_id":   "newID",
-						"device_ips":  `["100.64.0.1"]`,
+						"authkey":          "tskey-key",
+						"device_fqdn":      "new-name.test.ts.net",
+						"device_id":        "newID",
+						"device_ips":       `["100.64.0.1"]`,
+						"tailscale_capver": capver,
 					},
 				},
 			},

+ 49 - 11
cmd/containerboot/serve.go

@@ -19,6 +19,8 @@ import (
 	"github.com/fsnotify/fsnotify"
 	"tailscale.com/client/tailscale"
 	"tailscale.com/ipn"
+	"tailscale.com/kube/kubetypes"
+	"tailscale.com/types/netmap"
 )
 
 // watchServeConfigChanges watches path for changes, and when it sees one, reads
@@ -26,21 +28,21 @@ import (
 // applies it to lc. It exits when ctx is canceled. cdChanged is a channel that
 // is written to when the certDomain changes, causing the serve config to be
 // re-read and applied.
-func watchServeConfigChanges(ctx context.Context, path string, cdChanged <-chan bool, certDomainAtomic *atomic.Pointer[string], lc *tailscale.LocalClient) {
+func watchServeConfigChanges(ctx context.Context, path string, cdChanged <-chan bool, certDomainAtomic *atomic.Pointer[string], lc *tailscale.LocalClient, kc *kubeClient) {
 	if certDomainAtomic == nil {
-		panic("cd must not be nil")
+		panic("certDomainAtomic must not be nil")
 	}
 	var tickChan <-chan time.Time
 	var eventChan <-chan fsnotify.Event
 	if w, err := fsnotify.NewWatcher(); err != nil {
-		log.Printf("failed to create fsnotify watcher, timer-only mode: %v", err)
+		log.Printf("serve proxy: failed to create fsnotify watcher, timer-only mode: %v", err)
 		ticker := time.NewTicker(5 * time.Second)
 		defer ticker.Stop()
 		tickChan = ticker.C
 	} else {
 		defer w.Close()
 		if err := w.Add(filepath.Dir(path)); err != nil {
-			log.Fatalf("failed to add fsnotify watch: %v", err)
+			log.Fatalf("serve proxy: failed to add fsnotify watch: %v", err)
 		}
 		eventChan = w.Events
 	}
@@ -59,24 +61,60 @@ func watchServeConfigChanges(ctx context.Context, path string, cdChanged <-chan
 			// k8s handles these mounts. So just re-read the file and apply it
 			// if it's changed.
 		}
-		if certDomain == "" {
-			continue
-		}
 		sc, err := readServeConfig(path, certDomain)
 		if err != nil {
-			log.Fatalf("failed to read serve config: %v", err)
+			log.Fatalf("serve proxy: failed to read serve config: %v", err)
 		}
 		if prevServeConfig != nil && reflect.DeepEqual(sc, prevServeConfig) {
 			continue
 		}
-		log.Printf("Applying serve config")
-		if err := lc.SetServeConfig(ctx, sc); err != nil {
-			log.Fatalf("failed to set serve config: %v", err)
+		validateHTTPSServe(certDomain, sc)
+		if err := updateServeConfig(ctx, sc, certDomain, lc); err != nil {
+			log.Fatalf("serve proxy: error updating serve config: %v", err)
+		}
+		if err := kc.storeHTTPSEndpoint(ctx, certDomain); err != nil {
+			log.Fatalf("serve proxy: error storing HTTPS endpoint: %v", err)
 		}
 		prevServeConfig = sc
 	}
 }
 
+func certDomainFromNetmap(nm *netmap.NetworkMap) string {
+	if len(nm.DNS.CertDomains) == 0 {
+		return ""
+	}
+	return nm.DNS.CertDomains[0]
+}
+
+func updateServeConfig(ctx context.Context, sc *ipn.ServeConfig, certDomain string, lc *tailscale.LocalClient) error {
+	// TODO(irbekrm): This means that serve config that does not expose HTTPS endpoint will not be set for a tailnet
+	// that does not have HTTPS enabled. We probably want to fix this.
+	if certDomain == kubetypes.ValueNoHTTPS {
+		return nil
+	}
+	log.Printf("serve proxy: applying serve config")
+	return lc.SetServeConfig(ctx, sc)
+}
+
+func validateHTTPSServe(certDomain string, sc *ipn.ServeConfig) {
+	if certDomain != kubetypes.ValueNoHTTPS || !hasHTTPSEndpoint(sc) {
+		return
+	}
+	log.Printf(
+		`serve proxy: this node is configured as a proxy that exposes an HTTPS endpoint to tailnet,
+		(perhaps a Kubernetes operator Ingress proxy) but it is not able to issue TLS certs, so this will likely not work.
+		To make it work, ensure that HTTPS is enabled for your tailnet, see https://tailscale.com/kb/1153/enabling-https for more details.`)
+}
+
+func hasHTTPSEndpoint(cfg *ipn.ServeConfig) bool {
+	for _, tcpCfg := range cfg.TCP {
+		if tcpCfg.HTTPS {
+			return true
+		}
+	}
+	return false
+}
+
 // readServeConfig reads the ipn.ServeConfig from path, replacing
 // ${TS_CERT_DOMAIN} with certDomain.
 func readServeConfig(path, certDomain string) (*ipn.ServeConfig, error) {

+ 3 - 1
cmd/containerboot/settings.go

@@ -67,6 +67,7 @@ type settings struct {
 	PodIP               string
 	PodIPv4             string
 	PodIPv6             string
+	PodUID              string
 	HealthCheckAddrPort string
 	LocalAddrPort       string
 	MetricsEnabled      bool
@@ -107,6 +108,7 @@ func configFromEnv() (*settings, error) {
 		HealthCheckEnabled:                    defaultBool("TS_ENABLE_HEALTH_CHECK", false),
 		DebugAddrPort:                         defaultEnv("TS_DEBUG_ADDR_PORT", ""),
 		EgressSvcsCfgPath:                     defaultEnv("TS_EGRESS_SERVICES_CONFIG_PATH", ""),
+		PodUID:                                defaultEnv("POD_UID", ""),
 	}
 	podIPs, ok := os.LookupEnv("POD_IPS")
 	if ok {
@@ -203,7 +205,7 @@ func (s *settings) validate() error {
 // setupKube is responsible for doing any necessary configuration and checks to
 // ensure that tailscale state storage and authentication mechanism will work on
 // Kubernetes.
-func (cfg *settings) setupKube(ctx context.Context) error {
+func (cfg *settings) setupKube(ctx context.Context, kc *kubeClient) error {
 	if cfg.KubeSecret == "" {
 		return nil
 	}

+ 5 - 5
cmd/k8s-operator/connector.go

@@ -234,21 +234,21 @@ func (a *ConnectorReconciler) maybeProvisionConnector(ctx context.Context, logge
 		return err
 	}
 
-	_, tsHost, ips, err := a.ssr.DeviceInfo(ctx, crl)
+	dev, err := a.ssr.DeviceInfo(ctx, crl, logger)
 	if err != nil {
 		return err
 	}
 
-	if tsHost == "" {
-		logger.Debugf("no Tailscale hostname known yet, waiting for connector pod to finish auth")
+	if dev == nil || dev.hostname == "" {
+		logger.Debugf("no Tailscale hostname known yet, waiting for Connector Pod to finish auth")
 		// No hostname yet. Wait for the connector pod to auth.
 		cn.Status.TailnetIPs = nil
 		cn.Status.Hostname = ""
 		return nil
 	}
 
-	cn.Status.TailnetIPs = ips
-	cn.Status.Hostname = tsHost
+	cn.Status.TailnetIPs = dev.ips
+	cn.Status.Hostname = dev.hostname
 
 	return nil
 }

+ 6 - 6
cmd/k8s-operator/ingress.go

@@ -279,12 +279,12 @@ func (a *IngressReconciler) maybeProvision(ctx context.Context, logger *zap.Suga
 		return fmt.Errorf("failed to provision: %w", err)
 	}
 
-	_, tsHost, _, err := a.ssr.DeviceInfo(ctx, crl)
+	dev, err := a.ssr.DeviceInfo(ctx, crl, logger)
 	if err != nil {
-		return fmt.Errorf("failed to get device ID: %w", err)
+		return fmt.Errorf("failed to retrieve Ingress HTTPS endpoint status: %w", err)
 	}
-	if tsHost == "" {
-		logger.Debugf("no Tailscale hostname known yet, waiting for proxy pod to finish auth")
+	if dev == nil || dev.ingressDNSName == "" {
+		logger.Debugf("no Ingress DNS name known yet, waiting for proxy Pod initialize and start serving Ingress")
 		// No hostname yet. Wait for the proxy pod to auth.
 		ing.Status.LoadBalancer.Ingress = nil
 		if err := a.Status().Update(ctx, ing); err != nil {
@@ -293,10 +293,10 @@ func (a *IngressReconciler) maybeProvision(ctx context.Context, logger *zap.Suga
 		return nil
 	}
 
-	logger.Debugf("setting ingress hostname to %q", tsHost)
+	logger.Debugf("setting Ingress hostname to %q", dev.ingressDNSName)
 	ing.Status.LoadBalancer.Ingress = []networkingv1.IngressLoadBalancerIngress{
 		{
-			Hostname: tsHost,
+			Hostname: dev.ingressDNSName,
 			Ports: []networkingv1.IngressPortStatus{
 				{
 					Protocol: "TCP",

+ 148 - 0
cmd/k8s-operator/ingress_test.go

@@ -142,6 +142,154 @@ func TestTailscaleIngress(t *testing.T) {
 	expectMissing[corev1.Secret](t, fc, "operator-ns", fullName)
 }
 
+func TestTailscaleIngressHostname(t *testing.T) {
+	tsIngressClass := &networkingv1.IngressClass{ObjectMeta: metav1.ObjectMeta{Name: "tailscale"}, Spec: networkingv1.IngressClassSpec{Controller: "tailscale.com/ts-ingress"}}
+	fc := fake.NewFakeClient(tsIngressClass)
+	ft := &fakeTSClient{}
+	fakeTsnetServer := &fakeTSNetServer{certDomains: []string{"foo.com"}}
+	zl, err := zap.NewDevelopment()
+	if err != nil {
+		t.Fatal(err)
+	}
+	ingR := &IngressReconciler{
+		Client: fc,
+		ssr: &tailscaleSTSReconciler{
+			Client:            fc,
+			tsClient:          ft,
+			tsnetServer:       fakeTsnetServer,
+			defaultTags:       []string{"tag:k8s"},
+			operatorNamespace: "operator-ns",
+			proxyImage:        "tailscale/tailscale",
+		},
+		logger: zl.Sugar(),
+	}
+
+	// 1. Resources get created for regular Ingress
+	ing := &networkingv1.Ingress{
+		TypeMeta: metav1.TypeMeta{Kind: "Ingress", APIVersion: "networking.k8s.io/v1"},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "test",
+			Namespace: "default",
+			// The apiserver is supposed to set the UID, but the fake client
+			// doesn't. So, set it explicitly because other code later depends
+			// on it being set.
+			UID: types.UID("1234-UID"),
+		},
+		Spec: networkingv1.IngressSpec{
+			IngressClassName: ptr.To("tailscale"),
+			DefaultBackend: &networkingv1.IngressBackend{
+				Service: &networkingv1.IngressServiceBackend{
+					Name: "test",
+					Port: networkingv1.ServiceBackendPort{
+						Number: 8080,
+					},
+				},
+			},
+			TLS: []networkingv1.IngressTLS{
+				{Hosts: []string{"default-test"}},
+			},
+		},
+	}
+	mustCreate(t, fc, ing)
+	mustCreate(t, fc, &corev1.Service{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "test",
+			Namespace: "default",
+		},
+		Spec: corev1.ServiceSpec{
+			ClusterIP: "1.2.3.4",
+			Ports: []corev1.ServicePort{{
+				Port: 8080,
+				Name: "http"},
+			},
+		},
+	})
+
+	expectReconciled(t, ingR, "default", "test")
+
+	fullName, shortName := findGenName(t, fc, "default", "test", "ingress")
+	mustCreate(t, fc, &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      fullName,
+			Namespace: "operator-ns",
+			UID:       "test-uid",
+		},
+	})
+	opts := configOpts{
+		stsName:    shortName,
+		secretName: fullName,
+		namespace:  "default",
+		parentType: "ingress",
+		hostname:   "default-test",
+		app:        kubetypes.AppIngressResource,
+	}
+	serveConfig := &ipn.ServeConfig{
+		TCP: map[uint16]*ipn.TCPPortHandler{443: {HTTPS: true}},
+		Web: map[ipn.HostPort]*ipn.WebServerConfig{"${TS_CERT_DOMAIN}:443": {Handlers: map[string]*ipn.HTTPHandler{"/": {Proxy: "http://1.2.3.4:8080/"}}}},
+	}
+	opts.serveConfig = serveConfig
+
+	expectEqual(t, fc, expectedSecret(t, fc, opts), nil)
+	expectEqual(t, fc, expectedHeadlessService(shortName, "ingress"), nil)
+	expectEqual(t, fc, expectedSTSUserspace(t, fc, opts), removeHashAnnotation)
+
+	// 2. Ingress proxy with capability version >= 110 does not have an HTTPS endpoint set
+	mustUpdate(t, fc, "operator-ns", opts.secretName, func(secret *corev1.Secret) {
+		mak.Set(&secret.Data, "device_id", []byte("1234"))
+		mak.Set(&secret.Data, "tailscale_capver", []byte("110"))
+		mak.Set(&secret.Data, "pod_uid", []byte("test-uid"))
+		mak.Set(&secret.Data, "device_fqdn", []byte("foo.tailnetxyz.ts.net"))
+	})
+	expectReconciled(t, ingR, "default", "test")
+	ing.Finalizers = append(ing.Finalizers, "tailscale.com/finalizer")
+
+	expectEqual(t, fc, ing, nil)
+
+	// 3. Ingress proxy with capability version >= 110 advertises HTTPS endpoint
+	mustUpdate(t, fc, "operator-ns", opts.secretName, func(secret *corev1.Secret) {
+		mak.Set(&secret.Data, "device_id", []byte("1234"))
+		mak.Set(&secret.Data, "tailscale_capver", []byte("110"))
+		mak.Set(&secret.Data, "pod_uid", []byte("test-uid"))
+		mak.Set(&secret.Data, "device_fqdn", []byte("foo.tailnetxyz.ts.net"))
+		mak.Set(&secret.Data, "https_endpoint", []byte("foo.tailnetxyz.ts.net"))
+	})
+	expectReconciled(t, ingR, "default", "test")
+	ing.Status.LoadBalancer = networkingv1.IngressLoadBalancerStatus{
+		Ingress: []networkingv1.IngressLoadBalancerIngress{
+			{Hostname: "foo.tailnetxyz.ts.net", Ports: []networkingv1.IngressPortStatus{{Port: 443, Protocol: "TCP"}}},
+		},
+	}
+	expectEqual(t, fc, ing, nil)
+
+	// 4. Ingress proxy with capability version >= 110 does not have an HTTPS endpoint ready
+	mustUpdate(t, fc, "operator-ns", opts.secretName, func(secret *corev1.Secret) {
+		mak.Set(&secret.Data, "device_id", []byte("1234"))
+		mak.Set(&secret.Data, "tailscale_capver", []byte("110"))
+		mak.Set(&secret.Data, "pod_uid", []byte("test-uid"))
+		mak.Set(&secret.Data, "device_fqdn", []byte("foo.tailnetxyz.ts.net"))
+		mak.Set(&secret.Data, "https_endpoint", []byte("no-https"))
+	})
+	expectReconciled(t, ingR, "default", "test")
+	ing.Status.LoadBalancer.Ingress = nil
+	expectEqual(t, fc, ing, nil)
+
+	// 5. Ingress proxy's state has https_endpoints set, but its capver is not matching Pod UID (downgrade)
+	mustUpdate(t, fc, "operator-ns", opts.secretName, func(secret *corev1.Secret) {
+		mak.Set(&secret.Data, "device_id", []byte("1234"))
+		mak.Set(&secret.Data, "tailscale_capver", []byte("110"))
+		mak.Set(&secret.Data, "pod_uid", []byte("not-the-right-uid"))
+		mak.Set(&secret.Data, "device_fqdn", []byte("foo.tailnetxyz.ts.net"))
+		mak.Set(&secret.Data, "https_endpoint", []byte("bar.tailnetxyz.ts.net"))
+	})
+	ing.Status.LoadBalancer = networkingv1.IngressLoadBalancerStatus{
+		Ingress: []networkingv1.IngressLoadBalancerIngress{
+			{Hostname: "foo.tailnetxyz.ts.net", Ports: []networkingv1.IngressPortStatus{{Port: 443, Protocol: "TCP"}}},
+		},
+	}
+	expectReconciled(t, ingR, "default", "test")
+	expectEqual(t, fc, ing, nil)
+}
+
 func TestTailscaleIngressWithProxyClass(t *testing.T) {
 	// Setup
 	pc := &tsapi.ProxyClass{

+ 70 - 23
cmd/k8s-operator/sts.go

@@ -15,6 +15,7 @@ import (
 	"net/http"
 	"os"
 	"slices"
+	"strconv"
 	"strings"
 
 	"go.uber.org/zap"
@@ -197,11 +198,11 @@ func (a *tailscaleSTSReconciler) Provision(ctx context.Context, logger *zap.Suga
 	}
 	sts.ProxyClass = proxyClass
 
-	secretName, tsConfigHash, configs, err := a.createOrGetSecret(ctx, logger, sts, hsvc)
+	secretName, tsConfigHash, _, err := a.createOrGetSecret(ctx, logger, sts, hsvc)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create or get API key secret: %w", err)
 	}
-	_, err = a.reconcileSTS(ctx, logger, sts, hsvc, secretName, tsConfigHash, configs)
+	_, err = a.reconcileSTS(ctx, logger, sts, hsvc, secretName, tsConfigHash)
 	if err != nil {
 		return nil, fmt.Errorf("failed to reconcile statefulset: %w", err)
 	}
@@ -246,21 +247,21 @@ func (a *tailscaleSTSReconciler) Cleanup(ctx context.Context, logger *zap.Sugare
 		return false, nil
 	}
 
-	id, _, _, err := a.DeviceInfo(ctx, labels)
+	dev, err := a.DeviceInfo(ctx, labels, logger)
 	if err != nil {
 		return false, fmt.Errorf("getting device info: %w", err)
 	}
-	if id != "" {
-		logger.Debugf("deleting device %s from control", string(id))
-		if err := a.tsClient.DeleteDevice(ctx, string(id)); err != nil {
+	if dev != nil && dev.id != "" {
+		logger.Debugf("deleting device %s from control", string(dev.id))
+		if err := a.tsClient.DeleteDevice(ctx, string(dev.id)); err != nil {
 			errResp := &tailscale.ErrResponse{}
 			if ok := errors.As(err, errResp); ok && errResp.Status == http.StatusNotFound {
-				logger.Debugf("device %s not found, likely because it has already been deleted from control", string(id))
+				logger.Debugf("device %s not found, likely because it has already been deleted from control", string(dev.id))
 			} else {
 				return false, fmt.Errorf("deleting device: %w", err)
 			}
 		} else {
-			logger.Debugf("device %s deleted from control", string(id))
+			logger.Debugf("device %s deleted from control", string(dev.id))
 		}
 	}
 
@@ -440,40 +441,66 @@ func sanitizeConfigBytes(c ipn.ConfigVAlpha) string {
 // that acts as an operator proxy. It retrieves info from a Kubernetes Secret
 // labeled with the provided labels.
 // Either of device ID, hostname and IPs can be empty string if not found in the Secret.
-func (a *tailscaleSTSReconciler) DeviceInfo(ctx context.Context, childLabels map[string]string) (id tailcfg.StableNodeID, hostname string, ips []string, err error) {
+func (a *tailscaleSTSReconciler) DeviceInfo(ctx context.Context, childLabels map[string]string, logger *zap.SugaredLogger) (dev *device, err error) {
 	sec, err := getSingleObject[corev1.Secret](ctx, a.Client, a.operatorNamespace, childLabels)
 	if err != nil {
-		return "", "", nil, err
+		return dev, err
 	}
 	if sec == nil {
-		return "", "", nil, nil
+		return dev, nil
+	}
+	// The proxy Pod is looked up under the state Secret's namespace and name. A missing Pod is tolerated (deviceInfo
+	// then receives an empty Pod), but any other lookup failure is reported to the caller: returning a nil device
+	// with a nil error here would be indistinguishable from "no device exists".
+	pod := new(corev1.Pod)
+	if err := a.Get(ctx, types.NamespacedName{Namespace: sec.Namespace, Name: sec.Name}, pod); err != nil && !apierrors.IsNotFound(err) {
+		return nil, fmt.Errorf("getting proxy Pod %s/%s: %w", sec.Namespace, sec.Name, err)
 	}

-	return deviceInfo(sec)
+	return deviceInfo(sec, pod, logger)
+}
+
+// device contains tailscale state of a proxy device as gathered from its tailscale state Secret.
+type device struct {
+	id       tailcfg.StableNodeID // device's stable ID
+	hostname string               // MagicDNS name of the device
+	ips      []string             // Tailscale IPs of the device
+	// ingressDNSName is the L7 Ingress DNS name. In practice this will be the same value as hostname, but only set
+	// when the device has been configured to serve traffic on it via 'tailscale serve'.
+	ingressDNSName string
 }
 
-func deviceInfo(sec *corev1.Secret) (id tailcfg.StableNodeID, hostname string, ips []string, err error) {
-	id = tailcfg.StableNodeID(sec.Data["device_id"])
+func deviceInfo(sec *corev1.Secret, pod *corev1.Pod, log *zap.SugaredLogger) (dev *device, err error) {
+	id := tailcfg.StableNodeID(sec.Data[kubetypes.KeyDeviceID])
 	if id == "" {
-		return "", "", nil, nil
+		return dev, nil
 	}
+	dev = &device{id: id}
 	// Kubernetes chokes on well-formed FQDNs with the trailing dot, so we have
 	// to remove it.
-	hostname = strings.TrimSuffix(string(sec.Data["device_fqdn"]), ".")
-	if hostname == "" {
+	dev.hostname = strings.TrimSuffix(string(sec.Data[kubetypes.KeyDeviceFQDN]), ".")
+	if dev.hostname == "" {
 		// Device ID gets stored and retrieved in a different flow than
 		// FQDN and IPs. A device that acts as Kubernetes operator
-		// proxy, but whose route setup has failed might have an device
+		// proxy, but whose route setup has failed might have a device
 		// ID, but no FQDN/IPs. If so, return the ID, to allow the
 		// operator to clean up such devices.
-		return id, "", nil, nil
+		return dev, nil
+	}
+	// TODO(irbekrm): we fall back to using the hostname field to determine Ingress's hostname to ensure backwards
+	// compatibility. In 1.82 we can remove this fallback mechanism.
+	dev.ingressDNSName = dev.hostname
+	if proxyCapVer(sec, pod, log) >= 109 {
+		dev.ingressDNSName = strings.TrimSuffix(string(sec.Data[kubetypes.KeyHTTPSEndpoint]), ".")
+		if strings.EqualFold(dev.ingressDNSName, kubetypes.ValueNoHTTPS) {
+			dev.ingressDNSName = ""
+		}
 	}
-	if rawDeviceIPs, ok := sec.Data["device_ips"]; ok {
+	if rawDeviceIPs, ok := sec.Data[kubetypes.KeyDeviceIPs]; ok {
+		ips := make([]string, 0)
 		if err := json.Unmarshal(rawDeviceIPs, &ips); err != nil {
-			return "", "", nil, err
+			return nil, err
 		}
+		dev.ips = ips
 	}
-	return id, hostname, ips, nil
+	return dev, nil
 }
 
 func newAuthKey(ctx context.Context, tsClient tsClient, tags []string) (string, error) {
@@ -500,7 +527,7 @@ var proxyYaml []byte
 //go:embed deploy/manifests/userspace-proxy.yaml
 var userspaceProxyYaml []byte
 
-func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.SugaredLogger, sts *tailscaleSTSConfig, headlessSvc *corev1.Service, proxySecret, tsConfigHash string, _ map[tailcfg.CapabilityVersion]ipn.ConfigVAlpha) (*appsv1.StatefulSet, error) {
+func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.SugaredLogger, sts *tailscaleSTSConfig, headlessSvc *corev1.Service, proxySecret, tsConfigHash string) (*appsv1.StatefulSet, error) {
 	ss := new(appsv1.StatefulSet)
 	if sts.ServeConfig != nil && sts.ForwardClusterTrafficViaL7IngressProxy != true { // If forwarding cluster traffic via is required we need non-userspace + NET_ADMIN + forwarding
 		if err := yaml.Unmarshal(userspaceProxyYaml, &ss); err != nil {
@@ -1084,3 +1111,23 @@ func nameForService(svc *corev1.Service) string {
 func isValidFirewallMode(m string) bool {
 	return m == "auto" || m == "nftables" || m == "iptables"
 }
+
+// proxyCapVer accepts a proxy state Secret and a proxy Pod and returns the capability version of the proxy Pod.
+// This is best effort - if the capability version can not (currently) be determined, it returns -1.
+func proxyCapVer(sec *corev1.Secret, pod *corev1.Pod, log *zap.SugaredLogger) tailcfg.CapabilityVersion {
+	if sec == nil || pod == nil {
+		return tailcfg.CapabilityVersion(-1)
+	}
+	if len(sec.Data[kubetypes.KeyCapVer]) == 0 || len(sec.Data[kubetypes.KeyPodUID]) == 0 {
+		return tailcfg.CapabilityVersion(-1)
+	}
+	capVer, err := strconv.Atoi(string(sec.Data[kubetypes.KeyCapVer]))
+	if err != nil {
+		log.Infof("[unexpected]: unexpected capability version in proxy's state Secret, expected an integer, got %q", string(sec.Data[kubetypes.KeyCapVer]))
+		return tailcfg.CapabilityVersion(-1)
+	}
+	if !strings.EqualFold(string(pod.ObjectMeta.UID), string(sec.Data[kubetypes.KeyPodUID])) {
+		return tailcfg.CapabilityVersion(-1)
+	}
+	return tailcfg.CapabilityVersion(capVer)
+}

+ 5 - 5
cmd/k8s-operator/svc.go

@@ -320,11 +320,11 @@ func (a *ServiceReconciler) maybeProvision(ctx context.Context, logger *zap.Suga
 		return nil
 	}
 
-	_, tsHost, tsIPs, err := a.ssr.DeviceInfo(ctx, crl)
+	dev, err := a.ssr.DeviceInfo(ctx, crl, logger)
 	if err != nil {
 		return fmt.Errorf("failed to get device ID: %w", err)
 	}
-	if tsHost == "" {
+	if dev == nil || dev.hostname == "" {
 		msg := "no Tailscale hostname known yet, waiting for proxy pod to finish auth"
 		logger.Debug(msg)
 		// No hostname yet. Wait for the proxy pod to auth.
@@ -333,9 +333,9 @@ func (a *ServiceReconciler) maybeProvision(ctx context.Context, logger *zap.Suga
 		return nil
 	}
 
-	logger.Debugf("setting Service LoadBalancer status to %q, %s", tsHost, strings.Join(tsIPs, ", "))
+	logger.Debugf("setting Service LoadBalancer status to %q, %s", dev.hostname, strings.Join(dev.ips, ", "))
 	ingress := []corev1.LoadBalancerIngress{
-		{Hostname: tsHost},
+		{Hostname: dev.hostname},
 	}
 	clusterIPAddr, err := netip.ParseAddr(svc.Spec.ClusterIP)
 	if err != nil {
@@ -343,7 +343,7 @@ func (a *ServiceReconciler) maybeProvision(ctx context.Context, logger *zap.Suga
 		tsoperator.SetServiceCondition(svc, tsapi.ProxyReady, metav1.ConditionFalse, reasonProxyFailed, msg, a.clock, logger)
 		return errors.New(msg)
 	}
-	for _, ip := range tsIPs {
+	for _, ip := range dev.ips {
 		addr, err := netip.ParseAddr(ip)
 		if err != nil {
 			continue

+ 15 - 0
kube/kubetypes/metrics.go → kube/kubetypes/types.go

@@ -27,4 +27,19 @@ const (
 	MetricEgressServiceCount             = "k8s_egress_service_resources"
 	MetricProxyGroupEgressCount          = "k8s_proxygroup_egress_resources"
 	MetricProxyGroupIngressCount         = "k8s_proxygroup_ingress_resources"
+
+	// Keys of the fields that containerboot sets in the Tailscale state Secret and that can be used to determine
+	// its state. These are mostly used by the Tailscale Kubernetes operator to determine the state of this
+	// tailscale device.
+	KeyDeviceID   string = "device_id"   // node stable ID of the device
+	KeyDeviceFQDN string = "device_fqdn" // device's tailnet hostname
+	KeyDeviceIPs  string = "device_ips"  // device's tailnet IPs
+	KeyPodUID     string = "pod_uid"     // Pod UID
+	// KeyCapVer contains Tailscale capability version of this proxy instance.
+	KeyCapVer string = "tailscale_capver"
+	// KeyHTTPSEndpoint is a name of a field that can be set to the value of any HTTPS endpoint currently exposed by
+	// this device to the tailnet. This is used by the Kubernetes operator Ingress proxy to communicate to the operator
+	// that cluster workloads behind the Ingress can now be accessed via the given DNS name over HTTPS.
+	KeyHTTPSEndpoint string = "https_endpoint"
+	ValueNoHTTPS     string = "no-https"
 )