operator.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324
  1. // Copyright (c) Tailscale Inc & AUTHORS
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. //go:build !plan9
  4. // tailscale-operator provides a way to expose services running in a Kubernetes
  5. // cluster to your Tailnet.
  6. package main
  7. import (
  8. "context"
  9. "os"
  10. "strings"
  11. "time"
  12. "github.com/go-logr/zapr"
  13. "go.uber.org/zap"
  14. "go.uber.org/zap/zapcore"
  15. "golang.org/x/oauth2/clientcredentials"
  16. appsv1 "k8s.io/api/apps/v1"
  17. corev1 "k8s.io/api/core/v1"
  18. networkingv1 "k8s.io/api/networking/v1"
  19. "k8s.io/apimachinery/pkg/types"
  20. "k8s.io/client-go/rest"
  21. "sigs.k8s.io/controller-runtime/pkg/builder"
  22. "sigs.k8s.io/controller-runtime/pkg/cache"
  23. "sigs.k8s.io/controller-runtime/pkg/client"
  24. "sigs.k8s.io/controller-runtime/pkg/client/config"
  25. "sigs.k8s.io/controller-runtime/pkg/handler"
  26. logf "sigs.k8s.io/controller-runtime/pkg/log"
  27. kzap "sigs.k8s.io/controller-runtime/pkg/log/zap"
  28. "sigs.k8s.io/controller-runtime/pkg/manager"
  29. "sigs.k8s.io/controller-runtime/pkg/manager/signals"
  30. "sigs.k8s.io/controller-runtime/pkg/reconcile"
  31. "tailscale.com/client/tailscale"
  32. "tailscale.com/hostinfo"
  33. "tailscale.com/ipn"
  34. "tailscale.com/ipn/store/kubestore"
  35. "tailscale.com/tsnet"
  36. "tailscale.com/types/logger"
  37. "tailscale.com/version"
  38. )
  39. func main() {
  40. // Required to use our client API. We're fine with the instability since the
  41. // client lives in the same repo as this code.
  42. tailscale.I_Acknowledge_This_API_Is_Unstable = true
  43. var (
  44. tsNamespace = defaultEnv("OPERATOR_NAMESPACE", "")
  45. tslogging = defaultEnv("OPERATOR_LOGGING", "info")
  46. image = defaultEnv("PROXY_IMAGE", "tailscale/tailscale:latest")
  47. priorityClassName = defaultEnv("PROXY_PRIORITY_CLASS_NAME", "")
  48. tags = defaultEnv("PROXY_TAGS", "tag:k8s")
  49. tsFirewallMode = defaultEnv("PROXY_FIREWALL_MODE", "")
  50. )
  51. var opts []kzap.Opts
  52. switch tslogging {
  53. case "info":
  54. opts = append(opts, kzap.Level(zapcore.InfoLevel))
  55. case "debug":
  56. opts = append(opts, kzap.Level(zapcore.DebugLevel))
  57. case "dev":
  58. opts = append(opts, kzap.UseDevMode(true), kzap.Level(zapcore.DebugLevel))
  59. }
  60. zlog := kzap.NewRaw(opts...).Sugar()
  61. logf.SetLogger(zapr.NewLogger(zlog.Desugar()))
  62. // The operator can run either as a plain operator or it can
  63. // additionally act as api-server proxy
  64. // https://tailscale.com/kb/1236/kubernetes-operator/?q=kubernetes#accessing-the-kubernetes-control-plane-using-an-api-server-proxy.
  65. mode := parseAPIProxyMode()
  66. if mode == apiserverProxyModeDisabled {
  67. hostinfo.SetApp("k8s-operator")
  68. } else {
  69. hostinfo.SetApp("k8s-operator-proxy")
  70. }
  71. s, tsClient := initTSNet(zlog)
  72. defer s.Close()
  73. restConfig := config.GetConfigOrDie()
  74. maybeLaunchAPIServerProxy(zlog, restConfig, s, mode)
  75. runReconcilers(zlog, s, tsNamespace, restConfig, tsClient, image, priorityClassName, tags, tsFirewallMode)
  76. }
  77. // initTSNet initializes the tsnet.Server and logs in to Tailscale. It uses the
  78. // CLIENT_ID_FILE and CLIENT_SECRET_FILE environment variables to authenticate
  79. // with Tailscale.
  80. func initTSNet(zlog *zap.SugaredLogger) (*tsnet.Server, *tailscale.Client) {
  81. var (
  82. clientIDPath = defaultEnv("CLIENT_ID_FILE", "")
  83. clientSecretPath = defaultEnv("CLIENT_SECRET_FILE", "")
  84. hostname = defaultEnv("OPERATOR_HOSTNAME", "tailscale-operator")
  85. kubeSecret = defaultEnv("OPERATOR_SECRET", "")
  86. operatorTags = defaultEnv("OPERATOR_INITIAL_TAGS", "tag:k8s-operator")
  87. )
  88. startlog := zlog.Named("startup")
  89. if clientIDPath == "" || clientSecretPath == "" {
  90. startlog.Fatalf("CLIENT_ID_FILE and CLIENT_SECRET_FILE must be set")
  91. }
  92. clientID, err := os.ReadFile(clientIDPath)
  93. if err != nil {
  94. startlog.Fatalf("reading client ID %q: %v", clientIDPath, err)
  95. }
  96. clientSecret, err := os.ReadFile(clientSecretPath)
  97. if err != nil {
  98. startlog.Fatalf("reading client secret %q: %v", clientSecretPath, err)
  99. }
  100. credentials := clientcredentials.Config{
  101. ClientID: string(clientID),
  102. ClientSecret: string(clientSecret),
  103. TokenURL: "https://login.tailscale.com/api/v2/oauth/token",
  104. }
  105. tsClient := tailscale.NewClient("-", nil)
  106. tsClient.HTTPClient = credentials.Client(context.Background())
  107. s := &tsnet.Server{
  108. Hostname: hostname,
  109. Logf: zlog.Named("tailscaled").Debugf,
  110. }
  111. if kubeSecret != "" {
  112. st, err := kubestore.New(logger.Discard, kubeSecret)
  113. if err != nil {
  114. startlog.Fatalf("creating kube store: %v", err)
  115. }
  116. s.Store = st
  117. }
  118. if err := s.Start(); err != nil {
  119. startlog.Fatalf("starting tailscale server: %v", err)
  120. }
  121. lc, err := s.LocalClient()
  122. if err != nil {
  123. startlog.Fatalf("getting local client: %v", err)
  124. }
  125. ctx := context.Background()
  126. loginDone := false
  127. machineAuthShown := false
  128. waitOnline:
  129. for {
  130. startlog.Debugf("querying tailscaled status")
  131. st, err := lc.StatusWithoutPeers(ctx)
  132. if err != nil {
  133. startlog.Fatalf("getting status: %v", err)
  134. }
  135. switch st.BackendState {
  136. case "Running":
  137. break waitOnline
  138. case "NeedsLogin":
  139. if loginDone {
  140. break
  141. }
  142. caps := tailscale.KeyCapabilities{
  143. Devices: tailscale.KeyDeviceCapabilities{
  144. Create: tailscale.KeyDeviceCreateCapabilities{
  145. Reusable: false,
  146. Preauthorized: true,
  147. Tags: strings.Split(operatorTags, ","),
  148. },
  149. },
  150. }
  151. authkey, _, err := tsClient.CreateKey(ctx, caps)
  152. if err != nil {
  153. startlog.Fatalf("creating operator authkey: %v", err)
  154. }
  155. if err := lc.Start(ctx, ipn.Options{
  156. AuthKey: authkey,
  157. }); err != nil {
  158. startlog.Fatalf("starting tailscale: %v", err)
  159. }
  160. if err := lc.StartLoginInteractive(ctx); err != nil {
  161. startlog.Fatalf("starting login: %v", err)
  162. }
  163. startlog.Debugf("requested login by authkey")
  164. loginDone = true
  165. case "NeedsMachineAuth":
  166. if !machineAuthShown {
  167. startlog.Infof("Machine approval required, please visit the admin panel to approve")
  168. machineAuthShown = true
  169. }
  170. default:
  171. startlog.Debugf("waiting for tailscale to start: %v", st.BackendState)
  172. }
  173. time.Sleep(time.Second)
  174. }
  175. return s, tsClient
  176. }
  177. // runReconcilers starts the controller-runtime manager and registers the
  178. // ServiceReconciler. It blocks forever.
  179. func runReconcilers(zlog *zap.SugaredLogger, s *tsnet.Server, tsNamespace string, restConfig *rest.Config, tsClient *tailscale.Client, image, priorityClassName, tags, tsFirewallMode string) {
  180. var (
  181. isDefaultLoadBalancer = defaultBool("OPERATOR_DEFAULT_LOAD_BALANCER", false)
  182. )
  183. startlog := zlog.Named("startReconcilers")
  184. // For secrets and statefulsets, we only get permission to touch the objects
  185. // in the controller's own namespace. This cannot be expressed by
  186. // .Watches(...) below, instead you have to add a per-type field selector to
  187. // the cache that sits a few layers below the builder stuff, which will
  188. // implicitly filter what parts of the world the builder code gets to see at
  189. // all.
  190. nsFilter := cache.ByObject{
  191. Field: client.InNamespace(tsNamespace).AsSelector(),
  192. }
  193. mgr, err := manager.New(restConfig, manager.Options{
  194. Cache: cache.Options{
  195. ByObject: map[client.Object]cache.ByObject{
  196. &corev1.Secret{}: nsFilter,
  197. &appsv1.StatefulSet{}: nsFilter,
  198. },
  199. },
  200. })
  201. if err != nil {
  202. startlog.Fatalf("could not create manager: %v", err)
  203. }
  204. svcFilter := handler.EnqueueRequestsFromMapFunc(serviceHandler)
  205. svcChildFilter := handler.EnqueueRequestsFromMapFunc(managedResourceHandlerForType("svc"))
  206. eventRecorder := mgr.GetEventRecorderFor("tailscale-operator")
  207. ssr := &tailscaleSTSReconciler{
  208. Client: mgr.GetClient(),
  209. tsnetServer: s,
  210. tsClient: tsClient,
  211. defaultTags: strings.Split(tags, ","),
  212. operatorNamespace: tsNamespace,
  213. proxyImage: image,
  214. proxyPriorityClassName: priorityClassName,
  215. tsFirewallMode: tsFirewallMode,
  216. }
  217. err = builder.
  218. ControllerManagedBy(mgr).
  219. Named("service-reconciler").
  220. Watches(&corev1.Service{}, svcFilter).
  221. Watches(&appsv1.StatefulSet{}, svcChildFilter).
  222. Watches(&corev1.Secret{}, svcChildFilter).
  223. Complete(&ServiceReconciler{
  224. ssr: ssr,
  225. Client: mgr.GetClient(),
  226. logger: zlog.Named("service-reconciler"),
  227. isDefaultLoadBalancer: isDefaultLoadBalancer,
  228. recorder: eventRecorder,
  229. })
  230. if err != nil {
  231. startlog.Fatalf("could not create controller: %v", err)
  232. }
  233. ingressChildFilter := handler.EnqueueRequestsFromMapFunc(managedResourceHandlerForType("ingress"))
  234. err = builder.
  235. ControllerManagedBy(mgr).
  236. For(&networkingv1.Ingress{}).
  237. Watches(&appsv1.StatefulSet{}, ingressChildFilter).
  238. Watches(&corev1.Secret{}, ingressChildFilter).
  239. Watches(&corev1.Service{}, ingressChildFilter).
  240. Complete(&IngressReconciler{
  241. ssr: ssr,
  242. recorder: eventRecorder,
  243. Client: mgr.GetClient(),
  244. logger: zlog.Named("ingress-reconciler"),
  245. })
  246. if err != nil {
  247. startlog.Fatalf("could not create controller: %v", err)
  248. }
  249. startlog.Infof("Startup complete, operator running, version: %s", version.Long())
  250. if err := mgr.Start(signals.SetupSignalHandler()); err != nil {
  251. startlog.Fatalf("could not start manager: %v", err)
  252. }
  253. }
// tsClient lists the Tailscale API operations this package calls through an
// interface rather than a concrete client type.
// NOTE(review): presumably satisfied by *tailscale.Client (initTSNet calls
// CreateKey on one with the same result shape) — confirm against the
// tailscaleSTSReconciler field that holds it.
type tsClient interface {
	// CreateKey creates a key with the given capabilities. The string result
	// is used as an auth key by initTSNet; the *tailscale.Key result is
	// ignored there.
	CreateKey(ctx context.Context, caps tailscale.KeyCapabilities) (string, *tailscale.Key, error)
	// DeleteDevice deletes the device identified by nodeStableID.
	DeleteDevice(ctx context.Context, nodeStableID string) error
}
  258. func isManagedResource(o client.Object) bool {
  259. ls := o.GetLabels()
  260. return ls[LabelManaged] == "true"
  261. }
  262. func isManagedByType(o client.Object, typ string) bool {
  263. ls := o.GetLabels()
  264. return isManagedResource(o) && ls[LabelParentType] == typ
  265. }
  266. func parentFromObjectLabels(o client.Object) types.NamespacedName {
  267. ls := o.GetLabels()
  268. return types.NamespacedName{
  269. Namespace: ls[LabelParentNamespace],
  270. Name: ls[LabelParentName],
  271. }
  272. }
  273. func managedResourceHandlerForType(typ string) handler.MapFunc {
  274. return func(_ context.Context, o client.Object) []reconcile.Request {
  275. if !isManagedByType(o, typ) {
  276. return nil
  277. }
  278. return []reconcile.Request{
  279. {NamespacedName: parentFromObjectLabels(o)},
  280. }
  281. }
  282. }
  283. func serviceHandler(_ context.Context, o client.Object) []reconcile.Request {
  284. if isManagedByType(o, "svc") {
  285. // If this is a Service managed by a Service we want to enqueue its parent
  286. return []reconcile.Request{{NamespacedName: parentFromObjectLabels(o)}}
  287. }
  288. if isManagedResource(o) {
  289. // If this is a Servce managed by a resource that is not a Service, we leave it alone
  290. return nil
  291. }
  292. // If this is not a managed Service we want to enqueue it
  293. return []reconcile.Request{
  294. {
  295. NamespacedName: types.NamespacedName{
  296. Namespace: o.GetNamespace(),
  297. Name: o.GetName(),
  298. },
  299. },
  300. }
  301. }