{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "CER050 - Wait for BDC to be Healthy\n",
    "===================================\n",
    "\n",
    "This notebook will wait until the Big Data Cluster has returned to a\n",
    "healthy state, after the `Controller` pod and pods that use `PolyBase`\n",
    "have been restarted to load the new certificates.\n",
    "\n",
    "Steps\n",
    "-----\n",
    "\n",
    "### Parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "parameters"
    ]
   },
   "outputs": [],
   "source": [
    "timeout = 600  # seconds to wait for the cluster to become healthy (default: 10 minutes)\n",
    "check_interval = 30  # seconds between health checks (default: 30 seconds)\n",
    "min_pod_count = 10  # minimum number of healthy pods required to assert health"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Instantiate Kubernetes client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "hide_input"
    ]
   },
   "outputs": [],
   "source": [
    "# Instantiate the Python Kubernetes client into 'api' variable\n",
    "\n",
    "import os\n",
    "\n",
    "# Imported up front so the HINT messages below can be rendered on a fresh kernel.\n",
    "from IPython.display import Markdown, display\n",
    "\n",
    "try:\n",
    "    from kubernetes import client, config\n",
    "    from kubernetes.stream import stream\n",
    "\n",
    "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
    "        config.load_incluster_config()\n",
    "    else:\n",
    "        try:\n",
    "            config.load_kube_config()\n",
    "        except Exception:\n",
    "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
    "            raise\n",
    "    api = client.CoreV1Api()\n",
    "\n",
    "    print('Kubernetes client instantiated')\n",
    "except ImportError:\n",
    "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
    "    raise"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Get the namespace for the big data cluster\n",
    "\n",
    "Get the namespace of the Big Data Cluster from the Kubernetes API.\n",
    "\n",
    "**NOTE:**\n",
    "\n",
    "If there is more than one Big Data Cluster in the target Kubernetes\n",
    "cluster, then either:\n",
    "\n",
    "-   set \\[0\\] to the correct value for the big data cluster.\n",
    "-   set the environment variable AZDATA\\_NAMESPACE, before starting\n",
    "    Azure Data Studio."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "hide_input"
    ]
   },
   "outputs": [],
   "source": [
    "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
    "\n",
    "if \"AZDATA_NAMESPACE\" in os.environ:\n",
    "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
    "else:\n",
    "    try:\n",
    "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
    "    except IndexError:\n",
    "        from IPython.display import Markdown\n",
    "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
    "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
    "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
    "        raise\n",
    "\n",
    "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Define functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import threading\n",
    "import time\n",
    "import sys\n",
    "import os\n",
    "from IPython.display import Markdown, display\n",
    "\n",
    "# Polling flag read by waitReady(); setting it to False makes the background loop exit.\n",
    "isRunning = True\n",
    "\n",
    "\n",
    "def all_containers_ready(pod):\n",
    "    \"\"\"Return True when every container in the given pod reports ready.\n",
    "\n",
    "    A pod whose container statuses are missing (None) or empty is treated\n",
    "    as not ready instead of raising a TypeError.\n",
    "\n",
    "    Arguments:\n",
    "        pod {V1Pod} -- pod object returned by the Kubernetes API.\n",
    "    \"\"\"\n",
    "    statuses = pod.status.container_statuses\n",
    "    return bool(statuses) and all(c.ready is True for c in statuses)\n",
    "\n",
    "\n",
    "def pod_is_ready(pod):\n",
    "    \"\"\"Return True when the pod counts as healthy.\n",
    "\n",
    "    Pods carrying a \"job-name\" label are always accepted; any other pod\n",
    "    must be in the \"Running\" phase with all of its containers ready.\n",
    "\n",
    "    Arguments:\n",
    "        pod {V1Pod} -- pod object returned by the Kubernetes API.\n",
    "    \"\"\"\n",
    "    labels = pod.metadata.labels or {}  # labels may be None; treat as no labels\n",
    "    return \"job-name\" in labels or (pod.status.phase == \"Running\" and all_containers_ready(pod))\n",
    "\n",
    "\n",
    "def waitReady():\n",
    "    \"\"\"Poll until at least min_pod_count pods exist and all of them are ready.\n",
    "\n",
    "    Returns True once the cluster is healthy, or None if isRunning is\n",
    "    cleared first. An exception raised by a check is displayed and the\n",
    "    check is retried after check_interval seconds.\n",
    "    \"\"\"\n",
    "    while isRunning:\n",
    "        try:\n",
    "            pods = get_pods()\n",
    "            allReady = len(pods.items) >= min_pod_count and all(map(pod_is_ready, pods.items))\n",
    "\n",
    "            if allReady:\n",
    "                return True\n",
    "\n",
    "            display(Markdown(get_pod_failures(pods)))\n",
    "            display(Markdown(f\"cluster not healthy, rechecking in {check_interval} seconds.\"))\n",
    "        except Exception as ex:\n",
    "            # Best effort: show the error and try again on the next interval.\n",
    "            display(Markdown(str(ex)))\n",
    "\n",
    "        time.sleep(check_interval)\n",
    "\n",
    "\n",
    "def get_pod_failures(pods=None):\n",
    "    \"\"\"Build a Markdown status message for pods and containers that are not ready.\n",
    "\n",
    "    Pods carrying a \"job-name\" label are skipped.\n",
    "\n",
    "    Arguments:\n",
    "        pods {V1PodList} -- pod list to inspect; fetched with get_pods() when omitted.\n",
    "\n",
    "    Returns:\n",
    "        str -- one \"<br/>\"-terminated entry per problem, or \"\" when none are found.\n",
    "    \"\"\"\n",
    "    if not pods:\n",
    "        pods = get_pods()\n",
    "\n",
    "    messages = []\n",
    "    for pod in pods.items:\n",
    "        if \"job-name\" in (pod.metadata.labels or {}):\n",
    "            continue\n",
    "\n",
    "        statuses = pod.status.container_statuses if pod.status else None\n",
    "        if not statuses:\n",
    "            messages.append(\"Pod {0} is not ready.<br/>\".format(pod.metadata.name))\n",
    "            continue\n",
    "\n",
    "        for container in statuses:\n",
    "            # Same readiness test as all_containers_ready(), so every blocking container is listed.\n",
    "            if container.ready is not True:\n",
    "                messages.append(\"Container {0} in Pod {1} is not ready. Reported status: {2}<br/>\".format(container.name, pod.metadata.name, container.state))\n",
    "    return \"\".join(messages)\n",
    "\n",
    "\n",
    "def get_pods():\n",
    "    \"\"\"Return the pods in `namespace`, or in all namespaces when `namespace` is None.\n",
    "\n",
    "    Each API request is limited to 30 seconds.\n",
    "    \"\"\"\n",
    "    if namespace is not None:\n",
    "        display(Markdown(f'Checking namespace {namespace}'))\n",
    "        return api.list_namespaced_pod(namespace, _request_timeout=30)\n",
    "\n",
    "    display(Markdown('Checking all namespaces'))\n",
    "    return api.list_pod_for_all_namespaces(_request_timeout=30)\n",
    "\n",
    "\n",
    "# Run the health check in the background so the overall wait can be bounded by `timeout`.\n",
    "# daemon=True keeps a still-sleeping poller from holding the kernel process open.\n",
    "mt = threading.Thread(target=waitReady, daemon=True)\n",
    "mt.start()\n",
    "mt.join(timeout=timeout)\n",
    "\n",
    "timed_out = mt.is_alive()  # Thread.isAlive() was removed in Python 3.9\n",
    "isRunning = False  # stop the poller before reporting, including on the timeout path\n",
    "\n",
    "if timed_out:\n",
    "    display(Markdown(\"Timeout waiting for cluster to become healthy after {0} seconds.\".format(timeout)))\n",
    "    failures = get_pod_failures()\n",
    "    display(Markdown(\"The following pods/containers are not in a healthy state\"))\n",
    "    display(Markdown(failures))\n",
    "    raise SystemExit(\"Timeout waiting for pods to become ready.\", failures)\n",
    "else:\n",
    "    display(Markdown('Cluster is healthy'))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print('Notebook execution complete.')"
   ]
  }
 ],
 "nbformat": 4,
 "nbformat_minor": 5,
 "metadata": {
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  },
  "azdata": {
   "side_effects": true
  }
 }
}