{
 "cells": [
  {
   "cell_type": "markdown",
   "execution_count": null,
   "metadata": {"tags": []},
   "outputs": [],
   "source": [
    "# CER050 - Wait for BDC to be Healthy\n",
    "\n",
    "This notebook will wait until the Big Data Cluster has returned to a\n",
    "healthy state, after the `Controller` pod and pods that use `PolyBase`\n",
    "have been restarted to load the new certificates.\n",
    "\n",
    "## Steps\n",
    "\n",
    "### Parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["parameters"]},
   "outputs": [],
   "source": [
    "timeout = 600  # amount of time to wait before cluster is healthy: default to 10 minutes\n",
    "check_interval = 30  # amount of time between health checks - default 30 seconds\n",
    "min_pod_count = 10  # minimum number of healthy pods required to assert health"
   ]
  },
  {
   "cell_type": "markdown",
   "execution_count": null,
   "metadata": {"tags": []},
   "outputs": [],
   "source": [
    "### Instantiate Kubernetes client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["hide_input"]},
   "outputs": [],
   "source": [
    "# Instantiate the Python Kubernetes client into 'api' variable\n",
    "\n",
    "import os\n",
    "from IPython.display import Markdown\n",
    "\n",
    "try:\n",
    "    from kubernetes import client, config\n",
    "    from kubernetes.stream import stream\n",
    "except ImportError:\n",
    "\n",
    "    # Install the Kubernetes module into the interpreter running this kernel\n",
    "    import sys\n",
    "    !{sys.executable} -m pip install kubernetes\n",
    "\n",
    "    try:\n",
    "        from kubernetes import client, config\n",
    "        from kubernetes.stream import stream\n",
    "    except ImportError:\n",
    "        display(Markdown('HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n",
    "        raise\n",
    "\n",
    "# Both variables being present selects the in-cluster configuration;\n",
    "# otherwise the local kube config file is loaded.\n",
    "if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n",
    "    config.load_incluster_config()\n",
    "else:\n",
    "    try:\n",
    "        config.load_kube_config()\n",
    "    except Exception:\n",
    "        display(Markdown('HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n",
    "        raise\n",
    "\n",
    "api = client.CoreV1Api()\n",
    "\n",
    "print('Kubernetes client instantiated')"
   ]
  },
  {
   "cell_type": "markdown",
   "execution_count": null,
   "metadata": {"tags": []},
   "outputs": [],
   "source": [
    "### Get the namespace for the big data cluster\n",
    "\n",
    "Get the namespace of the Big Data Cluster from the Kubernetes API.\n",
    "\n",
    "**NOTE:**\n",
    "\n",
    "If there is more than one Big Data Cluster in the target Kubernetes\n",
    "cluster, then either:\n",
    "\n",
    "- set \\[0\\] to the correct value for the big data cluster.\n",
    "- set the environment variable AZDATA_NAMESPACE, before starting Azure\n",
    "  Data Studio."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["hide_input"]},
   "outputs": [],
   "source": [
    "# Place Kubernetes namespace name for BDC into 'namespace' variable\n",
    "\n",
    "if \"AZDATA_NAMESPACE\" in os.environ:\n",
    "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n",
    "else:\n",
    "    try:\n",
    "        # The first namespace carrying the MSSQL_CLUSTER label is used;\n",
    "        # an empty result raises IndexError and triggers the hints below.\n",
    "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n",
    "    except IndexError:\n",
    "        from IPython.display import Markdown\n",
    "        display(Markdown('HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n",
    "        display(Markdown('HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n",
    "        display(Markdown('HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n",
    "        raise\n",
    "\n",
    "print('The kubernetes namespace for your big data cluster is: ' + namespace)"
   ]
  },
  {
   "cell_type": "markdown",
   "execution_count": null,
   "metadata": {"tags": []},
   "outputs": [],
   "source": [
    "### Helper functions for waiting for the cluster to become healthy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": ["hide_input"]},
   "outputs": [],
   "source": [
    "import threading\n",
    "import time\n",
    "import sys\n",
    "import os\n",
    "from IPython.display import Markdown\n",
    "\n",
    "# Cooperative stop flag polled by waitReady(); wait_for_cluster_healthy()\n",
    "# clears it when the wait ends so the background thread stops polling.\n",
    "isRunning = True\n",
    "\n",
    "\n",
    "def all_containers_ready(pod):\n",
    "    \"\"\"Return True when every container in the given pod reports ready.\n",
    "\n",
    "    A pod whose container statuses have not been reported yet\n",
    "    (`container_statuses` is None) is treated as not ready.\n",
    "\n",
    "    Arguments:\n",
    "        pod {V1Pod} -- Pod object returned by the Kubernetes API.\n",
    "    \"\"\"\n",
    "    statuses = pod.status.container_statuses\n",
    "    if statuses is None:\n",
    "        return False\n",
    "\n",
    "    return all(c.ready is True for c in statuses)\n",
    "\n",
    "\n",
    "def pod_is_ready(pod):\n",
    "    \"\"\"Return True when the pod counts as healthy.\n",
    "\n",
    "    A pod carrying a `job-name` label is always counted as ready; any other\n",
    "    pod must be in the `Running` phase with all of its containers ready.\n",
    "\n",
    "    Arguments:\n",
    "        pod {V1Pod} -- Pod object returned by the Kubernetes API.\n",
    "    \"\"\"\n",
    "    labels = pod.metadata.labels or {}  # labels is None when the pod has no labels\n",
    "\n",
    "    return \"job-name\" in labels or (pod.status.phase == \"Running\" and all_containers_ready(pod))\n",
    "\n",
    "\n",
    "def waitReady():\n",
    "    \"\"\"Poll the pods until all of them are ready, or until polling is stopped.\n",
    "\n",
    "    Returns:\n",
    "        True as soon as at least `min_pod_count` pods exist and every one of\n",
    "        them is ready; False if `isRunning` is cleared before that happens.\n",
    "    \"\"\"\n",
    "    while isRunning:\n",
    "        # The delay also precedes the very first check (kept from the original\n",
    "        # flow) - presumably so restarted pods have time to go down before\n",
    "        # health is sampled.\n",
    "        time.sleep(check_interval)\n",
    "\n",
    "        if not isRunning:\n",
    "            break  # the caller gave up while this thread was sleeping\n",
    "\n",
    "        try:\n",
    "            pods = get_pods()\n",
    "            allReady = len(pods.items) \u003e= min_pod_count and all(map(pod_is_ready, pods.items))\n",
    "\n",
    "            if allReady:\n",
    "                return True\n",
    "\n",
    "            display(Markdown(get_pod_failures(pods)))\n",
    "            display(Markdown(f\"cluster not healthy, rechecking in {check_interval} seconds.\"))\n",
    "        except Exception as ex:\n",
    "            # Best effort: report the error and retry on the next iteration.\n",
    "            # The sleep at the top of the loop already provides the delay.\n",
    "            display(Markdown(str(ex)))\n",
    "\n",
    "    return False\n",
    "\n",
    "\n",
    "def get_pod_failures(pods=None):\n",
    "    \"\"\"Return a status message for any pods that are not ready.\n",
    "\n",
    "    Arguments:\n",
    "        pods {V1PodList} -- Pods to inspect; fetched with get_pods() when omitted.\n",
    "\n",
    "    Returns:\n",
    "        A string with one `\u003cbr/\u003e`-terminated entry per container that is not\n",
    "        ready, or per pod that has no container statuses. Pods carrying a\n",
    "        `job-name` label are skipped. Empty when nothing is reported.\n",
    "    \"\"\"\n",
    "    if pods is None:\n",
    "        pods = get_pods()\n",
    "\n",
    "    messages = []\n",
    "    for pod in pods.items:\n",
    "        if \"job-name\" in (pod.metadata.labels or {}):\n",
    "            continue\n",
    "\n",
    "        if pod.status and pod.status.container_statuses:\n",
    "            for container in pod.status.container_statuses:\n",
    "                if container.ready is False:\n",
    "                    messages.append(\"Container {0} in Pod {1} is not ready. Reported status: {2} \u003cbr/\u003e\".format(container.name, pod.metadata.name, container.state))\n",
    "        else:\n",
    "            messages.append(\"Pod {0} is not ready. \u003cbr/\u003e\".format(pod.metadata.name))\n",
    "\n",
    "    return \"\".join(messages)\n",
    "\n",
    "\n",
    "def get_pods():\n",
    "    \"\"\"Return the pods of `namespace`, or of all namespaces when `namespace` is None.\n",
    "\n",
    "    Each API request is limited to 30 seconds.\n",
    "    \"\"\"\n",
    "    pods = None\n",
    "    if namespace is not None:\n",
    "        display(Markdown(f'Checking namespace {namespace}'))\n",
    "        pods = api.list_namespaced_pod(namespace, _request_timeout=30)\n",
    "    else:\n",
    "        display(Markdown('Checking all namespaces'))\n",
    "        pods = api.list_pod_for_all_namespaces(_request_timeout=30)\n",
    "    return pods\n",
    "\n",
    "\n",
    "def wait_for_cluster_healthy():\n",
    "    \"\"\"Block until the cluster is healthy or `timeout` seconds have elapsed.\n",
    "\n",
    "    Polling runs in a background thread (waitReady) that is joined with the\n",
    "    configured timeout.\n",
    "\n",
    "    Raises:\n",
    "        SystemExit -- the polling thread is still running after `timeout` seconds.\n",
    "    \"\"\"\n",
    "    # Without the global declaration the assignments below would only create a\n",
    "    # local variable and the polling thread would never be told to stop.\n",
    "    global isRunning\n",
    "    isRunning = True\n",
    "\n",
    "    # daemon=True: a poller stuck in a request must not keep the process alive.\n",
    "    mt = threading.Thread(target=waitReady, daemon=True)\n",
    "    mt.start()\n",
    "\n",
    "    try:\n",
    "        mt.join(timeout=timeout)\n",
    "\n",
    "        if mt.is_alive():\n",
    "            raise SystemExit(\"Timeout waiting for all cluster to be healthy.\")\n",
    "    finally:\n",
    "        # Always stop the poller, including on timeout or interruption.\n",
    "        isRunning = False"
   ]
  },
  {
   "cell_type": "markdown",
   "execution_count": null,
   "metadata": {"tags": []},
   "outputs": [],
   "source": [
    "### Wait for cluster to get healthy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": []},
   "outputs": [],
   "source": [
    "wait_for_cluster_healthy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"tags": []},
   "outputs": [],
   "source": [
    "print(\"Notebook execution is complete.\")"
   ]
  }
 ],
 "nbformat": 4,
 "nbformat_minor": 5,
 "metadata": {
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  },
  "pansop": {
   "related": "",
   "test": {
    "strategy": "",
    "types": null,
    "disable": {
     "reason": "",
     "workitems": null,
     "types": null
    }
   },
   "target": {
    "current": "",
    "final": ""
   },
   "internal": {
    "parameters": null,
    "symlink": false
   },
   "timeout": "0"
  },
  "language_info": {
   "codemirror_mode": "{ Name: \"\", Version: \"\"}",
   "file_extension": "",
   "mimetype": "",
   "name": "",
   "nbconvert_exporter": "",
   "pygments_lexer": "",
   "version": ""
  },
  "widgets": []
 }
}