# Parameters (notebook cell tagged "parameters")
timeout = 600  # amount of time to wait before cluster is healthy: default to 10 minutes
check_interval = 5  # amount of time between health checks - default 5 seconds
min_pod_count = 10  # minimum number of healthy pods required to assert health

# Build the Python Kubernetes CoreV1 client and expose it as the 'api' variable

import os

try:
    from kubernetes import client, config
    from kubernetes.stream import stream

    # When both service variables are set, use the in-cluster configuration;
    # otherwise fall back to the local kubeconfig.
    if all(
        env_var in os.environ
        for env_var in ("KUBERNETES_SERVICE_PORT", "KUBERNETES_SERVICE_HOST")
    ):
        config.load_incluster_config()
    else:
        config.load_kube_config()

    api = client.CoreV1Api()

    print('Kubernetes client instantiated')
except ImportError:
    # Point the user at the install notebook, then re-raise the import failure.
    from IPython.display import Markdown
    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))
    raise
# Place Kubernetes namespace name for BDC into 'namespace' variable

try:
    # The first namespace carrying the MSSQL_CLUSTER label is used; an empty
    # result raises IndexError, which is turned into troubleshooting hints.
    namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name
except IndexError:
    from IPython.display import Markdown
    display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))
    display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))
    display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))
    raise

print('The kubernetes namespace for your big data cluster is: ' + namespace)

# Wait for cluster to become healthy

import threading
import time
import sys

# Loop guard for the polling thread; cleared by the driver code below so the
# thread stops on success as well as on timeout.
isRunning = True

# Set to True by waitReady() only when a health check actually passed, so the
# driver can tell "healthy" apart from "worker thread ended for another reason".
cluster_healthy = False

def all_containers_ready(pod):
    """helper method returns true if all the containers within the given pod are ready

    A pod whose status carries no container statuses (None) is reported as
    not ready instead of raising a TypeError.

    Arguments:
        pod {v1Pod} -- Pod object retrieved from the api call.

    Returns:
        bool -- True when every container status has ready set to True.
    """
    statuses = pod.status.container_statuses
    if statuses is None:
        return False
    return all(c.ready is True for c in statuses)

def pod_is_ready(pod):
    """tests that the pod, and all containers are ready

    Arguments:
        pod {v1Pod} -- Pod object retrieved from the api call.

    Returns:
        bool -- True when the pod phase is "Running" and all containers are ready.
    """
    return pod.status.phase == "Running" and all_containers_ready(pod)

def waitReady():
    """Waits for all pods, and containers to become ready.

    Polls the Kubernetes API every check_interval seconds while isRunning is
    True. The cluster counts as healthy once at least min_pod_count pods exist
    and every listed pod is ready.

    Returns:
        bool -- True once the cluster is healthy, False if the loop was
                stopped (isRunning cleared) before that happened.
    """
    global cluster_healthy

    while isRunning:
        try:
            if namespace is not None:
                display("Checking namespace {0}".format(namespace))
                pods = api.list_namespaced_pod(namespace, _request_timeout=30)
            else:
                display("Checking all namespaces")
                pods = api.list_pod_for_all_namespaces(_request_timeout=30)

            # "At least min_pod_count pods" -> inclusive comparison.
            if len(pods.items) >= min_pod_count and all(map(pod_is_ready, pods.items)):
                cluster_healthy = True
                return True

            display("cluster not healthy, rechecking in {0} seconds.".format(check_interval))
        except Exception:
            # Best effort: report the failure and retry on the next interval.
            display(str(sys.exc_info()))

        time.sleep(check_interval)

    return False

mt = threading.Thread(target=waitReady)
mt.start()
mt.join(timeout=timeout)

# Thread.isAlive() was removed in Python 3.9; is_alive() is the supported name.
timed_out = mt.is_alive()

# Always stop the polling loop, including on timeout, so the worker thread
# does not keep polling after this cell has failed.
isRunning = False

if timed_out:
    raise SystemExit("Timeout waiting for pods to become ready.")
elif not cluster_healthy:
    # The worker ended without a passing health check (e.g. it crashed).
    raise SystemExit("Health check stopped before the cluster became healthy.")
else:
    display("Cluster is healthy")