SOP034 - Wait for BDC to be Healthy
===================================

Blocks until the Big Data Cluster is healthy, or the specified timeout
expires.

The min\_pod\_count parameter sets the minimum number of pods that must
exist in the cluster before the health check can pass. If any existing
pod, including pods beyond this minimum, is unhealthy, the cluster is
not healthy.

Steps
-----

### Parameters

In [None]:
timeout = 600  # seconds to wait for the cluster to become healthy (default: 10 minutes)
check_interval = 5  # seconds to pause between consecutive health checks (default: 5 seconds)
min_pod_count = 10  # pod-count threshold the cluster must reach before it can be reported healthy

### Instantiate Kubernetes client

In [None]:
# Instantiate the Python Kubernetes client into 'api' variable

import os

try:
    from kubernetes import client, config
    from kubernetes.stream import stream

    # Use the in-cluster configuration only when both Kubernetes service
    # environment variables are present; otherwise call load_kube_config().
    if all(var in os.environ for var in ("KUBERNETES_SERVICE_PORT", "KUBERNETES_SERVICE_HOST")):
        config.load_incluster_config()
    else:
        config.load_kube_config()

    # 'api' is the CoreV1Api client object created for the rest of the notebook.
    api = client.CoreV1Api()

    print('Kubernetes client instantiated')
except ImportError:
    # Show a hint pointing at the notebook that installs the module, then
    # re-raise so the failure is still reported.
    from IPython.display import Markdown
    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))
    raise

### Get the namespace for the big data cluster

Get the namespace of the big data cluster from the Kubernetes API.

NOTE: If there is more than one big data cluster in the target
Kubernetes cluster, then change the index \[0\] so that it selects the
correct big data cluster.

In [None]:
# Place Kubernetes namespace name for BDC into 'namespace' variable

try:
    # Take the first namespace returned for the 'MSSQL_CLUSTER' label selector.
    namespace = (
        api.list_namespace(label_selector='MSSQL_CLUSTER')
        .items[0]
        .metadata
        .name
    )
except IndexError:
    # No matching namespace was returned: show the troubleshooting hints and
    # re-raise the original error.
    from IPython.display import Markdown
    display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))
    display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))
    display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))
    raise
else:
    print('The kubernetes namespace for your big data cluster is: ' + namespace)

### Wait for cluster to become healthy

In [None]:
import threading
import time
import sys

# Run flag for the waitReady polling loop: it keeps checking while this is True.
isRunning = True

def all_containers_ready(pod):
    """Return True only when every container in the given pod reports ready.

    A pod whose status carries no container statuses (None or an empty list)
    is reported as not ready, rather than raising TypeError on None or
    passing vacuously on an empty list.

    Arguments:
        pod {v1Pod} -- pod object whose status.container_statuses is inspected.

    Returns:
        bool -- True if at least one container status exists and each one has
        ready set to True, otherwise False.
    """
    statuses = pod.status.container_statuses

    # No status information means readiness cannot be confirmed.
    if not statuses:
        return False

    return all(c.ready is True for c in statuses)

def pod_is_ready(pod):
    """Check that the pod is in the "Running" phase and all its containers are ready.

    Arguments:
        pod {v1Pod} -- pod object whose status.phase and containers are checked.
    """
    # A pod in any phase other than "Running" is not ready; its containers
    # are not inspected in that case.
    if pod.status.phase != "Running":
        return False

    return all_containers_ready(pod)

def waitReady():
    """Poll the cluster until it is healthy or until isRunning is cleared.

    Each pass lists the pods (those in `namespace`, or the pods of every
    namespace when `namespace` is None). The cluster counts as healthy when at
    least `min_pod_count` pods exist and every listed pod passes pod_is_ready.
    An error raised during a pass is displayed and the check is retried after
    `check_interval` seconds.

    Returns:
        True once the cluster is healthy; None when the loop ends because
        isRunning became False first.
    """
    while isRunning:
        try:
            if namespace is not None:
                display("Checking namespace {0}".format(namespace))
                pods = api.list_namespaced_pod(namespace, _request_timeout=30)
            else:
                display("Checking all namespaces")
                pods = api.list_pod_for_all_namespaces(_request_timeout=30)

            # The threshold is inclusive: exactly min_pod_count pods is enough.
            enough_pods = len(pods.items) >= min_pod_count

            if enough_pods and all(map(pod_is_ready, pods.items)):
                return True

            display("cluster not healthy, rechecking in {0} seconds.".format(check_interval))
        except Exception:
            # Best-effort polling: report the failure and try again on the
            # next pass instead of ending the wait.
            display(str(sys.exc_info()))

        time.sleep(check_interval)

# Run the health check on a worker thread so the wait can be bounded by
# 'timeout' seconds through join().
mt = threading.Thread(target=waitReady)
mt.start()
mt.join(timeout=timeout)

# If the worker is still alive after join() returns, the timeout expired.
# Thread.is_alive() is used because Thread.isAlive() was removed in Python 3.9.
timed_out = mt.is_alive()

# Clear the run flag before raising so that the worker's polling loop stops
# instead of continuing to check the cluster after the timeout.
isRunning = False

if timed_out:
    raise SystemExit("Timeout waiting for pods to become ready.")
else:
    display("Cluster is healthy")

In [None]:
# Final cell: print a message indicating the notebook ran to the end.
print('Notebook execution complete.')