TSG029 - Find dumps in the cluster
==================================

Description
-----------

Look for core dumps and minidumps from processes such as SQL Server or
the controller in a big data cluster.

Steps
-----

### Instantiate Kubernetes client

In [None]:
# Instantiate the Python Kubernetes client into 'api' variable
#
# Uses the in-cluster configuration when both KUBERNETES_SERVICE_PORT and
# KUBERNETES_SERVICE_HOST are set in the environment, otherwise the local
# kube config. On failure a HINT link is rendered and the error is re-raised.

import os

# Imported before the try block so that every failure path below can render
# its hint; previously Markdown was only imported inside the ImportError
# handler, so a kube-config load failure raised NameError instead of showing
# the hint and the real error.
from IPython.display import Markdown, display

try:
    from kubernetes import client, config
    from kubernetes.stream import stream

    if "KUBERNETES_SERVICE_PORT" in os.environ and "KUBERNETES_SERVICE_HOST" in os.environ:
        config.load_incluster_config()
    else:
        try:
            config.load_kube_config()
        except Exception:
            # NOTE(review): this hint links to an nginx log analyzer notebook, which
            # looks unrelated to a kube-config load failure - confirm the intended target.
            display(Markdown(f'HINT: Use [TSG112 - App-Deploy Proxy Nginx Logs](../log-analyzers/tsg112-get-approxy-nginx-logs.ipynb) to resolve this issue.'))
            raise
    api = client.CoreV1Api()

    print('Kubernetes client instantiated')
except ImportError:
    # The kubernetes Python module is not installed (or failed to import).
    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))
    raise

### Get the namespace for the big data cluster

Get the namespace of the Big Data Cluster from the Kubernetes API.

**NOTE:**

If there is more than one Big Data Cluster in the target Kubernetes
cluster, then either:

-   change the \[0\] index in the cell below to select the correct big data cluster.
-   set the environment variable AZDATA\_NAMESPACE, before starting
    Azure Data Studio.

In [None]:
# Resolve the Kubernetes namespace of the BDC into the 'namespace' variable.
# The AZDATA_NAMESPACE environment variable takes precedence; without it, the
# first namespace returned for the MSSQL_CLUSTER label selector is used.

if "AZDATA_NAMESPACE" not in os.environ:
    try:
        bdc_namespaces = api.list_namespace(label_selector='MSSQL_CLUSTER').items
        namespace = bdc_namespaces[0].metadata.name
    except IndexError:
        # Nothing matched the selector: render the troubleshooting hints, then re-raise.
        from IPython.display import Markdown
        for hint in (
            f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.',
            f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.',
            f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.',
        ):
            display(Markdown(hint))
        raise
else:
    namespace = os.environ["AZDATA_NAMESPACE"]

print('The kubernetes namespace for your big data cluster is: ' + namespace)

### Get all relevant pods

In [None]:
# Scan each running pod of interest for core dump / minidump files and collect
# the findings, grouped by pod, into 'all_dumps' (an empty string means that
# no pod reported any matching file).

pod_list = api.list_namespaced_pod(namespace, label_selector='app in (compute-0, data-0, storage-0, master, controller, controldb)', field_selector='status.phase==Running')
pod_names = [pod.metadata.name for pod in pod_list.items]
print('Scanning pods: ' + ', '.join(pod_names))

# find's stderr is discarded: the exec call below is made with stderr=True and
# its returned text also carries the command's stderr output, so an error line
# from find (which never passes through the grep filter) would otherwise be
# reported as a dump file.
command = 'find /var/opt /var/log 2>/dev/null | grep -E "core\\.sqlservr|core\\.controller|SQLD|\\.mdmp$|\\.dmp$|\\.gdmp$"'
dump_sections = []

for name in pod_names:
    print('Searching pod: ' + name)
    # Pods whose name contains 'control-' are searched in the 'controller'
    # container; every other pod is searched in 'mssql-server'.
    container = 'controller' if 'control-' in name else 'mssql-server'

    try:
        dumps = stream(api.connect_get_namespaced_pod_exec, name, namespace, command=['/bin/sh', '-c', command], container=container, stderr=True, stdout=True)
    except Exception as e:
        # Best effort: a pod that cannot be reached is reported and skipped.
        print(f'Unable to connect to pod: {name} due to {str(e.__class__)}. Skipping dump check for this pod...')
    else:
        if dumps:
            dump_sections.append('*Pod: ' + name + '*\n' + dumps + '\n')

all_dumps = ''.join(dump_sections)

### Validate

Validate no dump files were found.

In [None]:
# Abort the notebook with the collected listing when the scan found anything;
# otherwise report success.
if all_dumps:
    raise SystemExit('FAIL - dump files found:\n' + all_dumps)

print('SUCCESS - no dump files were found.')

In [None]:
# Final marker: reaching this cell means every preceding cell ran without raising.
print("Notebook execution complete.")