{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "TSG093 - Agent log tail for all containers in BDC\n", "=================================================\n", "\n", "Steps\n", "-----\n", "\n", "### Parameters" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "tags": [ "parameters" ] }, "outputs": [], "source": [ "tail_lines = 100\n", "line_offset = 27 # Skip the date/time at start of line\n", "\n", "cmd = f'tail -n {tail_lines} /var/log/agent/agent.log'\n", "\n", "coalesce_duplicates = True" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Analyze log in all pod containers\n", "\n", "### Instantiate Kubernetes client" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "tags": [ "hide_input" ] }, "outputs": [], "source": [ "# Instantiate the Python Kubernetes client into 'api' variable\n", "\n", "import os\n", "\n", "# Import display helpers up front: the HINT message in the kube-config\n", "# error path below needs Markdown in scope before any exception is raised\n", "# (previously it was only imported in the ImportError branch, causing a\n", "# NameError that masked the real configuration error).\n", "from IPython.display import display, Markdown\n", "\n", "try:\n", "    from kubernetes import client, config\n", "    from kubernetes.stream import stream\n", "\n", "    # Use in-cluster config when running inside Kubernetes, otherwise the\n", "    # local kube config file.\n", "    if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n", "        config.load_incluster_config()\n", "    else:\n", "        try:\n", "            config.load_kube_config()\n", "        except:\n", "            display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n", "            raise\n", "    api = client.CoreV1Api()\n", "\n", "    print('Kubernetes client instantiated')\n", "except ImportError:\n", "    display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n", "    raise" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Get the namespace for the big data cluster\n", "\n", "Get the namespace of the Big Data Cluster from the Kubernetes API.\n", "\n", "**NOTE:**\n", "\n", "If there is more than one Big Data Cluster in the target Kubernetes\n", "cluster, then either:\n",
"\n", "- set \\[0\\] to the correct value for the big data cluster.\n", "- set the environment variable AZDATA\\_NAMESPACE, before starting\n", "  Azure Data Studio." ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "tags": [ "hide_input" ] }, "outputs": [], "source": [ "# Place Kubernetes namespace name for BDC into 'namespace' variable\n", "\n", "if \"AZDATA_NAMESPACE\" in os.environ:\n", "    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n", "else:\n", "    try:\n", "        namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name\n", "    except IndexError:\n", "        from IPython.display import Markdown\n", "        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n", "        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n", "        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n", "        raise\n", "\n", "print('The kubernetes namespace for your big data cluster is: ' + namespace)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from IPython.display import Markdown\n", "\n", "import os\n", "import json\n", "import requests\n", "import ipykernel\n", "import datetime\n", "\n", "from urllib.parse import urljoin\n", "from notebook import notebookapp\n", "\n", "def get_notebook_name():\n", "    \"\"\"Return the full path of the jupyter notebook. Some runtimes (e.g. ADS)\n", "    have the kernel_id in the filename of the connection file. If so, the\n", "    notebook name at runtime can be determined using `list_running_servers`.\n", "    Other runtimes (e.g. azdata) do not have the kernel_id in the filename of\n", "    the connection file, therefore we are unable to establish the filename\n", "    \"\"\"\n", "    connection_file = os.path.basename(ipykernel.get_connection_file())\n", "\n", "    # If the runtime has the kernel_id in the connection filename, use it to\n", "    # get the real notebook name at runtime, otherwise, use the notebook\n", "    # filename from build time.\n", "    try:\n", "        kernel_id = connection_file.split('-', 1)[1].split('.')[0]\n", "    except:\n", "        pass\n", "    else:\n", "        for servers in list(notebookapp.list_running_servers()):\n", "            try:\n", "                response = requests.get(urljoin(servers['url'], 'api/sessions'), params={'token': servers.get('token', '')}, timeout=.01)\n", "            except:\n", "                pass\n", "            else:\n", "                for nn in json.loads(response.text):\n", "                    if nn['kernel']['id'] == kernel_id:\n", "                        return nn['path']\n", "\n", "def load_json(filename):\n", "    \"\"\"Load and return the parsed JSON content of `filename`.\"\"\"\n", "    with open(filename, encoding=\"utf8\") as json_file:\n", "        return json.load(json_file)\n", "\n", "def get_notebook_rules():\n", "    \"\"\"Load the notebook rules from the metadata of this notebook (in the .ipynb file)\"\"\"\n", "    file_name = get_notebook_name()\n", "\n", "    if file_name is None:\n", "        return None\n", "    else:\n", "        j = load_json(file_name)\n", "\n", "        if \"azdata\" not in j[\"metadata\"] or \\\n", "            \"expert\" not in j[\"metadata\"][\"azdata\"] or \\\n", "            \"log_analyzer_rules\" not in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", "            return []\n", "        else:\n", "            return j[\"metadata\"][\"azdata\"][\"expert\"][\"log_analyzer_rules\"]\n", "\n", "# get_notebook_rules() returns None when the notebook path cannot be\n", "# determined at runtime; fall back to an empty rule list so the log\n", "# analysis loop below does not raise TypeError iterating over None.\n", "rules = get_notebook_rules() or []\n", "\n", "pod_list = api.list_namespaced_pod(namespace)\n", "\n", "# Tail the agent log in every container of every pod in the BDC namespace,\n", "# coalescing consecutive duplicate lines (ignoring the timestamp prefix)\n", "# and surfacing HINT links for any line matching a known log-analyzer rule.\n", "for pod in pod_list.items:\n", "    container_names = [container.name for container in pod.spec.containers]\n", "    for container in container_names:\n", "        print (f\"*** LOGS for CONTAINER: {container} in POD: {pod.metadata.name}\")\n", "        try:\n", "            logs=stream(api.connect_get_namespaced_pod_exec, pod.metadata.name, namespace, command=['/bin/sh', '-c', cmd], container=container, stderr=True, stdout=True)\n", "\n", "            if coalesce_duplicates:\n", "                previous_line = \"\"\n", "                duplicates = 1\n", "                for line in logs.split('\\n'):\n", "                    # Compare past the date/time prefix so repeated messages\n", "                    # with different timestamps still coalesce.\n", "                    if line[line_offset:] != previous_line[line_offset:]:\n", "                        if duplicates != 1:\n", "                            print(f\"\\t{previous_line} (x{duplicates})\")\n", "                        print(f\"\\t{line}\")\n", "\n", "                        for rule in rules:\n", "                            if line[line_offset:].find(rule[0]) != -1:\n", "                                display(Markdown(f'HINT: Use [{rule[2]}]({rule[3]}) to resolve this issue.'))\n", "\n", "                        duplicates = 1\n", "                    else:\n", "                        duplicates = duplicates + 1\n", "                        continue\n", "\n", "                    previous_line = line\n", "\n", "                # Flush the trailing duplicate count: the loop above only\n", "                # reports a run of duplicates when a different line follows it.\n", "                if duplicates != 1:\n", "                    print(f\"\\t{previous_line} (x{duplicates})\")\n", "            else:\n", "                print(logs)\n", "\n", "        except Exception:\n", "            print (f\"Failed to get LOGS for CONTAINER: {container} in POD: {pod.metadata.name}\")" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print('Notebook execution complete.')" ] } ], "nbformat": 4, "nbformat_minor": 5, "metadata": { "kernelspec": { "name": "python3", "display_name": "Python 3" }, "azdata": { "side_effects": false, "expert": { "log_analyzer_rules": [ [ "Failed to get file names from controller with Error", "TSG040", "TSG040 - Failed to get file names from controller with Error", "../repair/tsg040-failed-get-file-names-controller.ipynb" ], [ "Please increase sysctl fs.aio-max-nr", "TSG041", "TSG041 - Unable to create a new asynchronous I/O context (increase sysctl fs.aio-max-nr)", "../repair/tsg041-increase-fs-aio-max-nr.ipynb" ] ] } } } }