{"cells":[{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["# CER043 - Install signed Master certificates\n","\n","This notebook installs into the Big Data Cluster the certificates signed\n","using:\n","\n","- [CER033 - Sign Master certificates with generated\n"," CA](../cert-management/cer033-sign-master-generated-certs.ipynb)\n","\n","## Steps\n","\n","### Parameters"]},{"cell_type":"code","execution_count":null,"metadata":{"tags":["parameters"]},"outputs":[],"source":["app_name = \"master\"\n","scaledset_name = \"master\"\n","container_name = \"mssql-server\"\n","common_name = \"master-svc\"\n","user = \"mssql\"\n","group = \"mssql\"\n","mode = \"550\"\n","\n","prefix_keyfile_name = \"sql\"\n","certificate_names = {\"master-0\" : \"master-0-certificate.pem\", \"master-1\" : \"master-1-certificate.pem\", \"master-2\" : \"master-2-certificate.pem\"}\n","key_names = {\"master-0\" : \"master-0-privatekey.pem\", \"master-1\" : \"master-1-privatekey.pem\", \"master-2\" : \"master-2-privatekey.pem\"}\n","\n","test_cert_store_root = \"/var/opt/secrets/test-certificates\"\n","\n","timeout = 600 # amount of time to wait before cluster is healthy: default to 10 minutes\n","check_interval = 10 # amount of time between health checks - default 10 seconds\n","min_pod_count = 10 # minimum number of healthy pods required to assert health"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Common functions\n","\n","Define helper functions used in this notebook."]},{"cell_type":"code","execution_count":null,"metadata":{"tags":["hide_input"]},"outputs":[],"source":["# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n","import sys\n","import os\n","import re\n","import platform\n","import shlex\n","import shutil\n","import datetime\n","\n","from subprocess import Popen, PIPE\n","from IPython.display import Markdown\n","\n","retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n","error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n","install_hint = {} # The SOP to help install the executable if it cannot be found\n","\n","def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n"," \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n","\n"," NOTES:\n","\n"," 1. Commands that need this kind of ' quoting on Windows e.g.:\n","\n"," kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n","\n"," Need to actually pass in as '\"':\n","\n"," kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n","\n"," The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n"," \n"," `iter(p.stdout.readline, b'')`\n","\n"," The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n"," \"\"\"\n"," MAX_RETRIES = 5\n"," output = \"\"\n"," retry = False\n","\n"," # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n"," #\n"," # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n"," #\n"," if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n"," cmd = cmd.replace(\"\\n\", \" \")\n","\n"," # shlex.split is required on bash and for Windows paths with spaces\n"," #\n"," cmd_actual = shlex.split(cmd)\n","\n"," # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n"," #\n"," user_provided_exe_name = cmd_actual[0].lower()\n","\n"," # When running python, use the python in the ADS sandbox ({sys.executable})\n"," #\n"," if cmd.startswith(\"python \"):\n"," cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n","\n"," # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n"," # with:\n"," #\n"," # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n"," #\n"," # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n"," #\n"," if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n"," os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n","\n"," # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n"," #\n"," if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n"," cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n","\n"," # To aid supportability, determine which binary file will actually be executed on the machine\n"," #\n"," which_binary = None\n","\n"," # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n"," # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n"," # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n"," # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n"," # look for the 2nd installation of CURL in the path)\n"," if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n"," path = os.getenv('PATH')\n"," for p in path.split(os.path.pathsep):\n"," p = os.path.join(p, \"curl.exe\")\n"," if os.path.exists(p) and os.access(p, os.X_OK):\n"," if p.lower().find(\"system32\") == -1:\n"," cmd_actual[0] = p\n"," which_binary = p\n"," break\n","\n"," # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n"," # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n"," #\n"," # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n"," #\n"," if which_binary == None:\n"," which_binary = shutil.which(cmd_actual[0])\n","\n"," # Display an install HINT, so the user can click on a SOP to install the missing binary\n"," #\n"," if which_binary == None:\n"," print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n"," print(sys.path)\n","\n"," if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n"," display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n","\n"," raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n"," else: \n"," cmd_actual[0] = which_binary\n","\n"," start_time = datetime.datetime.now().replace(microsecond=0)\n","\n"," print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n"," print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n"," print(f\" cwd: {os.getcwd()}\")\n","\n"," # Command-line tools such as CURL and AZDATA HDFS commands output\n"," # scrolling progress bars, which causes Jupyter to hang forever, to\n"," # workaround this, use no_output=True\n"," #\n","\n"," # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n"," #\n"," wait = True \n","\n"," try:\n"," if no_output:\n"," p = Popen(cmd_actual)\n"," else:\n"," p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n"," with p.stdout:\n"," for line in iter(p.stdout.readline, b''):\n"," line = line.decode()\n"," if return_output:\n"," output = output + line\n"," else:\n"," if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n"," regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n"," match = regex.match(line)\n"," if match:\n"," if match.group(1).find(\"HTML\") != -1:\n"," display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n"," else:\n"," display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n","\n"," wait = False\n"," break # otherwise infinite hang, have not worked out why yet.\n"," else:\n"," print(line, end='')\n","\n"," if wait:\n"," p.wait()\n"," except FileNotFoundError as e:\n"," if install_hint is not None:\n"," display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n","\n"," raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n","\n"," exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n","\n"," if not no_output:\n"," for line in iter(p.stderr.readline, b''):\n"," try:\n"," line_decoded = line.decode()\n"," except UnicodeDecodeError:\n"," # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n"," #\n"," # \\xa0\n"," #\n"," # For example see this in the response from `az group create`:\n"," #\n"," # ERROR: Get Token request returned http error: 400 and server \n"," # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n"," # The refresh token has expired due to inactivity.\\xa0The token was \n"," # issued on 2018-10-25T23:35:11.9832872Z\n"," #\n"," # which generates the exception:\n"," #\n"," # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n"," #\n"," print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n"," print(line)\n"," line_decoded = \"\"\n"," pass\n"," else:\n","\n"," # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n"," # print this empty \"ERR:\" as it confuses.\n"," #\n"," if line_decoded == \"\":\n"," continue\n"," \n"," print(f\"STDERR: {line_decoded}\", end='')\n","\n"," if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n"," exit_code_workaround = 1\n","\n"," # inject HINTs to next TSG/SOP based on output in stderr\n"," #\n"," if user_provided_exe_name in error_hints:\n"," for error_hint in error_hints[user_provided_exe_name]:\n"," if line_decoded.find(error_hint[0]) != -1:\n"," display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n","\n"," # Verify if a transient error, if so automatically retry (recursive)\n"," #\n"," if user_provided_exe_name in retry_hints:\n"," for retry_hint in retry_hints[user_provided_exe_name]:\n"," if line_decoded.find(retry_hint) != -1:\n"," if retry_count \u003c MAX_RETRIES:\n"," print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n"," retry_count = retry_count + 1\n"," output = run(cmd, return_output=return_output, retry_count=retry_count)\n","\n"," if return_output:\n"," if base64_decode:\n"," import base64\n"," return base64.b64decode(output).decode('utf-8')\n"," else:\n"," return output\n","\n"," elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n","\n"," # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n"," # don't wait here, if success known above\n"," #\n"," if wait: \n"," if p.returncode != 0:\n"," raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n"," else:\n"," if exit_code_workaround !=0 :\n"," raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n","\n"," print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n","\n"," if return_output:\n"," if base64_decode:\n"," import base64\n"," return base64.b64decode(output).decode('utf-8')\n"," else:\n"," return output\n","\n","\n","\n","# Hints for tool retry (on transient fault), known errors and install guide\n","#\n","retry_hints = {'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)', 'SSPI Provider: No Kerberos credentials available', ], 'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond', ], 'python': [ ], }\n","error_hints = {'azdata': [['Please run \\'azdata login\\' to first authenticate', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Can\\'t open lib \\'ODBC Driver 17 for SQL Server', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], ['NameError: name \\'azdata_login_secret_name\\' is not defined', 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', 'TSG124 - \\'No credentials were supplied\\' error from azdata login', '../repair/tsg124-no-credentials-were-supplied.ipynb'], ['Please accept the license terms to use this product through', 'TSG126 - azdata fails with \\'accept the license terms to use this product\\'', '../repair/tsg126-accept-license-terms.ipynb'], ], 'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb'], ], 'python': [['Library not loaded: /usr/local/opt/unixodbc', 'SOP012 - Install unixodbc for Mac', '../install/sop012-brew-install-odbc-for-sql-server.ipynb'], ['WARNING: You are using pip version', 'SOP040 - Upgrade pip in ADS Python sandbox', '../install/sop040-upgrade-pip.ipynb'], ], }\n","install_hint = {'azdata': [ 'SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb' ], 'kubectl': [ 'SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb' ], }\n","\n","\n","print('Common functions defined successfully.')"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Get the Kubernetes namespace for the big data cluster\n","\n","Get the namespace of the Big Data Cluster use the kubectl command line\n","interface .\n","\n","**NOTE:**\n","\n","If there is more than one Big Data Cluster in the target Kubernetes\n","cluster, then either:\n","\n","- set \\[0\\] to the correct value for the big data cluster.\n","- set the environment variable AZDATA_NAMESPACE, before starting Azure\n"," Data Studio."]},{"cell_type":"code","execution_count":null,"metadata":{"tags":["hide_input"]},"outputs":[],"source":["# Place Kubernetes namespace name for BDC into 'namespace' variable\n","\n","if \"AZDATA_NAMESPACE\" in os.environ:\n"," namespace = os.environ[\"AZDATA_NAMESPACE\"]\n","else:\n"," try:\n"," namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n"," except:\n"," from IPython.display import Markdown\n"," print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'. SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n"," display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n"," display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n"," display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n"," raise\n","\n","print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Create a temporary directory to stage files"]},{"cell_type":"code","execution_count":null,"metadata":{"tags":["hide_input"]},"outputs":[],"source":["# Create a temporary directory to hold configuration files\n","\n","import tempfile\n","\n","temp_dir = tempfile.mkdtemp()\n","\n","print(f\"Temporary directory created: {temp_dir}\")"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Helper function to save configuration files to disk"]},{"cell_type":"code","execution_count":null,"metadata":{"tags":["hide_input"]},"outputs":[],"source":["# Define helper function 'save_file' to save configuration files to the temporary directory created above\n","import os\n","import io\n","\n","def save_file(filename, contents):\n"," with io.open(os.path.join(temp_dir, filename), \"w\", encoding='utf8', newline='\\n') as text_file:\n"," text_file.write(contents)\n","\n"," print(\"File saved: \" + os.path.join(temp_dir, filename))\n","\n","print(\"Function `save_file` defined successfully.\")"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Instantiate Kubernetes client"]},{"cell_type":"code","execution_count":null,"metadata":{"tags":["hide_input"]},"outputs":[],"source":["# Instantiate the Python Kubernetes client into 'api' variable\n","\n","import os\n","from IPython.display import Markdown\n","\n","try:\n"," from kubernetes import client, config\n"," from kubernetes.stream import stream\n","except ImportError: \n","\n"," # Install the Kubernetes module\n"," import sys\n"," !{sys.executable} -m pip install kubernetes \n"," \n"," try:\n"," from kubernetes import client, config\n"," from kubernetes.stream import stream\n"," except ImportError:\n"," display(Markdown(f'HINT: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))\n"," raise\n","\n","if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n"," config.load_incluster_config()\n","else:\n"," try:\n"," config.load_kube_config()\n"," except:\n"," display(Markdown(f'HINT: Use [TSG118 - Configure Kubernetes config](../repair/tsg118-configure-kube-config.ipynb) to resolve this issue.'))\n"," raise\n","\n","api = client.CoreV1Api()\n","\n","print('Kubernetes client instantiated')"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Helper functions for waiting for the cluster to become healthy"]},{"cell_type":"code","execution_count":null,"metadata":{"tags":["hide_input"]},"outputs":[],"source":["import threading\n","import time\n","import sys\n","import os\n","from IPython.display import Markdown\n","\n","isRunning = True\n","\n","def all_containers_ready(pod):\n"," \"\"\"helper method returns true if all the containers within the given pod are ready\n","\n"," Arguments:\n"," pod {v1Pod} -- Metadata retrieved from the api call to.\n"," \"\"\"\n"," \n"," return all(map(lambda c: c.ready is True, pod.status.container_statuses))\n","\n","\n","def pod_is_ready(pod):\n"," \"\"\"tests that the pod, and all containers are ready\n","\n"," Arguments:\n"," pod {v1Pod} -- Metadata retrieved from api call.\n"," \"\"\"\n","\n"," return \"job-name\" in pod.metadata.labels or (pod.status.phase == \"Running\" and all_containers_ready(pod))\n","\n","\n","def waitReady():\n"," \"\"\"Waits for all pods, and containers to become ready.\n"," \"\"\"\n"," while isRunning:\n"," try:\n"," time.sleep(check_interval)\n"," pods = get_pods()\n"," allReady = len(pods.items) \u003e= min_pod_count and all(map(pod_is_ready, pods.items))\n","\n"," if allReady:\n"," return True\n"," else:\n"," display(Markdown(get_pod_failures(pods)))\n"," display(Markdown(f\"cluster not healthy, rechecking in {check_interval} seconds.\"))\n"," except Exception as ex:\n"," last_error_message = str(ex)\n"," display(Markdown(last_error_message))\n"," time.sleep(check_interval)\n","\n","def get_pod_failures(pods=None):\n"," \"\"\"Returns a status message for any pods that are not ready.\n"," \"\"\"\n"," results = \"\"\n"," if not pods:\n"," pods = get_pods()\n","\n"," for pod in pods.items:\n"," if \"job-name\" not in pod.metadata.labels:\n"," if pod.status and pod.status.container_statuses:\n"," for container in filter(lambda c: c.ready is False, pod.status.container_statuses):\n"," results = results + \"Container {0} in Pod {1} is not ready. Reported status: {2} \u003cbr/\u003e\".format(container.name, pod.metadata.name, container.state) \n"," else:\n"," results = results + \"Pod {0} is not ready. \u003cbr/\u003e\".format(pod.metadata.name)\n"," return results\n","\n","\n","def get_pods():\n"," \"\"\"Returns a list of pods by namespace, or all namespaces if no namespace is specified\n"," \"\"\"\n"," pods = None\n"," if namespace is not None:\n"," display(Markdown(f'Checking namespace {namespace}'))\n"," pods = api.list_namespaced_pod(namespace, _request_timeout=30) \n"," else:\n"," display(Markdown('Checking all namespaces'))\n"," pods = api.list_pod_for_all_namespaces(_request_timeout=30)\n"," return pods\n","\n","def wait_for_cluster_healthy():\n"," isRunning = True\n"," mt = threading.Thread(target=waitReady)\n"," mt.start()\n"," mt.join(timeout=timeout)\n","\n"," if mt.is_alive():\n"," raise SystemExit(\"Timeout waiting for all cluster to be healthy.\")\n"," \n"," isRunning = False"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Get name of the ‘Running’ `controller` `pod`"]},{"cell_type":"code","execution_count":null,"metadata":{"tags":["hide_input"]},"outputs":[],"source":["# Place the name of the 'Running' controller pod in variable `controller`\n","\n","controller = run(f'kubectl get pod --selector=app=controller -n {namespace} -o jsonpath={{.items[0].metadata.name}} --field-selector=status.phase=Running', return_output=True)\n","\n","print(f\"Controller pod name: {controller}\")"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Get the name of the `master` `pods`"]},{"cell_type":"code","execution_count":null,"metadata":{"tags":["hide_input"]},"outputs":[],"source":["# Place the name of the master pods in variable `pods`\n","\n","podNames = run(f'kubectl get pod --selector=app=master -n {namespace} -o jsonpath={{.items[*].metadata.name}}', return_output=True)\n","pods = podNames.split(\" \")\n","\n","print(f\"Master pod names: {pods}\")"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Validate certificate common name and alt names"]},{"cell_type":"code","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["import json\n","from urllib.parse import urlparse\n","\n","kubernetes_default_record_name = 'kubernetes.default'\n","kubernetes_default_svc_prefix = 'kubernetes.default.svc'\n","default_dns_suffix = 'svc.cluster.local'\n","dns_suffix = ''\n","\n","nslookup_output=run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"nslookup {kubernetes_default_record_name} \u003e /tmp/nslookup.out; cat /tmp/nslookup.out; rm /tmp/nslookup.out\" ', return_output=True)\n","\n","name = re.findall('Name:\\s+(.[^,|^\\s|^\\n]+)', nslookup_output)\n","\n","if not name or kubernetes_default_svc_prefix not in name[0]:\n"," dns_suffix = default_dns_suffix\n","else:\n"," dns_suffix = 'svc' + name[0].replace(kubernetes_default_svc_prefix, '')\n","\n","pods.sort()\n"," \n","for pod_name in pods:\n","\n"," alt_names = \"\"\n"," bdc_fqdn = \"\"\n","\n"," alt_names += f\"DNS.1 = {common_name}\\n\"\n"," alt_names += f\"DNS.2 = {common_name}.{namespace}.{dns_suffix} \\n\"\n","\n"," hdfs_vault_svc = \"hdfsvault-svc\"\n"," bdc_config = run(\"azdata bdc config show\", return_output=True)\n"," bdc_config = json.loads(bdc_config)\n","\n"," dns_counter = 3 # DNS.1 and DNS.2 are already in the certificate template\n","\n"," # Stateful set related DNS names\n"," #\n"," if app_name == \"gateway\" or app_name == \"master\":\n"," alt_names += f'DNS.{str(dns_counter)} = {pod_name}.{common_name}\\n'\n"," dns_counter = dns_counter + 1\n"," alt_names += f'DNS.{str(dns_counter)} = {pod_name}.{common_name}.{namespace}.{dns_suffix}\\n'\n"," dns_counter = dns_counter + 1\n","\n"," # AD related DNS names\n"," #\n"," if \"security\" in bdc_config[\"spec\"] and \"activeDirectory\" in bdc_config[\"spec\"][\"security\"]:\n"," domain_dns_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"domainDnsName\"]\n"," subdomain_name = bdc_config[\"spec\"][\"security\"][\"activeDirectory\"][\"subdomain\"]\n","\n"," if subdomain_name:\n"," bdc_fqdn = f\"{subdomain_name}.{domain_dns_name}\"\n"," else:\n"," bdc_fqdn = f\"{namespace}.{domain_dns_name}\"\n"," \n"," alt_names += f\"DNS.{str(dns_counter)} = {common_name}.{bdc_fqdn}\\n\"\n"," dns_counter = dns_counter + 1\n","\n"," if app_name == \"gateway\" or app_name == \"master\":\n"," alt_names += f'DNS.{str(dns_counter)} = {pod_name}.{bdc_fqdn}\\n'\n"," dns_counter = dns_counter + 1\n","\n"," # Endpoint DNS names for bdc certificates\n"," #\n"," if app_name in bdc_config[\"spec\"][\"resources\"]:\n"," app_name_endpoints = bdc_config[\"spec\"][\"resources\"][app_name][\"spec\"][\"endpoints\"]\n"," for endpoint in app_name_endpoints:\n"," if \"dnsName\" in endpoint:\n"," alt_names += f'DNS.{str(dns_counter)} = {endpoint[\"dnsName\"]}\\n'\n"," dns_counter = dns_counter + 1\n"," \n"," # Endpoint DNS names for control plane certificates\n"," #\n"," if app_name == \"controller\" or app_name == \"mgmtproxy\":\n"," bdc_endpoint_list = run(\"azdata bdc endpoint list\", return_output=True)\n"," bdc_endpoint_list = json.loads(bdc_endpoint_list)\n","\n"," # Parse the DNS host name from:\n"," #\n"," # \"endpoint\": \"https://monitor.aris.local:30777\"\n"," # \n"," for endpoint in bdc_endpoint_list:\n"," if endpoint[\"name\"] == app_name:\n"," url = urlparse(endpoint[\"endpoint\"])\n"," alt_names += f\"DNS.{str(dns_counter)} = {url.hostname}\\n\"\n"," dns_counter = dns_counter + 1\n","\n"," # Special case for the controller certificate\n"," #\n"," if app_name == \"controller\":\n"," alt_names += f\"DNS.{str(dns_counter)} = localhost\\n\"\n"," dns_counter = dns_counter + 1\n","\n"," # Add hdfsvault-svc host for key management calls.\n"," #\n"," alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}\\n\"\n"," dns_counter = dns_counter + 1\n","\n"," # Add hdfsvault-svc FQDN for key management calls.\n"," #\n"," if bdc_fqdn:\n"," alt_names += f\"DNS.{str(dns_counter)} = {hdfs_vault_svc}.{bdc_fqdn}\\n\"\n"," dns_counter = dns_counter + 1\n","\n"," required_dns_names = re.findall('DNS\\.[0-9] = ([^,|^\\s|^\\n]+)', alt_names)\n","\n"," # Get certificate common name and DNS names\n"," # use nameopt compat, to generate CN= format on all versions of openssl\n"," # \n"," cert = run(f'kubectl exec {controller} -c controller -n {namespace} -- openssl x509 -nameopt compat -in {test_cert_store_root}/{app_name}/{certificate_names[pod_name]} -text -noout', return_output=True)\n"," subject = re.findall('Subject:(.+)', cert)[0]\n"," certficate_common_name = re.findall('CN=(.[^,|^\\s|^\\n]+)', subject)[0]\n"," certficate_dns_names = re.findall('DNS:(.[^,|^\\s|^\\n]+)', cert)\n","\n"," # Validate the common name\n"," #\n"," if (common_name != certficate_common_name):\n"," run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"rm -rf {test_cert_store_root}/{app_name}\"')\n"," raise SystemExit(f'Certficate common name does not match the expected one: {common_name}')\n","\n"," # Validate the DNS names\n"," #\n"," if not all(dns_name in certficate_dns_names for dns_name in required_dns_names):\n"," run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"rm -rf {test_cert_store_root}/{app_name}\"')\n"," raise SystemExit(f'Certficate does not have all required DNS names: {required_dns_names}')"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Copy certifcate files from `controller` to local machine"]},{"cell_type":"code","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["import os\n","\n","cwd = os.getcwd()\n","os.chdir(temp_dir) # Use chdir to workaround kubectl bug on Windows, which incorrectly processes 'c:\\' on kubectl cp cmd line \n","\n","for pod_name in pods:\n","\n"," run(f'kubectl cp {controller}:{test_cert_store_root}/{app_name}/{certificate_names[pod_name]} {certificate_names[pod_name]} -c controller -n {namespace}')\n"," run(f'kubectl cp {controller}:{test_cert_store_root}/{app_name}/{key_names[pod_name]} {key_names[pod_name]} -c controller -n {namespace}')\n","\n","os.chdir(cwd)"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Copy certifcate files from local machine to `controldb`"]},{"cell_type":"code","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["import os\n","\n","cwd = os.getcwd()\n","os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n"," \n","for pod_name in pods:\n"," run(f'kubectl cp {certificate_names[pod_name]} controldb-0:/var/opt/mssql/{certificate_names[pod_name]} -c mssql-server -n {namespace}')\n"," run(f'kubectl cp {key_names[pod_name]} controldb-0:/var/opt/mssql/{key_names[pod_name]} -c mssql-server -n {namespace}')\n","\n","os.chdir(cwd)"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Get the `controller-db-rw-secret` secret\n","\n","Get the controller SQL symmetric key password for decryption."]},{"cell_type":"code","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["import base64\n","\n","controller_db_rw_secret = run(f'kubectl get secret/controller-db-rw-secret -n {namespace} -o jsonpath={{.data.encryptionPassword}}', return_output=True)\n","controller_db_rw_secret = base64.b64decode(controller_db_rw_secret).decode('utf-8')\n","\n","print(\"controller_db_rw_secret retrieved\")"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Update the files table with the certificates through opened SQL connection"]},{"cell_type":"code","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["import os\n","\n","sql = f\"\"\"\n","OPEN SYMMETRIC KEY ControllerDbSymmetricKey DECRYPTION BY PASSWORD = '{controller_db_rw_secret}'\n","\n","DECLARE @FileData VARBINARY(MAX), @Key uniqueidentifier;\n","SELECT @Key = KEY_GUID('ControllerDbSymmetricKey');\n","\n","\"\"\"\n"," \n","for pod_name in pods:\n","\n"," insert = f\"\"\"\n"," SELECT TOP 1 @FileData = doc.BulkColumn FROM OPENROWSET(BULK N'/var/opt/mssql/{certificate_names[pod_name]}', SINGLE_BLOB) AS doc;\n"," EXEC [dbo].[sp_set_file_data_encrypted] @FilePath = '/config/scaledsets/{scaledset_name}/pods/{pod_name}/containers/{container_name}/files/{prefix_keyfile_name}-certificate.pem',\n"," @Data = @FileData,\n"," @KeyGuid = @Key,\n"," @Version = '0',\n"," @User = '{user}',\n"," @Group = '{group}',\n"," @Mode = '{mode}';\n","\n"," SELECT TOP 1 @FileData = doc.BulkColumn FROM OPENROWSET(BULK N'/var/opt/mssql/{key_names[pod_name]}', SINGLE_BLOB) AS doc;\n"," EXEC [dbo].[sp_set_file_data_encrypted] @FilePath = '/config/scaledsets/{scaledset_name}/pods/{pod_name}/containers/{container_name}/files/{prefix_keyfile_name}-privatekey.pem',\n"," @Data = @FileData,\n"," @KeyGuid = @Key,\n"," @Version = '0',\n"," @User = '{user}',\n"," @Group = '{group}',\n"," @Mode = '{mode}';\n","\n"," \"\"\"\n","\n"," sql += insert\n","\n","save_file(\"insert_certificates.sql\", sql)\n","\n","cwd = os.getcwd()\n","os.chdir(temp_dir) # Workaround kubectl bug on Windows, can't put c:\\ on kubectl cp cmd line \n","\n","run(f'kubectl cp insert_certificates.sql controldb-0:/var/opt/mssql/insert_certificates.sql -c mssql-server -n {namespace}')\n","\n","run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"SQLCMDPASSWORD=`cat /var/run/secrets/credentials/mssql-sa-password/password` /opt/mssql-tools/bin/sqlcmd -b -U sa -d controller -i /var/opt/mssql/insert_certificates.sql\" \"\"\")\n","\n","# Clean up\n","run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/insert_certificates.sql\" \"\"\")\n","\n","for pod_name in pods:\n","\n"," run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/{certificate_names[pod_name]}\" \"\"\")\n"," run(f\"\"\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- bash -c \"rm /var/opt/mssql/{key_names[pod_name]}\" \"\"\")\n","\n","os.chdir(cwd)"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Clear out the controller_db_rw_secret variable"]},{"cell_type":"code","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["controller_db_rw_secret= \"\""]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Get the name of the `master` `pods`"]},{"cell_type":"code","execution_count":null,"metadata":{"tags":["hide_input"]},"outputs":[],"source":["# Place the name of the master pods in variable `pods`\n","\n","podNames = run(f'kubectl get pod --selector=app=master -n {namespace} -o jsonpath={{.items[*].metadata.name}}', return_output=True)\n","pods = podNames.split(\" \")\n","\n","print(f\"Master pod names: {pods}\")"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Restart Pods"]},{"cell_type":"code","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["import threading\n","import time\n","\n","if len(pods) == 1:\n"," # One master pod indicates non-HA environment, just delete it\n"," run(f'kubectl delete pod {pods[0]} -n {namespace}')\n"," wait_for_cluster_healthy()\n","else:\n"," # HA setup, delete secondaries before primary\n"," timeout_s = 300\n"," check_interval_s = 20\n","\n"," master_primary_svc_ip = run(f'kubectl get service master-p-svc -n {namespace} -o jsonpath={{.spec.clusterIP}}', return_output=True) \n"," master_password = run(f'kubectl exec master-0 -c mssql-server -n {namespace} -- cat /var/run/secrets/credentials/pool/mssql-system-password', return_output=True) \n","\n"," def get_number_of_unsynchronized_replicas(result):\n"," cmd = 'select count(*) from sys.dm_hadr_database_replica_states where synchronization_state \u003c\u003e 2'\n"," res = run(f\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- /opt/mssql-tools/bin/sqlcmd -S {master_primary_svc_ip} -U system -P {master_password} -h -1 -q \\\"SET NOCOUNT ON; {cmd}\\\" \", return_output=True)\n"," rows = res.strip().split(\"\\n\")\n","\n"," result[0] = int(rows[0])\n"," return True\n","\n"," def get_primary_replica():\n"," cmd = 'select distinct replica_server_name from sys.dm_hadr_database_replica_states s join sys.availability_replicas r on s.replica_id = r.replica_id where is_primary_replica = 1'\n"," res = run(f\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- /opt/mssql-tools/bin/sqlcmd -S {master_primary_svc_ip} -U system -P {master_password} -h -1 -q \\\"SET NOCOUNT ON; {cmd}\\\" \", return_output=True)\n","\n"," rows = res.strip().split(\"\\n\")\n"," return rows[0]\n","\n"," def get_secondary_replicas():\n"," cmd = 'select distinct replica_server_name from sys.dm_hadr_database_replica_states s join sys.availability_replicas r on s.replica_id = r.replica_id where is_primary_replica = 0'\n"," res = run(f\"kubectl exec controldb-0 -c mssql-server -n {namespace} -- /opt/mssql-tools/bin/sqlcmd -S {master_primary_svc_ip} -U system -P {master_password} -h -1 -q \\\"SET NOCOUNT ON; {cmd}\\\" \", return_output=True)\n","\n"," rows = res.strip().split(\"\\n\")\n"," res = []\n"," for row in rows:\n"," if (row != \"\" and \"Sqlcmd: Warning\" not in row):\n"," res.append(row.strip())\n"," return res\n","\n"," def all_replicas_syncrhonized():\n"," while True:\n"," unsynchronized_replicas_cnt = len(pods)\n"," rows = [None]\n"," time.sleep(check_interval_s)\n","\n"," getNumberOfReplicasThread = threading.Thread(target=get_number_of_unsynchronized_replicas, args=(rows,) )\n"," getNumberOfReplicasThread.start()\n"," getNumberOfReplicasThread.join(timeout=timeout_s)\n","\n"," if getNumberOfReplicasThread.is_alive():\n"," raise SystemExit(\"Timeout getting the number of unsynchronized replicas.\")\n","\n"," unsynchronized_replicas_cnt = rows[0]\n"," if (unsynchronized_replicas_cnt == 0):\n"," return True\n","\n"," def wait_for_replicas_to_synchronize():\n"," waitForReplicasToSynchronizeThread = threading.Thread(target=all_replicas_syncrhonized)\n"," waitForReplicasToSynchronizeThread.start()\n"," waitForReplicasToSynchronizeThread.join(timeout=timeout_s)\n","\n"," if waitForReplicasToSynchronizeThread.is_alive():\n"," raise SystemExit(\"Timeout waiting for all replicas to be synchronized.\")\n"," \n"," secondary_replicas = get_secondary_replicas()\n","\n"," for replica in secondary_replicas:\n"," wait_for_replicas_to_synchronize()\n"," run(f'kubectl delete pod {replica} -n {namespace}')\n","\n"," primary_replica = get_primary_replica() \n"," wait_for_replicas_to_synchronize()\n","\n"," key = \"/var/run/secrets/certificates/sqlha/mssql-ha-operator-controller-client/mssql-ha-operator-controller-client-privatekey.pem\"\n"," cert = \"/var/run/secrets/certificates/sqlha/mssql-ha-operator-controller-client/mssql-ha-operator-controller-client-certificate.pem\"\n"," content_type_header = \"Content-Type: application/json\"\n"," authorization_header = \"Authorization: Certificate\"\n"," data = f'{{\"TargetReplicaName\":\"{secondary_replicas[0]}\",\"ForceFailover\":\"false\"}}'\n"," request_url = f'https://controller-svc:443/internal/api/v1/bdc/services/sql/resources/master/availabilitygroups/containedag/failover'\n","\n"," manual_failover_api_command = f\"curl -sS --key {key} --cert {cert} -X POST --header '{content_type_header}' --header '{authorization_header}' --data '{data}' {request_url}\"\n","\n"," operator_pod = run(f'kubectl get pod --selector=app=mssql-operator -n {namespace} -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n","\n"," run(f'kubectl exec {operator_pod} -c mssql-ha-operator -n {namespace} -- {manual_failover_api_command}')\n","\n"," wait_for_replicas_to_synchronize()\n","\n"," run(f'kubectl delete pod {primary_replica} -n {namespace}')\n"," wait_for_replicas_to_synchronize()"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Clean up certificate staging area\n","\n","Remove the certificate files generated on disk (they have now been\n","placed in the controller database)."]},{"cell_type":"code","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["cmd = f\"rm -r {test_cert_store_root}/{app_name}\"\n","\n","run(f'kubectl exec {controller} -c controller -n {namespace} -- bash -c \"{cmd}\"')"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Clean up temporary directory for staging configuration files"]},{"cell_type":"code","execution_count":null,"metadata":{"tags":["hide_input"]},"outputs":[],"source":["# Delete the temporary directory used to hold configuration files\n","\n","import shutil\n","\n","shutil.rmtree(temp_dir)\n","\n","print(f'Temporary directory deleted: {temp_dir}')"]},{"cell_type":"code","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["print(\"Notebook execution is complete.\")"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["Related\n","-------\n","\n","- [CER023 - Create Master certificates](../cert-management/cer023-create-master-certs.ipynb)\n","- [CER033 - Sign Master certificates with generated CA](../cert-management/cer033-sign-master-generated-certs.ipynb)\n","- [CER044 - Install signed Controller certificate](../cert-management/cer044-install-controller-cert.ipynb)\n"]}],"nbformat":4,"nbformat_minor":5,"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3"},"pansop":{"related":"CER023, CER033, CER044","test":{"strategy":"","types":null,"disable":{"reason":"","workitems":null,"types":null}},"target":{"current":"","final":""},"internal":{"parameters":null,"symlink":false},"timeout":"0"},"language_info":{"codemirror_mode":"{ Name: \"\", Version: \"\"}","file_extension":"","mimetype":"","name":"","nbconvert_exporter":"","pygments_lexer":"","version":""},"widgets":[]}}