{"cells":[{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["RUN001 - Run a notebook\n","=======================\n","\n","Description\n","-----------\n","\n","This notebook abstracts the mechanics of running and saving the results\n","of a single notebook of any kernel. It:\n","\n","1.  Use a valid notebook executor based on Kernel type\n","    (`azdata notebook run` or `Invoke-SqlNotebook`).\n","2.  Validate if a kernel threw an error (not all kernels bubble errors\n","    up to the executor).\n","3.  Save results (metrics and the .ipynb/.html output) to the Big Data\n","    Cluster.\n","    -   [RUN002 - Save result in Big Data\n","        Cluster](../notebook-runner/run002-save-result-in-bdc.ipynb)\n","\n","If `save_results_in_storage_pool` = True, ensure the T/SQL objects have\n","been setup, using: - [RUN000 - Setup Master Pool runner\n","infrastructure](../notebook-runner/run000-setup-infrastructure.ipynb)\n","\n","### Parameters\n","\n","Description of parameters:\n","\n","-   `notebook_path`: The notebook to run\n","\n","-   `namespace`: If using the SQL / PySpark/ Scala kernels specify the\n","    Big Data Cluster namespace\n","\n","-   `sql_master_pool_username`: If running the SQL kernel, specific\n","    username\n","\n","-   `sql_master_pool_password`: If running the SQL kernel, specific\n","    password\n","\n","-   `knox_username`: If running the PySpark / Scala kernel in secure\n","    (Kerberos) mode specify the Knox username (it is hardcoded to ‘root’\n","    in Basic auth), \\# e.g. admin\n","\n","-   `knox_user_domain`: i.e. AZDATA.LOCAL\n","\n","-   `knox_password`: If running the PySpark / Scala kernel specify the\n","    Knox password\n","\n","-   `save_results_in_storage_pool`: Set to “True” to save results in\n","    master pool \u0026 storage pool\n","\n","-   `app_name`: If saving results, specify any app\\_name\n","    (i.e. “my\\_app”)\n","\n","-   `app_version`: If saving results, specify any app\\_version\n","    (i.e. “v1”)\n","\n","NOTE: All types are strings, due to `azdata notebook run --arguments`\n","only supporting string type."]},{"cell_type":"code","execution_count":null,"metadata":{"tags":["parameters"]},"outputs":[],"source":["import os, getpass, datetime\n","\n","notebook_path = os.path.join(os.getcwd(), \"run500-hello-world.ipynb\")\n","\n","namespace = \"\" if \"AZDATA_NAMESPACE\" not in os.environ else os.environ[\"AZDATA_NAMESPACE\"]\n","sql_master_pool_username = \"\" if \"AZDATA_USERNAME\" not in os.environ else os.environ[\"AZDATA_USERNAME\"]\n","sql_master_pool_password = \"\" if \"AZDATA_PASSWORD\" not in os.environ else os.environ[\"AZDATA_PASSWORD\"]\n","\n","knox_username = \"\" if \"DOMAIN_SERVICE_ACCOUNT_USERNAME\" not in os.environ else os.environ[\"DOMAIN_SERVICE_ACCOUNT_USERNAME\"] \n","knox_user_domain = \"\" if \"DOMAIN_SERVICE_ACCOUNT_DOMAIN_NAME\" not in os.environ else os.environ[\"DOMAIN_SERVICE_ACCOUNT_DOMAIN_NAME\"]\n","knox_password = \"\" if \"DOMAIN_SERVICE_ACCOUNT_PASSWORD\" not in os.environ else os.environ[\"DOMAIN_SERVICE_ACCOUNT_PASSWORD\"]\n","\n","save_results_in_storage_pool = \"True\"\n","app_name = \"app-\" + getpass.getuser().lower() # set default to be app-\u003cusername\u003e\n","app_version = \"v1\"\n","\n","session_start = str(datetime.datetime.utcnow())\n","\n","NOTEBOOK_CELL_TIMEOUT = 600 # Per cell timeout in seconds (10 minutes)"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Common functions\n","\n","Define helper functions used in this notebook."]},{"cell_type":"code","execution_count":null,"metadata":{"tags":["hide_input"]},"outputs":[],"source":["# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n","import sys\n","import os\n","import re\n","import platform\n","import shlex\n","import shutil\n","import datetime\n","\n","from subprocess import Popen, PIPE\n","from IPython.display import Markdown\n","\n","retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n","error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n","install_hint = {} # The SOP to help install the executable if it cannot be found\n","\n","def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False, regex_mask=None):\n","    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n","\n","    NOTES:\n","\n","    1.  Commands that need this kind of ' quoting on Windows e.g.:\n","\n","            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n","\n","        Need to actually pass in as '\"':\n","\n","            kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n","\n","        The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n","        \n","            `iter(p.stdout.readline, b'')`\n","\n","        The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n","    \"\"\"\n","    MAX_RETRIES = 5\n","    output = \"\"\n","    retry = False\n","\n","    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n","    #\n","    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n","    #\n","    if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n","        cmd = cmd.replace(\"\\n\", \" \")\n","\n","    # shlex.split is required on bash and for Windows paths with spaces\n","    #\n","    cmd_actual = shlex.split(cmd)\n","\n","    # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n","    #\n","    user_provided_exe_name = cmd_actual[0].lower()\n","\n","    # When running python, use the python in the ADS sandbox ({sys.executable})\n","    #\n","    if cmd.startswith(\"python \"):\n","        cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n","\n","        # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n","        # with:\n","        #\n","        #    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n","        #\n","        # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n","        #\n","        if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n","            os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n","\n","    # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n","    #\n","    if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n","        cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n","\n","    # To aid supportability, determine which binary file will actually be executed on the machine\n","    #\n","    which_binary = None\n","\n","    # Special case for CURL on Windows.  The version of CURL in Windows System32 does not work to\n","    # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\".  If another instance\n","    # of CURL exists on the machine use that one.  (Unfortunately the curl.exe in System32 is almost\n","    # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n","    # look for the 2nd installation of CURL in the path)\n","    if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n","        path = os.getenv('PATH')\n","        for p in path.split(os.path.pathsep):\n","            p = os.path.join(p, \"curl.exe\")\n","            if os.path.exists(p) and os.access(p, os.X_OK):\n","                if p.lower().find(\"system32\") == -1:\n","                    cmd_actual[0] = p\n","                    which_binary = p\n","                    break\n","\n","    # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n","    # seems to be required for .msi installs of azdata.cmd/az.cmd.  (otherwise Popen returns FileNotFound) \n","    #\n","    # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n","    #\n","    if which_binary == None:\n","        which_binary = shutil.which(cmd_actual[0])\n","\n","    # Display an install HINT, so the user can click on a SOP to install the missing binary\n","    #\n","    if which_binary == None:\n","        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n","        print(sys.path)\n","\n","        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n","            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n","\n","        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n","    else:   \n","        cmd_actual[0] = which_binary\n","\n","    start_time = datetime.datetime.now().replace(microsecond=0)\n","\n","    cmd_display = cmd\n","    if regex_mask is not None:\n","        regex = re.compile(regex_mask)\n","        cmd_display = re.sub(regex, '******', cmd)\n","        \n","    print(f\"START: {cmd_display} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n","    print(f\"       using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n","    print(f\"       cwd: {os.getcwd()}\")\n","\n","    # Command-line tools such as CURL and AZDATA HDFS commands output\n","    # scrolling progress bars, which causes Jupyter to hang forever, to\n","    # workaround this, use no_output=True\n","    #\n","\n","    # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n","    #\n","    wait = True \n","\n","    try:\n","        if no_output:\n","            p = Popen(cmd_actual)\n","        else:\n","            p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n","            with p.stdout:\n","                for line in iter(p.stdout.readline, b''):\n","                    line = line.decode()\n","                    if return_output:\n","                        output = output + line\n","                    else:\n","                        if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n","                            regex = re.compile('  \"(.*)\"\\: \"(.*)\"') \n","                            match = regex.match(line)\n","                            if match:\n","                                if match.group(1).find(\"HTML\") != -1:\n","                                    display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n","                                else:\n","                                    display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n","\n","                                    wait = False\n","                                    break # otherwise infinite hang, have not worked out why yet.\n","                        else:\n","                            print(line, end='')\n","\n","        if wait:\n","            p.wait()\n","    except FileNotFoundError as e:\n","        if install_hint is not None:\n","            display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n","\n","        raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n","\n","    exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n","\n","    if not no_output:\n","        for line in iter(p.stderr.readline, b''):\n","            try:\n","                line_decoded = line.decode()\n","            except UnicodeDecodeError:\n","                # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n","                #\n","                #   \\xa0\n","                #\n","                # For example see this in the response from `az group create`:\n","                #\n","                # ERROR: Get Token request returned http error: 400 and server \n","                # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n","                # The refresh token has expired due to inactivity.\\xa0The token was \n","                # issued on 2018-10-25T23:35:11.9832872Z\n","                #\n","                # which generates the exception:\n","                #\n","                # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n","                #\n","                print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n","                print(line)\n","                line_decoded = \"\"\n","                pass\n","            else:\n","\n","                # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n","                # print this empty \"ERR:\" as it confuses.\n","                #\n","                if line_decoded == \"\":\n","                    continue\n","                \n","                print(f\"STDERR: {line_decoded}\", end='')\n","\n","                if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n","                    exit_code_workaround = 1\n","\n","                # inject HINTs to next TSG/SOP based on output in stderr\n","                #\n","                if user_provided_exe_name in error_hints:\n","                    for error_hint in error_hints[user_provided_exe_name]:\n","                        if line_decoded.find(error_hint[0]) != -1:\n","                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n","\n","                # Verify if a transient error, if so automatically retry (recursive)\n","                #\n","                if user_provided_exe_name in retry_hints:\n","                    for retry_hint in retry_hints[user_provided_exe_name]:\n","                        if line_decoded.find(retry_hint) != -1:\n","                            if retry_count \u003c MAX_RETRIES:\n","                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n","                                retry_count = retry_count + 1\n","                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n","\n","                                if return_output:\n","                                    if base64_decode:\n","                                        import base64\n","                                        return base64.b64decode(output).decode('utf-8')\n","                                    else:\n","                                        return output\n","\n","    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n","\n","    # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n","    # don't wait here, if success known above\n","    #\n","    if wait: \n","        if p.returncode != 0:\n","            raise SystemExit(f'Shell command:\\n\\n\\t{cmd_display} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n","    else:\n","        if exit_code_workaround !=0 :\n","            raise SystemExit(f'Shell command:\\n\\n\\t{cmd_display} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n","\n","    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n","\n","    if return_output:\n","        if base64_decode:\n","            import base64\n","            return base64.b64decode(output).decode('utf-8')\n","        else:\n","            return output\n","\n","\n","\n","# Hints for tool retry (on transient fault), known errors and install guide\n","#\n","retry_hints = {'azdata': ['Endpoint sql-server-master does not exist', 'Endpoint livy does not exist', 'Failed to get state for cluster', 'Endpoint webhdfs does not exist', 'Adaptive Server is unavailable or does not exist', 'Error: Address already in use', 'Login timeout expired (0) (SQLDriverConnect)', 'SSPI Provider: No Kerberos credentials available',  ], 'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond',  ], 'python': [ ], }\n","error_hints = {'azdata': [['Please run \\'azdata login\\' to first authenticate', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['The token is expired', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Reason: Unauthorized', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Max retries exceeded with url: /api/v1/bdc/endpoints', 'SOP028 - azdata login', '../common/sop028-azdata-login.ipynb'], ['Look at the controller logs for more details', 'TSG027 - Observe cluster deployment', '../diagnose/tsg027-observe-bdc-create.ipynb'], ['provided port is already allocated', 'TSG062 - Get tail of all previous container logs for pods in BDC namespace', '../log-files/tsg062-tail-bdc-previous-container-logs.ipynb'], ['Create cluster failed since the existing namespace', 'SOP061 - Delete a big data cluster', '../install/sop061-delete-bdc.ipynb'], ['Failed to complete kube config setup', 'TSG067 - Failed to complete kube config setup', '../repair/tsg067-failed-to-complete-kube-config-setup.ipynb'], ['Data source name not found and no default driver specified', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Can\\'t open lib \\'ODBC Driver 17 for SQL Server', 'SOP069 - Install ODBC for SQL Server', '../install/sop069-install-odbc-driver-for-sql-server.ipynb'], ['Control plane upgrade failed. Failed to upgrade controller.', 'TSG108 - View the controller upgrade config map', '../diagnose/tsg108-controller-failed-to-upgrade.ipynb'], ['NameError: name \\'azdata_login_secret_name\\' is not defined', 'SOP013 - Create secret for azdata login (inside cluster)', '../common/sop013-create-secret-for-azdata-login.ipynb'], ['ERROR: No credentials were supplied, or the credentials were unavailable or inaccessible.', 'TSG124 - \\'No credentials were supplied\\' error from azdata login', '../repair/tsg124-no-credentials-were-supplied.ipynb'], ['Please accept the license terms to use this product through', 'TSG126 - azdata fails with \\'accept the license terms to use this product\\'', '../repair/tsg126-accept-license-terms.ipynb'],  ], 'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb'],  ], 'python': [['Library not loaded: /usr/local/opt/unixodbc', 'SOP012 - Install unixodbc for Mac', '../install/sop012-brew-install-odbc-for-sql-server.ipynb'], ['WARNING: You are using pip version', 'SOP040 - Upgrade pip in ADS Python sandbox', '../install/sop040-upgrade-pip.ipynb'],  ], }\n","install_hint = {'azdata': [ 'SOP063 - Install azdata CLI (using package manager)', '../install/sop063-packman-install-azdata.ipynb' ],  'kubectl': [ 'SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb' ],  }\n","\n","\n","print('Common functions defined successfully.')"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Is notebook being run inside a Kubernetes cluster\n","\n","When this is notebook is running inside a Kubernetes cluster, such as\n","when running inside an App-Deploy pod, there is no KUBECONFIG present,\n","therefore azdata login needs to use the -e (endpoint) approach to login."]},{"cell_type":"code","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["import os\n","\n","if \"KUBERNETES_SERVICE_PORT\" in os.environ and \"KUBERNETES_SERVICE_HOST\" in os.environ:\n","    inside_kubernetes_cluster = True\n","    print(\"This notebook is running inside a Kubernetes cluster\")\n","else:\n","    inside_kubernetes_cluster = False\n","    print(\"This notebook is not running inside a Kubernetes cluster\")"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Get the Kubernetes namespace for the big data cluster\n","\n","Get the namespace of the Big Data Cluster use the kubectl command line\n","interface .\n","\n","**NOTE:**\n","\n","If there is more than one Big Data Cluster in the target Kubernetes\n","cluster, then either:\n","\n","-   set \\[0\\] to the correct value for the big data cluster.\n","-   set the environment variable AZDATA\\_NAMESPACE, before starting\n","    Azure Data Studio."]},{"cell_type":"code","execution_count":null,"metadata":{"tags":["hide_input"]},"outputs":[],"source":["# Place Kubernetes namespace name for BDC into 'namespace' variable\n","\n","if \"AZDATA_NAMESPACE\" in os.environ:\n","    namespace = os.environ[\"AZDATA_NAMESPACE\"]\n","else:\n","    try:\n","        namespace = run(f'kubectl get namespace --selector=MSSQL_CLUSTER -o jsonpath={{.items[0].metadata.name}}', return_output=True)\n","    except:\n","        from IPython.display import Markdown\n","        print(f\"ERROR: Unable to find a Kubernetes namespace with label 'MSSQL_CLUSTER'.  SQL Server Big Data Cluster Kubernetes namespaces contain the label 'MSSQL_CLUSTER'.\")\n","        display(Markdown(f'HINT: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.'))\n","        display(Markdown(f'HINT: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.'))\n","        display(Markdown(f'HINT: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.'))\n","        raise\n","\n","print(f'The SQL Server Big Data Cluster Kubernetes namespace is: {namespace}')"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Establish if cluster is Active Directory enabled\n","\n","An Active Directory enabled cluster will have a `dns` pod. Non Active\n","Directory enabled clusters do not have a `dns` pod."]},{"cell_type":"code","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["dns_pod = run(f'kubectl get pods -n {namespace} -o name -l app=dns', return_output=True)\n","\n","if len(dns_pod) \u003e 0:\n","    is_ad_enabled_cluster = True\n","    print(f\"Cluster {namespace} is an Active Directory enabled cluster\")\n","else:\n","    is_ad_enabled_cluster = False\n","    print(f\"Cluster {namespace} is not an Active Directory enabled cluster\")"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Notebook filename management\n","\n","The notebook to be run has many filenames over the life of it being run.\n","Here a class is defined to abstract the mechanics of these filenames."]},{"cell_type":"code","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["import os\n","\n","class NotebookPath:\n","\n","    _full_path = None\n","\n","    _azdata_logging_path = None\n","\n","    _output_filename = None\n","    _output_dir = None\n","    _output_full_path = None\n","\n","    _step = None\n","\n","    def __init__(self, full_path):\n","        self._full_path = full_path\n","        self._output_filename = f\"output-{os.path.basename(full_path)}\"\n","        self._output_dir = os.getcwd()\n","        self._output_full_path = os.path.join(self._output_dir, self._output_filename)\n","        self._azdata_logging_path = os.path.join(self._output_dir, \"tmp\", \"azdata_log-\" + os.path.basename(full_path)[:-6])\n","\n","    @property\n","    def full_path(self):\n","        return self._full_path.replace('\\\\', '\\\\\\\\')\n","\n","    @property\n","    def azdata_logging_path(self):\n","        \"\"\"To allow for concurrent execution of notebooks, ensure each notebook\n","           has it's own azdata.log flie\n","        \"\"\"\n","        return self._azdata_logging_path.replace('\\\\', '\\\\\\\\')\n","\n","    @property\n","    def output_dir(self):\n","        return self._output_dir.replace(\"\\\\\", \"\\\\\\\\\")\n","\n","    @property\n","    def output_full_path(self):\n","        \"\"\"The output filename, that is saved/uploaded to the Big Data Cluster\n","        \"\"\" \n","        return self._output_full_path.replace('\\\\', '\\\\\\\\')\n","\n","    @property\n","    def output_full_path_html(self):\n","        return self._output_full_path.replace(\".ipynb\", \".html\").replace('\\\\', '\\\\\\\\')\n","\n","    @property\n","    def output_full_path_to_pass_as_azdata_arg(self):\n","        \"\"\"When passing --arguments to azdata, an extra level of \\ escaping is needed\n","        \"\"\"\n","        return self.output_full_path.replace('\\\\', '\\\\\\\\')\n","\n","input_notebook = NotebookPath(notebook_path)\n","\n","print(f\"full_path: {input_notebook.full_path}\")\n","print(f\"output_dir: {input_notebook.output_dir}\")\n","print(f\"output_full_path: {input_notebook.output_full_path}\")\n","print(f\"output_full_path_to_pass_as_azdata_arg: {input_notebook.output_full_path_to_pass_as_azdata_arg}\")"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Set the `azdata` logging directory\n","\n","To support running multiple creates at the same time, place the\n","azdata.log separately. This code is placed here, so it runs after\n","‘injected parameters’ (which may change the app\\_name/app\\_version)"]},{"cell_type":"code","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["os.environ[\"AZDATA_LOGGING_LOG_DIR\"] = input_notebook.azdata_logging_path\n","\n","print(\"Set AZDATA_LOGGING_LOG_DIR: \" + input_notebook.azdata_logging_path)"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Notebook JSON management\n","\n","To run the notebook, the metadata of the notebook is inspected to make\n","decisions on what kernel type is required, what internal parameter\n","values are needed at exectiuon time.\n","\n","Here a class is defined to abtract the mechanics of inspecting the\n","Notebook JSON."]},{"cell_type":"code","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["import json\n","\n","class NotebookJson:\n","\n","    _json = None\n","\n","    def __init__(self, full_path):\n","        self._json = NotebookJson._load_json(full_path)\n","\n","    @property\n","    def kernel_name(self):\n","        return self._json[\"metadata\"][\"kernelspec\"][\"name\"].lower()\n","\n","    @property\n","    def timeout(self):\n","        if \"azdata\" in self._json[\"metadata\"]:\n","            if \"timeout\" in self._json[\"metadata\"][\"azdata\"]:\n","                return int(self._json[\"metadata\"][\"azdata\"][\"timeout\"])\n","            else:\n","                return NOTEBOOK_CELL_TIMEOUT\n","        else:\n","            return NOTEBOOK_CELL_TIMEOUT\n","\n","    @property\n","    def internal_parameters(self):\n","        cmdline_args = \"\"\n","\n","        if \"azdata\" in self._json[\"metadata\"]:\n","            if \"internal\" in self._json[\"metadata\"][\"azdata\"]:\n","                if \"parameters\" in self._json[\"metadata\"][\"azdata\"][\"internal\"]:\n","                    parameters = self._json[\"metadata\"][\"azdata\"][\"internal\"][\"parameters\"]\n","\n","                    cmdline_args = str(parameters).replace(\"'\", '\\\\\"') # Windows cmd line, requires \", not '\n","\n","        if cmdline_args != \"\":\n","            cmdline_args = '--arguments \"' + cmdline_args + '\"'\n","\n","        return cmdline_args\n","\n","    @property\n","    def json(self):\n","        return self._json\n","\n","    def save_as(self, filename):\n","        NotebookJson._save_json(filename, self._json)\n","\n","    @staticmethod\n","    def _load_json(filename):\n","        with open(filename, encoding=\"utf8\") as json_file:\n","            return json.load(json_file)\n","\n","    @staticmethod\n","    def _save_json(filename, contents):\n","        with open(filename, 'w', encoding=\"utf8\") as outfile:\n","            json.dump(contents, outfile, indent=4)\n","\n","input_json = NotebookJson(input_notebook.full_path)\n","\n","print(f\"Kernel type: {input_json.kernel_name}\")"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Run a SQL Kernel notebook\n","\n","Notebooks that use the SQL kernel cannot be run using\n","`azdata notebook run`, therefore the Powershell method\n","Invoke-SqlNotebook is used.\n","\n","Here the command line to execute a SQL Kernel notebook is built up:"]},{"cell_type":"code","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["def get_sql_kernel_cmd_line(input_file, output_file):\n","    exit_code = 0\n","\n","    if inside_kubernetes_cluster:\n","        sql_server_master_endpoint = \"master-p-svc,1433\"\n","    else:\n","        endpoint = run('azdata bdc endpoint list --endpoint=\"sql-server-master\"', return_output=True)\n","        endpoint = json.loads(endpoint)\n","        sql_server_master_endpoint = endpoint['endpoint']\n","\n","    print (f\"The sql-server-master endpoint: {sql_server_master_endpoint}\")\n","\n","    if platform.system() == \"Windows\":\n","        powershell_cmd = \"powershell\"\n","    else:\n","        powershell_cmd = \"pwsh\" # on Linux powershell is called 'pwsh'!\n","\n","    return f\"\"\"{powershell_cmd} -ExecutionPolicy Bypass -Command \"Invoke-SqlNotebook -InputFile {input_file} -ServerInstance \\\\\\\"{sql_server_master_endpoint}\\\\\\\" -Username {sql_master_pool_username} -Password {sql_master_pool_password} -Force -OutputFile {output_file}\" \"\"\"\n","\n","print(\"Function `get_sql_kernel_cmd_line` defined\")"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Check a SQL Kernel notebook for errors\n","\n","The SQL Kernel does not return a non-zero exit code on cell error,\n","therefore the notebook output will be inspected to look for an\n","`output_type` of `error` and will print the `evalue` and will return a\n","non-zero exit\\_code on error."]},{"cell_type":"code","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["def check_sql_kernel_for_error(j):\n","    exit_code = 0\n","\n","    for cell in j[\"cells\"]:\n","        if cell[\"cell_type\"] == \"code\":\n","            if \"outputs\" in cell:\n","                for output in cell[\"outputs\"]:\n","                    if output[\"output_type\"] == \"error\":\n","                        print(output[\"evalue\"])\n","                        exit_code = 1\n","                        break\n","\n","    return exit_code\n","\n","print(\"Function `check_sql_kernel_for_error` defined\")"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Check Spark (Scala/PySpark) kernel output for error\n","\n","The Spark (Scala) Kernel does not return a non-zero exit code on cell\n","error, therefore look for an `output_type` of `stream` with a `stderr`\n","and print the `text` and return a non-zero exit\\_code."]},{"cell_type":"code","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["def check_for_error(j):\n","    exit_code = 0\n","\n","    for cell in j[\"cells\"]:\n","        if cell[\"cell_type\"] == \"code\":\n","            if \"outputs\" in cell:\n","                for output in cell[\"outputs\"]:\n","                    if output[\"output_type\"] == \"stream\" and output[\"name\"] == \"stderr\":\n","                        exit_code = 1\n","                        break\n","\n","    return exit_code\n","\n","print(\"Function `check_for_error` defined\")"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Run the notebook\n","\n","Run the notebook, and raise an exception if a non zero exit code is\n","returned. The caller should catch the exception (SystemExit) and\n","preserve this notebook output for offline inspection.\n","\n","NOTE: Different ‘kernel’ types need different execution environments\n","(i.e. sql kernel runs via `Invoke-SqlNotebook` (powershell), most others\n","run in `azdata notebook run`)"]},{"cell_type":"code","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["from shutil import move, rmtree\n","\n","exit_code = 0\n","\n","if input_json.kernel_name == \"sql\":\n","    cmd_line = get_sql_kernel_cmd_line(input_notebook.full_path, input_notebook.output_full_path)\n","else:\n","    cmd_line = \"azdata notebook run --path {0} --output-path {1} --timeout {2} {3}\".format(\n","            input_notebook.full_path, \n","            input_notebook.output_dir, \n","            input_json.timeout, \n","            input_json.internal_parameters)\n","\n","start = datetime.datetime.utcnow()\n","\n","try:\n","    run(cmd_line)\n","except SystemExit as ex:\n","    print(ex)\n","    exit_code = 1\n","\n","end = datetime.datetime.utcnow()\n","\n","# Some kernels don't check for errors, so do that here and return non-zero exit code\n","#\n","if input_json.kernel_name == \"sql\" and exit_code == 0:\n","    output_json = NotebookJson(input_notebook.output_full_path)\n","    exit_code = check_sql_kernel_for_error(output_json.json)\n","\n","if input_json.kernel_name in [\"pyspark3kernel\", \"pysparkkernel\", \"sparkkernel\"] and exit_code == 0:\n","    output_json = NotebookJson(input_notebook.output_full_path)\n","    exit_code = check_for_error(output_json.json)\n","\n","output_json = None"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Record the results\n","\n","Save the notebook .ipynb/html output files to the Storage Pool (HDFS),\n","and record the metrics in the Master Pool (‘runner’ database)"]},{"cell_type":"code","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["from shutil import copyfile\n","\n","if save_results_in_storage_pool == \"True\":\n","    print(\"save_results_in_storage_pool: True\")\n","\n","    args = { \n","        \"session_start\": str(session_start), \n","        \"notebook_path\": input_notebook.output_full_path_to_pass_as_azdata_arg, \n","        \"app_name\": app_name,\n","        \"app_version\": app_version,\n","        \"exit_code\": str(exit_code),\n","        \"start\": str(start), \n","        \"end\": str(end)\n","    }\n","\n","    args = str(args).replace(\"'\", '\\\\\"') # Windows cmd line, requires \", not '\n","\n","    # Create a copy of run002, so the results for each notebook are seperated\n","    #\n","    run002_copy_full_path = \"run002-save-result-in-bdc-\" + os.path.basename(notebook_path)\n","\n","    # In the app-deploy app folder, the .ipynbs are flattened into one folder, in the book, they are in folders.\n","    #\n","    if os.path.exists(\"run002-save-result-in-bdc.ipynb\"):\n","        copyfile(\"run002-save-result-in-bdc.ipynb\", run002_copy_full_path)\n","    else:\n","        copyfile(os.path.join(\"..\", \"notebook-runner\", \"run002-save-result-in-bdc.ipynb\"), run002_copy_full_path)\n","    \n","    run('azdata notebook run --path {0} --output-path {1} --timeout {2} --arguments \"{3}\"'.format(\n","        run002_copy_full_path,\n","        input_notebook.output_dir,\n","        input_json.timeout,\n","        args))\n","\n","    os.remove(run002_copy_full_path)\n","\n","else:\n","    print(\"save_results_in_storage_pool: False\")"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["### Raise exception if notebook return a non-zero exit code"]},{"cell_type":"code","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["if exit_code != 0:\n","    raise SystemExit(f'{cmd_line}: returned non-zero exit code: {str(exit_code)}.')"]},{"cell_type":"code","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["print(\"Notebook execution is complete.\")"]},{"cell_type":"markdown","execution_count":null,"metadata":{"tags":[]},"outputs":[],"source":["Related\n","-------\n","\n","- [RUN000 - Setup Master Pool runner infrastructure](../notebook-runner/run000-setup-infrastructure.ipynb)\n","- [RUN002 - Save result in Big Data Cluster](../notebook-runner/run002-save-result-in-bdc.ipynb)\n"]}],"nbformat":4,"nbformat_minor":5,"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3"},"pansop":{"related":"RUN000, RUN002","test":{"strategy":"","types":null,"disable":{"reason":"","workitems":null,"types":null}},"target":{"current":"","final":""},"internal":{"parameters":null,"symlink":false},"timeout":"0"},"language_info":{"codemirror_mode":"{ Name: \"\", Version: \"\"}","file_extension":"","mimetype":"","name":"","nbconvert_exporter":"","pygments_lexer":"","version":""},"widgets":[]}}